Merge pull request #155 from stanford-crfm/jonathan/0226-weekly-assets
weekly update
rishibommasani authored Mar 28, 2024
2 parents 7ccb96b + d5823bb commit 2dbda5e
Showing 6 changed files with 150 additions and 21 deletions.
22 changes: 22 additions & 0 deletions assets/avignon.yaml
@@ -0,0 +1,22 @@
---
- type: model
name: BioMistral
organization: Avignon University, Nantes University
description: BioMistral is an open-source Large Language Model tailored for the biomedical domain, utilizing Mistral as its foundation model and further pre-trained on PubMed Central.
created_date: 2024-02-15
url: https://arxiv.org/pdf/2402.10373.pdf
model_card: https://huggingface.co/BioMistral/BioMistral-7B
modality: text; text
analysis: BioMistral was evaluated on a benchmark comprising 10 established medical question-answering (QA) tasks, in English and 7 other languages.
size: 7B parameters (dense)
dependencies: [Mistral, PubMed Central]
training_emissions: unknown
training_time: unknown
training_hardware: 32 NVIDIA A100 80GB GPUs
quality_control: ''
access: open
license: Apache 2.0
intended_uses: Research in the biomedical domain, especially for medical question-answering tasks.
prohibited_uses: Deployment in production environments for natural language generation, or for any professional health or medical purpose, is prohibited.
monitoring: ''
feedback: https://huggingface.co/BioMistral/BioMistral-7B/discussions
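Every model entry in this PR repeats the same fixed set of keys. A minimal sketch of checking a new entry for completeness before committing it — the field names are taken from the BioMistral entry above, but the validator itself is hypothetical, not part of the repository:

```python
# Required keys for a `type: model` entry, as seen in the diff above.
REQUIRED_MODEL_FIELDS = {
    "type", "name", "organization", "description", "created_date",
    "url", "model_card", "modality", "analysis", "size", "dependencies",
    "training_emissions", "training_time", "training_hardware",
    "quality_control", "access", "license", "intended_uses",
    "prohibited_uses", "monitoring", "feedback",
}

def missing_fields(entry: dict) -> set:
    """Return the required fields absent from a parsed model entry."""
    return REQUIRED_MODEL_FIELDS - set(entry)

# A deliberately incomplete entry, mirroring the YAML above.
entry = {
    "type": "model",
    "name": "BioMistral",
    "organization": "Avignon University, Nantes University",
    "access": "open",
    "license": "Apache 2.0",
}
print(sorted(missing_fields(entry))[:3])  # → ['analysis', 'created_date', 'dependencies']
```

Running such a check in CI would catch a weekly-update entry that drops a key before the page renders with a hole in it.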
21 changes: 21 additions & 0 deletions assets/bytedance.yaml
@@ -22,3 +22,24 @@
prohibited_uses: ''
monitoring: none
feedback: https://huggingface.co/MSIIP/SALMONN/discussions
- type: model
name: SDXL-Lightning
organization: ByteDance
description: SDXL-Lightning is a “lightning-fast” text-to-image generation model.
created_date: 2024-02-21
url: https://arxiv.org/pdf/2402.13929.pdf
model_card: https://huggingface.co/ByteDance/SDXL-Lightning
modality: text; image
analysis: Evaluated against other methods on the number of generation steps required.
size: unknown
dependencies: []
training_emissions: unknown
training_time: unknown
training_hardware: 64 NVIDIA A100 80GB GPUs
quality_control: ''
access: open
license: OpenRAIL++
intended_uses: ''
prohibited_uses: ''
monitoring: unknown
feedback: https://huggingface.co/ByteDance/SDXL-Lightning/discussions
63 changes: 42 additions & 21 deletions assets/deepmind.yaml
@@ -1,7 +1,7 @@
---
- type: dataset
name: MassiveText
organization: DeepMind
organization: Google DeepMind
description: "The MassiveText dataset was used to train the Gopher model.\n"
created_date:
explanation: "The date that Gopher was announced [[DeepMind Blog Post]] (https://www.deepmind.com/blog/language-modelling-at-scale-gopher-ethical-considerations-and-retrieval).\n"
@@ -52,7 +52,7 @@
value: unknown
- type: dataset
name: M3W
organization: DeepMind
organization: Google DeepMind
description: "M3W (MassiveWeb) is a dataset used to train Flamingo and other\
\ vision-language models, created by researchers and engineers.\n"
created_date:
@@ -90,7 +90,7 @@
value: none
- type: dataset
name: Gato dataset
organization: DeepMind
organization: Google DeepMind
description: "The Gato datasets are a collection of data used to train the Gato\
\ model.\n"
created_date:
@@ -136,7 +136,7 @@
value: none
- type: model
name: AlphaFold2
organization: DeepMind
organization: Google DeepMind
description: AlphaFold2 is a protein language model trained on protein sequences
created_date:
explanation: The date the model paper was released
@@ -168,7 +168,7 @@
feedback: ''
- type: model
name: Flamingo
organization: DeepMind
organization: Google DeepMind
description: "Flamingo is a Visual Language Model using the Transformer architecture\
\ that is intended for few-shot learning.\n"
created_date:
@@ -221,7 +221,7 @@
value: none
- type: model
name: AlphaCode
organization: DeepMind
organization: Google DeepMind
description: AlphaCode is an autoregressive language model trained on code
created_date:
explanation: The date the model paper was released
@@ -248,7 +248,7 @@
feedback: ''
- type: model
name: Gopher
organization: DeepMind
organization: Google DeepMind
description: "Gopher is an autoregressive language model based on the Transformer\
\ architecture with two modifications: using RMSNorm instead of LayerNorm and\
\ using relative positional encoding scheme instead of absolute positional encodings\
@@ -304,7 +304,7 @@
\ model card, geoffreyi at google.com [[Model Card]](https://arxiv.org/pdf/2112.11446.pdf#appendix.B).\n"
- type: model
name: Chinchilla
organization: DeepMind
organization: Google DeepMind
description: "Chinchilla is an autoregressive language model based on the Transformer\
\ architecture with improved scaling laws.\n"
created_date:
@@ -356,7 +356,7 @@
\ Card]](https://arxiv.org/pdf/2203.15556.pdf#appendix.I).\n"
- type: model
name: Gato
organization: DeepMind
organization: Google DeepMind
description: "Gato is a generalist agent based on sequence modeling using the\
\ Transformer architecture to implement multi-modal, multi-task, multi-embodiment\
\ generalist policy.\n"
@@ -402,7 +402,7 @@
\ model card, reedscot at google.com [[Model Card]](https://openreview.net/pdf?id=1ikK0kHjvj#appendix.A).\n"
- type: model
name: Sparrow
organization: DeepMind
organization: Google DeepMind
description: ''
created_date: 2022-09-28
url: https://arxiv.org/abs/2209.14375
@@ -429,7 +429,7 @@
feedback: ''
- type: model
name: RETRO
organization: DeepMind
organization: Google DeepMind
description: ''
created_date: 2021-12-08
url: https://arxiv.org/abs/2112.04426
@@ -450,7 +450,7 @@
feedback: ''
- type: model
name: Sparrow Rule reward model
organization: DeepMind
organization: Google DeepMind
description: ''
created_date: 2022-09-28
url: https://arxiv.org/abs/2209.14375
@@ -471,7 +471,7 @@
feedback: ''
- type: model
name: Sparrow Preference reward model
organization: DeepMind
organization: Google DeepMind
description: ''
created_date: 2022-09-28
url: https://arxiv.org/abs/2209.14375
@@ -492,7 +492,7 @@
feedback: ''
- type: dataset
name: Sparrow adversarial probing dataset
organization: DeepMind
organization: Google DeepMind
description: ''
created_date: 2022-09-28
url: https://arxiv.org/abs/2209.14375
@@ -513,7 +513,7 @@
feedback: ''
- type: dataset
name: Sparrow response preference dataset
organization: DeepMind
organization: Google DeepMind
description: ''
created_date: 2022-09-28
url: https://arxiv.org/abs/2209.14375
@@ -534,7 +534,7 @@
feedback: ''
- type: model
name: GopherCite
organization: DeepMind
organization: Google DeepMind
description: ''
created_date: 2022-03-16
url: https://storage.googleapis.com/deepmind-media/Teaching%20language%20models%20to%20support%20answers%20with%20verified%20quotes/Teaching%20language%20models%20to%20support%20answers%20with%20verified%20quotes.pdf
@@ -555,7 +555,7 @@
feedback: ''
- type: model
name: GopherCite reward model
organization: DeepMind
organization: Google DeepMind
description: ''
created_date: 2022-03-16
url: https://storage.googleapis.com/deepmind-media/Teaching%20language%20models%20to%20support%20answers%20with%20verified%20quotes/Teaching%20language%20models%20to%20support%20answers%20with%20verified%20quotes.pdf
@@ -576,7 +576,7 @@
feedback: ''
- type: dataset
name: GopherCite Preference dataset
organization: DeepMind
organization: Google DeepMind
description: ''
created_date: 2022-03-16
url: https://storage.googleapis.com/deepmind-media/Teaching%20language%20models%20to%20support%20answers%20with%20verified%20quotes/Teaching%20language%20models%20to%20support%20answers%20with%20verified%20quotes.pdf
@@ -597,7 +597,7 @@
feedback: ''
- type: model
name: Dramatron
organization: DeepMind
organization: Google DeepMind
description: ''
created_date: 2022-09-29
url: https://arxiv.org/abs/2209.14958
@@ -618,7 +618,7 @@
feedback: ''
- type: model
name: RT-2
organization: DeepMind
organization: Google DeepMind
description: RT-2 is a vision-language-action model for robotic actions that incorporates
chain of thought reasoning.
created_date: 2023-07-28
@@ -641,7 +641,7 @@
feedback: ''
- type: model
name: Lyria
organization: DeepMind
organization: Google DeepMind
description: Lyria is DeepMind's most advanced AI music generation model to date.
created_date: 2023-11-16
url: https://deepmind.google/discover/blog/transforming-the-future-of-music-creation/
Expand All @@ -660,3 +660,24 @@
prohibited_uses: ''
monitoring: ''
feedback: ''
- type: model
name: Genie
organization: Google DeepMind
description: Genie is a foundation world model trained on Internet videos that can generate an endless variety of playable (action-controllable) worlds from synthetic images, photographs, and even sketches.
created_date: 2024-02-23
url: https://sites.google.com/view/genie-2024
model_card: none
modality: image; video
analysis: Evaluated using only out-of-distribution image prompts for qualitative results.
size: 11B parameters (dense)
dependencies: []
training_emissions: unknown
training_time: unknown
training_hardware: unknown
quality_control: ''
access: closed
license: unknown
intended_uses: ''
prohibited_uses: ''
monitoring: ''
feedback: none
23 changes: 23 additions & 0 deletions assets/google.yaml
@@ -1731,3 +1731,26 @@
within specific downstream applications without prior assessment
monitoring: Google internal monitoring
feedback: none
- type: model
name: Gemma
organization: Google
description: Gemma is a family of lightweight, state-of-the-art open models from Google, based on the Gemini models. They are text-to-text, decoder-only large language models, available in English.
created_date: 2024-02-21
url: https://blog.google/technology/developers/gemma-open-models/
model_card: https://huggingface.co/google/gemma-7b
modality: text; text
analysis: Evaluation was conducted on standard LLM benchmarks and includes internal red-teaming testing of relevant content policies.
size: 7B parameters (dense)
dependencies: []
training_emissions: unknown
training_time: unknown
training_hardware: TPUv5e
quality_control: Multiple evaluations and red-teaming conducted, with particular focus on ethics, bias, fair use cases, and safety.
access: open
license:
explanation: License can be found at https://ai.google.dev/gemma/terms.
value: custom
intended_uses: Text generation tasks including question answering, summarization, and reasoning; content creation, communication, research, and education.
prohibited_uses: Prohibited uses are specified in the Gemma Prohibited Use Policy at https://ai.google.dev/gemma/prohibited_use_policy.
monitoring: ''
feedback: https://huggingface.co/google/gemma-7b/discussions
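The Gemma entry above shows that a field such as `license` may be either a bare string (as in the BioMistral entry) or a mapping with `explanation` and `value` keys. A minimal sketch of normalizing both shapes when consuming these YAML entries — illustrative only, not the repository's actual code:

```python
def license_value(field):
    """Return the license string whether the field is a plain scalar
    or an {explanation, value} mapping, as in the Gemma entry."""
    if isinstance(field, dict):
        # Mapping form: the machine-readable value lives under "value".
        return field.get("value", "unknown")
    return field

print(license_value("Apache 2.0"))  # → Apache 2.0
print(license_value({"explanation": "License can be found at https://ai.google.dev/gemma/terms.",
                     "value": "custom"}))  # → custom
```

The same scalar-or-mapping pattern appears on `created_date` fields elsewhere in the diff (e.g. the DeepMind datasets), so a consumer would apply this normalization to any field.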
41 changes: 41 additions & 0 deletions assets/mistral.yaml
@@ -21,3 +21,44 @@
prohibited_uses: ''
monitoring: none
feedback: https://huggingface.co/mistralai/Mistral-7B-v0.1/discussions
- type: model
name: Mistral Large
organization: Mistral AI
description: Mistral Large is Mistral AI’s new cutting-edge text generation model.
created_date: 2024-02-26
url: https://mistral.ai/news/mistral-large/
model_card: none
modality: text; text
analysis: Evaluated on commonly used benchmarks in comparison to the current LLM leaders.
size: unknown
dependencies: []
training_emissions: unknown
training_time: unknown
training_hardware: unknown
quality_control: ''
access: limited
license: unknown
intended_uses: ''
prohibited_uses: ''
monitoring: ''
feedback: none
- type: application
name: Le Chat
organization: Mistral AI
description: Le Chat is a first demonstration of what can be built with Mistral models and what can be deployed in the business environment.
created_date: 2024-02-26
url: https://mistral.ai/news/le-chat-mistral/
dependencies: [Mistral, Mistral Large]
adaptation: ''
output_space: ''
quality_control: ''
access: limited
license: unknown
terms_of_service: https://mistral.ai/terms/#terms-of-use
intended_uses: ''
prohibited_uses: ''
monitoring: ''
feedback: none
monthly_active_users: unknown
user_distribution: unknown
failures: unknown
1 change: 1 addition & 0 deletions js/main.js
@@ -678,6 +678,7 @@ function loadAssetsAndRenderPageContent() {
'assets/anthropic.yaml',
'assets/argonne.yaml',
'assets/assembly.yaml',
'assets/avignon.yaml',
'assets/baai.yaml',
'assets/baidu.yaml',
'assets/bain.yaml',
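The `js/main.js` hunk registers `assets/avignon.yaml` in what is evidently an alphabetized list of asset files. A small sketch of preserving that invariant when adding a file — a hypothetical helper for illustration, not code from the repository:

```python
import bisect

def register_asset(assets: list, path: str) -> list:
    """Insert path into the alphabetically sorted asset list,
    skipping duplicates, and return the list."""
    if path not in assets:
        bisect.insort(assets, path)  # keeps the list sorted in place
    return assets

# The neighboring entries visible in the hunk above.
assets = ["assets/anthropic.yaml", "assets/argonne.yaml",
          "assets/assembly.yaml", "assets/baai.yaml"]
register_asset(assets, "assets/avignon.yaml")
print(assets.index("assets/avignon.yaml"))  # → 3, between assembly and baai
```

This matches where the diff places the new line: after `assets/assembly.yaml` and before `assets/baai.yaml`.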
