stanford-crfm · rishibommasani · Jan 9, 2024 · Dec 9, 2023 · Dec 9, 2023 · Jan 9, 2024
diff --git a/assets/01ai.yaml b/assets/01ai.yaml
@@ -7,7 +7,8 @@
   url: https://github.com/01-ai/Yi
   model_card: https://huggingface.co/01-ai/Yi-34B
   modality: text; text
-  analysis: Evaluated on common sense reasoning and reading comprehension, analogous to LLaMA 2's analysis. 
+  analysis: Evaluated on common sense reasoning and reading comprehension, analogous
+    to LLaMA 2's analysis.
   size: 34B parameters (dense)
   dependencies: []
   training_emissions: unknown

diff --git a/assets/adept.yaml b/assets/adept.yaml
@@ -50,7 +50,8 @@
 - type: model
   name: Fuyu
   organization: Adept
-  description: Fuyu is a small version of the multimodal model that powers Adept's core product.
+  description: Fuyu is a small version of the multimodal model that powers Adept's
+    core product.
   created_date: 2023-10-17
   url: https://www.adept.ai/blog/fuyu-8b
   model_card: https://huggingface.co/adept/fuyu-8b
@@ -65,6 +66,8 @@
   access: open
   license: CC BY NC 4.0
   intended_uses: The model is intended for research purposes only.
-  prohibited_uses: The model was not trained to be factual or true representations of people or events, and therefore using the model to generate such content is out-of-scope for the abilities of this model.
+  prohibited_uses: The model was not trained to be factual or true representations
+    of people or events, and therefore using the model to generate such content
+    is out-of-scope for the abilities of this model.
   monitoring: ''
   feedback: https://huggingface.co/adept/fuyu-8b/discussions
diff --git a/assets/adobe.yaml b/assets/adobe.yaml
@@ -2,7 +2,10 @@
 - type: model
   name: Firefly Image 2
   organization: Adobe
-  description: Firefly Image 2 is the next generation of generative AI for imaging, bringing significant advancements to creative control and quality, including new Text to Image capabilities now available in the popular Firefly web app where 90% of users are new to Adobe products.
+  description: Firefly Image 2 is the next generation of generative AI for imaging,
+    bringing significant advancements to creative control and quality, including
+    new Text to Image capabilities now available in the popular Firefly web app
+    where 90% of users are new to Adobe products.
   created_date: 2023-10-10
   url: https://firefly.adobe.com/
   model_card: none
@@ -17,14 +20,17 @@
   access: closed
   license: unknown
   intended_uses: creative generation of digital art and images
-  prohibited_uses: AI/ML training, attempting to create abusive, illegal, or confidential content.
+  prohibited_uses: AI/ML training, attempting to create abusive, illegal, or confidential
+    content.
   monitoring: ''
   feedback: ''
 
 - type: model
   name: Firefly Vector
   organization: Adobe
-  description: Firefly Vector is the world’s first generative AI focused on producing vector graphics, bringing Adobe's vector graphic and generative AI expertise directly into Adobe Illustrator workflows with Text to Vector Graphic.
+  description: Firefly Vector is the world’s first generative AI focused on producing
+    vector graphics, bringing Adobe's vector graphic and generative AI expertise
+    directly into Adobe Illustrator workflows with Text to Vector Graphic.
   created_date: 2023-10-10
   url: https://firefly.adobe.com/
   model_card: none
@@ -39,14 +45,16 @@
   access: closed
   license: unknown
   intended_uses: creative generation of digital art and images
-  prohibited_uses: AI/ML training, attempting to create abusive, illegal, or confidential content.
+  prohibited_uses: AI/ML training, attempting to create abusive, illegal, or confidential
+    content.
   monitoring: ''
   feedback: ''
 
 - type: model
   name: Firefly Design
   organization: Adobe
-  description: Firefly Design powers instant generation of amazing quality template designs in Adobe Express with the new Text to Template capability.
+  description: Firefly Design powers instant generation of amazing quality template
+    designs in Adobe Express with the new Text to Template capability.
   created_date: 2023-10-10
   url: https://firefly.adobe.com/
   model_card: none
@@ -61,14 +69,17 @@
   access: closed
   license: unknown
   intended_uses: creative generation of digital art and images
-  prohibited_uses: AI/ML training, attempting to create abusive, illegal, or confidential content.
+  prohibited_uses: AI/ML training, attempting to create abusive, illegal, or confidential
+    content.
   monitoring: ''
   feedback: ''
 
 - type: application
   name: Firefly
   organization: Adobe
-  description: Adobe Firefly is a standalone web application. It offers new ways to ideate, create, and communicate while significantly improving creative workflows using generative AI.
+  description: Adobe Firefly is a standalone web application. It offers new ways
+    to ideate, create, and communicate while significantly improving creative workflows
+    using generative AI.
   created_date: 2023-03-21
   url: https://firefly.adobe.com/
   dependencies: [Firefly Image 2, Firefly Vector, Firefly Design]
@@ -79,10 +90,10 @@
   license: unknown
   terms_of_service: https://www.adobe.com/legal/licenses-terms/adobe-gen-ai-user-guidelines.html
   intended_uses: creative generation of digital art and images
-  prohibited_uses: AI/ML training, attempting to create abusive, illegal, or confidential content.
+  prohibited_uses: AI/ML training, attempting to create abusive, illegal, or confidential
+    content.
   monitoring: ''
   feedback: ''
   monthly_active_users: unknown
   user_distribution: unknown
   failures: unknown
-
diff --git a/assets/amazon.yaml b/assets/amazon.yaml
@@ -32,12 +32,14 @@
 - type: model
   name: FalconLite2
   organization: Amazon
-  description: FalconLite2 is a fine-tuned and quantized Falcon language model, capable of processing long (up to 24K tokens) input sequences.
+  description: FalconLite2 is a fine-tuned and quantized Falcon language model,
+    capable of processing long (up to 24K tokens) input sequences.
   created_date: 2023-08-08
   url: https://huggingface.co/amazon/FalconLite2
   model_card: https://huggingface.co/amazon/FalconLite2
   modality: text; text
-  analysis: Evaluated against benchmarks that are specifically designed to assess the capabilities of LLMs in handling longer contexts.
+  analysis: Evaluated against benchmarks that are specifically designed to assess
+    the capabilities of LLMs in handling longer contexts.
   size: 40B parameters (dense)
   dependencies: [Falcon]
   training_emissions: unknown

diff --git a/assets/anthropic.yaml b/assets/anthropic.yaml
@@ -527,12 +527,16 @@
 - type: model
   name: Claude 2.1
   organization: Anthropic
-  description: Claude 2.1 is an updated version of Claude 2, with an increased context window, less hallucination and tool use.
+  description: Claude 2.1 is an updated version of Claude 2, with an increased context
+    window, reduced hallucination, and support for tool use.
   created_date: 2023-11-21
   url: https://www.anthropic.com/index/claude-2-1
   model_card: none
   modality: text; text
-  analysis: Evaluated on open-ended conversation accuracy and long context question answering. In evaluations, Claude 2.1 demonstrated a 30% reduction in incorrect answers and a 3-4x lower rate of mistakenly concluding a document supports a particular claim.
+  analysis: Evaluated on open-ended conversation accuracy and long context question
+    answering. In evaluations, Claude 2.1 demonstrated a 30% reduction in incorrect
+    answers and a 3-4x lower rate of mistakenly concluding a document supports a
+    particular claim.
   size: unknown
   dependencies: []
   training_emissions: unknown

diff --git a/assets/argilla.yaml b/assets/argilla.yaml
@@ -0,0 +1,23 @@
+---
+- type: model
+  name: Notus
+  organization: Argilla
+  description: Notus is an open source LLM, fine-tuned using Direct Preference Optimization
+    (DPO) and AIF (AI Feedback) techniques.
+  created_date: 2023-12-01
+  url: https://argilla.io/blog/notus7b/
+  model_card: https://huggingface.co/argilla/notus-7b-v1
+  modality: text; text
+  analysis: Evaluated on MT-Bench and AlpacaEval benchmarks.
+  size: 7B parameters (dense)
+  dependencies: [UltraFeedback, Zephyr]
+  training_emissions: unknown
+  training_time: unknown
+  training_hardware: 8 x A100 40GB GPUs
+  quality_control: ''
+  access: open
+  license: MIT
+  intended_uses: Academic research and free commercial usage
+  prohibited_uses: ''
+  monitoring: none
+  feedback: https://huggingface.co/argilla/notus-7b-v1/discussions
diff --git a/assets/baai.yaml b/assets/baai.yaml
@@ -48,7 +48,8 @@
 - type: model
   name: JudgeLM
   organization: Beijing Academy of Artificial Intelligence
-  description: JudgeLM is a fine-tuned to be a scalable judge to evaluate LLMs efficiently and effectively in open-ended benchmarks.
+  description: JudgeLM is fine-tuned to be a scalable judge to evaluate LLMs efficiently
+    and effectively in open-ended benchmarks.
   created_date: 2023-10-26
   url: https://arxiv.org/pdf/2310.17631.pdf
   model_card: https://huggingface.co/BAAI/JudgeLM-13B-v1.0
@@ -62,14 +63,17 @@
   quality_control: ''
   access: open
   license: custom non-commercial license
-  intended_uses: Research on evaluating the performance of large language models and chatbots.
+  intended_uses: Research on evaluating the performance of large language models
+    and chatbots.
   prohibited_uses: none
   monitoring: none
   feedback: https://huggingface.co/BAAI/JudgeLM-13B-v1.0/discussions
 - type: dataset
   name: JudgeLM Dataset
   organization: Beijing Academy of Artificial Intelligence
-  description: JudgeLM Dataset is a novel dataset replete with a rich variety of seed tasks, comprehensive answers from modern LLMs, answers’ grades from the teacher judge, and detailed reasons for judgments.
+  description: JudgeLM Dataset is a novel dataset replete with a rich variety of
+    seed tasks, comprehensive answers from modern LLMs, answers’ grades from the
+    teacher judge, and detailed reasons for judgments.
   created_date: 2023-10-26
   url: https://huggingface.co/datasets/BAAI/JudgeLM-100K
   datasheet: ''
@@ -83,7 +87,8 @@
   quality_control: ''
   access: open
   license: CC BY NC 4.0
-  intended_uses: To be used to conduct instruction-tuning for language models and make the language model able to judge open-ended answer pairs.
+  intended_uses: To be used to conduct instruction-tuning for language models and
+    make the language model able to judge open-ended answer pairs.
   prohibited_uses: none
   monitoring: none
   feedback: https://huggingface.co/datasets/BAAI/JudgeLM-100K/discussions
diff --git a/assets/baichuan.yaml b/assets/baichuan.yaml
@@ -2,7 +2,9 @@
 - type: model
   name: Baichuan 2
   organization: Baichuan Inc.
-  description: Baichuan 2 is a series of large-scale multilingual language models containing 7 billion and 13 billion parameters, trained from scratch, on 2.6 trillion tokens.
+  description: Baichuan 2 is a series of large-scale multilingual language models
+    containing 7 billion and 13 billion parameters, trained from scratch, on 2.6
+    trillion tokens.
   created_date: 2023-09-20
   url: https://arxiv.org/pdf/2309.10305.pdf
   model_card: none

diff --git a/assets/baidu.yaml b/assets/baidu.yaml
@@ -85,7 +85,7 @@
   organization: Baidu
   description: ERNIE-4.0 is a multimodal generalist foundation model.
   created_date: 2023-10-17
-  url: 
+  url:
     explanation: No official statement from Baidu, so news release is provided.
     value: https://www.prnewswire.com/news-releases/baidu-launches-ernie-4-0-foundation-model-leading-a-new-wave-of-ai-native-applications-301958681.html
   model_card: none

diff --git a/assets/beitech.yaml b/assets/beitech.yaml
@@ -2,14 +2,16 @@
 - type: dataset
   name: EXMODD
   organization: Beijing Institute of Technology
-  description: EXMODD (Explanatory Multimodal Open-Domain Dialogue dataset) is a dataset built off the proposed MDCF (Multimodal Data Construction Framework).
+  description: EXMODD (Explanatory Multimodal Open-Domain Dialogue dataset) is a
+    dataset built off the proposed MDCF (Multimodal Data Construction Framework).
   created_date: 2023-10-17
   url: https://arxiv.org/pdf/2310.10967.pdf
   datasheet: none
   modality: image, text
   size: unknown
   sample: []
-  analysis: Models fine-tuned on EXMODD and earlier dataset Image-Chat and then evaluated on Image-Chat validation set.
+  analysis: Models fine-tuned on EXMODD and earlier dataset Image-Chat and then
+    evaluated on Image-Chat validation set.
   dependencies: [YFCC100M, Image-Chat]
   included: ''
   excluded: ''

diff --git a/assets/boston.yaml b/assets/boston.yaml
@@ -25,12 +25,14 @@
 - type: model
   name: UFOGen
   organization: Boston University
-  description: UFOGen is a novel generative model designed for ultra-fast, one-step text-to-image synthesis.
+  description: UFOGen is a novel generative model designed for ultra-fast, one-step
+    text-to-image synthesis.
   created_date: 2023-11-14
   url: https://arxiv.org/pdf/2311.09257.pdf
   model_card: none
   modality: text; image
-  analysis: UFOGen is evaluated on standard image benchmarks against other models fine-tuned with Stable Diffusion.
+  analysis: UFOGen is evaluated on standard image benchmarks against other models
+    fine-tuned with Stable Diffusion.
   size: 900M parameters (dense)
   dependencies: [Stable Diffusion]
   training_emissions: unknown

diff --git a/assets/bytedance.yaml b/assets/bytedance.yaml
@@ -2,12 +2,14 @@
 - type: model
   name: SALMONN
   organization: ByteDance, Tsinghua University
-  description: SALMONN is a large language model (LLM) enabling speech, audio event, and music inputs.
+  description: SALMONN is a large language model (LLM) enabling speech, audio event,
+    and music inputs.
   created_date: 2023-10-20
   url: https://github.com/bytedance/SALMONN
   model_card: https://huggingface.co/MSIIP/SALMONN
   modality: audio, text; text
-  analysis: Evaluated on benchmarks pertaining to speech, music, and other audio recognition.
+  analysis: Evaluated on benchmarks pertaining to speech, music, and other audio
+    recognition.
   size: unknown
   dependencies: [Whisper, BEATs, Vicuna]
   training_emissions: unknown

diff --git a/assets/cohere.yaml b/assets/cohere.yaml
@@ -452,7 +452,8 @@
 - type: model
   name: Cohere Embedv3 (English)
   organization: Cohere
-  description: As of release, Cohere Embedv3 is Cohere's latest and most advanced embeddings model.
+  description: As of release, Cohere Embedv3 is Cohere's latest and most advanced
+    embeddings model.
   created_date: 2023-11-02
   url: https://txt.cohere.com/introducing-embed-v3/
   model_card: https://huggingface.co/Cohere/Cohere-embed-english-v3.0

diff --git a/assets/columbia.yaml b/assets/columbia.yaml
@@ -23,7 +23,9 @@
 - type: model
   name: Ferret
   organization: Columbia
-  description: Ferret is a Multimodal Large Language Model (MLLM) capable of understanding spatial referring of any shape or granularity within an image and accurately grounding open-vocabulary descriptions.
+  description: Ferret is a Multimodal Large Language Model (MLLM) capable of understanding
+    spatial referring of any shape or granularity within an image and accurately
+    grounding open-vocabulary descriptions.
   created_date: 2023-10-11
   url: https://arxiv.org/pdf/2310.07704.pdf
   model_card: none

diff --git a/assets/continue.yaml b/assets/continue.yaml
@@ -2,8 +2,10 @@
 - type: application
   name: Continue
   organization: Continue Dev, Inc.
-  description: Continue is the open-source autopilot for software development. It is an IDE extension that brings the power of 
-    ChatGPT to VS Code and JetBrains. It’s built to be deeply customizable and continuously learn from development data. 
+  description: Continue is the open-source autopilot for software development. It
+    is an IDE extension that brings the power of ChatGPT to VS Code and JetBrains.
+    It’s built to be deeply customizable and continuously learn from development
+    data.
   created_date: 2023-07-26
   url: https://continue.dev
   dependencies:

diff --git a/assets/eleutherai.yaml b/assets/eleutherai.yaml
@@ -261,7 +261,8 @@
   url: https://arxiv.org/pdf/2310.10631.pdf
   model_card: https://huggingface.co/EleutherAI/llemma_34b
   modality: text; text
-  analysis: Evaluated on math benchmarks in comparison to general large language models.
+  analysis: Evaluated on math benchmarks in comparison to general large language
+    models.
   size: 34B parameters (dense)
   dependencies: [Proof Pile 2, Code LLaMA]
   training_emissions: unknown

diff --git a/assets/epfl.yaml b/assets/epfl.yaml
@@ -0,0 +1,23 @@
+---
+- type: model
+  name: MediTron
+  organization: EPFL, Idiap Research Institute, Open Assistant, Yale
+  description: Meditron is a large-scale medical LLM that remains open-source.
+  created_date: 2023-11-27
+  url: https://arxiv.org/pdf/2311.16079.pdf
+  model_card: https://huggingface.co/epfl-llm/meditron-70b
+  modality: text; text
+  analysis: Evaluated on TruthfulQA as main evaluation benchmark.
+  size: 70B parameters (dense)
+  dependencies: []
+  training_emissions: unknown
+  training_time: unknown
+  training_hardware: unknown
+  quality_control: ''
+  access: open
+  license: LLaMA 2
+  intended_uses: Medical exam question answering, supporting differential diagnosis,
+    disease information.
+  prohibited_uses: ''
+  monitoring: none
+  feedback: https://huggingface.co/epfl-llm/meditron-70b/discussions
diff --git a/assets/google.yaml b/assets/google.yaml
@@ -1671,10 +1671,37 @@
   quality_control: Employed de-duplication, removal of sensitive-PII and filtering.
     Added control tokens marking toxicity of text.
   access: open
-  license: Google
+  license: unknown
   intended_uses: general use large language model that can be used for language,
     reasoning, and code tasks.
   prohibited_uses: becoming part of a general-purpose service or product or use
     within specific downstream applications without prior assessment
   monitoring: Google internal monitoring
   feedback: Specific queries provided by annotators
+- type: model
+  name: Gemini
+  organization: Google
+  description: As of release, Gemini is Google's most capable and flexible AI model,
+    proficient in multimodal domains.
+  created_date: 2023-12-06
+  url: https://deepmind.google/technologies/gemini/#introduction
+  model_card: none
+  modality: image, text, video; text
+  analysis: Evaluated on standard general, reasoning, math, coding, and multimodal
+    benchmarks with results that surpass GPT-4 on almost all.
+  size:
+    explanation: Comes in sizes Ultra, Pro, and Nano.
+    value: unknown
+  dependencies: []
+  training_emissions: unknown
+  training_time: unknown
+  training_hardware: unknown
+  quality_control: ''
+  access: closed
+  license: unknown
+  intended_uses: general use large language model that can be used for language,
+    reasoning, and code tasks.
+  prohibited_uses: becoming part of a general-purpose service or product or use
+    within specific downstream applications without prior assessment
+  monitoring: Google internal monitoring
+  feedback: none