Merge pull request #156 from stanford-crfm/jonathan/0319-weekly-assets

weekly assets + gpt test

rishibommasani authored Mar 28, 2024
2 parents 2dbda5e + 3270add commit 2556d78
Showing 18 changed files with 392 additions and 15 deletions.
22 changes: 22 additions & 0 deletions assets/anthropic.yaml
@@ -570,3 +570,25 @@
monthly_active_users: unknown
user_distribution: unknown
failures: unknown
- type: model
name: Claude 3
organization: Anthropic
description: Claude 3 is a family of models that sets new industry benchmarks across a wide range of cognitive tasks.
created_date: 2024-03-04
url: https://www.anthropic.com/news/claude-3-family
model_card: https://www-cdn.anthropic.com/de8ba9b01c9ab7cbabf5c33b80b7bbc618857627/Model_Card_Claude_3.pdf
modality: image, text; text
analysis: Evaluated on reasoning, math, coding, reading comprehension, and question answering, outperforming GPT-4 on standard benchmarks.
size: unknown
dependencies: []
training_emissions: unknown
training_time: unknown
training_hardware: unknown
quality_control: Pre-trained on a diverse dataset and aligned with the Constitutional AI technique.
access: limited
license: unknown
intended_uses: Claude models excel at open-ended conversation and collaboration on ideas, and also perform exceptionally well in coding tasks and when working with text, whether searching, writing, editing, outlining, or summarizing.
prohibited_uses: Prohibited uses include, but are not limited to, political campaigning or lobbying, surveillance, social scoring, criminal justice decisions, law enforcement, and decisions related to financing, employment, and housing.
monitoring: ''
feedback: none
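All of the model entries added in this commit share the same flat field layout. As a reference point, here is a minimal sketch of loading one of these asset files and flagging entries with missing fields using PyYAML; the required-field list is inferred from the diffs in this commit, not taken from any official schema in the repository.

```python
# Minimal sketch: load an asset file and flag entries with missing fields.
# The field list below is inferred from the diffs in this commit, not from
# an official schema in the repository.
import yaml

MODEL_FIELDS = {
    "type", "name", "organization", "description", "created_date", "url",
    "model_card", "modality", "analysis", "size", "dependencies",
    "training_emissions", "training_time", "training_hardware",
    "quality_control", "access", "license", "intended_uses",
    "prohibited_uses", "monitoring", "feedback",
}

with open("assets/anthropic.yaml") as f:
    entries = yaml.safe_load(f)  # each asset file is a YAML list of entries

for entry in entries:
    if entry.get("type") == "model":
        missing = MODEL_FIELDS - entry.keys()
        if missing:
            print(f"{entry['name']}: missing {sorted(missing)}")
```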

22 changes: 22 additions & 0 deletions assets/apple.yaml
@@ -0,0 +1,22 @@
---
- type: model
name: MM1
organization: Apple
description: MM1 is a family of multimodal models, including both dense variants up to 30B and mixture-of-experts (MoE) variants up to 64B.
created_date: 2024-03-16
url: https://arxiv.org/pdf/2403.09611.pdf
model_card: none
modality: image, text; text
analysis: Evaluated on image captioning and visual question answering across many benchmarks.
size: 30B parameters (dense)
dependencies: []
training_emissions: unknown
training_time: unknown
training_hardware: unknown
quality_control: ''
access: closed
license: unknown
intended_uses: ''
prohibited_uses: ''
monitoring: ''
feedback: none
21 changes: 21 additions & 0 deletions assets/bigcode.yaml
@@ -71,3 +71,24 @@
prohibited_uses: ''
monitoring: ''
feedback: ''
- type: model
name: StarCoder2
organization: BigCode
description: A 15-billion-parameter language model trained on 600+ programming languages from The Stack v2. Training was carried out using the Fill-in-the-Middle objective on 4+ trillion tokens.
created_date: 2024-02-28
url: https://www.servicenow.com/company/media/press-room/huggingface-nvidia-launch-starcoder2.html
model_card: https://huggingface.co/bigcode/starcoder2-15b
modality: text; text
analysis: unknown
size: 15B parameters (dense)
dependencies: [The Stack v2]
training_emissions: unknown
training_time: unknown
training_hardware: 1024 x H100 GPUs
quality_control: The training data was filtered to include only permissively licensed code and code with no license. A search index is provided to identify where generated code came from, so that proper attribution can be applied.
access: open
license: BigCode OpenRAIL-M
intended_uses: Intended to generate code snippets from given context, but not for writing actual functional code directly.
prohibited_uses: Should not be used as a way to write fully functioning code without modification or verification.
monitoring: unknown
feedback: https://huggingface.co/bigcode/starcoder2-15b/discussions
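Since the StarCoder2 weights are openly available on Hugging Face, a short sketch of a completion call with the transformers library follows; this is standard causal-LM usage, not an official BigCode example, and the prompt and generation settings are illustrative.

```python
# Sketch: code completion with bigcode/starcoder2-15b via transformers.
# Standard causal-LM usage; prompt and generation settings are illustrative.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

checkpoint = "bigcode/starcoder2-15b"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForCausalLM.from_pretrained(
    checkpoint, torch_dtype=torch.bfloat16, device_map="auto"
)

inputs = tokenizer("def fibonacci(n):", return_tensors="pt").to(model.device)
outputs = model.generate(**inputs, max_new_tokens=64)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```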
14 changes: 7 additions & 7 deletions assets/bytedance.yaml
@@ -25,21 +25,21 @@
- type: model
name: SDXL-Lightning
organization: ByteDance
description: SDXL-Lightning is a lightning-fast text-to-image generation model.
description: SDXL-Lightning is a lightning-fast text-to-image generation model. It can generate high-quality 1024px images in a few steps. The models are distilled from stabilityai/stable-diffusion-xl-base-1.0. This repository contains checkpoints for 1-step, 2-step, 4-step, and 8-step distilled models.
created_date: 2024-02-21
url: https://arxiv.org/pdf/2402.13929.pdf
model_card: https://huggingface.co/ByteDance/SDXL-Lightning
modality: text; image
analysis: Evaluated in comparison to other methods in regards to steps needed.
analysis: Evaluated via qualitative comparison against other state-of-the-art image generation models.
size: unknown
dependencies: []
dependencies: [Stable Diffusion XL]
training_emissions: unknown
training_time: unknown
training_hardware: 64 A100 80G GPUs
quality_control: ''
quality_control: unknown
access: open
license: Open Rail++
intended_uses: ''
prohibited_uses: ''
license: OpenRAIL++
intended_uses: The model can be used for fast, high-quality text-to-image generation. It supports 1-step, 2-step, 4-step, and 8-step distilled models, which provide varying generation quality.
prohibited_uses: unknown
monitoring: unknown
feedback: https://huggingface.co/ByteDance/SDXL-Lightning/discussions
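Because the entry notes 1/2/4/8-step distilled UNet checkpoints, a sketch of loading the 4-step checkpoint into the SDXL base pipeline with diffusers may help; this follows the pattern described on the model card as of release, and the checkpoint filename should be verified against the repository before use.

```python
# Sketch: 4-step generation with SDXL-Lightning, adapted from the pattern on
# the model card as of release; verify checkpoint filenames before use.
import torch
from diffusers import StableDiffusionXLPipeline, UNet2DConditionModel, EulerDiscreteScheduler
from huggingface_hub import hf_hub_download
from safetensors.torch import load_file

base = "stabilityai/stable-diffusion-xl-base-1.0"
repo = "ByteDance/SDXL-Lightning"
ckpt = "sdxl_lightning_4step_unet.safetensors"  # 1/2/4/8-step variants exist

# Load the distilled UNet into the SDXL base pipeline.
unet = UNet2DConditionModel.from_config(base, subfolder="unet").to("cuda", torch.float16)
unet.load_state_dict(load_file(hf_hub_download(repo, ckpt), device="cuda"))
pipe = StableDiffusionXLPipeline.from_pretrained(
    base, unet=unet, torch_dtype=torch.float16, variant="fp16"
).to("cuda")

# The distilled models are sampled with trailing timesteps and no CFG.
pipe.scheduler = EulerDiscreteScheduler.from_config(
    pipe.scheduler.config, timestep_spacing="trailing"
)
pipe("a watercolor fox in the snow", num_inference_steps=4, guidance_scale=0).images[0].save("out.png")
```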
22 changes: 22 additions & 0 deletions assets/cagliostro.yaml
@@ -0,0 +1,22 @@
---
- type: model
name: Animagine XL 3.1
organization: Cagliostro Research Lab
description: An open-source, anime-themed text-to-image model enhanced to generate higher-quality anime-style images with a broader range of characters from well-known anime series, an optimized dataset, and new aesthetic tags for better image creation.
created_date: 2024-03-18
url: https://cagliostrolab.net/posts/animagine-xl-v31-release
model_card: https://huggingface.co/cagliostrolab/animagine-xl-3.1
modality: text; image
analysis: unknown
size: unknown
dependencies: [Animagine XL 3.0]
training_emissions: unknown
training_time: Approximately 15 days, totaling over 350 GPU hours.
training_hardware: 2x A100 80GB GPUs
quality_control: The model undergoes pretraining, first-stage finetuning, and second-stage finetuning, refining and improving aspects such as hand and anatomy rendering.
access: open
license: Fair AI Public License 1.0-SD
intended_uses: Generating high-quality anime images from textual prompts. Useful for anime fans, artists, and content creators.
prohibited_uses: Not suitable for creating realistic photos or for users who expect high-quality results from short or simple prompts.
monitoring: unknown
feedback: https://huggingface.co/cagliostrolab/animagine-xl-3.1/discussions
22 changes: 22 additions & 0 deletions assets/cognition.yaml
@@ -0,0 +1,22 @@
---
- type: model
name: Devin
organization: Cognition Labs
description: Devin is billed by Cognition Labs as the world's first fully autonomous AI software engineer.
created_date: 2024-03-12
url: https://www.cognition-labs.com/introducing-devin
model_card: none
modality: text; code
analysis: Evaluated on SWE-bench, a challenging software engineering benchmark, where Devin outperforms major state-of-the-art models unassisted.
size: unknown
dependencies: []
training_emissions: unknown
training_time: unknown
training_hardware: unknown
quality_control: ''
access: limited
license: unknown
intended_uses: ''
prohibited_uses: ''
monitoring: ''
feedback: none
22 changes: 22 additions & 0 deletions assets/cohere.yaml
@@ -474,3 +474,25 @@
prohibited_uses: ''
monitoring: none
feedback: https://huggingface.co/Cohere/Cohere-embed-english-v3.0/discussions
- type: model
name: Command-R
organization: Cohere
description: Command-R is a scalable generative model targeting retrieval-augmented generation (RAG) and tool use to enable production-scale AI for enterprise.
created_date: 2024-03-11
url: https://txt.cohere.com/command-r/
model_card: https://huggingface.co/CohereForAI/c4ai-command-r-v01
modality: text; text
analysis: none
size: 35B parameters (dense)
dependencies: []
training_emissions: unknown
training_time: unknown
training_hardware: unknown
quality_control: ''
access: open
license: CC BY-NC 4.0
intended_uses: ''
prohibited_uses: ''
monitoring: ''
feedback: https://huggingface.co/CohereForAI/c4ai-command-r-v01/discussions

22 changes: 22 additions & 0 deletions assets/dibt.yaml
@@ -0,0 +1,22 @@
---
- type: dataset
name: 10k_prompts_ranked
organization: Data is Better Together
description: 10k_prompts_ranked is a dataset of prompts with quality rankings created by 314 members of the open-source ML community using Argilla, an open-source tool to label data.
created_date: 2024-02-27
url: https://huggingface.co/blog/community-datasets
datasheet: https://huggingface.co/datasets/DIBT/10k_prompts_ranked
modality: text
size: 10k examples
sample: []
analysis: none
dependencies: []
included: ''
excluded: ''
quality_control: ''
access: open
license: unknown
intended_uses: Training and evaluating language models on prompt ranking tasks and as a dataset that can be filtered only to include high-quality prompts. These can serve as seed data for generating synthetic prompts and generations.
prohibited_uses: This dataset contains only rankings for prompts, not prompt/response pairs, so it is not suitable for direct use in supervised fine-tuning of language models.
monitoring: ''
feedback: https://huggingface.co/datasets/DIBT/10k_prompts_ranked/discussions
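Given that the stated intended use includes filtering down to high-quality prompts, a hedged sketch of doing so with the datasets library follows; the rating column name ("avg_rating") and the threshold are assumptions about the dataset's fields, not confirmed here.

```python
# Sketch: keep only highly rated prompts from 10k_prompts_ranked.
# The "avg_rating" column name and the >= 4 threshold are assumptions
# about the dataset's fields, not confirmed here.
from datasets import load_dataset

ds = load_dataset("DIBT/10k_prompts_ranked", split="train")
high_quality = ds.filter(lambda row: row["avg_rating"] is not None and row["avg_rating"] >= 4)
print(f"kept {len(high_quality)} of {len(ds)} prompts")
```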
23 changes: 23 additions & 0 deletions assets/ideogram.yaml
@@ -0,0 +1,23 @@
---
- type: model
name: Ideogram 1.0
organization: Ideogram AI
description: Ideogram 1.0 is Ideogram’s most advanced text-to-image model, as of release.
created_date: 2024-02-28
url: https://about.ideogram.ai/1.0
model_card: none
modality: text; image
analysis: Compared against DALL·E 3 in a qualitative user study.
size: unknown
dependencies: []
training_emissions: unknown
training_time: unknown
training_hardware: unknown
quality_control: ''
access: limited
license: unknown
intended_uses: ''
prohibited_uses: ''
monitoring: ''
feedback: none

26 changes: 24 additions & 2 deletions assets/inflection.yaml
@@ -2,7 +2,7 @@
- type: model
name: Inflection-1
organization: Inflection AI
description: Inflection AI's in-house LLM, which powers Pi and will soon be available via Inflection AI's conversational API.
description: Inflection AI's first version of its in-house LLM.
created_date: 2023-06-22
url: https://inflection.ai/inflection-1
@@ -31,7 +31,7 @@
in emotional intelligence.
created_date: 2023-05-02
url: https://inflection.ai/press
dependencies: [Inflection-1]
dependencies: [Inflection-2.5]
adaptation: ''
output_space: natural language text responses
quality_control: ''
@@ -70,3 +70,25 @@
prohibited_uses: ''
monitoring: ''
feedback: none
- type: model
name: Inflection-2.5
organization: Inflection AI
description: Inflection-2.5 is an upgraded in-house model that, as of its release, is competitive with the world's leading LLMs, such as GPT-4 and Gemini.
created_date: 2024-03-07
url: https://inflection.ai/inflection-2-5
model_card: none
modality: text; text
analysis: Evaluated on standard LLM and technical benchmarks in comparison to Inflection-1 and GPT-4, along with advanced STEM examinations.
size: unknown
dependencies: []
training_emissions: unknown
training_time: unknown
training_hardware: unknown
quality_control: ''
access: limited
license: unknown
intended_uses: ''
prohibited_uses: ''
monitoring: ''
feedback: none

22 changes: 22 additions & 0 deletions assets/kotoba.yaml
@@ -0,0 +1,22 @@
---
- type: model
name: Kotoba Speech
organization: Kotoba Tech
description: Kotoba-Speech is a Transformer-based generative speech model that supports fluent text-to-speech generation in Japanese and one-shot voice cloning through a speech prompt.
created_date: 2024-03-13
url: https://huggingface.co/kotoba-tech/kotoba-speech-v0.1
model_card: https://huggingface.co/kotoba-tech/kotoba-speech-v0.1
modality: text; audio
analysis: unknown
size: 1.2B parameters (dense)
dependencies: []
training_emissions: unknown
training_time: unknown
training_hardware: unknown
quality_control: ''
access: open
license: Apache 2.0
intended_uses: ''
prohibited_uses: ''
monitoring: unknown
feedback: https://huggingface.co/kotoba-tech/kotoba-speech-v0.1/discussions
42 changes: 42 additions & 0 deletions assets/nous.yaml
@@ -22,3 +22,45 @@
prohibited_uses: ''
monitoring: unknown
feedback: https://huggingface.co/NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO/discussions
- type: model
name: Hermes 2 Pro-Mistral
organization: Nous
description: Hermes 2 Pro on Mistral 7B is an upgraded, retrained version of Nous Hermes 2. This improved version excels at function calling, JSON structured outputs, and several other areas, and scores well on various benchmarks.
created_date: 2024-03-10
url: https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B
model_card: https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B
modality: text; text
analysis: The model was examined across a range of benchmarks, including GPT4All, AGIEval, BigBench, and TruthfulQA, as well as in-house evaluations of function calling and JSON mode.
size: 7B parameters (dense)
dependencies: [Mistral, OpenHermes 2.5 Dataset, Nous Hermes 2]
training_emissions: unknown
training_time: unknown
training_hardware: unknown
quality_control: The model was evaluated across multiple tasks, displaying notable scores in GPT4All, AGIEval, BigBench, and TruthfulQA. It also has a high score on function calling and JSON mode, indicating the robustness of its capabilities.
access: open
license: Apache 2.0
intended_uses: The model is intended for general task and conversation capabilities, function calling, and JSON structured outputs.
prohibited_uses: unknown
monitoring: unknown
feedback: https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B/discussions
- type: model
name: Genstruct
organization: Nous
description: Genstruct is an instruction-generation model designed to create valid instructions from a raw text corpus, enabling the creation of new, partially synthetic instruction fine-tuning datasets from any raw text. The work was inspired by Ada-Instruct, and the model is also trained to generate questions involving complex scenarios that require detailed reasoning.
created_date: 2024-03-07
url: https://huggingface.co/NousResearch/Genstruct-7B
model_card: https://huggingface.co/NousResearch/Genstruct-7B
modality: text; text
analysis: unknown
size: 7B parameters (dense)
dependencies: []
training_emissions: unknown
training_time: unknown
training_hardware: unknown
quality_control: unknown
access: open
license: Apache 2.0
intended_uses: The model is intended for instruction-generation, creating questions involving complex scenarios and generating reasoning steps for those questions.
prohibited_uses: unknown
monitoring: unknown
feedback: https://huggingface.co/NousResearch/Genstruct-7B/discussions
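To make the instruction-generation workflow concrete, here is a hedged sketch of prompting Genstruct-7B over a raw-text passage with transformers; the [[[Title]]]/[[[Content]]] prompt format is recalled from the model card and should be treated as an assumption to verify there before use.

```python
# Sketch: instruction generation with Genstruct-7B over a raw-text passage.
# The [[[Title]]]/[[[Content]]] prompt format is recalled from the model card;
# treat it as an assumption and check the card before relying on it.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "NousResearch/Genstruct-7B"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id, torch_dtype=torch.bfloat16, device_map="auto"
)

prompt = (
    "[[[Title]]] Photosynthesis\n"
    "[[[Content]]] Photosynthesis converts light energy into chemical energy "
    "stored in glucose, releasing oxygen as a byproduct.\n\n"
    "The following is an interaction between a user and an AI assistant that "
    "is related to the above text.\n\n[[[User]]] "
)
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
out = model.generate(**inputs, max_new_tokens=128)
print(tokenizer.decode(out[0], skip_special_tokens=True))
```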
22 changes: 22 additions & 0 deletions assets/nvidia.yaml
@@ -88,3 +88,25 @@
prohibited_uses: ''
monitoring: ''
feedback: ''
- type: model
name: Nemotron 4
organization: Nvidia
description: Nemotron 4 is a 15-billion-parameter large multilingual language model trained on 8 trillion text tokens.
created_date: 2024-02-27
url: https://arxiv.org/pdf/2402.16819.pdf
model_card: none
modality: text; code, text
analysis: Evaluated on standard LLM benchmarks across areas such as reasoning, code generation, and mathematical skills.
size: 15B parameters (dense)
dependencies: []
training_emissions: unknown
training_time: 13 days
training_hardware: 3072 H100 80GB SXM5 GPUs across 384 DGX H100 nodes
quality_control: Deduplication and quality filtering techniques are applied to the training dataset.
access: open
license: unknown
intended_uses: ''
prohibited_uses: ''
monitoring: unknown
feedback: none
