Merge pull request #134 from stanford-crfm/jonathan/0707-all-assets
Jonathan/0707 all assets
rishibommasani authored Aug 3, 2023
2 parents fdfa98f + 5c92804 commit da06a0d
Showing 37 changed files with 1,444 additions and 21 deletions.
72 changes: 72 additions & 0 deletions assets/ai2.yaml
@@ -21,3 +21,75 @@
prohibited_uses: ''
monitoring: ''
feedback: ''

- type: dataset
name: SODA
organization: AI2
description: SODA is the first publicly available, million-scale, high-quality
dialogue dataset covering a wide range of social interactions.
created_date: 2023-05-24
url: https://arxiv.org/pdf/2212.10465.pdf
datasheet: https://huggingface.co/datasets/allenai/soda
modality: text
size: 1.5M dialogues
sample: []
analysis: Randomly sampled dialogues from the dataset are evaluated according to
six established criteria: natural flow, context dependence, topic consistency,
speaker consistency, specificity, and overall quality.
dependencies: []
included: ''
excluded: ''
quality_control: ''
access: open
license: CC BY 4.0
intended_uses: ''
prohibited_uses: ''
monitoring: ''
feedback: ''

- type: dataset
name: Multimodal C4
organization: AI2
description: An augmentation of C4 with images added and made openly available.
created_date: 2023-06-09
url: https://arxiv.org/pdf/2304.06939.pdf
datasheet: ''
modality: text and images
size: 43B English tokens with 101.2M documents and 571M images
sample: []
analysis: Conducted experiments on models trained with Multimodal C4 in comparison
to models trained on single image/caption datasets.
dependencies: [C4]
included: ''
excluded: ''
quality_control: ''
access: open
license: MIT
intended_uses: ''
prohibited_uses: ''
monitoring: ''
feedback: ''

- type: model
name: COSMO
organization: AI2
description: COSMO is a conversation agent with greater generalizability than prior
models on both in- and out-of-domain chitchat datasets.
created_date: 2023-05-24
url: https://arxiv.org/pdf/2212.10465.pdf
model_card: https://huggingface.co/allenai/cosmo-xl
modality: text
analysis: Evaluated by human testers on generalization capabilities and responses
compared to other chatbots.
size: 11B parameters
dependencies: [SODA, ProsocialDialog, T5]
training_emissions: unknown
training_time: unknown
training_hardware: v3-128 TPU accelerators with batch size 256
quality_control: ''
access: open
license: ''
intended_uses: ''
prohibited_uses: ''
monitoring: ''
feedback: https://huggingface.co/allenai/cosmo-xl/discussions
23 changes: 23 additions & 0 deletions assets/aleph_alpha.yaml
@@ -100,3 +100,26 @@
monthly_active_users: unknown
user_distribution: unknown
failures: unknown

- type: model
name: MAGMA
organization: Aleph Alpha
description: An autoregressive vision-language (VL) model that generates text from
an arbitrary combination of visual and textual input.
created_date: 2022-10-24
url: https://arxiv.org/pdf/2112.05253.pdf
model_card: ''
modality: image and text input with natural language text output
analysis: Evaluated on the OKVQA benchmark as a fully open-ended generative task.
size: 6B parameters
dependencies: [GPT-J, CLIP]
training_emissions: ''
training_time: ''
training_hardware: 32 A100 GPUs
quality_control: ''
access: open
license: MIT
intended_uses: ''
prohibited_uses: ''
monitoring: ''
feedback: ''
32 changes: 32 additions & 0 deletions assets/amazon.yaml
@@ -0,0 +1,32 @@
---

- type: application
name: Bedrock
organization: Amazon
description: Bedrock is a new service that makes foundation models (FMs) from AI21
Labs, Anthropic, Stability AI, and Amazon accessible via an API. Bedrock is intended
for customers to build and scale generative AI-based applications using FMs, democratizing
access for all builders.
created_date: 2023-04-13
url: https://aws.amazon.com/bedrock/
dependencies:
- Jurassic-2
- Claude
- Stable Diffusion
- Amazon Titan
- Claude 2
- Cohere Command
adaptation: ''
output_space: foundation models made accessible via an API
quality_control: ''
access: limited
license: unknown
terms_of_service: https://aws.amazon.com/service-terms/
intended_uses: allowing companies to incorporate generative AI into their business
models
prohibited_uses: ''
monitoring: ''
feedback: ''
monthly_active_users: ''
user_distribution: ''
failures: ''
33 changes: 33 additions & 0 deletions assets/anthropic.yaml
@@ -594,3 +594,36 @@
prohibited_uses: ''
monitoring: ''
feedback: ''

- type: model
name: Claude 2
organization: Anthropic
description: Claude 2 is a more evolved and refined version of Claude, a general-purpose
large language model based on a transformer architecture and trained via unsupervised
learning.
created_date: 2023-07-11
url: https://www.anthropic.com/index/claude-2
model_card: https://www-files.anthropic.com/production/images/Model-Card-Claude-2.pdf
modality: text
analysis: Evaluated with human feedback on helpfulness, harmfulness, and honesty
and on the Bias Benchmark for QA.
size: ''
dependencies:
- Claude human feedback data
- Unknown licensed third party datasets
training_emissions: ''
training_time: ''
training_hardware: unknown
quality_control: ''
access: open
license: ''
intended_uses: Claude 2 tends to perform well at general, open-ended conversation;
search, writing, editing, outlining, and summarizing text; coding; and providing
helpful advice about a broad range of subjects. Claude 2 is particularly well
suited to support creative or literary use cases. It can take direction on
tone and “personality,” and users have described it as feeling steerable and
conversational.
prohibited_uses: Claude 2 should not be used on its own in high-stakes situations
where an incorrect answer would cause harm.
monitoring: ''
feedback: ''
23 changes: 23 additions & 0 deletions assets/autogpt.yaml
@@ -0,0 +1,23 @@
---

- type: application
name: Auto-GPT
organization: Auto-GPT
description: Auto-GPT is an experimental open-source application showcasing the
capabilities of the GPT-4 language model.
created_date: 2023-04-16
url: https://news.agpt.co/
dependencies: [GPT-4 API]
adaptation: GPT-4 adapted to run autonomously by chaining together LLM "thoughts"
output_space: text
quality_control: ''
access: open
license: MIT
terms_of_service: ''
intended_uses: ''
prohibited_uses: ''
monitoring: ''
feedback: ''
monthly_active_users: ''
user_distribution: ''
failures: ''
72 changes: 72 additions & 0 deletions assets/berkeley.yaml
@@ -0,0 +1,72 @@
---

- type: model
name: Koala
organization: Berkeley
description: A relatively small chatbot trained by fine-tuning Meta’s LLaMA on
dialogue data gathered from the web.
created_date: 2023-04-03
url: https://bair.berkeley.edu/blog/2023/04/03/koala/
model_card: https://huggingface.co/TheBloke/koala-7B-GPTQ-4bit-128g
modality: natural language text
analysis: Evaluated in comparison with ChatGPT and Stanford Alpaca.
size: 13B parameters
dependencies: [LLaMA, web-scraped dialogue data]
training_emissions: ''
training_time: 6 hours
training_hardware: 8 A100 GPUs
quality_control: ''
access: open
license: Apache 2.0
intended_uses: academic research
prohibited_uses: ''
monitoring: ''
feedback: https://huggingface.co/TheBloke/koala-7B-GPTQ-4bit-128g/discussions

- type: model
name: Gorilla
organization: Berkeley
description: Gorilla is a fine-tuned LLaMA-based model that surpasses the performance
of GPT-4 on writing API calls.
created_date: 2023-05-24
url: https://arxiv.org/pdf/2305.15334v1.pdf
model_card: ''
modality: outputs API calls from natural language input
analysis: Evaluated using AST sub-tree matching technique and compared to other
models in terms of API functionality accuracy.
size: 7B parameters
dependencies: [LLaMA, Gorilla document retriever]
training_emissions: ''
training_time: ''
training_hardware: ''
quality_control: No specific quality control is mentioned in model training, though
details on data processing and collection are provided in the paper.
access: open
license: Apache 2.0
intended_uses: In conjunction with an LLM to improve its capability for making API
calls.
prohibited_uses: ''
monitoring: ''
feedback: ''

- type: model
name: OpenLLaMA
organization: Berkeley
description: OpenLLaMA is an open-source reproduction of Meta's LLaMA model.
created_date: 2023-05-03
url: https://github.com/openlm-research/open_llama
model_card: ''
modality: text
analysis: Evaluated on a wide range of tasks using its own evaluation benchmarks.
size: 7B parameters
dependencies: [RedPajama]
training_emissions: unknown
training_time: unknown
training_hardware: ''
quality_control: ''
access: open
license: Apache 2.0
intended_uses: ''
prohibited_uses: ''
monitoring: ''
feedback: ''
76 changes: 76 additions & 0 deletions assets/bigcode.yaml
@@ -0,0 +1,76 @@
---

- type: model
name: StarCoder
organization: BigCode
description: StarCoder is a Large Language Model for Code (Code LLM) trained on
permissively licensed data from GitHub, including from 80+ programming languages,
Git commits, GitHub issues, and Jupyter notebooks.
created_date: 2023-05-09
url: https://arxiv.org/pdf/2305.06161.pdf
model_card: https://huggingface.co/bigcode/starcoder
modality: code (80+ programming languages)
analysis: Tested on several benchmarks, most notably Python benchmark HumanEval.
size: 15.5B parameters
dependencies: [The Stack]
training_emissions: 16.68 tons of CO2eq
training_time: 2 days
training_hardware: 64 NVIDIA A100 GPUs
quality_control: No specific quality control is mentioned in model training, though
details on data processing and how the tokenizer was trained are provided in
the paper.
access: open
license: Apache 2.0
intended_uses: Intended for use with a Tech Assistant prompt and not as an instruction
model, given training limitations.
prohibited_uses: ''
monitoring: ''
feedback: https://huggingface.co/bigcode/starcoder/discussions

- type: model
name: SantaCoder
organization: BigCode
description: Multilingual code model derived from the BigCode Project's analysis
of the association between GitHub stars and data quality.
created_date: 2023-02-24
url: https://arxiv.org/pdf/2301.03988.pdf
model_card: ''
modality: code
analysis: Evaluated on MultiPL-E system benchmarks.
size: 1.1B parameters
dependencies: [The Stack, BigCode Dataset]
training_emissions: ''
training_time: 3.1 days
training_hardware: 96 NVIDIA Tesla V100 GPUs
quality_control: ''
access: open
license: Apache 2.0
intended_uses: ''
prohibited_uses: ''
monitoring: ''
feedback: ''

- type: dataset
name: The Stack
organization: BigCode
description: The Stack is a 3.1 TB dataset consisting of permissively licensed
source code intended for use in creating code LLMs.
created_date: 2022-11-20
url: https://arxiv.org/pdf/2211.15533.pdf
datasheet: https://huggingface.co/datasets/bigcode/the-stack
modality: code (358 programming languages)
size: 3.1 TB
sample: []
analysis: Evaluated models trained on The Stack on HumanEval and MBPP and compared
against similarly-sized models.
dependencies: [GitHub]
included: ''
excluded: ''
quality_control: allowed users whose data were included in The Stack to opt
out
access: open
license: Apache 2.0
intended_uses: creating code LLMs
prohibited_uses: ''
monitoring: ''
feedback: ''
25 changes: 25 additions & 0 deletions assets/casia.yaml
@@ -0,0 +1,25 @@
---

- type: model
name: BigTrans
organization: Institute of Automation Chinese Academy of Sciences
description: BigTrans is a model that adapts LLaMA, which covers only 20 languages,
and enhances it with multilingual translation capability across more than 100
languages.
url: https://arxiv.org/pdf/2305.18098v1.pdf
model_card: https://huggingface.co/James-WYang/BigTrans
modality: text
analysis: Reports results on standard translation benchmarks across 102 languages
in comparison with Google Translate and ChatGPT.
size: 13B parameters (dense model)
dependencies: [LLaMA, CLUE, BigTrans parallel dataset]
training_emissions: unknown
training_time: unknown
training_hardware: 16 A100 GPUs with 80 GB of memory
quality_control: ''
access: open
license: Apache 2.0
intended_uses: Advancing future research in multilingual LLMs
prohibited_uses: ''
monitoring: ''
feedback: https://huggingface.co/James-WYang/BigTrans/discussions
24 changes: 24 additions & 0 deletions assets/chatglm.yaml
@@ -0,0 +1,24 @@
---

- type: model
name: ChatGLM
organization: ChatGLM
description: ChatGLM is a Chinese-English language model with question-answering
and dialogue capabilities, aimed primarily at a Chinese audience.
created_date: 2023-03-14
url: https://chatglm.cn/blog
model_card: ''
modality: text
analysis: Performance evaluated on English and Chinese language benchmark tests.
size: 6B parameters
dependencies: []
training_emissions: unknown
training_time: unknown
training_hardware: ''
quality_control: ''
access: open
license: Apache 2.0
intended_uses: ''
prohibited_uses: ''
monitoring: ''
feedback: ''