add assets
jxue16 committed Jun 4, 2024
1 parent 04c4735 commit ef41eb2
Showing 30 changed files with 491 additions and 90 deletions.
22 changes: 22 additions & 0 deletions assets/360.yaml
@@ -0,0 +1,22 @@
---
- type: model
name: 360 Zhinao
organization: 360 Security
description: 360 Zhinao is a multilingual LLM in Chinese and English with chat capabilities.
created_date: 2024-05-23
url: https://arxiv.org/pdf/2405.13386
model_card: none
modality: text; text
analysis: Achieved competitive performance on relevant benchmarks against other 7B models in Chinese, English, and coding tasks.
size: 7B parameters
dependencies: []
training_emissions: unknown
training_time: unknown
training_hardware: unknown
quality_control: ''
access: open
license: unknown
intended_uses: ''
prohibited_uses: ''
monitoring: ''
feedback: none
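The asset files in this commit share a common per-entry schema. As a rough illustration of how these files can be consumed (the field list below is inferred from the model entries shown in this diff, not from an authoritative spec; dataset entries differ slightly, e.g. datasheet instead of model_card), a minimal PyYAML-based loader and sanity check might look like:

```python
# Minimal sketch: load an asset file and flag entries missing expected
# fields. The field list is inferred from the model entries in this
# commit, not from an authoritative schema.
import yaml

COMMON_FIELDS = {
    "type", "name", "organization", "description", "created_date", "url",
    "modality", "size", "dependencies", "training_emissions", "training_time",
    "training_hardware", "quality_control", "access", "license",
    "intended_uses", "prohibited_uses", "monitoring", "feedback",
}

def check_assets(path: str) -> None:
    with open(path) as f:
        entries = yaml.safe_load(f)  # each file is a top-level list of entries
    for entry in entries:
        missing = COMMON_FIELDS - entry.keys()
        if missing:
            print(f"{entry.get('name', '<unnamed>')}: missing {sorted(missing)}")

check_assets("assets/360.yaml")
```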
6 changes: 4 additions & 2 deletions assets/adobe.yaml
@@ -101,7 +101,8 @@
- type: dataset
name: CulturaX
organization: University of Oregon, Adobe
description: CulturaX is a substantial multilingual dataset with 6.3 trillion tokens in 167 languages, tailored for LLM development.
created_date: 2023-09-17
url: https://arxiv.org/pdf/2309.09400
datasheet: https://huggingface.co/datasets/uonlp/CulturaX
@@ -116,6 +117,7 @@
access: open
license: mC4, OSCAR
intended_uses: ''
prohibited_uses: The data must not be utilized for malicious or harmful purposes towards humanity.
monitoring: unknown
feedback: https://huggingface.co/datasets/uonlp/CulturaX/discussions
3 changes: 2 additions & 1 deletion assets/ai2.yaml
@@ -259,7 +259,8 @@
- type: dataset
name: MADLAD-400
organization: AI2
description: MADLAD-400 is a document-level multilingual dataset based on Common Crawl, covering 419 languages in total.
created_date: 2023-09-09
url: https://arxiv.org/abs/2309.04662
datasheet: https://huggingface.co/datasets/allenai/MADLAD-400
21 changes: 16 additions & 5 deletions assets/alibaba.yaml
@@ -145,23 +145,34 @@
- type: model
name: SeaLLM v2.5
organization: DAMO Academy, Alibaba
description: SeaLLM v2.5 is a multilingual large language model for Southeast Asian (SEA) languages.
created_date: 2024-04-12
url: https://github.com/DAMO-NLP-SG/SeaLLMs
model_card: https://huggingface.co/SeaLLMs/SeaLLM-7B-v2.5
modality: text; text
analysis: The model was evaluated on 3 benchmarks (MMLU for English, M3Exam (M3e) for English, Chinese, Vietnamese, Indonesian, and Thai, and VMLU for Vietnamese) and it outperformed GPT-3 and Vistral-7B-chat models across these benchmarks in the given languages.
size: 7B parameters
dependencies: [Gemma]
training_emissions: unknown
training_time: unknown
training_hardware: unknown
quality_control: Despite efforts in red teaming and safety fine-tuning, the creators suggest that developers and stakeholders perform their own red teaming and provide related security measures before deployment, and that deployments comply with local governance and regulations.
access: open
license:
explanation: License can be found at https://huggingface.co/SeaLLMs/SeaLLM-13B-Chat/blob/main/LICENSE
value: custom
intended_uses: The model is intended for multilingual tasks such as knowledge retrieval, math reasoning, and instruction following. It can also be used to provide multilingual assistance.
prohibited_uses: The model should not be used in a way that could lead to inaccurate, misleading, or potentially harmful generation. Users should comply with local laws and regulations when deploying the model.
monitoring: unknown
feedback: https://huggingface.co/SeaLLMs/SeaLLM-7B-v2.5/discussions
20 changes: 15 additions & 5 deletions assets/apple.yaml
@@ -25,21 +25,31 @@
- type: model
name: OpenELM
organization: Apple
description: OpenELM is a family of Open-source Efficient Language Models. It uses a layer-wise scaling strategy to efficiently allocate parameters within each layer of the transformer model, leading to enhanced accuracy.
created_date: 2024-04-24
url: https://machinelearning.apple.com/research/openelm
model_card: https://huggingface.co/apple/OpenELM-3B-Instruct
modality: text; text
analysis: The models were evaluated in terms of zero-shot, LLM360, and OpenLLM leaderboard results.
size: 3B parameters
dependencies: [RefinedWeb, The Pile, RedPajama-Data, Dolma, CoreNet library]
training_emissions: unknown
training_time: unknown
training_hardware: unknown
quality_control: unknown
access: open
license: Apple
intended_uses: To empower and enrich the open research community by providing access to state-of-the-art language models.
prohibited_uses: No explicit prohibited uses stated, though it is noted that users should undertake thorough safety testing.
monitoring: none
feedback: https://huggingface.co/apple/OpenELM-3B-Instruct/discussions
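The layer-wise scaling mentioned in the OpenELM entry above varies each transformer layer's width rather than keeping it uniform across the stack. A toy sketch of the idea follows; the linear interpolation and the specific numbers are illustrative assumptions, not Apple's exact recipe:

```python
# Sketch of layer-wise scaling: instead of giving every transformer layer
# the same number of attention heads and FFN width, interpolate both
# linearly from the first layer to the last. All values are illustrative.
def layerwise_config(num_layers: int, dim: int = 2048,
                     min_heads: int = 8, max_heads: int = 32,
                     min_ffn_mult: float = 1.0, max_ffn_mult: float = 4.0):
    configs = []
    for i in range(num_layers):
        t = i / max(num_layers - 1, 1)  # 0.0 at the first layer, 1.0 at the last
        heads = round(min_heads + t * (max_heads - min_heads))
        ffn_dim = int(dim * (min_ffn_mult + t * (max_ffn_mult - min_ffn_mult)))
        configs.append({"layer": i, "heads": heads, "ffn_dim": ffn_dim})
    return configs

for cfg in layerwise_config(4):
    print(cfg)
```

The intuition is that early layers can be narrower without hurting accuracy, so the parameter budget is shifted toward later layers.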
22 changes: 22 additions & 0 deletions assets/cartesia.yaml
@@ -0,0 +1,22 @@
---
- type: model
name: Sonic
organization: Cartesia
description: Sonic is a low-latency voice model that generates lifelike speech. Developed by Cartesia, it was designed to be an efficient real-time AI capable of processing any-sized contexts and running on any device.
created_date: 2024-05-29
url: https://cartesia.ai/blog/sonic
model_card: none
modality: text; audio
analysis: Extensive testing on the Multilingual Librispeech dataset resulted in 20% lower validation perplexity. In downstream evaluations, this leads to a 2x lower word error rate and a 1-point higher quality score. Sonic also displays impressive performance metrics at inference, achieving lower latency (1.5x lower time-to-first-audio), faster inference speed (2x lower real-time factor), and higher throughput (4x).
size: unknown
dependencies: [Multilingual Librispeech dataset]
training_emissions: unknown
training_time: unknown
training_hardware: unknown
quality_control: ''
access: limited
license: unknown
intended_uses: Sonic has potential applications across customer support, entertainment, and content creation and is a part of Cartesia's broader mission to bring real-time multimodal intelligence to every device.
prohibited_uses: unknown
monitoring: unknown
feedback: Contact through the provided form or via email at [email protected].
34 changes: 31 additions & 3 deletions assets/cohere.yaml
@@ -546,12 +546,14 @@
- type: model
name: Rerank 3
organization: Cohere
description: Rerank 3 is a new foundation model for efficient enterprise search and retrieval with 4k context length.
created_date: 2024-04-11
url: https://cohere.com/blog/rerank-3
model_card: none
modality: text; text
analysis: Evaluated on code retrieval and data retrieval capabilities, with improvements compared to the standard in both.
size: unknown
dependencies: []
training_emissions: unknown
@@ -560,7 +562,33 @@
quality_control: ''
access: limited
license: unknown
intended_uses: Efficient enterprise search and retrieval.
prohibited_uses: ''
monitoring: unknown
feedback: none
- type: model
name: Aya 23
organization: Cohere
description: Aya 23 is an open weights research release of an instruction fine-tuned
model with multilingual capabilities. It focuses on pairing a highly performant
pre-trained Command family of models with the recently released Aya Collection.
This model supports 23 languages.
created_date: 2024-05-31
url: https://arxiv.org/pdf/2405.15032
model_card: https://huggingface.co/CohereForAI/aya-23-35B
modality: text; text
analysis: Evaluated across 23 languages with the highest results in all tasks
and languages compared to other multilingual language models.
size: 35B parameters
dependencies: []
training_emissions: unknown
training_time: unknown
training_hardware: unknown
quality_control: unknown
access: open
license: CC-BY-NC
intended_uses: This model is designed to be used for multilingual tasks covering
23 languages.
prohibited_uses: unknown
monitoring: unknown
feedback: https://huggingface.co/CohereForAI/aya-23-35B/discussions
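Since Aya 23's weights are open, the checkpoint referenced in the model card above can be exercised directly with Hugging Face transformers. A minimal usage sketch (assumes sufficient accelerator memory for a 35B-parameter model; the prompt format uses the generic chat-template API rather than anything taken from the model card):

```python
# Sketch: run the open Aya 23 (35B) checkpoint with Hugging Face transformers.
# Assumes enough accelerator memory for a 35B-parameter model; smaller
# checkpoints in the family would follow the same pattern.
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "CohereForAI/aya-23-35B"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto")

messages = [{"role": "user", "content": "Translate to Turkish: How are you?"}]
input_ids = tokenizer.apply_chat_template(
    messages, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
output = model.generate(input_ids, max_new_tokens=64)
print(tokenizer.decode(output[0], skip_special_tokens=True))
```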
63 changes: 63 additions & 0 deletions assets/deepmind.yaml
@@ -684,3 +684,66 @@
prohibited_uses: ''
monitoring: ''
feedback: none
- type: model
name: Imagen 3
organization: Google DeepMind
description: Imagen 3 is a high-quality text-to-image model, capable of generating images with better detail, richer lighting, and fewer distracting artifacts compared to previous models. Improved understanding of prompts allows for a wide range of visual styles and captures small details from longer prompts. It also understands prompts written in natural, everyday language, making it easier to use. Imagen 3 is available in multiple versions, optimized for different types of tasks, from generating quick sketches to high-resolution images.
created_date: 2024-05-14
url: https://deepmind.google/technologies/imagen-3/
model_card: none
modality: text; image
analysis: The model was tested and evaluated on various prompts to assess its understanding of natural language, its ability to generate high-quality images in various formats and styles, and its rendering of fine details and complex textures. Red teaming and evaluations were conducted on topics including fairness, bias, and content safety.
size: unknown
dependencies: []
training_emissions: unknown
training_time: unknown
training_hardware: unknown
quality_control: Extensive filtering and data labeling were used to minimize harmful content in datasets and reduce the likelihood of harmful outputs. Privacy, safety, and security technologies were leveraged in deploying the model, including the watermarking tool SynthID.
access: limited
license: unknown
intended_uses: Generate high-quality images for various purposes, from photorealistic landscapes to textured oil paintings or whimsical claymation scenes. It is useful in situations where detailed visual representation is required based on the textual description.
prohibited_uses: unknown
monitoring: Through the digital watermarking tool SynthID, embedded in pixels for detection and identification.
feedback: unknown
- type: model
name: Veo
organization: Google DeepMind
description: Veo is Google DeepMind's most capable video generation model to date. It generates high-quality, 1080p resolution videos that can go beyond a minute, in a wide range of cinematic and visual styles. It accurately captures the nuance and tone of a prompt, and provides an unprecedented level of creative control. The model is also capable of maintaining visual consistency in video frames, and supports masked editing.
created_date: 2024-05-14
url: https://deepmind.google/technologies/veo/
model_card: none
modality: text; video
analysis: unknown
size: unknown
dependencies: []
training_emissions: unknown
training_time: unknown
training_hardware: unknown
quality_control: Videos created by Veo are watermarked using SynthID, DeepMind's tool for watermarking and identifying AI-generated content, and passed through safety filters and memorization checking processes to mitigate privacy, copyright, and bias risks.
access: limited
license: unknown
intended_uses: Veo is intended to help create tools that make video production accessible to everyone. It can be used by filmmakers, creators, or educators for storytelling, education and more. Some of its features will be also brought to products like YouTube Shorts.
prohibited_uses: unknown
monitoring: unknown
feedback: Feedback from leading creators and filmmakers is incorporated to improve Veo's generative video technologies.
- type: model
name: Gemini 1.5 Flash
organization: Google DeepMind
description: Gemini Flash is a lightweight model, optimized for speed and efficiency. It features multimodal reasoning and a breakthrough long context window of up to one million tokens. It is designed to serve at scale and is cost-efficient, providing quality results at a fraction of the cost of larger models.
created_date: 2024-05-30
url: https://deepmind.google/technologies/gemini/flash/
model_card: none
modality: audio, image, text, video; text
analysis: The model was evaluated on various benchmarks like MMLU (general), Natural2Code (code), MATH, GPQA, Big-Bench, WMT23, MMMU, and MathVista, providing performance measures across domains like multilingual translation, image processing, and code generation.
size: unknown
dependencies: []
training_emissions: unknown
training_time: unknown
training_hardware: unknown
quality_control: The research team is continually exploring new ideas at the frontier of AI and building innovative products for consistent progress.
access: limited
license: Google's Terms and Conditions
intended_uses: The model is intended for developer and enterprise use cases. It can process hours of video and audio, and hundreds of thousands of words or lines of code, making it beneficial for a wide range of tasks.
prohibited_uses: ''
monitoring: unknown
feedback: none
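Since access is limited to the API, a minimal call sketch using the google-generativeai Python SDK is below; the model identifier and SDK surface reflect the public API around the time of this commit and may change:

```python
# Sketch: call Gemini 1.5 Flash through the google-generativeai SDK.
# Requires a valid API key; "YOUR_API_KEY" is a placeholder.
import google.generativeai as genai

genai.configure(api_key="YOUR_API_KEY")
model = genai.GenerativeModel("gemini-1.5-flash")
response = model.generate_content("Summarize the benefits of long context windows.")
print(response.text)
```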
12 changes: 9 additions & 3 deletions assets/eleutherai.yaml
@@ -299,12 +299,17 @@
- type: model
name: Pile-T5
organization: EleutherAI
description: Pile-T5 is a version of the broadly used T5 model, improved to eliminate weaknesses such as the omission of crucial code-related tokens. It utilizes the LLaMA tokenizer and is trained on the Pile, offering enhancements for finetuning on downstream tasks, particularly those involving code.
created_date: 2024-04-15
url: https://blog.eleuther.ai/pile-t5/
model_card: none
modality: text; text
analysis: The models were evaluated on SuperGLUE and CodeXGLUE, as well as MMLU and Bigbench Hard. Comparisons with T5v1.1 found that Pile-T5 models performed better in most conditions.
size: unknown
dependencies: [The Pile, T5x, LLaMA, umT5]
training_emissions: unknown
@@ -313,7 +318,8 @@
quality_control: ''
access: open
license: unknown
intended_uses: The model is aimed at downstream tasks that benefit from the encoder-decoder architecture. It is particularly useful for tasks involving code.
prohibited_uses: unknown
monitoring: unknown
feedback: unknown
13 changes: 10 additions & 3 deletions assets/fuse.yaml
@@ -2,12 +2,17 @@
- type: model
name: FuseChat
organization: FuseAI
description: FuseChat is a powerful chat large language model (LLM) that integrates multiple structure- and scale-varied chat LLMs using a fuse-then-merge strategy. The fusion is done in two stages.
created_date: 2024-02-26
url: https://arxiv.org/abs/2402.16107
model_card: https://huggingface.co/FuseAI/FuseChat-7B-VaRM
modality: text; text
analysis: The FuseChat model was evaluated on MT-Bench, which comprises 80 multi-turn dialogues spanning writing, roleplay, reasoning, math, coding, STEM, and humanities domains. It yields an average performance of 66.52, with specific scores for individual domains available in the leaderboard results.
size: 7B parameters
dependencies: [Nous Hermes 2, OpenChat 3.5]
training_emissions: unknown
Expand All @@ -16,7 +21,9 @@
quality_control: none
access: open
license: Apache 2.0
intended_uses: FuseChat is intended to be used as a powerful chatbot that takes text inputs and provides text-based responses. It can be utilized in a variety of domains including writing, roleplay, reasoning, math, coding, STEM, and humanities.
prohibited_uses: unknown
monitoring: unknown
feedback: https://huggingface.co/FuseAI/FuseChat-7B-VaRM/discussions
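For intuition on the "merge" half of FuseChat's fuse-then-merge strategy, a toy sketch of parameter-space merging is below. Plain weighted averaging is a deliberate simplification; FuseChat's actual VaRM method derives the merging weights from variation ratios rather than fixing them by hand:

```python
# Toy sketch of parameter-space merging: combine same-architecture
# checkpoints by a weighted average of their state dicts. This is a
# simplification of FuseChat's VaRM merging, which computes the weights
# from variation ratios instead of hand-picking them.
import torch

def merge_state_dicts(state_dicts, weights):
    assert abs(sum(weights) - 1.0) < 1e-6, "weights should sum to 1"
    merged = {}
    for key in state_dicts[0]:
        merged[key] = sum(w * sd[key] for w, sd in zip(weights, state_dicts))
    return merged

# Tiny stand-in "models" to show the mechanics:
a = {"linear.weight": torch.ones(2, 2)}
b = {"linear.weight": torch.zeros(2, 2)}
print(merge_state_dicts([a, b], [0.7, 0.3])["linear.weight"])  # all 0.7
```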