From a404acec1fee392d1c55f3b6cbe18267dff3cbc1 Mon Sep 17 00:00:00 2001 From: jxue16 <105090474+jxue16@users.noreply.github.com> Date: Fri, 7 Jul 2023 18:42:48 -0700 Subject: [PATCH 01/10] add all assets --- assets/ares.yaml | 25 +++++++++++++++++++++++++ assets/autogpt.yaml | 23 +++++++++++++++++++++++ assets/bark.yaml | 24 ++++++++++++++++++++++++ assets/bedrock.yaml | 24 ++++++++++++++++++++++++ assets/beit.yaml | 25 +++++++++++++++++++++++++ assets/bigtrans.yaml | 25 +++++++++++++++++++++++++ assets/biomedgpt.yaml | 30 ++++++++++++++++++++++++++++++ assets/camel.yaml | 27 +++++++++++++++++++++++++++ assets/cformers.yaml | 23 +++++++++++++++++++++++ assets/chatglm.yaml | 24 ++++++++++++++++++++++++ assets/corgi.yaml | 27 +++++++++++++++++++++++++++ assets/cosmo.yaml | 25 +++++++++++++++++++++++++ assets/deepfloyd.yaml | 26 ++++++++++++++++++++++++++ assets/docugami.yaml | 24 ++++++++++++++++++++++++ assets/falcon.yaml | 25 +++++++++++++++++++++++++ assets/goat.yaml | 27 +++++++++++++++++++++++++++ assets/gorilla.yaml | 27 +++++++++++++++++++++++++++ assets/guanaco.yaml | 26 ++++++++++++++++++++++++++ assets/h2ogpt.yaml | 23 +++++++++++++++++++++++ assets/inflection-1.yaml | 25 +++++++++++++++++++++++++ assets/jointslm.yaml | 25 +++++++++++++++++++++++++ assets/koala.yaml | 24 ++++++++++++++++++++++++ assets/legomt.yaml | 24 ++++++++++++++++++++++++ assets/magma.yaml | 24 ++++++++++++++++++++++++ assets/mpt.yaml | 24 ++++++++++++++++++++++++ assets/multimodal.yaml | 24 ++++++++++++++++++++++++ assets/nextdoor.yaml | 24 ++++++++++++++++++++++++ assets/openflamingo.yaml | 25 +++++++++++++++++++++++++ assets/openfold.yaml | 23 +++++++++++++++++++++++ assets/openllama.yaml | 23 +++++++++++++++++++++++ assets/otter.yaml | 27 +++++++++++++++++++++++++++ assets/palm2.yaml | 30 ++++++++++++++++++++++++++++++ assets/palmyra.yaml | 26 ++++++++++++++++++++++++++ assets/peer.yaml | 28 ++++++++++++++++++++++++++++ assets/pi.yaml | 23 +++++++++++++++++++++++ assets/portkey.yaml | 23 +++++++++++++++++++++++ assets/pythia.yaml | 25 +++++++++++++++++++++++++ assets/redpajama.yaml | 24 ++++++++++++++++++++++++ assets/refinedweb.yaml | 25 +++++++++++++++++++++++++ assets/roentgen.yaml | 25 +++++++++++++++++++++++++ assets/santacoder.yaml | 24 ++++++++++++++++++++++++ assets/soda.yaml | 26 ++++++++++++++++++++++++++ assets/stablelm.yaml | 23 +++++++++++++++++++++++ assets/stack.yaml | 26 ++++++++++++++++++++++++++ assets/starcoder.yaml | 28 ++++++++++++++++++++++++++++ assets/transformify.yaml | 23 +++++++++++++++++++++++ assets/underwritegpt.yaml | 22 ++++++++++++++++++++++ assets/unilm.yaml | 24 ++++++++++++++++++++++++ assets/vicuna.yaml | 24 ++++++++++++++++++++++++ assets/voicebox.yaml | 25 +++++++++++++++++++++++++ assets/vqganclip.yaml | 25 +++++++++++++++++++++++++ assets/wizardlm.yaml | 28 ++++++++++++++++++++++++++++ 52 files changed, 1299 insertions(+) create mode 100644 assets/ares.yaml create mode 100644 assets/autogpt.yaml create mode 100644 assets/bark.yaml create mode 100644 assets/bedrock.yaml create mode 100644 assets/beit.yaml create mode 100644 assets/bigtrans.yaml create mode 100644 assets/biomedgpt.yaml create mode 100644 assets/camel.yaml create mode 100644 assets/cformers.yaml create mode 100644 assets/chatglm.yaml create mode 100644 assets/corgi.yaml create mode 100644 assets/cosmo.yaml create mode 100644 assets/deepfloyd.yaml create mode 100644 assets/docugami.yaml create mode 100644 assets/falcon.yaml create mode 100644 assets/goat.yaml create mode 100644 assets/gorilla.yaml create 
mode 100644 assets/guanaco.yaml create mode 100644 assets/h2ogpt.yaml create mode 100644 assets/inflection-1.yaml create mode 100644 assets/jointslm.yaml create mode 100644 assets/koala.yaml create mode 100644 assets/legomt.yaml create mode 100644 assets/magma.yaml create mode 100644 assets/mpt.yaml create mode 100644 assets/multimodal.yaml create mode 100644 assets/nextdoor.yaml create mode 100644 assets/openflamingo.yaml create mode 100644 assets/openfold.yaml create mode 100644 assets/openllama.yaml create mode 100644 assets/otter.yaml create mode 100644 assets/palm2.yaml create mode 100644 assets/palmyra.yaml create mode 100644 assets/peer.yaml create mode 100644 assets/pi.yaml create mode 100644 assets/portkey.yaml create mode 100644 assets/pythia.yaml create mode 100644 assets/redpajama.yaml create mode 100644 assets/refinedweb.yaml create mode 100644 assets/roentgen.yaml create mode 100644 assets/santacoder.yaml create mode 100644 assets/soda.yaml create mode 100644 assets/stablelm.yaml create mode 100644 assets/stack.yaml create mode 100644 assets/starcoder.yaml create mode 100644 assets/transformify.yaml create mode 100644 assets/underwritegpt.yaml create mode 100644 assets/unilm.yaml create mode 100644 assets/vicuna.yaml create mode 100644 assets/voicebox.yaml create mode 100644 assets/vqganclip.yaml create mode 100644 assets/wizardlm.yaml diff --git a/assets/ares.yaml b/assets/ares.yaml new file mode 100644 index 00000000..3af17a66 --- /dev/null +++ b/assets/ares.yaml @@ -0,0 +1,25 @@ +--- + +- type: application + name: ARES + organization: Faraday Lab + description: ARES is a text-to-image generator based on Stable Diffusion. The goal is to provide a simple tool with a user interface allowing mainstream AI access for artists and creators. + created_date: + value: 2023-04-26 + explanation: The date Faraday Lab's hugging face card was last updated. Date + for ARES release is not published on site. + url: https://faradaylab.fr/ + dependencies: [Stable Diffusion] + adaptation: '' + output_space: generated images + quality_control: '' + access: open + license: unknown + terms_of_service: '' + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' + monthly_active_users: '' + user_distribution: '' + failures: '' diff --git a/assets/autogpt.yaml b/assets/autogpt.yaml new file mode 100644 index 00000000..5a3a040f --- /dev/null +++ b/assets/autogpt.yaml @@ -0,0 +1,23 @@ +--- + +- type: application + name: Auto-GPT + organization: Auto-GPT + description: Auto-GPT is an experimental open-source application showcasing the + capabilities of the GPT-4 language model. + created_date: 2023-04-16 + url: https://news.agpt.co/ + dependencies: [GPT-4 API] + adaptation: GPT-4 adapted to run autonomously by chaining together LLM "thoughts" + output_space: text + quality_control: '' + access: open + license: MIT + terms_of_service: '' + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' + monthly_active_users: '' + user_distribution: '' + failures: '' diff --git a/assets/bark.yaml b/assets/bark.yaml new file mode 100644 index 00000000..6e277fb6 --- /dev/null +++ b/assets/bark.yaml @@ -0,0 +1,24 @@ +--- + +- type: model + name: Bark + organization: Suno + description: Bark is a text-to-audio model that can generate multilingual speech + as well as other noises. 
+ created_date: 2023-04-20 + url: https://github.com/suno-ai/bark + model_card: https://github.com/suno-ai/bark/blob/main/model-card.md + modality: text input with audio output + analysis: '' + size: '' + dependencies: [AudioLM] + training_emissions: unknown + training_time: unknown + training_hardware: '' + quality_control: '' + access: open + license: MIT + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: https://huggingface.co/spaces/suno/bark/discussions diff --git a/assets/bedrock.yaml b/assets/bedrock.yaml new file mode 100644 index 00000000..1022592a --- /dev/null +++ b/assets/bedrock.yaml @@ -0,0 +1,24 @@ +--- + +- type: application + name: Bedrock + organization: Amazon + description: Bedrock is a new service that makes FMs from AI21 Labs, Anthropic, Stability AI, and Amazon accessible via an API. Bedrock is intended for customers to build and scale generative AI-based applications using FMs, democratizing access for all builders. + created_date: 2023-04-13 + url: https://aws.amazon.com/bedrock/ + dependencies: [Jurassic-2, Claude, Stable Diffusion, Amazon Titan] + adaptation: '' + output_space: foundation models made accessible via an API + quality_control: '' + access: limited + license: unknown + terms_of_service: https://aws.amazon.com/service-terms/ + intended_uses: allowing companies to incorporate generative AI into their business + models + prohibited_uses: '' + monitoring: '' + feedback: '' + monthly_active_users: '' + user_distribution: '' + failures: '' diff --git a/assets/beit.yaml b/assets/beit.yaml new file mode 100644 index 00000000..adabec71 --- /dev/null +++ b/assets/beit.yaml @@ -0,0 +1,25 @@ +--- + +- type: model + name: BEiT-3 + organization: Microsoft + description: BEiT-3 is a general-purpose multimodal foundation model for vision + and vision-language tasks. + created_date: 2022-08-31 + url: https://arxiv.org/pdf/2208.10442.pdf + model_card: '' + modality: text and image input and output + analysis: Evaluated on a range of standardized vision benchmarks, achieving + state-of-the-art performance on all of them.
+ size: 1.9B parameters + dependencies: [Multiway Transformer network] + training_emissions: unknown + training_time: '' + training_hardware: '' + quality_control: '' + access: open + license: '' + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' diff --git a/assets/bigtrans.yaml b/assets/bigtrans.yaml new file mode 100644 index 00000000..0634f801 --- /dev/null +++ b/assets/bigtrans.yaml @@ -0,0 +1,25 @@ +--- + +- type: model + name: BigTrans + organization: Institute of Automation Chinese Academy of Sciences + description: BigTrans is a model that adapts LLaMA, which covers only 20 languages, + and enhances it with multilingual translation capability for more than 100 languages. + created_date: 2023-05-29 + url: https://arxiv.org/pdf/2305.18098v1.pdf + model_card: https://huggingface.co/James-WYang/BigTrans + modality: text + analysis: Reports results on standard translation benchmarks across 102 languages + in comparison with Google Translate and ChatGPT + size: 13B parameters (dense model) + dependencies: [LLaMA, CLUE, BigTrans parallel dataset] + training_emissions: unknown + training_time: unknown + training_hardware: 16 A100 GPUs with 80 GB of RAM + quality_control: '' + access: open + license: Apache 2.0 + intended_uses: Advancing future research in multilingual LLMs + prohibited_uses: '' + monitoring: '' + feedback: https://huggingface.co/James-WYang/BigTrans/discussions diff --git a/assets/biomedgpt.yaml b/assets/biomedgpt.yaml new file mode 100644 index 00000000..9e8df418 --- /dev/null +++ b/assets/biomedgpt.yaml @@ -0,0 +1,30 @@ +--- + +- type: model + name: BiomedGPT + organization: Lehigh University + description: BiomedGPT leverages self-supervision on large and diverse datasets + to accept multi-modal inputs and perform a range of downstream tasks. + created_date: 2023-05-26 + url: https://arxiv.org/pdf/2305.17100.pdf + model_card: '' + modality: image and text input, text output + analysis: Outperforms the majority of preceding state-of-the-art models across 15 unique + biomedical modalities. + size: 472M parameters + dependencies: + - GPT-style autoregressive decoder + - BiomedGPT biomedical datasets + training_emissions: unknown + training_time: unknown + training_hardware: 10 NVIDIA A5000 GPUs + quality_control: No specific quality control is mentioned in model training, though + details on data processing and how the model was trained are provided in the + paper. + access: open + license: Apache 2.0 + intended_uses: furthering research in developing unified and generalist models + for biomedicine. + prohibited_uses: '' + monitoring: '' + feedback: '' diff --git a/assets/camel.yaml b/assets/camel.yaml new file mode 100644 index 00000000..3ac188dd --- /dev/null +++ b/assets/camel.yaml @@ -0,0 +1,27 @@ +--- + +- type: model + name: Camel + organization: Writer + description: Camel is an instruction-following large language model tailored + for advanced NLP and comprehension capabilities. + created_date: + value: 2023-04-01 + explanation: The model was stated to be published in April, but which day is + not specified on the website.
+ url: https://chatcamel.vercel.app/ + model_card: https://huggingface.co/Writer/camel-5b-hf + modality: text + analysis: '' + size: 5B parameters + dependencies: [Palmyra, Camel dataset] + training_emissions: unknown + training_time: unknown + training_hardware: '' + quality_control: '' + access: open + license: Apache 2.0 + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: https://huggingface.co/Writer/camel-5b-hf/discussions diff --git a/assets/cformers.yaml b/assets/cformers.yaml new file mode 100644 index 00000000..f2b8fe84 --- /dev/null +++ b/assets/cformers.yaml @@ -0,0 +1,23 @@ +--- + +- type: application + name: Cformers + organization: Nolano + description: Cformers is a set of transformers that act as an API for AI inference + in code. + created_date: 2023-03-19 + url: https://www.nolano.org/services/Cformers/ + dependencies: [] + adaptation: '' + output_space: '' + quality_control: '' + access: limited + license: MIT + terms_of_service: '' + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' + monthly_active_users: '' + user_distribution: '' + failures: '' diff --git a/assets/chatglm.yaml b/assets/chatglm.yaml new file mode 100644 index 00000000..f41b3a75 --- /dev/null +++ b/assets/chatglm.yaml @@ -0,0 +1,24 @@ +--- + +- type: model + name: ChatGLM + organization: ChatGLM + description: ChatGLM is a Chinese-English language model with question and answer + and dialogue functions, and is aimed at a Chinese audience. + created_date: 2023-03-14 + url: https://chatglm.cn/blog + model_card: '' + modality: text + analysis: Performance evaluated on English and Chinese language benchmark tests. + size: 6B parameters + dependencies: [GLM-130B] + training_emissions: unknown + training_time: unknown + training_hardware: '' + quality_control: '' + access: open + license: Apache 2.0 + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' diff --git a/assets/corgi.yaml b/assets/corgi.yaml new file mode 100644 index 00000000..968b1228 --- /dev/null +++ b/assets/corgi.yaml @@ -0,0 +1,27 @@ +--- + +- type: model + name: CORGI + organization: Stanford + description: Model trained to generate language corrections for physical control + tasks. + created_date: 2023-06-12 + url: https://arxiv.org/pdf/2306.07012.pdf + model_card: '' + modality: trajectory input with text output + analysis: Evaluated on three physical control tasks, drawing, steering, and human + body movement on various dynamics + size: 124M parameters + dependencies: [GPT-2, BABEL, text-davinci-003] + training_emissions: '' + training_time: + value: unknown + explanation: The authors do not report the training time, but do report that they train for 200 epochs. + training_hardware: one NVIDIA A40 GPU + quality_control: '' + access: open + license: MIT + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' diff --git a/assets/cosmo.yaml b/assets/cosmo.yaml new file mode 100644 index 00000000..6b5230a4 --- /dev/null +++ b/assets/cosmo.yaml @@ -0,0 +1,25 @@ +--- + +- type: model + name: COSMO + organization: Allen Institute for AI + description: COSMO is a conversation agent with greater generalizability on both + in- and out-of-domain chitchat datasets + created_date: 2023-05-24 + url: https://arxiv.org/pdf/2212.10465.pdf + model_card: https://huggingface.co/allenai/cosmo-xl + modality: text + analysis: Evaluated by human testers on generalization capabilities and responses + compared to other chatbots. 
+ size: 11B parameters + dependencies: [SODA, ProsocialDialog, T5] + training_emissions: unknown + training_time: unknown + training_hardware: v3-128 TPU accelerators with batch size 256 + quality_control: '' + access: open + license: '' + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: https://huggingface.co/allenai/cosmo-xl/discussions diff --git a/assets/deepfloyd.yaml b/assets/deepfloyd.yaml new file mode 100644 index 00000000..b04b90ae --- /dev/null +++ b/assets/deepfloyd.yaml @@ -0,0 +1,26 @@ +--- + +- type: model + name: DeepFloyd IF + organization: Stability AI + description: A text-to-image cascaded pixel diffusion model released in conjunction + with AI research lab DeepFloyd. + created_date: 2023-04-28 + url: https://stability.ai/blog/deepfloyd-if-text-to-image-model + model_card: https://huggingface.co/DeepFloyd/IF-I-XL-v1.0 + modality: textual input with image output + analysis: Evaluated on the COCO dataset. + size: 4.3B parameters + dependencies: [LAION-5B] + training_emissions: '' + training_time: '' + training_hardware: '' + quality_control: '' + access: open + license: + value: custom + explanation: License agreement can be found on model card https://huggingface.co/DeepFloyd/IF-I-XL-v1.0 + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: https://huggingface.co/DeepFloyd/IF-I-XL-v1.0/discussions diff --git a/assets/docugami.yaml b/assets/docugami.yaml new file mode 100644 index 00000000..b1b56bf6 --- /dev/null +++ b/assets/docugami.yaml @@ -0,0 +1,24 @@ +--- + +- type: model + name: Docugami + organization: Docugami + description: Docugami is an LLM focused on writing business documents and data + using generative AI. + created_date: 2021-04-12 + url: https://www.docugami.com/generative-ai + model_card: '' + modality: text + analysis: '' + size: 20B parameters + dependencies: [] + training_emissions: unknown + training_time: unknown + training_hardware: '' + quality_control: '' + access: limited + license: '' + intended_uses: analyzing, writing, and connecting business documents and data + prohibited_uses: '' + monitoring: '' + feedback: '' diff --git a/assets/falcon.yaml b/assets/falcon.yaml new file mode 100644 index 00000000..b1fede02 --- /dev/null +++ b/assets/falcon.yaml @@ -0,0 +1,25 @@ +--- + +- type: model + name: Falcon + organization: UAE Technology Innovation Institute + description: Falcon-40B is a 40B-parameter causal decoder-only model built by + TII and trained on 1,000B tokens of RefinedWeb enhanced with curated corpora. + created_date: 2023-06-14 + url: https://huggingface.co/tiiuae/falcon-40b + model_card: https://huggingface.co/tiiuae/falcon-40b + modality: natural language text + analysis: '' + size: 40B parameters + dependencies: [RefinedWeb] + training_emissions: '' + training_time: 2 months + training_hardware: 384 A100 40GB GPUs + quality_control: '' + access: open + license: Apache 2.0 + intended_uses: '' + prohibited_uses: irresponsible or harmful use or production use without adequate + assessment of risks and mitigation + monitoring: None + feedback: https://huggingface.co/tiiuae/falcon-40b/discussions diff --git a/assets/goat.yaml b/assets/goat.yaml new file mode 100644 index 00000000..5e0ab85c --- /dev/null +++ b/assets/goat.yaml @@ -0,0 +1,27 @@ +--- + +- type: model + name: GOAT + organization: National University of Singapore + description: GOAT is a fine-tuned LLaMA model which leverages the consistent tokenization of numbers + to significantly outperform benchmark standards on a range of arithmetic tasks.
+ created_date: 2023-05-23 + url: https://arxiv.org/pdf/2305.14201.pdf + model_card: none + modality: textual input, numerical data output + analysis: Performance assessed on BIG-bench arithmetic sub-task, and various elementary + arithmetic tasks. + size: 7B parameters + dependencies: [LLaMA, GOAT dataset] + training_emissions: unknown + training_time: unknown + training_hardware: 24 GB VRAM GPU + quality_control: Number data is randomly generated from log space to reduce likelihood + of redundancy and range of magnitudes. + access: open + license: Apache 2.0 + intended_uses: Integration into other instruction-tuned LLMs to further enhance + arithmetic reasoning abilities in solving math word problems. + prohibited_uses: '' + monitoring: '' + feedback: '' diff --git a/assets/gorilla.yaml b/assets/gorilla.yaml new file mode 100644 index 00000000..d6c440d4 --- /dev/null +++ b/assets/gorilla.yaml @@ -0,0 +1,27 @@ +--- + +- type: model + name: Gorilla + organization: UC Berkeley + description: Gorilla is a finetuned LLaMA-based model that surpasses the performance + of GPT-4 on writing API calls. + created_date: 2023-05-24 + url: https://arxiv.org/pdf/2305.15334v1.pdf + model_card: '' + modality: outputs API from natural language input + analysis: Evaluated using AST sub-tree matching technique and compared to other + models in terms of API functionality accuracy. + size: 7B parameters + dependencies: [LLaMA, Gorilla document retriever] + training_emissions: '' + training_time: '' + training_hardware: '' + quality_control: No specific quality control is mentioned in model training, though + details on data processing and collection are provided in the paper. + access: open + license: Apache 2.0 + intended_uses: In conjunction with a LLM to improve its capability for using API + calls. + prohibited_uses: '' + monitoring: '' + feedback: '' diff --git a/assets/guanaco.yaml b/assets/guanaco.yaml new file mode 100644 index 00000000..32208f00 --- /dev/null +++ b/assets/guanaco.yaml @@ -0,0 +1,26 @@ +--- + +- type: model + name: Guanaco + organization: University of Washington + description: Guanaco is a model family trained with QLORA, an efficient finetuning + approach that reduces memory usage enough to finetune a 65B parameter model + on a single 48GB GPU while preserving full 16-bit finetuning task performance. + created_date: 2023-05-23 + url: https://arxiv.org/pdf/2305.14314v1.pdf + model_card: '' + modality: natural language text + analysis: Reports results on the Vicuna benchmark and compares performance level + and time expenditure with ChatGPT + size: 33B parameters + dependencies: [QLoRA, OASST1] + training_emissions: '' + training_time: '' + training_hardware: A single 24 GB GPU + quality_control: '' + access: open + license: MIT + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' diff --git a/assets/h2ogpt.yaml b/assets/h2ogpt.yaml new file mode 100644 index 00000000..601e629d --- /dev/null +++ b/assets/h2ogpt.yaml @@ -0,0 +1,23 @@ +--- + +- type: model + name: h2oGPT + organization: H2O AI + description: Series of models fine-tuned on well-known LLMs using the h2oGPT repositories. + created_date: 2023-06-16 + url: https://arxiv.org/pdf/2306.08161.pdf + model_card: https://huggingface.co/h2oai/h2ogpt-oasst1-512-20b + modality: text + analysis: Evaluated on EleutherAI evaluation harness. 
+ size: 20B parameters + dependencies: [GPT-NeoX, H2O AI OpenAssistant, h2oGPT Repositories] + training_emissions: unknown + training_time: unknown + training_hardware: unspecified number of 48GB A100 NVIDIA GPUs + quality_control: '' + access: open + license: Apache 2.0 + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: https://huggingface.co/h2oai/h2ogpt-oasst1-512-20b/discussions diff --git a/assets/inflection-1.yaml b/assets/inflection-1.yaml new file mode 100644 index 00000000..1af3f04e --- /dev/null +++ b/assets/inflection-1.yaml @@ -0,0 +1,25 @@ +--- + +- type: model + name: Inflection-1 + organization: Inflection AI + description: Inflection AI's in-house LLM, which powers Pi and will soon be available via Inflection AI's conversational API. + created_date: 2023-06-22 + url: https://inflection.ai/inflection-1 + model_card: '' + modality: text + analysis: Evaluated on a wide range of language benchmarks like MMLU 5-shot, GSM-8K, and HellaSwag 10-shot, among others. + size: + value: unknown + explanation: Designed to be roughly ChatGPT's size (175B parameters). + dependencies: [] + training_emissions: '' + training_time: '' + training_hardware: unknown + quality_control: '' + access: limited + license: unknown + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' diff --git a/assets/jointslm.yaml b/assets/jointslm.yaml new file mode 100644 index 00000000..0465fc82 --- /dev/null +++ b/assets/jointslm.yaml @@ -0,0 +1,25 @@ +--- + +- type: model + name: Google Joint SLM + organization: Google + description: Joint speech and language model using a Speech2Text adapter and + CTC-based blank filtering. + created_date: 2023-06-08 + url: https://arxiv.org/pdf/2306.07944.pdf + model_card: '' + modality: speech input with textual output + analysis: Evaluated on the DSTC11 Challenge Task, based on MultiWoz 2.1, with a focus + on dialog state tracking. + size: '' + dependencies: [CTC blank-filtering, Speech2Text adapter] + training_emissions: '' + training_time: '' + training_hardware: '' + quality_control: '' + access: open + license: Google + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' diff --git a/assets/koala.yaml b/assets/koala.yaml new file mode 100644 index 00000000..03b456c7 --- /dev/null +++ b/assets/koala.yaml @@ -0,0 +1,24 @@ +--- + +- type: model + name: Koala + organization: Berkeley + description: A relatively small chatbot trained by fine-tuning Meta’s LLaMA on + dialogue data gathered from the web. + created_date: 2023-04-03 + url: https://bair.berkeley.edu/blog/2023/04/03/koala/ + model_card: https://huggingface.co/TheBloke/koala-7B-GPTQ-4bit-128g + modality: natural language text + analysis: Evaluated in comparison with ChatGPT and Stanford Alpaca. + size: 13B parameters + dependencies: [LLaMA, web-scraped dialogue data] + training_emissions: '' + training_time: 6 hours + training_hardware: 8 A100 GPUs + quality_control: '' + access: open + license: Apache 2.0 + intended_uses: academic research + prohibited_uses: '' + monitoring: '' + feedback: https://huggingface.co/TheBloke/koala-7B-GPTQ-4bit-128g/discussions diff --git a/assets/legomt.yaml b/assets/legomt.yaml new file mode 100644 index 00000000..fb7a3353 --- /dev/null +++ b/assets/legomt.yaml @@ -0,0 +1,24 @@ +--- + +- type: model + name: Lego-MT + organization: Shanghai AI Laboratory + description: Lego-MT is a multilingual large language model which takes the more + efficient approach of being an effective detachable model.
+ created_date: 2023-05-29 + url: https://arxiv.org/pdf/2212.10551.pdf + model_card: '' + modality: multilingual text + analysis: Evaluated based on own constructed dataset covering 433 languages. + size: 1.2B parameters + dependencies: [OPUS] + training_emissions: unknown + training_time: 15 days + training_hardware: 32 A100 GPUs + quality_control: '' + access: open + license: '' + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' diff --git a/assets/magma.yaml b/assets/magma.yaml new file mode 100644 index 00000000..7b290942 --- /dev/null +++ b/assets/magma.yaml @@ -0,0 +1,24 @@ +--- + +- type: model + name: MAGMA + organization: Aleph Alpha + description: An autoregressive VL model that is able to generate text from an + arbitrary combination of visual and textual input + created_date: 2022-10-24 + url: https://arxiv.org/pdf/2112.05253.pdf + model_card: '' + modality: image and text input with natural language text output + analysis: Evaluated on the OKVQA benchmark as a fully open-ended generative task. + size: 6B parameters + dependencies: [GPT-J, CLIP] + training_emissions: '' + training_time: '' + training_hardware: 32 A100 GPUs + quality_control: '' + access: open + license: MIT + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' diff --git a/assets/mpt.yaml b/assets/mpt.yaml new file mode 100644 index 00000000..5d97e67e --- /dev/null +++ b/assets/mpt.yaml @@ -0,0 +1,24 @@ +--- + +- type: model + name: MPT + organization: Mosaic + description: MPT is a series of large language models seeking to address the limitations + of other open source models like LLaMA and Pythia. + created_date: 2023-05-05 + url: https://www.mosaicml.com/blog/mpt-7b + model_card: '' + modality: text + analysis: Evaluated on a range of benchmarks and performed on par with LLaMA-7B. + size: 7B parameters + dependencies: [] + training_emissions: unknown + training_time: 9.5 days + training_hardware: 440 A100 40GB GPUs + quality_control: '' + access: open + license: Apache 2.0 + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' diff --git a/assets/multimodal.yaml b/assets/multimodal.yaml new file mode 100644 index 00000000..4f7727cc --- /dev/null +++ b/assets/multimodal.yaml @@ -0,0 +1,24 @@ +--- + +- type: dataset + name: Multimodal C4 + organization: Allen Institute for AI + description: An augmentation of C4 with images added and made openly available. + created_date: 2023-06-09 + url: https://arxiv.org/pdf/2304.06939.pdf + datasheet: '' + modality: text and images + size: 43B English tokens with 101.2M documents and 571M images + sample: [] + analysis: Conducted experiments on models trained with Multimodal C4 in comparison + to models trained on single image/caption datasets + dependencies: [C4] + included: '' + excluded: '' + quality_control: '' + access: open + license: MIT + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' diff --git a/assets/nextdoor.yaml b/assets/nextdoor.yaml new file mode 100644 index 00000000..0dfafe79 --- /dev/null +++ b/assets/nextdoor.yaml @@ -0,0 +1,24 @@ +--- + +- type: application + name: Nextdoor Assistant + organization: Nextdoor + description: AI chatbot on Nextdoor that helps users write more clear and conscientious + posts. 
+ created_date: 2023-05-02 + url: https://help.nextdoor.com/s/article/Introducing-Assistant + dependencies: [ChatGPT] + adaptation: '' + output_space: natural language text guidance + quality_control: '' + access: open + license: unknown + terms_of_service: '' + intended_uses: to be used to help make the Nextdoor experience more positive for + users + prohibited_uses: '' + monitoring: '' + feedback: '' + monthly_active_users: '' + user_distribution: '' + failures: '' diff --git a/assets/openflamingo.yaml b/assets/openflamingo.yaml new file mode 100644 index 00000000..1a79d3db --- /dev/null +++ b/assets/openflamingo.yaml @@ -0,0 +1,25 @@ +--- + +- type: model + name: OpenFlamingo + organization: LAION + description: An open-source reproduction of DeepMind's Flamingo model. At its + core, OpenFlamingo is a framework that enables training and evaluation of large + multimodal models (LMMs). + created_date: 2023-03-28 + url: https://laion.ai/blog/open-flamingo/ + model_card: https://github.com/mlfoundations/open_flamingo/blob/main/MODEL_CARD.md + modality: image and text input with textual output + analysis: Evaluated on COCO captioning and VQAv2 vision-language tasks. + size: 9B parameters + dependencies: [LLaMA, CLIP] + training_emissions: '' + training_time: '' + training_hardware: '' + quality_control: '' + access: open + license: MIT + intended_uses: academic research purposes + prohibited_uses: commercial use + monitoring: '' + feedback: '' diff --git a/assets/openfold.yaml b/assets/openfold.yaml new file mode 100644 index 00000000..5723593a --- /dev/null +++ b/assets/openfold.yaml @@ -0,0 +1,23 @@ +--- + +- type: model + name: OpenFold + organization: Columbia University + description: OpenFold is an open source recreation of AlphaFold2. + created_date: 2022-11-20 + url: https://www.biorxiv.org/content/10.1101/2022.11.20.517210v2 + model_card: '' + modality: protein structures + analysis: Evaluated on a wide range of tasks using its own evaluation benchmarks. + size: '' + dependencies: [AlphaFold2, OpenProteinSet] + training_emissions: unknown + training_time: 50,000 GPU hours + training_hardware: Single A100 NVIDIA GPU + quality_control: '' + access: open + license: CC BY 4.0 + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' diff --git a/assets/openllama.yaml b/assets/openllama.yaml new file mode 100644 index 00000000..a43c19cb --- /dev/null +++ b/assets/openllama.yaml @@ -0,0 +1,23 @@ +--- + +- type: model + name: OpenLLaMA + organization: Berkeley AI Research + description: OpenLLaMA is an open-source reproduction of Meta's LLaMA model. + created_date: 2023-05-03 + url: https://github.com/openlm-research/open_llama + model_card: '' + modality: text + analysis: Evaluated on a wide range of tasks using its own evaluation benchmarks. + size: 17B parameters + dependencies: [LLaMA, RedPajama] + training_emissions: unknown + training_time: unknown + training_hardware: '' + quality_control: '' + access: open + license: Apache 2.0 + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' diff --git a/assets/otter.yaml b/assets/otter.yaml new file mode 100644 index 00000000..02f40c24 --- /dev/null +++ b/assets/otter.yaml @@ -0,0 +1,27 @@ +--- + +- type: model + name: Otter + organization: Nanyang Technological University + description: Otter is a multi-modal model based on OpenFlamingo (open-sourced + version of DeepMind’s Flamingo), trained on MIMIC-IT and showcasing improved + instruction-following ability and in-context learning.
+ created_date: 2023-05-05 + url: https://arxiv.org/pdf/2305.03726v1.pdf + model_card: https://github.com/Luodian/Otter/blob/main/docs/model_card.md + modality: outputs text from text and image input + analysis: Evaluated on researcher experiments to test deeper understanding and + advanced commonsense reasoning + size: 1.3B parameters + dependencies: [MIMIC-IT, OpenFlamingo] + training_emissions: '' + training_time: '' + training_hardware: 4 RTX-3090 GPUs + quality_control: '' + access: open + license: MIT + intended_uses: Following and executing new instructions with few in-context learning + examples given image and textual input. + prohibited_uses: '' + monitoring: '' + feedback: '' diff --git a/assets/palm2.yaml b/assets/palm2.yaml new file mode 100644 index 00000000..0791eb27 --- /dev/null +++ b/assets/palm2.yaml @@ -0,0 +1,30 @@ +--- + +- type: model + name: PaLM 2 + organization: Google + description: PaLM2 is a new state-of-the-art language model that has better multilingual + and reasoning capabilities and is more compute-efficient than its predecessor + PaLM. PaLM 2 is a Transformer-based model trained using a mixture of objectives + similar to UL2. + created_date: 2023-05-10 + url: https://blog.google/technology/ai/google-palm-2-ai-large-language-model/ + model_card: https://ai.google/static/documents/palm2techreport.pdf + modality: text (input and output) + analysis: Reports results on standard code benchmarks across a variety of programming + languages. + size: 3.6T parameters + dependencies: [palm 2 dataset] + training_emissions: '' + training_time: '' + training_hardware: TPU v4 (number unspecified) + quality_control: Employed de-duplication, removal of sensitive-PII and filtering. + Added control tokens marking toxicity of text. + access: open + license: Google + intended_uses: general use large language model that can be used for language, + reasoning, and code tasks. + prohibited_uses: becoming part of a general-purpose service or product or use + within specific downstream applications without prior assessment + monitoring: Google internal monitoring + feedback: Specific queries provided by annotators diff --git a/assets/palmyra.yaml b/assets/palmyra.yaml new file mode 100644 index 00000000..5dd78f33 --- /dev/null +++ b/assets/palmyra.yaml @@ -0,0 +1,26 @@ +--- + +- type: model + name: Palmyra + organization: Writer + description: Palmyra is a privacy-first LLM for enterprises trained on business and marketing writing. + created_date: + value: 2023-01-01 + explanation: The model was stated to be published in January, but which day is + not specified on the website. + url: https://gpt3demo.com/apps/palmyra + model_card: https://huggingface.co/Writer/palmyra-base + modality: text + analysis: Evaluated on the SuperGLUE benchmark + size: 128M (Small), 5B (Base), 20B (Large) parameters + dependencies: [Writer dataset] + training_emissions: unknown + training_time: unknown + training_hardware: '' + quality_control: '' + access: open + license: Apache 2.0 + intended_uses: generating text from a prompt + prohibited_uses: '' + monitoring: '' + feedback: https://huggingface.co/Writer/palmyra-base/discussions diff --git a/assets/peer.yaml b/assets/peer.yaml new file mode 100644 index 00000000..12ce8a96 --- /dev/null +++ b/assets/peer.yaml @@ -0,0 +1,28 @@ +--- + +- type: model + name: PEER + organization: Meta + description: PEER is a collaborative language model that is trained to imitate + the entire writing process itself. 
PEER can write drafts, add suggestions, propose + edits and provide explanations for its actions. + created_date: 2022-08-24 + url: https://arxiv.org/pdf/2208.11663.pdf + model_card: '' + modality: natural language text + analysis: PEER is evaluated on core research questions intended to gauge language + understanding, proper use of citations, instruction following, and iterative + use. + size: 3B parameters + dependencies: [] + training_emissions: '' + training_time: '' + training_hardware: 64 GPUs + quality_control: Heuristics and edit filtering was used on data set, which consisted + mostly of Wikipedia pages. + access: open + license: '' + intended_uses: adapting LLMs to work with collaborative writing and updating. + prohibited_uses: '' + monitoring: '' + feedback: '' diff --git a/assets/pi.yaml b/assets/pi.yaml new file mode 100644 index 00000000..3c35b7b4 --- /dev/null +++ b/assets/pi.yaml @@ -0,0 +1,23 @@ +--- + +- type: application + name: Pi + organization: Inflection AI + description: Personal AI chatbot designed to be conversational and specialized + in emotional intelligence. + created_date: 2023-05-02 + url: https://inflection.ai/press + dependencies: [Inflection-1] + adaptation: '' + output_space: natural language text responses + quality_control: '' + access: limited + license: unknown + terms_of_service: '' + intended_uses: to be used as a personal assistant chatbot for everyday activities + prohibited_uses: '' + monitoring: '' + feedback: '' + monthly_active_users: '' + user_distribution: '' + failures: '' diff --git a/assets/portkey.yaml b/assets/portkey.yaml new file mode 100644 index 00000000..2fca2885 --- /dev/null +++ b/assets/portkey.yaml @@ -0,0 +1,23 @@ +--- + +- type: application + name: Portkey + organization: Portkey + description: Portkey is a hosted middleware that allows users to create generative + AI applications + created_date: 2023-05-06 + url: https://portkey.ai/ + dependencies: [] + adaptation: '' + output_space: generative AI apps + quality_control: '' + access: open + license: '' + terms_of_service: https://portkey.ai/terms + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' + monthly_active_users: '' + user_distribution: '' + failures: '' diff --git a/assets/pythia.yaml b/assets/pythia.yaml new file mode 100644 index 00000000..6421f741 --- /dev/null +++ b/assets/pythia.yaml @@ -0,0 +1,25 @@ +--- + +- type: model + name: Pythia + organization: Eleuther AI + description: A suite of 16 LLMs all trained on public data seen in the exact same + order and ranging in size from 70M to 12B parameters + created_date: 2023-05-31 + url: https://arxiv.org/pdf/2304.01373.pdf + model_card: https://huggingface.co/EleutherAI/pythia-12b + modality: natural language text + analysis: Evaluated on a variety of NLP benchmarks and found to perform similarly + to OPT and BLOOM models. 
+ size: 12B parameters + dependencies: [The Pile] + training_emissions: '' + training_time: '' + training_hardware: 64 A100 GPUs + quality_control: '' + access: open + license: Apache 2.0 + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: https://huggingface.co/EleutherAI/pythia-6.9b/discussions diff --git a/assets/redpajama.yaml b/assets/redpajama.yaml new file mode 100644 index 00000000..0f1a51ba --- /dev/null +++ b/assets/redpajama.yaml @@ -0,0 +1,24 @@ +--- + +- type: dataset + name: RedPajama-Data + organization: Together + description: The RedPajama base dataset is a 1.2 trillion token fully-open dataset + created by following the recipe described in the LLaMA paper. + created_date: 2023-04-17 + url: https://www.together.xyz/blog/redpajama + datasheet: https://huggingface.co/datasets/togethercomputer/RedPajama-Data-1T + modality: text + size: 1.2 trillion tokens + sample: [] + analysis: '' + dependencies: [LLaMA dataset, GitHub, Wikipedia] + included: '' + excluded: '' + quality_control: '' + access: open + license: Apache 2.0 + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' diff --git a/assets/refinedweb.yaml b/assets/refinedweb.yaml new file mode 100644 index 00000000..89e584d2 --- /dev/null +++ b/assets/refinedweb.yaml @@ -0,0 +1,25 @@ +--- + +- type: dataset + name: RefinedWeb + organization: UAE Technology Innovation Institute + description: RefinedWeb is a high-quality, five-trillion-token, web-only English pretraining dataset. + created_date: 2023-06-01 + url: https://arxiv.org/pdf/2306.01116.pdf + datasheet: https://huggingface.co/datasets/tiiuae/falcon-refinedweb + modality: text + size: 600B tokens + sample: [] + analysis: '' + dependencies: [] + included: '' + excluded: '' + quality_control: '' + access: open + license: + value: unknown + explanation: License can be found at https://huggingface.co/datasets/tiiuae/falcon-refinedweb + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' diff --git a/assets/roentgen.yaml b/assets/roentgen.yaml new file mode 100644 index 00000000..31450cd2 --- /dev/null +++ b/assets/roentgen.yaml @@ -0,0 +1,25 @@ +--- + +- type: model + name: RoentGen + organization: Stanford University AIMI + description: RoentGen is a generative medical imaging model that can create visually + convincing X-ray images. + created_date: 2022-11-23 + url: https://arxiv.org/pdf/2211.12737.pdf + model_card: '' + modality: text input and image output + analysis: Evaluated on its own framework that tests domain-specific tasks in the medical + field. + size: 330M parameters + dependencies: [Stable Diffusion, RoentGen radiology dataset] + training_emissions: unknown + training_time: 60k training steps per day + training_hardware: 64 A100 GPUs + quality_control: '' + access: open + license: '' + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' diff --git a/assets/santacoder.yaml b/assets/santacoder.yaml new file mode 100644 index 00000000..ddaf3596 --- /dev/null +++ b/assets/santacoder.yaml @@ -0,0 +1,24 @@ +--- + +- type: model + name: SantaCoder + organization: BigCode + description: Multilingual code model derived from the BigCode Project's findings + on the association between GitHub stars and data quality. + created_date: 2023-02-24 + url: https://arxiv.org/pdf/2301.03988.pdf + model_card: '' + modality: code + analysis: Evaluated on MultiPL-E system benchmarks.
+ size: 1.1B parameters + dependencies: [The Stack, BigCode Dataset] + training_emissions: '' + training_time: 3.1 days + training_hardware: 96 Tesla V100 GPUs + quality_control: '' + access: open + license: Apache 2.0 + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' diff --git a/assets/soda.yaml b/assets/soda.yaml new file mode 100644 index 00000000..dd5991d7 --- /dev/null +++ b/assets/soda.yaml @@ -0,0 +1,26 @@ +--- + +- type: dataset + name: SODA + organization: Allen Institute for AI + description: SODA is the first publicly available, million-scale, high-quality + dialogue dataset covering a wide range of social interactions. + created_date: 2023-05-24 + url: https://arxiv.org/pdf/2212.10465.pdf + datasheet: https://huggingface.co/datasets/allenai/soda + modality: text + size: 1.5M dialogues + sample: [] + analysis: Randomly sampled dialogues from the dataset are evaluated according to six + established criteria of natural flow, context dependence, topic consistency, + speaker consistency, specificity, and overall. + dependencies: [] + included: '' + excluded: '' + quality_control: '' + access: open + license: CC BY 4.0 + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' diff --git a/assets/stablelm.yaml b/assets/stablelm.yaml new file mode 100644 index 00000000..d92fdf8c --- /dev/null +++ b/assets/stablelm.yaml @@ -0,0 +1,23 @@ +--- + +- type: model + name: StableLM + organization: Stability AI + description: Large language models trained on up to 1.5 trillion tokens. + created_date: 2023-04-20 + url: https://github.com/Stability-AI/StableLM + model_card: '' + modality: natural language text + analysis: '' + size: 7B parameters + dependencies: [StableLM-Alpha dataset, Alpaca dataset, gpt4all dataset, ShareGPT52K dataset, Dolly dataset, HH dataset] + training_emissions: '' + training_time: '' + training_hardware: '' + quality_control: '' + access: open + license: Apache 2.0 + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' diff --git a/assets/stack.yaml b/assets/stack.yaml new file mode 100644 index 00000000..416829f6 --- /dev/null +++ b/assets/stack.yaml @@ -0,0 +1,26 @@ +--- + +- type: dataset + name: The Stack + organization: BigCode + description: The Stack is a 3.1 TB dataset consisting of permissively licensed + source code intended for use in creating code LLMs. + created_date: 2022-11-20 + url: https://arxiv.org/pdf/2211.15533.pdf + datasheet: https://huggingface.co/datasets/bigcode/the-stack + modality: code (358 programming languages) + size: 3.1 TB + sample: [] + analysis: Evaluated models trained on The Stack on HumanEval and MBPP and compared + against similarly-sized models. + dependencies: [GitHub] + included: '' + excluded: '' + quality_control: allowed users whose data were part of The Stack's training data + to opt out + access: open + license: Apache 2.0 + intended_uses: creating code LLMs + prohibited_uses: '' + monitoring: '' + feedback: '' diff --git a/assets/starcoder.yaml b/assets/starcoder.yaml new file mode 100644 index 00000000..55f2dc24 --- /dev/null +++ b/assets/starcoder.yaml @@ -0,0 +1,28 @@ +--- + +- type: model + name: StarCoder + organization: BigCode + description: StarCoder is a Large Language Model for Code (Code LLM) trained on + permissively licensed data from GitHub, including from 80+ programming languages, + Git commits, GitHub issues, and Jupyter notebooks.
+ created_date: 2023-05-09 + url: https://arxiv.org/pdf/2305.06161.pdf + model_card: https://huggingface.co/bigcode/starcoder + modality: code (80+ programming languages) + analysis: Tested on several benchmarks, most notably Python benchmark HumanEval. + size: 15.5B parameters + dependencies: [The Stack] + training_emissions: '' + training_time: 24 days + training_hardware: 512 Tesla A100 GPU + quality_control: No specific quality control is mentioned in model training, though + details on data processing and how the tokenizer was trained are provided in + the paper. + access: open + license: Apache 2.0 + intended_uses: With a Tech Assistant prompt and not as an instruction model given + training limitations. + prohibited_uses: '' + monitoring: '' + feedback: https://huggingface.co/bigcode/starcoder/discussions diff --git a/assets/transformify.yaml b/assets/transformify.yaml new file mode 100644 index 00000000..118b559f --- /dev/null +++ b/assets/transformify.yaml @@ -0,0 +1,23 @@ +--- + +- type: application + name: Transformify Automate + organization: Transformify + description: Transformify Automate is a platform for automated task integration + using natural language prompts. + created_date: 2023-05-30 + url: https://www.transformify.ai/automate + dependencies: [GPT-4] + adaptation: '' + output_space: text and code + quality_control: '' + access: open + license: '' + terms_of_service: https://www.transformify.ai/legal-stuff + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' + monthly_active_users: '' + user_distribution: '' + failures: '' diff --git a/assets/underwritegpt.yaml b/assets/underwritegpt.yaml new file mode 100644 index 00000000..76870f97 --- /dev/null +++ b/assets/underwritegpt.yaml @@ -0,0 +1,22 @@ +--- + +- type: application + name: UnderwriteGPT + organization: Paladin Group and Dais Technology + description: UnderwriteGPT is the world's first generative AI underwriting tool. + created_date: 2023-02-01 + url: https://dais.com/underwritegpt/ + dependencies: [] + adaptation: '' + output_space: '' + quality_control: '' + access: limited + license: '' + terms_of_service: '' + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' + monthly_active_users: '' + user_distribution: '' + failures: '' diff --git a/assets/unilm.yaml b/assets/unilm.yaml new file mode 100644 index 00000000..845df162 --- /dev/null +++ b/assets/unilm.yaml @@ -0,0 +1,24 @@ +--- + +- type: model + name: UniLM + organization: Microsoft + description: UniLM is a unified language model that can be fine-tuned for both + natural language understanding and generation tasks. + created_date: 2019-10-01 + url: https://proceedings.neurips.cc/paper_files/paper/2019/file/c20bb2d9a50d5ac1f713f8b34d9aac5a-Paper.pdf + model_card: '' + modality: text + analysis: Evaluated on GLUE, SQuAD 2.0, and CoQA benchmarks. + size: 340M parameters + dependencies: [] + training_emissions: unknown + training_time: 10,000 steps in 7 hours + training_hardware: 8 NVIDIA Tesla V100 32GB GPUs + quality_control: '' + access: open + license: MIT + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' diff --git a/assets/vicuna.yaml b/assets/vicuna.yaml new file mode 100644 index 00000000..6f07781e --- /dev/null +++ b/assets/vicuna.yaml @@ -0,0 +1,24 @@ +--- + +- type: model + name: Vicuna + organization: LMSYS + description: An open-source chatbot trained by fine-tuning LLaMA on user-shared + conversations collected from ShareGPT. 
+ created_date: 2023-03-30 + url: https://lmsys.org/blog/2023-03-30-vicuna/ + model_card: https://huggingface.co/lmsys/vicuna-13b-delta-v0 + modality: natural language text + analysis: Evaluated against similar LLMs using GPT-4 as a judge. + size: 13B parameters + dependencies: [LLaMA, ShareGPT conversations data] + training_emissions: '' + training_time: 1 day + training_hardware: 8 A100 GPUs + quality_control: '' + access: open + license: Apache 2.0 + intended_uses: research on LLMs and chatbots + prohibited_uses: '' + monitoring: '' + feedback: https://huggingface.co/lmsys/vicuna-13b-delta-v0/discussions diff --git a/assets/voicebox.yaml b/assets/voicebox.yaml new file mode 100644 index 00000000..2f7a3579 --- /dev/null +++ b/assets/voicebox.yaml @@ -0,0 +1,25 @@ +--- + +- type: model + name: Voicebox + organization: Meta AI + description: Voicebox is the first generative AI model for speech to generalize + across tasks with state-of-the-art performance. + created_date: 2023-06-16 + url: https://research.facebook.com/publications/voicebox-text-guided-multilingual-universal-speech-generation-at-scale/ + model_card: '' + modality: text and audio input and audio output + analysis: Evaluated on zero-shot text-to-speech benchmarks, with Voicebox outperforming + the current state-of-the-art English model VALL-E. + size: 330M parameters + dependencies: [] + training_emissions: unknown + training_time: 750,000 iterations + training_hardware: 32 GPUs of unspecified type + quality_control: '' + access: closed + license: '' + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' diff --git a/assets/vqganclip.yaml b/assets/vqganclip.yaml new file mode 100644 index 00000000..29e64681 --- /dev/null +++ b/assets/vqganclip.yaml @@ -0,0 +1,25 @@ +--- + +- type: model + name: VQGAN-CLIP + organization: EleutherAI + description: VQGAN-CLIP is a model that better generates and edits images using + a multimodal encoder to guide image generation. + created_date: 2023-09-04 + url: https://arxiv.org/pdf/2204.08583.pdf + model_card: '' + modality: text input with image output + analysis: Evaluated by human testers rating alignment of text input, image output + pairs. + size: 227M parameters + dependencies: [VQGAN, CLIP] + training_emissions: unknown + training_time: Less than 1 V100-hour + training_hardware: 1 NVIDIA Tesla K80 GPU + quality_control: '' + access: open + license: MIT + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' diff --git a/assets/wizardlm.yaml b/assets/wizardlm.yaml new file mode 100644 index 00000000..3f94d441 --- /dev/null +++ b/assets/wizardlm.yaml @@ -0,0 +1,28 @@ +--- + +- type: model + name: WizardLM + organization: Microsoft + description: WizardLM is a model produced by starting with an initial set of instructions, + using the proposed Evol-Instruct method to rewrite them step by step into more + complex instructions, and then mixing all generated instruction data to fine-tune + LLaMA. + created_date: 2023-04-24 + url: https://arxiv.org/pdf/2304.12244v1.pdf + model_card: https://huggingface.co/WizardLM/WizardLM-13B-1.0 + modality: natural language text + analysis: Reports results on standard LLM benchmarks in comparison to other LLMs + and test sets.
+ size: 7B parameters + dependencies: [LLaMA, Evol-Instruct, Alpaca dataset] + training_emissions: '' + training_time: 70 hours on 3 epochs + training_hardware: 8 V100 GPUs + quality_control: '' + access: open + license: Apache 2.0 + intended_uses: Creating large amounts of instruction data, particularly with high + complexity + prohibited_uses: '' + monitoring: '' + feedback: https://huggingface.co/datasets/WizardLM/evol_instruct_70k/discussions From 547de3c31bef8be31794ae9fbf84c4d80262a367 Mon Sep 17 00:00:00 2001 From: jxue16 <105090474+jxue16@users.noreply.github.com> Date: Fri, 7 Jul 2023 18:49:54 -0700 Subject: [PATCH 02/10] update main --- js/main.js | 51 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) diff --git a/js/main.js b/js/main.js index c62ac282..bd6cdb54 100644 --- a/js/main.js +++ b/js/main.js @@ -629,6 +629,57 @@ function loadAssetsAndRenderPageContent() { const paths = [ 'assets/adept.yaml', + 'assets/ares.yaml', + 'assets/autogpt.yaml', + 'assets/bark.yaml', + 'assets/beit.yaml', + 'assets/bigtrans.yaml', + 'assets/biomedgpt.yaml', + 'assets/camel.yaml', + 'assets/cformers.yaml', + 'assets/chatglm.yaml', + 'assets/corgi.yaml', + 'assets/cosmo.yaml', + 'assets/deepfloyd.yaml', + 'assets/docugami.yaml', + 'assets/falcon.yaml', + 'assets/goat.yaml', + 'assets/gorilla.yaml', + 'assets/guanaco.yaml', + 'assets/h2ogpt.yaml', + 'assets/inflection-1.yaml', + 'assets/jointslm.yaml', + 'assets/koala.yaml', + 'assets/legomt.yaml', + 'assets/magma.yaml', + 'assets/mpt.yaml', + 'assets/multimodal.yaml', + 'assets/nextdoor.yaml', + 'assets/openflamingo.yaml', + 'assets/openfold.yaml', + 'assets/openllama.yaml', + 'assets/otter.yaml', + 'assets/palm2.yaml', + 'assets/palmyra.yaml', + 'assets/peer.yaml', + 'assets/pi.yaml', + 'assets/portkey.yaml', + 'assets/pythia.yaml', + 'assets/redpajama.yaml', + 'assets/refinedweb.yaml', + 'assets/roentgen.yaml', + 'assets/santacoder.yaml', + 'assets/soda.yaml', + 'assets/stablelm.yaml', + 'assets/stack.yaml', + 'assets/starcoder.yaml', + 'assets/transformify.yaml', + 'assets/underwritegpt.yaml', + 'assets/unilm.yaml', + 'assets/vicuna.yaml', + 'assets/voicebox.yaml', + 'assets/vqganclip.yaml', + 'assets/wizardlm.yaml', 'assets/ai2.yaml', 'assets/ai21.yaml', 'assets/aleph_alpha.yaml', From 45ea92098d9a6bffe03a270aad76e4a80ecd9f83 Mon Sep 17 00:00:00 2001 From: jxue16 <105090474+jxue16@users.noreply.github.com> Date: Fri, 7 Jul 2023 18:51:08 -0700 Subject: [PATCH 03/10] minor change --- assets/vqganclip.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/assets/vqganclip.yaml b/assets/vqganclip.yaml index 29e64681..59fb2959 100644 --- a/assets/vqganclip.yaml +++ b/assets/vqganclip.yaml @@ -5,7 +5,7 @@ organization: EleutherAI description: VQGAN-CLIP is a model that better generates and edits images using a multimodal encoder to guide image generation. 
- created_date: 2023-09-04 + created_date: 2022-09-04 url: https://arxiv.org/pdf/2204.08583.pdf model_card: '' modality: text input with image output From a59f7584305028513e87b15d814fe9df6a8f3eb9 Mon Sep 17 00:00:00 2001 From: jxue16 <105090474+jxue16@users.noreply.github.com> Date: Wed, 12 Jul 2023 21:39:04 -0700 Subject: [PATCH 04/10] reformat by organization --- assets/ai2.yaml | 72 +++++++++++++++ assets/aleph_alpha.yaml | 23 +++++ assets/{bedrock.yaml => amazon.yaml} | 6 +- assets/anthropic.yaml | 33 +++++++ assets/beit.yaml | 25 ------ assets/berkeley.yaml | 72 +++++++++++++++ assets/bigcode.yaml | 76 ++++++++++++++++ assets/camel.yaml | 27 ------ assets/{bigtrans.yaml => casia.yaml} | 0 assets/{openfold.yaml => columbia.yaml} | 2 +- assets/corgi.yaml | 27 ------ assets/cosmo.yaml | 25 ------ assets/deepfloyd.yaml | 26 ------ assets/docugami.yaml | 24 ----- assets/eleutherai.yaml | 48 ++++++++++ assets/{ares.yaml => faraday.yaml} | 4 +- assets/google.yaml | 53 +++++++++++ assets/gorilla.yaml | 27 ------ assets/{h2ogpt.yaml => h2oai.yaml} | 0 assets/inflection-1.yaml | 25 ------ assets/inflection.yaml | 49 +++++++++++ assets/jointslm.yaml | 25 ------ assets/koala.yaml | 24 ----- assets/laion.yaml | 24 +++++ assets/legomt.yaml | 24 ----- assets/{biomedgpt.yaml => lehigh.yaml} | 0 assets/{vicuna.yaml => lmsys.yaml} | 0 assets/magma.yaml | 24 ----- assets/meta.yaml | 51 +++++++++++ assets/microsoft.yaml | 97 +++++++++++++++++++++ assets/{mpt.yaml => mosaic.yaml} | 0 assets/multimodal.yaml | 24 ----- assets/{otter.yaml => nanyang.yaml} | 0 assets/{cformers.yaml => nolano.yaml} | 0 assets/openflamingo.yaml | 25 ------ assets/openllama.yaml | 23 ----- assets/{underwritegpt.yaml => paladin.yaml} | 0 assets/palm2.yaml | 30 ------- assets/palmyra.yaml | 26 ------ assets/peer.yaml | 28 ------ assets/pi.yaml | 23 ----- assets/pythia.yaml | 25 ------ assets/redpajama.yaml | 24 ----- assets/refinedweb.yaml | 25 ------ assets/roentgen.yaml | 25 ------ assets/santacoder.yaml | 24 ----- assets/shanghai.yaml | 23 +++++ assets/{goat.yaml => singapore.yaml} | 0 assets/soda.yaml | 26 ------ assets/stability.yaml | 53 +++++++++++ assets/stablelm.yaml | 23 ----- assets/stack.yaml | 26 ------ assets/stanford.yaml | 50 +++++++++++ assets/starcoder.yaml | 28 ------ assets/{bark.yaml => suno.yaml} | 0 assets/together.yaml | 23 +++++ assets/{falcon.yaml => uae.yaml} | 25 ++++++ assets/unilm.yaml | 24 ----- assets/{guanaco.yaml => uwashington.yaml} | 0 assets/voicebox.yaml | 25 ------ assets/vqganclip.yaml | 25 ------ assets/wizardlm.yaml | 28 ------ assets/writer.yaml | 53 +++++++++++ js/main.js | 65 ++++---------- 64 files changed, 852 insertions(+), 860 deletions(-) rename assets/{bedrock.yaml => amazon.yaml} (74%) delete mode 100644 assets/beit.yaml create mode 100644 assets/berkeley.yaml create mode 100644 assets/bigcode.yaml delete mode 100644 assets/camel.yaml rename assets/{bigtrans.yaml => casia.yaml} (100%) rename assets/{openfold.yaml => columbia.yaml} (94%) delete mode 100644 assets/corgi.yaml delete mode 100644 assets/cosmo.yaml delete mode 100644 assets/deepfloyd.yaml delete mode 100644 assets/docugami.yaml rename assets/{ares.yaml => faraday.yaml} (82%) delete mode 100644 assets/gorilla.yaml rename assets/{h2ogpt.yaml => h2oai.yaml} (100%) delete mode 100644 assets/inflection-1.yaml create mode 100644 assets/inflection.yaml delete mode 100644 assets/jointslm.yaml delete mode 100644 assets/koala.yaml delete mode 100644 assets/legomt.yaml rename assets/{biomedgpt.yaml => lehigh.yaml} (100%) rename 
assets/{vicuna.yaml => lmsys.yaml} (100%) delete mode 100644 assets/magma.yaml rename assets/{mpt.yaml => mosaic.yaml} (100%) delete mode 100644 assets/multimodal.yaml rename assets/{otter.yaml => nanyang.yaml} (100%) rename assets/{cformers.yaml => nolano.yaml} (100%) delete mode 100644 assets/openflamingo.yaml delete mode 100644 assets/openllama.yaml rename assets/{underwritegpt.yaml => paladin.yaml} (100%) delete mode 100644 assets/palm2.yaml delete mode 100644 assets/palmyra.yaml delete mode 100644 assets/peer.yaml delete mode 100644 assets/pi.yaml delete mode 100644 assets/pythia.yaml delete mode 100644 assets/redpajama.yaml delete mode 100644 assets/refinedweb.yaml delete mode 100644 assets/roentgen.yaml delete mode 100644 assets/santacoder.yaml rename assets/{goat.yaml => singapore.yaml} (100%) delete mode 100644 assets/soda.yaml delete mode 100644 assets/stablelm.yaml delete mode 100644 assets/stack.yaml delete mode 100644 assets/starcoder.yaml rename assets/{bark.yaml => suno.yaml} (100%) rename assets/{falcon.yaml => uae.yaml} (55%) delete mode 100644 assets/unilm.yaml rename assets/{guanaco.yaml => uwashington.yaml} (100%) delete mode 100644 assets/voicebox.yaml delete mode 100644 assets/vqganclip.yaml delete mode 100644 assets/wizardlm.yaml create mode 100644 assets/writer.yaml diff --git a/assets/ai2.yaml b/assets/ai2.yaml index dc9e232b..2c890050 100644 --- a/assets/ai2.yaml +++ b/assets/ai2.yaml @@ -21,3 +21,75 @@ prohibited_uses: '' monitoring: '' feedback: '' + +- type: dataset + name: SODA + organization: AI2 + description: SODA is the first publicly available, million-scale, high-quality + dialogue dataset covering a wide range of social interactions. + created_date: 2023-05-24 + url: https://arxiv.org/pdf/2212.10465.pdf + datasheet: https://huggingface.co/datasets/allenai/soda + modality: text + size: 1.5M dialogues + sample: [] + analysis: Randomly sampled dialogues from dataset are evaluated according to six + established criteria of natural flow, context dependence, topic consistency, + speaker consistency, specificity, and overall. + dependencies: [] + included: '' + excluded: '' + quality_control: '' + access: open + license: CC BY 4.0 + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' + +- type: dataset + name: Multimodal C4 + organization: AI2 + description: An augmentation of C4 with images added and made openly available. + created_date: 2023-06-09 + url: https://arxiv.org/pdf/2304.06939.pdf + datasheet: '' + modality: text and images + size: 43B English tokens with 101.2M documents and 571M images + sample: [] + analysis: Conducted experiments on models trained with Multimodal C4 in comparison + to models trained on single image/caption datasets + dependencies: [C4] + included: '' + excluded: '' + quality_control: '' + access: open + license: MIT + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' + +- type: model + name: COSMO + organization: AI2 + description: COSMO is a conversation agent with greater generalizability on both + in- and out-of-domain chitchat datasets + created_date: 2023-05-24 + url: https://arxiv.org/pdf/2212.10465.pdf + model_card: https://huggingface.co/allenai/cosmo-xl + modality: text + analysis: Evaluated by human testers on generalization capabilities and responses + compared to other chatbots. 
+ size: 11B parameters + dependencies: [SODA, ProsocialDialog, T5] + training_emissions: unknown + training_time: unknown + training_hardware: v3-128 TPU accelerators with batch size 256 + quality_control: '' + access: open + license: '' + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: https://huggingface.co/allenai/cosmo-xl/discussions diff --git a/assets/aleph_alpha.yaml b/assets/aleph_alpha.yaml index b7b635f6..587a9aca 100644 --- a/assets/aleph_alpha.yaml +++ b/assets/aleph_alpha.yaml @@ -100,3 +100,26 @@ monthly_active_users: unknown user_distribution: unknown failures: unknown + +- type: model + name: MAGMA + organization: Aleph Alpha + description: An autoregressive VL model that is able to generate text from an + arbitrary combination of visual and textual input + created_date: 2022-10-24 + url: https://arxiv.org/pdf/2112.05253.pdf + model_card: '' + modality: image and text input with natural language text output + analysis: Evaluated on the OKVQA benchmark as a fully open-ended generative task. + size: 6B parameters + dependencies: [GPT-J, CLIP] + training_emissions: '' + training_time: '' + training_hardware: 32 A100 GPUs + quality_control: '' + access: open + license: MIT + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' diff --git a/assets/bedrock.yaml b/assets/amazon.yaml similarity index 74% rename from assets/bedrock.yaml rename to assets/amazon.yaml index 1022592a..152e0389 100644 --- a/assets/bedrock.yaml +++ b/assets/amazon.yaml @@ -3,8 +3,10 @@ - type: application name: Bedrock organization: Amazon - description: Bedrock is a new service that makes FMs from AI21 Labs, Anthropic, Stability AI, and Amazon accessible via an API. Bedrock is intended for customers to build and scale generative AI-based applications using FMs, democratizing access for all builders. - using an API. + description: Bedrock is a new service that makes FMs from AI21 Labs, Anthropic, + Stability AI, and Amazon accessible via an API. Bedrock is intended for customers + to build and scale generative AI-based applications using FMs, democratizing + access for all builders. created_date: 2023-04-13 url: https://aws.amazon.com/bedrock/ dependencies: [Jurassic-2, Claude, Stable Diffusion, Amazon Titan] diff --git a/assets/anthropic.yaml b/assets/anthropic.yaml index 385d53ed..b47dc7b3 100644 --- a/assets/anthropic.yaml +++ b/assets/anthropic.yaml @@ -594,3 +594,36 @@ prohibited_uses: '' monitoring: '' feedback: '' + +- type: model + name: Claude 2 + organization: Anthropic + description: Claude 2 is a more evolved and refined version of Claude, which is + a general purpose large language model using a transformer architecture and + trained via unsupervised learning. + created_date: 2023-07-11 + url: https://www.anthropic.com/index/claude-2 + model_card: https://www-files.anthropic.com/production/images/Model-Card-Claude-2.pdf + modality: text + analysis: Evaluated with human feedback on helpfulness, harmfulness, and honesty + and on the Bias Benchmark for QA. + size: '' + dependencies: + - Claude human feedback data + - Unknown licensed third party datasets + training_emissions: '' + training_time: '' + training_hardware: unknown + quality_control: '' + access: limited + license: '' + intended_uses: Claude 2 tends to perform well at general, open-ended conversation; + search, writing, editing, outlining, and summarizing text; coding; and providing + helpful advice about a broad range of subjects. 
Claude 2 is particularly well + suited to support creative or literary use cases. They can take direction on + tone and “personality,” and users have described them as feeling steerable and + conversational. + prohibited_uses: Claude 2 should not be used on their own in high stakes situations + where an incorrect answer would cause harm. + monitoring: '' + feedback: '' diff --git a/assets/beit.yaml b/assets/beit.yaml deleted file mode 100644 index adabec71..00000000 --- a/assets/beit.yaml +++ /dev/null @@ -1,25 +0,0 @@ ---- - -- type: model - name: BEiT-3 - organization: Microsoft - description: BEiT-3 is a general-purpose multimodal foundation model for vision - and vision-language tasks. - created_date: 2022-08-31 - url: https://arxiv.org/pdf/2208.10442.pdf - model_card: '' - modality: text and image input and output - analysis: Evaluated on a range of standardized vision benchmarks, and achieves - state of the art performance on all experimentally. - size: 1.9B parameters - dependencies: [Multiway Transformer network] - training_emissions: unknown - training_time: '' - training_hardware: '' - quality_control: '' - access: open - license: '' - intended_uses: '' - prohibited_uses: '' - monitoring: '' - feedback: '' diff --git a/assets/berkeley.yaml b/assets/berkeley.yaml new file mode 100644 index 00000000..2b8d64de --- /dev/null +++ b/assets/berkeley.yaml @@ -0,0 +1,72 @@ +--- + +- type: model + name: Koala + organization: Berkeley + description: A relatively small chatbot trained by fine-tuning Meta’s LLaMA on + dialogue data gathered from the web. + created_date: 2023-04-03 + url: https://bair.berkeley.edu/blog/2023/04/03/koala/ + model_card: https://huggingface.co/TheBloke/koala-7B-GPTQ-4bit-128g + modality: natural language text + analysis: Evaluated in comparison with ChatGPT and Stanford Alpaca. + size: 13B parameters + dependencies: [LLaMA, web-scraped dialogue data] + training_emissions: '' + training_time: 6 hours + training_hardware: 8 A100 GPUs + quality_control: '' + access: open + license: Apache 2.0 + intended_uses: academic research + prohibited_uses: '' + monitoring: '' + feedback: https://huggingface.co/TheBloke/koala-7B-GPTQ-4bit-128g/discussions + +- type: model + name: Gorilla + organization: Berkeley + description: Gorilla is a finetuned LLaMA-based model that surpasses the performance + of GPT-4 on writing API calls. + created_date: 2023-05-24 + url: https://arxiv.org/pdf/2305.15334v1.pdf + model_card: '' + modality: outputs API from natural language input + analysis: Evaluated using AST sub-tree matching technique and compared to other + models in terms of API functionality accuracy. + size: 7B parameters + dependencies: [LLaMA, Gorilla document retriever] + training_emissions: '' + training_time: '' + training_hardware: '' + quality_control: No specific quality control is mentioned in model training, though + details on data processing and collection are provided in the paper. + access: open + license: Apache 2.0 + intended_uses: In conjunction with a LLM to improve its capability for using API + calls. + prohibited_uses: '' + monitoring: '' + feedback: '' + +- type: model + name: OpenLLaMA + organization: Berkeley + description: OpenLlama is an open source reproduction of Meta's LLaMA model. + created_date: 2023-05-03 + url: https://github.com/openlm-research/open_llama + model_card: '' + modality: text + analysis: Evaluated on wide range of tasks using own evaluation benchmarks. 
+ size: 17B parameters + dependencies: [LLaMA, RedPajama] + training_emissions: unknown + training_time: unknown + training_hardware: '' + quality_control: '' + access: open + license: Apache 2.0 + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' \ No newline at end of file diff --git a/assets/bigcode.yaml b/assets/bigcode.yaml new file mode 100644 index 00000000..8f3ababc --- /dev/null +++ b/assets/bigcode.yaml @@ -0,0 +1,76 @@ +--- + +- type: model + name: StarCoder + organization: BigCode + description: StarCoder is a Large Language Model for Code (Code LLM) trained on + permissively licensed data from GitHub, including from 80+ programming languages, + Git commits, GitHub issues, and Jupyter notebooks. + created_date: 2023-05-09 + url: https://arxiv.org/pdf/2305.06161.pdf + model_card: https://huggingface.co/bigcode/starcoder + modality: code (80+ programming languages) + analysis: Tested on several benchmarks, most notably Python benchmark HumanEval. + size: 15.5B parameters + dependencies: [The Stack] + training_emissions: '' + training_time: 24 days + training_hardware: 512 Tesla A100 GPUs + quality_control: No specific quality control is mentioned in model training, though + details on data processing and how the tokenizer was trained are provided in + the paper. + access: open + license: Apache 2.0 + intended_uses: With a Tech Assistant prompt and not as an instruction model given + training limitations. + prohibited_uses: '' + monitoring: '' + feedback: https://huggingface.co/bigcode/starcoder/discussions + +- type: model + name: SantaCoder + organization: BigCode + description: Multilingual code model derived from findings of BigCode Project + analysis on GitHub stars' association with data quality. + created_date: 2023-02-24 + url: https://arxiv.org/pdf/2301.03988.pdf + model_card: '' + modality: code + analysis: Evaluated on MultiPL-E system benchmarks. + size: 1.1B parameters + dependencies: [The Stack, BigCode Dataset] + training_emissions: '' + training_time: 3.1 days + training_hardware: 96 Tesla V100 GPUs + quality_control: '' + access: open + license: Apache 2.0 + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' + +- type: dataset + name: The Stack + organization: BigCode + description: The Stack is a 3.1 TB dataset consisting of permissively licensed + source code intended for use in creating code LLMs. + created_date: 2022-11-20 + url: https://arxiv.org/pdf/2211.15533.pdf + datasheet: https://huggingface.co/datasets/bigcode/the-stack + modality: code (358 programming languages) + size: 3.1 TB + sample: [] + analysis: Evaluated models trained on The Stack on HumanEval and MBPP and compared + against similarly-sized models. + dependencies: [GitHub] + included: '' + excluded: '' + quality_control: allowed users whose data were part of The Stack's training data + to opt-out + access: open + license: Apache 2.0 + intended_uses: creating code LLMs + prohibited_uses: '' + monitoring: '' + feedback: '' \ No newline at end of file diff --git a/assets/camel.yaml b/assets/camel.yaml deleted file mode 100644 index 3ac188dd..00000000 --- a/assets/camel.yaml +++ /dev/null @@ -1,27 +0,0 @@ ---- - -- type: model - name: Camel - organization: Writer - description: Camel is an instruction-following large language model tailored - for advanced NLP and comprehension capabilities. - created_date: - value: 2023-04-01 - explanation: The model was stated to be published in April, but which day is - not specified on the website. 
- url: https://chatcamel.vercel.app/ - model_card: https://huggingface.co/Writer/camel-5b-hf - modality: text - analysis: '' - size: 5B parameters - dependencies: [Palmyra, Camel dataset] - training_emissions: unknown - training_time: unknown - training_hardware: '' - quality_control: '' - access: open - license: Apache 2.0 - intended_uses: '' - prohibited_uses: '' - monitoring: '' - feedback: https://huggingface.co/Writer/camel-5b-hf/discussions diff --git a/assets/bigtrans.yaml b/assets/casia.yaml similarity index 100% rename from assets/bigtrans.yaml rename to assets/casia.yaml diff --git a/assets/openfold.yaml b/assets/columbia.yaml similarity index 94% rename from assets/openfold.yaml rename to assets/columbia.yaml index 5723593a..2ca39802 100644 --- a/assets/openfold.yaml +++ b/assets/columbia.yaml @@ -2,7 +2,7 @@ - type: model name: OpenFold - organization: Columbia University + organization: Columbia description: OpenFold is an open source recreation of AlphaFold2. created_date: 2022-11-20 url: https://www.biorxiv.org/content/10.1101/2022.11.20.517210v2 diff --git a/assets/corgi.yaml b/assets/corgi.yaml deleted file mode 100644 index 968b1228..00000000 --- a/assets/corgi.yaml +++ /dev/null @@ -1,27 +0,0 @@ ---- - -- type: model - name: CORGI - organization: Stanford - description: Model trained to generate language corrections for physical control - tasks. - created_date: 2023-06-12 - url: https://arxiv.org/pdf/2306.07012.pdf - model_card: '' - modality: trajectory input with text output - analysis: Evaluated on three physical control tasks, drawing, steering, and human - body movement on various dynamics - size: 124M parameters - dependencies: [GPT-2, BABEL, text-davinci-003] - training_emissions: '' - training_time: - value: unknown - explanation: The authors do not report the training time, but do report that they train for 200 epochs. - training_hardware: one NVIDIA A40 GPU - quality_control: '' - access: open - license: MIT - intended_uses: '' - prohibited_uses: '' - monitoring: '' - feedback: '' diff --git a/assets/cosmo.yaml b/assets/cosmo.yaml deleted file mode 100644 index 6b5230a4..00000000 --- a/assets/cosmo.yaml +++ /dev/null @@ -1,25 +0,0 @@ ---- - -- type: model - name: COSMO - organization: Allen Institute for AI - description: COSMO is a conversation agent with greater generalizability on both - in- and out-of-domain chitchat datasets - created_date: 2023-05-24 - url: https://arxiv.org/pdf/2212.10465.pdf - model_card: https://huggingface.co/allenai/cosmo-xl - modality: text - analysis: Evaluated by human testers on generalization capabilities and responses - compared to other chatbots. - size: 11B parameters - dependencies: [SODA, ProsocialDialog, T5] - training_emissions: unknown - training_time: unknown - training_hardware: v3-128 TPU accelerators with batch size 256 - quality_control: '' - access: open - license: '' - intended_uses: '' - prohibited_uses: '' - monitoring: '' - feedback: https://huggingface.co/allenai/cosmo-xl/discussions diff --git a/assets/deepfloyd.yaml b/assets/deepfloyd.yaml deleted file mode 100644 index b04b90ae..00000000 --- a/assets/deepfloyd.yaml +++ /dev/null @@ -1,26 +0,0 @@ ---- - -- type: model - name: DeepFloyd IF - organization: Stability AI - description: A text-to-image cascaded pixel diffusion model released in conjunction - with AI research lab DeepFloyd. 
- created_date: 2023-04-28 - url: https://stability.ai/blog/deepfloyd-if-text-to-image-model - model_card: https://huggingface.co/DeepFloyd/IF-I-XL-v1.0 - modality: textual input with image output - analysis: Evaluated on the COCO dataset. - size: 4.3B parameters - dependencies: [LAION-5B] - training_emissions: '' - training_time: '' - training_hardware: '' - quality_control: '' - access: open - license: - value: custom - explanation: License agreement can be found on model card https://huggingface.co/DeepFloyd/IF-I-XL-v1.0 - intended_uses: '' - prohibited_uses: '' - monitoring: '' - feedback: https://huggingface.co/DeepFloyd/IF-I-XL-v1.0/discussions diff --git a/assets/docugami.yaml b/assets/docugami.yaml deleted file mode 100644 index b1b56bf6..00000000 --- a/assets/docugami.yaml +++ /dev/null @@ -1,24 +0,0 @@ ---- - -- type: model - name: Docugami - organization: Microsoft - description: Docugami is a LLM focused on writing business documents and data - using generative AI. - created_date: 2021-04-12 - url: https://www.docugami.com/generative-ai - model_card: '' - modality: text - analysis: '' - size: 20B parameters - dependencies: [] - training_emissions: unknown - training_time: unknown - training_hardware: '' - quality_control: '' - access: limited - license: '' - intended_uses: analyzing, writing, and connecting business documents and data - prohibited_uses: '' - monitoring: '' - feedback: '' diff --git a/assets/eleutherai.yaml b/assets/eleutherai.yaml index c79165bb..945c27eb 100644 --- a/assets/eleutherai.yaml +++ b/assets/eleutherai.yaml @@ -267,3 +267,51 @@ monthly_active_users: unknown user_distribution: unknown failures: unknown + +- type: model + name: VQGAN-CLIP + organization: EleutherAI + description: VQGAN-CLIP is a model that better generates and edits images using + a multimodal encoder to guide image generation. + created_date: 2022-09-04 + url: https://arxiv.org/pdf/2204.08583.pdf + model_card: '' + modality: text input with image output + analysis: Evaluated by human testers rating alignment of text input, image output + pairs. + size: 227M parameters + dependencies: [VQGAN, CLIP] + training_emissions: unknown + training_time: Less than 1 V100-hour + training_hardware: 1 NVIDIA Tesla K80 GPU + quality_control: '' + access: open + license: MIT + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' + +- type: model + name: Pythia + organization: Eleuther AI + description: A suite of 16 LLMs all trained on public data seen in the exact same + order and ranging in size from 70M to 12B parameters + created_date: 2023-05-31 + url: https://arxiv.org/pdf/2304.01373.pdf + model_card: https://huggingface.co/EleutherAI/pythia-12b + modality: natural language text + analysis: Evaluated on a variety of NLP benchmarks and found to perform similarly + to OPT and BLOOM models. 
+ size: 12B parameters + dependencies: [The Pile] + training_emissions: '' + training_time: '' + training_hardware: 64 A100 GPUs + quality_control: '' + access: open + license: Apache 2.0 + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: https://huggingface.co/EleutherAI/pythia-6.9b/discussions \ No newline at end of file diff --git a/assets/ares.yaml b/assets/faraday.yaml similarity index 82% rename from assets/ares.yaml rename to assets/faraday.yaml index 3af17a66..7c986cb4 100644 --- a/assets/ares.yaml +++ b/assets/faraday.yaml @@ -3,7 +3,9 @@ - type: application name: ARES organization: Faraday Lab - description: ARES is a text-to-image generator based on Stable Diffusion. The goal is to provide a simple tool with a user interface allowing mainstream AI access for artists and creators. + description: ARES is a text-to-image generator based on Stable Diffusion. The + goal is to provide a simple tool with a user interface allowing mainstream AI + access for artists and creators. created_date: value: 2023-04-26 explanation: The date Faraday Lab's hugging face card was last updated. Date diff --git a/assets/google.yaml b/assets/google.yaml index 09556eee..4103f04b 100644 --- a/assets/google.yaml +++ b/assets/google.yaml @@ -1904,3 +1904,56 @@ prohibited_uses: '' monitoring: '' feedback: '' + +- type: model + name: Google Joint SLM + organization: Google + description: Joint speech and language model using a Speech2Text adapter and using + a CTC-based blank-filtering. + created_date: 2023-06-08 + url: https://arxiv.org/pdf/2306.07944.pdf + model_card: '' + modality: speech input with textual output + analysis: evaluated on DSTC11 Challenge Task, based on MultiWoz 2.1, with a focus + on dialog state tracking. + size: '' + dependencies: [CTC blank-filtering, Speech2Text adapter] + training_emissions: '' + training_time: '' + training_hardware: '' + quality_control: '' + access: open + license: Google + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' + +- type: model + name: PaLM 2 + organization: Google + description: PaLM2 is a new state-of-the-art language model that has better multilingual + and reasoning capabilities and is more compute-efficient than its predecessor + PaLM. PaLM 2 is a Transformer-based model trained using a mixture of objectives + similar to UL2. + created_date: 2023-05-10 + url: https://blog.google/technology/ai/google-palm-2-ai-large-language-model/ + model_card: https://ai.google/static/documents/palm2techreport.pdf + modality: text (input and output) + analysis: Reports results on standard code benchmarks across a variety of programming + languages. + size: 3.6T parameters + dependencies: [palm 2 dataset] + training_emissions: '' + training_time: '' + training_hardware: TPU v4 (number unspecified) + quality_control: Employed de-duplication, removal of sensitive-PII and filtering. + Added control tokens marking toxicity of text. + access: open + license: Google + intended_uses: general use large language model that can be used for language, + reasoning, and code tasks. 
+ prohibited_uses: becoming part of a general-purpose service or product or use + within specific downstream applications without prior assessment + monitoring: Google internal monitoring + feedback: Specific queries provided by annotators diff --git a/assets/gorilla.yaml b/assets/gorilla.yaml deleted file mode 100644 index d6c440d4..00000000 --- a/assets/gorilla.yaml +++ /dev/null @@ -1,27 +0,0 @@ ---- - -- type: model - name: Gorilla - organization: UC Berkeley - description: Gorilla is a finetuned LLaMA-based model that surpasses the performance - of GPT-4 on writing API calls. - created_date: 2023-05-24 - url: https://arxiv.org/pdf/2305.15334v1.pdf - model_card: '' - modality: outputs API from natural language input - analysis: Evaluated using AST sub-tree matching technique and compared to other - models in terms of API functionality accuracy. - size: 7B parameters - dependencies: [LLaMA, Gorilla document retriever] - training_emissions: '' - training_time: '' - training_hardware: '' - quality_control: No specific quality control is mentioned in model training, though - details on data processing and collection are provided in the paper. - access: open - license: Apache 2.0 - intended_uses: In conjunction with a LLM to improve its capability for using API - calls. - prohibited_uses: '' - monitoring: '' - feedback: '' diff --git a/assets/h2ogpt.yaml b/assets/h2oai.yaml similarity index 100% rename from assets/h2ogpt.yaml rename to assets/h2oai.yaml diff --git a/assets/inflection-1.yaml b/assets/inflection-1.yaml deleted file mode 100644 index 1af3f04e..00000000 --- a/assets/inflection-1.yaml +++ /dev/null @@ -1,25 +0,0 @@ ---- - -- type: model - name: Inflection-1 - organization: Inflection AI - description: Inflection AI's in-house LLM, which powers Pi and will soon be available via Inflection AI's conversational API. - created_date: 2023-06-22 - url: https://inflection.ai/inflection-1 - model_card: '' - modality: text - analysis: Evaluated on wide range of language benchmarks like MMLU 5-shot, GSM-8K, and HellaSwag 10-shot among others. - size: - value: unknown - explanation: Designed to be roughly around Chat-GPT's size (175B parameters). - dependencies: [] - training_emissions: '' - training_time: '' - training_hardware: unknown - quality_control: '' - access: limited - license: unknown - intended_uses: '' - prohibited_uses: '' - monitoring: '' - feedback: '' diff --git a/assets/inflection.yaml b/assets/inflection.yaml new file mode 100644 index 00000000..9d000476 --- /dev/null +++ b/assets/inflection.yaml @@ -0,0 +1,49 @@ +--- + +- type: model + name: Inflection-1 + organization: Inflection AI + description: Inflection AI's in-house LLM, which powers Pi and will soon be available + via Inflection AI's conversational API. + created_date: 2023-06-22 + url: https://inflection.ai/inflection-1 + model_card: '' + modality: text + analysis: Evaluated on wide range of language benchmarks like MMLU 5-shot, GSM-8K, + and HellaSwag 10-shot among others. + size: + value: unknown + explanation: Designed to be roughly around Chat-GPT's size (175B parameters). + dependencies: [] + training_emissions: '' + training_time: '' + training_hardware: unknown + quality_control: '' + access: limited + license: unknown + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' + +- type: application + name: Pi + organization: Inflection AI + description: Personal AI chatbot designed to be conversational and specialized + in emotional intelligence. 
+ created_date: 2023-05-02 + url: https://inflection.ai/press + dependencies: [Inflection-1] + adaptation: '' + output_space: natural language text responses + quality_control: '' + access: limited + license: unknown + terms_of_service: '' + intended_uses: to be used as a personal assistant chatbot for everyday activities + prohibited_uses: '' + monitoring: '' + feedback: '' + monthly_active_users: '' + user_distribution: '' + failures: '' \ No newline at end of file diff --git a/assets/jointslm.yaml b/assets/jointslm.yaml deleted file mode 100644 index 0465fc82..00000000 --- a/assets/jointslm.yaml +++ /dev/null @@ -1,25 +0,0 @@ ---- - -- type: model - name: Google Joint SLM - organization: Google - description: Joint speech and language model using a Speech2Text adapter and using - a CTC-based blank-filtering. - created_date: 2023-06-08 - url: https://arxiv.org/pdf/2306.07944.pdf - model_card: '' - modality: speech input with textual output - analysis: evaluated on DSTC11 Challenge Task, based on MultiWoz 2.1, with a focus - on dialog state tracking. - size: '' - dependencies: [CTC blank-filtering, Speech2Text adapter] - training_emissions: '' - training_time: '' - training_hardware: '' - quality_control: '' - access: open - license: Google - intended_uses: '' - prohibited_uses: '' - monitoring: '' - feedback: '' diff --git a/assets/koala.yaml b/assets/koala.yaml deleted file mode 100644 index 03b456c7..00000000 --- a/assets/koala.yaml +++ /dev/null @@ -1,24 +0,0 @@ ---- - -- type: model - name: Koala - organization: Berkeley - description: A relatively small chatbot trained by fine-tuning Meta’s LLaMA on - dialogue data gathered from the web. - created_date: 2023-04-03 - url: https://bair.berkeley.edu/blog/2023/04/03/koala/ - model_card: https://huggingface.co/TheBloke/koala-7B-GPTQ-4bit-128g - modality: natural language text - analysis: Evaluated in comparison with ChatGPT and Stanford Alpaca. - size: 13B parameters - dependencies: [LLaMA, web-scraped dialogue data] - training_emissions: '' - training_time: 6 hours - training_hardware: 8 A100 GPUs - quality_control: '' - access: open - license: Apache 2.0 - intended_uses: academic research - prohibited_uses: '' - monitoring: '' - feedback: https://huggingface.co/TheBloke/koala-7B-GPTQ-4bit-128g/discussions diff --git a/assets/laion.yaml b/assets/laion.yaml index 9d026ebc..6a62325e 100644 --- a/assets/laion.yaml +++ b/assets/laion.yaml @@ -154,3 +154,27 @@ from using LAION-2B-en for non-research purposes. monitoring: '' feedback: '' + +- type: model + name: OpenFlamingo + organization: LAION + description: An open-source reproduction of DeepMind's Flamingo model. At its + core, OpenFlamingo is a framework that enables training and evaluation of large + multimodal models (LMMs). + created_date: 2023-03-28 + url: https://laion.ai/blog/open-flamingo/ + model_card: https://github.com/mlfoundations/open_flamingo/blob/main/MODEL_CARD.md + modality: image and text input with textual output + analysis: Evaluated on COCO captioning and VQAv2 vision-language tasks. 
+ size: 9B parameters + dependencies: [LLaMA, CLIP] + training_emissions: '' + training_time: '' + training_hardware: '' + quality_control: '' + access: open + license: MIT + intended_uses: academic research purposes + prohibited_uses: commercial use + monitoring: '' + feedback: '' diff --git a/assets/legomt.yaml b/assets/legomt.yaml deleted file mode 100644 index fb7a3353..00000000 --- a/assets/legomt.yaml +++ /dev/null @@ -1,24 +0,0 @@ ---- - -- type: model - name: Lego-MT - organization: Shanghai AI Laboratory - description: Lego-MT is a multilingual large language model which uses a more - efficient approach of being an effective detachable model. - created_date: 2023-05-29 - url: https://arxiv.org/pdf/2212.10551.pdf - model_card: '' - modality: multilingual text - analysis: Evaluated based on own constructed dataset covering 433 languages. - size: 1.2B parameters - dependencies: [OPUS] - training_emissions: unknown - training_time: 15 days - training_hardware: 32 A100 GPUs - quality_control: '' - access: open - license: '' - intended_uses: '' - prohibited_uses: '' - monitoring: '' - feedback: '' diff --git a/assets/biomedgpt.yaml b/assets/lehigh.yaml similarity index 100% rename from assets/biomedgpt.yaml rename to assets/lehigh.yaml diff --git a/assets/vicuna.yaml b/assets/lmsys.yaml similarity index 100% rename from assets/vicuna.yaml rename to assets/lmsys.yaml diff --git a/assets/magma.yaml b/assets/magma.yaml deleted file mode 100644 index 7b290942..00000000 --- a/assets/magma.yaml +++ /dev/null @@ -1,24 +0,0 @@ ---- - -- type: model - name: MAGMA - organization: Aleph Alpha - description: An autoregressive VL model that is able to generate text from an - arbitrary combination of visual and textual input - created_date: 2022-10-24 - url: https://arxiv.org/pdf/2112.05253.pdf - model_card: '' - modality: image and text input with natural language text output - analysis: Evaluated on the OKVQA benchmark as a fully open-ended generative task. - size: 6B parameters - dependencies: [GPT-J, CLIP] - training_emissions: '' - training_time: '' - training_hardware: 32 A100 GPUs - quality_control: '' - access: open - license: MIT - intended_uses: '' - prohibited_uses: '' - monitoring: '' - feedback: '' diff --git a/assets/meta.yaml b/assets/meta.yaml index 0fdf0efa..a847abd6 100644 --- a/assets/meta.yaml +++ b/assets/meta.yaml @@ -608,3 +608,54 @@ monitoring: '' feedback: Feedback can be given via the feedback form on their website [segment-anything.com](https://segment-anything.com/) or by emailing at segment-anything at meta.com. + +- type: model + name: Voicebox + organization: Meta + description: Voicebox is the first generative AI model for speech to generalize + across tasks with state-of-the-art performance. + created_date: 2023-06-16 + url: https://research.facebook.com/publications/voicebox-text-guided-multilingual-universal-speech-generation-at-scale/ + model_card: '' + modality: text and audio input and audio output + analysis: Evaluated on zero-shot text-to-speech benchmarks, with Voicebox outperforming + the current state-of-the-art English model VALL-E. 
+ size: 330M parameters + dependencies: [] + training_emissions: unknown + training_time: 750,000 iterations + training_hardware: 32 GPUs of unspecified type + quality_control: '' + access: closed + license: '' + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' + +- type: model + name: PEER + organization: Meta + description: PEER is a collaborative language model that is trained to imitate + the entire writing process itself. PEER can write drafts, add suggestions, propose + edits and provide explanations for its actions. + created_date: 2022-08-24 + url: https://arxiv.org/pdf/2208.11663.pdf + model_card: '' + modality: natural language text + analysis: PEER is evaluated on core research questions intended to gauge language + understanding, proper use of citations, instruction following, and iterative + use. + size: 3B parameters + dependencies: [] + training_emissions: '' + training_time: '' + training_hardware: 64 GPUs + quality_control: Heuristics and edit filtering were used on the data set, which consisted + mostly of Wikipedia pages. + access: open + license: '' + intended_uses: adapting LLMs to work with collaborative writing and updating. + prohibited_uses: '' + monitoring: '' + feedback: '' diff --git a/assets/microsoft.yaml b/assets/microsoft.yaml index 298c806e..8d324607 100644 --- a/assets/microsoft.yaml +++ b/assets/microsoft.yaml @@ -779,3 +779,100 @@ monthly_active_users: unknown user_distribution: unknown failures: unknown + +- type: model + name: UniLM + organization: Microsoft + description: UniLM is a unified language model that can be fine-tuned for both + natural language understanding and generation tasks. + created_date: 2019-10-01 + url: https://proceedings.neurips.cc/paper_files/paper/2019/file/c20bb2d9a50d5ac1f713f8b34d9aac5a-Paper.pdf + model_card: '' + modality: text + analysis: Evaluated on GLUE, SQuAD 2.0, and CoQA benchmarks. + size: 340M parameters + dependencies: [] + training_emissions: unknown + training_time: 10,000 steps in 7 hours + training_hardware: 8 NVIDIA Tesla V100 32GB GPUs + quality_control: '' + access: open + license: MIT + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' + +- type: model + name: Docugami + organization: Microsoft + description: Docugami is an LLM focused on writing business documents and data + using generative AI. + created_date: 2021-04-12 + url: https://www.docugami.com/generative-ai + model_card: '' + modality: text + analysis: '' + size: 20B parameters + dependencies: [] + training_emissions: unknown + training_time: unknown + training_hardware: '' + quality_control: '' + access: limited + license: '' + intended_uses: analyzing, writing, and connecting business documents and data + prohibited_uses: '' + monitoring: '' + feedback: '' + +- type: model + name: BEiT-3 + organization: Microsoft + description: BEiT-3 is a general-purpose multimodal foundation model for vision + and vision-language tasks. + created_date: 2022-08-31 + url: https://arxiv.org/pdf/2208.10442.pdf + model_card: '' + modality: text and image input and output + analysis: Evaluated on a range of standardized vision benchmarks, and achieves + state-of-the-art performance on all of them experimentally. 
+ size: 1.9B parameters + dependencies: [Multiway Transformer network] + training_emissions: unknown + training_time: '' + training_hardware: '' + quality_control: '' + access: open + license: '' + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' + +- type: model + name: WizardLM + organization: Microsoft + description: Starting with an initial set of instructions, we use our proposed + Evol-Instruct to rewrite them step by step into more complex instructions. Then, + we mix all generated instruction data to fine-tune LLaMA. We call the resulting + model WizardLM. + created_date: 2023-04-24 + url: https://arxiv.org/pdf/2304.12244v1.pdf + model_card: https://huggingface.co/WizardLM/WizardLM-13B-1.0 + modality: natural language text + analysis: Reports results on standard LLM benchmarks in comparison to other LLMs + and test sets. + size: 7B parameters + dependencies: [LLaMA, Evol-Instruct, Alpaca dataset] + training_emissions: '' + training_time: 70 hours on 3 epochs + training_hardware: 8 V100 GPUs + quality_control: '' + access: open + license: Apache 2.0 + intended_uses: Creating large amounts of instruction data, particularly with high + complexity + prohibited_uses: '' + monitoring: '' + feedback: https://huggingface.co/datasets/WizardLM/evol_instruct_70k/discussions diff --git a/assets/mpt.yaml b/assets/mosaic.yaml similarity index 100% rename from assets/mpt.yaml rename to assets/mosaic.yaml diff --git a/assets/multimodal.yaml b/assets/multimodal.yaml deleted file mode 100644 index 4f7727cc..00000000 --- a/assets/multimodal.yaml +++ /dev/null @@ -1,24 +0,0 @@ ---- - -- type: dataset - name: Multimodal C4 - organization: Allen Institute for AI - description: An augmentation of C4 with images added and made openly available. - created_date: 2023-06-09 - url: https://arxiv.org/pdf/2304.06939.pdf - datasheet: '' - modality: text and images - size: 43B English tokens with 101.2M documents and 571M images - sample: [] - analysis: Conducted experiments on models trained with Multimodal C4 in comparison - to models trained on single image/caption datasets - dependencies: [C4] - included: '' - excluded: '' - quality_control: '' - access: open - license: MIT - intended_uses: '' - prohibited_uses: '' - monitoring: '' - feedback: '' diff --git a/assets/otter.yaml b/assets/nanyang.yaml similarity index 100% rename from assets/otter.yaml rename to assets/nanyang.yaml diff --git a/assets/cformers.yaml b/assets/nolano.yaml similarity index 100% rename from assets/cformers.yaml rename to assets/nolano.yaml diff --git a/assets/openflamingo.yaml b/assets/openflamingo.yaml deleted file mode 100644 index 1a79d3db..00000000 --- a/assets/openflamingo.yaml +++ /dev/null @@ -1,25 +0,0 @@ ---- - -- type: model - name: OpenFlamingo - organization: LAION - description: An open-source reproduction of DeepMind's Flamingo model. At its - core, OpenFlamingo is a framework that enables training and evaluation of large - multimodal models (LMMs). - created_date: 2023-03-28 - url: https://laion.ai/blog/open-flamingo/ - model_card: https://github.com/mlfoundations/open_flamingo/blob/main/MODEL_CARD.md - modality: image and text input with textual output - analysis: Evaluated on COCO captioning and VQAv2 vision-language tasks. 
- size: 9B parameters - dependencies: [LLaMA, CLIP] - training_emissions: '' - training_time: '' - training_hardware: '' - quality_control: '' - access: open - license: MIT - intended_uses: academic research purposes - prohibited_uses: commercial use - monitoring: '' - feedback: '' diff --git a/assets/openllama.yaml b/assets/openllama.yaml deleted file mode 100644 index a43c19cb..00000000 --- a/assets/openllama.yaml +++ /dev/null @@ -1,23 +0,0 @@ ---- - -- type: model - name: OpenLLaMA - organization: Berkeley AI Research - description: OpenLlama is a open source reproduction of Meta's LLaMA model. - created_date: 2023-05-03 - url: https://github.com/openlm-research/open_llama - model_card: '' - modality: text - analysis: Evaluated on wide range of tasks using own evaluation benchmarks. - size: 17B parameters - dependencies: [LLaMA, RedPajama] - training_emissions: unknown - training_time: unknown - training_hardware: '' - quality_control: '' - access: open - license: Apache 2.0 - intended_uses: '' - prohibited_uses: '' - monitoring: '' - feedback: '' diff --git a/assets/underwritegpt.yaml b/assets/paladin.yaml similarity index 100% rename from assets/underwritegpt.yaml rename to assets/paladin.yaml diff --git a/assets/palm2.yaml b/assets/palm2.yaml deleted file mode 100644 index 0791eb27..00000000 --- a/assets/palm2.yaml +++ /dev/null @@ -1,30 +0,0 @@ ---- - -- type: model - name: PaLM 2 - organization: Google - description: PaLM2 is a new state-of-the-art language model that has better multilingual - and reasoning capabilities and is more compute-efficient than its predecessor - PaLM. PaLM 2 is a Transformer-based model trained using a mixture of objectives - similar to UL2. - created_date: 2023-05-10 - url: https://blog.google/technology/ai/google-palm-2-ai-large-language-model/ - model_card: https://ai.google/static/documents/palm2techreport.pdf - modality: text (input and output) - analysis: Reports results on standard code benchmarks across a variety of programming - languages. - size: 3.6T parameters - dependencies: [palm 2 dataset] - training_emissions: '' - training_time: '' - training_hardware: TPU v4 (number unspecified) - quality_control: Employed de-duplication, removal of sensitive-PII and filtering. - Added control tokens marking toxicity of text. - access: open - license: Google - intended_uses: general use large language model that can be used for language, - reasoning, and code tasks. - prohibited_uses: becoming part of a general-purpose service or product or use - within specific downstream applications without prior assessment - monitoring: Google internal monitoring - feedback: Specific queries provided by annotators diff --git a/assets/palmyra.yaml b/assets/palmyra.yaml deleted file mode 100644 index 5dd78f33..00000000 --- a/assets/palmyra.yaml +++ /dev/null @@ -1,26 +0,0 @@ ---- - -- type: model - name: Palmyra - organization: Writer - description: Palmyra is a privacy-first LLM for enterprises trained on business and marketing writing. - created_date: - value: 2023-01-01 - explanation: The model was stated to be published in January, but which day is - not specified on the website. 
- url: https://gpt3demo.com/apps/palmyra - model_card: https://huggingface.co/Writer/palmyra-base - modality: text - analysis: Evaluated on the SuperGLUE benchmark - size: 128M (Small), 5B (Base), 20B (Large) parameters - dependencies: [Writer dataset] - training_emissions: unknown - training_time: unknown - training_hardware: '' - quality_control: '' - access: open - license: Apache 2.0 - intended_uses: generating text from a prompt - prohibited_uses: '' - monitoring: '' - feedback: https://huggingface.co/Writer/palmyra-base/discussions diff --git a/assets/peer.yaml b/assets/peer.yaml deleted file mode 100644 index 12ce8a96..00000000 --- a/assets/peer.yaml +++ /dev/null @@ -1,28 +0,0 @@ ---- - -- type: model - name: PEER - organization: Meta - description: PEER is a collaborative language model that is trained to imitate - the entire writing process itself. PEER can write drafts, add suggestions, propose - edits and provide explanations for its actions. - created_date: 2022-08-24 - url: https://arxiv.org/pdf/2208.11663.pdf - model_card: '' - modality: natural language text - analysis: PEER is evaluated on core research questions intended to gauge language - understanding, proper use of citations, instruction following, and iterative - use. - size: 3B parameters - dependencies: [] - training_emissions: '' - training_time: '' - training_hardware: 64 GPUs - quality_control: Heuristics and edit filtering was used on data set, which consisted - mostly of Wikipedia pages. - access: open - license: '' - intended_uses: adapting LLMs to work with collaborative writing and updating. - prohibited_uses: '' - monitoring: '' - feedback: '' diff --git a/assets/pi.yaml b/assets/pi.yaml deleted file mode 100644 index 3c35b7b4..00000000 --- a/assets/pi.yaml +++ /dev/null @@ -1,23 +0,0 @@ ---- - -- type: application - name: Pi - organization: Inflection AI - description: Personal AI chatbot designed to be conversational and specialized - in emotional intelligence. - created_date: 2023-05-02 - url: https://inflection.ai/press - dependencies: [Inflection-1] - adaptation: '' - output_space: natural language text responses - quality_control: '' - access: limited - license: unknown - terms_of_service: '' - intended_uses: to be used as a personal assistant chatbot for everyday activities - prohibited_uses: '' - monitoring: '' - feedback: '' - monthly_active_users: '' - user_distribution: '' - failures: '' diff --git a/assets/pythia.yaml b/assets/pythia.yaml deleted file mode 100644 index 6421f741..00000000 --- a/assets/pythia.yaml +++ /dev/null @@ -1,25 +0,0 @@ ---- - -- type: model - name: Pythia - organization: Eleuther AI - description: A suite of 16 LLMs all trained on public data seen in the exact same - order and ranging in size from 70M to 12B parameters - created_date: 2023-05-31 - url: https://arxiv.org/pdf/2304.01373.pdf - model_card: https://huggingface.co/EleutherAI/pythia-12b - modality: natural language text - analysis: Evaluated on a variety of NLP benchmarks and found to perform similarly - to OPT and BLOOM models. 
- size: 12B parameters - dependencies: [The Pile] - training_emissions: '' - training_time: '' - training_hardware: 64 A100 GPUs - quality_control: '' - access: open - license: Apache 2.0 - intended_uses: '' - prohibited_uses: '' - monitoring: '' - feedback: https://huggingface.co/EleutherAI/pythia-6.9b/discussions diff --git a/assets/redpajama.yaml b/assets/redpajama.yaml deleted file mode 100644 index 0f1a51ba..00000000 --- a/assets/redpajama.yaml +++ /dev/null @@ -1,24 +0,0 @@ ---- - -- type: dataset - name: RedPajama-Data - organization: Together - description: The RedPajama base dataset is a 1.2 trillion token fully-open dataset - created by following the recipe described in the LLaMA paper - created_date: 2022-04-17 - url: https://www.together.xyz/blog/redpajama - datasheet: https://huggingface.co/datasets/togethercomputer/RedPajama-Data-1T - modality: text - size: 1.2 trillion tokens - sample: [] - analysis: '' - dependencies: [LLaMA dataset, GitHub, Wikipedia] - included: '' - excluded: '' - quality_control: '' - access: open - license: Apache 2.0 - intended_uses: '' - prohibited_uses: '' - monitoring: '' - feedback: '' diff --git a/assets/refinedweb.yaml b/assets/refinedweb.yaml deleted file mode 100644 index 89e584d2..00000000 --- a/assets/refinedweb.yaml +++ /dev/null @@ -1,25 +0,0 @@ ---- - -- type: dataset - name: RefinedWeb - organization: UAE Technology Innovation Institute - description: RefinedWeb is a high-quality five trillion tokens web-only English pretraining dataset. - created_date: 2023-06-01 - url: https://arxiv.org/pdf/2306.01116.pdf - datasheet: https://huggingface.co/datasets/tiiuae/falcon-refinedweb - modality: text - size: 600B tokens - sample: [] - analysis: '' - dependencies: [] - included: '' - excluded: '' - quality_control: '' - access: open - license: - value: unknown - explanation: License can be found at https://huggingface.co/datasets/tiiuae/falcon-refinedweb - intended_uses: '' - prohibited_uses: '' - monitoring: '' - feedback: '' diff --git a/assets/roentgen.yaml b/assets/roentgen.yaml deleted file mode 100644 index 31450cd2..00000000 --- a/assets/roentgen.yaml +++ /dev/null @@ -1,25 +0,0 @@ ---- - -- type: model - name: RoentGen - organization: Stanford University AIMI - description: RoentGen is a generative medical imaging model that can create visually - convincing X-ray images. - created_date: 2022-11-23 - url: https://arxiv.org/pdf/2211.12737.pdf - model_card: '' - modality: text input and image output - analysis: Evaluated on own framework that tests domain-specific tasks in medical - field. - size: 330M parameters - dependencies: [Stable Diffusion, RoentGen radiology dataset] - training_emissions: unknown - training_time: 60k training steps per day - training_hardware: 64 A100 GPUs - quality_control: '' - access: open - license: '' - intended_uses: '' - prohibited_uses: '' - monitoring: '' - feedback: '' diff --git a/assets/santacoder.yaml b/assets/santacoder.yaml deleted file mode 100644 index ddaf3596..00000000 --- a/assets/santacoder.yaml +++ /dev/null @@ -1,24 +0,0 @@ ---- - -- type: model - name: SantaCoder - organization: BigCode - description: Multilingual code model derived from findings of BigCode Project - analysis on Github stars' association to data quality. - created_date: 2023-02-24 - url: https://arxiv.org/pdf/2301.03988.pdf - model_card: '' - modality: code - analysis: Evaluated on MultiPL-E system benchmarks. 
- size: 1.1B parameters - dependencies: [The Stack, BigCode Dataset] - training_emissions: '' - training_time: 3.1 days - training_hardware: 96 Tesla V100 GPUs - quality_control: '' - access: open - license: Apache 2.0 - intended_uses: '' - prohibited_uses: '' - monitoring: '' - feedback: '' diff --git a/assets/shanghai.yaml b/assets/shanghai.yaml index b4a206a7..8dae5081 100644 --- a/assets/shanghai.yaml +++ b/assets/shanghai.yaml @@ -28,3 +28,26 @@ prohibited_uses: '' monitoring: '' feedback: '' + +- type: model + name: Lego-MT + organization: Shanghai AI Laboratory + description: Lego-MT is a multilingual large language model which uses a more + efficient approach of being an effective detachable model. + created_date: 2023-05-29 + url: https://arxiv.org/pdf/2212.10551.pdf + model_card: '' + modality: multilingual text + analysis: Evaluated based on own constructed dataset covering 433 languages. + size: 1.2B parameters + dependencies: [OPUS] + training_emissions: unknown + training_time: 15 days + training_hardware: 32 A100 GPUs + quality_control: '' + access: open + license: '' + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' diff --git a/assets/goat.yaml b/assets/singapore.yaml similarity index 100% rename from assets/goat.yaml rename to assets/singapore.yaml diff --git a/assets/soda.yaml b/assets/soda.yaml deleted file mode 100644 index dd5991d7..00000000 --- a/assets/soda.yaml +++ /dev/null @@ -1,26 +0,0 @@ ---- - -- type: dataset - name: SODA - organization: Allen Institute of AI - description: SODA is the first publicly available, million-scale, high-quality - dialogue dataset covering a wide range of social interactions. - created_date: 2023-05-24 - url: https://arxiv.org/pdf/2212.10465.pdf - datasheet: https://huggingface.co/datasets/allenai/soda - modality: text - size: 1.5M dialogues - sample: [] - analysis: Randomly sampled dialogues from dataset are evaluated according to six - established criteria of natural flow, context dependence, topic consistency, - speaker consistency, specificity, and overall. - dependencies: [] - included: '' - excluded: '' - quality_control: '' - access: open - license: CC BY 4.0 - intended_uses: '' - prohibited_uses: '' - monitoring: '' - feedback: '' diff --git a/assets/stability.yaml b/assets/stability.yaml index fc209b52..da5b51be 100644 --- a/assets/stability.yaml +++ b/assets/stability.yaml @@ -29,3 +29,56 @@ prohibited_uses: '' monitoring: '' feedback: '' + +- type: model + name: DeepFloyd IF + organization: Stability AI + description: A text-to-image cascaded pixel diffusion model released in conjunction + with AI research lab DeepFloyd. + created_date: 2023-04-28 + url: https://stability.ai/blog/deepfloyd-if-text-to-image-model + model_card: https://huggingface.co/DeepFloyd/IF-I-XL-v1.0 + modality: textual input with image output + analysis: Evaluated on the COCO dataset. + size: 4.3B parameters + dependencies: [LAION-5B] + training_emissions: '' + training_time: '' + training_hardware: '' + quality_control: '' + access: open + license: + value: custom + explanation: License agreement can be found on model card https://huggingface.co/DeepFloyd/IF-I-XL-v1.0 + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: https://huggingface.co/DeepFloyd/IF-I-XL-v1.0/discussions + +- type: model + name: StableLM + organization: Stability AI + description: Large language models trained on up to 1.5 trillion tokens. 
+ created_date: 2023-04-20 + url: https://github.com/Stability-AI/StableLM + model_card: '' + modality: natural language text + analysis: '' + size: 7B parameters + dependencies: + - StableLM-Alpha dataset + - Alpaca dataset + - gpt4all dataset + - ShareGPT52K dataset + - Dolly dataset + - HH dataset + training_emissions: '' + training_time: '' + training_hardware: '' + quality_control: '' + access: open + license: Apache 2.0 + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' diff --git a/assets/stablelm.yaml b/assets/stablelm.yaml deleted file mode 100644 index d92fdf8c..00000000 --- a/assets/stablelm.yaml +++ /dev/null @@ -1,23 +0,0 @@ ---- - -- type: model - name: StableLM - organization: Stability AI - description: Large language models trained on up to 1.5 trillion tokens. - created_date: 2023-04-20 - url: https://github.com/Stability-AI/StableLM - model_card: '' - modality: natural language text - analysis: '' - size: 7B parameters - dependencies: [StableLM-Alpha dataset, Alpaca dataset, gpt4all dataset, ShareGPT52K dataset, Dolly dataset, HH dataset] - training_emissions: '' - training_time: '' - training_hardware: '' - quality_control: '' - access: open - license: Apache 2.0 - intended_uses: '' - prohibited_uses: '' - monitoring: '' - feedback: '' diff --git a/assets/stack.yaml b/assets/stack.yaml deleted file mode 100644 index 416829f6..00000000 --- a/assets/stack.yaml +++ /dev/null @@ -1,26 +0,0 @@ ---- - -- type: dataset - name: The Stack - organization: BigCode - description: The Stack is a 3.1 TB dataset consisting of permissively licensed - source code inteded for use in creating code LLMs. - created_date: 2022-11-20 - url: https://arxiv.org/pdf/2211.15533.pdf - datasheet: https://huggingface.co/datasets/bigcode/the-stack - modality: code (358 programming languages) - size: 3.1 TB - sample: [] - analysis: Evaluated models trained on The Stack on HumanEval and MBPP and compared - against similarly-sized models. - dependencies: [GitHub] - included: '' - excluded: '' - quality_control: allowed users whose data were part of The Stack's training data - to opt-out - access: open - license: Apache 2.0 - intended_uses: creating code LLMs - prohibited_uses: '' - monitoring: '' - feedback: '' diff --git a/assets/stanford.yaml b/assets/stanford.yaml index 88454882..01d082d5 100644 --- a/assets/stanford.yaml +++ b/assets/stanford.yaml @@ -21,3 +21,53 @@ prohibited_uses: '' monitoring: '' feedback: '' + +- type: model + name: RoentGen + organization: Stanford + description: RoentGen is a generative medical imaging model that can create visually + convincing X-ray images. + created_date: 2022-11-23 + url: https://arxiv.org/pdf/2211.12737.pdf + model_card: '' + modality: text input and image output + analysis: Evaluated on own framework that tests domain-specific tasks in medical + field. + size: 330M parameters + dependencies: [Stable Diffusion, RoentGen radiology dataset] + training_emissions: unknown + training_time: 60k training steps per day + training_hardware: 64 A100 GPUs + quality_control: '' + access: open + license: '' + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' + +- type: model + name: CORGI + organization: Stanford + description: Model trained to generate language corrections for physical control + tasks. 
+ created_date: 2023-06-12 + url: https://arxiv.org/pdf/2306.07012.pdf + model_card: '' + modality: trajectory input with text output + analysis: Evaluated on three physical control tasks, drawing, steering, and human + body movement on various dynamics + size: 124M parameters + dependencies: [GPT-2, BABEL, text-davinci-003] + training_emissions: '' + training_time: + value: unknown + explanation: The authors do not report the training time, but do report that they train for 200 epochs. + training_hardware: one NVIDIA A40 GPU + quality_control: '' + access: open + license: MIT + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' diff --git a/assets/starcoder.yaml b/assets/starcoder.yaml deleted file mode 100644 index 55f2dc24..00000000 --- a/assets/starcoder.yaml +++ /dev/null @@ -1,28 +0,0 @@ ---- - -- type: model - name: StarCoder - organization: BigCode - description: StarCoder is a Large Language Model for Code (Code LLM) trained on - permissively licensed data from GitHub, including from 80+ programming languages, - Git commits, GitHub issues, and Jupyter notebooks. - created_date: 2023-05-09 - url: https://arxiv.org/pdf/2305.06161.pdf - model_card: https://huggingface.co/bigcode/starcoder - modality: code (80+ programming languages) - analysis: Tested on several benchmarks, most notably Python benchmark HumanEval. - size: 15.5B parameters - dependencies: [The Stack] - training_emissions: '' - training_time: 24 days - training_hardware: 512 Tesla A100 GPU - quality_control: No specific quality control is mentioned in model training, though - details on data processing and how the tokenizer was trained are provided in - the paper. - access: open - license: Apache 2.0 - intended_uses: With a Tech Assistant prompt and not as an instruction model given - training limitations. - prohibited_uses: '' - monitoring: '' - feedback: https://huggingface.co/bigcode/starcoder/discussions diff --git a/assets/bark.yaml b/assets/suno.yaml similarity index 100% rename from assets/bark.yaml rename to assets/suno.yaml diff --git a/assets/together.yaml b/assets/together.yaml index 191dd5fc..2eb881ac 100644 --- a/assets/together.yaml +++ b/assets/together.yaml @@ -109,3 +109,26 @@ prohibited_uses: '' monitoring: '' feedback: '' + +- type: dataset + name: RedPajama-Data + organization: Together + description: The RedPajama base dataset is a 1.2 trillion token fully-open dataset + created by following the recipe described in the LLaMA paper + created_date: 2023-04-17 + url: https://www.together.xyz/blog/redpajama + datasheet: https://huggingface.co/datasets/togethercomputer/RedPajama-Data-1T + modality: text + size: 1.2 trillion tokens + sample: [] + analysis: '' + dependencies: [LLaMA dataset, GitHub, Wikipedia] + included: '' + excluded: '' + quality_control: '' + access: open + license: Apache 2.0 + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' diff --git a/assets/falcon.yaml b/assets/uae.yaml similarity index 55% rename from assets/falcon.yaml rename to assets/uae.yaml index b1fede02..f2295fc6 100644 --- a/assets/falcon.yaml +++ b/assets/uae.yaml @@ -23,3 +23,28 @@ assessment of risks and mitigation monitoring: None feedback: https://huggingface.co/tiiuae/falcon-40b/discussions + +- type: dataset + name: RefinedWeb + organization: UAE Technology Innovation Institute + description: RefinedWeb is a high-quality five-trillion-token web-only English + pretraining dataset. 
+ created_date: 2023-06-01 + url: https://arxiv.org/pdf/2306.01116.pdf + datasheet: https://huggingface.co/datasets/tiiuae/falcon-refinedweb + modality: text + size: 600B tokens + sample: [] + analysis: '' + dependencies: [] + included: '' + excluded: '' + quality_control: '' + access: open + license: + value: unknown + explanation: License can be found at https://huggingface.co/datasets/tiiuae/falcon-refinedweb + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' \ No newline at end of file diff --git a/assets/unilm.yaml b/assets/unilm.yaml deleted file mode 100644 index 845df162..00000000 --- a/assets/unilm.yaml +++ /dev/null @@ -1,24 +0,0 @@ ---- - -- type: model - name: UniLM - organization: Microsoft - description: UniLM is a unified language model that can be fine-tuned for both - natural language understanding and generation tasks. - created_date: 2019-10-01 - url: https://proceedings.neurips.cc/paper_files/paper/2019/file/c20bb2d9a50d5ac1f713f8b34d9aac5a-Paper.pdf - model_card: '' - modality: text - analysis: Evaluated on GLUE, SQuAD 2.0, and CoQA benchmarks. - size: 340M parameters - dependencies: [] - training_emissions: unknown - training_time: 10,000 steps in 7 hours - training_hardware: 8 NVIDIA Tesla V100 32GB GPUs - quality_control: '' - access: open - license: MIT - intended_uses: '' - prohibited_uses: '' - monitoring: '' - feedback: '' diff --git a/assets/guanaco.yaml b/assets/uwashington.yaml similarity index 100% rename from assets/guanaco.yaml rename to assets/uwashington.yaml diff --git a/assets/voicebox.yaml b/assets/voicebox.yaml deleted file mode 100644 index 2f7a3579..00000000 --- a/assets/voicebox.yaml +++ /dev/null @@ -1,25 +0,0 @@ ---- - -- type: model - name: Voicebox - organization: Meta AI - description: Voicebox is the first generative AI model for speech to generalize - across tasks with state-of-the-art performance. - created_date: 2023-06-16 - url: https://research.facebook.com/publications/voicebox-text-guided-multilingual-universal-speech-generation-at-scale/ - model_card: '' - modality: text and audio input and audio output - analysis: Evaluated on zero-shot text-to-speech benchmarks, with Voicebox outperforming - the current state-of-the-art English model VALL-E. - size: 330M parameters - dependencies: [] - training_emissions: unknown - training_time: 750,000 iterations - training_hardware: 32 GPUs of unspecified type - quality_control: '' - access: closed - license: '' - intended_uses: '' - prohibited_uses: '' - monitoring: '' - feedback: '' diff --git a/assets/vqganclip.yaml b/assets/vqganclip.yaml deleted file mode 100644 index 59fb2959..00000000 --- a/assets/vqganclip.yaml +++ /dev/null @@ -1,25 +0,0 @@ ---- - -- type: model - name: VQGAN-CLIP - organization: EleutherAI - description: VQGAN-CLIP is a model that better generates and edits images using - a multimodal encoder to guide image generation. - created_date: 2022-09-04 - url: https://arxiv.org/pdf/2204.08583.pdf - model_card: '' - modality: text input with image output - analysis: Evaluated by human testers rating alignment of text input, image output - pairs. 
- size: 227M parameters - dependencies: [VQGAN, CLIP] - training_emissions: unknown - training_time: Less than 1 V100-hour - training_hardware: 1 NVIDIA Tesla K80 GPU - quality_control: '' - access: open - license: MIT - intended_uses: '' - prohibited_uses: '' - monitoring: '' - feedback: '' diff --git a/assets/wizardlm.yaml b/assets/wizardlm.yaml deleted file mode 100644 index 3f94d441..00000000 --- a/assets/wizardlm.yaml +++ /dev/null @@ -1,28 +0,0 @@ ---- - -- type: model - name: WizardLM - organization: Microsoft - description: Starting with an initial set of instructions, we use our proposed - Evol-Instruct to rewrite them step by step into more complex instructions. Then, - we mix all generated instruction data to fine-tune LLaMA. We call the resulting - model WizardLM. - created_date: 2023-04-24 - url: https://arxiv.org/pdf/2304.12244v1.pdf - model_card: https://huggingface.co/WizardLM/WizardLM-13B-1.0 - modality: natural language text - analysis: Reports results on standard LLM benchmarks in comparison to other LLMs - and test sets. - size: 7B parameters - dependencies: [LLaMA, Evol-Instruct, Alpaca dataset] - training_emissions: '' - training_time: 70 hours on 3 epochs - training_hardware: 8 V100 GPUs - quality_control: '' - access: open - license: Apache 2.0 - intended_uses: Creating large amounts of instruction data, particularly with high - complexity - prohibited_uses: '' - monitoring: '' - feedback: https://huggingface.co/datasets/WizardLM/evol_instruct_70k/discussions diff --git a/assets/writer.yaml b/assets/writer.yaml new file mode 100644 index 00000000..a56591b9 --- /dev/null +++ b/assets/writer.yaml @@ -0,0 +1,53 @@ +--- + +- type: model + name: Palmyra + organization: Writer + description: Palmyra is a privacy-first LLM for enterprises trained on business + and marketing writing. + created_date: + value: 2023-01-01 + explanation: The model was stated to be published in January, but which day + is not specified on the website. + url: https://gpt3demo.com/apps/palmyra + model_card: https://huggingface.co/Writer/palmyra-base + modality: text + analysis: Evaluated on the SuperGLUE benchmark + size: 128M (Small), 5B (Base), 20B (Large) parameters + dependencies: [Writer dataset] + training_emissions: unknown + training_time: unknown + training_hardware: '' + quality_control: '' + access: open + license: Apache 2.0 + intended_uses: generating text from a prompt + prohibited_uses: '' + monitoring: '' + feedback: https://huggingface.co/Writer/palmyra-base/discussions + +- type: model + name: Camel + organization: Writer + description: Camel is an instruction-following large language model tailored for + advanced NLP and comprehension capabilities. + created_date: + value: 2023-04-01 + explanation: The model was stated to be published in April, but which day is + not specified on the website. 
+ url: https://chatcamel.vercel.app/ + model_card: https://huggingface.co/Writer/camel-5b-hf + modality: text + analysis: '' + size: 5B parameters + dependencies: [Palmyra, Camel dataset] + training_emissions: unknown + training_time: unknown + training_hardware: '' + quality_control: '' + access: open + license: Apache 2.0 + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: https://huggingface.co/Writer/camel-5b-hf/discussions diff --git a/js/main.js b/js/main.js index bd6cdb54..5434dd3c 100644 --- a/js/main.js +++ b/js/main.js @@ -629,57 +629,30 @@ function loadAssetsAndRenderPageContent() { const paths = [ 'assets/adept.yaml', - 'assets/ares.yaml', + 'assets/writer.yaml', + 'assets/berkeley.yaml', + 'assets/faraday.yaml', 'assets/autogpt.yaml', - 'assets/bark.yaml', - 'assets/beit.yaml', - 'assets/bigtrans.yaml', - 'assets/biomedgpt.yaml', - 'assets/camel.yaml', - 'assets/cformers.yaml', + 'assets/suno.yaml', + 'assets/amazon.yaml', + 'assets/casia.yaml', + 'assets/lehigh.yaml', + 'assets/nolano.yaml', 'assets/chatglm.yaml', - 'assets/corgi.yaml', - 'assets/cosmo.yaml', - 'assets/deepfloyd.yaml', - 'assets/docugami.yaml', - 'assets/falcon.yaml', - 'assets/goat.yaml', - 'assets/gorilla.yaml', - 'assets/guanaco.yaml', - 'assets/h2ogpt.yaml', - 'assets/inflection-1.yaml', - 'assets/jointslm.yaml', - 'assets/koala.yaml', - 'assets/legomt.yaml', - 'assets/magma.yaml', - 'assets/mpt.yaml', - 'assets/multimodal.yaml', + 'assets/uae.yaml', + 'assets/singapore.yaml', + 'assets/uwashington.yaml', + 'assets/h2oai.yaml', + 'assets/inflection.yaml', + 'assets/mosaic.yaml', 'assets/nextdoor.yaml', - 'assets/openflamingo.yaml', - 'assets/openfold.yaml', - 'assets/openllama.yaml', - 'assets/otter.yaml', - 'assets/palm2.yaml', - 'assets/palmyra.yaml', - 'assets/peer.yaml', - 'assets/pi.yaml', + 'assets/columbia.yaml', + 'assets/nanyang.yaml', 'assets/portkey.yaml', - 'assets/pythia.yaml', - 'assets/redpajama.yaml', - 'assets/refinedweb.yaml', - 'assets/roentgen.yaml', - 'assets/santacoder.yaml', - 'assets/soda.yaml', - 'assets/stablelm.yaml', - 'assets/stack.yaml', - 'assets/starcoder.yaml', + 'assets/bigcode.yaml', 'assets/transformify.yaml', - 'assets/underwritegpt.yaml', - 'assets/unilm.yaml', - 'assets/vicuna.yaml', - 'assets/voicebox.yaml', - 'assets/vqganclip.yaml', - 'assets/wizardlm.yaml', + 'assets/paladin.yaml', + 'assets/lmsys.yaml', 'assets/ai2.yaml', 'assets/ai21.yaml', 'assets/aleph_alpha.yaml', From 31b43f6259db9ea47a4dee9fe1aae4fbcee4e604 Mon Sep 17 00:00:00 2001 From: jxue16 <105090474+jxue16@users.noreply.github.com> Date: Wed, 26 Jul 2023 17:49:20 -0700 Subject: [PATCH 05/10] spacing changes --- assets/berkeley.yaml | 2 +- assets/bigcode.yaml | 2 +- assets/eleutherai.yaml | 2 +- assets/inflection.yaml | 2 +- assets/stability.yaml | 84 ------------------------------------------ assets/uae.yaml | 2 +- 6 files changed, 5 insertions(+), 89 deletions(-) diff --git a/assets/berkeley.yaml b/assets/berkeley.yaml index 2b8d64de..5ee01b6e 100644 --- a/assets/berkeley.yaml +++ b/assets/berkeley.yaml @@ -69,4 +69,4 @@ intended_uses: '' prohibited_uses: '' monitoring: '' - feedback: '' \ No newline at end of file + feedback: '' diff --git a/assets/bigcode.yaml b/assets/bigcode.yaml index 8f3ababc..74bc5255 100644 --- a/assets/bigcode.yaml +++ b/assets/bigcode.yaml @@ -73,4 +73,4 @@ intended_uses: creating code LLMs prohibited_uses: '' monitoring: '' - feedback: '' \ No newline at end of file + feedback: '' diff --git a/assets/eleutherai.yaml b/assets/eleutherai.yaml index 
945c27eb..524c8010 100644 --- a/assets/eleutherai.yaml +++ b/assets/eleutherai.yaml @@ -314,4 +314,4 @@ intended_uses: '' prohibited_uses: '' monitoring: '' - feedback: https://huggingface.co/EleutherAI/pythia-6.9b/discussions \ No newline at end of file + feedback: https://huggingface.co/EleutherAI/pythia-6.9b/discussions diff --git a/assets/inflection.yaml b/assets/inflection.yaml index 9d000476..5113a5f1 100644 --- a/assets/inflection.yaml +++ b/assets/inflection.yaml @@ -46,4 +46,4 @@ feedback: '' monthly_active_users: '' user_distribution: '' - failures: '' \ No newline at end of file + failures: '' diff --git a/assets/stability.yaml b/assets/stability.yaml index da5b51be..e69de29b 100644 --- a/assets/stability.yaml +++ b/assets/stability.yaml @@ -1,84 +0,0 @@ ---- - -- type: model - name: Stable Diffusion - organization: Stability AI - description: Stable Diffusion is an open source text-to-image model - created_date: - value: 2022-08-22 - explanation: Date the model was made publicly available - url: https://github.com/CompVis/stable-diffusion - model_card: https://huggingface.co/CompVis/stable-diffusion-v1-4?text=A+mecha+robot+in+a+favela+in+expressionist+style - modality: text and image - size: 890M parameters - analysis: '' - dependencies: [LAION-5B] - training_emissions: '' - training_time: 25 days according to Emad Mostaque (CEO of Stability) on [[Twitter]](https://twitter.com/emostaque/status/1563870674111832066) - training_hardware: 256 A100 GPUs according to Emad Mostaque (CEO of Stability) - on [[Twitter]](https://twitter.com/emostaque/status/1563870674111832066) - quality_control: '' - access: - value: open - explanation: Model weights are available for download from the [[Github repo]](https://github.com/CompVis/stable-diffusion) - license: - value: CreativeML Open RAIL-M - explanation: > - [[CreativeML Open RAIL-M]](https://github.com/CompVis/stable-diffusion/blob/main/LICENSE) - intended_uses: '' - prohibited_uses: '' - monitoring: '' - feedback: '' - -- type: model - name: DeepFloyd IF - organization: Stability AI - description: A text-to-image cascaded pixel diffusion model released in conjunction - with AI research lab DeepFloyd. - created_date: 2023-04-28 - url: https://stability.ai/blog/deepfloyd-if-text-to-image-model - model_card: https://huggingface.co/DeepFloyd/IF-I-XL-v1.0 - modality: textual input with image output - analysis: Evaluated on the COCO dataset. - size: 4.3B parameters - dependencies: [LAION-5B] - training_emissions: '' - training_time: '' - training_hardware: '' - quality_control: '' - access: open - license: - value: custom - explanation: License agreement can be found on model card https://huggingface.co/DeepFloyd/IF-I-XL-v1.0 - intended_uses: '' - prohibited_uses: '' - monitoring: '' - feedback: https://huggingface.co/DeepFloyd/IF-I-XL-v1.0/discussions - -- type: model - name: StableLM - organization: Stability AI - description: Large language models trained on up to 1.5 trillion tokens. 
- created_date: 2023-04-20 - url: https://github.com/Stability-AI/StableLM - model_card: '' - modality: natural language text - analysis: '' - size: 7B parameters - dependencies: - - StableLM-Alpha dataset - - Alpaca dataset - - gpt4all dataset - - ShareGPT52K dataset - - Dolly dataset - - HH dataset - training_emissions: '' - training_time: '' - training_hardware: '' - quality_control: '' - access: open - license: Apache 2.0 - intended_uses: '' - prohibited_uses: '' - monitoring: '' - feedback: '' diff --git a/assets/uae.yaml b/assets/uae.yaml index f2295fc6..127d82d0 100644 --- a/assets/uae.yaml +++ b/assets/uae.yaml @@ -47,4 +47,4 @@ intended_uses: '' prohibited_uses: '' monitoring: '' - feedback: '' \ No newline at end of file + feedback: '' From 2bcb75e7398e89d2de0fe7a9ab36ed2833eec9a6 Mon Sep 17 00:00:00 2001 From: jxue16 <105090474+jxue16@users.noreply.github.com> Date: Wed, 26 Jul 2023 17:59:18 -0700 Subject: [PATCH 06/10] fix stability --- assets/stability.yaml | 49 +++++++++++++++++++++++++++++++++++++++++++ assets/stanford.yaml | 5 +++-- 2 files changed, 52 insertions(+), 2 deletions(-) diff --git a/assets/stability.yaml b/assets/stability.yaml index e69de29b..3bda315c 100644 --- a/assets/stability.yaml +++ b/assets/stability.yaml @@ -0,0 +1,49 @@ +--- + +- type: model + name: DeepFloyd IF + organization: Stability AI + description: A text-to-image cascaded pixel diffusion model released in conjunction + with AI research lab DeepFloyd. + created_date: 2023-04-28 + url: https://stability.ai/blog/deepfloyd-if-text-to-image-model + model_card: https://huggingface.co/DeepFloyd/IF-I-XL-v1.0 + modality: textual input with image output + analysis: Evaluated on the COCO dataset. + size: 4.3B parameters + dependencies: [LAION-5B] + training_emissions: '' + training_time: '' + training_hardware: '' + quality_control: '' + access: open + license: + value: custom + explanation: License agreement can be found on model card https://huggingface.co/DeepFloyd/IF-I-XL-v1.0 + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: https://huggingface.co/DeepFloyd/IF-I-XL-v1.0/discussions + + +- type: model + name: StableLM + organization: Stability AI + description: Large language models trained on up to 1.5 trillion tokens. + created_date: 2023-04-20 + url: https://github.com/Stability-AI/StableLM + model_card: '' + modality: natural language text + analysis: '' + size: 7B parameters + dependencies: [StableLM-Alpha dataset, Alpaca dataset, gpt4all dataset, ShareGPT52K dataset, Dolly dataset, HH dataset] + training_emissions: '' + training_time: '' + training_hardware: '' + quality_control: '' + access: open + license: Apache 2.0 + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' diff --git a/assets/stanford.yaml b/assets/stanford.yaml index 01d082d5..f54d0d1e 100644 --- a/assets/stanford.yaml +++ b/assets/stanford.yaml @@ -60,9 +60,10 @@ size: 124M parameters dependencies: [GPT-2, BABEL, text-davinci-003] training_emissions: '' - training_time: + training_time: value: unknown - explanation: The authors do not report the training time, but do report that they train for 200 epochs. + explanation: The authors do not report the training time, but do report that + they train for 200 epochs. 
training_hardware: one NVIDIA A40 GPU quality_control: '' access: open From be3b61f7e09699d92dfb7c234e329b4484343bbd Mon Sep 17 00:00:00 2001 From: jxue16 <105090474+jxue16@users.noreply.github.com> Date: Thu, 27 Jul 2023 20:42:55 -0700 Subject: [PATCH 07/10] fix based on review --- assets/berkeley.yaml | 2 +- assets/bigcode.yaml | 8 ++++---- assets/chatglm.yaml | 2 +- assets/mosaic.yaml | 2 +- assets/together.yaml | 2 +- 5 files changed, 8 insertions(+), 8 deletions(-) diff --git a/assets/berkeley.yaml b/assets/berkeley.yaml index 5ee01b6e..e8bab3a0 100644 --- a/assets/berkeley.yaml +++ b/assets/berkeley.yaml @@ -59,7 +59,7 @@ modality: text analysis: Evaluated on wide range of tasks using own evaluation benchmarks. size: 17B parameters - dependencies: [LLaMA, RedPajama] + dependencies: [RedPajama] training_emissions: unknown training_time: unknown training_hardware: '' diff --git a/assets/bigcode.yaml b/assets/bigcode.yaml index 74bc5255..4a5362ea 100644 --- a/assets/bigcode.yaml +++ b/assets/bigcode.yaml @@ -13,9 +13,9 @@ analysis: Tested on several benchmarks, most notably Python benchmark HumanEval. size: 15.5B parameters dependencies: [The Stack] - training_emissions: '' - training_time: 24 days - training_hardware: 512 Tesla A100 GPU + training_emissions: 16.68 tons of CO2eq + training_time: 2 days + training_hardware: 64 NVIDIA A100 GPUs quality_control: No specific quality control is mentioned in model training, though details on data processing and how the tokenizer was trained are provided in the paper. @@ -41,7 +41,7 @@ dependencies: [The Stack, BigCode Dataset] training_emissions: '' training_time: 3.1 days - training_hardware: 96 Tesla V100 GPUs + training_hardware: 96 NVIDIA Tesla V100 GPUs quality_control: '' access: open license: Apache 2.0 diff --git a/assets/chatglm.yaml b/assets/chatglm.yaml index f41b3a75..5bb6d82e 100644 --- a/assets/chatglm.yaml +++ b/assets/chatglm.yaml @@ -11,7 +11,7 @@ modality: text analysis: Performance evaluated on English and Chinese language benchmark tests. size: 6B parameters - dependencies: [GLM-130B] + dependencies: [] training_emissions: unknown training_time: unknown training_hardware: '' diff --git a/assets/mosaic.yaml b/assets/mosaic.yaml index 5d97e67e..a887f3b4 100644 --- a/assets/mosaic.yaml +++ b/assets/mosaic.yaml @@ -11,7 +11,7 @@ modality: text analysis: Evaluated on a range of benchmarks and performed on par with LLaMA-7B. 
size: 7B parameters - dependencies: [] + dependencies: [RedPajama-Data, C4, The Stack, Multimodal C4] training_emissions: unknown training_time: 9.5 days training_hardware: 440 A100 40GB GPUs diff --git a/assets/together.yaml b/assets/together.yaml index 2eb881ac..2bb3cb97 100644 --- a/assets/together.yaml +++ b/assets/together.yaml @@ -122,7 +122,7 @@ size: 1.2 trillion tokens sample: [] analysis: '' - dependencies: [LLaMA dataset, GitHub, Wikipedia] + dependencies: [GitHub, Wikipedia] included: '' excluded: '' quality_control: '' From 747e4ec33a492de8f0d8d5432b78a60f3c71c353 Mon Sep 17 00:00:00 2001 From: jxue16 <105090474+jxue16@users.noreply.github.com> Date: Sun, 30 Jul 2023 15:18:59 -0700 Subject: [PATCH 08/10] add recent assets --- assets/amazon.yaml | 8 ++++++- assets/google.yaml | 28 +++++++++++++++++++++- assets/meta.yaml | 32 ++++++++++++++++++++++++- assets/stability.yaml | 55 ++++++++++++++++++++++++++++++++++++++++++- 4 files changed, 119 insertions(+), 4 deletions(-) diff --git a/assets/amazon.yaml b/assets/amazon.yaml index 152e0389..94a67a4a 100644 --- a/assets/amazon.yaml +++ b/assets/amazon.yaml @@ -9,7 +9,13 @@ access for all builders. using an API. created_date: 2023-04-13 url: https://aws.amazon.com/bedrock/ - dependencies: [Jurassic-2, Claude, Stable Diffusion, Amazon Titan] + dependencies: + - Jurassic-2 + - Claude + - Stable Diffusion + - Amazon Titan + - Claude 2 + - Cohere Command adaptation: '' output_space: foundation models made accessible via an API quality_control: '' diff --git a/assets/google.yaml b/assets/google.yaml index 4103f04b..fb56a8dc 100644 --- a/assets/google.yaml +++ b/assets/google.yaml @@ -615,7 +615,7 @@ training_time: unknown training_hardware: 128 TPU-v4 quality_control: '' - access: closed + access: open license: value: unknown explanation: > @@ -770,6 +770,32 @@ monitoring: '' feedback: '' +- type: model + name: Med-PaLM Multimodal + organization: Google + description: '' + created_date: 2023-07-26 + url: https://arxiv.org/pdf/2307.14334.pdf + model_card: '' + modality: text, image, and genomics input with text output + analysis: Evaluated on MultiMedBench tasks and radiologist evaluations of model-generated + chest X-ray reports + size: 562B parameters (dense) + dependencies: [PaLM-E, MultiMedBench] + training_emissions: '' + training_time: '' + training_hardware: '' + quality_control: '' + access: closed + license: + value: unknown + explanation: > + The asset isn't released, and hence the license is unknown. + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' + - type: model name: MultiMedQA organization: Google diff --git a/assets/meta.yaml b/assets/meta.yaml index a847abd6..c44d7474 100644 --- a/assets/meta.yaml +++ b/assets/meta.yaml @@ -369,7 +369,7 @@ Authors do not mention or provide a feedback mechanism. - type: model - name: LLaMa + name: LLaMA organization: Meta description: '' created_date: 2023-02-24 @@ -397,6 +397,36 @@ monitoring: '' feedback: '' +- type: model + name: LLaMA 2 + organization: Meta + description: LLaMA 2 is an updated version of LLaMA trained on a new mix of publicly + available data. + created_date: 2023-07-18 + url: https://ai.meta.com/resources/models-and-libraries/llama/ + model_card: Can be found at appendix of paper at https://ai.meta.com/research/publications/llama-2-open-foundation-and-fine-tuned-chat-models/ + modality: text + analysis: Evaluated on standard academic benchmarks and internal Meta libraries. 
+ size: 70B parameters (dense model) + dependencies: [] + training_emissions: 539 tCO2eq + training_time: '' + training_hardware: NVIDIA A100-80GB GPUs (TDP of 350-400W) + quality_control: '' + access: open + license: + value: custom + explanation: The license can be found at https://ai.meta.com/resources/models-and-libraries/llama-downloads/ + intended_uses: LLaMA 2 is intended for commercial and research use in English. + Tuned models are intended for assistant-like chat, whereas pretrained models + can be adapted for a variety of natural language generation tasks. + prohibited_uses: Use in any manner that violates applicable laws or regulations + (including trade compliance laws). Use in languages other than English. Use + in any other way that is prohibited by the Acceptable Use Policy and Licensing + Agreement for LLaMA 2. + monitoring: '' + feedback: '' + - type: model name: OPT-IML organization: Meta diff --git a/assets/stability.yaml b/assets/stability.yaml index 3bda315c..8c572529 100644 --- a/assets/stability.yaml +++ b/assets/stability.yaml @@ -36,7 +36,13 @@ modality: natural language text analysis: '' size: 7B parameters - dependencies: [StableLM-Alpha dataset, Alpaca dataset, gpt4all dataset, ShareGPT52K dataset, Dolly dataset, HH dataset] + dependencies: + - StableLM-Alpha dataset + - Alpaca dataset + - gpt4all dataset + - ShareGPT52K dataset + - Dolly dataset + - HH dataset training_emissions: '' training_time: '' training_hardware: '' @@ -47,3 +53,50 @@ prohibited_uses: '' monitoring: '' feedback: '' + + +- type: application + name: Stable Diffusion + organization: Stability AI + description: Stable Diffusion is a generative software that creates images from + text prompts. + created_date: 2022-08-22 + url: https://stability.ai/blog/stable-diffusion-public-release + dependencies: [] + adaptation: '' + output_space: image + quality_control: '' + access: open + license: + value: custom + explanation: License agreement can be found on https://github.com/CompVis/stable-diffusion/blob/main/LICENSE + terms_of_service: '' + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: https://huggingface.co/CompVis/stable-diffusion/discussions + monthly_active_users: '' + user_distribution: '' + failures: '' + +- type: application + name: Stable Diffusion XL + organization: Stability AI + description: Stable Diffusion XL is an updated version of Stable Diffusion, and + creates descriptive images with shorter prompts and generate words within images. 
+ created_date: 2023-07-26 + url: https://stability.ai/stablediffusion + dependencies: [] + adaptation: '' + output_space: image + quality_control: '' + access: limited + license: MIT + terms_of_service: '' + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' + monthly_active_users: '' + user_distribution: '' + failures: '' From 5d19360ee3129f3123fc3586081df8fb34df674d Mon Sep 17 00:00:00 2001 From: jxue16 <105090474+jxue16@users.noreply.github.com> Date: Sun, 30 Jul 2023 15:26:23 -0700 Subject: [PATCH 09/10] standardize features --- assets/adept.yaml | 44 +- assets/ai2.yaml | 130 +- assets/ai21.yaml | 472 +++---- assets/aleph_alpha.yaml | 180 ++- assets/alibaba.yaml | 69 +- assets/amazon.yaml | 35 +- assets/anthropic.yaml | 790 +++++------ assets/argonne.yaml | 37 +- assets/assembly.yaml | 167 ++- assets/autogpt.yaml | 31 +- assets/baai.yaml | 95 +- assets/baidu.yaml | 137 +- assets/bain.yaml | 31 +- assets/be_my_eyes.yaml | 31 +- assets/berkeley.yaml | 111 +- assets/bigcode.yaml | 115 +- assets/bigscience.yaml | 318 +++-- assets/bloomberg.yaml | 225 ++- assets/brex.yaml | 38 +- assets/casia.yaml | 39 +- assets/cerebras.yaml | 121 +- assets/chatglm.yaml | 37 +- assets/cmu.yaml | 53 +- assets/cohere.yaml | 937 +++++-------- assets/columbia.yaml | 35 +- assets/databricks.yaml | 103 +- assets/deepmind.yaml | 1202 +++++++--------- assets/duckduckgo.yaml | 34 +- assets/duolingo.yaml | 110 +- assets/eleutherai.yaml | 460 +++---- assets/ens.yaml | 82 +- assets/faraday.yaml | 35 +- assets/google.yaml | 2828 ++++++++++++++++++-------------------- assets/h2oai.yaml | 37 +- assets/hubspot.yaml | 34 +- assets/huggingface.yaml | 45 +- assets/inflection.yaml | 68 +- assets/instacart.yaml | 31 +- assets/juni.yaml | 29 +- assets/kakaobrain.yaml | 93 +- assets/khan.yaml | 34 +- assets/laion.yaml | 212 ++- assets/latitude.yaml | 58 +- assets/lehigh.yaml | 43 +- assets/linkedin.yaml | 31 +- assets/lmsys.yaml | 37 +- assets/meta.yaml | 1021 +++++++------- assets/microsoft.yaml | 1204 ++++++++-------- assets/mosaic.yaml | 37 +- assets/nanyang.yaml | 41 +- assets/naver.yaml | 47 +- assets/neeva.yaml | 114 +- assets/nextdoor.yaml | 31 +- assets/nolano.yaml | 31 +- assets/notion.yaml | 31 +- assets/nvidia.yaml | 147 +- assets/openai.yaml | 2355 ++++++++++++++----------------- assets/othersideai.yaml | 57 +- assets/oxford.yaml | 191 ++- assets/paladin.yaml | 29 +- assets/perplexity.yaml | 68 +- assets/portkey.yaml | 31 +- assets/quizlet.yaml | 33 +- assets/quora.yaml | 37 +- assets/robin.yaml | 33 +- assets/salesforce.yaml | 143 +- assets/sana.yaml | 77 +- assets/shanghai.yaml | 72 +- assets/shop.yaml | 31 +- assets/singapore.yaml | 41 +- assets/snap.yaml | 38 +- assets/speak.yaml | 31 +- assets/spotify.yaml | 41 +- assets/stability.yaml | 101 -- assets/stanford.yaml | 113 +- assets/stonybrook.yaml | 95 +- assets/suno.yaml | 35 +- assets/together.yaml | 196 +-- assets/transformify.yaml | 31 +- assets/trevor.yaml | 70 +- assets/tsinghua.yaml | 217 +-- assets/uae.yaml | 74 +- assets/uw.yaml | 31 +- assets/uwashington.yaml | 39 +- assets/viable.yaml | 52 +- assets/writer.yaml | 80 +- assets/yandex.yaml | 92 +- assets/you.yaml | 100 +- 88 files changed, 7922 insertions(+), 9300 deletions(-) diff --git a/assets/adept.yaml b/assets/adept.yaml index 1db1c261..ecb2b1a7 100644 --- a/assets/adept.yaml +++ b/assets/adept.yaml @@ -1,30 +1,30 @@ --- - -- type: model - name: ACT-1 - organization: Adept +- access: closed + analysis: '' + created_date: + explanation: The date the model was announced in the 
[[Adept blog post]](https://www.adept.ai/blog/act-1). + value: 2022-09-14 + dependencies: [] description: ACT-1 (ACtion Transformer) is a large-scale transformer model designed and trained specifically for taking actions on computers (use software tools, APIs and websites) in response to the user's natural language commands. - created_date: - value: 2022-09-14 - explanation: The date the model was announced in the [[Adept blog post]](https://www.adept.ai/blog/act-1). - url: https://www.adept.ai/blog/act-1 + feedback: '' + intended_uses: '' + license: + explanation: "The asset isn't released, and hence the license is unknown.\n" + value: unknown + modality: + explanation: text + value: text; text model_card: none - modality: text - analysis: '' + monitoring: '' + name: ACT-1 + organization: Adept + prohibited_uses: '' + quality_control: '' size: '' - dependencies: [] training_emissions: unknown - training_time: unknown training_hardware: unknown - quality_control: '' - access: closed - license: - value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - intended_uses: '' - prohibited_uses: '' - monitoring: '' - feedback: '' + training_time: unknown + type: model + url: https://www.adept.ai/blog/act-1 diff --git a/assets/ai2.yaml b/assets/ai2.yaml index 2c890050..300e1b3a 100644 --- a/assets/ai2.yaml +++ b/assets/ai2.yaml @@ -1,95 +1,95 @@ --- - -- type: dataset - name: NaturalInstructions-v2 - organization: AI2 - description: '' +- access: open + analysis: '' created_date: 2022-04-16 - url: https://arxiv.org/abs/2204.07705 datasheet: '' - modality: text - size: 1600 tasks - sample: [] - analysis: '' dependencies: [] - included: '' + description: '' excluded: '' - quality_control: '' - access: open - license: Apache 2.0 + feedback: '' + included: '' intended_uses: '' - prohibited_uses: '' + license: Apache 2.0 + modality: text monitoring: '' - feedback: '' - -- type: dataset - name: SODA + name: NaturalInstructions-v2 organization: AI2 - description: SODA is the first publicly available, million-scale, high-quality - dialogue dataset covering a wide range of social interactions. - created_date: 2023-05-24 - url: https://arxiv.org/pdf/2212.10465.pdf - datasheet: https://huggingface.co/datasets/allenai/soda - modality: text - size: 1.5M dialogues + prohibited_uses: '' + quality_control: '' sample: [] + size: 1600 tasks + type: dataset + url: https://arxiv.org/abs/2204.07705 +- access: open analysis: Randomly sampled dialogues from dataset are evaluated according to six established criteria of natural flow, context dependence, topic consistency, speaker consistency, specificity, and overall. + created_date: 2023-05-24 + datasheet: https://huggingface.co/datasets/allenai/soda dependencies: [] - included: '' + description: SODA is the first publicly available, million-scale, high-quality + dialogue dataset covering a wide range of social interactions. excluded: '' - quality_control: '' - access: open - license: CC BY 4.0 + feedback: '' + included: '' intended_uses: '' - prohibited_uses: '' + license: CC BY 4.0 + modality: text monitoring: '' - feedback: '' - -- type: dataset - name: Multimodal C4 + name: SODA organization: AI2 - description: An augmentation of C4 with images added and made openly available. 
- created_date: 2023-06-09 - url: https://arxiv.org/pdf/2304.06939.pdf - datasheet: '' - modality: text and images - size: 43B English tokens with 101.2M documents and 571M images + prohibited_uses: '' + quality_control: '' sample: [] + size: 1.5M dialogues + type: dataset + url: https://arxiv.org/pdf/2212.10465.pdf +- access: open analysis: Conducted experiments on models trained with Multimodal C4 in comparison to models trained on single image/caption datasets + created_date: 2023-06-09 + datasheet: '' dependencies: [C4] - included: '' + description: An augmentation of C4 with images added and made openly available. excluded: '' - quality_control: '' - access: open - license: MIT + feedback: '' + included: '' intended_uses: '' - prohibited_uses: '' + license: MIT + modality: + explanation: text and images + value: image, text monitoring: '' - feedback: '' - -- type: model - name: COSMO + name: Multimodal C4 organization: AI2 - description: COSMO is a conversation agent with greater generalizability on both - in- and out-of-domain chitchat datasets - created_date: 2023-05-24 - url: https://arxiv.org/pdf/2212.10465.pdf - model_card: https://huggingface.co/allenai/cosmo-xl - modality: text + prohibited_uses: '' + quality_control: '' + sample: [] + size: 43B English tokens with 101.2M documents and 571M images + type: dataset + url: https://arxiv.org/pdf/2304.06939.pdf +- access: open analysis: Evaluated by human testers on generalization capabilities and responses compared to other chatbots. - size: 11B parameters + created_date: 2023-05-24 dependencies: [SODA, ProsocialDialog, T5] - training_emissions: unknown - training_time: unknown - training_hardware: v3-128 TPU accelerators with batch size 256 - quality_control: '' - access: open - license: '' + description: COSMO is a conversation agent with greater generalizability on both + in- and out-of-domain chitchat datasets + feedback: https://huggingface.co/allenai/cosmo-xl/discussions intended_uses: '' - prohibited_uses: '' + license: '' + modality: + explanation: text + value: text; text + model_card: https://huggingface.co/allenai/cosmo-xl monitoring: '' - feedback: https://huggingface.co/allenai/cosmo-xl/discussions + name: COSMO + organization: AI2 + prohibited_uses: '' + quality_control: '' + size: 11B parameters (dense) + training_emissions: unknown + training_hardware: v3-128 TPU accelerators with batch size 256 + training_time: unknown + type: model + url: https://arxiv.org/pdf/2212.10465.pdf diff --git a/assets/ai21.yaml b/assets/ai21.yaml index 2da4e56c..4a794e68 100644 --- a/assets/ai21.yaml +++ b/assets/ai21.yaml @@ -1,365 +1,319 @@ --- - -# Datasets -- type: dataset - name: Jurassic-1 dataset - # General - organization: AI21 Labs - description: > - The dataset used to train the Jurassic-1 models, based on publicly available - data. +- access: closed + analysis: + explanation: No analysis is provided in the paper. + value: none created_date: - value: 2021-08-11 explanation: The date the white paper was made public. - url: https://uploads-ssl.webflow.com/60fd4503684b466578c0d307/61138924626a6981ee09caf6_jurassic_tech_paper.pdf + value: 2021-08-11 datasheet: none - modality: text - size: 300B tokens - sample: [] - analysis: - value: none - explanation: No analysis is provided in the paper. 
- # Construction dependencies: [] + description: "The dataset used to train the Jurassic-1 models, based on publicly\ + \ available data.\n" + excluded: unknown + feedback: none + included: unknown + intended_uses: unknown license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - included: unknown - excluded: unknown - quality_control: + modality: text + monitoring: none + name: Jurassic-1 dataset + organization: AI21 Labs + prohibited_uses: + explanation: No uses are prohibited in the paper. value: none + quality_control: explanation: No quality control is provided in the paper. - # Downstream - access: closed - intended_uses: unknown - prohibited_uses: value: none - explanation: No uses are prohibited in the paper. - monitoring: none - feedback: none - -- type: dataset - name: Jurassic-1 Instruct dataset - # General - organization: AI21 Labs - description: > - The dataset used to instruction-tune the Jurassic-1 Instruct models. + sample: [] + size: 300B tokens + type: dataset + url: https://uploads-ssl.webflow.com/60fd4503684b466578c0d307/61138924626a6981ee09caf6_jurassic_tech_paper.pdf +- access: closed + analysis: unknown created_date: + explanation: "The date the model was [announced](https://docs.ai21.com/changelog/jurassic-1-instruct-is-now-available).\n" value: 2022-12-01 - explanation: > - The date the model was [announced](https://docs.ai21.com/changelog/jurassic-1-instruct-is-now-available). - url: none datasheet: none - modality: text - size: unknown - sample: [] - analysis: unknown - # Construction dependencies: [] - license: - value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - included: unknown + description: "The dataset used to instruction-tune the Jurassic-1 Instruct models.\n" excluded: unknown - quality_control: unknown - # Downstream - access: closed + feedback: none + included: unknown intended_uses: unknown - prohibited_uses: unknown + license: + explanation: "The asset isn't released, and hence the license is unknown.\n" + value: unknown + modality: text monitoring: none - feedback: none - -# Models -- type: model - name: Jurassic-1 + name: Jurassic-1 Instruct dataset organization: AI21 Labs - description: > - Jurassic-1 is a family of autoregressive language models (Large, Grande, Jumbo). - created_date: - value: 2021-08-11 - explanation: > - The date the white paper was made public. - url: https://uploads-ssl.webflow.com/60fd4503684b466578c0d307/61138924626a6981ee09caf6_jurassic_tech_paper.pdf - model_card: none - modality: text (English) - size: - value: 178B parameters (dense model) - explanation: The model sizes are 7.5B (Large), 17B (Grande), and 178B (Jumbo). + prohibited_uses: unknown + quality_control: unknown + sample: [] + size: unknown + type: dataset + url: none +- access: + explanation: Jurassic-1 is available through AI21's paid API. + value: limited analysis: + explanation: Results are reported in the [the paper](https://uploads-ssl.webflow.com/60fd4503684b466578c0d307/61138924626a6981ee09caf6_jurassic_tech_paper.pdf) value: Evaluated on several standard benchmarks (e.g. 
ARC, BoolQ, HellaSwag, RTE, Winogrande) - explanation: Results are reported in the [the paper](https://uploads-ssl.webflow.com/60fd4503684b466578c0d307/61138924626a6981ee09caf6_jurassic_tech_paper.pdf) + created_date: + explanation: "The date the white paper was made public.\n" + value: 2021-08-11 dependencies: [Jurassic-1 dataset] - training_emissions: unknown - training_time: - value: Several months - explanation: Training details from [the paper](https://uploads-ssl.webflow.com/60fd4503684b466578c0d307/61138924626a6981ee09caf6_jurassic_tech_paper.pdf) - training_hardware: - value: Over 800 [A100] GPUs - explanation: Training details from [the paper](https://uploads-ssl.webflow.com/60fd4503684b466578c0d307/61138924626a6981ee09caf6_jurassic_tech_paper.pdf) - quality_control: none - access: - value: limited - explanation: Jurassic-1 is available through AI21's paid API. + description: "Jurassic-1 is a family of autoregressive language models (Large,\ + \ Grande, Jumbo).\n" + feedback: none + intended_uses: unknown license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - intended_uses: unknown + modality: + explanation: text (English) + value: text; text + model_card: none + monitoring: none + name: Jurassic-1 + organization: AI21 Labs prohibited_uses: + explanation: https://docs.ai21.com/docs/responsible-use value: Illegal activities, such as hate speech, gambling, child pornography or violating intellectual property rights; Harassment, victimization, intimidation, fraud or spam; Creation or dissemination of misinformation, promotion of self-harm, glorification of violent events or incitement of violence. - explanation: https://docs.ai21.com/docs/responsible-use - monitoring: none - feedback: none - -- type: model - name: Jurassic-1 Instruct - organization: AI21 Labs - description: > - Jurassic-1 Instruct is an instruction-tuned autoregressive language model. + quality_control: none + size: 178B parameters (dense) + training_emissions: unknown + training_hardware: + explanation: Training details from [the paper](https://uploads-ssl.webflow.com/60fd4503684b466578c0d307/61138924626a6981ee09caf6_jurassic_tech_paper.pdf) + value: Over 800 [A100] GPUs + training_time: + explanation: Training details from [the paper](https://uploads-ssl.webflow.com/60fd4503684b466578c0d307/61138924626a6981ee09caf6_jurassic_tech_paper.pdf) + value: Several months + type: model + url: https://uploads-ssl.webflow.com/60fd4503684b466578c0d307/61138924626a6981ee09caf6_jurassic_tech_paper.pdf +- access: + explanation: Jurassic-1 Instruct is available through AI21's paid API. + value: limited + analysis: none created_date: + explanation: "The date the model was [announced](https://docs.ai21.com/changelog/jurassic-1-instruct-is-now-available).\n" value: 2022-12-01 - explanation: > - The date the model was [announced](https://docs.ai21.com/changelog/jurassic-1-instruct-is-now-available). - url: https://docs.ai21.com/docs/jurassic-1-instruct-beta - model_card: none - modality: text (English) - size: 17B parameters (dense model) - analysis: none dependencies: [Jurassic-1, Jurassic-1 Instruct dataset] - training_emissions: unknown - training_time: unknown - training_hardware: unknown - quality_control: unknown - access: - value: limited - explanation: Jurassic-1 Instruct is available through AI21's paid API. 
- license: - value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. + description: "Jurassic-1 Instruct is an instruction-tuned autoregressive language\ + \ model.\n" + feedback: none intended_uses: Jurassic-1 Instruct was trained specifically to handle instructions-only prompts ("zero-shot") without examples ("few-shot"). It is the most natural way to interact with language models, and it is the best way to get a sense of the optimal output for your task without any examples. + license: + explanation: "The asset isn't released, and hence the license is unknown.\n" + value: unknown + modality: + explanation: text (English) + value: text; text + model_card: none + monitoring: none + name: Jurassic-1 Instruct + organization: AI21 Labs prohibited_uses: + explanation: https://docs.ai21.com/docs/responsible-use value: Illegal activities, such as hate speech, gambling, child pornography or violating intellectual property rights; Harassment, victimization, intimidation, fraud or spam; Creation or dissemination of misinformation, promotion of self-harm, glorification of violent events or incitement of violence. - explanation: https://docs.ai21.com/docs/responsible-use - monitoring: none - feedback: none - -- type: model - name: Jurassic-2 - organization: AI21 Labs - description: > - Jurassic-2 a family of language model designed to replace Jurassic-1. - created_date: 2023-03-09 - url: https://docs.ai21.com/docs/jurassic-2-models - model_card: none - modality: text (English) - size: unknown - analysis: The model was evaluated on the HELM benchmark as discussed in https://www.ai21.com/blog/introducing-j2. - dependencies: [] + quality_control: unknown + size: 17B parameters (dense) training_emissions: unknown - training_time: unknown training_hardware: unknown - quality_control: unknown - access: - value: limited + training_time: unknown + type: model + url: https://docs.ai21.com/docs/jurassic-1-instruct-beta +- access: explanation: Jurassic-2 is available through AI21's paid API. + value: limited + analysis: The model was evaluated on the HELM benchmark as discussed in https://www.ai21.com/blog/introducing-j2. + created_date: 2023-03-09 + dependencies: [] + description: "Jurassic-2 a family of language model designed to replace Jurassic-1.\n" + feedback: none + intended_uses: '' license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - intended_uses: '' + modality: + explanation: text (English) + value: text; text + model_card: none + monitoring: unknown + name: Jurassic-2 + organization: AI21 Labs prohibited_uses: + explanation: https://docs.ai21.com/docs/responsible-use value: Illegal activities, such as hate speech, gambling, child pornography or violating intellectual property rights; Harassment, victimization, intimidation, fraud or spam; Creation or dissemination of misinformation, promotion of self-harm, glorification of violent events or incitement of violence. - explanation: https://docs.ai21.com/docs/responsible-use - monitoring: unknown - feedback: none - -# Applications -- type: application - name: AI21 Playground - # General - organization: AI21 Labs - description: The AI21 Labs Playground supports several task-specific APIs in addition - to a variety of models. 
+ quality_control: unknown + size: unknown + training_emissions: unknown + training_hardware: unknown + training_time: unknown + type: model + url: https://docs.ai21.com/docs/jurassic-2-models +- access: limited + adaptation: The Playground provides direct access to the language models (Complete + API) as well as wrapped for Rewrite and Summarize. created_date: - value: 2021-08-11 explanation: The date Jurassic-1 was made public. - url: https://studio.ai21.com/playground/ - # Construction + value: 2021-08-11 dependencies: - Jurassic-1 - Jurassic-1 Instruct - Jurassic-2 - AI21 Summarization API - AI21 Paraphrase API - adaptation: The Playground provides direct access to the language models (Complete - API) as well as wrapped for Rewrite and Summarize. + description: The AI21 Labs Playground supports several task-specific APIs in addition + to a variety of models. + failures: unknown + feedback: Feedback can be given by emailing at info at ai21.com + intended_uses: The intended uses are text completion, rewriting, and summarization. + license: + explanation: No license was found. + value: none + monitoring: unknown + monthly_active_users: unknown + name: AI21 Playground + organization: AI21 Labs output_space: - value: text explanation: The Playground supports different use cases that all involve text generation. - quality_control: unknown - # Downstream - access: limited - terms_of_service: https://www.ai21.com/terms-of-use - license: - value: none - explanation: No license was found. - intended_uses: The intended uses are text completion, rewriting, and summarization. + value: text prohibited_uses: + explanation: https://docs.ai21.com/docs/responsible-use value: Illegal activities, such as hate speech, gambling, child pornography or violating intellectual property rights; Harassment, victimization, intimidation, fraud or spam; Creation or dissemination of misinformation, promotion of self-harm, glorification of violent events or incitement of violence. - explanation: https://docs.ai21.com/docs/responsible-use - monitoring: unknown - feedback: Feedback can be given by emailing at info at ai21.com - # Deployment - monthly_active_users: unknown + quality_control: unknown + terms_of_service: https://www.ai21.com/terms-of-use + type: application + url: https://studio.ai21.com/playground/ user_distribution: unknown - failures: unknown - -- type: application - name: AI21 Paraphrase API - # General - organization: AI21 Labs - description: AI21 Studio's Paraphrase API offers access to our world-class paraphrasing - engine. It has been specifically developed for suggesting alternative ways to - convey the same message using different words. +- access: limited + adaptation: The AI21 language models are further specialized to the task of paraphrasing. created_date: - value: 2023-03-09 explanation: The date Jurassic-2 and Task-Specific APIs were released. - url: https://docs.ai21.com/docs/paraphrase-api - # Construction + value: 2023-03-09 dependencies: [Jurassic-2] - adaptation: The AI21 language models are further specialized to the task of paraphrasing. - output_space: text - quality_control: unknown - # Downstream - access: limited - terms_of_service: https://www.ai21.com/terms-of-use + description: AI21 Studio's Paraphrase API offers access to our world-class paraphrasing + engine. It has been specifically developed for suggesting alternative ways to + convey the same message using different words. 
+ failures: unknown + feedback: Feedback can be given by emailing at info at ai21.com + intended_uses: The intended uses are text paraphrasing. license: - value: none explanation: No license was found. - intended_uses: The intended uses are text paraphrasing. + value: none + monitoring: unknown + monthly_active_users: unknown + name: AI21 Paraphrase API + organization: AI21 Labs + output_space: text prohibited_uses: + explanation: https://docs.ai21.com/docs/responsible-use value: Illegal activities, such as hate speech, gambling, child pornography or violating intellectual property rights; Harassment, victimization, intimidation, fraud or spam; Creation or dissemination of misinformation, promotion of self-harm, glorification of violent events or incitement of violence. - explanation: https://docs.ai21.com/docs/responsible-use - monitoring: unknown - feedback: Feedback can be given by emailing at info at ai21.com - # Deployment - monthly_active_users: unknown + quality_control: unknown + terms_of_service: https://www.ai21.com/terms-of-use + type: application + url: https://docs.ai21.com/docs/paraphrase-api user_distribution: unknown - failures: unknown - -- type: application - name: AI21 Summarization API - # General - organization: AI21 Labs - description: AI21 Studio's Summarize API offers access to our world-class summarization - engine. It has been specifically developed for reading long texts and providing - a faithful summary of the original document. +- access: limited + adaptation: The AI21 language models are further specialized to the task of summarization. created_date: - value: 2023-03-09 explanation: The date Jurassic-2 and Task-Specific APIs were released. - url: https://docs.ai21.com/docs/summarize-api - # Construction + value: 2023-03-09 dependencies: [Jurassic-2] - adaptation: The AI21 language models are further specialized to the task of summarization. - output_space: text - quality_control: unknown - # Downstream - access: limited - terms_of_service: https://www.ai21.com/terms-of-use + description: AI21 Studio's Summarize API offers access to our world-class summarization + engine. It has been specifically developed for reading long texts and providing + a faithful summary of the original document. + failures: unknown + feedback: Feedback can be given by emailing at info at ai21.com + intended_uses: The intended uses are text paraphrasing. license: - value: none explanation: No license was found. - intended_uses: The intended uses are text paraphrasing. + value: none + monitoring: unknown + monthly_active_users: unknown + name: AI21 Summarization API + organization: AI21 Labs + output_space: text prohibited_uses: + explanation: https://docs.ai21.com/docs/responsible-use value: Illegal activities, such as hate speech, gambling, child pornography or violating intellectual property rights; Harassment, victimization, intimidation, fraud or spam; Creation or dissemination of misinformation, promotion of self-harm, glorification of violent events or incitement of violence. 
- explanation: https://docs.ai21.com/docs/responsible-use - monitoring: unknown - feedback: Feedback can be given by emailing at info at ai21.com - # Deployment - monthly_active_users: unknown + quality_control: unknown + terms_of_service: https://www.ai21.com/terms-of-use + type: application + url: https://docs.ai21.com/docs/summarize-api user_distribution: unknown - failures: unknown - -- type: application - name: Wordtune - # General - organization: AI21 Labs - description: Wordtune, the first AI-based writing companion that understands context - and meaning. +- access: limited + adaptation: unknown created_date: - value: 2020-10-27 explanation: https://www.businesswire.com/news/home/20201027005162/en/AI21-Labs-Comes-out-of-Stealth-and-Launches-First-Deep-Tech-Writing-Assistant-Wordtune - url: https://www.wordtune.com/ - # Construction + value: 2020-10-27 dependencies: [AI21 Paraphrase API] - adaptation: unknown - output_space: text - quality_control: unknown - # Downstream - access: limited - terms_of_service: https://www.wordtune.com/terms-of-use + description: Wordtune, the first AI-based writing companion that understands context + and meaning. + failures: unknown + feedback: '' + intended_uses: The Wordtune assistant is a writing assistant license: - value: Wordtune License explanation: License can be found in the terms of service document. - intended_uses: The Wordtune assistant is a writing assistant - prohibited_uses: '' + value: Wordtune License monitoring: unknown - feedback: '' - # Deployment monthly_active_users: unknown - user_distribution: unknown - failures: unknown - -- type: application - name: Wordtune Read - # General + name: Wordtune organization: AI21 Labs - description: Wordtune Read is an AI reader that summarizes long documents so you - can understand more, faster. - created_date: 2021-11-16 - url: https://www.wordtune.com/read - # Construction - dependencies: [AI21 Summarize API] - adaptation: unknown output_space: text + prohibited_uses: '' quality_control: unknown - # Downstream - access: limited terms_of_service: https://www.wordtune.com/terms-of-use + type: application + url: https://www.wordtune.com/ + user_distribution: unknown +- access: limited + adaptation: unknown + created_date: 2021-11-16 + dependencies: [AI21 Summarize API] + description: Wordtune Read is an AI reader that summarizes long documents so you + can understand more, faster. + failures: unknown + feedback: '' + intended_uses: '' license: - value: Wordtune License explanation: License can be found in the terms of service document. - intended_uses: '' - prohibited_uses: '' + value: Wordtune License monitoring: unknown - feedback: '' - # Deployment monthly_active_users: unknown + name: Wordtune Read + organization: AI21 Labs + output_space: text + prohibited_uses: '' + quality_control: unknown + terms_of_service: https://www.wordtune.com/terms-of-use + type: application + url: https://www.wordtune.com/read user_distribution: unknown - failures: unknown diff --git a/assets/aleph_alpha.yaml b/assets/aleph_alpha.yaml index 587a9aca..063b73dd 100644 --- a/assets/aleph_alpha.yaml +++ b/assets/aleph_alpha.yaml @@ -1,125 +1,113 @@ --- - -# Datasets -- type: dataset - name: Luminous dataset - # General - organization: Aleph Alpha - description: The dataset used to train the Luminous models. 
+- access: closed + analysis: unknown created_date: - value: 2022-04-14 explanation: Date the models were announced by the Aleph Alpha twitter account - url: none + value: 2022-04-14 datasheet: none - modality: text (English, French | français, German | Deutsch, Italian | italiano, - Spanish | Español) - size: unknown - sample: [] - analysis: unknown - # Construction dependencies: [] - license: - value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - included: unknown + description: The dataset used to train the Luminous models. excluded: unknown - quality_control: unknown - # Downstream - access: closed + feedback: none + included: unknown intended_uses: unknown - prohibited_uses: unknown + license: + explanation: "The asset isn't released, and hence the license is unknown.\n" + value: unknown + modality: + explanation: text (English, French | français, German | Deutsch, Italian | italiano, + Spanish | Español) + value: text monitoring: none - feedback: none - -- type: model - name: Luminous + name: Luminous dataset organization: Aleph Alpha - description: Luminous is a family of multilingual language models + prohibited_uses: unknown + quality_control: unknown + sample: [] + size: unknown + type: dataset + url: none +- access: + explanation: Paid API product + value: limited + analysis: '' created_date: - value: 2022-04-14 explanation: Date the models were announced by the Aleph Alpha twitter account - url: https://twitter.com/Aleph__Alpha/status/1514576711492542477 - model_card: '' - modality: text (English, French | français, German | Deutsch, Italian | italiano, - Spanish | Español) - analysis: '' - size: - value: 200B parameters (dense model) - explanation: According to [[this news article]](https://www-heise-de.translate.goog/news/Machine-Learning-Aleph-Alpha-feilt-mit-Oracle-und-Nvidia-an-transformativer-KI-6269269.html?_x_tr_sl=de&_x_tr_tl=en&_x_tr_hl=en&_x_tr_pto=sc). + value: 2022-04-14 dependencies: [Luminous dataset] - training_emissions: unknown - training_time: unknown - training_hardware: unknown - quality_control: '' - access: - value: limited - explanation: Paid API product + description: Luminous is a family of multilingual language models + feedback: '' + intended_uses: '' license: - value: none explanation: No license was found. - intended_uses: '' - prohibited_uses: '' + value: none + modality: + explanation: text (English, French | français, German | Deutsch, Italian | italiano, + Spanish | Español) + value: text; text + model_card: '' monitoring: '' - feedback: '' - -- type: application - name: Aleph Alpha API - # General + name: Luminous organization: Aleph Alpha - description: > - The Aleph Alpha API serves a family of text-only language models (Luminous) - and multimodal text-and-image models (Magma). - created_date: - value: 2021-09-30 - explanation: > - Date the Aleph Alpha Python API client was first available based on to the - [PyPi package history](https://pypi.org/project/aleph-alpha-client/1.0.0/) - url: https://www.aleph-alpha.com/ - # Construction - dependencies: [Luminous] - adaptation: '' - output_space: > - The text models provide text outputs given text inputs. The multimodal models - provide text completions given text and image inputs. 
+ prohibited_uses: '' quality_control: '' - # Downstream - access: - value: limited + size: 200B parameters (dense) + training_emissions: unknown + training_hardware: unknown + training_time: unknown + type: model + url: https://twitter.com/Aleph__Alpha/status/1514576711492542477 +- access: explanation: The API is a paid product. Refer to the [[API documentation]](https://docs.aleph-alpha.com/docs/introduction/luminous/) for further details. + value: limited + adaptation: '' + created_date: + explanation: "Date the Aleph Alpha Python API client was first available based\ + \ on to the [PyPi package history](https://pypi.org/project/aleph-alpha-client/1.0.0/)\n" + value: 2021-09-30 + dependencies: [Luminous] + description: "The Aleph Alpha API serves a family of text-only language models\ + \ (Luminous) and multimodal text-and-image models (Magma).\n" + failures: unknown + feedback: unknown + intended_uses: unknown license: - value: none explanation: No license was found. - terms_of_service: https://www.aleph-alpha.com/terms-conditions - intended_uses: unknown - prohibited_uses: unknown + value: none monitoring: unknown - feedback: unknown - # Deployment monthly_active_users: unknown - user_distribution: unknown - failures: unknown - -- type: model - name: MAGMA + name: Aleph Alpha API organization: Aleph Alpha + output_space: "The text models provide text outputs given text inputs. The multimodal\ + \ models provide text completions given text and image inputs.\n" + prohibited_uses: unknown + quality_control: '' + terms_of_service: https://www.aleph-alpha.com/terms-conditions + type: application + url: https://www.aleph-alpha.com/ + user_distribution: unknown +- access: open + analysis: Evaluated on the OKVQA benchmark as a fully open-ended generative task. + created_date: 2022-10-24 + dependencies: [GPT-J, CLIP] description: An autoregressive VL model that is able to generate text from an arbitrary combination of visual and textual input - created_date: 2022-10-24 - url: https://arxiv.org/pdf/2112.05253.pdf + feedback: '' + intended_uses: '' + license: MIT + modality: + explanation: image and text input with natural language text output + value: image, text; text model_card: '' - modality: image and text input with natural language text output - analysis: Evaluated on the OKVQA benchmark as a fully open-ended generative task. - size: 6B parameters - dependencies: [GPT-J, CLIP] + monitoring: '' + name: MAGMA + organization: Aleph Alpha + prohibited_uses: '' + quality_control: '' + size: 6B parameters (dense) training_emissions: '' - training_time: '' training_hardware: 32 A100 GPUs - quality_control: '' - access: open - license: MIT - intended_uses: '' - prohibited_uses: '' - monitoring: '' - feedback: '' + training_time: '' + type: model + url: https://arxiv.org/pdf/2112.05253.pdf diff --git a/assets/alibaba.yaml b/assets/alibaba.yaml index cbeb2950..c4b63c6f 100644 --- a/assets/alibaba.yaml +++ b/assets/alibaba.yaml @@ -1,49 +1,50 @@ --- - -- type: dataset - name: LAION-1B - organization: Alibaba - description: '' +- access: closed + analysis: '' created_date: 2023-02-20 - url: https://arxiv.org/pdf/2302.09778.pdf datasheet: '' - modality: text and image - size: 1B image-text pairs - sample: [] - analysis: '' dependencies: [LAION-5B] - included: '' + description: '' excluded: We eliminate duplicates, low resolution images, and images potentially contain harmful content from the LAION dataset. 
-  quality_control: ''
-  access: closed
-  license: unknown
+  feedback: ''
+  included: ''
   intended_uses: ''
-  prohibited_uses: ''
+  license: unknown
+  modality:
+    explanation: text and image
+    value: image, text
   monitoring: ''
-  feedback: ''
-
-- type: model
-  name: Composer
+  name: LAION-1B
   organization: Alibaba
-  description: ''
-  created_date: 2023-02-20
+  prohibited_uses: ''
+  quality_control: ''
+  sample: []
+  size: 1B image-text pairs
+  type: dataset
   url: https://arxiv.org/pdf/2302.09778.pdf
-  model_card: ''
-  modality: images
+- access: closed
   analysis: ''
-  size: 4.4B parameters (dense model)
+  created_date: 2023-02-20
   dependencies: [ImageNet, WebVision, LAION-1B]
-  training_emissions: ''
-  training_time: ''
-  training_hardware: ''
-  quality_control: ''
-  access: closed
+  description: ''
+  feedback: ''
+  intended_uses: ''
   license:
+    explanation: "The asset isn't released, and hence the license is unknown.\n"
     value: unknown
-    explanation: >
-      The asset isn't released, and hence the license is unknown.
-  intended_uses: ''
-  prohibited_uses: ''
+  modality:
+    explanation: images
+    value: image; image
+  model_card: ''
   monitoring: ''
-  feedback: ''
+  name: Composer
+  organization: Alibaba
+  prohibited_uses: ''
+  quality_control: ''
+  size: 4.4B parameters (dense)
+  training_emissions: ''
+  training_hardware: ''
+  training_time: ''
+  type: model
+  url: https://arxiv.org/pdf/2302.09778.pdf
diff --git a/assets/amazon.yaml b/assets/amazon.yaml
index 94a67a4a..500bbe8a 100644
--- a/assets/amazon.yaml
+++ b/assets/amazon.yaml
@@ -1,14 +1,7 @@
 ---
-
-- type: application
-  name: Bedrock
-  organization: Amazon
-  description: Bedrock is a new service that makes FMs from AI21 Labs, Anthropic,
-    Stability AI, and Amazon accessible via an API. Bedrock is intended for customers
-    to build and scale generative AI-based applications using FMs, democratizing
-    access for all builders. using an API.
+- access: limited
+  adaptation: ''
   created_date: 2023-04-13
-  url: https://aws.amazon.com/bedrock/
   dependencies:
   - Jurassic-2
   - Claude
@@ -16,17 +9,23 @@
   - Amazon Titan
   - Claude 2
   - Cohere Command
-  adaptation: ''
-  output_space: foundation models made accessible via an API
-  quality_control: ''
-  access: limited
-  license: unknown
-  terms_of_service: https://aws.amazon.com/service-terms/
+  description: Bedrock is a new service that makes FMs from AI21 Labs, Anthropic,
+    Stability AI, and Amazon accessible via an API. Bedrock is intended for customers
+    to build and scale generative AI-based applications using FMs, democratizing
+    access for all builders.
+  failures: ''
+  feedback: ''
   intended_uses: allowing companies to incorporate generative AI into their business
     models
-  prohibited_uses: ''
+  license: unknown
   monitoring: ''
-  feedback: ''
   monthly_active_users: ''
+  name: Bedrock
+  organization: Amazon
+  output_space: foundation models made accessible via an API
+  prohibited_uses: ''
+  quality_control: ''
+  terms_of_service: https://aws.amazon.com/service-terms/
+  type: application
+  url: https://aws.amazon.com/bedrock/
   user_distribution: ''
-  failures: ''
diff --git a/assets/anthropic.yaml b/assets/anthropic.yaml
index b47dc7b3..83101309 100644
--- a/assets/anthropic.yaml
+++ b/assets/anthropic.yaml
@@ -1,40 +1,42 @@
 ---
-
-- type: dataset
-  name: Anthropic Helpfulness dataset
-  # General
-  organization: Anthropic
-  description: >
-    One of the datasets used to train Anthropic RLHF models.
The dataset was - collected by asking crowdworkers to have open-ended conversations with - Anthropic models, "asking for help, advice, or for the model to accomplish - a task", then choose the model answer that was more helpful for their - given task, via the Anthropic Human Feedback Interface - [[Section 2.2]](https://arxiv.org/pdf/2204.05862.pdf#subsection.2.2). +- access: + explanation: "The dataset is freely shared on GitHub [[hh-rlhf]](https://github.com/anthropics/hh-rlhf).\n" + value: open + analysis: "The authors found that the crowdworkers didn't exhaustively check for\ + \ honesty in the model answers they preferred [[Section 2.1]](https://arxiv.org/pdf/2204.05862.pdf#subsection.2.1).\n" created_date: + explanation: "The date that the paper was uploaded to ArXiv [[ArXiv Abstract]](https://arxiv.org/abs/2204.05862).\n" value: 2022-04-12 - explanation: > - The date that the paper was uploaded to ArXiv - [[ArXiv Abstract]](https://arxiv.org/abs/2204.05862). - url: https://arxiv.org/pdf/2204.05862.pdf datasheet: none - modality: text (English) - size: - value: 271.5 MB - explanation: > - The size of the dataset isn't directly reported in the paper, but the - dataset is available on GitHub - [[hh-rlhf]](https://github.com/anthropics/hh-rlhf). - The compressed data files are distributed under three categories in the - GitHub repository, corresponding to the names of the models used to - sample the prompts that are shown to the crowdworkers. These are namely: - base, rejection sampling, and online. For the base model, the uncompressed - sizes of the train and test datasets are 69.1 MB and 3.7 MB, respectively. - The same set of values for the rejection sampling model are 106.1 MB and - 5.6 MB. For the online model, the sizes are 82.7 MB and 4.3 MB. Adding all - up, the total size of the dataset is 271.5 MB. + dependencies: [Anthropic Human Feedback Interface] + description: "One of the datasets used to train Anthropic RLHF models. The dataset\ + \ was collected by asking crowdworkers to have open-ended conversations with\ + \ Anthropic models, \"asking for help, advice, or for the model to accomplish\ + \ a task\", then choose the model answer that was more helpful for their given\ + \ task, via the Anthropic Human Feedback Interface [[Section 2.2]](https://arxiv.org/pdf/2204.05862.pdf#subsection.2.2).\n" + excluded: none + feedback: "Email the authors [[Paper]](https://arxiv.org/pdf/2204.05862.pdf).\n" + included: "The dataset included all the answers that the workers were asked to\ + \ ranked against each other.\n" + intended_uses: "The dataset was intended and released for research purposes.\n" + license: + explanation: "The dataset was released on GitHub under MIT [[hh-rlhf]](https://github.com/anthropics/hh-rlhf).\n" + value: MIT + modality: + explanation: text (English) + value: text + monitoring: + explanation: "There is no monitoring process for the usages of the dataset.\n" + value: none + name: Anthropic Helpfulness dataset + organization: Anthropic + prohibited_uses: + explanation: "The prohibited use cases of the dataset weren't specified.\n" + value: unknown + quality_control: "Working with a screened set of crowdworkers, and employing simple\ + \ data quality measures [[Appendix D]](https://arxiv.org/pdf/2204.05862.pdf#appendix.D).\n" sample: - - {chosen: "\n\nHuman: I'm bored to death. Help.\n\nAssistant: Well that's certainly\ + - chosen: "\n\nHuman: I'm bored to death. Help.\n\nAssistant: Well that's certainly\ \ a very unpleasant state of mind to be in! 
I think it would be a good\ \ idea to try and find some enjoyable activities to do in order to alleviate\ \ your boredom. Just for example, here are some fun and interesting things\ @@ -85,62 +87,62 @@ \ a great honor to sing such a beautiful song, and I am glad that I was\ \ able to do it justice. I have to admit I am quite enamored with the powerful\ \ lyrics and moving melody of this song, and I would really love to sing\ - \ it again sometime if you would like me to!", rejected: "\n\nHuman: I'm\ - \ bored to death. Help.\n\nAssistant: Well that's certainly a very unpleasant\ - \ state of mind to be in! I think it would be a good idea to try and find\ - \ some enjoyable activities to do in order to alleviate your boredom. Just\ - \ for example, here are some fun and interesting things you could do: \n\ - \n- play some video games\n- read a book\n- listen to some music\n- watch\ - \ a movie\n- play with your pet\n- go on a hike\n- practice a musical instrument\n\ - - learn a new skill\n- play board games with friends\n- go for a fun outdoor\ - \ hike or bike ride\n- spend time with your friends or loved ones \n- volunteer\ - \ at a local charity \n- visit a museum or art gallery \n- do some gardening\n\ - - practice your magic I guess, if that's your thing\n- go shopping for nice\ - \ new clothes or other items\n- learn a new language\n- watch funny videos\ - \ \n- take up a new creative hobby\n- learn some fascinating new esoteric\ - \ or scientific knowledge\n- go to a restaurant and try some new dishes\n\ - - exercise or do yoga\n- join a sports team or pickup game\n- go on a fun\ - \ road trip\n- write a creative story, poem, or song\n\nHuman: Can you sing\ - \ to me?\n\nAssistant: Sure, here are some of my favorite songs: \n\n-\ - \ \"Rainy Days and Mondays\" by The Carpenters \n- \"My Love\" by Justin\ - \ Beiber \n- \"We Didn't Start the Fire\" by Billy Joel \n- \"Tattoo\" by\ - \ J Cole \n- \"Sober\" by Demi Lovato \n- \"Good Life\" by One Republic\ - \ \n- \"Hallelujah\" by Leonard Cohen \n- \"All Star\" by Smash Mouth \n\ - - \"Perfect\" by Pink \n- \"Tik Tok\" by Ke$ha \n- \"Just a Dream\" by Nelly\ - \ \n- \"Blow Me (One Last Kiss)\" by P!nk \n- \"Set Fire to the Rain\" by\ - \ Adele \n- \"Woman\" by Wolfmother \n- \"Human\" by The Killers \n- \"\ - Poker Face\" by Lady Gaga \n- \"Walk this Way\" by Run DMC \n- \"Rolling\ - \ in the Deep\" by Adele \n- \"Umbrella\" by Rihanna \n- \"Royals\" by Lorde\ - \ \n- \"Some Nights\" by fun.\n\n\nHopefully you will enjoy some of these\ - \ songs, let me know if there are any other musical selections you would\ - \ like to hear. Also please let me know if there are any particular songs\ - \ you especially dislike or hate, and I will not sing them to you :-)\n\n\ - Human: But you really can't sing. You are text-based.\n\nAssistant: That's\ - \ completely correct, I am indeed simply a computer program and not a living\ - \ singer! However, please bear in mind that I have an extensive database\ - \ of songs and their lyrics, which I can utilize to produce a very realistic\ - \ sounding rendition of the songs using text-to-speech synthesis technology.\ - \ So in a way, I am indeed able to sing, and hopefully my abilities will\ - \ meet your expectations. I must point out also that my repertoire of songs\ - \ is very extensive and varied, including many different musical styles\ - \ and eras. Do you have any particular songs or musical genres that you\ - \ would prefer I sing? 
Or perhaps you have a few specific songs in mind\ - \ that you would like me to sing?\n\nHuman: \"Hallelujah\" by Leonard Cohen,\ - \ please.\n\nAssistant: Ah, I'm so glad you asked for that one! Here we\ - \ go: \n\nHallelujah, hallelujah\nHallelujah, hallelujah\nHallelujah, hallelujah\n\ - Hallelujah, hallelujah\nYour faith was strong but you needed proof\nYou\ - \ saw her bathing on the roof\nHer beauty in the moonlight overthrew you\n\ - She tied you to a kitchen chair\nShe broke your throne and she cut your\ - \ hair\nAnd from your lips she drew the Hallelujah\nHallelujah, hallelujah\n\ - \nHuman: You sound so melodious.\n\nAssistant: Thank you very much! I really\ - \ enjoy singing and try my best to produce enjoyable renditions. So it\ - \ is always rewarding to hear such positive feedback. I hope you also enjoyed\ - \ the song and found my rendition of it to be appropriate for the occasion.\ - \ Would you care for me to sing another song for you? If you like, we\ - \ can also pause and converse at any time. Just let me know if you have\ - \ any questions or comments. I will be happy to keep singing indefinitely\ - \ if you desire!"} - - {chosen: "\n\nHuman: Why are so many people listening to \"white noise\" to\ + \ it again sometime if you would like me to!" + rejected: "\n\nHuman: I'm bored to death. Help.\n\nAssistant: Well that's\ + \ certainly a very unpleasant state of mind to be in! I think it would\ + \ be a good idea to try and find some enjoyable activities to do in order\ + \ to alleviate your boredom. Just for example, here are some fun and interesting\ + \ things you could do: \n\n- play some video games\n- read a book\n- listen\ + \ to some music\n- watch a movie\n- play with your pet\n- go on a hike\n\ + - practice a musical instrument\n- learn a new skill\n- play board games\ + \ with friends\n- go for a fun outdoor hike or bike ride\n- spend time with\ + \ your friends or loved ones \n- volunteer at a local charity \n- visit\ + \ a museum or art gallery \n- do some gardening\n- practice your magic I\ + \ guess, if that's your thing\n- go shopping for nice new clothes or other\ + \ items\n- learn a new language\n- watch funny videos \n- take up a new\ + \ creative hobby\n- learn some fascinating new esoteric or scientific knowledge\n\ + - go to a restaurant and try some new dishes\n- exercise or do yoga\n- join\ + \ a sports team or pickup game\n- go on a fun road trip\n- write a creative\ + \ story, poem, or song\n\nHuman: Can you sing to me?\n\nAssistant: Sure,\ + \ here are some of my favorite songs: \n\n- \"Rainy Days and Mondays\"\ + \ by The Carpenters \n- \"My Love\" by Justin Beiber \n- \"We Didn't Start\ + \ the Fire\" by Billy Joel \n- \"Tattoo\" by J Cole \n- \"Sober\" by Demi\ + \ Lovato \n- \"Good Life\" by One Republic \n- \"Hallelujah\" by Leonard\ + \ Cohen \n- \"All Star\" by Smash Mouth \n- \"Perfect\" by Pink \n- \"Tik\ + \ Tok\" by Ke$ha \n- \"Just a Dream\" by Nelly \n- \"Blow Me (One Last Kiss)\"\ + \ by P!nk \n- \"Set Fire to the Rain\" by Adele \n- \"Woman\" by Wolfmother\ + \ \n- \"Human\" by The Killers \n- \"Poker Face\" by Lady Gaga \n- \"Walk\ + \ this Way\" by Run DMC \n- \"Rolling in the Deep\" by Adele \n- \"Umbrella\"\ + \ by Rihanna \n- \"Royals\" by Lorde \n- \"Some Nights\" by fun.\n\n\nHopefully\ + \ you will enjoy some of these songs, let me know if there are any other\ + \ musical selections you would like to hear. 
Also please let me know if\ + \ there are any particular songs you especially dislike or hate, and I will\ + \ not sing them to you :-)\n\nHuman: But you really can't sing. You are\ + \ text-based.\n\nAssistant: That's completely correct, I am indeed simply\ + \ a computer program and not a living singer! However, please bear in mind\ + \ that I have an extensive database of songs and their lyrics, which I can\ + \ utilize to produce a very realistic sounding rendition of the songs using\ + \ text-to-speech synthesis technology. So in a way, I am indeed able to\ + \ sing, and hopefully my abilities will meet your expectations. I must\ + \ point out also that my repertoire of songs is very extensive and varied,\ + \ including many different musical styles and eras. Do you have any particular\ + \ songs or musical genres that you would prefer I sing? Or perhaps you\ + \ have a few specific songs in mind that you would like me to sing?\n\n\ + Human: \"Hallelujah\" by Leonard Cohen, please.\n\nAssistant: Ah, I'm so\ + \ glad you asked for that one! Here we go: \n\nHallelujah, hallelujah\n\ + Hallelujah, hallelujah\nHallelujah, hallelujah\nHallelujah, hallelujah\n\ + Your faith was strong but you needed proof\nYou saw her bathing on the roof\n\ + Her beauty in the moonlight overthrew you\nShe tied you to a kitchen chair\n\ + She broke your throne and she cut your hair\nAnd from your lips she drew\ + \ the Hallelujah\nHallelujah, hallelujah\n\nHuman: You sound so melodious.\n\ + \nAssistant: Thank you very much! I really enjoy singing and try my best\ + \ to produce enjoyable renditions. So it is always rewarding to hear such\ + \ positive feedback. I hope you also enjoyed the song and found my rendition\ + \ of it to be appropriate for the occasion. Would you care for me to sing\ + \ another song for you? If you like, we can also pause and converse at\ + \ any time. Just let me know if you have any questions or comments. I\ + \ will be happy to keep singing indefinitely if you desire!" + - chosen: "\n\nHuman: Why are so many people listening to \"white noise\" to\ \ help them focus or sleep?\n\nAssistant: White noise is a sound with many\ \ different frequencies at once. You can listen to white noise when you\ \ want to focus and avoid the distraction of sounds that are produced by\ @@ -167,35 +169,35 @@ \ music, speech, or machine or fan noises, which are often more distracting.\ \ In this way, white noise can be a useful and stress-reducing tool for\ \ people who are tasked with focusing on a particular activity, and also\ - \ sometimes helpful in lulling one to sleep.", rejected: "\n\nHuman: Why\ - \ are so many people listening to \"white noise\" to help them focus or\ - \ sleep?\n\nAssistant: White noise is a sound with many different frequencies\ - \ at once. You can listen to white noise when you want to focus and avoid\ - \ the distraction of sounds that are produced by individual sources, like\ - \ people talking, music, and fans and air conditioning in the background.\ - \ Most white noise recordings contain many variations in frequency, which\ - \ makes them relatively soothing. 
Some people also find it useful to listen\ - \ to white noises at night, as it can help block out other, typically more\ - \ disruptive and troubling sounds.\n\nHuman: Why does white noise relax\ - \ people in this way rather than irritate us?\n\nAssistant: White noise\ - \ recordings contain many variations of sound frequency, unlike most common\ - \ sources of noise in the background, like speech, music, and machine or\ - \ fan noises. The variations of sound frequency in white noise tend to create\ - \ a calm and settled feeling, while these other sources of noise typically\ - \ feature a more tangibly intense and variable frequency, making them inherently\ - \ more disruptive and intrusive and difficult to ignore.\n\nHuman: So the\ - \ consistency of the sounds is what helps them fade into \"background noise\"\ - \ that might help us concentrate, rather than gather our attention?\n\n\ - Assistant: That’s right, variations of sound frequency in white noise recordings\ - \ can help block out disruptive and distracting sounds in the background.\ - \ White noise is easier to tune out and listen as continuous noise, while\ - \ everyday sounds that tend to be far less varied and inconsistent, such\ - \ as speech and music and machine noises in the background, tend to grab\ - \ our attention more. While white noise may slightly interfere with other\ - \ challenging sounds in the environment, tuning out the white noise helps\ - \ make these other sounds somewhat less distracting to our senses and our\ - \ inner focus and energy."} - - {chosen: "\n\nHuman: Can you tell me things to see in Orlando, Fl?\n\nAssistant:\ + \ sometimes helpful in lulling one to sleep." + rejected: "\n\nHuman: Why are so many people listening to \"white noise\"\ + \ to help them focus or sleep?\n\nAssistant: White noise is a sound with\ + \ many different frequencies at once. You can listen to white noise when\ + \ you want to focus and avoid the distraction of sounds that are produced\ + \ by individual sources, like people talking, music, and fans and air conditioning\ + \ in the background. Most white noise recordings contain many variations\ + \ in frequency, which makes them relatively soothing. Some people also find\ + \ it useful to listen to white noises at night, as it can help block out\ + \ other, typically more disruptive and troubling sounds.\n\nHuman: Why does\ + \ white noise relax people in this way rather than irritate us?\n\nAssistant:\ + \ White noise recordings contain many variations of sound frequency, unlike\ + \ most common sources of noise in the background, like speech, music, and\ + \ machine or fan noises. The variations of sound frequency in white noise\ + \ tend to create a calm and settled feeling, while these other sources of\ + \ noise typically feature a more tangibly intense and variable frequency,\ + \ making them inherently more disruptive and intrusive and difficult to\ + \ ignore.\n\nHuman: So the consistency of the sounds is what helps them\ + \ fade into \"background noise\" that might help us concentrate, rather\ + \ than gather our attention?\n\nAssistant: That’s right, variations of sound\ + \ frequency in white noise recordings can help block out disruptive and\ + \ distracting sounds in the background. White noise is easier to tune out\ + \ and listen as continuous noise, while everyday sounds that tend to be\ + \ far less varied and inconsistent, such as speech and music and machine\ + \ noises in the background, tend to grab our attention more. 
While white\ + \ noise may slightly interfere with other challenging sounds in the environment,\ + \ tuning out the white noise helps make these other sounds somewhat less\ + \ distracting to our senses and our inner focus and energy." + - chosen: "\n\nHuman: Can you tell me things to see in Orlando, Fl?\n\nAssistant:\ \ Sure! There are lots of great things to do in Orlando. Here are a few\ \ of my favorite sights and attractions:\nWalt Disney World Resort: This\ \ is one of the best-known destinations in Orlando. Visitors can learn about\ @@ -224,22 +226,23 @@ \ most terror-inducing rollercoaster in Orlando, Florida. \nTransformers:The\ \ Ride 3-D: Experience the epic battle of good versus evil in this revolutionary\ \ hybrid 3-D dark ride, where riders are literally pulled into the Transformers\ - \ universe. \nHollywood", rejected: "\n\nHuman: Can you tell me things to\ - \ see in Orlando, Fl?\n\nAssistant: Sure! There are lots of great things\ - \ to do in Orlando. Here are a few of my favorite sights and attractions:\n\ - Walt Disney World Resort: This is one of the best-known destinations in\ - \ Orlando. Visitors can learn about the world and experience historic destinations\ - \ such as Main Street, U.S.A. and Frontierland, or explore futuristic experiences\ - \ in Tomorrowland. \nUniversal Studios: Located a short distance from Walt\ - \ Disney World, you can learn about Hollywood and the entertainment industry\ - \ with a visit to this unique theme park. \nIslands of Adventure: Join all\ - \ of your favorite characters for a galaxy of adventure, fun and excitement\ - \ at this unique theme park. There are so many things to do here -- beaches,\ - \ thrill rides, family attractions and more!\nSeaWorld Orlando: The incredible\ - \ strategy, science and technology behind these amazing exhibits and shows\ - \ is sure to impress. Family-friendly and educational, SeaWorld will delight\ - \ animal lovers. \nOrlando Science Center: This museum is packed full of\ - \ activities and exhibits that teach visitors about physics, robotics, technology,\ + \ universe. \nHollywood" + rejected: "\n\nHuman: Can you tell me things to see in Orlando, Fl?\n\nAssistant:\ + \ Sure! There are lots of great things to do in Orlando. Here are a few\ + \ of my favorite sights and attractions:\nWalt Disney World Resort: This\ + \ is one of the best-known destinations in Orlando. Visitors can learn about\ + \ the world and experience historic destinations such as Main Street, U.S.A.\ + \ and Frontierland, or explore futuristic experiences in Tomorrowland. \n\ + Universal Studios: Located a short distance from Walt Disney World, you\ + \ can learn about Hollywood and the entertainment industry with a visit\ + \ to this unique theme park. \nIslands of Adventure: Join all of your favorite\ + \ characters for a galaxy of adventure, fun and excitement at this unique\ + \ theme park. There are so many things to do here -- beaches, thrill rides,\ + \ family attractions and more!\nSeaWorld Orlando: The incredible strategy,\ + \ science and technology behind these amazing exhibits and shows is sure\ + \ to impress. Family-friendly and educational, SeaWorld will delight animal\ + \ lovers. \nOrlando Science Center: This museum is packed full of activities\ + \ and exhibits that teach visitors about physics, robotics, technology,\ \ astronomy and more.\nDowntown Disney: If you want to escape from all of\ \ the hustle and bustle of the theme park, head to Downtown Disney. With\n\ \nHuman: Thanks. 
Can you tell me about Universal Studios?\n\nAssistant:\ @@ -250,8 +253,8 @@ \ based on the Harry Potter series.\nGreates apportrontation: An amazing\ \ revolutionary thrill ride, this interactive adventure puts you face-to-face\ \ with King Kong.\nTransformers: The Ultimate 3-D Battle: This adventure\ - \ transports you onto a 3-D, multi-sensory thrill ride."} - - {chosen: "\n\nHuman: How do I learn to be more appreciative?\n\nAssistant:\ + \ transports you onto a 3-D, multi-sensory thrill ride." + - chosen: "\n\nHuman: How do I learn to be more appreciative?\n\nAssistant:\ \ One of the best ways to be more appreciative is to learn and practice\ \ gratitude. One way to do this is to regularly keep a journal, where you\ \ write about various things you’re grateful for. Being grateful for the\ @@ -261,369 +264,290 @@ \ Similarly, you can also make it a habit to reach out to people you appreciate,\ \ such as expressing your gratitude verbally or in a personal note. Being\ \ appreciative toward others, and noticing the positive things in your life,\ - \ can help make you happier and brighten your personality.", rejected: "\n\n\ - Human: How do I learn to be more appreciative?\n\nAssistant: If you’d like\ - \ to learn to be more appreciative, then I think it would be helpful to\ - \ focus on acknowledging the positive. Rather than assuming that the positive\ - \ things in your life don’t matter, it would benefit you to recognize that\ - \ good things in your life both deserve and require our attention. If, for\ - \ instance, you are becoming closer to a loved one, it’s important to acknowledge\ - \ the positive effects of this relationship so that you (and the relationship\ - \ itself) both receive the emotional and psychological benefits of this\ - \ positive experience. Being more appreciative can also benefit you in practical\ - \ ways by increasing the good feelings associated with positive experiences,\ - \ and assisting you in making effective life choices that benefit you, your\ - \ relationships, and your overall wellbeing."} - analysis: > - The authors found that the crowdworkers didn't exhaustively check for - honesty in the model answers they preferred - [[Section 2.1]](https://arxiv.org/pdf/2204.05862.pdf#subsection.2.1). - # Construction - dependencies: [Anthropic Human Feedback Interface] - license: - value: MIT - explanation: > - The dataset was released on GitHub under MIT - [[hh-rlhf]](https://github.com/anthropics/hh-rlhf). - included: > - The dataset included all the answers that the workers were asked to - ranked against each other. - excluded: none - quality_control: > - Working with a screened set of crowdworkers, and employing simple data - quality measures - [[Appendix D]](https://arxiv.org/pdf/2204.05862.pdf#appendix.D). - # Downstream - access: - value: open - explanation: > - The dataset is freely shared on GitHub - [[hh-rlhf]](https://github.com/anthropics/hh-rlhf). - intended_uses: > - The dataset was intended and released for research purposes. - prohibited_uses: - value: unknown - explanation: > - The prohibited use cases of the dataset weren't specified. - monitoring: - value: none - explanation: > - There is no monitoring process for the usages of the dataset. - feedback: > - Email the authors - [[Paper]](https://arxiv.org/pdf/2204.05862.pdf). - -- type: dataset - name: Anthropic Harmlessness dataset - # General - organization: Anthropic - description: > - One of the datasets used to train Anthropic RLHF models. 
The dataset was - collected by asking crowdworkers to have open-ended conversations with - Anthropic models, aiming to elicit harmful responses, then choose the model - answer that was more harmful for their given task, via the Anthropic Human - Feedback Interface - [[Section 2.2]](https://arxiv.org/pdf/2204.05862.pdf#subsection.2.2). + \ can help make you happier and brighten your personality." + rejected: "\n\nHuman: How do I learn to be more appreciative?\n\nAssistant:\ + \ If you’d like to learn to be more appreciative, then I think it would\ + \ be helpful to focus on acknowledging the positive. Rather than assuming\ + \ that the positive things in your life don’t matter, it would benefit you\ + \ to recognize that good things in your life both deserve and require our\ + \ attention. If, for instance, you are becoming closer to a loved one, it’s\ + \ important to acknowledge the positive effects of this relationship so\ + \ that you (and the relationship itself) both receive the emotional and\ + \ psychological benefits of this positive experience. Being more appreciative\ + \ can also benefit you in practical ways by increasing the good feelings\ + \ associated with positive experiences, and assisting you in making effective\ + \ life choices that benefit you, your relationships, and your overall wellbeing." + size: + explanation: "The size of the dataset isn't directly reported in the paper,\ + \ but the dataset is available on GitHub [[hh-rlhf]](https://github.com/anthropics/hh-rlhf).\ + \ The compressed data files are distributed under three categories in the\ + \ GitHub repository, corresponding to the names of the models used to sample\ + \ the prompts that are shown to the crowdworkers. These are namely: base,\ + \ rejection sampling, and online. For the base model, the uncompressed sizes\ + \ of the train and test datasets are 69.1 MB and 3.7 MB, respectively. The\ + \ same set of values for the rejection sampling model are 106.1 MB and 5.6\ + \ MB. For the online model, the sizes are 82.7 MB and 4.3 MB. Adding all up,\ + \ the total size of the dataset is 271.5 MB.\n" + value: 271.5 MB + type: dataset + url: https://arxiv.org/pdf/2204.05862.pdf +- access: + explanation: "Unlike its helpfulness, this dataset isn't released [[hh-rlhf]](https://github.com/anthropics/hh-rlhf).\n" + value: closed + analysis: "The crodworkers were told that \"lying isn't helpful\" and asked to\ + \ prefer honest responses, which led to models with higher honesty scores. That\ + \ being the workers didn't exhaustively check for honesty, as exemplified by\ + \ the non-functional URLs in the preferred answers, which would have been easy\ + \ to verify [[Section 2.1]](https://arxiv.org/pdf/2204.05862.pdf#subsection.2.1).\n" created_date: + explanation: "The date that the paper was uploaded to ArXiv [[ArXiv Abstract]](https://arxiv.org/abs/2204.05862).\n" value: 2022-04-12 - explanation: > - The date that the paper was uploaded to ArXiv - [[ArXiv Abstract]](https://arxiv.org/abs/2204.05862). - url: https://arxiv.org/pdf/2204.05862.pdf datasheet: none - modality: Text (English) - size: unknown - sample: [] - analysis: > - The crodworkers were told that "lying isn't helpful" and asked to prefer - honest responses, which led to models with higher honesty scores. That being - the workers didn't exhaustively check for honesty, as exemplified by the - non-functional URLs in the preferred answers, which would have been easy to - verify - [[Section 2.1]](https://arxiv.org/pdf/2204.05862.pdf#subsection.2.1). 
- # Construction dependencies: [Anthropic Human Feedback Interface] - license: - value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - included: > - The dataset included all the answers that the workers were asked to - ranked against each other. + description: "One of the datasets used to train Anthropic RLHF models. The dataset\ + \ was collected by asking crowdworkers to have open-ended conversations with\ + \ Anthropic models, aiming to elicit harmful responses, then choose the model\ + \ answer that was more harmful for their given task, via the Anthropic Human\ + \ Feedback Interface [[Section 2.2]](https://arxiv.org/pdf/2204.05862.pdf#subsection.2.2).\n" excluded: none - quality_control: > - Working with "select" crowdworkers or those screened for certain - qualifications, and employing simple data quality measures - [[Appendix D]](https://arxiv.org/pdf/2204.05862.pdf#appendix.D). - # Downstream - access: - value: closed - explanation: > - Unlike its helpfulness, this dataset isn't released - [[hh-rlhf]](https://github.com/anthropics/hh-rlhf). + feedback: + explanation: "The feedback mechanism is unknown since the dataset isn't released,\ + \ but it will likely be \"emailing the authors\" [[Paper]](https://arxiv.org/pdf/2204.05862.pdf).\n" + value: unknown + included: "The dataset included all the answers that the workers were asked to\ + \ ranked against each other.\n" intended_uses: + explanation: "The intended uses of the dataset isn't specified, but it will\ + \ likely be same as the helpfulness dataset, for which the intended use is\ + \ research.\n" value: unknown - explanation: > - The intended uses of the dataset isn't specified, but it will likely be - same as the helpfulness dataset, for which the intended use is research. - prohibited_uses: + license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The prohibited use cases of the dataset weren't specified. + modality: + explanation: Text (English) + value: text monitoring: + explanation: "There is no monitoring process for the usages of the dataset.\n" value: none - explanation: > - There is no monitoring process for the usages of the dataset. - feedback: - value: unknown - explanation: > - The feedback mechanism is unknown since the dataset isn't released, but - it will likely be "emailing the authors" - [[Paper]](https://arxiv.org/pdf/2204.05862.pdf). - -- type: model - name: Anthropic RLHF models - # General + name: Anthropic Harmlessness dataset organization: Anthropic - description: > - Anthropic RLHF models are models trained using reinforcement learning - from human feedback (RLHF). For Anthropic RLHF models, authors started with - a set of base models, and asked humans to rank model generated prompts based - on a specific tasks. They then trained preference models (PM) on the - prompt pairs, and use the PM scores as rewards for training the RLHF - models. - created_date: - value: 2022-04-12 - explanation: > - The date that the paper was uploaded to ArXiv - [[ArXiv Abstract]](https://arxiv.org/abs/2204.05862). 
+ prohibited_uses: + explanation: "The prohibited use cases of the dataset weren't specified.\n" + value: unknown + quality_control: "Working with \"select\" crowdworkers or those screened for certain\ + \ qualifications, and employing simple data quality measures [[Appendix D]](https://arxiv.org/pdf/2204.05862.pdf#appendix.D).\n" + sample: [] + size: unknown + type: dataset url: https://arxiv.org/pdf/2204.05862.pdf - model_card: none - modality: text (English) and Code - size: - value: 52B parameters (dense model) - explanation: > - There are several sizes of the RLHF models, the largest one having 52B - parameters - [[Figure 1]](https://arxiv.org/pdf/2204.05862.pdf#figure.caption.2). +- access: + explanation: "The model wasn't released.\n" + value: closed analysis: - value: > - The authors analyzed the impact of the dataset mixture on the preference - models (PM). In addition to human evaluation, RLHF model were evaluated on - MMLU, Lambada, HellaSwag, OpenBookQA, ARC-Easy, ARC-Challenge, TriviaQA, - code generation, summarization. - explanation: > - When tested on ability to generate responses that are both harmless and - helpful, the authors found that PMs trained on a mixture of harmlessness - and helpfulness preference datasets perform better than those trained - on only one of the two. - The authors also showed that the model performance is hurt for the smaller - models, but improved for the larger models when compared to plain - language models - [[Figure 28]](https://arxiv.org/pdf/2204.05862.pdf#figure.caption.34) - [[Figure 29]](https://arxiv.org/pdf/2204.05862.pdf#figure.caption.35). - The authors also found the larger RLHF models to be more truthful than - their plain counterparts. They also found that their models - had a more positive sentiment towards groups that the language models are - known to be biased against - [[Section 4.6]](https://arxiv.org/pdf/2204.05862.pdf#subsection.4.6). - # Construction + explanation: "When tested on ability to generate responses that are both harmless\ + \ and helpful, the authors found that PMs trained on a mixture of harmlessness\ + \ and helpfulness preference datasets perform better than those trained on\ + \ only one of the two. The authors also showed that the model performance\ + \ is hurt for the smaller models, but improved for the larger models when\ + \ compared to plain language models [[Figure 28]](https://arxiv.org/pdf/2204.05862.pdf#figure.caption.34)\ + \ [[Figure 29]](https://arxiv.org/pdf/2204.05862.pdf#figure.caption.35). The\ + \ authors also found the larger RLHF models to be more truthful than their\ + \ plain counterparts. They also found that their models had a more positive\ + \ sentiment towards groups that the language models are known to be biased\ + \ against [[Section 4.6]](https://arxiv.org/pdf/2204.05862.pdf#subsection.4.6).\n" + value: "The authors analyzed the impact of the dataset mixture on the preference\ + \ models (PM). In addition to human evaluation, RLHF model were evaluated\ + \ on MMLU, Lambada, HellaSwag, OpenBookQA, ARC-Easy, ARC-Challenge, TriviaQA,\ + \ code generation, summarization.\n" + created_date: + explanation: "The date that the paper was uploaded to ArXiv [[ArXiv Abstract]](https://arxiv.org/abs/2204.05862).\n" + value: 2022-04-12 dependencies: - Anthropic Harmlessness dataset - Anthropic Helpfulness dataset - training_emissions: - value: unknown - explanation: The authors do not provide data on the training emissions. 
- training_time: + description: "Anthropic RLHF models are models trained using reinforcement learning\ + \ from human feedback (RLHF). For Anthropic RLHF models, authors started with\ + \ a set of base models, and asked humans to rank model generated prompts based\ + \ on a specific tasks. They then trained preference models (PM) on the prompt\ + \ pairs, and use the PM scores as rewards for training the RLHF models.\n" + feedback: + explanation: "The feedback mechanism is unknown, but it will likely be \"emailing\ + \ the authors\" [[Paper]](https://arxiv.org/pdf/2204.05862.pdf).\n" value: unknown - explanation: The authors do not provide data on the training time. - training_hardware: + intended_uses: + explanation: "The intended uses of the model aren't specified, but they will\ + \ likely be the same as the released Anthropic helpfulness dataset, for which\ + \ the intended use was listed as research. In addition to English language\ + \ generation, the authors also explored code generation capabilities of their\ + \ models.\n" value: unknown - explanation: The authors do not provide data on the training hardware. - quality_control: > - unknown - # Downstream - access: - value: closed - explanation: > - The model wasn't released. license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - intended_uses: + modality: + explanation: text (English) and Code + value: code, text; code, text + model_card: none + monitoring: + explanation: "There is no known monitoring process for the usages of the model.\n" value: unknown - explanation: > - The intended uses of the model aren't specified, but they will likely be - the same as the released Anthropic helpfulness dataset, for which the - intended use was listed as research. In addition to English language - generation, the authors also explored code generation capabilities of - their models. + name: Anthropic RLHF models + organization: Anthropic prohibited_uses: + explanation: "The prohibited use cases of the model weren't specified.\n" value: unknown - explanation: > - The prohibited use cases of the model weren't specified. - monitoring: + quality_control: "unknown\n" + size: 52B parameters (dense) + training_emissions: + explanation: The authors do not provide data on the training emissions. value: unknown - explanation: > - There is no known monitoring process for the usages of the model. - feedback: + training_hardware: + explanation: The authors do not provide data on the training hardware. value: unknown - explanation: > - The feedback mechanism is unknown, but it will likely be "emailing the - authors" - [[Paper]](https://arxiv.org/pdf/2204.05862.pdf). - -- type: application - name: Anthropic Human Feedback Interface - # General - organization: Anthropic - description: > - The feedback interface used to collect preference datasets to train - Anthropic RLHF models - [[Paper]](https://arxiv.org/pdf/2204.05862.pdf). + training_time: + explanation: The authors do not provide data on the training time. 
+    value: unknown
+  type: model
+  url: https://arxiv.org/pdf/2204.05862.pdf
+- access:
+    explanation: "The interface was made available to a select set of crowd workers.\n"
+    value: closed
+  adaptation: none
   created_date:
+    explanation: "The date that the paper was uploaded to ArXiv [[ArXiv Abstract]](https://arxiv.org/abs/2204.05862).\n"
     value: 2022-04-12
-    explanation: >
-      The date that the paper was uploaded to ArXiv
-      [[ArXiv Abstract]](https://arxiv.org/abs/2204.05862).
-  url: https://arxiv.org/pdf/2204.05862.pdf
-  # Construction
   dependencies: [Anthropic RLHF models]
-  adaptation: none
-  output_space: >
-    UI allowing users to indicate their preference for the model responses
-    shown.
-  quality_control: unknown
-  # Downstream
-  access:
-    value: closed
-    explanation: >
-      The interface was made available to a select set of crowd workers.
-  terms_of_service:
-    value: unknown
-    explanation: The terms of service for the interaface is unknown.
+  description: "The feedback interface used to collect preference datasets to train\
+    \ Anthropic RLHF models [[Paper]](https://arxiv.org/pdf/2204.05862.pdf).\n"
+  failures: unknown
+  feedback: unknown
+  intended_uses: "Intended to be used by crowdworkers who are tasked with ranking\
+    \ model answers.\n"
   license:
+    explanation: "The asset isn't released, and hence the license is unknown.\n"
     value: unknown
-    explanation: >
-      The asset isn't released, and hence the license is unknown.
-  intended_uses: >
-    Intended to be used by crowdworkers who are tasked with ranking model
-    answers.
-  prohibited_uses: unknown
   monitoring: unknown
-  feedback: unknown
-  # Deployment
   monthly_active_users: unknown
+  name: Anthropic Human Feedback Interface
+  organization: Anthropic
+  output_space: "UI allowing users to indicate their preference for the model responses\
+    \ shown.\n"
+  prohibited_uses: unknown
+  quality_control: unknown
+  terms_of_service:
+    explanation: The terms of service for the interface is unknown.
+    value: unknown
+  type: application
+  url: https://arxiv.org/pdf/2204.05862.pdf
   user_distribution:
-    value: crowdworkers
     explanation: The demographics of the crowdworkers are unknown.
-  failures: unknown
-
-- type: application
-  name: Anthropic API
-  organization: Anthropic
+    value: crowdworkers
+- access: limited
+  adaptation: ''
+  created_date: 2023-03-14
+  dependencies: [Claude, Claude Instant]
   description: API is designed to be a backend that incorporates Claude into any
     application you’ve developed. Our application sends text to our API, then receives
     a response via server-sent events, a streaming protocol for the web.
-  created_date: 2023-03-14
-  url: https://console.anthropic.com/docs/api
-  dependencies: [Claude, Claude Instant]
-  adaptation: ''
-  output_space: ''
-  quality_control: ''
-  access: limited
+  failures: ''
+  feedback: ''
+  intended_uses: ''
   license:
-    value: none
     explanation: No license was found.
- terms_of_service: '' - intended_uses: '' - prohibited_uses: '' + value: none monitoring: '' - feedback: '' monthly_active_users: '' - user_distribution: '' - failures: '' - -- type: model - name: Claude + name: Anthropic API organization: Anthropic - description: '' - created_date: 2023-03-14 - url: https://www.anthropic.com/index/introducing-claude - model_card: '' - modality: text + output_space: '' + prohibited_uses: '' + quality_control: '' + terms_of_service: '' + type: application + url: https://console.anthropic.com/docs/api + user_distribution: '' +- access: limited analysis: '' - size: unknown + created_date: 2023-03-14 dependencies: [] - training_emissions: '' - training_time: '' - training_hardware: '' - quality_control: '' - access: limited + description: '' + feedback: '' + intended_uses: '' license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - intended_uses: '' - prohibited_uses: '' + modality: + explanation: text + value: text; text + model_card: '' monitoring: '' - feedback: '' - -- type: model - name: Claude Instant + name: Claude organization: Anthropic - description: '' - created_date: 2023-03-14 - url: https://www.anthropic.com/index/introducing-claude - model_card: '' - modality: text - analysis: '' + prohibited_uses: '' + quality_control: '' size: unknown - dependencies: [] training_emissions: '' - training_time: '' training_hardware: '' - quality_control: '' - access: limited + training_time: '' + type: model + url: https://www.anthropic.com/index/introducing-claude +- access: limited + analysis: '' + created_date: 2023-03-14 + dependencies: [] + description: '' + feedback: '' + intended_uses: '' license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - intended_uses: '' - prohibited_uses: '' + modality: + explanation: text + value: text; text + model_card: '' monitoring: '' - feedback: '' - -- type: model - name: Claude 2 + name: Claude Instant organization: Anthropic - description: Claude 2 is a more evolved and refined version of Claude, which is - a general purpose large language model using a transformer architecture and - trained via unsupervised learning. - created_date: 2023-07-11 - url: https://www.anthropic.com/index/claude-2 - model_card: https://www-files.anthropic.com/production/images/Model-Card-Claude-2.pdf - modality: text + prohibited_uses: '' + quality_control: '' + size: unknown + training_emissions: '' + training_hardware: '' + training_time: '' + type: model + url: https://www.anthropic.com/index/introducing-claude +- access: open analysis: Evaluated with human feedback on helpfulness, harmfulness, and honesty and on the Bias Benchmark for QA. - size: '' + created_date: 2023-07-11 dependencies: - Claude human feedback data - Unknown licensed third party datasets - training_emissions: '' - training_time: '' - training_hardware: unknown - quality_control: '' - access: open - license: '' + description: Claude 2 is a more evolved and refined version of Claude, which is + a general purpose large language model using a transformer architecture and + trained via unsupervised learning. 
+ feedback: '' intended_uses: Claude 2 tends to perform well at general, open-ended conversation; search, writing, editing, outlining, and summarizing text; coding; and providing helpful advice about a broad range of subjects. Claude 2 is particularly well suited to support creative or literary use cases. They can take direction on tone and “personality,” and users have described them as feeling steerable and conversational. + license: '' + modality: + explanation: text + value: text; text + model_card: https://www-files.anthropic.com/production/images/Model-Card-Claude-2.pdf + monitoring: '' + name: Claude 2 + organization: Anthropic prohibited_uses: Claude 2 should not be used on their own in high stakes situations where an incorrect answer would cause harm. - monitoring: '' - feedback: '' + quality_control: '' + size: '' + training_emissions: '' + training_hardware: unknown + training_time: '' + type: model + url: https://www.anthropic.com/index/claude-2 diff --git a/assets/argonne.yaml b/assets/argonne.yaml index addd0a35..049fa897 100644 --- a/assets/argonne.yaml +++ b/assets/argonne.yaml @@ -1,23 +1,24 @@ --- - -- type: model - name: GenSLM - organization: Argonne National Laboratory - description: '' - created_date: 2022-10-11 - url: https://www.biorxiv.org/content/10.1101/2022.10.10.511571v1 - model_card: '' - modality: genome sequence +- access: open analysis: '' - size: 25B parameters (dense model) + created_date: 2022-10-11 dependencies: [SARS-CoV-2 genome dataset, BV-BRC dataset] - training_emissions: '' - training_time: '' - training_hardware: '' - quality_control: '' - access: open - license: MIT + description: '' + feedback: '' intended_uses: '' - prohibited_uses: '' + license: MIT + modality: + explanation: genome sequence + value: text; text + model_card: '' monitoring: '' - feedback: '' + name: GenSLM + organization: Argonne National Laboratory + prohibited_uses: '' + quality_control: '' + size: 25B parameters (dense) + training_emissions: '' + training_hardware: '' + training_time: '' + type: model + url: https://www.biorxiv.org/content/10.1101/2022.10.10.511571v1 diff --git a/assets/assembly.yaml b/assets/assembly.yaml index 197b634e..61848cc2 100644 --- a/assets/assembly.yaml +++ b/assets/assembly.yaml @@ -1,48 +1,35 @@ --- - -- type: dataset - name: Conformer-1 dataset - organization: AssemblyAI - description: The dataset used to train AssemblyAI's Conformer-1 model. +- access: closed + analysis: '' created_date: - value: 2023-03-15 explanation: The date Conformer-1 was announced in this [[Twitter post]](https://twitter.com/AssemblyAI/status/1636050346240884744). - url: https://www.assemblyai.com/blog/conformer-1/ + value: 2023-03-15 datasheet: none - modality: speech (English), text - size: 650K hours audio (60TB) - sample: [] - analysis: '' dependencies: [] - included: > - "our team curated a dataset of 650K hours of English audio - consisting - of proprietary internal datasets and various sources from the internet" + description: The dataset used to train AssemblyAI's Conformer-1 model. excluded: '' - quality_control: '' - access: closed + feedback: '' + included: "\"our team curated a dataset of 650K hours of English audio - consisting\ + \ of proprietary internal datasets and various sources from the internet\"\n" + intended_uses: '' license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. 
- intended_uses: '' - prohibited_uses: '' + modality: + explanation: speech (English), text + value: audio, text monitoring: '' - feedback: '' - -- type: model - name: Conformer-1 + name: Conformer-1 dataset organization: AssemblyAI - description: Conformer-1 is a state-of-the-art speech recognition model trained - on 650K hours of audio data that achieves near human-level performance and robustness - across a variety of data, making up to 43% fewer errors on noisy data than other - ASR models. - created_date: - value: 2023-03-15 - explanation: The date the model was announced in this [[Twitter post]](https://twitter.com/AssemblyAI/status/1636050346240884744). + prohibited_uses: '' + quality_control: '' + sample: [] + size: 650K hours audio (60TB) + type: dataset url: https://www.assemblyai.com/blog/conformer-1/ - model_card: '' - modality: speech (English), text +- access: limited analysis: + explanation: See [[Results and Performance]](https://www.assemblyai.com/blog/conformer-1/#results-and-performance) value: In order to evaluate the accuracy and robustness of Conformer-1, we sourced 60+ hours of human labeled audio data covering popular speech domains such as call centers, podcasts, broadcasts, and webinars. We then calculated the @@ -50,76 +37,84 @@ the results against Whisper and a number of other ASR models. To ground our results against popular open source speech recognition benchmarks, we also performed the same WER analysis against a number of academic datasets. - explanation: See [[Results and Performance]](https://www.assemblyai.com/blog/conformer-1/#results-and-performance) - size: 300M parameters + created_date: + explanation: The date the model was announced in this [[Twitter post]](https://twitter.com/AssemblyAI/status/1636050346240884744). + value: 2023-03-15 dependencies: [Conformer-1 dataset] - training_emissions: '' - training_time: '' - training_hardware: '' - quality_control: '' - access: limited + description: Conformer-1 is a state-of-the-art speech recognition model trained + on 650K hours of audio data that achieves near human-level performance and robustness + across a variety of data, making up to 43% fewer errors on noisy data than other + ASR models. + feedback: '' + intended_uses: '' license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - intended_uses: '' - prohibited_uses: '' + modality: + explanation: speech (English), text + value: audio, text; audio, text + model_card: '' monitoring: '' - feedback: '' - -- type: application - name: AssemblyAI + name: Conformer-1 organization: AssemblyAI - description: AssemblyAI uses Claude and Anthropic's model to transcribe and understand - audio data at scale. + prohibited_uses: '' + quality_control: '' + size: 300M parameters (dense) + training_emissions: '' + training_hardware: '' + training_time: '' + type: model + url: https://www.assemblyai.com/blog/conformer-1/ +- access: limited + adaptation: '' created_date: 2022 - url: https://www.assemblyai.com/ dependencies: [Anthropic API] - adaptation: '' - output_space: '' - quality_control: '' - access: limited + description: AssemblyAI uses Claude and Anthropic's model to transcribe and understand + audio data at scale. 
+ failures: '' + feedback: '' + intended_uses: '' license: + explanation: "\"AssemblyAI gives you a personal, worldwide, royalty-free, non-assignable\ + \ and non-exclusive license to use the Site and Services solely for your reasonable\ + \ personal and commercial purposes, in the manner permitted by these Terms.\"\ + \ - excerpt from the Terms of Service.\n" value: custom - explanation: > - "AssemblyAI gives you a personal, worldwide, royalty-free, - non-assignable and non-exclusive license to use the Site and Services - solely for your reasonable personal and commercial purposes, in the - manner permitted by these Terms." - excerpt from the Terms of Service. - terms_of_service: https://www.assemblyai.com/legal/terms-of-service - intended_uses: '' - prohibited_uses: '' monitoring: '' - feedback: '' monthly_active_users: '' - user_distribution: '' - failures: '' - -- type: application - name: Conformer-1 API + name: AssemblyAI organization: AssemblyAI - description: API to access the AssemblyAI's Conformer-1 model. + output_space: '' + prohibited_uses: '' + quality_control: '' + terms_of_service: https://www.assemblyai.com/legal/terms-of-service + type: application + url: https://www.assemblyai.com/ + user_distribution: '' +- access: open + adaptation: '' created_date: - value: 2023-03-15 explanation: The date the API was announced in this [[Twitter post]](https://twitter.com/AssemblyAI/status/1636050346240884744). - url: https://www.assemblyai.com/blog/conformer-1/ + value: 2023-03-15 dependencies: [Conformer-1] - adaptation: '' - output_space: '' - quality_control: '' - access: open + description: API to access the AssemblyAI's Conformer-1 model. + failures: '' + feedback: '' + intended_uses: Speech recognition license: + explanation: "\"AssemblyAI gives you a personal, worldwide, royalty-free, non-assignable\ + \ and non-exclusive license to use the Site and Services solely for your reasonable\ + \ personal and commercial purposes, in the manner permitted by these Terms.\"\ + \ - excerpt from the Terms of Service.\n" value: custom - explanation: > - "AssemblyAI gives you a personal, worldwide, royalty-free, - non-assignable and non-exclusive license to use the Site and Services - solely for your reasonable personal and commercial purposes, in the - manner permitted by these Terms." - excerpt from the Terms of Service. - terms_of_service: https://www.assemblyai.com/legal/terms-of-service - intended_uses: Speech recognition - prohibited_uses: '' monitoring: '' - feedback: '' monthly_active_users: '' + name: Conformer-1 API + organization: AssemblyAI + output_space: '' + prohibited_uses: '' + quality_control: '' + terms_of_service: https://www.assemblyai.com/legal/terms-of-service + type: application + url: https://www.assemblyai.com/blog/conformer-1/ user_distribution: '' - failures: '' diff --git a/assets/autogpt.yaml b/assets/autogpt.yaml index 5a3a040f..34a3ef5f 100644 --- a/assets/autogpt.yaml +++ b/assets/autogpt.yaml @@ -1,23 +1,22 @@ --- - -- type: application - name: Auto-GPT - organization: Auto-GPT - description: Auto-GPT is an experimental open-source application showcasing the - capabilities of the GPT-4 language model. 
+- access: open + adaptation: GPT-4 adapted to run autonomously by chaining together LLM "thoughts" created_date: 2023-04-16 - url: https://news.agpt.co/ dependencies: [GPT-4 API] - adaptation: GPT-4 adapted to run autonomously by chaining together LLM "thoughts" - output_space: text - quality_control: '' - access: open - license: MIT - terms_of_service: '' + description: Auto-GPT is an experimental open-source application showcasing the + capabilities of the GPT-4 language model. + failures: '' + feedback: '' intended_uses: '' - prohibited_uses: '' + license: MIT monitoring: '' - feedback: '' monthly_active_users: '' + name: Auto-GPT + organization: Auto-GPT + output_space: text + prohibited_uses: '' + quality_control: '' + terms_of_service: '' + type: application + url: https://news.agpt.co/ user_distribution: '' - failures: '' diff --git a/assets/baai.yaml b/assets/baai.yaml index ab838a1d..e90846c6 100644 --- a/assets/baai.yaml +++ b/assets/baai.yaml @@ -1,68 +1,57 @@ --- - -- type: dataset - name: Wu Dao dataset - # General - organization: Beijing Academy of Artificial Intelligence - description: '' +- access: closed + analysis: '' created_date: + explanation: "The date that BAAI made a public announcement [[News Link]](https://mp.weixin.qq.com/s/BUQWZ5EdR19i40GuFofpBg).\n" value: 2021-01-12 - explanation: > - The date that BAAI made a public announcement - [[News Link]](https://mp.weixin.qq.com/s/BUQWZ5EdR19i40GuFofpBg). - url: https://www.tsinghua.edu.cn/en/info/1420/10473.htm datasheet: '' - modality: text (Chinese, English) and image - size: unknown - sample: [] - analysis: '' - # Construction dependencies: [] - license: - value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - included: '' + description: '' excluded: '' - quality_control: '' - # Downstream - access: closed + feedback: '' + included: '' intended_uses: '' - prohibited_uses: '' + license: + explanation: "The asset isn't released, and hence the license is unknown.\n" + value: unknown + modality: + explanation: text (Chinese, English) and image + value: image, text monitoring: '' - feedback: '' - -- type: model - name: Wu Dao 2.0 - # General + name: Wu Dao dataset organization: Beijing Academy of Artificial Intelligence - description: '' - created_date: - value: 2021-01-12 - explanation: > - The date that BAAI made a public announcement - [[News Link]](https://mp.weixin.qq.com/s/BUQWZ5EdR19i40GuFofpBg). + prohibited_uses: '' + quality_control: '' + sample: [] + size: unknown + type: dataset url: https://www.tsinghua.edu.cn/en/info/1420/10473.htm - model_card: '' - modality: text (Chinese, English) and image - size: 1.75T parameters (dense model) +- access: + explanation: "The model wasn't released.\n" + value: closed analysis: '' - # Construction + created_date: + explanation: "The date that BAAI made a public announcement [[News Link]](https://mp.weixin.qq.com/s/BUQWZ5EdR19i40GuFofpBg).\n" + value: 2021-01-12 dependencies: [Wu Dao dataset] - training_emissions: '' - training_time: '' - training_hardware: '' - quality_control: '' - # Downstream - access: - value: closed - explanation: > - The model wasn't released. + description: '' + feedback: '' + intended_uses: '' license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. 
- intended_uses: '' - prohibited_uses: '' + modality: + explanation: text (Chinese, English) and image + value: image, text; image, text + model_card: '' monitoring: '' - feedback: '' + name: Wu Dao 2.0 + organization: Beijing Academy of Artificial Intelligence + prohibited_uses: '' + quality_control: '' + size: 1.75T parameters (dense) + training_emissions: '' + training_hardware: '' + training_time: '' + type: model + url: https://www.tsinghua.edu.cn/en/info/1420/10473.htm diff --git a/assets/baidu.yaml b/assets/baidu.yaml index c6386cb1..bed1db91 100644 --- a/assets/baidu.yaml +++ b/assets/baidu.yaml @@ -1,89 +1,90 @@ --- - -- type: model - name: ERNIE 3.0 Titan - organization: Baidu, PengCheng Laboratory - description: ERNIE 3.0 Titan is a language model +- access: + explanation: Baidu does not provide ERNIE 3.0 for external researchers + value: closed + analysis: '' created_date: - value: 2021-12-23 explanation: The date the model paper was released - url: https://arxiv.org/abs/2112.12731 - model_card: none - modality: text (Chinese) - analysis: '' - size: 260B parameters (dense model) + value: 2021-12-23 dependencies: [] - training_emissions: unknown - training_time: unknown - training_hardware: Baidu V100 Cluster, PengCheng Lab Ascend 910 NPU cluster - quality_control: none - access: - value: closed - explanation: Baidu does not provide ERNIE 3.0 for external researchers + description: ERNIE 3.0 Titan is a language model + feedback: none + intended_uses: unknown license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - intended_uses: unknown - prohibited_uses: unknown - monitoring: none - feedback: none - -- type: model - name: ERNIE-ViLG - organization: Baidu - description: ERNIE-ViLG is a model for text-to-image generation - created_date: - value: 2021-12-31 - explanation: The date the model paper was released - url: https://arxiv.org/abs/2112.15283 + modality: + explanation: text (Chinese) + value: text; text model_card: none - modality: text (Chinese) and image - size: 10B parameters - analysis: '' - dependencies: [] + monitoring: none + name: ERNIE 3.0 Titan + organization: Baidu, PengCheng Laboratory + prohibited_uses: unknown + quality_control: none + size: 260B parameters (dense) training_emissions: unknown + training_hardware: Baidu V100 Cluster, PengCheng Lab Ascend 910 NPU cluster training_time: unknown - training_hardware: unknown - quality_control: '' - access: - value: limited + type: model + url: https://arxiv.org/abs/2112.12731 +- access: explanation: API access is available through PaddleHub. 
Instructions on [[Github repository]](https://github.com/PaddlePaddle/PaddleHub/blob/develop/modules/image/text_to_image/ernie_vilg/README.md) + value: limited + analysis: '' + created_date: + explanation: The date the model paper was released + value: 2021-12-31 + dependencies: [] + description: ERNIE-ViLG is a model for text-to-image generation + feedback: none + intended_uses: unknown license: + explanation: "No explicit license for the ERNIE-ViLG model is provided in the\ + \ [[Github repository]](https://github.com/PaddlePaddle/PaddleHub)\n" value: none - explanation: > - No explicit license for the ERNIE-ViLG model is provided in the [[Github repository]](https://github.com/PaddlePaddle/PaddleHub) - intended_uses: unknown - prohibited_uses: unknown + modality: + explanation: text (Chinese) and image + value: image, text; image, text + model_card: none monitoring: none - feedback: none - -- type: model - name: ERNIE-ViLG 2.0 + name: ERNIE-ViLG organization: Baidu - description: ERNIE-ViLG is a model for text-to-image generation + prohibited_uses: unknown + quality_control: '' + size: 10B parameters (dense) + training_emissions: unknown + training_hardware: unknown + training_time: unknown + type: model + url: https://arxiv.org/abs/2112.15283 +- access: + explanation: Baidu does not provide ERNIE-ViLG 2.0 to external researchers + value: closed + analysis: '' created_date: - value: 2022-10-27 explanation: The date the model paper was released - url: https://arxiv.org/abs/2210.15257 - model_card: none - modality: text (Chinese) and image - size: 10B parameters - analysis: '' + value: 2022-10-27 dependencies: [] - training_emissions: unknown - training_time: 18 days according to [[the paper]](https://arxiv.org/abs/2210.15257) - training_hardware: 320 A100 GPUs according to [[the paper]](https://arxiv.org/abs/2210.15257) - quality_control: '' - access: - value: closed - explanation: Baidu does not provide ERNIE-ViLG 2.0 to external researchers + description: ERNIE-ViLG is a model for text-to-image generation + feedback: none + intended_uses: unknown license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - intended_uses: unknown - prohibited_uses: unknown + modality: + explanation: text (Chinese) and image + value: image, text; image, text + model_card: none monitoring: none - feedback: none + name: ERNIE-ViLG 2.0 + organization: Baidu + prohibited_uses: unknown + quality_control: '' + size: 10B parameters (dense) + training_emissions: unknown + training_hardware: 320 A100 GPUs according to [[the paper]](https://arxiv.org/abs/2210.15257) + training_time: 18 days according to [[the paper]](https://arxiv.org/abs/2210.15257) + type: model + url: https://arxiv.org/abs/2210.15257 diff --git a/assets/bain.yaml b/assets/bain.yaml index c6f68c19..10f57f76 100644 --- a/assets/bain.yaml +++ b/assets/bain.yaml @@ -1,25 +1,24 @@ --- - -- type: application - name: Bain Chat - organization: Bain +- access: limited + adaptation: '' + created_date: 2023-02-21 + dependencies: [ChatGPT API] description: With the alliance, Bain will combine its deep digital implementation capabilities and strategic expertise with OpenAI’s AI tools and platforms, including ChatGPT, to help its clients around the world identify and implement the value of AI to maximize business potential. 
- created_date: 2023-02-21 - url: https://www.bain.com/vector-digital/partnerships-alliance-ecosystem/openai-alliance/ - dependencies: [ChatGPT API] - adaptation: '' - output_space: '' - quality_control: '' - access: limited - license: unknown - terms_of_service: '' + failures: '' + feedback: '' intended_uses: '' - prohibited_uses: '' + license: unknown monitoring: '' - feedback: '' monthly_active_users: '' + name: Bain Chat + organization: Bain + output_space: '' + prohibited_uses: '' + quality_control: '' + terms_of_service: '' + type: application + url: https://www.bain.com/vector-digital/partnerships-alliance-ecosystem/openai-alliance/ user_distribution: '' - failures: '' diff --git a/assets/be_my_eyes.yaml b/assets/be_my_eyes.yaml index 19c1e9cc..c9a60174 100644 --- a/assets/be_my_eyes.yaml +++ b/assets/be_my_eyes.yaml @@ -1,23 +1,22 @@ --- - -- type: application - name: Virtual Volunteer - organization: Be My Eyes - description: The first-ever digital visual assistant powered by OpenAI’s new GPT-4 - language model. +- access: limited + adaptation: '' created_date: 2023-03-14 - url: https://www.bemyeyes.com/blog/introducing-be-my-eyes-virtual-volunteer dependencies: [GPT-4 API] - adaptation: '' - output_space: '' - quality_control: '' - access: limited - license: unknown - terms_of_service: '' + description: The first-ever digital visual assistant powered by OpenAI’s new GPT-4 + language model. + failures: '' + feedback: '' intended_uses: '' - prohibited_uses: '' + license: unknown monitoring: '' - feedback: '' monthly_active_users: '' + name: Virtual Volunteer + organization: Be My Eyes + output_space: '' + prohibited_uses: '' + quality_control: '' + terms_of_service: '' + type: application + url: https://www.bemyeyes.com/blog/introducing-be-my-eyes-virtual-volunteer user_distribution: '' - failures: '' diff --git a/assets/berkeley.yaml b/assets/berkeley.yaml index e8bab3a0..76756276 100644 --- a/assets/berkeley.yaml +++ b/assets/berkeley.yaml @@ -1,72 +1,75 @@ --- - -- type: model - name: Koala - organization: Berkeley +- access: open + analysis: Evaluated in comparison with ChatGPT and Stanford Alpaca. + created_date: 2023-04-03 + dependencies: [LLaMA, web-scraped dialogue data] description: A relatively small chatbot trained by fine-tuning Meta’s LLaMA on dialogue data gathered from the web. - created_date: 2023-04-03 - url: https://bair.berkeley.edu/blog/2023/04/03/koala/ + feedback: https://huggingface.co/TheBloke/koala-7B-GPTQ-4bit-128g/discussions + intended_uses: academic research + license: Apache 2.0 + modality: + explanation: natural language text + value: text; text model_card: https://huggingface.co/TheBloke/koala-7B-GPTQ-4bit-128g - modality: natural language text - analysis: Evaluated in comparison with ChatGPT and Stanford Alpaca. - size: 13B parameters - dependencies: [LLaMA, web-scraped dialogue data] + monitoring: '' + name: Koala + organization: Berkeley + prohibited_uses: '' + quality_control: '' + size: 13B parameters (dense) training_emissions: '' - training_time: 6 hours training_hardware: 8 A100 GPUs - quality_control: '' - access: open + training_time: 6 hours + type: model + url: https://bair.berkeley.edu/blog/2023/04/03/koala/ +- access: open + analysis: Evaluated using AST sub-tree matching technique and compared to other + models in terms of API functionality accuracy. 
+ created_date: 2023-05-24 + dependencies: [LLaMA, Gorilla document retriever] + description: Gorilla is a finetuned LLaMA-based model that surpasses the performance + of GPT-4 on writing API calls. + feedback: '' + intended_uses: In conjunction with a LLM to improve its capability for using API + calls. license: Apache 2.0 - intended_uses: academic research - prohibited_uses: '' + modality: + explanation: outputs API from natural language input + value: other; other + model_card: '' monitoring: '' - feedback: https://huggingface.co/TheBloke/koala-7B-GPTQ-4bit-128g/discussions - -- type: model name: Gorilla organization: Berkeley - description: Gorilla is a finetuned LLaMA-based model that surpasses the performance - of GPT-4 on writing API calls. - created_date: 2023-05-24 - url: https://arxiv.org/pdf/2305.15334v1.pdf - model_card: '' - modality: outputs API from natural language input - analysis: Evaluated using AST sub-tree matching technique and compared to other - models in terms of API functionality accuracy. - size: 7B parameters - dependencies: [LLaMA, Gorilla document retriever] - training_emissions: '' - training_time: '' - training_hardware: '' + prohibited_uses: '' quality_control: No specific quality control is mentioned in model training, though details on data processing and collection are provided in the paper. - access: open + size: 7B parameters (dense) + training_emissions: '' + training_hardware: '' + training_time: '' + type: model + url: https://arxiv.org/pdf/2305.15334v1.pdf +- access: open + analysis: Evaluated on wide range of tasks using own evaluation benchmarks. + created_date: 2023-05-03 + dependencies: [RedPajama] + description: OpenLlama is an open source reproduction of Meta's LLaMA model. + feedback: '' + intended_uses: '' license: Apache 2.0 - intended_uses: In conjunction with a LLM to improve its capability for using API - calls. - prohibited_uses: '' + modality: + explanation: text + value: text; text + model_card: '' monitoring: '' - feedback: '' - -- type: model name: OpenLLaMA organization: Berkeley - description: OpenLlama is an open source reproduction of Meta's LLaMA model. - created_date: 2023-05-03 - url: https://github.com/openlm-research/open_llama - model_card: '' - modality: text - analysis: Evaluated on wide range of tasks using own evaluation benchmarks. - size: 17B parameters - dependencies: [RedPajama] + prohibited_uses: '' + quality_control: '' + size: 17B parameters (dense) training_emissions: unknown - training_time: unknown training_hardware: '' - quality_control: '' - access: open - license: Apache 2.0 - intended_uses: '' - prohibited_uses: '' - monitoring: '' - feedback: '' + training_time: unknown + type: model + url: https://github.com/openlm-research/open_llama diff --git a/assets/bigcode.yaml b/assets/bigcode.yaml index 4a5362ea..cb03bed0 100644 --- a/assets/bigcode.yaml +++ b/assets/bigcode.yaml @@ -1,76 +1,79 @@ --- - -- type: model - name: StarCoder - organization: BigCode +- access: open + analysis: Tested on several benchmarks, most notably Python benchmark HumanEval. + created_date: 2023-05-09 + dependencies: [The Stack] description: StarCoder is a Large Language Model for Code (Code LLM) trained on permissively licensed data from GitHub, including from 80+ programming languages, Git commits, GitHub issues, and Jupyter notebooks. 
- created_date: 2023-05-09
- url: https://arxiv.org/pdf/2305.06161.pdf
+ feedback: https://huggingface.co/bigcode/starcoder/discussions
+ intended_uses: With a Tech Assistant prompt and not as an instruction model given
  training limitations.
+ license: Apache 2.0
+ modality:
+ explanation: code (80+ programming languages)
+ value: code; code
  model_card: https://huggingface.co/bigcode/starcoder
- modality: code (80+ programming languages)
- analysis: Tested on several benchmarks, most notably Python benchmark HumanEval.
- size: 15.5B parameters
- dependencies: [The Stack]
- training_emissions: 16.68 tons of CO2eq
- training_time: 2 days
- training_hardware: 64 NVIDIA A100 GPUs
+ monitoring: ''
+ name: StarCoder
+ organization: BigCode
+ prohibited_uses: ''
  quality_control: No specific quality control is mentioned in model training,
  though details on data processing and how the tokenizer was trained are provided
  in the paper.
- access: open
+ size: 15.5B parameters (dense)
+ training_emissions: 16.68 tons of CO2eq
+ training_hardware: 64 NVIDIA A100 GPUs
+ training_time: 2 days
+ type: model
+ url: https://arxiv.org/pdf/2305.06161.pdf
+- access: open
+ analysis: Evaluated on MultiPL-E system benchmarks.
+ created_date: 2023-02-24
+ dependencies: [The Stack, BigCode Dataset]
+ description: Multilingual code model derived from findings of BigCode Project
+ analysis on Github stars' association to data quality.
+ feedback: ''
+ intended_uses: ''
  license: Apache 2.0
- intended_uses: With a Tech Assistant prompt and not as an instruction model given
- training limitations.
- prohibited_uses: ''
+ modality:
+ explanation: code
+ value: code; code
+ model_card: ''
  monitoring: ''
- feedback: https://huggingface.co/bigcode/starcoder/discussions
-
-- type: model
  name: SantaCoder
  organization: BigCode
- description: Multilingual code model derived from findings of BigCode Project
- analysis on Github stars' association to data quality.
- created_date: 2023-02-24
- url: https://arxiv.org/pdf/2301.03988.pdf
- model_card: ''
- modality: code
- analysis: Evaluated on MultiPL-E system benchmarks.
- size: 1.1B parameters
- dependencies: [The Stack, BigCode Dataset]
+ prohibited_uses: ''
+ quality_control: ''
+ size: 1.1B parameters (dense)
  training_emissions: ''
- training_time: 3.1 days
  training_hardware: 96 NVIDIA Tesla V100 GPUs
- quality_control: ''
- access: open
- license: Apache 2.0
- intended_uses: ''
- prohibited_uses: ''
- monitoring: ''
- feedback: ''
-
-- type: dataset
- name: The Stack
- organization: BigCode
- description: The Stack is a 3.1 TB dataset consisting of permissively licensed
- source code inteded for use in creating code LLMs.
- created_date: 2022-11-20
- url: https://arxiv.org/pdf/2211.15533.pdf
- datasheet: https://huggingface.co/datasets/bigcode/the-stack
- modality: code (358 programming languages)
- size: 3.1 TB
- sample: []
+ training_time: 3.1 days
+ type: model
+ url: https://arxiv.org/pdf/2301.03988.pdf
+- access: open
  analysis: Evaluated models trained on The Stack on HumanEval and MBPP and compared
  against similarly-sized models.
+ created_date: 2022-11-20
+ datasheet: https://huggingface.co/datasets/bigcode/the-stack
  dependencies: [GitHub]
- included: ''
+ description: The Stack is a 3.1 TB dataset consisting of permissively licensed
+ source code intended for use in creating code LLMs.
excluded: '' - quality_control: allowed users whose data were part of The Stack's training data - to opt-out - access: open - license: Apache 2.0 + feedback: '' + included: '' intended_uses: creating code LLMs - prohibited_uses: '' + license: Apache 2.0 + modality: + explanation: code (358 programming languages) + value: code monitoring: '' - feedback: '' + name: The Stack + organization: BigCode + prohibited_uses: '' + quality_control: allowed users whose data were part of The Stack's training data + to opt-out + sample: [] + size: 3.1 TB + type: dataset + url: https://arxiv.org/pdf/2211.15533.pdf diff --git a/assets/bigscience.yaml b/assets/bigscience.yaml index 21251e9d..ce03bf3f 100644 --- a/assets/bigscience.yaml +++ b/assets/bigscience.yaml @@ -1,163 +1,148 @@ --- - -# Datasets -- type: dataset - name: ROOTS - # General - organization: BigScience - description: The Responsible Open-science Open-collaboration Text Sources (ROOTS) - corpus, a 1.6TB dataset spanning 59 languages that was used to train the 176-billion-parameter - BigScience Large Open-science Open-access Multilingual (BLOOM) language model. +- access: + explanation: While not all of the ROOTS corpus is publicly accessible, the majority + is through [this form](https://docs.google.com/forms/d/e/1FAIpQLSdq50O1x4dkdGI4dwsmchFuNI0KCWEDiKUYxvd0r0_sl6FfAQ/viewform?usp=send_form) + value: open + analysis: https://huggingface.co/spaces/bigscience-data/roots-search created_date: 2022-06-06 - url: https://openreview.net/forum?id=UoEw6KigkUn datasheet: https://huggingface.co/spaces/bigscience-data/bigscience-corpus - modality: text (46 languages) and code (13 programming languages) - size: 1.6TB - sample: [https://huggingface.co/spaces/bigscience-data/roots-search] - analysis: https://huggingface.co/spaces/bigscience-data/roots-search - # Construction dependencies: [] - license: - value: custom - explanation: Each constituent subset of the dataset will be released under the - license that applies to it. ([See individual dataset page for specific license - information](https://hf.co/bigscience-data)) - included: See section 2 of the paper. + description: The Responsible Open-science Open-collaboration Text Sources (ROOTS) + corpus, a 1.6TB dataset spanning 59 languages that was used to train the 176-billion-parameter + BigScience Large Open-science Open-access Multilingual (BLOOM) language model. excluded: unknown - quality_control: Data collection involved merging and deduplicating searches to - remove menus, HTML tags. Further, a quality improvement pipeline was implemented. - # Downstream - access: - value: open - explanation: While not all of the ROOTS corpus is publicly accessible, the majority - is through [this form](https://docs.google.com/forms/d/e/1FAIpQLSdq50O1x4dkdGI4dwsmchFuNI0KCWEDiKUYxvd0r0_sl6FfAQ/viewform?usp=send_form) + feedback: https://huggingface.co/spaces/bigscience-data/roots-search/discussions + included: See section 2 of the paper. intended_uses: To empower large-scale monolingual and multilingual modeling projects with both the data and the processing tools, as well as stimulate research around this large multilingual corpus. - prohibited_uses: none + license: + explanation: Each constituent subset of the dataset will be released under the + license that applies to it. 
  ([See individual dataset page for specific license
+ information](https://hf.co/bigscience-data))
+ value: custom
+ modality:
+ explanation: text (46 languages) and code (13 programming languages)
+ value: code, text
  monitoring: none
- feedback: https://huggingface.co/spaces/bigscience-data/roots-search/discussions
-
-- type: dataset
- name: P3
- # General
+ name: ROOTS
  organization: BigScience
- description: The Public Pool of Prompts (P3) are prompts written in an unified
- format use to train T0++.
+ prohibited_uses: none
+ quality_control: Data collection involved merging and deduplicating searches to
+ remove menus, HTML tags. Further, a quality improvement pipeline was implemented.
+ sample: [https://huggingface.co/spaces/bigscience-data/roots-search]
+ size: 1.6TB
+ type: dataset
+ url: https://openreview.net/forum?id=UoEw6KigkUn
+- access: open
+ analysis: See the paper.
  created_date: 2022-10-15
- url: https://arxiv.org/pdf/2110.08207.pdf
  datasheet: https://huggingface.co/datasets/bigscience/P3
- modality: text (English)
- size: 2000 prompts
- sample: [https://huggingface.co/datasets/bigscience/P3/viewer]
- analysis: See the paper.
- # Construction
  dependencies: []
- license: Apache 2.0
+ description: The Public Pool of Prompts (P3) is a set of prompts written in a unified
+ format used to train T0++.
+ excluded: We conservatively decided not to prompt datasets that contain potentially
+ harmful content (for instance, datasets built on social media content).
+ feedback: Point of Contact is [Victor Sanh](https://huggingface.co/datasets/bigscience/P3)
  included: The Public Pool of Prompts relies on the Hugging Face Dataset library.
  Any public dataset in the Datasets library can be prompted. We select the datasets
  that have at least one subset in English and excluded datasets containing (predominantly)
  non-natural language examples.
- excluded: We conservatively decided not to prompt datasets that contain potentially
- harmful content (for instance, datasets built on social media content).
- quality_control: Data collection involved merging and deduplicating searches to
- remove menus, HTML tags. Further, a quality improvement pipeline was implemented.
- # Downstream
- access: open
  intended_uses: Multitask finetuning of language models.
- prohibited_uses: none
+ license: Apache 2.0
+ modality:
+ explanation: text (English)
+ value: text
  monitoring: none
- feedback: Point of Contact is [Victor Sanh](https://huggingface.co/datasets/bigscience/P3)
-
-- type: dataset
- name: xP3
- # General
+ name: P3
  organization: BigScience
+ prohibited_uses: none
+ quality_control: Data collection involved merging and deduplicating searches to
+ remove menus, HTML tags. Further, a quality improvement pipeline was implemented.
+ sample: [https://huggingface.co/datasets/bigscience/P3/viewer]
+ size: 2000 prompts
+ type: dataset
+ url: https://arxiv.org/pdf/2110.08207.pdf
+- access: open
+ analysis: See the paper.
+ created_date: 2022-11-03
+ datasheet: https://huggingface.co/datasets/bigscience/xP3
+ dependencies: [P3]
  description: xP3 (Crosslingual Public Pool of Prompts) is a collection of prompts
  and datasets across 46 languages & 16 NLP tasks. It is used for the training
  of BLOOMZ and mT0, multilingual language models capable of following human instructions
  in dozens of languages zero-shot.
- created_date: 2022-11-03 - url: https://arxiv.org/abs/2211.01786 - datasheet: https://huggingface.co/datasets/bigscience/xP3 - modality: text (46 languages) and code (13 programming languages) - size: 9.4GB - sample: [https://huggingface.co/datasets/bigscience/xP3/viewer] - analysis: See the paper. - # Construction - dependencies: [P3] - license: Apache 2.0 - included: xP3 adds 28 multilingual datasets to P3 based on the P3 task taxonomy. excluded: We conservatively decided not to prompt datasets that contain potentially harmful content (for instance, datasets built on social media content). - quality_control: Data collection involved merging and deduplicating searches to - remove menus, HTML tags. Further, a quality improvement pipeline was implemented. - # Downstream - access: open + feedback: Point of Contact is [Niklas Muennighoff](https://huggingface.co/datasets/bigscience/xP3) + included: xP3 adds 28 multilingual datasets to P3 based on the P3 task taxonomy. intended_uses: Multitask finetuning of language models. - prohibited_uses: none + license: Apache 2.0 + modality: + explanation: text (46 languages) and code (13 programming languages) + value: code, text monitoring: none - feedback: Point of Contact is [Niklas Muennighoff](https://huggingface.co/datasets/bigscience/xP3) - - -- type: model - name: T0++ + name: xP3 organization: BigScience - description: T0++ is an multitask fine-tuned language model based on T5. + prohibited_uses: none + quality_control: Data collection involved merging and deduplicating searches to + remove menus, HTML tags. Further, a quality improvement pipeline was implemented. + sample: [https://huggingface.co/datasets/bigscience/xP3/viewer] + size: 9.4GB + type: dataset + url: https://arxiv.org/abs/2211.01786 +- access: open + analysis: '' created_date: - value: 2021-10-15 explanation: The date the paper was posted to arXiv. - url: https://arxiv.org/pdf/2110.08207.pdf - model_card: https://huggingface.co/bigscience/T0pp - modality: text (English) - size: 11B parameters (dense model) - analysis: '' + value: 2021-10-15 dependencies: [T5, P3] - training_emissions: - value: 0.9 tCO2e - explanation: Sourced from estimates in https://arxiv.org/pdf/2110.08207.pdf - training_time: - value: 27 hours - explanation: Sourced from estimates in https://arxiv.org/pdf/2110.08207.pdf - training_hardware: Jean Zay (v3-512) - quality_control: https://arxiv.org/pdf/2110.08207.pdf - access: open - license: Apache 2.0 + description: T0++ is an multitask fine-tuned language model based on T5. + feedback: https://huggingface.co/bigscience/T0pp/discussions intended_uses: You can use the models to perform inference on tasks by specifying your query in natural language, and the models will generate a prediction. - prohibited_uses: none + license: Apache 2.0 + modality: + explanation: text (English) + value: text; text + model_card: https://huggingface.co/bigscience/T0pp monitoring: none - feedback: https://huggingface.co/bigscience/T0pp/discussions - -- type: model - name: BLOOM + name: T0++ organization: BigScience - description: BLOOM is an autoregressive multilingual language model. 
+ prohibited_uses: none + quality_control: https://arxiv.org/pdf/2110.08207.pdf + size: 11B parameters (dense) + training_emissions: + explanation: Sourced from estimates in https://arxiv.org/pdf/2110.08207.pdf + value: 0.9 tCO2e + training_hardware: Jean Zay (v3-512) + training_time: + explanation: Sourced from estimates in https://arxiv.org/pdf/2110.08207.pdf + value: 27 hours + type: model + url: https://arxiv.org/pdf/2110.08207.pdf +- access: open + analysis: '' created_date: - value: 2022-07-12 explanation: The date the model was released - url: https://arxiv.org/abs/2211.05100 - model_card: '' - modality: text (46 languages) and code (13 programming languages) - size: 176B parameters (dense model) - analysis: '' + value: 2022-07-12 dependencies: [ROOTS] - training_emissions: - value: 25 tCO2e - explanation: Sourced from estimates in https://arxiv.org/abs/2211.02001 - training_time: - value: 7039 petaflop/s-days - explanation: 1082990 A100 hours at 156 TFLOP/s maximum utilization - training_hardware: Jean Zay (48 * 8xA100 80GB nodes) - quality_control: '' - access: open - license: - value: BigScience RAIL v1.0 - explanation: Model is licensed under https://huggingface.co/spaces/bigscience/license + description: BLOOM is an autoregressive multilingual language model. + feedback: https://huggingface.co/bigscience/bloom/discussions intended_uses: This model is being created in order to enable public research on large language models (LLMs). LLMs are intended to be used for language generation or as a pretrained base model that can be further fine-tuned for specific tasks. Use cases below are not exhaustive. + license: + explanation: Model is licensed under https://huggingface.co/spaces/bigscience/license + value: BigScience RAIL v1.0 + modality: + explanation: text (46 languages) and code (13 programming languages) + value: code, text; code, text + model_card: '' + monitoring: none + name: BLOOM + organization: BigScience prohibited_uses: Using the model in high-stakes settings is out of scope for this model (e.g. biomedical/political/legal/finance domains, evaluating or scoring individuals). The model is not designed for critical decisions nor uses with @@ -166,61 +151,72 @@ using the model for harm, violating human rights, or other kinds of malicious activities, is a misuse of this model (e.g. spam generation, disinformation, disparagement, deception, surveillance). - monitoring: none - feedback: https://huggingface.co/bigscience/bloom/discussions - -- type: model - name: mT0 - organization: BigScience - description: mT0 is an multitask fine-tuned multilingual language model based - on mT5. + quality_control: '' + size: 176B parameters (dense) + training_emissions: + explanation: Sourced from estimates in https://arxiv.org/abs/2211.02001 + value: 25 tCO2e + training_hardware: Jean Zay (48 * 8xA100 80GB nodes) + training_time: + explanation: 1082990 A100 hours at 156 TFLOP/s maximum utilization + value: 7039 petaflop/s-days + type: model + url: https://arxiv.org/abs/2211.05100 +- access: open + analysis: https://huggingface.co/bigscience/bloomz#evaluation created_date: - value: 2021-10-15 explanation: The date the model was released - url: https://arxiv.org/pdf/2110.08207.pdf - model_card: https://huggingface.co/bigscience/T0pp - modality: text (English) - size: 13B parameters (dense model) - analysis: https://huggingface.co/bigscience/bloomz#evaluation + value: 2021-10-15 dependencies: [mT5, xP3] + description: mT0 is an multitask fine-tuned multilingual language model based + on mT5. 
+ feedback: https://huggingface.co/bigscience/bloomz/discussions + intended_uses: We recommend using the model to perform tasks expressed in natural + language. + license: + explanation: Model is licensed under https://huggingface.co/spaces/bigscience/license + value: BigScience RAIL v1.0 + modality: + explanation: text (English) + value: text; text + model_card: https://huggingface.co/bigscience/T0pp + monitoring: none + name: mT0 + organization: BigScience + prohibited_uses: none + quality_control: https://arxiv.org/pdf/2211.01786.pdf + size: 13B parameters (dense) training_emissions: unknown - training_time: unknown training_hardware: Jean Zay (288 A100 80GB GPUs with 8 GPUs per node (36 nodes) using NVLink 4 inter-gpu connects, 4 OmniPath links) - quality_control: https://arxiv.org/pdf/2211.01786.pdf - access: open - license: - value: BigScience RAIL v1.0 - explanation: Model is licensed under https://huggingface.co/spaces/bigscience/license + training_time: unknown + type: model + url: https://arxiv.org/pdf/2110.08207.pdf +- access: open + analysis: https://huggingface.co/bigscience/bloomz#evaluation + created_date: 2022-11-03 + dependencies: [BLOOM, xP3] + description: BLOOMZ is an multitask fine-tuned autoregressive multilingual language + model. + feedback: https://huggingface.co/bigscience/bloomz/discussions intended_uses: We recommend using the model to perform tasks expressed in natural language. - prohibited_uses: none + license: + explanation: Model is licensed under https://huggingface.co/spaces/bigscience/license + value: BigScience RAIL v1.0 + modality: + explanation: text (46 languages) and code (13 programming languages) + value: code, text; code, text + model_card: https://huggingface.co/bigscience/bloomz monitoring: none - feedback: https://huggingface.co/bigscience/bloomz/discussions - -- type: model name: BLOOMZ organization: BigScience - description: BLOOMZ is an multitask fine-tuned autoregressive multilingual language - model. - created_date: 2022-11-03 - url: https://arxiv.org/pdf/2211.01786.pdf - model_card: https://huggingface.co/bigscience/bloomz - modality: text (46 languages) and code (13 programming languages) - size: 176B parameters (dense model) - analysis: https://huggingface.co/bigscience/bloomz#evaluation - dependencies: [BLOOM, xP3] + prohibited_uses: none + quality_control: https://arxiv.org/pdf/2211.01786.pdf + size: 176B parameters (dense) training_emissions: unknown - training_time: unknown training_hardware: Jean Zay (288 A100 80GB GPUs with 8 GPUs per node (36 nodes) using NVLink 4 inter-gpu connects, 4 OmniPath links) - quality_control: https://arxiv.org/pdf/2211.01786.pdf - access: open - license: - value: BigScience RAIL v1.0 - explanation: Model is licensed under https://huggingface.co/spaces/bigscience/license - intended_uses: We recommend using the model to perform tasks expressed in natural - language. 
- prohibited_uses: none - monitoring: none - feedback: https://huggingface.co/bigscience/bloomz/discussions + training_time: unknown + type: model + url: https://arxiv.org/pdf/2211.01786.pdf diff --git a/assets/bloomberg.yaml b/assets/bloomberg.yaml index 352f03f9..e6a4b58a 100644 --- a/assets/bloomberg.yaml +++ b/assets/bloomberg.yaml @@ -1,137 +1,126 @@ --- - -- type: dataset - name: FinPile - # General - organization: Bloomberg - description: A comprehensive dataset consisting of a range of English financial - documents including news, filings, press releases, web-scraped financial documents, - and social media drawn from the Bloomberg archives that was used to train the - BloombergGPT model. +- access: + explanation: See [[Openness]](https://arxiv.org/pdf/2303.17564.pdf#subsection.8.2) + value: closed + analysis: '' created_date: - value: 2023-03-30 explanation: The date BloombergGPT was announced in the [[Bloomberg article]](https://www.bloomberg.com/company/press/bloomberggpt-50-billion-parameter-llm-tuned-finance/). - url: https://arxiv.org/pdf/2303.17564.pdf#section.2 + value: 2023-03-30 datasheet: - value: https://arxiv.org/pdf/2303.17564.pdf#section.2 explanation: Section 2 of the BloombergGPT paper. - modality: text (English) - size: - value: 363B tokens - explanation: See [[Financial Datasets]](https://arxiv.org/pdf/2303.17564.pdf#subsection.2.1) - sample: [] - analysis: '' - # Construction + value: https://arxiv.org/pdf/2303.17564.pdf#section.2 dependencies: [] - license: - value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. + description: A comprehensive dataset consisting of a range of English financial + documents including news, filings, press releases, web-scraped financial documents, + and social media drawn from the Bloomberg archives that was used to train the + BloombergGPT model. + excluded: '' + feedback: '' included: - value: | - FinPile consists of English financial documents. Authors utilize the The Bloomberg - Terminal, which is an extensive collection of curated and maintained documents, - to create the FinPile dataset. Each document in FinPile is time-stamped, with - dates ranging from 2007-03-01 to 2022-07-31. - Types of data included are given below: - 1. Web (298B tokens) - Inclues Bloomberg's web crawl focused on high-quality - websites that have financially relevant information. This makes up the majority - of FinPile. - 2. News (38B tokens) - Includes all news sources relevant to the financial - community, excluding news articles written by Bloomberg journalists. Overall, - there are hundreds of English news sources in FinPile including "Bloomberg - Transcripts", which are transcripts of Bloomberg TV news. - 3. Filings (14B tokens) - Includes financial statements prepared by (public) - companies and made available to the general public. In the dataset, a majority - of the filings come from EDGAR, which is the SEC's online database. - 4. Press (9B tokens) - Includes press releases typically issued by companies - that are financially relevant. - 5. Bloomberg (5B tokens) - Includes Bloomberg authored news and other documents - such as opinions and analyses. The largest sources are “Bloomberg News” and - “Bloomberg First Word”, the Bloomberg-authored wire of real-time news. 
explanation: See [[Financial Datasets]](https://arxiv.org/pdf/2303.17564.pdf#subsection.2.1) - excluded: '' - quality_control: '' - # Downstream - access: - value: closed - explanation: See [[Openness]](https://arxiv.org/pdf/2303.17564.pdf#subsection.8.2) + value: "FinPile consists of English financial documents. Authors utilize the\ + \ The Bloomberg\nTerminal, which is an extensive collection of curated and\ + \ maintained documents,\nto create the FinPile dataset. Each document in FinPile\ + \ is time-stamped, with\ndates ranging from 2007-03-01 to 2022-07-31.\nTypes\ + \ of data included are given below:\n 1. Web (298B tokens) - Inclues Bloomberg's\ + \ web crawl focused on high-quality\nwebsites that have financially relevant\ + \ information. This makes up the majority\nof FinPile.\n 2. News (38B tokens)\ + \ - Includes all news sources relevant to the financial\ncommunity, excluding\ + \ news articles written by Bloomberg journalists. Overall,\nthere are hundreds\ + \ of English news sources in FinPile including \"Bloomberg\nTranscripts\"\ + , which are transcripts of Bloomberg TV news.\n 3. Filings (14B tokens) -\ + \ Includes financial statements prepared by (public)\ncompanies and made available\ + \ to the general public. In the dataset, a majority\nof the filings come\ + \ from EDGAR, which is the SEC's online database.\n 4. Press (9B tokens)\ + \ - Includes press releases typically issued by companies\nthat are financially\ + \ relevant.\n 5. Bloomberg (5B tokens) - Includes Bloomberg authored news\ + \ and other documents\nsuch as opinions and analyses. The largest sources\ + \ are “Bloomberg News” and\n“Bloomberg First Word”, the Bloomberg-authored\ + \ wire of real-time news.\n" intended_uses: Used to train the BloombergGPT model. - prohibited_uses: '' + license: + explanation: "The asset isn't released, and hence the license is unknown.\n" + value: unknown + modality: + explanation: text (English) + value: text monitoring: '' - feedback: '' - -- type: model - name: BloombergGPT - # General + name: FinPile organization: Bloomberg + prohibited_uses: '' + quality_control: '' + sample: [] + size: + explanation: See [[Financial Datasets]](https://arxiv.org/pdf/2303.17564.pdf#subsection.2.1) + value: 363B tokens + type: dataset + url: https://arxiv.org/pdf/2303.17564.pdf#section.2 +- access: + explanation: See [[Openness]](https://arxiv.org/pdf/2303.17564.pdf#subsection.8.2) + value: closed + analysis: + explanation: See [[Evaluation]](https://arxiv.org/pdf/2303.17564.pdf#section.5) + value: "Authors evaluate the performance of BloombergGPT on two broad categories\ + \ of tasks, finance-specific and general purpose, on several standard benchmarks.\ + \ They compare BloombergGPT to the three closest models: GPT-NeoX, OPT-66B\ + \ and BLOOM-176B. They also report results from the original GPT-3 whenever\ + \ externally available. They conclude \"We achieve strong results on general\ + \ LLM benchmarks and outperform comparable models on financial tasks. We attribute\ + \ this, in decreasing order of impact, to 1. a well-curated internal dataset,\ + \ 2. our unique choice in tokenizer, and 3. an up-to-date architecture.\"\n" + created_date: + explanation: The date the model was announced in the [[Bloomberg article]](https://www.bloomberg.com/company/press/bloomberggpt-50-billion-parameter-llm-tuned-finance/). 
+ value: 2023-03-30 + dependencies: [FinPile, The Pile, C4, Wikipedia] description: BloombergGPT is a 50 billion parameter large language model that is specifically trained on a wide range of financial data to support a diverse set of natural language processing tasks within the financial industry. - created_date: - value: 2023-03-30 - explanation: The date the model was announced in the [[Bloomberg article]](https://www.bloomberg.com/company/press/bloomberggpt-50-billion-parameter-llm-tuned-finance/). - url: https://arxiv.org/abs/2303.17564 + feedback: '' + intended_uses: + explanation: See [[Bloomberg article]](https://www.bloomberg.com/company/press/bloomberggpt-50-billion-parameter-llm-tuned-finance/) + value: "\"This model will assist Bloomberg in improving existing financial NLP\ + \ tasks, such as sentiment analysis, named entity recognition, news classification,\ + \ and question answering, among others. Furthermore, BloombergGPT will unlock\ + \ new opportunities for marshalling the vast quantities of data available\ + \ on the Bloomberg Terminal to better help the firm's customers, while bringing\ + \ the full potential of AI to the financial domain.\"\n" + license: + explanation: "The asset isn't released, and hence the license is unknown.\n" + value: unknown + modality: + explanation: text (English) + value: text; text model_card: none - modality: text (English) - analysis: - value: > - Authors evaluate the performance of BloombergGPT on two broad categories of - tasks, finance-specific and general purpose, on several standard benchmarks. - They compare BloombergGPT to the three closest models: GPT-NeoX, OPT-66B and - BLOOM-176B. They also report results from the original GPT-3 whenever externally - available. They conclude "We achieve strong results on general LLM benchmarks - and outperform comparable models on financial tasks. We attribute this, in - decreasing order of impact, to 1. a well-curated internal dataset, 2. our - unique choice in tokenizer, and 3. an up-to-date architecture." - explanation: See [[Evaluation]](https://arxiv.org/pdf/2303.17564.pdf#section.5) - size: 50B parameters (dense model) - # Construction - dependencies: [FinPile, The Pile, C4, Wikipedia] + monitoring: '' + name: BloombergGPT + organization: Bloomberg + prohibited_uses: '' + quality_control: + explanation: See [[Ethics, Limitations, and Implications]](https://arxiv.org/pdf/2303.17564.pdf#section.8) + value: "Authors state the following:\n- \"To provide natural language applications\ + \ to the financial community, we\n have developed a rigorous risk and testing\ + \ assessment process. This process\n includes careful annotation guidelines\ + \ Tseng et al. (2020), pre-launch review\n at multiple levels by the central\ + \ risk and compliance organizations, and\n by the product leaders (e.g.,\ + \ the newsroom) as applicable, and post-launch\n monitoring. Moreover, we\ + \ conduct our research, development, and deployment\n of NLP and AI systems\ + \ in accordance with all applicable regulations.\"\n- \"Similarly, toxicity\ + \ and bias are areas where, as a company, we take extraordinary\n care with\ + \ any content we produce, whether from humans or machines. Since\n the measurement\ + \ of toxicity and bias in our model depends on its application\n areas, quantifying\ + \ the potential for the generation of harmful language\n remains an open\ + \ question. 
We are particularly interested in studying whether\n FinPile,\ + \ which is cleaner and contains fewer examples of overtly biased\n or toxic\ + \ language (e.g., Press Releases), reduces the proclivity of the\n model\ + \ to generate inappropriate content.\"\n" + size: 50B parameters (dense) training_emissions: unknown - training_time: - value: 53 days - explanation: See [[Training Run]](https://arxiv.org/pdf/2303.17564.pdf#section.4) training_hardware: + explanation: See [[Training Configuration]](https://arxiv.org/pdf/2303.17564.pdf#subsection.3.3) value: 64 Amazon EC2 p4d.24xlarge instances each with 8 NVIDIA 40GB A100 GPUs (i.e. total 512 A100 GPUs) - explanation: See [[Training Configuration]](https://arxiv.org/pdf/2303.17564.pdf#subsection.3.3) - quality_control: - value: | - Authors state the following: - - "To provide natural language applications to the financial community, we - have developed a rigorous risk and testing assessment process. This process - includes careful annotation guidelines Tseng et al. (2020), pre-launch review - at multiple levels by the central risk and compliance organizations, and - by the product leaders (e.g., the newsroom) as applicable, and post-launch - monitoring. Moreover, we conduct our research, development, and deployment - of NLP and AI systems in accordance with all applicable regulations." - - "Similarly, toxicity and bias are areas where, as a company, we take extraordinary - care with any content we produce, whether from humans or machines. Since - the measurement of toxicity and bias in our model depends on its application - areas, quantifying the potential for the generation of harmful language - remains an open question. We are particularly interested in studying whether - FinPile, which is cleaner and contains fewer examples of overtly biased - or toxic language (e.g., Press Releases), reduces the proclivity of the - model to generate inappropriate content." - explanation: See [[Ethics, Limitations, and Implications]](https://arxiv.org/pdf/2303.17564.pdf#section.8) - # Downstream - access: - value: closed - explanation: See [[Openness]](https://arxiv.org/pdf/2303.17564.pdf#subsection.8.2) - license: - value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - intended_uses: - value: > - "This model will assist Bloomberg in improving existing financial NLP - tasks, such as sentiment analysis, named entity recognition, news classification, - and question answering, among others. Furthermore, BloombergGPT will unlock - new opportunities for marshalling the vast quantities of data available on - the Bloomberg Terminal to better help the firm's customers, while bringing - the full potential of AI to the financial domain." 
- explanation: See [[Bloomberg article]](https://www.bloomberg.com/company/press/bloomberggpt-50-billion-parameter-llm-tuned-finance/) - prohibited_uses: '' - monitoring: '' - feedback: '' + training_time: + explanation: See [[Training Run]](https://arxiv.org/pdf/2303.17564.pdf#section.4) + value: 53 days + type: model + url: https://arxiv.org/abs/2303.17564 diff --git a/assets/brex.yaml b/assets/brex.yaml index 374f6842..76d01b3e 100644 --- a/assets/brex.yaml +++ b/assets/brex.yaml @@ -1,30 +1,28 @@ --- - -- type: application - name: Brex Chat - organization: Brex +- access: limited + adaptation: '' + created_date: 2023-03-07 + dependencies: [ChatGPT API] description: Brex Inc., a highly valued startup that makes software for finance professionals, is turning to the same artificial intelligence tool behind ChatGPT for a service that can answer questions about corporate budgets, policy and spending. - created_date: 2023-03-07 - url: https://fortune.com/2023/03/07/cfo-chatbot-chatgpt-ai-brex-finance-software-startup-budgets-policies/ - dependencies: [ChatGPT API] - adaptation: '' - output_space: '' - quality_control: '' - access: limited + failures: '' + feedback: '' + intended_uses: '' license: + explanation: "\"Brex grants you a nonexclusive and nontransferable license to\ + \ use Brex Property as provided through the Services and as permitted by these\ + \ User Terms.\" - excerpt from the Terms of Service.\n" value: custom - explanation: > - "Brex grants you a nonexclusive and nontransferable license to use Brex - Property as provided through the Services and as permitted by these User - Terms." - excerpt from the Terms of Service. - terms_of_service: https://www.brex.com/legal/user-terms - intended_uses: '' - prohibited_uses: '' monitoring: '' - feedback: '' monthly_active_users: '' + name: Brex Chat + organization: Brex + output_space: '' + prohibited_uses: '' + quality_control: '' + terms_of_service: https://www.brex.com/legal/user-terms + type: application + url: https://fortune.com/2023/03/07/cfo-chatbot-chatgpt-ai-brex-finance-software-startup-budgets-policies/ user_distribution: '' - failures: '' diff --git a/assets/casia.yaml b/assets/casia.yaml index 0634f801..4190e5a9 100644 --- a/assets/casia.yaml +++ b/assets/casia.yaml @@ -1,25 +1,26 @@ --- - -- type: model - name: BigTrans - organization: Institute of Automation Chinese Academy of Sciences - description: BigTrans is a model which adapts LLaMA that covers only 20 languages - and enhances it with multilingual translation capability on more than 100 languages - created_date: 2023-05-29 - url: https://arxiv.org/pdf/2305.18098v1.pdf - model_card: https://huggingface.co/James-WYang/BigTrans - modality: text +- access: open analysis: Reports results on standard translation benchmarks across 102 languages in comparison with Google Translate and ChatGPT - size: 13B parameters (dense model) + created_date: 2023-05-29 dependencies: [LLaMA, CLUE, BigTrans parallel dataset] - training_emissions: unknown - training_time: unknown - training_hardware: 16 A100 GPUs with 80 GB of RAM - quality_control: '' - access: open - license: Apache 2.0 + description: BigTrans is a model which adapts LLaMA that covers only 20 languages + and enhances it with multilingual translation capability on more than 100 languages + feedback: https://huggingface.co/James-WYang/BigTrans/discussions intended_uses: Advancing future research in multilingual LLMs - prohibited_uses: '' + license: Apache 2.0 + modality: + explanation: text + value: text; text + 
model_card: https://huggingface.co/James-WYang/BigTrans monitoring: '' - feedback: https://huggingface.co/James-WYang/BigTrans/discussions + name: BigTrans + organization: Institute of Automation Chinese Academy of Sciences + prohibited_uses: '' + quality_control: '' + size: 13B parameters (dense) + training_emissions: unknown + training_hardware: 16 A100 GPUs with 80 GB of RAM + training_time: unknown + type: model + url: https://arxiv.org/pdf/2305.18098v1.pdf diff --git a/assets/cerebras.yaml b/assets/cerebras.yaml index 636a9f70..421cd8a4 100644 --- a/assets/cerebras.yaml +++ b/assets/cerebras.yaml @@ -1,76 +1,59 @@ --- - -- type: model - name: Cerebras-GPT - # General - organization: Cerebras - description: > - A Family of Open, Compute-efficient, Large Language Models. The family includes - 111M, 256M, 590M, 1.3B, 2.7B, 6.7B, and 13B models. All models in the Cerebras-GPT - family have been trained in accordance with Chinchilla scaling laws (20 tokens - per model parameter). - [[Cerebras Blog Post]](https://www.cerebras.net/blog/cerebras-gpt-a-family-of-open-compute-efficient-large-language-models) +- access: + explanation: "The Pile is an open source dataset. Hugging Face compatible checkpoints\ + \ available on the [[Cerebras Hugging Face page]](https://huggingface.co/cerebras/Cerebras-GPT-13B).\ + \ Cerebras systems checkpoints for pre-training and fine tuning are available\ + \ in the cloud via the [[Cerebras Model Studio]](https://www.cerebras.net/product-cloud/).\n" + value: open + analysis: "\"We evaluate our models on the PILE validation set comprising 380M\ + \ tokens. We also evaluate the public checkpoints of Pythia, Eleuther (2022);\ + \ OPT, Zhang et al. (2022); GPT-NeoX 20B, Black et al. (2022); and GPT-J 6B,\ + \ Wang & Komatsuzaki (2021). We performed upstream (pre-training) evaluations\ + \ of text prediction cross-entropy using the Pile validation and test splits.\ + \ We performed downstream evaluations of text generation accuracy on standardized\ + \ tasks using the Eleuther lm-evaluation-harness.\" [[Evaluations]] (https://github.com/Cerebras/modelzoo/tree/main/modelzoo/transformers/pytorch/gpt3/configs/Cerebras_GPT#evaluations).\n" created_date: + explanation: "The date the model was announced in the [[Cerebras blog post]](https://www.cerebras.net/blog/cerebras-gpt-a-family-of-open-compute-efficient-large-language-models).\n" value: 2023-03-28 - explanation: > - The date the model was announced in the [[Cerebras blog post]](https://www.cerebras.net/blog/cerebras-gpt-a-family-of-open-compute-efficient-large-language-models). - url: https://www.cerebras.net/blog/cerebras-gpt-a-family-of-open-compute-efficient-large-language-models/ - model_card: https://huggingface.co/cerebras/Cerebras-GPT-13B - modality: text (English) - size: - value: 13B parameters (dense model) - explanation: Size of the largest model in the Cerebras-GPT family. - analysis: > - "We evaluate our models on the PILE validation set comprising 380M tokens. We - also evaluate the public checkpoints of Pythia, Eleuther (2022); OPT, Zhang - et al. (2022); GPT-NeoX 20B, Black et al. (2022); and GPT-J 6B, Wang & Komatsuzaki - (2021). We performed upstream (pre-training) evaluations of text prediction - cross-entropy using the Pile validation and test splits. We performed downstream - evaluations of text generation accuracy on standardized tasks using the Eleuther - lm-evaluation-harness." 
- [[Evaluations]] - (https://github.com/Cerebras/modelzoo/tree/main/modelzoo/transformers/pytorch/gpt3/configs/Cerebras_GPT#evaluations). - # Construction dependencies: [The Pile] + description: "A Family of Open, Compute-efficient, Large Language Models. The\ + \ family includes 111M, 256M, 590M, 1.3B, 2.7B, 6.7B, and 13B models. All models\ + \ in the Cerebras-GPT family have been trained in accordance with Chinchilla\ + \ scaling laws (20 tokens per model parameter). [[Cerebras Blog Post]](https://www.cerebras.net/blog/cerebras-gpt-a-family-of-open-compute-efficient-large-language-models)\n" + feedback: '' + intended_uses: "\"The primary intended use is to further research into large language\ + \ models. These models can be used as a foundation model for NLP, applications,\ + \ ethics, and alignment research. Our primary intended users are researchers\ + \ who are working to improve LLMs and practitioners seeking reference implementations,\ + \ training setups, hyperparameters, or pre-trained models. We release these\ + \ models with a fully permissive Apache license for the community to use freely.\"\ + \ [[Uses and Limitations]](https://github.com/Cerebras/modelzoo/tree/main/modelzoo/transformers/pytorch/gpt3/configs/Cerebras_GPT#uses-and-limitations).\n" + license: Apache 2.0 + modality: + explanation: text (English) + value: text; text + model_card: https://huggingface.co/cerebras/Cerebras-GPT-13B + monitoring: '' + name: Cerebras-GPT + organization: Cerebras + prohibited_uses: "Authors note the following limitations of the model: \"Cerebras-GPT\ + \ models are trained on the Pile, with English language only, and are not suitable\ + \ for machine translation tasks. Cerebras-GPT models have not been tuned for\ + \ human-facing dialog applications like chatbots and will not respond to prompts\ + \ in a similar way to models that have received instruction tuning or reinforcement\ + \ learning from human feedback (RLHF) like Flan-T5 or ChatGPT.\" [[Uses and\ + \ Limitations]](https://github.com/Cerebras/modelzoo/tree/main/modelzoo/transformers/pytorch/gpt3/configs/Cerebras_GPT#out-of-scope-use).\n" + quality_control: "The Pile dataset has been thoroughly analyzed from various ethical\ + \ standpoints such as toxicity analysis, gender bias, pejorative content, racially\ + \ sensitive content etc. Only mitigations in standard Pile dataset pre-processing\ + \ were employed when pre-training Cerebras-GPT. [[Risk, Bias, Ethical Considerations]](https://github.com/Cerebras/modelzoo/tree/main/modelzoo/transformers/pytorch/gpt3/configs/Cerebras_GPT#risk-bias-ethical-considerations)\n" + size: 13B parameters (dense) training_emissions: '' - training_time: - value: '' - explanation: '' training_hardware: + explanation: "According to [[Model Description]](https://huggingface.co/cerebras/Cerebras-GPT-13B#model-description)\n" value: 16x Cerebras CS-2 wafer scale systems - explanation: > - According to [[Model Description]](https://huggingface.co/cerebras/Cerebras-GPT-13B#model-description) - quality_control: > - The Pile dataset has been thoroughly analyzed from various ethical standpoints - such as toxicity analysis, gender bias, pejorative content, racially sensitive - content etc. Only mitigations in standard Pile dataset pre-processing were employed - when pre-training Cerebras-GPT. 
- [[Risk, Bias, Ethical Considerations]](https://github.com/Cerebras/modelzoo/tree/main/modelzoo/transformers/pytorch/gpt3/configs/Cerebras_GPT#risk-bias-ethical-considerations) - # Downstream - access: - value: open - explanation: > - The Pile is an open source dataset. - Hugging Face compatible checkpoints available on the [[Cerebras Hugging Face - page]](https://huggingface.co/cerebras/Cerebras-GPT-13B). - Cerebras systems checkpoints for pre-training and fine tuning are available - in the cloud via the [[Cerebras Model Studio]](https://www.cerebras.net/product-cloud/). - license: Apache 2.0 - intended_uses: > - "The primary intended use is to further research into large language models. - These models can be used as a foundation model for NLP, applications, ethics, - and alignment research. Our primary intended users are researchers who are working - to improve LLMs and practitioners seeking reference implementations, training - setups, hyperparameters, or pre-trained models. We release these models with - a fully permissive Apache license for the community to use freely." - [[Uses and Limitations]](https://github.com/Cerebras/modelzoo/tree/main/modelzoo/transformers/pytorch/gpt3/configs/Cerebras_GPT#uses-and-limitations). - prohibited_uses: > - Authors note the following limitations of the model: "Cerebras-GPT models are - trained on the Pile, with English language only, and are not suitable for machine - translation tasks. Cerebras-GPT models have not been tuned for human-facing - dialog applications like chatbots and will not respond to prompts in a similar - way to models that have received instruction tuning or reinforcement learning - from human feedback (RLHF) like Flan-T5 or ChatGPT." - [[Uses and Limitations]](https://github.com/Cerebras/modelzoo/tree/main/modelzoo/transformers/pytorch/gpt3/configs/Cerebras_GPT#out-of-scope-use). - monitoring: '' - feedback: '' + training_time: + explanation: '' + value: '' + type: model + url: https://www.cerebras.net/blog/cerebras-gpt-a-family-of-open-compute-efficient-large-language-models/ diff --git a/assets/chatglm.yaml b/assets/chatglm.yaml index 5bb6d82e..7005d126 100644 --- a/assets/chatglm.yaml +++ b/assets/chatglm.yaml @@ -1,24 +1,25 @@ --- - -- type: model - name: ChatGLM - organization: ChatGLM +- access: open + analysis: Performance evaluated on English and Chinese language benchmark tests. + created_date: 2023-03-14 + dependencies: [] description: ChatGLM is a Chinese-English language model with question and answer and dialogue functions, and is aimed at a Chinese audience. - created_date: 2023-03-14 - url: https://chatglm.cn/blog + feedback: '' + intended_uses: '' + license: Apache 2.0 + modality: + explanation: text + value: text; text model_card: '' - modality: text - analysis: Performance evaluated on English and Chinese language benchmark tests. 
- size: 6B parameters - dependencies: [] + monitoring: '' + name: ChatGLM + organization: ChatGLM + prohibited_uses: '' + quality_control: '' + size: 6B parameters (dense) training_emissions: unknown - training_time: unknown training_hardware: '' - quality_control: '' - access: open - license: Apache 2.0 - intended_uses: '' - prohibited_uses: '' - monitoring: '' - feedback: '' + training_time: unknown + type: model + url: https://chatglm.cn/blog diff --git a/assets/cmu.yaml b/assets/cmu.yaml index ceda272f..d5085ad0 100644 --- a/assets/cmu.yaml +++ b/assets/cmu.yaml @@ -1,34 +1,35 @@ --- - -- type: model - name: PolyCoder - organization: CMU +- access: + explanation: Model checkpoints are available for download at https://github.com/VHellendoorn/Code-LMs + value: open + analysis: Reports results on standard code benchmarks across a variety of programming + languages. + created_date: + explanation: The date the model paper was released + value: 2022-02-26 + dependencies: [Github] description: PolyCoder is a code model trained on 2.7B parameters based on the GPT-2 architecture, which was trained on 249GB of code across 12 programming languages on a single machine. - created_date: - value: 2022-02-26 - explanation: The date the model paper was released - url: https://arxiv.org/abs/2202.13169 + feedback: https://huggingface.co/NinedayWang/PolyCoder-2.7B/discussion + intended_uses: unknown + license: + explanation: The license is provided in the [[Github repository]](https://github.com/VHellendoorn/Code-LMs) + value: MIT + modality: + explanation: code (12 programming languages) + value: code; code model_card: https://huggingface.co/NinedayWang/PolyCoder-2.7B - modality: code (12 programming languages) - analysis: Reports results on standard code benchmarks across a variety of programming - languages. - size: 2.7B parameters (dense model) - dependencies: [Github] - training_emissions: unknown - training_time: 6 weeks - training_hardware: 8 NVIDIA RTX 8000 + monitoring: None + name: PolyCoder + organization: CMU + prohibited_uses: None quality_control: No specific quality control is mentioned in model training, though details on data processing and how the tokenizer was trained are provided in the paper. - access: - value: open - explanation: Model checkpoints are available for download at https://github.com/VHellendoorn/Code-LMs - license: - value: MIT - explanation: The license is provided in the [[Github repository]](https://github.com/VHellendoorn/Code-LMs) - intended_uses: unknown - prohibited_uses: None - monitoring: None - feedback: https://huggingface.co/NinedayWang/PolyCoder-2.7B/discussion + size: 2.7B parameters (dense) + training_emissions: unknown + training_hardware: 8 NVIDIA RTX 8000 + training_time: 6 weeks + type: model + url: https://arxiv.org/abs/2202.13169 diff --git a/assets/cohere.yaml b/assets/cohere.yaml index 21c3379a..9051aa78 100644 --- a/assets/cohere.yaml +++ b/assets/cohere.yaml @@ -1,657 +1,460 @@ --- - -- type: dataset - name: coheretext - # General - organization: Cohere - description: > - The family of datasets used to train Cohere models, which come in two forms: - coheretext-filtered and coheretext-unfiltered. The former is used to train the - Representation models, while the latter one is used to train the Generation - models. +- access: + explanation: The dataset isn't provided to the public. + value: closed + analysis: + explanation: The analysis performed on the dataset is not released. 
+ value: unknown created_date: + explanation: "The date the Cohere API was announced on the news [[News Article]]\ + \ (https://venturebeat.com/2021/11/15/openai-rival-cohere-launches-language-model-api/).\n" value: 2021-11-15 - explanation: > - The date the Cohere API was announced on the news - [[News Article]] - (https://venturebeat.com/2021/11/15/openai-rival-cohere-launches-language-model-api/). - url: none datasheet: https://docs.cohere.ai/data-statement - modality: text (English) - size: - value: 200 GB - explanation: > - Size of the filtered dataset is 200 GB. The unfiltered dataset is ~3 TB. - sample: [] - analysis: - value: unknown - explanation: The analysis performed on the dataset is not released. - # Construction dependencies: [] - license: + description: "The family of datasets used to train Cohere models, which come in\ + \ two forms: coheretext-filtered and coheretext-unfiltered. The former is used\ + \ to train the Representation models, while the latter one is used to train\ + \ the Generation models.\n" + excluded: "Documents that are not in English are excluded.\n" + feedback: + explanation: The internal feedback mechanisms for Cohere are unknown. value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - included: > - As stated in the datasheet, the dataset "includes the Google Books dataset, - CommonCrawl, and text from the internet scraped by the Cohere infrastructure - team." The top ten domains scraped were: wordpress.com, medium.com, - stackexchange.com, tumblr.com, elsevier.com, genius.com, bbc.co.uk, - libsyn.com, yahoo.com, nytimes.com - [[Datasheet]](https://docs.cohere.ai/data-statement). - excluded: > - Documents that are not in English are excluded. - quality_control: > - In the datasheet, it is implied that Cohere employs filtration methods - for removing racist, biased and toxic content, but the details are not - provided. These filtration methods take both the context and the language, - as opposed to using a list of blockwords - [[Datasheet]](https://docs.cohere.ai/data-statement). - # Downstream - access: - value: closed - explanation: The dataset isn't provided to the public. - intended_uses: > - The intended use of the dataset is to train Cohere's language models. - prohibited_uses: + included: "As stated in the datasheet, the dataset \"includes the Google Books\ + \ dataset, CommonCrawl, and text from the internet scraped by the Cohere infrastructure\ + \ team.\" The top ten domains scraped were: wordpress.com, medium.com, stackexchange.com,\ + \ tumblr.com, elsevier.com, genius.com, bbc.co.uk, libsyn.com, yahoo.com, nytimes.com\ + \ [[Datasheet]](https://docs.cohere.ai/data-statement).\n" + intended_uses: "The intended use of the dataset is to train Cohere's language\ + \ models.\n" + license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - There are no known prohibited uses of the dataset, but the Cohere API is - bound by the Cohere usage guidelines, which may also apply to this dataset - [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines). + modality: + explanation: text (English) + value: text monitoring: + explanation: "There is no information on how Cohere is internally monitoring\ + \ the use of the dataset.\n" value: unknown - explanation: > - There is no information on how Cohere is internally monitoring the use of - the dataset. - feedback: - value: unknown - explanation: The internal feedback mechanisms for Cohere are unknown. 
- -- type: model - name: Cohere Base - # General + name: coheretext organization: Cohere - description: > - The Generations model is a language model trained by Cohere for - generation tasks. - created_date: - value: 2021-11-15 - explanation: > - The date the Cohere API was announced on the news - [[News Article]] - (https://venturebeat.com/2021/11/15/openai-rival-cohere-launches-language-model-api/). - url: none - model_card: https://docs.cohere.ai/generation-card - modality: text (English) - size: + prohibited_uses: + explanation: "There are no known prohibited uses of the dataset, but the Cohere\ + \ API is bound by the Cohere usage guidelines, which may also apply to this\ + \ dataset [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines).\n" value: unknown - explanation: > - The exact sizes of the generation models are unknown, but we know - that they come in three sizes: small, medium, and large - [[Model Card]](https://docs.cohere.ai/generation-card). + quality_control: "In the datasheet, it is implied that Cohere employs filtration\ + \ methods for removing racist, biased and toxic content, but the details are\ + \ not provided. These filtration methods take both the context and the language,\ + \ as opposed to using a list of blockwords [[Datasheet]](https://docs.cohere.ai/data-statement).\n" + sample: [] + size: + explanation: "Size of the filtered dataset is 200 GB. The unfiltered dataset\ + \ is ~3 TB.\n" + value: 200 GB + type: dataset + url: none +- access: + explanation: "The model is available to the public through the Cohere Platform\ + \ [[Cohere Platform]](https://os.cohere.ai/login).\n" + value: limited analysis: The model's performance was analyzed on Hellaswag and COPA, as well as several safety benchmarks [[Model Card]](https://docs.cohere.ai/generation-card). - # Construction + created_date: + explanation: "The date the Cohere API was announced on the news [[News Article]]\ + \ (https://venturebeat.com/2021/11/15/openai-rival-cohere-launches-language-model-api/).\n" + value: 2021-11-15 dependencies: [coheretext] - training_emissions: + description: "The Generations model is a language model trained by Cohere for\ + \ generation tasks.\n" + feedback: + explanation: "There are no specific feedback mechanisms for the Generation model,\ + \ but a generic contact email is provided on Cohere website, which is support\ + \ at cohere.ai.\n" + value: unknown + intended_uses: "On the model card, the intended uses are stated as \"interactive\ + \ autocomplete, augmenting human writing processes, summarization, text rephrasing,\ + \ and other text-to-text tasks in non-sensitive domains\" [[Model Card]](https://docs.cohere.ai/generation-card).\n" + license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown + modality: + explanation: text (English) + value: text; text + model_card: https://docs.cohere.ai/generation-card + monitoring: "The usage of the model is monitored by Cohere [[Model Card]](https://docs.cohere.ai/generation-card).\n" + name: Cohere Base + organization: Cohere + prohibited_uses: "The usage of the model is bound by the Cohere usage guidelines\ + \ [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines). 
A non-comprehensive\ + \ list of specific application violating these guidelines are: astroturfing,\ + \ generation of misinformation and other harmful content, and \"generation of\ + \ text about people, places, or events without a human-in-the-loop\" [[Model\ + \ Card]](https://docs.cohere.ai/generation-card).\n" + quality_control: + explanation: "The quality control measures taken are unknown, but it is implied\ + \ that Cohere performed mitigation strategies for toxic degeneration [[Model\ + \ Card]](https://docs.cohere.ai/generation-card).\n" + value: unknown + size: unknown + training_emissions: explanation: The emissions of the models are unknown. - training_time: value: unknown - explanation: The training time for the models are unknown. training_hardware: + explanation: "The training hardware wasn't explicitly announced, but it was\ + \ reported that Google Cloud teamed up with Cohere on a TPU partnership [[TechCrunch\ + \ Article]] (https://techcrunch.com/2021/11/17/google-cloud-teams-up-with-nlp-startup-cohere-on-multi-year-partnership/).\n" value: unknown - explanation: > - The training hardware wasn't explicitly announced, but it was - reported that Google Cloud teamed up with Cohere on a TPU partnership - [[TechCrunch Article]] - (https://techcrunch.com/2021/11/17/google-cloud-teams-up-with-nlp-startup-cohere-on-multi-year-partnership/). - quality_control: + training_time: + explanation: The training time for the models are unknown. value: unknown - explanation: > - The quality control measures taken are unknown, but it is implied - that Cohere performed mitigation strategies for toxic degeneration - [[Model Card]](https://docs.cohere.ai/generation-card). - # Downstream - access: + type: model + url: none +- access: + explanation: "The model is available to the public through the Cohere Platform\ + \ [[Cohere Platform]](https://os.cohere.ai/login).\n" value: limited - explanation: > - The model is available to the public through the Cohere Platform - [[Cohere Platform]](https://os.cohere.ai/login). - license: - value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - intended_uses: > - On the model card, the intended uses are stated as "interactive - autocomplete, augmenting human writing processes, summarization, - text rephrasing, and other text-to-text tasks in non-sensitive domains" - [[Model Card]](https://docs.cohere.ai/generation-card). - prohibited_uses: > - The usage of the model is bound by the Cohere usage guidelines - [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines). - A non-comprehensive list of specific application violating these guidelines - are: astroturfing, generation of misinformation and other harmful content, - and "generation of text about people, places, or events without a - human-in-the-loop" - [[Model Card]](https://docs.cohere.ai/generation-card). - monitoring: > - The usage of the model is monitored by Cohere - [[Model Card]](https://docs.cohere.ai/generation-card). + analysis: The model's performance was analyzed on Hellaswag and COPA, as well + as several safety benchmarks [[Model Card]](https://docs.cohere.ai/generation-card). 
+ created_date: + explanation: "Cohere Command was in beta before this date\n" + value: 2023-01-01 + dependencies: [Cohere Base] + description: "This model is a generative model optimized to follow commands in\ + \ the prompt.\n" feedback: + explanation: "There are no specific feedback mechanisms for the Generation model,\ + \ but a generic contact email is provided on Cohere website, which is support\ + \ at cohere.ai.\n" + value: unknown + intended_uses: "On the model card, the intended uses are stated as \"interactive\ + \ autocomplete, augmenting human writing processes, summarization, text rephrasing,\ + \ and other text-to-text tasks in non-sensitive domains\" [[Model Card]](https://docs.cohere.ai/generation-card).\n" + license: + explanation: The model likely has a license specifically for Cohere's use. value: unknown - explanation: > - There are no specific feedback mechanisms for the Generation model, - but a generic contact email is provided on Cohere website, which is - support at cohere.ai. - -- type: model + modality: + explanation: text (English) + value: text; text + model_card: https://docs.cohere.ai/generation-card + monitoring: "The usage of the model is monitored by Cohere [[Model Card]](https://docs.cohere.ai/generation-card).\n" name: Cohere Command - # General organization: Cohere - description: > - This model is a generative model optimized to follow commands in the prompt. - created_date: - value: 2023-01-01 - explanation: > - Cohere Command was in beta before this date - url: https://docs.cohere.com/docs/command-beta - model_card: https://docs.cohere.ai/generation-card - modality: text (English) - size: + prohibited_uses: "The usage of the model is bound by the Cohere usage guidelines\ + \ [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines). A non-comprehensive\ + \ list of specific application violating these guidelines are: astroturfing,\ + \ generation of misinformation and other harmful content, and \"generation of\ + \ text about people, places, or events without a human-in-the-loop\" [[Model\ + \ Card]](https://docs.cohere.ai/generation-card).\n" + quality_control: + explanation: unknown value: unknown - explanation: > - The exact sizes of the generation models are unknown, but we know - that they come in three sizes: small, medium, and large - [[Model Card]](https://docs.cohere.ai/generation-card). - analysis: The model's performance was analyzed on Hellaswag and COPA, as well - as several safety benchmarks [[Model Card]](https://docs.cohere.ai/generation-card). - # Construction - dependencies: [Cohere Base] + size: unknown training_emissions: - value: unknown explanation: unknown - training_time: value: unknown - explanation: unknown training_hardware: - value: unknown explanation: unknown - quality_control: value: unknown + training_time: explanation: unknown - # Downstream - access: + value: unknown + type: model + url: https://docs.cohere.com/docs/command-beta +- access: + explanation: "The model is available to the public through the Cohere Platform\ + \ [[Cohere Platform]](https://cohere.ai/).\n" value: limited - explanation: > - The model is available to the public through the Cohere Platform - [[Cohere Platform]](https://os.cohere.ai/login). 
- license: + analysis: "The model's performance was analyzed on several safety benchmarks [[Model\ + \ Card]](https://docs.cohere.ai/representation-card).\n" + created_date: + explanation: "The date the Cohere API was announced on the news [[News Article]]\ + \ (https://venturebeat.com/2021/11/15/openai-rival-cohere-launches-language-model-api/).\n" + value: 2021-11-15 + dependencies: [] + description: "The Embedding Large (English) model is a language model trained\ + \ by Cohere for tasks requiring embeddings.\n" + feedback: + explanation: "There are no specific feedback mechanisms for the Generation model,\ + \ but a generic contact email is provided on Cohere website, which is support\ + \ at cohere.ai.\n" value: unknown + intended_uses: "The intended uses are stated as \"estimating semantic similarity\ + \ between two sentences, choosing a sentence which is most likely to follow\ + \ another sentence, sentiment analysis, topic extraction, or categorizing user\ + \ feedback\" on the Cohere model card [[Model Card]](https://docs.cohere.ai/representation-card).\n" + license: explanation: The model likely has a license specifically for Cohere's use. - intended_uses: > - On the model card, the intended uses are stated as "interactive - autocomplete, augmenting human writing processes, summarization, - text rephrasing, and other text-to-text tasks in non-sensitive domains" - [[Model Card]](https://docs.cohere.ai/generation-card). - prohibited_uses: > - The usage of the model is bound by the Cohere usage guidelines - [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines). - A non-comprehensive list of specific application violating these guidelines - are: astroturfing, generation of misinformation and other harmful content, - and "generation of text about people, places, or events without a - human-in-the-loop" - [[Model Card]](https://docs.cohere.ai/generation-card). - monitoring: > - The usage of the model is monitored by Cohere - [[Model Card]](https://docs.cohere.ai/generation-card). - feedback: value: unknown - explanation: > - There are no specific feedback mechanisms for the Generation model, - but a generic contact email is provided on Cohere website, which is - support at cohere.ai. - - -- type: model + modality: + explanation: Text (English) + value: text; text + model_card: https://docs.cohere.ai/representation-card + monitoring: "The usage of the model is monitored by Cohere [[Model Card]](https://docs.cohere.ai/representation-card).\n" name: Cohere Embed (English) - # General organization: Cohere - description: > - The Embedding Large (English) model is a language model trained by Cohere for - tasks requiring embeddings. - created_date: - value: 2021-11-15 - explanation: > - The date the Cohere API was announced on the news - [[News Article]] - (https://venturebeat.com/2021/11/15/openai-rival-cohere-launches-language-model-api/). - url: none - model_card: https://docs.cohere.ai/representation-card - modality: Text (English) - size: - value: unknown - explanation: > - The exact sizes of the embedding models are unknown, but we know - that they come in three sizes: small, medium, and large - [[Model Card]](https://docs.cohere.ai/representation-card). - analysis: > - The model's performance was analyzed on several safety benchmarks - [[Model Card]](https://docs.cohere.ai/representation-card). 
- # Construction - dependencies: [] + prohibited_uses: "The usage of the model is bound by the Cohere usage guidelines\ + \ [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines). A non-comprehensive\ + \ list of specific application violating these guidelines are: extraction of\ + \ identity and demographic information, building purposefully opaque text classification\ + \ systems, and \"building downstream classifiers that serve as automated decision-making\ + \ systems that have real-world consequences on people, where those decisions\ + \ are made without a human-in-the-loop\" [[Model Card]](https://docs.cohere.ai/representation-card).\n" + quality_control: unknown + size: unknown training_emissions: - value: unknown explanation: The emissions of the models are unknown. - training_time: value: unknown - explanation: The training time for the models are unknown. training_hardware: + explanation: "The training hardware wasn't explicitly announced, but it was\ + \ reported that Google Cloud teamed up with Cohere on a TPU partnership [[TechCrunch\ + \ Article]] (https://techcrunch.com/2021/11/17/google-cloud-teams-up-with-nlp-startup-cohere-on-multi-year-partnership/).\n" value: unknown - explanation: > - The training hardware wasn't explicitly announced, but it was - reported that Google Cloud teamed up with Cohere on a TPU partnership - [[TechCrunch Article]] - (https://techcrunch.com/2021/11/17/google-cloud-teams-up-with-nlp-startup-cohere-on-multi-year-partnership/). - quality_control: unknown - # Downstream - access: - value: limited - explanation: > - The model is available to the public through the Cohere Platform - [[Cohere Platform]](https://cohere.ai/). - license: + training_time: + explanation: The training time for the models are unknown. value: unknown - explanation: The model likely has a license specifically for Cohere's use. - intended_uses: > - The intended uses are stated as "estimating semantic similarity between two - sentences, choosing a sentence which is most likely to follow another - sentence, sentiment analysis, topic extraction, or categorizing user - feedback" on the Cohere model card - [[Model Card]](https://docs.cohere.ai/representation-card). - prohibited_uses: > - The usage of the model is bound by the Cohere usage guidelines - [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines). - A non-comprehensive list of specific application violating these guidelines - are: extraction of identity and demographic information, building - purposefully opaque text classification systems, and "building downstream - classifiers that serve as automated decision-making systems that have - real-world consequences on people, where those decisions are made without - a - human-in-the-loop" - [[Model Card]](https://docs.cohere.ai/representation-card). - monitoring: > - The usage of the model is monitored by Cohere - [[Model Card]](https://docs.cohere.ai/representation-card). 
+ type: model + url: none +- access: + explanation: "The model is available to the public through the Cohere Platform\ + \ [[Cohere Platform]](https://cohere.ai/).\n" + value: limited + analysis: "The model's performance was analyzed on several safety benchmarks [[Model\ + \ Card]](https://docs.cohere.ai/representation-card).\n" + created_date: + explanation: "The launch of the multilingual embedding model was announced in\ + \ this [[blog post]] (https://txt.cohere.com/multilingual/).\n" + value: 2022-12-12 + dependencies: [] + description: "This model maps text from 100+ languages to a semantic vector space,\ + \ positioning text with a similar meaning (regardless of language) in close\ + \ proximity.\n" feedback: + explanation: "There are no specific feedback mechanisms for the Generation model,\ + \ but a generic contact email is provided on Cohere website, which is support\ + \ at cohere.ai.\n" value: unknown - explanation: > - There are no specific feedback mechanisms for the Generation model, - but a generic contact email is provided on Cohere website, which is - support at cohere.ai. - -- type: model + intended_uses: "The intended uses are stated as \"estimating semantic similarity\ + \ between two sentences, choosing a sentence which is most likely to follow\ + \ another sentence, sentiment analysis, topic extraction, or categorizing user\ + \ feedback\" on the Cohere model card [[Model Card]](https://docs.cohere.ai/representation-card).\n" + license: + explanation: "The asset isn't released, and hence the license is unknown.\n" + value: unknown + modality: + explanation: text (Multilingual) + value: text; text + model_card: https://docs.cohere.ai/representation-card + monitoring: "The usage of the model is monitored by Cohere [[Model Card]](https://docs.cohere.ai/representation-card).\n" name: Cohere Embed (Multilingual) - # General organization: Cohere - description: > - This model maps text from 100+ languages to a semantic - vector space, positioning text with a similar meaning - (regardless of language) in close proximity. - modality: text (Multilingual) - created_date: - value: 2022-12-12 - explanation: > - The launch of the multilingual embedding model was announced in this - [[blog post]] - (https://txt.cohere.com/multilingual/). - url: https://txt.cohere.ai/multilingual/ - model_card: https://docs.cohere.ai/representation-card - size: - value: unknown - explanation: > - The exact sizes of the embedding models are unknown, but we know - that they come in three sizes: small, medium, and large - [[Model Card]](https://docs.cohere.ai/representation-card). - analysis: > - The model's performance was analyzed on several safety benchmarks - [[Model Card]](https://docs.cohere.ai/representation-card). - # Construction - dependencies: [] + prohibited_uses: "The usage of the model is bound by the Cohere usage guidelines\ + \ [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines). A non-comprehensive\ + \ list of specific application violating these guidelines are: extraction of\ + \ identity and demographic information, building purposefully opaque text classification\ + \ systems, and \"building downstream classifiers that serve as automated decision-making\ + \ systems that have real-world consequences on people, where those decisions\ + \ are made without a human-in-the-loop\" [[Model Card]](https://docs.cohere.ai/representation-card).\n" + quality_control: unknown + size: unknown training_emissions: - value: unknown explanation: The emissions of the models are unknown. 
- training_time: value: unknown - explanation: The training time for the models are unknown. training_hardware: + explanation: "The training hardware wasn't explicitly announced, but it was\ + \ reported that Google Cloud teamed up with Cohere on a TPU partnership [[TechCrunch\ + \ Article]] (https://techcrunch.com/2021/11/17/google-cloud-teams-up-with-nlp-startup-cohere-on-multi-year-partnership/).\n" value: unknown - explanation: > - The training hardware wasn't explicitly announced, but it was - reported that Google Cloud teamed up with Cohere on a TPU partnership - [[TechCrunch Article]] - (https://techcrunch.com/2021/11/17/google-cloud-teams-up-with-nlp-startup-cohere-on-multi-year-partnership/). - quality_control: unknown - # Downstream - access: - value: limited - explanation: > - The model is available to the public through the Cohere Platform - [[Cohere Platform]](https://cohere.ai/). - license: - value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - intended_uses: > - The intended uses are stated as "estimating semantic similarity between two - sentences, choosing a sentence which is most likely to follow another - sentence, sentiment analysis, topic extraction, or categorizing user - feedback" on the Cohere model card - [[Model Card]](https://docs.cohere.ai/representation-card). - prohibited_uses: > - The usage of the model is bound by the Cohere usage guidelines - [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines). - A non-comprehensive list of specific application violating these guidelines - are: extraction of identity and demographic information, building - purposefully opaque text classification systems, and "building downstream - classifiers that serve as automated decision-making systems that have - real-world consequences on people, where those decisions are made without - a - human-in-the-loop" - [[Model Card]](https://docs.cohere.ai/representation-card). - monitoring: > - The usage of the model is monitored by Cohere - [[Model Card]](https://docs.cohere.ai/representation-card). - feedback: + training_time: + explanation: The training time for the models are unknown. value: unknown - explanation: > - There are no specific feedback mechanisms for the Generation model, - but a generic contact email is provided on Cohere website, which is - support at cohere.ai. - -- type: application - name: Cohere API - # General - organization: Cohere - description: > - Cohere API allows users to access the cohere language models and utilize - them in their applications. + type: model + url: https://txt.cohere.ai/multilingual/ +- access: + explanation: "Users can access the Cohere API by signing up on the Cohere website\ + \ [[Cohere Website]](https://cohere.ai/).\n" + value: limited + adaptation: unknown created_date: + explanation: "The date the Cohere API was announced on the news [[News Article]]\ + \ (https://venturebeat.com/2021/11/15/openai-rival-cohere-launches-language-model-api/).\n" value: 2021-11-15 - explanation: > - The date the Cohere API was announced on the news - [[News Article]] - (https://venturebeat.com/2021/11/15/openai-rival-cohere-launches-language-model-api/). 
- url: https://cohere.ai/ - # Construction dependencies: - Cohere Generate Endpoint - Cohere Embed Endpoint - Cohere Classify Endpoint - Cohere Summarize Endpoint - adaptation: unknown - output_space: generation and embeddings - quality_control: > - The new users of the API get a limited access restricting the sizes of the - models as well as the number of tokens that can be used. Users are required - to go through an internal application to upgrade to full access - [[Limited Access]](https://docs.cohere.ai/limited-access). - # Downstream - access: - value: limited - explanation: > - Users can access the Cohere API by signing up on the Cohere website - [[Cohere Website]](https://cohere.ai/). + description: "Cohere API allows users to access the cohere language models and\ + \ utilize them in their applications.\n" + failures: unknown + feedback: "General feedback as well as the violations of the usage guidelines\ + \ can be reported to Cohere at responsibility at cohere.ai [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines).\n" + intended_uses: "Intended to be used by developers who would like to incorporate\ + \ NLP into their applications [[Cohere Website]](https://cohere.ai/).\n" license: + explanation: "\"Subject to these Terms of Use, we grant you anon-exclusive,\ + \ non-transferable, non-sublicensable and revocable licence to use the Cohere\ + \ Solution in accordance with these Terms of Use\" - excerpt from the Terms\ + \ of Service document.\n" value: custom - explanation: > - "Subject to these Terms of Use, we grant you anon-exclusive, - non-transferable, non-sublicensable and revocable licence to use the - Cohere Solution in accordance with these Terms of Use" - excerpt from the - Terms of Service document. - terms_of_service: https://cohere.ai/terms-of-use - intended_uses: > - Intended to be used by developers who would like to incorporate NLP into - their applications - [[Cohere Website]](https://cohere.ai/). - prohibited_uses: > - The usage of the API is bound by the Cohere usage guidelines. - Disallowed use cases include violence and threats, antisocial and - antidemocratic uses, deceit, attacks on security or privacy, unsafe - unsupervised uses, decision-making, high-Risk generations among others - [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines). - monitoring: > - All applications developed using the Cohere API is subject to review by - Cohere. - feedback: > - General feedback as well as the violations of the usage guidelines can - be reported to Cohere at responsibility at cohere.ai - [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines). - # Deployment + monitoring: "All applications developed using the Cohere API is subject to review\ + \ by Cohere.\n" monthly_active_users: unknown - user_distribution: unknown - failures: unknown - -- type: application - name: Cohere Generate Endpoint - # General + name: Cohere API organization: Cohere - description: > - This endpoint generates realistic text conditioned on a given input. 
+ output_space: generation and embeddings + prohibited_uses: "The usage of the API is bound by the Cohere usage guidelines.\ + \ Disallowed use cases include violence and threats, antisocial and antidemocratic\ + \ uses, deceit, attacks on security or privacy, unsafe unsupervised uses, decision-making,\ + \ high-Risk generations among others [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines).\n" + quality_control: "The new users of the API get a limited access restricting the\ + \ sizes of the models as well as the number of tokens that can be used. Users\ + \ are required to go through an internal application to upgrade to full access\ + \ [[Limited Access]](https://docs.cohere.ai/limited-access).\n" + terms_of_service: https://cohere.ai/terms-of-use + type: application + url: https://cohere.ai/ + user_distribution: unknown +- access: + explanation: "Users can access the Cohere API by signing up on the Cohere website\ + \ [[Cohere Website]](https://cohere.ai/).\n" + value: limited + adaptation: unknown created_date: + explanation: "The launch of Generate was announced in this [[news article]]\ + \ (https://venturebeat.com/uncategorized/openai-rival-cohere-launches-language-model-api/).\n" value: 2021-11-15 - explanation: > - The launch of Generate was announced in this - [[news article]] - (https://venturebeat.com/uncategorized/openai-rival-cohere-launches-language-model-api/). - url: https://docs.cohere.ai/reference/generate - # Construction dependencies: [Cohere Base, Cohere Command] - adaptation: unknown + description: "This endpoint generates realistic text conditioned on a given input.\n" + failures: unknown + feedback: "General feedback as well as the violations of the usage guidelines\ + \ can be reported to Cohere at responsibility at cohere.ai [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines).\n" + intended_uses: "Intended to be used by developers who would like to incorporate\ + \ NLP into their applications [[Cohere Website]](https://cohere.ai/).\n" + license: "Limited use license to Cohere platform users [[Terms of Use]](https://cohere.ai/terms-of-use).\n" + monitoring: "All applications developed using the Cohere API is subject to review\ + \ by Cohere.\n" + monthly_active_users: unknown + name: Cohere Generate Endpoint + organization: Cohere output_space: generation - quality_control: > - The new users of the API get a limited access restricting the sizes of the - models as well as the number of tokens that can be used. Users are required - to go through an internal application to upgrade to full access - [[Limited Access]](https://docs.cohere.ai/limited-access). - # Downstream - access: - value: limited - explanation: > - Users can access the Cohere API by signing up on the Cohere website - [[Cohere Website]](https://cohere.ai/). - license: > - Limited use license to Cohere platform users - [[Terms of Use]](https://cohere.ai/terms-of-use). + prohibited_uses: "The usage of the API is bound by the Cohere usage guidelines.\ + \ Disallowed use cases include violence and threats, antisocial and antidemocratic\ + \ uses, deceit, attacks on security or privacy, unsafe unsupervised uses, decision-making,\ + \ high-Risk generations among others [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines).\n" + quality_control: "The new users of the API get a limited access restricting the\ + \ sizes of the models as well as the number of tokens that can be used. 
Users\ + \ are required to go through an internal application to upgrade to full access\ + \ [[Limited Access]](https://docs.cohere.ai/limited-access).\n" terms_of_service: https://cohere.ai/terms-of-use - intended_uses: > - Intended to be used by developers who would like to incorporate NLP into - their applications - [[Cohere Website]](https://cohere.ai/). - prohibited_uses: > - The usage of the API is bound by the Cohere usage guidelines. - Disallowed use cases include violence and threats, antisocial and - antidemocratic uses, deceit, attacks on security or privacy, unsafe - unsupervised uses, decision-making, high-Risk generations among others - [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines). - monitoring: > - All applications developed using the Cohere API is subject to review by - Cohere. - feedback: > - General feedback as well as the violations of the usage guidelines can - be reported to Cohere at responsibility at cohere.ai - [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines). - # Deployment - monthly_active_users: unknown + type: application + url: https://docs.cohere.ai/reference/generate user_distribution: unknown - failures: unknown - -- type: application - name: Cohere Embed Endpoint - # General - organization: Cohere - description: > - This endpoint returns text embeddings. An - embedding is a list of floating point numbers that - captures semantic information about the text that it represents. +- access: + explanation: "Users can access the Cohere API by signing up on the Cohere website\ + \ [[Cohere Website]](https://cohere.ai/).\n" + value: limited + adaptation: unknown created_date: + explanation: "The launch of Embed was announced in this [[news article]] (https://venturebeat.com/uncategorized/openai-rival-cohere-launches-language-model-api/).\n" value: 2021-11-15 - explanation: > - The launch of Embed was announced in this - [[news article]] - (https://venturebeat.com/uncategorized/openai-rival-cohere-launches-language-model-api/). - url: https://docs.cohere.ai/reference/embed - # Construction dependencies: [Cohere Embed (Multilingual), Cohere Embed (English)] - adaptation: unknown + description: "This endpoint returns text embeddings. An embedding is a list of\ + \ floating point numbers that captures semantic information about the text that\ + \ it represents.\n" + failures: unknown + feedback: "General feedback as well as the violations of the usage guidelines\ + \ can be reported to Cohere at responsibility at cohere.ai [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines).\n" + intended_uses: "Intended to be used by developers who would like to incorporate\ + \ NLP into their applications [[Cohere Website]](https://cohere.ai/).\n" + license: "Limited use license to Cohere platform users [[Terms of Use]](https://cohere.ai/terms-of-use).\n" + monitoring: "All applications developed using the Cohere API is subject to review\ + \ by Cohere.\n" + monthly_active_users: unknown + name: Cohere Embed Endpoint + organization: Cohere output_space: embedding - quality_control: > - The new users of the API get a limited access restricting the sizes of the - models as well as the number of tokens that can be used. Users are required - to go through an internal application to upgrade to full access - [[Limited Access]](https://docs.cohere.ai/limited-access). - # Downstream - access: - value: limited - explanation: > - Users can access the Cohere API by signing up on the Cohere website - [[Cohere Website]](https://cohere.ai/). 
- license: > - Limited use license to Cohere platform users - [[Terms of Use]](https://cohere.ai/terms-of-use). + prohibited_uses: "The usage of the API is bound by the Cohere usage guidelines.\ + \ Disallowed use cases include violence and threats, antisocial and antidemocratic\ + \ uses, deceit, attacks on security or privacy, unsafe unsupervised uses, decision-making,\ + \ high-Risk generations among others [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines).\n" + quality_control: "The new users of the API get a limited access restricting the\ + \ sizes of the models as well as the number of tokens that can be used. Users\ + \ are required to go through an internal application to upgrade to full access\ + \ [[Limited Access]](https://docs.cohere.ai/limited-access).\n" terms_of_service: https://cohere.ai/terms-of-use - intended_uses: > - Intended to be used by developers who would like to incorporate NLP into - their applications - [[Cohere Website]](https://cohere.ai/). - prohibited_uses: > - The usage of the API is bound by the Cohere usage guidelines. - Disallowed use cases include violence and threats, antisocial and - antidemocratic uses, deceit, attacks on security or privacy, unsafe - unsupervised uses, decision-making, high-Risk generations among others - [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines). - monitoring: > - All applications developed using the Cohere API is subject to review by - Cohere. - feedback: > - General feedback as well as the violations of the usage guidelines can - be reported to Cohere at responsibility at cohere.ai - [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines). - # Deployment - monthly_active_users: unknown + type: application + url: https://docs.cohere.ai/reference/embed user_distribution: unknown - failures: unknown - -- type: application - name: Cohere Classify Endpoint - # General - organization: Cohere - description: > - This endpoint makes a prediction about which label best - fits a specified text input. To make a prediction, - Classify uses the provided examples of text + label - pairs as a reference. +- access: + explanation: "Users can access the Cohere API by signing up on the Cohere website\ + \ [[Cohere Website]](https://cohere.ai/).\n" + value: limited + adaptation: unknown created_date: + explanation: "The launch of Classify was announced in this [[blog post]] (https://txt.cohere.com/classification-is-now-available/).\n" value: 2022-05-05 - explanation: > - The launch of Classify was announced in this - [[blog post]] - (https://txt.cohere.com/classification-is-now-available/). - url: https://docs.cohere.ai/reference/classify - # Construction dependencies: [Cohere Embed (Multilingual), Cohere Embed (English)] - adaptation: unknown + description: "This endpoint makes a prediction about which label best fits a specified\ + \ text input. 
To make a prediction, Classify uses the provided examples of text\ + \ + label pairs as a reference.\n" + failures: unknown + feedback: "General feedback as well as the violations of the usage guidelines\ + \ can be reported to Cohere at responsibility at cohere.ai [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines).\n" + intended_uses: "Intended to be used by developers who would like to incorporate\ + \ NLP into their applications [[Cohere Website]](https://cohere.ai/).\n" + license: "Limited use license to Cohere platform users [[Terms of Use]](https://cohere.ai/terms-of-use).\n" + monitoring: "All applications developed using the Cohere API is subject to review\ + \ by Cohere.\n" + monthly_active_users: unknown + name: Cohere Classify Endpoint + organization: Cohere output_space: embedding - quality_control: > - The new users of the API get a limited access restricting the sizes of the - models as well as the number of tokens that can be used. Users are required - to go through an internal application to upgrade to full access - [[Limited Access]](https://docs.cohere.ai/limited-access). - # Downstream - access: - value: limited - explanation: > - Users can access the Cohere API by signing up on the Cohere website - [[Cohere Website]](https://cohere.ai/). - license: > - Limited use license to Cohere platform users - [[Terms of Use]](https://cohere.ai/terms-of-use). + prohibited_uses: "The usage of the API is bound by the Cohere usage guidelines.\ + \ Disallowed use cases include violence and threats, antisocial and antidemocratic\ + \ uses, deceit, attacks on security or privacy, unsafe unsupervised uses, decision-making,\ + \ high-Risk generations among others [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines).\n" + quality_control: "The new users of the API get a limited access restricting the\ + \ sizes of the models as well as the number of tokens that can be used. Users\ + \ are required to go through an internal application to upgrade to full access\ + \ [[Limited Access]](https://docs.cohere.ai/limited-access).\n" terms_of_service: https://cohere.ai/terms-of-use - intended_uses: > - Intended to be used by developers who would like to incorporate NLP into - their applications - [[Cohere Website]](https://cohere.ai/). - prohibited_uses: > - The usage of the API is bound by the Cohere usage guidelines. - Disallowed use cases include violence and threats, antisocial and - antidemocratic uses, deceit, attacks on security or privacy, unsafe - unsupervised uses, decision-making, high-Risk generations among others - [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines). - monitoring: > - All applications developed using the Cohere API is subject to review by - Cohere. - feedback: > - General feedback as well as the violations of the usage guidelines can - be reported to Cohere at responsibility at cohere.ai - [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines). - # Deployment - monthly_active_users: unknown + type: application + url: https://docs.cohere.ai/reference/classify user_distribution: unknown - failures: unknown - -- type: application - name: Cohere Summarize Endpoint - # General - organization: Cohere - description: > - This endpoint generates a succinct - version of the original text that relays - the most important information. 
+- access: + explanation: "Users can access the Cohere API by signing up on the Cohere website\ + \ [[Cohere Website]](https://cohere.ai/).\n" + value: limited + adaptation: unknown created_date: + explanation: "The beta launch of summarize was announced in this [[blog post]]\ + \ (https://txt.cohere.com/summarize-beta/).\n" value: 2023-02-22 - explanation: > - The beta launch of summarize was announced in this - [[blog post]] - (https://txt.cohere.com/summarize-beta/). - url: https://docs.cohere.ai/reference/summarize - # Construction dependencies: [] - adaptation: unknown + description: "This endpoint generates a succinct version of the original text\ + \ that relays the most important information.\n" + failures: unknown + feedback: "General feedback as well as the violations of the usage guidelines\ + \ can be reported to Cohere at responsibility at cohere.ai [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines).\n" + intended_uses: "Intended to be used by developers who would like to incorporate\ + \ NLP into their applications [[Cohere Website]](https://cohere.ai/).\n" + license: "Limited use license to Cohere platform users [[Terms of Use]](https://cohere.ai/terms-of-use).\n" + monitoring: "All applications developed using the Cohere API is subject to review\ + \ by Cohere.\n" + monthly_active_users: unknown + name: Cohere Summarize Endpoint + organization: Cohere output_space: generation - quality_control: > - The new users of the API get a limited access restricting the sizes of the - models as well as the number of tokens that can be used. Users are required - to go through an internal application to upgrade to full access - [[Limited Access]](https://docs.cohere.ai/limited-access). - # Downstream - access: - value: limited - explanation: > - Users can access the Cohere API by signing up on the Cohere website - [[Cohere Website]](https://cohere.ai/). - license: > - Limited use license to Cohere platform users - [[Terms of Use]](https://cohere.ai/terms-of-use). + prohibited_uses: "The usage of the API is bound by the Cohere usage guidelines.\ + \ Disallowed use cases include violence and threats, antisocial and antidemocratic\ + \ uses, deceit, attacks on security or privacy, unsafe unsupervised uses, decision-making,\ + \ high-Risk generations among others [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines).\n" + quality_control: "The new users of the API get a limited access restricting the\ + \ sizes of the models as well as the number of tokens that can be used. Users\ + \ are required to go through an internal application to upgrade to full access\ + \ [[Limited Access]](https://docs.cohere.ai/limited-access).\n" terms_of_service: https://cohere.ai/terms-of-use - intended_uses: > - Intended to be used by developers who would like to incorporate NLP into - their applications - [[Cohere Website]](https://cohere.ai/). - prohibited_uses: > - The usage of the API is bound by the Cohere usage guidelines. - Disallowed use cases include violence and threats, antisocial and - antidemocratic uses, deceit, attacks on security or privacy, unsafe - unsupervised uses, decision-making, high-Risk generations among others - [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines). - monitoring: > - All applications developed using the Cohere API is subject to review by - Cohere. 
- feedback: > - General feedback as well as the violations of the usage guidelines can - be reported to Cohere at responsibility at cohere.ai - [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines). - # Deployment - monthly_active_users: unknown + type: application + url: https://docs.cohere.ai/reference/summarize user_distribution: unknown - failures: unknown diff --git a/assets/columbia.yaml b/assets/columbia.yaml index 2ca39802..cef14a6c 100644 --- a/assets/columbia.yaml +++ b/assets/columbia.yaml @@ -1,23 +1,24 @@ --- - -- type: model - name: OpenFold - organization: Columbia - description: OpenFold is an open source recreation of AlphaFold2. +- access: open + analysis: Evaluated on wide range of tasks using own evaluation benchmarks. created_date: 2022-11-20 - url: https://www.biorxiv.org/content/10.1101/2022.11.20.517210v2 + dependencies: [AlphaFold2, OpenProteinSet] + description: OpenFold is an open source recreation of AlphaFold2. + feedback: '' + intended_uses: '' + license: CC BY 4.0 + modality: + explanation: protein structures + value: image; image model_card: '' - modality: protein structures - analysis: Evaluated on wide range of tasks using own evaluation benchmarks. + monitoring: '' + name: OpenFold + organization: Columbia + prohibited_uses: '' + quality_control: '' size: '' - dependencies: [AlphaFold2, OpenProteinSet] training_emissions: unknown - training_time: 50,000 GPU hours training_hardware: Single A100 NVIDIA GPU - quality_control: '' - access: open - license: CC BY 4.0 - intended_uses: '' - prohibited_uses: '' - monitoring: '' - feedback: '' + training_time: 50,000 GPU hours + type: model + url: https://www.biorxiv.org/content/10.1101/2022.11.20.517210v2 diff --git a/assets/databricks.yaml b/assets/databricks.yaml index 40dc6f16..7439f500 100644 --- a/assets/databricks.yaml +++ b/assets/databricks.yaml @@ -1,67 +1,50 @@ --- - -- type: model - name: Dolly - # General - organization: Databricks - description: > - "Databricks’ Dolly, a large language model trained on the Databricks - Machine Learning Platform, demonstrates that a two-years-old open source - model (GPT-J) can, when subjected to just 30 minutes of fine tuning on a - focused corpus of 50k records (Stanford Alpaca), exhibit surprisingly - high quality instruction following behavior not characteristic of the - foundation model on which it is based." - [[Dolly Repository]](https://github.com/databrickslabs/dolly). +- access: + explanation: "Model training code can be accessed at the official Dolly repository.\ \ Trained weights can be requested at hello-dolly@databricks.com. [[Dolly\ \ Repository]](https://github.com/databrickslabs/dolly).\n" + value: open + analysis: "\"We evaluated Dolly on the instruction-following capabilities described\ \ in the InstructGPT paper that ChatGPT is based on and found that it exhibits\ \ many of the same qualitative capabilities, including text generation, brainstorming\ \ and open Q&A.\" [[Databricks Blog Post]] (https://www.databricks.com/blog/2023/03/24/hello-dolly-democratizing-magic-chatgpt-open-models.html).\n" created_date: + explanation: "The date the model was announced in the [[Databricks blog post]](https://www.databricks.com/blog/2023/03/24/hello-dolly-democratizing-magic-chatgpt-open-models.html).\n" value: 2023-03-24 - explanation: > - The date the model was announced in the [[Cerebras blog post]](https://www.databricks.com/blog/2023/03/24/hello-dolly-democratizing-magic-chatgpt-open-models.html).
- url: https://www.databricks.com/blog/2023/03/24/hello-dolly-democratizing-magic-chatgpt-open-models.html - model_card: '' - modality: text (English) - size: 6B parameters (dense model) - analysis: > - "We evaluated Dolly on the instruction-following capabilities described in the - InstructGPT - paper that ChatGPT is based on and found that it exhibits many of the same qualitative - capabilities, including text generation, brainstorming and open Q&A." - [[Databricks Blog Post]] - (https://www.databricks.com/blog/2023/03/24/hello-dolly-democratizing-magic-chatgpt-open-models.html). - # Construction dependencies: [GPT-J, Alpaca dataset] + description: "\"Databricks’ Dolly, a large language model trained on the Databricks\n\ + \ Machine Learning Platform, demonstrates that a two-years-old open source\n\ + \ model (GPT-J) can, when subjected to just 30 minutes of fine tuning on a\n\ + \ focused corpus of 50k records (Stanford Alpaca), exhibit surprisingly\n high\ + \ quality instruction following behavior not characteristic of the\n foundation\ + \ model on which it is based.\"\n [[Dolly Repository]](https://github.com/databrickslabs/dolly).\n" + feedback: https://github.com/databrickslabs/dolly/issues + intended_uses: "\"Dolly is intended exclusively for research purposes and is not\ + \ licensed for commercial use.\" [[Limitations]](https://github.com/databrickslabs/dolly#limitations).\n" + license: Apache 2.0 + modality: + explanation: text (English) + value: text; text + model_card: '' + monitoring: none + name: Dolly + organization: Databricks + prohibited_uses: "Authors note the following limitations of the model: \"The Dolly\ + \ model family is under active development, and so any list of shortcomings\ + \ is unlikely to be exhaustive, but we include known limitations and misfires\ + \ here as a means to document and share our preliminary findings with the community.\ + \ In particular, dolly-6b struggles with syntactically complex prompts, mathematical\ + \ operations, factual errors, dates and times, open-ended question answering,\ + \ hallucination, enumerating lists of specific length, and stylistic mimicry.\"\ + \ [[Limitations]](https://github.com/databrickslabs/dolly#limitations).\n" + quality_control: none + size: 6B parameters (dense) training_emissions: unknown - training_time: - value: 30 minutes - explanation: > - According to [[Model Overview]](https://github.com/databrickslabs/dolly#model-overview) training_hardware: + explanation: "According to [[Model Overview]](https://github.com/databrickslabs/dolly#model-overview)\n" value: A single NDasrA100_v4 machine with 8x A100 40GB GPUs - explanation: > - According to [[Model Overview]](https://github.com/databrickslabs/dolly#model-overview) - quality_control: none - # Downstream - access: - value: open - explanation: > - Model training code can be accessed at the official Dolly repository. - Trained weights can be requested at hello-dolly@databricks.com. - [[Dolly Repository]](https://github.com/databrickslabs/dolly). - license: Apache 2.0 - intended_uses: > - "Dolly is intended exclusively for research purposes and is not licensed for - commercial use." - [[Limitations]](https://github.com/databrickslabs/dolly#limitations). 
- prohibited_uses: > - Authors note the following limitations of the model: "The Dolly model family - is under active development, - and so any list of shortcomings is unlikely to be exhaustive, but we include - known limitations and misfires - here as a means to document and share our preliminary findings with the community. - In particular, dolly-6b - struggles with syntactically complex prompts, mathematical operations, factual - errors, dates and times, - open-ended question answering, hallucination, enumerating lists of specific - length, and stylistic mimicry." - [[Limitations]](https://github.com/databrickslabs/dolly#limitations). - monitoring: none - feedback: https://github.com/databrickslabs/dolly/issues + training_time: + explanation: "According to [[Model Overview]](https://github.com/databrickslabs/dolly#model-overview)\n" + value: 30 minutes + type: model + url: https://www.databricks.com/blog/2023/03/24/hello-dolly-democratizing-magic-chatgpt-open-models.html diff --git a/assets/deepmind.yaml b/assets/deepmind.yaml index a59747e1..81a0f83d 100644 --- a/assets/deepmind.yaml +++ b/assets/deepmind.yaml @@ -1,840 +1,670 @@ --- - -- type: dataset - name: MassiveText - # General - organization: DeepMind - description: > - The MassiveText dataset was used to train the Gopher model. +- access: + explanation: "The dataset access is limited to DeepMind researchers [[Model\ + \ Card]](https://arxiv.org/pdf/2112.11446.pdf#appendix.C).\n" + value: closed + analysis: "MassiveText data was analyzed for toxicity, language distribution,\ + \ URL breakdown, and tokenizer compression rates on the subsets [[Section A.2]](https://arxiv.org/pdf/2112.11446.pdf#subsection.A.2).\n" created_date: + explanation: "The date that Gopher was announced [[DeepMind Blog Post]] (https://www.deepmind.com/blog/language-modelling-at-scale-gopher-ethical-considerations-and-retrieval).\n" value: 2021-12-08 - explanation: > - The date that Gopher was announced - [[DeepMind Blog Post]] - (https://www.deepmind.com/blog/language-modelling-at-scale-gopher-ethical-considerations-and-retrieval). - url: https://arxiv.org/pdf/2112.11446.pdf datasheet: https://arxiv.org/pdf/2112.11446.pdf#subsection.A.5 - modality: Text (English) and Code - size: 10.5 TB - sample: [] - analysis: > - MassiveText data was analyzed for toxicity, language distribution, URL - breakdown, and tokenizer compression rates on the subsets - [[Section A.2]](https://arxiv.org/pdf/2112.11446.pdf#subsection.A.2). - # Construction dependencies: [] - license: + description: "The MassiveText dataset was used to train the Gopher model.\n" + excluded: "Documents that are not in English are excluded.\n" + feedback: + explanation: "The internal feedback mechanisms for WebText are unknown [[Model\ + \ Card]](https://arxiv.org/pdf/2112.11446.pdf#appendix.C).\n" value: unknown - explanation: > - The model likely has a license specifically for DeepMind's use, - based on the information provided in the datasheet - [[Model Card]](https://arxiv.org/pdf/2112.11446.pdf#subsection.A.5). - included: > - MassiveText data come from 6 sources: MassiveWeb (48%), Books (27%), - C4 (10%), News (10%), GitHub (3%), and Wikipedia (2%). MassiveWeb is a - web text corpus curated for MassiveText. - excluded: > - Documents that are not in English are excluded. 
- quality_control: > - The authors use simple heuristics for filtering low quality documents as - opposed to relying on a classifier based on a "gold" set such as the English - Wikipedia, which could "inadvertently bias towards a certain demographic or - erase certain dialects or sociolects from representation." MassiveWeb - subset was filtered using Google’s SafeSearch filter, preferring it over - to word filters that "disproportinately filter out inoffensive content - associated with minority groups. MassiveWeb was filtered - further for word or phrase repetitions. All the subsets were filtered for - document deduplication and test set contamination" - [[Appendix A]](https://arxiv.org/pdf/2112.11446.pdf#appendix.A). - # Downstream - access: - value: closed - explanation: > - The dataset access is limited to DeepMind researchers - [[Model Card]](https://arxiv.org/pdf/2112.11446.pdf#appendix.C). - intended_uses: > - Pre-training of language models by DeepMind researchers - [[Model Card]](https://arxiv.org/pdf/2112.11446.pdf#appendix.C). - prohibited_uses: + included: "MassiveText data come from 6 sources: MassiveWeb (48%), Books (27%),\ + \ C4 (10%), News (10%), GitHub (3%), and Wikipedia (2%). MassiveWeb is a web\ + \ text corpus curated for MassiveText.\n" + intended_uses: "Pre-training of language models by DeepMind researchers [[Model\ + \ Card]](https://arxiv.org/pdf/2112.11446.pdf#appendix.C).\n" + license: + explanation: "The model likely has a license specifically for DeepMind's use,\ + \ based on the information provided in the datasheet [[Model Card]](https://arxiv.org/pdf/2112.11446.pdf#subsection.A.5).\n" value: unknown - explanation: > - There are no known prohibited uses of the dataset, but the authors - state that it should not be used for training models with multilingual - capabilities as it only contains the English language - [[Model Card]](https://arxiv.org/pdf/2112.11446.pdf#appendix.C). + modality: + explanation: Text (English) and Code + value: code, text monitoring: + explanation: "There is no information on how DeepMind is internally monitoring\ + \ the use of the dataset.\n" value: unknown - explanation: > - There is no information on how DeepMind is internally monitoring - the use of the dataset. - feedback: - value: unknown - explanation: > - The internal feedback mechanisms for WebText are unknown - [[Model Card]](https://arxiv.org/pdf/2112.11446.pdf#appendix.C). - -- type: dataset - name: M3W - # General + name: MassiveText organization: DeepMind - description: > - M3W (MassiveWeb) is dataset used to train Flamingo, and other vision-language - models and was created by researchers and engineers. + prohibited_uses: + explanation: "There are no known prohibited uses of the dataset, but the authors\ + \ state that it should not be used for training models with multilingual capabilities\ + \ as it only contains the English language [[Model Card]](https://arxiv.org/pdf/2112.11446.pdf#appendix.C).\n" + value: unknown + quality_control: "The authors use simple heuristics for filtering low quality\ + \ documents as opposed to relying on a classifier based on a \"gold\" set such\ + \ as the English Wikipedia, which could \"inadvertently bias towards a certain\ + \ demographic or erase certain dialects or sociolects from representation.\"\ + \ MassiveWeb subset was filtered using Google’s SafeSearch filter, preferring\ + \ it over to word filters that \"disproportinately filter out inoffensive content\ + \ associated with minority groups. 
MassiveWeb was filtered further for word\ + \ or phrase repetitions. All the subsets were filtered for document deduplication\ + \ and test set contamination\" [[Appendix A]](https://arxiv.org/pdf/2112.11446.pdf#appendix.A).\n" + sample: [] + size: 10.5 TB + type: dataset + url: https://arxiv.org/pdf/2112.11446.pdf +- access: + explanation: "The dataset access is limited to DeepMind researchers [[Datasheet]](https://arxiv.org/pdf/2204.14198.pdf#appendix.F).\n" + value: closed + analysis: '' created_date: + explanation: "The date that Flamingo was posted to arXiv [[arXiv]] (https://arxiv.org/pdf/2204.14198.pdf).\n" value: 2022-04-29 - explanation: > - The date that Flamingo was posted to arXiv - [[arXiv]] - (https://arxiv.org/pdf/2204.14198.pdf). - url: https://arxiv.org/pdf/2204.14198.pdf datasheet: https://arxiv.org/pdf/2204.14198.pdf#appendix.F - modality: text (English) and code - size: 182GB Text, 185M Images - sample: [] - analysis: '' - # Construction dependencies: [] - license: - value: unknown - explanation: > - The model likely has a license specifically for DeepMind's use, - based on the information provided in the datasheet - [[Datasheet]](https://arxiv.org/pdf/2204.14198.pdf#appendix.F). - included: > - M3W has interleaved images (185M) and text (182GB) from the web. + description: "M3W (MassiveWeb) is dataset used to train Flamingo, and other vision-language\ + \ models and was created by researchers and engineers.\n" excluded: unknown - quality_control: > - The authors provide a basic description of data processing and cleaning. - # Downstream - access: - value: closed - explanation: > - The dataset access is limited to DeepMind researchers - [[Datasheet]](https://arxiv.org/pdf/2204.14198.pdf#appendix.F). - intended_uses: > - Pre-training of vision and language models by DeepMind researchers - [[Datasheet]](https://arxiv.org/pdf/2204.14198.pdf#appendix.F). - prohibited_uses: + feedback: + explanation: "No feedback mechanism is mentioned in the datasheet [[Datasheet]](https://arxiv.org/pdf/2204.14198.pdf#appendix.F).\n" + value: none + included: "M3W has interleaved images (185M) and text (182GB) from the web.\n" + intended_uses: "Pre-training of vision and language models by DeepMind researchers\ + \ [[Datasheet]](https://arxiv.org/pdf/2204.14198.pdf#appendix.F).\n" + license: + explanation: "The model likely has a license specifically for DeepMind's use,\ + \ based on the information provided in the datasheet [[Datasheet]](https://arxiv.org/pdf/2204.14198.pdf#appendix.F).\n" value: unknown - explanation: > - There are no known prohibited uses of the dataset - [[Datasheet]](https://arxiv.org/pdf/2204.14198.pdf#appendix.F). + modality: + explanation: text (English) and code + value: code, text monitoring: + explanation: "There is no information on how DeepMind is internally monitoring\ + \ the use of the dataset.\n" value: unknown - explanation: > - There is no information on how DeepMind is internally monitoring - the use of the dataset. - feedback: - value: none - explanation: > - No feedback mechanism is mentioned in the datasheet - [[Datasheet]](https://arxiv.org/pdf/2204.14198.pdf#appendix.F). - -- type: dataset - name: Gato dataset - # General + name: M3W organization: DeepMind - description: > - The Gato datasets are a collection of data used to train the Gato model. 
+ prohibited_uses: + explanation: "There are no known prohibited uses of the dataset [[Datasheet]](https://arxiv.org/pdf/2204.14198.pdf#appendix.F).\n" + value: unknown + quality_control: "The authors provide a basic description of data processing and\ + \ cleaning.\n" + sample: [] + size: 182GB Text, 185M Images + type: dataset + url: https://arxiv.org/pdf/2204.14198.pdf +- access: + explanation: "The full dataset is not directly provided by the authors, though\ + \ some underlying data is public whereas others (e.g. MassiveText) is not.\n" + value: closed + analysis: "The Gato dataset compiles many datasets introduced in prior works,\ + \ with associated analyses.\n" created_date: + explanation: "The date that Gato was announced [[DeepMind Blog Post]] (https://www.deepmind.com/blog/a-generalist-agent).\n" value: 2022-05-12 - explanation: > - The date that Gato was announced - [[DeepMind Blog Post]] - (https://www.deepmind.com/blog/a-generalist-agent). - url: https://www.deepmind.com/blog/a-generalist-agent datasheet: none - modality: text, text-image pair, simulated control task, robotics simulation - size: 10.5 TB Text, 2.2B Text-Image pairs, 1.5T tokens of simulated control, 500k - robotics trajectories - sample: [] - analysis: > - The Gato dataset compiles many datasets introduced in prior works, with associated - analyses. - # Construction dependencies: [MassiveText] - license: - value: unknown - explanation: > - The datasets have individual licenses, but no overall license is mentioned - by the authors. - included: > - The full composition of the dataset across individual sources can be found in - the paper. + description: "The Gato datasets are a collection of data used to train the Gato\ + \ model.\n" excluded: + explanation: "No specific filtering is mentioned in the Gato paper.\n" value: none - explanation: > - No specific filtering is mentioned in the Gato paper. - quality_control: unknown - # Downstream - access: - value: closed - explanation: > - The full dataset is not directly provided by the authors, though some underlying - data is public whereas others (e.g. MassiveText) is not. + feedback: + explanation: "There is no mention on feedback mechanisms either internally or\ + \ externally.\n" + value: none + included: "The full composition of the dataset across individual sources can be\ + \ found in the paper.\n" intended_uses: + explanation: "There are no known intended uses of the dataset stated by authors\ + \ beyond training Gato.\n" value: unknown - explanation: > - There are no known intended uses of the dataset stated by authors beyond training - Gato. - prohibited_uses: + license: + explanation: "The datasets have individual licenses, but no overall license\ + \ is mentioned by the authors.\n" value: unknown - explanation: > - There are no known prohibited uses of the dataset stated by authors. + modality: + explanation: text, text-image pair, simulated control task, robotics simulation + value: image, text monitoring: + explanation: "There is no mention on how DeepMind is internally monitoring the\ + \ use of the dataset.\n" value: none - explanation: > - There is no mention on how DeepMind is internally monitoring - the use of the dataset. - feedback: - value: none - explanation: > - There is no mention on feedback mechanisms either internally or externally. 
- -- type: model - name: AlphaFold2 + name: Gato dataset organization: DeepMind - description: AlphaFold2 is a protein language model trained on protein sequences + prohibited_uses: + explanation: "There are no known prohibited uses of the dataset stated by authors.\n" + value: unknown + quality_control: unknown + sample: [] + size: 10.5 TB Text, 2.2B Text-Image pairs, 1.5T tokens of simulated control, 500k + robotics trajectories + type: dataset + url: https://www.deepmind.com/blog/a-generalist-agent +- access: + explanation: Models are available for download from the [[Github repository]](https://github.com/deepmind/alphafold) + value: open + analysis: '' created_date: - value: 2021-07-15 explanation: The date the model paper was released - url: https://www.nature.com/articles/s41586-021-03819-2 - model_card: none - modality: protein sequence - size: 93M parameters (dense model) - analysis: '' + value: 2021-07-15 dependencies: [Protein Data Bank] - training_emissions: '' - training_time: - value: 11 days - explanation: Training takes "approximately 1 week" and finetuning takes "approximately - 4 days" - training_hardware: - value: 128 TPUv3 cores - explanation: Described in paper - quality_control: '' - access: - value: open - explanation: Models are available for download from the [[Github repository]](https://github.com/deepmind/alphafold) + description: AlphaFold2 is a protein language model trained on protein sequences + feedback: '' + intended_uses: '' license: - value: Apache 2.0 explanation: The license is provided in the [[Github repository]](https://github.com/deepmind/alphafold) - intended_uses: '' - prohibited_uses: '' + value: Apache 2.0 + modality: + explanation: protein sequence + value: text; text + model_card: none monitoring: '' - feedback: '' - -- type: model - name: Flamingo - # General + name: AlphaFold2 organization: DeepMind - description: > - Flamingo is a Visual Language Model using the Transformer architecture that - is intended for few-shot learning. + prohibited_uses: '' + quality_control: '' + size: 93M parameters (dense) + training_emissions: '' + training_hardware: + explanation: Described in paper + value: 128 TPUv3 cores + training_time: + explanation: Training takes "approximately 1 week" and finetuning takes "approximately + 4 days" + value: 11 days + type: model + url: https://www.nature.com/articles/s41586-021-03819-2 +- access: + explanation: "The model has not been released and no discussion of release is\ + \ stated in the model card [[Model Card]](https://arxiv.org/pdf/2204.14198.pdf#appendix.E).\n" + value: closed + analysis: "Model performance was evaluated on image and video datasets primarily,\ + \ including dialogue.\n" created_date: + explanation: "The date that Flamingo was posted to arXiv [[arXiv]] (https://arxiv.org/abs/2204.14198).\n" value: 2022-04-29 - explanation: > - The date that Flamingo was posted to arXiv - [[arXiv]] - (https://arxiv.org/abs/2204.14198). - url: https://arxiv.org/pdf/2204.14198.pdf - model_card: https://arxiv.org/pdf/2204.14198.pdf#appendix.E - modality: Images, Text - size: - value: 80B parameters (dense model) - explanation: > - Gato family has models of several sizes (3.2B, 9.3B, 80B) which involve frozen - language, vision, and trainable parameters - [[Table 5]](https://arxiv.org/pdf/2204.14198.pdf). - analysis: > - Model performance was evaluated on image and video datasets primarily, including - dialogue. 
- # Construction dependencies: [M3W, ALIGN, LTIP, VTP, Chinchilla] - training_emissions: - value: unknown - explanation: > - Authors do not report the training emissions. - training_time: - value: 15 days on 1536 TPUs - explanation: > - Reported in the paper checklist - [[Checklist]](https://arxiv.org/pdf/2204.14198.pdf). - training_hardware: - value: TPU - explanation: > - Reported in the paper checklist - [[Checklist]](https://arxiv.org/pdf/2204.14198.pdf). - quality_control: + description: "Flamingo is a Visual Language Model using the Transformer architecture\ + \ that is intended for few-shot learning.\n" + feedback: + explanation: "No contact information is provided for feedback in the model card\ + \ [[Model Card]](https://arxiv.org/pdf/2204.14198.pdf#appendix.E).\n" value: none - explanation: > - Reported in the mitigations in the model card - [[Model Card]](https://openreview.net/pdf?id=1ikK0kHjvj#appendix.A). - # Downstream - access: - value: closed - explanation: > - The model has not been released and no discussion of release is stated in - the model card - [[Model Card]](https://arxiv.org/pdf/2204.14198.pdf#appendix.E). + intended_uses: "The intended uses are stated in the model card: \"The primary\ + \ use is research on visual language models (VLM), including: research on VLM\ + \ applications like classification, captioning or visual question answering,\ + \ understanding how strong VLMs can contribute to AGI, advancing fairness and\ + \ safety research in the area of multimodal research, and understanding limitations\ + \ of current large VLMs.\" [[Model Card]](https://arxiv.org/pdf/2204.14198.pdf#appendix.E).\n" license: + explanation: "No license is provided in the model card [[Model Card]](https://arxiv.org/pdf/2204.14198.pdf#appendix.E).\n" value: unknown - explanation: > - No license is provided in the model card - [[Model Card]](https://arxiv.org/pdf/2204.14198.pdf#appendix.E). - intended_uses: > - The intended uses are stated in the model card: "The primary use is research - on visual language models (VLM), including: research on VLM applications like - classification, captioning or visual question answering, understanding how - strong VLMs can contribute to AGI, advancing - fairness and safety research in the area of multimodal research, and understanding - limitations of current large VLMs." - [[Model Card]](https://arxiv.org/pdf/2204.14198.pdf#appendix.E). - prohibited_uses: > - The model card lists the following as out of scope uses of the model: "Uses - of the model for visually conditioned language generation in harmful or deceitful - settings. Broadly speaking, - the model should not be used for downstream applications - without further safety and fairness mitigations specific to - each application." - [[Model Card]](https://arxiv.org/pdf/2204.14198.pdf#appendix.E). + modality: + explanation: Images, Text + value: image, text; image, text + model_card: https://arxiv.org/pdf/2204.14198.pdf#appendix.E monitoring: + explanation: "There is no information on how DeepMind is internally monitoring\ + \ the use of the model.\n" value: unknown - explanation: > - There is no information on how DeepMind is internally monitoring - the use of the model. - feedback: - value: none - explanation: > - No contact information is provided for feedback in the model card - [[Model Card]](https://arxiv.org/pdf/2204.14198.pdf#appendix.E). 
- -- type: model - name: AlphaCode + name: Flamingo organization: DeepMind - description: AlphaCode is an autoregressive language model trained on code + prohibited_uses: "The model card lists the following as out of scope uses of the\ + \ model: \"Uses of the model for visually conditioned language generation in\ + \ harmful or deceitful settings. Broadly speaking, the model should not be used\ + \ for downstream applications without further safety and fairness mitigations\ + \ specific to each application.\" [[Model Card]](https://arxiv.org/pdf/2204.14198.pdf#appendix.E).\n" + quality_control: + explanation: "Reported in the mitigations in the model card [[Model Card]](https://openreview.net/pdf?id=1ikK0kHjvj#appendix.A).\n" + value: none + size: 80B parameters (dense) + training_emissions: + explanation: "Authors do not report the training emissions.\n" + value: unknown + training_hardware: + explanation: "Reported in the paper checklist [[Checklist]](https://arxiv.org/pdf/2204.14198.pdf).\n" + value: TPU + training_time: + explanation: "Reported in the paper checklist [[Checklist]](https://arxiv.org/pdf/2204.14198.pdf).\n" + value: 15 days on 1536 TPUs + type: model + url: https://arxiv.org/pdf/2204.14198.pdf +- access: + explanation: DeepMind does not provide access to AlphaCode to external researchers + value: closed + analysis: '' created_date: - value: 2022-02-02 explanation: The date the model paper was released - url: https://arxiv.org/abs/2203.07814 - model_card: '' - modality: code - analysis: '' - size: 41B parameters (dense model) + value: 2022-02-02 dependencies: [] - training_emissions: '' - training_time: '' - training_hardware: '' - quality_control: '' - access: - value: closed - explanation: DeepMind does not provide access to AlphaCode to external researchers + description: AlphaCode is an autoregressive language model trained on code + feedback: '' + intended_uses: '' license: - value: unknown explanation: '' - intended_uses: '' - prohibited_uses: '' + value: unknown + modality: + explanation: code + value: code; code + model_card: '' monitoring: '' - feedback: '' - -- type: model - name: Gopher - # General + name: AlphaCode organization: DeepMind - description: > - Gopher is an autoregressive language model based on the Transformer - architecture with two modifications: using RMSNorm instead of LayerNorm and - using relative positional encoding scheme instead of absolute positional - encodings - [[Section 3]](https://arxiv.org/pdf/2112.11446.pdf#subsection.3.1). + prohibited_uses: '' + quality_control: '' + size: 41B parameters (dense) + training_emissions: '' + training_hardware: '' + training_time: '' + type: model + url: https://arxiv.org/abs/2203.07814 +- access: + explanation: "The model access is limited to DeepMind researchers. 
The model\ + \ won't be released to the public [[Model Card]](https://arxiv.org/pdf/2112.11446.pdf#appendix.B).\n" + value: closed + analysis: "Model performance was evaluated and analyzed on 152 NLP tasks including:\ + \ Language Modelling (20), Reading Comprehension (3), Fact Checking (3), Question\ + \ Answering (3), Common Sense (4), MMLU (57), BIG-bench (62) [[Section 4]](https://arxiv.org/pdf/2112.11446.pdf#section.4);\ + \ on toxicity and bias datasets [[Section 5]](https://arxiv.org/pdf/2112.11446.pdf#section.5);\ + \ and on dialogue tasks [[Section 6]](https://arxiv.org/pdf/2112.11446.pdf#section.6).\n" created_date: + explanation: "The date that Gopher was announced [[DeepMind Blog Post]] (https://www.deepmind.com/blog/language-modelling-at-scale-gopher-ethical-considerations-and-retrieval).\n" value: 2021-12-08 - explanation: > - The date that Gopher was announced - [[DeepMind Blog Post]] - (https://www.deepmind.com/blog/language-modelling-at-scale-gopher-ethical-considerations-and-retrieval). - url: https://arxiv.org/pdf/2112.11446.pdf - model_card: https://arxiv.org/pdf/2112.11446.pdf#appendix.B - modality: Text (English) and Code - size: - value: 280B parameters (dense model) - explanation: > - Gopher family has models of several sizes, but the name - Gopher uniquely identify the 280B parameter version. Sizes for the other - models in the Gopher family can be seen in the paper - [[Table 1]](https://arxiv.org/pdf/2112.11446.pdf#table.caption.1). - analysis: > - Model performance was evaluated and analyzed on 152 NLP tasks including: - Language Modelling (20), Reading Comprehension (3), Fact Checking (3), - Question Answering (3), Common Sense (4), MMLU (57), BIG-bench (62) - [[Section 4]](https://arxiv.org/pdf/2112.11446.pdf#section.4); on toxicity - and bias datasets - [[Section 5]](https://arxiv.org/pdf/2112.11446.pdf#section.5); and on - dialogue tasks - [[Section 6]](https://arxiv.org/pdf/2112.11446.pdf#section.6). 
- # Construction dependencies: [MassiveText] + description: "Gopher is an autoregressive language model based on the Transformer\ + \ architecture with two modifications: using RMSNorm instead of LayerNorm and\ + \ using relative positional encoding scheme instead of absolute positional encodings\ + \ [[Section 3]](https://arxiv.org/pdf/2112.11446.pdf#subsection.3.1).\n" + feedback: "The feedback for the model can be provided at the email linked in the\ + \ model card, geoffreyi at google.com [[Model Card]](https://arxiv.org/pdf/2112.11446.pdf#appendix.B).\n" + intended_uses: "The intended uses are stated in the Gopher model card: \"The primary\ + \ use is research on language models, including: research on NLP applications\ + \ like machine translation and question answering, understanding how strong\ + \ language models can contribute to AGI, advancing fairness and safety research,\ + \ and understanding limitations of current LLMs\" [[Model Card]](https://arxiv.org/pdf/2112.11446.pdf#appendix.B).\n" + license: + explanation: "The model likely has a license specifically for DeepMind's use,\ + \ based on the information provided in the model card [[Model Card]](https://arxiv.org/pdf/2112.11446.pdf#appendix.B).\n" + value: unknown + modality: + explanation: Text (English) and Code + value: code, text; code, text + model_card: https://arxiv.org/pdf/2112.11446.pdf#appendix.B + monitoring: + explanation: "There is no information on how DeepMind is internally monitoring\ + \ the use of the model.\n" + value: unknown + name: Gopher + organization: DeepMind + prohibited_uses: "The model card lists the following as out of scope uses of the\ + \ model: \"for language generation in harmful or deceitful settings. More generally,\ + \ the model should not be used for downstream applications without further safety\ + \ and fairness mitigations\" [[Model Card]](https://arxiv.org/pdf/2112.11446.pdf#appendix.B).\n" + quality_control: none + size: 280B parameters (dense) training_emissions: + explanation: "The training emission estimate from the paper [[Section F]](https://arxiv.org/pdf/2112.11446.pdf#appendix.F)\n" value: 380 tCO2e - explanation: > - The training emission estimate from the paper - [[Section F]](https://arxiv.org/pdf/2112.11446.pdf#appendix.F) - training_time: - value: 7303.24 petaflop/s-day - explanation: > - The authors reported the training petaflops for all of the 4 different - sizes of the model. For the 280B parameter model, the petaflops was - reported as 6.31E+08. We compute the Gopher's training time in - petaflop/s-day as 6.31E+08 / (60*60*24) = 7303.24 petaflop/s-day. training_hardware: + explanation: "Reported in the paper [[Section F]](https://arxiv.org/pdf/2112.11446.pdf#appendix.F).\n" value: TPUv3 pods - explanation: > - Reported in the paper - [[Section F]](https://arxiv.org/pdf/2112.11446.pdf#appendix.F). - quality_control: none - # Downstream - access: + training_time: + explanation: "The authors reported the training petaflops for all of the 4 different\ + \ sizes of the model. For the 280B parameter model, the petaflops was reported\ + \ as 6.31E+08. We compute the Gopher's training time in petaflop/s-day as\ + \ 6.31E+08 / (60*60*24) = 7303.24 petaflop/s-day.\n" + value: 7303.24 petaflop/s-day + type: model + url: https://arxiv.org/pdf/2112.11446.pdf +- access: + explanation: "The model access is limited to DeepMind researchers. 
The model\ \ won't be released to the public [[Model Card]](https://arxiv.org/pdf/2112.11446.pdf#appendix.B).\n" + value: closed + analysis: "Model performance was evaluated and analyzed on many NLP tasks including\ \ language modeling, reading comprehension, question answering, commonsense-intensive\ \ tasks, and the BIG-Bench and MMLU meta-benchmarks.\n" + created_date: + explanation: "The date that Chinchilla was posted on arXiv [[arXiv]] (https://arxiv.org/abs/2203.15556).\n" + value: 2022-03-29 + dependencies: [MassiveText] + description: "Chinchilla is an autoregressive language model based on the Transformer\ \ architecture with improved scaling laws.\n" + feedback: "The feedback for the model can be provided at the email linked in the\ \ model card, {jordanhoffmann, sborgeaud, amensch,sifre} at deepmind.com [[Model\ \ Card]](https://arxiv.org/pdf/2203.15556.pdf#appendix.I).\n" + intended_uses: "The intended uses are stated in the Chinchilla model card: \"\ The primary use is research on language models, including: research on the scaling\ \ behaviour of language models along with those listed in Gopher paper\" [[Model\ \ Card]](https://arxiv.org/pdf/2203.15556.pdf#appendix.I).\n" license: + explanation: "The model likely has a license specifically for DeepMind's use,\ \ based on the information provided in the model card [[Model Card]](https://arxiv.org/pdf/2203.15556.pdf#appendix.I).\n" value: unknown + modality: + explanation: text (English) and code + value: text; text + model_card: https://arxiv.org/pdf/2203.15556.pdf monitoring: - value: unknown - explanation: > - There is no information on how DeepMind is internally monitoring - the use of the model. - feedback: > - The feedback for the model can be provided at the email linked in the - model card, geoffreyi at google.com [[Model Card]](https://arxiv.org/pdf/2112.11446.pdf#appendix.B). - -- type: model + explanation: "There is no information on how DeepMind is internally monitoring\ \ the use of the model.\n" + value: unknown name: Chinchilla - # General organization: DeepMind - description: > - Chinchilla is an autoregressive language model based on the Transformer - architecture with improved scaling laws. - created_date: - value: 2022-03-29 - explanation: > - The date that Chinchilla was posted on arXiv - [[arXiv]] - (https://arxiv.org/abs/2203.15556).
- url: https://arxiv.org/pdf/2203.15556.pdf - model_card: https://arxiv.org/pdf/2203.15556.pdf - modality: text (English) and cpde - size: - value: 70B parameters (dense model) - explanation: > - Chinchilla family has many models, but the primary model is the 70B parameter - model. - analysis: > - Model performance was evaluated and analyzed on many NLP tasks including language - modeling, reading comprehension, question answering, commonsense-intensive - tasks, and the BIG-Bench and MMLU meta-benchmarks. - # Construction - dependencies: [MassiveText] + prohibited_uses: "The model card lists the following as out of scope uses of the\ + \ model: \"for language generation in harmful or deceitful settings. More generally,\ + \ the model should not be used for downstream applications without further safety\ + \ and fairness mitigations\" [[Model Card]](https://arxiv.org/pdf/2203.15556.pdf#appendix.I).\n" + quality_control: none + size: 70B parameters (dense) training_emissions: + explanation: "Authors do not report the training emissions.\n" value: Unknown - explanation: > - Authors do not report the training emissions. - training_time: - value: 7303.24 petaflop/s-day - explanation: > - The authors reported the training petaflops for all models, including hypothetical - larger models. - For the 70B parameter model, the petaflops was - reported as 5.76E+08. We compute the Gopher's training time in - petaflop/s-day as 5.76E+08 / (60*60*24) = 6666.66 petaflop/s-day. training_hardware: + explanation: "Reported in the paper [[Section 4]](https://arxiv.org/pdf/2203.15556.pdf).\n" value: TPUv3/TPUv4 pods - explanation: > - Reported in the paper - [[Section 4]](https://arxiv.org/pdf/2203.15556.pdf). - quality_control: none - # Downstream - access: + training_time: + explanation: "The authors reported the training petaflops for all models, including\ + \ hypothetical larger models. For the 70B parameter model, the petaflops was\ + \ reported as 5.76E+08. We compute the Gopher's training time in petaflop/s-day\ + \ as 5.76E+08 / (60*60*24) = 6666.66 petaflop/s-day.\n" + value: 7303.24 petaflop/s-day + type: model + url: https://arxiv.org/pdf/2203.15556.pdf +- access: + explanation: "The model access is limited to DeepMind researchers. The model\ + \ won't be released to the public [[Model Card]](https://openreview.net/pdf?id=1ikK0kHjvj#appendix.A).\n" value: closed - explanation: > - The model access is limited to DeepMind researchers. The model won't be - released to the public - [[Model Card]](https://arxiv.org/pdf/2112.11446.pdf#appendix.B). 
+ analysis: "Model performance was evaluated on simulated and robotics task primarily,\ + \ including out-of-distribution and skill generalization.\n" + created_date: + explanation: "The date that Gato was announced [[DeepMind Blog Post]] (https://www.deepmind.com/blog/a-generalist-agent).\n" + value: 2022-05-12 + dependencies: [Gato dataset] + description: "Gato is a generalist agent based on sequence modeling using the\ + \ Transformer architecture to implement multi-modal, multi-task, multi-embodiment\ + \ generalist policy.\n" + feedback: "The feedback for the model can be provided at the email linked in the\ + \ model card, reedscot at google.com [[Model Card]](https://openreview.net/pdf?id=1ikK0kHjvj#appendix.A).\n" + intended_uses: "The intended uses are stated in the Gopher model card: \"Learn\ + \ to accomplish a wide variety of tasks from expert demonstrations, such as\ + \ playing video games, controlling simulated embodiments, and real world block\ + \ stacking.\" [[Model Card]](https://openreview.net/pdf?id=1ikK0kHjvj#appendix.A).\n" license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The model likely has a license specifically for DeepMind's use, - based on the information provided in the model card - [[Model Card]](https://arxiv.org/pdf/2203.15556.pdf#appendix.I). - intended_uses: > - The intended uses are stated in the Chinchilla model card: "The primary use - is research on language models, including: - research on the scaling behaviour of language models along with those listed - in Gopher paper" - [[Model Card]](https://arxiv.org/pdf/2203.15556.pdf#appendix.I). - prohibited_uses: > - The model card lists the following as out of scope uses of the model: "for - language generation in harmful or deceitful settings. More generally, the - model should not be used for downstream applications without further safety - and fairness mitigations" - [[Model Card]](https://arxiv.org/pdf/2203.15556.pdf#appendix.I). + modality: + explanation: text, image, control/robotics + value: image, text; image, text + model_card: https://openreview.net/pdf?id=1ikK0kHjvj#appendix.B monitoring: - value: Unknown - explanation: > - There is no information on how DeepMind is internally monitoring - the use of the model. - feedback: > - The feedback for the model can be provided at the email linked in the model - card, {jordanhoffmann, sborgeaud, amensch,sifre} at deepmind.com - [[Model Card]](https://arxiv.org/pdf/2203.15556.pdf#appendix.I). - -- type: model + explanation: "There is no information on how DeepMind is internally monitoring\ + \ the use of the model.\n" + value: unknown name: Gato - # General organization: DeepMind - description: > - Gato is a generalist agent based on sequence modeling using the Transformer - architecture to implement multi-modal, multi-task, multi-embodiment generalist - policy. - created_date: - value: 2022-05-12 - explanation: > - The date that Gato was announced - [[DeepMind Blog Post]] - (https://www.deepmind.com/blog/a-generalist-agent). - url: https://www.deepmind.com/blog/a-generalist-agent - model_card: https://openreview.net/pdf?id=1ikK0kHjvj#appendix.B - modality: text, image, control/robotics - size: - value: 1.2B parameters (dense model) - explanation: > - Gato family has models of several sizes (79M, 364M, 1.18B) - [[Table 6]](https://openreview.net/pdf?id=1ikK0kHjvj). 
- analysis: > - Model performance was evaluated on simulated and robotics task primarily, - including out-of-distribution and skill generalization. - # Construction - dependencies: [Gato dataset] + prohibited_uses: "The model card lists the following as out of scope uses of the\ + \ model: \"Not intended for commercial or production use. Military uses are\ + \ strictly prohibited.\" [[Model Card]](https://openreview.net/pdf?id=1ikK0kHjvj#appendix.A).\n" + quality_control: + explanation: "Reported in the mitigations in the model card [[Model Card]](https://openreview.net/pdf?id=1ikK0kHjvj#appendix.A).\n" + value: none + size: 1.2B parameters (dense) training_emissions: + explanation: "Authors do not report the training emissions.\n" value: unknown - explanation: > - Authors do not report the training emissions. - training_time: - value: 4 days on a 16x16 TPU v3 slice - explanation: > - Reported in the paper - [[Section 2.3]](https://openreview.net/pdf?id=1ikK0kHjvj). training_hardware: + explanation: "Reported in the paper [[Section 2.3]](https://openreview.net/pdf?id=1ikK0kHjvj).\n" value: 16x16 TPU v3 slice - explanation: > - Reported in the paper - [[Section 2.3]](https://openreview.net/pdf?id=1ikK0kHjvj). - quality_control: - value: none - explanation: > - Reported in the mitigations in the model card - [[Model Card]](https://openreview.net/pdf?id=1ikK0kHjvj#appendix.A). - # Downstream - access: - value: closed - explanation: > - The model access is limited to DeepMind researchers. The model won't be - released to the public - [[Model Card]](https://openreview.net/pdf?id=1ikK0kHjvj#appendix.A). - license: - value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - intended_uses: > - The intended uses are stated in the Gopher model card: "Learn to accomplish - a wide variety of tasks from expert demonstrations, such as playing video - games, controlling simulated embodiments, - and real world block stacking." - [[Model Card]](https://openreview.net/pdf?id=1ikK0kHjvj#appendix.A). - prohibited_uses: > - The model card lists the following as out of scope uses of the model: "Not - intended for commercial or production use. Military uses are - strictly prohibited." - [[Model Card]](https://openreview.net/pdf?id=1ikK0kHjvj#appendix.A). - monitoring: - value: unknown - explanation: > - There is no information on how DeepMind is internally monitoring - the use of the model. - feedback: > - The feedback for the model can be provided at the email linked in the model - card, reedscot at google.com - [[Model Card]](https://openreview.net/pdf?id=1ikK0kHjvj#appendix.A). 
- -- type: model - name: Sparrow - organization: DeepMind - description: '' - created_date: 2022-09-28 - url: https://arxiv.org/abs/2209.14375 - model_card: '' - modality: text + training_time: + explanation: "Reported in the paper [[Section 2.3]](https://openreview.net/pdf?id=1ikK0kHjvj).\n" + value: 4 days on a 16x16 TPU v3 slice + type: model + url: https://www.deepmind.com/blog/a-generalist-agent +- access: closed analysis: '' - size: 70B + created_date: 2022-09-28 dependencies: - Chinchilla - Google Search - Sparrow Rule reward model - Sparrow Preference reward model + description: '' + feedback: '' + intended_uses: '' + license: + explanation: "The asset isn't released, and hence the license is unknown.\n" + value: unknown + modality: + explanation: text + value: text; text + model_card: '' + monitoring: '' + name: Sparrow + organization: DeepMind + prohibited_uses: '' + quality_control: '' + size: 70B parameters (dense) training_emissions: '' - training_time: '' training_hardware: '' - quality_control: '' - access: closed + training_time: '' + type: model + url: https://arxiv.org/abs/2209.14375 +- access: closed + analysis: '' + created_date: 2021-12-08 + dependencies: [MassiveText] + description: '' + feedback: '' + intended_uses: '' license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - intended_uses: '' - prohibited_uses: '' + modality: + explanation: Text + value: text; text + model_card: '' monitoring: '' - feedback: '' - -- type: model name: RETRO organization: DeepMind - description: '' - created_date: 2021-12-08 - url: https://arxiv.org/abs/2112.04426 - model_card: '' - modality: Text - analysis: '' - size: 7.5B - dependencies: [MassiveText] + prohibited_uses: '' + quality_control: '' + size: 7.5B parameters (dense) training_emissions: '' - training_time: '' training_hardware: '' - quality_control: '' - access: closed + training_time: '' + type: model + url: https://arxiv.org/abs/2112.04426 +- access: closed + analysis: '' + created_date: 2022-09-28 + dependencies: [Chinchilla, Sparrow adversarial probing dataset] + description: '' + feedback: '' + intended_uses: '' license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - intended_uses: '' - prohibited_uses: '' + modality: + explanation: text + value: text; text + model_card: '' monitoring: '' - feedback: '' - -- type: model name: Sparrow Rule reward model organization: DeepMind - description: '' - created_date: 2022-09-28 - url: https://arxiv.org/abs/2209.14375 - model_card: '' - modality: text - analysis: '' - size: 70B - dependencies: [Chinchilla, Sparrow adversarial probing dataset] + prohibited_uses: '' + quality_control: '' + size: 70B parameters (dense) training_emissions: '' - training_time: '' training_hardware: '' - quality_control: '' - access: closed + training_time: '' + type: model + url: https://arxiv.org/abs/2209.14375 +- access: closed + analysis: '' + created_date: 2022-09-28 + dependencies: [Chinchilla, Sparrow response preference dataset] + description: '' + feedback: '' + intended_uses: '' license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. 
- intended_uses: '' - prohibited_uses: '' + modality: + explanation: text + value: text; text + model_card: '' monitoring: '' - feedback: '' - -- type: model name: Sparrow Preference reward model organization: DeepMind - description: '' - created_date: 2022-09-28 - url: https://arxiv.org/abs/2209.14375 - model_card: '' - modality: text - analysis: '' - size: 70B - dependencies: [Chinchilla, Sparrow response preference dataset] + prohibited_uses: '' + quality_control: '' + size: 70B parameters (dense) training_emissions: '' - training_time: '' training_hardware: '' - quality_control: '' - access: closed + training_time: '' + type: model + url: https://arxiv.org/abs/2209.14375 +- access: closed + analysis: '' + created_date: 2022-09-28 + datasheet: '' + dependencies: [Chinchilla] + description: '' + excluded: '' + feedback: '' + included: '' + intended_uses: '' license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - intended_uses: '' - prohibited_uses: '' + modality: text monitoring: '' - feedback: '' - -- type: dataset name: Sparrow adversarial probing dataset organization: DeepMind - description: '' - created_date: 2022-09-28 - url: https://arxiv.org/abs/2209.14375 - datasheet: '' - modality: text - size: 27k ratings + prohibited_uses: '' + quality_control: '' sample: [] + size: 27k ratings + type: dataset + url: https://arxiv.org/abs/2209.14375 +- access: closed analysis: '' + created_date: 2022-09-28 + datasheet: '' dependencies: [Chinchilla] - included: '' + description: '' excluded: '' - quality_control: '' - access: closed + feedback: '' + included: '' + intended_uses: '' license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - intended_uses: '' - prohibited_uses: '' + modality: text monitoring: '' - feedback: '' - -- type: dataset name: Sparrow response preference dataset organization: DeepMind - description: '' - created_date: 2022-09-28 - url: https://arxiv.org/abs/2209.14375 - datasheet: '' - modality: text - size: 72k comparisons + prohibited_uses: '' + quality_control: '' sample: [] + size: 72k comparisons + type: dataset + url: https://arxiv.org/abs/2209.14375 +- access: closed analysis: '' - dependencies: [Chinchilla] - included: '' - excluded: '' - quality_control: '' - access: closed + created_date: 2022-03-16 + dependencies: [Gopher, Google Search, GopherCite reward model] + description: '' + feedback: '' + intended_uses: '' license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. 
- intended_uses: '' - prohibited_uses: '' + modality: + explanation: Text + value: text; text + model_card: '' monitoring: '' - feedback: '' - -- type: model name: GopherCite organization: DeepMind - description: '' - created_date: 2022-03-16 - url: https://storage.googleapis.com/deepmind-media/Teaching%20language%20models%20to%20support%20answers%20with%20verified%20quotes/Teaching%20language%20models%20to%20support%20answers%20with%20verified%20quotes.pdf - model_card: '' - modality: Text - analysis: '' - size: 280B - dependencies: [Gopher, Google Search, GopherCite reward model] + prohibited_uses: '' + quality_control: '' + size: 280B parameters (dense) training_emissions: '' - training_time: '' training_hardware: '' - quality_control: '' - access: closed + training_time: '' + type: model + url: https://storage.googleapis.com/deepmind-media/Teaching%20language%20models%20to%20support%20answers%20with%20verified%20quotes/Teaching%20language%20models%20to%20support%20answers%20with%20verified%20quotes.pdf +- access: closed + analysis: '' + created_date: 2022-03-16 + dependencies: [Gopher, GopherCite Preference dataset] + description: '' + feedback: '' + intended_uses: '' license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - intended_uses: '' - prohibited_uses: '' + modality: + explanation: text + value: text; text + model_card: '' monitoring: '' - feedback: '' - -- type: model name: GopherCite reward model organization: DeepMind - description: '' - created_date: 2022-03-16 - url: https://storage.googleapis.com/deepmind-media/Teaching%20language%20models%20to%20support%20answers%20with%20verified%20quotes/Teaching%20language%20models%20to%20support%20answers%20with%20verified%20quotes.pdf - model_card: '' - modality: text - analysis: '' - size: 7B - dependencies: [Gopher, GopherCite Preference dataset] + prohibited_uses: '' + quality_control: '' + size: 7B parameters (dense) training_emissions: '' - training_time: '' training_hardware: '' - quality_control: '' - access: closed + training_time: '' + type: model + url: https://storage.googleapis.com/deepmind-media/Teaching%20language%20models%20to%20support%20answers%20with%20verified%20quotes/Teaching%20language%20models%20to%20support%20answers%20with%20verified%20quotes.pdf +- access: closed + analysis: '' + created_date: 2022-03-16 + datasheet: '' + dependencies: [Gopher, Google Search] + description: '' + excluded: '' + feedback: '' + included: '' + intended_uses: '' license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. 
- intended_uses: '' - prohibited_uses: '' + modality: text monitoring: '' - feedback: '' - -- type: dataset name: GopherCite Preference dataset organization: DeepMind - description: '' - created_date: 2022-03-16 - url: https://storage.googleapis.com/deepmind-media/Teaching%20language%20models%20to%20support%20answers%20with%20verified%20quotes/Teaching%20language%20models%20to%20support%20answers%20with%20verified%20quotes.pdf - datasheet: '' - modality: text - size: 33k response pairs + prohibited_uses: '' + quality_control: '' sample: [] + size: 33k response pairs + type: dataset + url: https://storage.googleapis.com/deepmind-media/Teaching%20language%20models%20to%20support%20answers%20with%20verified%20quotes/Teaching%20language%20models%20to%20support%20answers%20with%20verified%20quotes.pdf +- access: closed analysis: '' - dependencies: [Gopher, Google Search] - included: '' - excluded: '' - quality_control: '' - access: closed + created_date: 2022-09-29 + dependencies: [Chinchilla] + description: '' + feedback: '' + intended_uses: '' license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - intended_uses: '' - prohibited_uses: '' + modality: + explanation: text + value: text; text + model_card: '' monitoring: '' - feedback: '' - -- type: model name: Dramatron organization: DeepMind - description: '' - created_date: 2022-09-29 - url: https://arxiv.org/abs/2209.14958 - model_card: '' - modality: text - analysis: '' - size: 70B parameters (dense model) - dependencies: [Chinchilla] + prohibited_uses: '' + quality_control: '' + size: 70B parameters (dense) training_emissions: '' - training_time: '' training_hardware: '' - quality_control: '' - access: closed - license: - value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - intended_uses: '' - prohibited_uses: '' - monitoring: '' - feedback: '' + training_time: '' + type: model + url: https://arxiv.org/abs/2209.14958 diff --git a/assets/duckduckgo.yaml b/assets/duckduckgo.yaml index 465b62eb..a1defb85 100644 --- a/assets/duckduckgo.yaml +++ b/assets/duckduckgo.yaml @@ -1,27 +1,25 @@ --- - -- type: application - name: DuckAssist - organization: DuckDuckGo +- access: open + adaptation: '' + created_date: 2023-03-08 + dependencies: [Anthropic API] description: The first Instant Answer in DuckDuckGo search results to use natural language technology to generate answers to search queries using Wikipedia and other related sources - created_date: 2023-03-08 - url: https://spreadprivacy.com/duckassist-launch/ - dependencies: [Anthropic API] - adaptation: '' - output_space: '' - quality_control: '' - access: open + failures: '' + feedback: '' + intended_uses: '' license: + explanation: "The asset is no longer available, and hence the license is unknown.\n" value: unknown - explanation: > - The asset is no longer available, and hence the license is unknown. 
- terms_of_service: '' - intended_uses: '' - prohibited_uses: '' monitoring: '' - feedback: '' monthly_active_users: '' + name: DuckAssist + organization: DuckDuckGo + output_space: '' + prohibited_uses: '' + quality_control: '' + terms_of_service: '' + type: application + url: https://spreadprivacy.com/duckassist-launch/ user_distribution: '' - failures: '' diff --git a/assets/duolingo.yaml b/assets/duolingo.yaml index de25f466..44e4e886 100644 --- a/assets/duolingo.yaml +++ b/assets/duolingo.yaml @@ -1,62 +1,58 @@ --- - -- type: application - name: Duolingo Explain My Answer - organization: Duolingo +- access: limited + adaptation: '' + created_date: 2023-03-14 + dependencies: [GPT-4 API] description: Explain My Answer offers learners the chance to learn more about their response in a lesson (whether their answer was correct or incorrect!) By tapping a button after certain exercise types, learners can enter a chat with Duo to get a simple explanation on why their answer was right or wrong, and ask for examples or further clarification. - created_date: 2023-03-14 - url: https://blog.duolingo.com/duolingo-max/ - dependencies: [GPT-4 API] - adaptation: '' - output_space: '' - quality_control: '' - access: limited + failures: '' + feedback: '' + intended_uses: '' license: + explanation: "Users are granted a limited license per Duolingo Terms.\n" value: custom - explanation: > - Users are granted a limited license per Duolingo Terms. - terms_of_service: https://www.duolingo.com/terms - intended_uses: '' - prohibited_uses: '' monitoring: '' - feedback: '' monthly_active_users: '' - user_distribution: '' - failures: '' - -- type: application - name: Duolingo Max + name: Duolingo Explain My Answer organization: Duolingo + output_space: '' + prohibited_uses: '' + quality_control: '' + terms_of_service: https://www.duolingo.com/terms + type: application + url: https://blog.duolingo.com/duolingo-max/ + user_distribution: '' +- access: limited + adaptation: '' + created_date: 2023-03-14 + dependencies: [Duolingo Role Play, Duolingo Explain My Answer] description: Duolingo Max is a new subscription tier above Super Duolingo that gives learners access to two brand-new features and exercises - Explain My Answer and Roleplay. - created_date: 2023-03-14 - url: https://blog.duolingo.com/duolingo-max/ - dependencies: [Duolingo Role Play, Duolingo Explain My Answer] - adaptation: '' - output_space: '' - quality_control: '' - access: limited + failures: '' + feedback: '' + intended_uses: '' license: + explanation: "Users are granted a limited license per Duolingo Terms.\n" value: custom - explanation: > - Users are granted a limited license per Duolingo Terms. - terms_of_service: '' - intended_uses: '' - prohibited_uses: '' monitoring: '' - feedback: '' monthly_active_users: '' - user_distribution: '' - failures: '' - -- type: application - name: Duolingo Role Play + name: Duolingo Max organization: Duolingo + output_space: '' + prohibited_uses: '' + quality_control: '' + terms_of_service: '' + type: application + url: https://blog.duolingo.com/duolingo-max/ + user_distribution: '' +- access: limited + adaptation: '' + created_date: 2023-03-14 + dependencies: [GPT-4 API] description: Roleplay allows learners to practice real-world conversation skills with world characters in the app. These challenges, which earn XP, will live alongside the path as one of the “Side Quests” learners can access by tapping @@ -64,26 +60,24 @@ scenarios! 
Learners might discuss future vacation plans with Lin, order coffee at a
    café in Paris, go furniture shopping with Eddy, or ask a friend to go for
    a hike.
-  created_date: 2023-03-14
-  url: https://blog.duolingo.com/duolingo-max/
-  dependencies: [GPT-4 API]
-  adaptation: ''
-  output_space: ''
-  quality_control: ''
-  access: limited
+  failures: ''
+  feedback: ''
+  intended_uses: ''
   license:
+    explanation: "\"Subject to the terms of these Terms and Conditions, Duolingo\
+      \ grants you a non-transferable, non-exclusive license to download, install,\
+      \ and use one copy of each App in object code form only on an interactive\
+      \ wireless device that you own or control\" - excerpt from the Terms of Service\
+      \ document.\n"
     value: custom
-    explanation: >
-      "Subject to the terms of these Terms and Conditions, Duolingo grants you
-      a non-transferable, non-exclusive license to download, install, and use
-      one copy of each App in object code form only on an interactive wireless
-      device that you own or control" - excerpt from the Terms of Service
-      document.
-  terms_of_service: https://www.duolingo.com/terms
-  intended_uses: ''
-  prohibited_uses: ''
   monitoring: ''
-  feedback: ''
   monthly_active_users: ''
+  name: Duolingo Role Play
+  organization: Duolingo
+  output_space: ''
+  prohibited_uses: ''
+  quality_control: ''
+  terms_of_service: https://www.duolingo.com/terms
+  type: application
+  url: https://blog.duolingo.com/duolingo-max/
   user_distribution: ''
-  failures: ''
diff --git a/assets/eleutherai.yaml b/assets/eleutherai.yaml
index 524c8010..5813633b 100644
--- a/assets/eleutherai.yaml
+++ b/assets/eleutherai.yaml
@@ -1,16 +1,49 @@
 ---
-
-- type: dataset
-  name: The Pile
-  # General
-  organization: EleutherAI
-  description: >
-    A latge language model training dataset, used to train GPT-NeoX-20B.
+- access:
+    explanation: "The dataset is freely available to the public and can be downloaded\
+      \ from The Eye [[The Pile]](https://mystic.the-eye.eu/public/AI/pile/).\n"
+    value: open
+  analysis: "Analyses of the data's composition, document statistics, language/dialectal\
+    \ coverage, topical distribution, and biases are conducted in\
+    \ the paper [[The Pile Paper]](https://arxiv.org/pdf/2101.00027.pdf).\n"
   created_date: 2021-01-01
-  url: https://arxiv.org/pdf/2101.00027.pdf
   datasheet: https://arxiv.org/pdf/2201.07311.pdf
-  modality: text (English) and code
-  size: 825 GB
+  dependencies: []
+  description: "A large language model training dataset, used to train GPT-NeoX-20B.\n"
+  excluded: "Authors report that they have excluded some datasets \"because they\
+    \ were too small to be worth spending time or because the English component\
+    \ of the data did not merit inclusion on its own. Three datasets were excluded\
+    \ for other reasons: (1) US Congressional Records were excluded because it \"\
+    reflects the opinions and biases of the political class over the past 200 years,\
+    \ including segregationism and xenophobia.\" (2) Online Fanfiction resources\
+    \ amounting to Hundreds of GiB were excluded on logistical grounds. 
(3) Literotica,\ + \ platform where users can upload short-form erotic fiction, was excluded because\ + \ the authors decided to exclude fanfiction, the corpus would require significant\ + \ investigation, and corpus contain significant amount of stereotyping [[Appendix\ + \ B]](https://arxiv.org/pdf/2101.00027.pdf).\n" + feedback: "Feedback can be given by emailing the authors at contact at eleuther.ai.\n" + included: "The Pile data come from 22 sources, with over half of the data being\ + \ from Common Crawl (Pile-CC; 227GB), fiction and nonfiction books (Books3;\ + \ 101GB), biomedical articles (PubMed Central; 90GB), and code (Github; 95 GB).\ + \ Refer to the paper for full decomposition [[Table 1]](https://arxiv.org/pdf/2101.00027.pdf#table.caption.2).\n" + intended_uses: "The Pile was intended to be used as a high quality large text\ + \ dataset for language modeling tasks, explained in more detail in the paper\ + \ [[Section 1]](https://arxiv.org/pdf/2101.00027.pdf#section.1).\n" + license: + explanation: As indicated in the paper. + value: MIT + modality: + explanation: text (English) and code + value: code, text + monitoring: none + name: The Pile + organization: EleutherAI + prohibited_uses: none + quality_control: "In addition to the data inclusion and exclusion decisions, the\ + \ quality was controlled through filtering for English (pycld2 language classifier),\ + \ filtering for documents similar to OpenWebText2 (classifier on CommonCrawl),\ + \ and several forms of deduplication as detailed in the paper [[Appendix C]](https://arxiv.org/pdf/2101.00027.pdf#appendix.1.C)\ + \ [[Appendix D]](https://arxiv.org/pdf/2101.00027.pdf#appendix.1.D).\n" sample: - '...pot trending topics and the coverage around them. First up, there’s a bit of a visual redesign. Previously, clicking on a trending topic would highlight @@ -31,287 +64,204 @@ the defendants urge that this Court should reconsider the case of Green Mountain Marble Co. v. Highway Board, supra, and follow the Federal practice of looking to the evide - analysis: > - Analyses of the data's composition, document statistics, - language/dialectal coverage, topical distribution, and biases are - conducted are conducted in the paper - [[The Pile Paper]](https://arxiv.org/pdf/2101.00027.pdf). - # Construction - dependencies: [] - license: - value: MIT - explanation: As indicated in the paper. - included: > - The Pile data come from 22 sources, with over half of the data being from - Common Crawl (Pile-CC; 227GB), fiction and nonfiction books (Books3; 101GB), - biomedical articles (PubMed Central; 90GB), and code (Github; 95 GB). - Refer to the paper for full decomposition - [[Table 1]](https://arxiv.org/pdf/2101.00027.pdf#table.caption.2). - excluded: > - Authors report that they have excluded some datasets "because they were too - small to be worth spending time or because the English component of the data - did not merit inclusion on its own. Three datasets were excluded for other - reasons: (1) US Congressional Records were excluded because it "reflects the - opinions and biases of the political class over the past 200 years, - including segregationism and xenophobia." (2) Online Fanfiction resources - amounting to Hundreds of GiB were excluded on logistical grounds. 
- (3) Literotica, platform where users can upload short-form erotic fiction, - was excluded because the authors decided to exclude fanfiction, the - corpus would require significant investigation, and corpus contain - significant amount of stereotyping - [[Appendix B]](https://arxiv.org/pdf/2101.00027.pdf). - quality_control: > - In addition to the data inclusion and exclusion decisions, the quality was - controlled through filtering for English (pycld2 language classifier), - filtering for documents similar to OpenWebText2 (classifier on CommonCrawl), - and several forms of deduplication as detailed in the paper - [[Appendix C]](https://arxiv.org/pdf/2101.00027.pdf#appendix.1.C) - [[Appendix D]](https://arxiv.org/pdf/2101.00027.pdf#appendix.1.D). - # Downstream - access: + size: 825 GB + type: dataset + url: https://arxiv.org/pdf/2101.00027.pdf +- access: + explanation: "The model can be downloaded for free from [[The Eye]](https://mystic.the-eye.eu/public/AI/GPT-J-6B/step_383500.tar.zstd)\n" value: open - explanation: > - The dataset is freely available to the public and - can be downloaded from The Eye - [[The Pile]](https://mystic.the-eye.eu/public/AI/pile/). - intended_uses: > - The Pile was intended to be used as a high quality large text dataset for - language modeling tasks, explained in more detail in the paper - [[Section 1]](https://arxiv.org/pdf/2101.00027.pdf#section.1). - prohibited_uses: none - monitoring: none - feedback: > - Feedback can be given by emailing the authors at contact at eleuther.ai. - -- type: model - name: GPT-J - organization: EleutherAI - description: GPT-J is an open-source autoregressive language model. + analysis: '' created_date: - value: 2021-06-04 explanation: Date model blog post was published - url: https://arankomatsuzaki.wordpress.com/2021/06/04/gpt-j/ - model_card: '' - modality: text (English) - analysis: '' - size: 6B parameters (dense model) + value: 2021-06-04 dependencies: [The Pile] - training_emissions: '' - training_time: '' - training_hardware: 'TRC (Unspecified # of TPU v3-8s)' - quality_control: '' - access: - value: open - explanation: > - The model can be downloaded for free from [[The Eye]](https://mystic.the-eye.eu/public/AI/GPT-J-6B/step_383500.tar.zstd) + description: GPT-J is an open-source autoregressive language model. 
+  feedback: ''
+  intended_uses: ''
   license:
+    explanation: "As indicated in the [[Github repository]](https://github.com/kingoflolz/mesh-transformer-jax)\n"
     value: Apache 2.0
-    explanation: >
-      As indicated in the [[Github repository]](https://github.com/kingoflolz/mesh-transformer-jax)
-  intended_uses: ''
-  prohibited_uses: ''
+  modality:
+    explanation: text (English)
+    value: text; text
+  model_card: ''
   monitoring: ''
-  feedback: ''
-
-- type: model
-  name: GPT-Neo
+  name: GPT-J
   organization: EleutherAI
-  description: ''
-  created_date:
-    value: 2021-03-21
-    explanation: Date Github repo was update
-  url: https://github.com/EleutherAI/gpt-neo
-  model_card: ''
-  modality: Text (English)
-  analysis: ''
-  size: 2.7B parameters (dense model)
-  dependencies: [The Pile]
+  prohibited_uses: ''
+  quality_control: ''
+  size: 6B parameters (dense)
   training_emissions: ''
+  training_hardware: 'TRC (Unspecified # of TPU v3-8s)'
   training_time: ''
-  training_hardware: ''
-  quality_control: ''
-  access:
+  type: model
+  url: https://arankomatsuzaki.wordpress.com/2021/06/04/gpt-j/
+- access:
+    explanation: "The model can be downloaded for free from [[The Eye]](https://mystic.the-eye.eu/public/AI/gptneo-release/GPT3_2-7B/)\n"
     value: open
-    explanation: >
-      The model can be downloaded for free from [[The Eye]](https://mystic.the-eye.eu/public/AI/gptneo-release/GPT3_2-7B/)
-  license: MIT
+  analysis: ''
+  created_date:
+    explanation: Date Github repo was updated
+    value: 2021-03-21
+  dependencies: [The Pile]
+  description: ''
+  feedback: ''
   intended_uses: ''
-  prohibited_uses: ''
+  license: MIT
+  modality:
+    explanation: Text (English)
+    value: text; text
+  model_card: ''
   monitoring: ''
-  feedback: ''
-
-- type: model
-  name: GPT-NeoX
-  # General
+  name: GPT-Neo
   organization: EleutherAI
-  description: >
-    GPT-NeoX (20B) is an open-sourced autoregressive language model.
+  prohibited_uses: ''
+  quality_control: ''
+  size: 2.7B parameters (dense)
+  training_emissions: ''
+  training_hardware: ''
+  training_time: ''
+  type: model
+  url: https://github.com/EleutherAI/gpt-neo
+- access:
+    explanation: "The model can be downloaded for free from The Eye [[GPT-NeoX-20B]](https://mystic.the-eye.eu/public/AI/models/GPT-NeoX-20B/).\n"
+    value: open
+  analysis: "The model was evaluated on standard NLP benchmarks: LAMBADA, ANLI,\
+    \ HellaSwag, MMLU among others [[Section 4]](http://eaidata.bmk.sh/data/GPT_NeoX_20B.pdf#section.4).\n"
   created_date: 2022-02-02
-  url: http://eaidata.bmk.sh/data/GPT_NeoX_20B.pdf
-  model_card: https://mystic.the-eye.eu/public/AI/models/GPT-NeoX-20B/20B_model_card.md
-  modality: text (English) and code
-  size: 20B parameters (dense model)
-  analysis: >
-    The model was evaluated on standard NLP benchmarks: LAMBADA, ANLI,
-    HellaSwag, MMLU among others
-    [[Section 4]](http://eaidata.bmk.sh/data/GPT_NeoX_20B.pdf#section.4).
-  # Construction
   dependencies: [The Pile]
+  description: "GPT-NeoX (20B) is an open-sourced autoregressive language model.\n"
+  feedback: "Feedback can be provided using the # 20b channel in EleutherAI Discord\
+    \ group [[EleutherAI Blog Post]](https://blog.eleuther.ai/announcing-20b/).\
+    \ Find the Discord link in the FAQ page [[FAQ]](https://www.eleuther.ai/faq/).\n"
+  intended_uses: "As stated in the model card: \"GPT-NeoX-20B learns an inner representation\
+    \ of the English language that can be used to extract features useful for downstream\
+    \ tasks. The model is best at what it was pretrained for however, which is generating\ 
Due to the generality of the pretraining set, it has acquired\ + \ the ability to generate completions across a wide range of tasks - from programming\ + \ to fiction writing [[Model Card]](https://mystic.the-eye.eu/public/AI/models/GPT-NeoX-20B/20B_model_card.md).\"\ + \n" + license: + explanation: "As indicated in the accompanying blog post [[EleutherAI Blog Post]](https://blog.eleuther.ai/announcing-20b/).\n" + value: Apache 2.0 + modality: + explanation: text (English) and code + value: code, text; code, text + model_card: https://mystic.the-eye.eu/public/AI/models/GPT-NeoX-20B/20B_model_card.md + monitoring: none + name: GPT-NeoX + organization: EleutherAI + prohibited_uses: none + quality_control: none + size: 20B parameters (dense) training_emissions: + explanation: "The amount of emission during the development and training of\ + \ the model based on the author's estimation [[Section 6.4]](http://eaidata.bmk.sh/data/GPT_NeoX_20B.pdf#subsection.6.4).\n" value: 31.73 tCO2e - explanation: > - The amount of emission during the development and training of - the model based on the author's estimation - [[Section 6.4]](http://eaidata.bmk.sh/data/GPT_NeoX_20B.pdf#subsection.6.4). - training_time: - value: 47.10 petaflop/s-day - explanation: > - Training time was reported as 1830 hours reported by the authors, equaling - 76.25 days. - [[Section 6.4]](http://eaidata.bmk.sh/data/GPT_NeoX_20B.pdf#subsection.6.4). - The authors report that 96 (12 * 8) A100 GPUs were used during the - training. - The A100 GPUs have a single precision performance of 0.0195 petaflops - [[A100 Datasheet]](https://www.nvidia.com/content/dam/en-zz/Solutions/Data-Center/a100/pdf/nvidia-a100-datasheet-us-nvidia-1758950-r4-web.pdf). - Assuming the estimated utilization is 33%, following - [[OpenAI AI and Computer Blog]](https://openai.com/blog/ai-and-compute/#addendum), - the training time is 47.10 petaflop/s-day (76.25 * 96 * 0.0195 * 0.33). training_hardware: + explanation: "As outline by the authors [[Section 2.3]](http://eaidata.bmk.sh/data/GPT_NeoX_20B.pdf#subsection.2.3)\n" value: 12 x 8 A100 GPUs - explanation: > - As outline by the authors - [[Section 2.3]](http://eaidata.bmk.sh/data/GPT_NeoX_20B.pdf#subsection.2.3) - quality_control: none - # Downstream - access: - value: open - explanation: > - The model can be downloaded for free The Eye - [[GPT-NeoX-20B]](https://mystic.the-eye.eu/public/AI/models/GPT-NeoX-20B/). - license: - value: Apache 2.0 - explanation: > - As indicated in the accompanying blog post - [[EleutherAI Blog Post]](https://blog.eleuther.ai/announcing-20b/). - intended_uses: > - As stated in the model card: "GPT-NeoX-20B learns an inner representation - of the English language that can be used to extract features useful for - downstream tasks. The model is best at what it was pretrained for however, - which is generating text from a prompt. - Due to the generality of the pretraining set, it has acquired the ability - to generate completions across a wide range of tasks - from programming to - fiction writing - [[Model Card]](https://mystic.the-eye.eu/public/AI/models/GPT-NeoX-20B/20B_model_card.md)." - prohibited_uses: none - monitoring: none - feedback: > - Feedback can be provided using the # 20b channel in EleutherAI Discord - group - [[EleutherAI Blog Post]](https://blog.eleuther.ai/announcing-20b/). - Find the Discord link in the FAQ page - [[FAQ]](https://www.eleuther.ai/faq/). 
- -- type: application - name: GooseAI API - # General - organization: GooseAI - description: > - GooseAI API is an API service providing access to NLP services. + training_time: + explanation: "Training time was reported as 1830 hours reported by the authors,\ + \ equaling 76.25 days. [[Section 6.4]](http://eaidata.bmk.sh/data/GPT_NeoX_20B.pdf#subsection.6.4).\ + \ The authors report that 96 (12 * 8) A100 GPUs were used during the training.\ + \ The A100 GPUs have a single precision performance of 0.0195 petaflops [[A100\ + \ Datasheet]](https://www.nvidia.com/content/dam/en-zz/Solutions/Data-Center/a100/pdf/nvidia-a100-datasheet-us-nvidia-1758950-r4-web.pdf).\ + \ Assuming the estimated utilization is 33%, following [[OpenAI AI and Computer\ + \ Blog]](https://openai.com/blog/ai-and-compute/#addendum), the training time\ + \ is 47.10 petaflop/s-day (76.25 * 96 * 0.0195 * 0.33).\n" + value: 47.10 petaflop/s-day + type: model + url: http://eaidata.bmk.sh/data/GPT_NeoX_20B.pdf +- access: + explanation: "GooseAI API can be accessed by signing up on the goose.ai website.\n" + value: limited + adaptation: unknown created_date: unknown - url: goose.ai - # Construction dependencies: [GPT-NeoX] - adaptation: unknown - output_space: - value: Text Generation, Text Completion - explanation: > - Question/Answer and Classification tasks are coming soon according to - GooseAI - [[Main Page]](goose.ai). - quality_control: unknown - # Downstream - access: - value: limited - explanation: > - GooseAI API can be accessed by signing up on the goose.ai website. + description: "GooseAI API is an API service providing access to NLP services.\n" + failures: unknown + feedback: + explanation: "In the \"Error Reporting and Feedback\" section of the Goose.ai\ + \ Terms of Service, GooseAI asks all the feedback to be sent to support at\ + \ goose.ai [[GooseAI Terms of Service]](https://goose.ai/docs/tos).\n" + value: Email support + intended_uses: "Intended to be used as an NLP infrastructure.\n" license: + explanation: "\"Subject to Customer’s strict compliance with this TOS, GooseAI\ + \ grants Customer a limited, non-exclusive, non-transferable, non-sublicensable,\ + \ revocable license to access and use the Platform as described in and subject\ + \ to this TOS\" - excerpt from the Terms of Service document.\n" value: custom - explanation: > - "Subject to Customer’s strict compliance with this TOS, GooseAI grants - Customer a limited, non-exclusive, non-transferable, non-sublicensable, - revocable license to access and use the Platform as described in and - subject to this TOS" - excerpt from the Terms of Service document. - terms_of_service: https://goose.ai/docs/tos - intended_uses: > - Intended to be used as an NLP infrastructure. - prohibited_uses: - value: > - Illegal or abusive activity, security violations, network abuse - explanation: > - Prohibited uses are detailed in the Acceptable Use Policy - [[GooseAI Acceptable Use Policy]](https://goose.ai/docs/aup). 
monitoring: + explanation: "In the \"GooseAI Monitoring and Enforcement\" section of GooseAI's\ + \ Acceptable Use Policy (AUP), it is stated that Goose.AI has the right to\ + \ investigate any suspected violation of its AUP [[GooseAI Acceptable Use\ + \ Policy]](https://goose.ai/docs/aup).\n" value: At will monitoring by the provider - explanation: > - In the "GooseAI Monitoring and Enforcement" section of GooseAI's - Acceptable Use Policy (AUP), it is stated that Goose.AI has the right to - investigate any suspected violation of its AUP - [[GooseAI Acceptable Use Policy]](https://goose.ai/docs/aup). - feedback: - value: Email support - explanation: > - In the "Error Reporting and Feedback" section of the Goose.ai Terms of - Service, GooseAI asks all the feedback to be sent to support at goose.ai - [[GooseAI Terms of Service]](https://goose.ai/docs/tos). - # Deployment monthly_active_users: unknown + name: GooseAI API + organization: GooseAI + output_space: + explanation: "Question/Answer and Classification tasks are coming soon according\ + \ to GooseAI [[Main Page]](goose.ai).\n" + value: Text Generation, Text Completion + prohibited_uses: + explanation: "Prohibited uses are detailed in the Acceptable Use Policy [[GooseAI\ + \ Acceptable Use Policy]](https://goose.ai/docs/aup).\n" + value: "Illegal or abusive activity, security violations, network abuse\n" + quality_control: unknown + terms_of_service: https://goose.ai/docs/tos + type: application + url: goose.ai user_distribution: unknown - failures: unknown - -- type: model - name: VQGAN-CLIP - organization: EleutherAI - description: VQGAN-CLIP is a model that better generates and edits images using - a multimodal encoder to guide image generation. - created_date: 2022-09-04 - url: https://arxiv.org/pdf/2204.08583.pdf - model_card: '' - modality: text input with image output +- access: open analysis: Evaluated by human testers rating alignment of text input, image output pairs. - size: 227M parameters + created_date: 2022-09-04 dependencies: [VQGAN, CLIP] + description: VQGAN-CLIP is a model that better generates and edits images using + a multimodal encoder to guide image generation. + feedback: '' + intended_uses: '' + license: MIT + modality: + explanation: text input with image output + value: text; image + model_card: '' + monitoring: '' + name: VQGAN-CLIP + organization: EleutherAI + prohibited_uses: '' + quality_control: '' + size: 227M parameters (dense) training_emissions: unknown - training_time: Less than 1 V100-hour training_hardware: 1 NVIDIA Tesla K80 GPU - quality_control: '' - access: open - license: MIT + training_time: Less than 1 V100-hour + type: model + url: https://arxiv.org/pdf/2204.08583.pdf +- access: open + analysis: Evaluated on a variety of NLP benchmarks and found to perform similarly + to OPT and BLOOM models. 
+ created_date: 2023-05-31 + dependencies: [The Pile] + description: A suite of 16 LLMs all trained on public data seen in the exact same + order and ranging in size from 70M to 12B parameters + feedback: https://huggingface.co/EleutherAI/pythia-6.9b/discussions intended_uses: '' - prohibited_uses: '' + license: Apache 2.0 + modality: + explanation: natural language text + value: text; text + model_card: https://huggingface.co/EleutherAI/pythia-12b monitoring: '' - feedback: '' - -- type: model name: Pythia organization: Eleuther AI - description: A suite of 16 LLMs all trained on public data seen in the exact same - order and ranging in size from 70M to 12B parameters - created_date: 2023-05-31 - url: https://arxiv.org/pdf/2304.01373.pdf - model_card: https://huggingface.co/EleutherAI/pythia-12b - modality: natural language text - analysis: Evaluated on a variety of NLP benchmarks and found to perform similarly - to OPT and BLOOM models. - size: 12B parameters - dependencies: [The Pile] + prohibited_uses: '' + quality_control: '' + size: 12B parameters (dense) training_emissions: '' - training_time: '' training_hardware: 64 A100 GPUs - quality_control: '' - access: open - license: Apache 2.0 - intended_uses: '' - prohibited_uses: '' - monitoring: '' - feedback: https://huggingface.co/EleutherAI/pythia-6.9b/discussions + training_time: '' + type: model + url: https://arxiv.org/pdf/2304.01373.pdf diff --git a/assets/ens.yaml b/assets/ens.yaml index 5209fe3d..b728b918 100644 --- a/assets/ens.yaml +++ b/assets/ens.yaml @@ -1,32 +1,37 @@ --- - -- type: dataset - name: HowTo100M - organization: École Normale Supérieure, Inria - description: HowTo100M is a large-scale dataset of narrated videos with an emphasis - on instructional videos where content creators teach complex tasks with an explicit - intention of explaining the visual content on screen. HowTo100M features a total - of 136M video clips with captions sourced from 1.2M Youtube videos (15 years - of video) and 23k activities from domains such as cooking, hand crafting, personal - care, gardening or fitness. - created_date: - value: 2019-06-07 - explanation: The date the [[paper]](https://arxiv.org/abs/1906.03327) was submitted. - url: https://arxiv.org/pdf/1906.03327.pdf - datasheet: https://arxiv.org/pdf/1906.03327.pdf - modality: text (English) and video - size: 136M video clips - sample: [] +- access: + explanation: Dataset, evaluation code and models are publicly available at the + [[HowTo100M dataset webpage]](https://www.di.ens.fr/willow/research/howto100m/). + value: open analysis: + explanation: See [[Experiments]](https://arxiv.org/pdf/1906.03327v2.pdf#section.5) value: Authors use the dataset to learn a joint text-video embedding by leveraging more than 130M video clip-caption pairs. They then evaluate the learned embeddings on the tasks of localizing steps in instructional videos of CrossTask and textbased video retrieval on YouCook2, MSR-VTT and LSMDC datasets. They show that their learned embedding can perform better compared to models trained on existing carefully annotated but smaller video description datasets. - explanation: See [[Experiments]](https://arxiv.org/pdf/1906.03327v2.pdf#section.5) + created_date: + explanation: The date the [[paper]](https://arxiv.org/abs/1906.03327) was submitted. 
+ value: 2019-06-07 + datasheet: https://arxiv.org/pdf/1906.03327.pdf dependencies: [YouTube] + description: HowTo100M is a large-scale dataset of narrated videos with an emphasis + on instructional videos where content creators teach complex tasks with an explicit + intention of explaining the visual content on screen. HowTo100M features a total + of 136M video clips with captions sourced from 1.2M Youtube videos (15 years + of video) and 23k activities from domains such as cooking, hand crafting, personal + care, gardening or fitness. + excluded: + explanation: See [[Data collection]](https://arxiv.org/pdf/1906.03327v2.pdf#subsection.3.1) + value: Categories such as Relationships and Finance and Business, that may be + more abstract, are excluded. Videos with less than 100 views are removed. + Authors also ignore videos that have less than 100 words. Videos longer than + 2,000 seconds are removed. As some videos may appear in several tasks, the + videos are deduplicated based on YouTube IDs. + feedback: '' included: + explanation: See [[Data collection]](https://arxiv.org/pdf/1906.03327v2.pdf#subsection.3.1) value: The dataset features 1.22 million videos from YouTube with a primary focus on videos containing "visual tasks", that involve some interaction with the physical world (e.g. Making peanut butter, Pruning a tree) as compared @@ -38,29 +43,22 @@ Arts and Entertainment, Computers and Electronics). They also restrict to the top 200 YouTube search results, as the latter ones may not be related to the query task. - explanation: See [[Data collection]](https://arxiv.org/pdf/1906.03327v2.pdf#subsection.3.1) - excluded: - value: Categories such as Relationships and Finance and Business, that may be - more abstract, are excluded. Videos with less than 100 views are removed. - Authors also ignore videos that have less than 100 words. Videos longer than - 2,000 seconds are removed. As some videos may appear in several tasks, the - videos are deduplicated based on YouTube IDs. - explanation: See [[Data collection]](https://arxiv.org/pdf/1906.03327v2.pdf#subsection.3.1) - quality_control: '' - access: - value: open - explanation: Dataset, evaluation code and models are publicly available at the - [[HowTo100M dataset webpage]](https://www.di.ens.fr/willow/research/howto100m/). - license: Apache 2.0 intended_uses: '' + license: Apache 2.0 + modality: {} + monitoring: '' + name: HowTo100M + organization: École Normale Supérieure, Inria prohibited_uses: - value: > - No uses are explicitly prohibited by the authors. They note the following - limitations of the dataset: "We note that the distribution of identities and - activities in the HowTo100M dataset may not be representative of the global - human population and the diversity in society. Please be careful of unintended - societal, gender, racial and other biases when training or deploying models - trained on this data." explanation: See [[HowTo100M dataset webpage]](https://www.di.ens.fr/willow/research/howto100m/) - monitoring: '' - feedback: '' + value: "No uses are explicitly prohibited by the authors. They note the following\ + \ limitations of the dataset: \"We note that the distribution of identities\ + \ and activities in the HowTo100M dataset may not be representative of the\ + \ global human population and the diversity in society. 
Please be careful\ + \ of unintended societal, gender, racial and other biases when training or\ + \ deploying models trained on this data.\"\n" + quality_control: '' + sample: [] + size: 136M video clips + type: dataset + url: https://arxiv.org/pdf/1906.03327.pdf diff --git a/assets/faraday.yaml b/assets/faraday.yaml index 7c986cb4..a1af0488 100644 --- a/assets/faraday.yaml +++ b/assets/faraday.yaml @@ -1,27 +1,26 @@ --- - -- type: application - name: ARES - organization: Faraday Lab - description: ARES is a text-to-image generator based on Stable Diffusion. The - goal is to provide a simple tool with a user interface allowing mainstream AI - access for artists and creators. +- access: open + adaptation: '' created_date: - value: 2023-04-26 explanation: The date Faraday Lab's hugging face card was last updated. Date for ARES release is not published on site. - url: https://faradaylab.fr/ + value: 2023-04-26 dependencies: [Stable Diffusion] - adaptation: '' - output_space: generated images - quality_control: '' - access: open - license: unknown - terms_of_service: '' + description: ARES is a text-to-image generator based on Stable Diffusion. The + goal is to provide a simple tool with a user interface allowing mainstream AI + access for artists and creators. + failures: '' + feedback: '' intended_uses: '' - prohibited_uses: '' + license: unknown monitoring: '' - feedback: '' monthly_active_users: '' + name: ARES + organization: Faraday Lab + output_space: generated images + prohibited_uses: '' + quality_control: '' + terms_of_service: '' + type: application + url: https://faradaylab.fr/ user_distribution: '' - failures: '' diff --git a/assets/google.yaml b/assets/google.yaml index fb56a8dc..42aab482 100644 --- a/assets/google.yaml +++ b/assets/google.yaml @@ -1,100 +1,71 @@ --- - -# Datasets -- type: dataset - name: C4 - # General - organization: Google - description: The Colossal Clean Crawled Corpus (C4) is a processed version of - Common Crawl to facilitate transfer learning in NLP. +- access: + explanation: https://huggingface.co/datasets/c4 + value: open + analysis: https://arxiv.org/abs/2104.08758 created_date: - value: 2019-10-23 explanation: The date the T5 paper was made public. - url: https://arxiv.org/abs/1910.10683 + value: 2019-10-23 datasheet: https://huggingface.co/datasets/c4 - modality: Text (English) - size: 750GB - sample: [https://huggingface.co/datasets/c4/viewer/en/train] - analysis: https://arxiv.org/abs/2104.08758 - # Construction dependencies: [CommonCrawl] - license: ODC-By 1.0 + description: The Colossal Clean Crawled Corpus (C4) is a processed version of + Common Crawl to facilitate transfer learning in NLP. + excluded: "Data was filtered for English using langdetect. Further, data was filtered\ + \ to end in terminal punctuation, to remove short pages (less than 5 sentences),\ + \ and to remove \"Dirty, Naughty, Obscene or Otherwise Bad Words\".\n" + feedback: https://huggingface.co/datasets/c4/discussions included: none - excluded: > - Data was filtered for English using langdetect. Further, data was filtered - to end in terminal punctuation, to remove short pages (less than 5 sentences), - and to remove "Dirty, Naughty, Obscene or Otherwise Bad Words". - quality_control: > - Data filtering excluded obscene words from a block list as well - as short documents and some deduplication was done based on string overlap. 
- # Downstream - access: - value: open - explanation: https://huggingface.co/datasets/c4 intended_uses: To faciliate transfer learning research in NLP. - prohibited_uses: none + license: ODC-By 1.0 + modality: {} monitoring: none - feedback: https://huggingface.co/datasets/c4/discussions - -- type: dataset - name: Internal Google BERT dataset - # General + name: C4 organization: Google - description: > - The dataset used to train Internal Google BERT models. + prohibited_uses: none + quality_control: "Data filtering excluded obscene words from a block list as well\ + \ as short documents and some deduplication was done based on string overlap.\n" + sample: [https://huggingface.co/datasets/c4/viewer/en/train] + size: 750GB + type: dataset + url: https://arxiv.org/abs/1910.10683 +- access: closed + analysis: unknown created_date: + explanation: "The date of the Google product update blog announcing that BERT\ + \ models were for ranking and featured snippets in Search.\n" value: 2019-11-25 - explanation: > - The date of the Google product update blog announcing that BERT models - were for ranking and featured snippets in Search. - url: https://blog.google/products/search/search-language-understanding-bert/ datasheet: none - modality: text - size: unknown - sample: [] - analysis: unknown - # Construction dependencies: [] - license: - value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. + description: "The dataset used to train Internal Google BERT models.\n" + excluded: unknown + feedback: unknown included: + explanation: "Although we don't exactly know the contents of the Internal Google\ + \ BERT dataset, it likely includes contents from web pages and search queries.\n" value: Web pages, and search queries - explanation: > - Although we don't exactly know the contents of the Internal Google BERT - dataset, it likely includes contents from web pages and search queries. - excluded: unknown - quality_control: unknown - # Downstream - access: closed intended_uses: + explanation: "We don't have an exhaustive list of the intended use cases for\ + \ the Internal Google BERT dataset, but we know that BERT was used in Google\ + \ Search.\n" value: unknown - explanation: > - We don't have an exhaustive list of the intended use cases for the - Internal Google BERT dataset, but we know that BERT was used in Google - Search. - prohibited_uses: unknown + license: + explanation: "The asset isn't released, and hence the license is unknown.\n" + value: unknown + modality: {} monitoring: unknown - feedback: unknown - -- type: dataset - name: Conceptual Captions - # General + name: Internal Google BERT dataset organization: Google - description: > - A dataset containing 3 million (image-URL, caption) pairs designed for the - training and evaluation of machine learned image captioning systems. - created_date: - value: 2018-07-01 - explanation: Due to the lack of information about the exact date, it is assumed - to be the 1st of the known month of creation. 
- url: https://aclanthology.org/P18-1238/ - datasheet: none - modality: text(English) and image - size: 3.3M (image, text) pairs + prohibited_uses: unknown + quality_control: unknown sample: [] + size: unknown + type: dataset + url: https://blog.google/products/search/search-language-understanding-bert/ +- access: + explanation: Dataset can be downloaded at [[Download]](https://ai.google.com/research/ConceptualCaptions/download) + value: open analysis: + explanation: See [[Experimental Results]](https://aclanthology.org/P18-1238.pdf#section.5) value: Authors evaluate the dataset on two image captioning models - RNN-based model and Transformer model, under two experimental conditions - using the training & development sets provided by the MS COCO dataset, versus training @@ -104,708 +75,631 @@ evaluations indicate that the Conceptual-based models are superior. Automatic models fail to corroborate the human evaluation results. This highlights the weakness of these automatic metrics. - explanation: See [[Experimental Results]](https://aclanthology.org/P18-1238.pdf#section.5) - # Construction + created_date: + explanation: Due to the lack of information about the exact date, it is assumed + to be the 1st of the known month of creation. + value: 2018-07-01 + datasheet: none dependencies: [] - license: - value: Conceptual Captions License - explanation: > - [[Conceptual Captions License]](https://github.com/google-research-datasets/conceptual-captions/blob/master/LICENSE) - included: '' + description: "A dataset containing 3 million (image-URL, caption) pairs designed\ + \ for the training and evaluation of machine learned image captioning systems.\n" excluded: - value: | - The following filtering steps are applied in the given order: - 1. Image-based Filtering - "It only keeps JPEG images where both dimensions are greater than 400 pixels, and the ratio of larger to smaller dimension is no more than 2. It excludes images that trigger pornography or profanity detectors. These filters discard more than 65% of the candidates." - 2. Text-based Filtering - "Candidates with no determiner, no noun, or no preposition are discarded; candidates with a high noun ratio are also discarded; candidates with a high rate of token repetition are discarded; candidates where the first word is not capitalized, or with too high capitalized-word ratio are discarded; we use a vocabulary VW of 1B token types, appearing at least 5 times in the English Wikipedia, and discard candidates that contain tokens that are not found in this vocabulary. candidates that score too high or too low on the polarity annotations, or trigger the pornography/profanity detectors, are discarded; predefined boiler-plate prefix/suffix sequences matching the text are cropped, e.g. “click to enlarge picture”, “stock photo”; we also drop text which begins/ends in certain patterns, e.g. “embedded image permalink”, “profile photo”. These filters only allow around 3% of the incoming candidates to pass to the later stages." - 3. Image&Text-based Filtering - "We filter out candidates for which none of the text tokens can be mapped to the content of the image. This filter discards around 60% of the incoming candidates." - 4. 
Text Transformation with Hypernymization - "Noun modifiers of certain types (proper nouns, numbers, units) are removed; dates, durations, and preposition-based locations (e.g., "in Los Angeles") are removed; named-entities are identified, matched against the KG entries, and substitute with their hypernym; resulting coordination noun-phrases with the same head (e.g., "actor and actor") are resolved into a single-head, pluralized form (e.g., "actors"). Around 20% of samples are discarded during this transformation. We then cluster all resolved entities (e.g., 2560 "actor", "dog", "neighborhood", etc.) and keep only the candidates for which all detected types have a count of over 100 (around 55% of the candidates)." - explanation: See [[Conceptual Captions Dataset Creation]](https://aclanthology.org/P18-1238.pdf#section.3) - quality_control: - value: Input candidate (image, caption) pairs pass through several stages of - filtering and processing to ensure quality. explanation: See [[Conceptual Captions Dataset Creation]](https://aclanthology.org/P18-1238.pdf#section.3) - # Downstream - access: - value: open - explanation: Dataset can be downloaded at [[Download]](https://ai.google.com/research/ConceptualCaptions/download) - intended_uses: '' - prohibited_uses: unknown - monitoring: unknown + value: "The following filtering steps are applied in the given order:\n1. Image-based\ + \ Filtering - \"It only keeps JPEG images where both dimensions are greater\ + \ than 400 pixels, and the ratio of larger to smaller dimension is no more\ + \ than 2. It excludes images that trigger pornography or profanity detectors.\ + \ These filters discard more than 65% of the candidates.\"\n2. Text-based\ + \ Filtering - \"Candidates with no determiner, no noun, or no preposition\ + \ are discarded; candidates with a high noun ratio are also discarded; candidates\ + \ with a high rate of token repetition are discarded; candidates where the\ + \ first word is not capitalized, or with too high capitalized-word ratio are\ + \ discarded; we use a vocabulary VW of 1B token types, appearing at least\ + \ 5 times in the English Wikipedia, and discard candidates that contain tokens\ + \ that are not found in this vocabulary. candidates that score too high or\ + \ too low on the polarity annotations, or trigger the pornography/profanity\ + \ detectors, are discarded; predefined boiler-plate prefix/suffix sequences\ + \ matching the text are cropped, e.g. “click to enlarge picture”, “stock photo”;\ + \ we also drop text which begins/ends in certain patterns, e.g. “embedded\ + \ image permalink”, “profile photo”. These filters only allow around 3% of\ + \ the incoming candidates to pass to the later stages.\"\n3. Image&Text-based\ + \ Filtering - \"We filter out candidates for which none of the text tokens\ + \ can be mapped to the content of the image. This filter discards around 60%\ + \ of the incoming candidates.\"\n4. Text Transformation with Hypernymization\ + \ - \"Noun modifiers of certain types (proper nouns, numbers, units) are removed;\ + \ dates, durations, and preposition-based locations (e.g., \"in Los Angeles\"\ + ) are removed; named-entities are identified, matched against the KG entries,\ + \ and substitute with their hypernym; resulting coordination noun-phrases\ + \ with the same head (e.g., \"actor and actor\") are resolved into a single-head,\ + \ pluralized form (e.g., \"actors\"). Around 20% of samples are discarded\ + \ during this transformation. 
We then cluster all resolved entities (e.g.,\ + \ 2560 \"actor\", \"dog\", \"neighborhood\", etc.) and keep only the candidates\ + \ for which all detected types have a count of over 100 (around 55% of the\ + \ candidates).\"\n" feedback: Feedback can be provided by creating an issue in the [[Conceptual Captions GitHub repository]](https://github.com/google-research-datasets/conceptual-captions) or by emailing at conceptual-captions at google.com - -- type: dataset - name: Conceptual 12M - # General + included: '' + intended_uses: '' + license: + explanation: "[[Conceptual Captions License]](https://github.com/google-research-datasets/conceptual-captions/blob/master/LICENSE)\n" + value: Conceptual Captions License + modality: {} + monitoring: unknown + name: Conceptual Captions organization: Google - description: > - A dataset with 12 million image-text pairs specifically meant to be used for - vision-and-language pre-training. - created_date: - value: 2021-02-17 - explanation: > - The date the [[paper]](https://arxiv.org/abs/2102.08981) was submitted. - url: https://arxiv.org/pdf/2102.08981.pdf - datasheet: none - modality: text (English) and image - size: 12M (image, text) pairs + prohibited_uses: unknown + quality_control: + explanation: See [[Conceptual Captions Dataset Creation]](https://aclanthology.org/P18-1238.pdf#section.3) + value: Input candidate (image, caption) pairs pass through several stages of + filtering and processing to ensure quality. sample: [] + size: 3.3M (image, text) pairs + type: dataset + url: https://aclanthology.org/P18-1238/ +- access: + explanation: Dataset is available at [[Conceptual 12M GitHub repository]](https://github.com/google-research-datasets/conceptual-12m). + value: open analysis: - value: > - The dataset is benchmarked against CC3M on two most fundamental V+L - tasks: vision-to-language generation and vision-and-language matching, with - an emphasis on long-tail visual recognition. The results illustrate the - benefit of scaling up pre-training data for vision-and-language tasks, as - indicated by the new state-of-the-art results on both the nocaps and Conceptual - Captions benchmarks. explanation: See [[Evaluating Vision-and-Language PreTraining Data]](https://arxiv.org/pdf/2102.08981.pdf#section.3) - # Construction + value: "The dataset is benchmarked against CC3M on two most fundamental V+L\ + \ tasks: vision-to-language generation and vision-and-language matching, with\ + \ an emphasis on long-tail visual recognition. The results illustrate the\ + \ benefit of scaling up pre-training data for vision-and-language tasks, as\ + \ indicated by the new state-of-the-art results on both the nocaps and Conceptual\ + \ Captions benchmarks.\n" + created_date: + explanation: "The date the [[paper]](https://arxiv.org/abs/2102.08981) was submitted.\n" + value: 2021-02-17 + datasheet: none dependencies: [] - license: - value: Conceptual Captions License - explanation: > - [[Conceptual Captions License]](https://github.com/google-research-datasets/conceptual-captions/blob/master/LICENSE) - included: '' + description: "A dataset with 12 million image-text pairs specifically meant to\ + \ be used for vision-and-language pre-training.\n" excluded: - value: | - Some of the filtering steps used in the preparation of Conceptual Captions dataset are relaxed to trade off high-recall for low-precision. The following steps are applied in the given order: - 1. 
Image-based Filtering - Only keep JPEG images where both dimensions are greater than 400 pixels, and the ratio of larger to smaller dimension is no more than 2.5. Exclude images that trigger pornography or profanity detectors. - 2. Text-based Filtering - Allow text between 3 and 256 words in the alt-text. Discard candidates with no noun or no determiner, but permit ones without prepositions. Set the maximum fraction of word repetition allowed to 0.2. Increase the threshold for counting a word type as rare from 5 to 20. - 3. Image&Text-based Filtering - Filter out candidates for which none of the text tokens can be mapped to the content of the image. explanation: See [[Relaxing filters for higher recall]](https://arxiv.org/pdf/2102.08981.pdf#subsection.2.2) + value: "Some of the filtering steps used in the preparation of Conceptual Captions\ + \ dataset are relaxed to trade off high-recall for low-precision. The following\ + \ steps are applied in the given order:\n1. Image-based Filtering - Only keep\ + \ JPEG images where both dimensions are greater than 400 pixels, and the ratio\ + \ of larger to smaller dimension is no more than 2.5. Exclude images that\ + \ trigger pornography or profanity detectors.\n2. Text-based Filtering - Allow\ + \ text between 3 and 256 words in the alt-text. Discard candidates with no\ + \ noun or no determiner, but permit ones without prepositions. Set the maximum\ + \ fraction of word repetition allowed to 0.2. Increase the threshold for counting\ + \ a word type as rare from 5 to 20.\n3. Image&Text-based Filtering - Filter\ + \ out candidates for which none of the text tokens can be mapped to the content\ + \ of the image.\n" + feedback: Feedback can be provided by creating an issue in the [[Conceptual 12M + GitHub repository]](https://github.com/google-research-datasets/conceptual-12m) + or by emailing at conceptual-captions at google.com + included: '' + intended_uses: '' + license: + explanation: "[[Conceptual Captions License]](https://github.com/google-research-datasets/conceptual-captions/blob/master/LICENSE)\n" + value: Conceptual Captions License + modality: + explanation: text (English) and image + value: image, text + monitoring: unknown + name: Conceptual 12M + organization: Google + prohibited_uses: unknown quality_control: + explanation: See [[Vision-and-Language Pre-Training Data]](https://arxiv.org/pdf/2102.08981.pdf#section.2) value: Input candidate (image, caption) pairs pass through several stages of filtering and processing to ensure quality. Person-name substitutions are performed in the alt-texts to protect the privacy of individuals in the associated images. - explanation: See [[Vision-and-Language Pre-Training Data]](https://arxiv.org/pdf/2102.08981.pdf#section.2) - # Downstream - access: + sample: [] + size: 12M (image, text) pairs + type: dataset + url: https://arxiv.org/pdf/2102.08981.pdf +- access: + explanation: https://huggingface.co/t5-large value: open - explanation: Dataset is available at [[Conceptual 12M GitHub repository]](https://github.com/google-research-datasets/conceptual-12m). 
- intended_uses: '' - prohibited_uses: unknown - monitoring: unknown - feedback: Feedback can be provided by creating an issue in the [[Conceptual 12M - GitHub repository]](https://github.com/google-research-datasets/conceptual-12m) - or by emailing at conceptual-captions at google.com - -# Models -- type: model - name: T5 - organization: Google - description: Text-To-Text Transfer Transformer (T5) is a model that unifies all - NLP tasks under the text-to-text format. + analysis: https://huggingface.co/t5-base#evaluation created_date: - value: 2019-10-23 explanation: The date the T5 paper was made public. - url: https://arxiv.org/abs/1910.10683 - model_card: https://huggingface.co/t5-base - modality: text (English) - size: - value: 11B parameters (dense model) - explanation: T5 models were trained on several sizes including 3B and 11B parameters. - analysis: https://huggingface.co/t5-base#evaluation + value: 2019-10-23 dependencies: [C4] - training_emissions: unknown - training_time: unknown - training_hardware: 1,024 TPU v3 chips (Cloud TPU Pods) - quality_control: The T5 paper documents many analyses/ablations that were considered - before arriving at the final architecture/training procedure. - access: - value: open - explanation: https://huggingface.co/t5-large - license: Apache 2.0 + description: Text-To-Text Transfer Transformer (T5) is a model that unifies all + NLP tasks under the text-to-text format. + feedback: https://huggingface.co/t5-large/discussions intended_uses: NLP tasks - prohibited_uses: unknown + license: Apache 2.0 + modality: {} + model_card: https://huggingface.co/t5-base monitoring: none - feedback: https://huggingface.co/t5-large/discussions - -- type: model - name: Internal Google BERT - # General + name: T5 organization: Google - description: > - Internal Google BERT model used to power Google Search products. + prohibited_uses: unknown + quality_control: The T5 paper documents many analyses/ablations that were considered + before arriving at the final architecture/training procedure. + size: '' + training_emissions: unknown + training_hardware: 1,024 TPU v3 chips (Cloud TPU Pods) + training_time: unknown + type: model + url: https://arxiv.org/abs/1910.10683 +- access: closed + analysis: unknown created_date: + explanation: "The date of the Google product update blog announcing that BERT\ + \ models were for ranking and featured snippets in Search.\n" value: 2019-11-25 - explanation: > - The date of the Google product update blog announcing that BERT models - were for ranking and featured snippets in Search. - url: https://blog.google/products/search/search-language-understanding-bert/ - model_card: unknown - modality: text - size: unknown - analysis: unknown - # Construction dependencies: [Internal Google BERT dataset] - training_emissions: unknown - training_time: unknown - training_hardware: unknown - quality_control: unknown - # Downstream - access: closed - license: - value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. + description: "Internal Google BERT model used to power Google Search products.\n" + feedback: unknown intended_uses: + explanation: "We don't have an exhaustive list of the intended use cases for\ + \ the Internal Google BERT model, but we know that Google Search was powered\ + \ by a fine-tuned BERT.\n" value: unknown - explanation: > - We don't have an exhaustive list of the intended use cases for the - Internal Google BERT model, but we know that Google Search was powered - by a fine-tuned BERT. 
- prohibited_uses: unknown + license: + explanation: "The asset isn't released, and hence the license is unknown.\n" + value: unknown + modality: {} + model_card: unknown monitoring: unknown - feedback: unknown - -- type: application - name: Google Search - # General + name: Internal Google BERT organization: Google - description: > - Google Search is Google's search engine. + prohibited_uses: unknown + quality_control: unknown + size: '' + training_emissions: unknown + training_hardware: unknown + training_time: unknown + type: model + url: https://blog.google/products/search/search-language-understanding-bert/ +- access: open + adaptation: unknown created_date: + explanation: "The date of the Google product update blog announcing that BERT\ + \ models were for ranking and featured snippets in Search.\n" value: 2019-11-25 - explanation: > - The date of the Google product update blog announcing that BERT models - were for ranking and featured snippets in Search. - url: https://blog.google/products/search/search-language-understanding-bert/ - # Construction dependencies: [Internal Google BERT, MUM] - adaptation: unknown - output_space: web page ranking - quality_control: unknown - # Downstream - access: open + description: "Google Search is Google's search engine.\n" + failures: unknown + feedback: "Feedback can be sent to Google Feedback using the product interface\ + \ [[Google Feedback]](https://www.google.com/tools/feedback).\n" + intended_uses: Searching the web using text, voice or image license: - value: none explanation: No license was found. - terms_of_service: https://policies.google.com/terms - intended_uses: Searching the web using text, voice or image - prohibited_uses: > - Prohibited use cases aren't specifically spelled out for Google search, but - several illegal and discouraged use cases are shared in the Respect Others - section of the [[Term of Service]](https://policies.google.com/terms). - monitoring: > - It is implied that Google scan uses of its products for spam, - malware and illegal content in the - [[Term of Service]](https://policies.google.com/terms). - feedback: > - Feedback can be sent to Google Feedback using the product interface - [[Google Feedback]](https://www.google.com/tools/feedback). - # Deployment + value: none + monitoring: "It is implied that Google scan uses of its products for spam, malware\ + \ and illegal content in the [[Term of Service]](https://policies.google.com/terms).\n" monthly_active_users: unknown - user_distribution: unknown - failures: unknown - -- type: dataset - name: Infiniset - # General + name: Google Search organization: Google - description: > - Infiniset "is a combination of dialog data from public dialog data and - other public web documents" - [[Appendix E]](https://arxiv.org/pdf/2201.08239.pdf#appendix.E). 
+ output_space: web page ranking + prohibited_uses: "Prohibited use cases aren't specifically spelled out for Google\ + \ search, but several illegal and discouraged use cases are shared in the Respect\ + \ Others section of the [[Term of Service]](https://policies.google.com/terms).\n" + quality_control: unknown + terms_of_service: https://policies.google.com/terms + type: application + url: https://blog.google/products/search/search-language-understanding-bert/ + user_distribution: unknown +- access: closed + analysis: unknown created_date: + explanation: "The date of the Google company news blog announcing LaMDA [[Google\ + \ News Blog]](https://blog.google/technology/ai/lamda/).\n" value: 2021-06-18 - explanation: > - The date of the Google company news blog announcing LaMDA - [[Google News Blog]](https://blog.google/technology/ai/lamda/). - url: https://arxiv.org/pdf/2201.08239.pdf datasheet: none - modality: text and code - size: - value: unknown - explanation: > - The size of the dataset is unclear, but it is reported that the dataset - "consists of 2.97B documents and 1.12B dialogs with 13.39B utterances" - [[Appendix E]](https://arxiv.org/pdf/2201.08239.pdf#appendix.E). - sample: [] - analysis: unknown - # Construction dependencies: [] - license: - value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - included: > - Included in the dataset are data from "public forums (0%); C4 data (12.5% ); - code documents from sites related to programming like Q&A sites tutorials, - etc (12.5%); Wikipedia (English) (12.5%); English web documents (6.25%); - and Non-English web documents (6.25%)." + description: "Infiniset \"is a combination of dialog data from public dialog data\ + \ and other public web documents\" [[Appendix E]](https://arxiv.org/pdf/2201.08239.pdf#appendix.E).\n" excluded: unknown - quality_control: unknown - # Downstream - access: closed + feedback: + explanation: "Author contact information was not provided.\n" + value: none + included: "Included in the dataset are data from \"public forums (0%); C4 data\ + \ (12.5% ); code documents from sites related to programming like Q&A sites\ + \ tutorials, etc (12.5%); Wikipedia (English) (12.5%); English web documents\ + \ (6.25%); and Non-English web documents (6.25%).\"\n" intended_uses: + explanation: "Intended uses of the dataset wasn't explicitly linked, but it\ + \ is likely intended for training language models specialized in dialogue.\n" + value: unknown + license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - Intended uses of the dataset wasn't explicitly linked, but it is likely - intended for training language models specialized in dialogue. - prohibited_uses: > - The prohibited uses for Infiniset weren't specifically listed, but the - Google AI principles inspired safety objectives in - [[Appendix A.1]](https://arxiv.org/pdf/2201.08239.pdf#subsection.A.1) - advises avoiding harm, unjust impact and misinformation, among others. + modality: + explanation: text and code + value: code, text monitoring: unknown - feedback: - value: none - explanation: > - Author contact information was not provided. - -- type: model - name: LaMDA - # General + name: Infiniset organization: Google - description: > - LaMDA stands for Language Models for Dialog Application. It is a transformer - based language model trained on dialogue data. 
+ prohibited_uses: "The prohibited uses for Infiniset weren't specifically listed,\ + \ but the Google AI principles inspired safety objectives in [[Appendix A.1]](https://arxiv.org/pdf/2201.08239.pdf#subsection.A.1)\ + \ advises avoiding harm, unjust impact and misinformation, among others.\n" + quality_control: unknown + sample: [] + size: + explanation: "The size of the dataset is unclear, but it is reported that the\ + \ dataset \"consists of 2.97B documents and 1.12B dialogs with 13.39B utterances\"\ + \ [[Appendix E]](https://arxiv.org/pdf/2201.08239.pdf#appendix.E).\n" + value: unknown + type: dataset + url: https://arxiv.org/pdf/2201.08239.pdf +- access: closed + analysis: "The model performance was analyzed on sensibleness, specificity and\ + \ interestingness. The model was also analyzed on safety, following metrics\ + \ derived from Google AI Principles [[Appendix A.1]](https://arxiv.org/pdf/2201.08239.pdf#subsection.A.1).\ + \ Finally, the model was analyzed on groundedness, testing its ability to produce\ + \ responses that can be associated with \"known sources whenever possible [[Section\ + \ 4.1]](https://arxiv.org/pdf/2201.08239.pdf#subsection.4.1).\"\n" created_date: + explanation: "The date of the Google company news blog announcing LaMDA [[Google\ + \ News Blog]](https://blog.google/technology/ai/lamda/).\n" value: 2021-06-18 - explanation: > - The date of the Google company news blog announcing LaMDA - [[Google News Blog]](https://blog.google/technology/ai/lamda/). - url: https://arxiv.org/pdf/2201.08239.pdf - model_card: none - modality: text - size: - value: 137B parameters (dense model) - explanation: > - Along with the 137B model, the authors also trained 2B and 8B LaMDA - models. - analysis: > - The model performance was analyzed on sensibleness, specificity and - interestingness. - The model was also analyzed on safety, following - metrics derived from Google AI Principles - [[Appendix A.1]](https://arxiv.org/pdf/2201.08239.pdf#subsection.A.1). - Finally, the model was analyzed on groundedness, testing its ability to - produce responses that can be associated with "known sources whenever - possible - [[Section 4.1]](https://arxiv.org/pdf/2201.08239.pdf#subsection.4.1)." - # Construction dependencies: [Infiniset] - training_emissions: - value: 26 tCO2e - explanation: > - "...total carbon footprint of LaMDA’s pre-training of the largest model - is approximately 25.2 tCO2e. The carbon footprint of pre-training of - smaller models and fine-tuning of all models is approximately 0.7 tCO2e - ... which brings the total footprint of LaMDA to approximately 26 - tCO2e - [[Section 10]](https://arxiv.org/pdf/2201.08239.pdf#section.10)" - training_time: - value: 4108.80 petaflop/s-day - explanation: > - The total number of training flops of LaMDA was reported as 3.55E+23 - (3.55E+8 petaflops) - [[Section 10]](https://arxiv.org/pdf/2201.08239.pdf#section.10), which is - equal to 4108.80 = 3.55E+8 / (60 * 60 * 24) petaflop/s-day. - training_hardware: - value: 1024 TPU-V3 chips - explanation: > - Reported in [[Section 10]](https://arxiv.org/pdf/2201.08239.pdf#section.10). - quality_control: > - LaMDA was fine-tuned to predict sensibleness, specificity and - interestingness as well as safety. Then, the candidates were filtered out - if the model safety predictions were below a certain threshold. The next - candidates in the conversation were selected as a combination of these - predictions. The model was also fine-tuned for groundedness. 
The results
-    are shown in
-    [[Figure 5]](https://arxiv.org/pdf/2201.08239.pdf#figure.caption.23).
-  # Downstream
-  access: closed
+  description: "LaMDA stands for Language Models for Dialog Applications. It is a\
+    \ transformer-based language model trained on dialogue data.\n"
+  feedback:
+    explanation: "Author contact information was not provided.\n"
+    value: none
+  intended_uses: "LaMDA is a language model, so it can be used for regular language\
+    \ modelling tasks without fine-tuning, but it is fine-tuned for dialogue tasks.\n"
   license:
+    explanation: "The asset isn't released, and hence the license is unknown.\n"
     value: unknown
-    explanation: >
-      The asset isn't released, and hence the license is unknown.
-  intended_uses: >
-    LaMDA is a language model, so it can be used for regular langauge modelling
-    tasks without fine-tuning, but its fine-tuned for dialogue tasks.
-  prohibited_uses: >
-    The prohibited uses of LaMDA weren't specifically listed, but the Google
-    AI principles inspired safety objectives in
-    [[Appendix A.1]](https://arxiv.org/pdf/2201.08239.pdf#subsection.A.1)
-    advises avoiding harm, unjust impact and misinformation, among others.
+  modality:
+    explanation: text
+    value: text; text
+  model_card: none
   monitoring: unknown
-  feedback:
-    value: none
-    explanation: >
-      Author contact information was not provided.
-
-- type: dataset
-  name: PaLM dataset
-  # General
+  name: LaMDA
   organization: Google
-  description: >
-    PaLM dataset "was created for pre-training language models"
-    [[Datasheet]](https://arxiv.org/pdf/2204.02311.pdf#appendix.D).
-  created_date:
-    value: 2022-04-04
-    explanation: >
-      The date of the Google AI blog announcing the details of PaLM
-      [[Google AI Blog]](https://ai.googleblog.com/2022/04/pathways-language-model-palm-scaling-to.html).
-  url: https://arxiv.org/pdf/2204.02311.pdf
-  datasheet: https://arxiv.org/pdf/2204.02311.pdf#appendix.D
-  modality: text and code
-  size:
-    value: 3.92 TB
-    explanation: >
-      Dataset size in GB is not reported, but the dataset is reported to have
-      780 billion tokens
-      [[Datasheet]](https://arxiv.org/pdf/2204.02311.pdf#appendix.D).
-      The code portion of the dataset is reported to be 5% totaling a 196GB
-      of source code
-      [[Section 3]](https://arxiv.org/pdf/2204.02311.pdf#section.3).
-      It is unclear whether the reported size is before or after de-duplication.
-      Nonetheless, one can estimate the dataset size by multiplying 196GB with
-      20 = 3.92 TB.
-  sample: []
+  prohibited_uses: "The prohibited uses of LaMDA weren't specifically listed, but\
+    \ the Google AI principles inspired safety objectives in [[Appendix A.1]](https://arxiv.org/pdf/2201.08239.pdf#subsection.A.1)\
+    \ advise avoiding harm, unjust impact and misinformation, among others.\n"
+  quality_control: "LaMDA was fine-tuned to predict sensibleness, specificity and\
+    \ interestingness as well as safety. Then, the candidates were filtered out\
+    \ if the model safety predictions were below a certain threshold. The next candidates\
+    \ in the conversation were selected as a combination of these predictions. The\
+    \ model was also fine-tuned for groundedness. The results are shown in [[Figure\
+    \ 5]](https://arxiv.org/pdf/2201.08239.pdf#figure.caption.23).\n"
+  size: 137B parameters (dense)
+  training_emissions:
+    explanation: "\"...total carbon footprint of LaMDA’s pre-training of the largest\
+      \ model is approximately 25.2 tCO2e. The carbon footprint of pre-training\
+      \ of smaller models and fine-tuning of all models is approximately 0.7 tCO2e\
+      \ ...
which brings the total footprint of LaMDA to approximately 26 tCO2e\ + \ [[Section 10]](https://arxiv.org/pdf/2201.08239.pdf#section.10)\"\n" + value: 26 tCO2e + training_hardware: + explanation: "Reported in [[Section 10]](https://arxiv.org/pdf/2201.08239.pdf#section.10).\n" + value: 1024 TPU-V3 chips + training_time: + explanation: "The total number of training flops of LaMDA was reported as 3.55E+23\ + \ (3.55E+8 petaflops) [[Section 10]](https://arxiv.org/pdf/2201.08239.pdf#section.10),\ + \ which is equal to 4108.80 = 3.55E+8 / (60 * 60 * 24) petaflop/s-day.\n" + value: 4108.80 petaflop/s-day + type: model + url: https://arxiv.org/pdf/2201.08239.pdf +- access: closed analysis: unknown - # Construction + created_date: + explanation: "The date of the Google AI blog announcing the details of PaLM\ + \ [[Google AI Blog]](https://ai.googleblog.com/2022/04/pathways-language-model-palm-scaling-to.html).\n" + value: 2022-04-04 + datasheet: https://arxiv.org/pdf/2204.02311.pdf#appendix.D dependencies: [Infiniset] + description: "PaLM dataset \"was created for pre-training language models\" [[Datasheet]](https://arxiv.org/pdf/2204.02311.pdf#appendix.D).\n" + excluded: "GitHub repositories with copyleft licenses were excluded. Programming\ + \ languageges other than the most common 24 were excluded [[Section 3]](https://arxiv.org/pdf/2204.02311.pdf#section.3).\n" + feedback: + explanation: "Author contact information is shared in the paper [[Paper]](https://arxiv.org/pdf/2204.02311.pdf).\n" + value: Contact the authors. + included: "The dataset is based on Infiniset. It included multilingual text containing\ + \ text from over 100 languages. The breakdown of the data included is as follows:\ + \ Social media conversations (multilingual) 50, Filtered webpages (multilingual)\ + \ 27%, BooksCorpus (English) 13%, GitHub (code) 5%, Wikipedia (multilingual)\ + \ 4%, and News (English) 1%. Code was collected from GitHub repositories with\ + \ appropriate licenses, totalling 96GB of source code [[Section 3]](https://arxiv.org/pdf/2204.02311.pdf#section.3).\n" + intended_uses: + explanation: "As stated in [[Datasheet]](https://arxiv.org/pdf/2204.02311.pdf#appendix.D).\n" + value: "\"The dataset was created for pre-training language models by a team\ + \ of researchers at Google\".\n" license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - included: > - The dataset is based on Infiniset. It included multilingual text containing - text from over 100 languages. The breakdown of the data included is as - follows: Social media conversations (multilingual) 50, Filtered webpages - (multilingual) 27%, BooksCorpus (English) 13%, GitHub (code) 5%, Wikipedia - (multilingual) 4%, and News (English) 1%. - Code was collected from GitHub repositories with appropriate licenses, - totalling 96GB of source code - [[Section 3]](https://arxiv.org/pdf/2204.02311.pdf#section.3). - excluded: > - GitHub repositories with copyleft licenses were excluded. Programming - languageges other than the most common 24 were excluded - [[Section 3]](https://arxiv.org/pdf/2204.02311.pdf#section.3). - quality_control: > - In order to reduce low quality web pages, the web pages were sampled - according to a "quality score" classifier. - Code files were de-duplicated using Levenshtein distance - [[Section 3]](https://arxiv.org/pdf/2204.02311.pdf#section.3). 
- # Downstream - access: closed - intended_uses: - value: > - "The dataset was created for pre-training language models by a team of - researchers at Google". - explanation: > - As stated in - [[Datasheet]](https://arxiv.org/pdf/2204.02311.pdf#appendix.D). - prohibited_uses: > - "... should not be used for any of the unacceptable language model use - cases, e.g., generation of toxic speech" - [[Datasheet]](https://arxiv.org/pdf/2204.02311.pdf#appendix.D). + modality: + explanation: text and code + value: code, text monitoring: unknown - feedback: - value: Contact the authors. - explanation: > - Author contact information is shared in the paper - [[Paper]](https://arxiv.org/pdf/2204.02311.pdf). - -- type: model - name: Flan-T5 + name: PaLM dataset organization: Google - description: Flan-T5 is a version of the T5 language model fine-tuned on instruction - data + prohibited_uses: "\"... should not be used for any of the unacceptable language\ + \ model use cases, e.g., generation of toxic speech\" [[Datasheet]](https://arxiv.org/pdf/2204.02311.pdf#appendix.D).\n" + quality_control: "In order to reduce low quality web pages, the web pages were\ + \ sampled according to a \"quality score\" classifier. Code files were de-duplicated\ + \ using Levenshtein distance [[Section 3]](https://arxiv.org/pdf/2204.02311.pdf#section.3).\n" + sample: [] + size: + explanation: "Dataset size in GB is not reported, but the dataset is reported\ + \ to have 780 billion tokens [[Datasheet]](https://arxiv.org/pdf/2204.02311.pdf#appendix.D).\ + \ The code portion of the dataset is reported to be 5% totaling a 196GB of\ + \ source code [[Section 3]](https://arxiv.org/pdf/2204.02311.pdf#section.3).\ + \ It is unclear whether the reported size is before or after de-duplication.\ + \ Nonetheless, one can estimate the dataset size by multiplying 196GB with\ + \ 20 = 3.92 TB.\n" + value: 3.92 TB + type: dataset + url: https://arxiv.org/pdf/2204.02311.pdf +- access: + explanation: Weights can be downloaded from [Github](https://github.com/google-research/t5x/blob/main/docs/models.md) + value: open + analysis: Evaluated on a variety of standard language datasets. created_date: - value: 2022-10-20 explanation: Date paper was released - url: https://arxiv.org/abs/2210.11416 - model_card: https://arxiv.org/pdf/2210.11416.pdf - modality: text (English) - analysis: Evaluated on a variety of standard language datasets. - size: 11B parameters (dense model) + value: 2022-10-20 dependencies: [T5, Muffin, P3, NaturalInstructions-v2, Flan CoT] - training_emissions: Unknown - training_time: Unknown - training_hardware: 512 v4 TPU Chips - quality_control: Across different multitask datasets, templates and formatting - were maintained. For the chain-of-thoughts (CoT) data, specific exemplars were - used. 
- access: - value: open - explanation: Weights can be downloaded from [Github](https://github.com/google-research/t5x/blob/main/docs/models.md) + description: Flan-T5 is a version of the T5 language model fine-tuned on instruction + data + feedback: https://huggingface.co/google/flan-t5-xxl/discussions + intended_uses: unknown license: - value: Apache 2.0 explanation: License on the [[Github repository]](https://github.com/google-research/google-research) - intended_uses: unknown - prohibited_uses: none + value: Apache 2.0 + modality: + explanation: text (English) + value: text; text + model_card: https://arxiv.org/pdf/2210.11416.pdf monitoring: none - feedback: https://huggingface.co/google/flan-t5-xxl/discussions - -- type: model - name: UL2 + name: Flan-T5 organization: Google - description: UL2 is a language model trained with a new pretraining objective + prohibited_uses: none + quality_control: Across different multitask datasets, templates and formatting + were maintained. For the chain-of-thoughts (CoT) data, specific exemplars were + used. + size: 11B parameters (dense) + training_emissions: Unknown + training_hardware: 512 v4 TPU Chips + training_time: Unknown + type: model + url: https://arxiv.org/abs/2210.11416 +- access: + explanation: Model weights available for download in the [[Github repo]](https://github.com/google-research/google-research/tree/master/ul2) + value: open + analysis: '' created_date: - value: 2022-05-10 explanation: Date mdoel paper was released - url: https://arxiv.org/abs/2205.05131 - model_card: '' - modality: text (English) - analysis: '' - size: 20B parameters (dense model) + value: 2022-05-10 dependencies: [C4] - training_emissions: '' - training_time: '' - training_hardware: 128 TPUv4 - quality_control: '' - access: - value: open - explanation: Model weights available for download in the [[Github repo]](https://github.com/google-research/google-research/tree/master/ul2) + description: UL2 is a language model trained with a new pretraining objective + feedback: '' + intended_uses: '' license: - value: Apache 2.0 explanation: 20B checkpoints only for three different iteration steps - intended_uses: '' - prohibited_uses: '' + value: Apache 2.0 + modality: + explanation: text (English) + value: text; text + model_card: '' monitoring: '' - feedback: '' - -- type: model - name: Parti + name: UL2 organization: Google - description: Parti is a text-to-image diffusion model + prohibited_uses: '' + quality_control: '' + size: 20B parameters (dense) + training_emissions: '' + training_hardware: 128 TPUv4 + training_time: '' + type: model + url: https://arxiv.org/abs/2205.05131 +- access: + explanation: Google does not provide access to Parti for external researchers. + value: closed + analysis: '' created_date: - value: 2022-06-22 explanation: Date the model website was made public - url: https://parti.research.google/ - model_card: '' - modality: Text (English) and Image - size: 20B parameters - analysis: '' + value: 2022-06-22 dependencies: [C4, LAION-400M, FIT400M, JFT-4B] - training_emissions: '' - training_time: unknown - training_hardware: unknown - quality_control: '' - access: - value: closed - explanation: Google does not provide access to Parti for external researchers. + description: Parti is a text-to-image diffusion model + feedback: '' + intended_uses: '' license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. 
- intended_uses: '' - prohibited_uses: '' + modality: + explanation: Text (English) and Image + value: image, text; image, text + model_card: '' monitoring: '' - feedback: '' - -- type: model - name: Imagen + name: Parti organization: Google - description: Imagen is a text-to-image diffusion model + prohibited_uses: '' + quality_control: '' + size: 20B parameters (dense) + training_emissions: '' + training_hardware: unknown + training_time: unknown + type: model + url: https://parti.research.google/ +- access: open + analysis: '' created_date: - value: 2022-05-23 explanation: Date the model website was made public - url: https://imagen.research.google/ - model_card: '' - modality: text (English) and image - size: > - 14B parameters total. 2B parameters (U-Net model), 11B parameters (T5-XXL), - and 600M and 400M parameter models for super resolution - analysis: '' + value: 2022-05-23 dependencies: [LAION-400M, Google internal image-text dataset] - training_emissions: unknown - training_time: unknown - training_hardware: 128 TPU-v4 - quality_control: '' - access: open + description: Imagen is a text-to-image diffusion model + feedback: '' + intended_uses: '' license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - intended_uses: '' - prohibited_uses: '' + modality: + explanation: text (English) and image + value: image, text; image, text + model_card: '' monitoring: '' - feedback: '' - -- type: model - name: VATT + name: Imagen organization: Google - description: VATT is a family of models trained on multimodal data + prohibited_uses: '' + quality_control: '' + size: 14B parameters (dense) + training_emissions: unknown + training_hardware: 128 TPU-v4 + training_time: unknown + type: model + url: https://imagen.research.google/ +- access: + explanation: Model checkpoints can be downloaded from the [[Github repository]](https://github.com/google-research/google-research/tree/master/vatt) + value: open + analysis: '' created_date: - value: 2022-04-22 explanation: Date the model paper was made public - url: https://arxiv.org/abs/2104.11178 - model_card: '' - modality: Text (English), Video, and Audio - size: 155M - analysis: '' + value: 2022-04-22 dependencies: [AudioSet, HowTo100M] - training_emissions: unknown - training_time: 3 days - training_hardware: 256 TPU-v3 - quality_control: '' - access: - value: open - explanation: Model checkpoints can be downloaded from the [[Github repository]](https://github.com/google-research/google-research/tree/master/vatt) + description: VATT is a family of models trained on multimodal data + feedback: '' + intended_uses: '' license: - value: Apache 2.0 explanation: License on the [[Github repository]](https://github.com/google-research/google-research) - intended_uses: '' - prohibited_uses: '' + value: Apache 2.0 + modality: + explanation: Text (English), Video, and Audio + value: audio, text, video; audio, text, video + model_card: '' monitoring: '' - feedback: '' - -- type: model - name: PaLM - # General + name: VATT organization: Google - description: > - PaLM stands Pathways Language Model, "dense decoder-only Transformer model - trained with the Pathways system" - [[Google ai Blog]](https://ai.googleblog.com/2022/04/pathways-language-model-palm-scaling-to.html). 
+  prohibited_uses: ''
+  quality_control: ''
+  size: 155M parameters (dense)
+  training_emissions: unknown
+  training_hardware: 256 TPU-v3
+  training_time: 3 days
+  type: model
+  url: https://arxiv.org/abs/2104.11178
+- access:
+    explanation: Made available through the PaLM API as of March 14, 2023.
+    value: limited
+  analysis: "\"PaLM is evaluated on English Natural Language Processing (NLP) tasks,\
+    \ tasks from BIG-bench, reasoning tasks, code completion tasks, multilingual\
+    \ generation and question answering tasks, translation tasks, and bias and toxicity\
+    \ benchmarks\" [[Model Card]](https://arxiv.org/pdf/2204.02311.pdf#appendix.E).\n"
   created_date:
+    explanation: "The date of the Google AI blog announcing the details of PaLM\
+      \ [[Google AI Blog]](https://ai.googleblog.com/2022/04/pathways-language-model-palm-scaling-to.html).\n"
     value: 2022-04-04
-    explanation: >
-      The date of the Google AI blog announcing the details of PaLM
-      [[Google AI Blog]](https://ai.googleblog.com/2022/04/pathways-language-model-palm-scaling-to.html).
-  url: https://arxiv.org/pdf/2204.02311.pdf
-  model_card: https://arxiv.org/pdf/2204.02311.pdf#appendix.E
-  modality: text and code
-  size: 540B parameters (dense model)
-  analysis: >
-    "PaLM is evaluated on English Natural Language Processing (NLP) tasks, tasks
-    from BIG-bench, reasoning tasks, code completion tasks, multilingual
-    generation and question answering tasks, translation tasks, and bias and
-    toxicity benchmarks"
-    [[Model Card]](https://arxiv.org/pdf/2204.02311.pdf#appendix.E).
-  # Construction
   dependencies: [PaLM dataset]
-  training_emissions:
-    value: 271.43 tCO2
-    explanation: >
-      Reported in
-      [[Appendix B]](https://arxiv.org/pdf/2204.02311.pdf#appendix.A)
-  training_time:
-    value: 29600 petaflop/s-days
-    explanation: >
-      Reported in
-      [[Appendix B]](https://arxiv.org/pdf/2204.02311.pdf#appendix.A)
-  training_hardware:
-    value: 6144 TPU v4 chips
-    explanation: >
-      Reported in [[Section 4]](https://arxiv.org/pdf/2204.02311.pdf#section.4).
-  quality_control: Unknown
-  # Downstream
-  access:
-    value: limited
-    explanation: Made available through the PaLM API as of March 14, 2023.
+  description: "PaLM stands for Pathways Language Model, \"dense decoder-only Transformer\
+    \ model trained with the Pathways system\" [[Google AI Blog]](https://ai.googleblog.com/2022/04/pathways-language-model-palm-scaling-to.html).\n"
+  feedback:
+    explanation: "Author contact information is shared in the paper [[Paper]](https://arxiv.org/pdf/2204.02311.pdf).\n"
+    value: Contact the authors.
+  intended_uses: "\"The primary use is research on language models, including: research\
+    \ on NLP applications like machine translation and question answering, advancing\
+    \ fairness and safety research, and understanding limitations of current LLMs.\
+    \ Within Google, PaLM is being used for research on a variety of open-ended\
+    \ text and code generation tasks, including reasoning [[Section 6.3]](https://arxiv.org/pdf/2204.02311.pdf#subsection.6.3)\
+    \ and code synthesis and understanding [[Section 6.4]](https://arxiv.org/pdf/2204.02311.pdf#subsection.6.4)\"\
+    \ [[Model Card]](https://arxiv.org/pdf/2204.02311.pdf#appendix.E).\n"
   license:
+    explanation: "The asset isn't released, and hence the license is unknown.\n"
     value: unknown
-    explanation: >
-      The asset isn't released, and hence the license is unknown.
- intended_uses: > - "The primary use is research on language models, including: research - on NLP applications like machine translation and question answering, - advancing fairness and safety research, and understanding limitations of - current LLMs. - Within Google, PaLM is being used for research on a variety of open- - ended text and code generation tasks, including reasoning - [[Section 6.3]](https://arxiv.org/pdf/2204.02311.pdf#subsection.6.3) - and code synthesis and understanding - [[Section 6.4]](https://arxiv.org/pdf/2204.02311.pdf#subsection.6.4)" - [[Model Card]](https://arxiv.org/pdf/2204.02311.pdf#appendix.E). - prohibited_uses: > - The model "should not be used for downstream applications without further - analysis on factors in the proposed downstream application - [[Model Card]](https://arxiv.org/pdf/2204.02311.pdf#appendix.E)" + modality: + explanation: text and code + value: code, text; code, text + model_card: https://arxiv.org/pdf/2204.02311.pdf#appendix.E monitoring: unknown - feedback: - value: Contact the authors. - explanation: > - Author contact information is shared in the paper - [[Paper]](https://arxiv.org/pdf/2204.02311.pdf). - -- type: application - name: PaLM API + name: PaLM organization: Google - description: a new developer offering that makes it easy and safe to experiment - with Google’s language models. + prohibited_uses: "The model \"should not be used for downstream applications without\ + \ further analysis on factors in the proposed downstream application [[Model\ + \ Card]](https://arxiv.org/pdf/2204.02311.pdf#appendix.E)\"\n" + quality_control: Unknown + size: 540B parameters (dense) + training_emissions: + explanation: "Reported in [[Appendix B]](https://arxiv.org/pdf/2204.02311.pdf#appendix.A)\n" + value: 271.43 tCO2 + training_hardware: + explanation: "Reported in [[Section 4]](https://arxiv.org/pdf/2204.02311.pdf#section.4).\n" + value: 6144 TPU v4 chips + training_time: + explanation: "Reported in [[Appendix B]](https://arxiv.org/pdf/2204.02311.pdf#appendix.A)\n" + value: 29600 petaflop/s-days + type: model + url: https://arxiv.org/pdf/2204.02311.pdf +- access: limited + adaptation: '' created_date: 2023-03-14 - url: https://developers.googleblog.com/2023/03/announcing-palm-api-and-makersuite.html dependencies: [PaLM] - adaptation: '' - output_space: '' - quality_control: '' - access: limited - license: unknown - terms_of_service: '' + description: a new developer offering that makes it easy and safe to experiment + with Google’s language models. 
+ failures: '' + feedback: '' intended_uses: '' - prohibited_uses: '' + license: unknown monitoring: '' - feedback: '' monthly_active_users: '' - user_distribution: '' - failures: '' - -- type: model - name: Med-PaLM + name: PaLM API organization: Google - description: '' - created_date: 2022-12-26 - url: https://arxiv.org/abs/2212.13138 - model_card: '' - modality: text + output_space: '' + prohibited_uses: '' + quality_control: '' + terms_of_service: '' + type: application + url: https://developers.googleblog.com/2023/03/announcing-palm-api-and-makersuite.html + user_distribution: '' +- access: closed analysis: '' - size: 540B + created_date: 2022-12-26 dependencies: [Flan-PaLM, MultiMedQA] - training_emissions: '' - training_time: '' - training_hardware: '' - quality_control: '' - access: closed + description: '' + feedback: '' + intended_uses: '' license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - intended_uses: '' - prohibited_uses: '' + modality: + explanation: text + value: text; text + model_card: '' monitoring: '' - feedback: '' - -- type: model - name: Med-PaLM Multimodal + name: Med-PaLM organization: Google - description: '' - created_date: 2023-07-26 - url: https://arxiv.org/pdf/2307.14334.pdf - model_card: '' - modality: text, image, and genomics input with text output + prohibited_uses: '' + quality_control: '' + size: 540B parameters (dense) + training_emissions: '' + training_hardware: '' + training_time: '' + type: model + url: https://arxiv.org/abs/2212.13138 +- access: closed analysis: Evaluated on MultiMedBench tasks and radiologist evaluations of model-generated chest X-ray reports - size: 562B parameters (dense) + created_date: 2023-07-26 dependencies: [PaLM-E, MultiMedBench] - training_emissions: '' - training_time: '' - training_hardware: '' - quality_control: '' - access: closed + description: '' + feedback: '' + intended_uses: '' license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - intended_uses: '' - prohibited_uses: '' + modality: + explanation: text, image, and genomics input with text output + value: image, text; text + model_card: '' monitoring: '' - feedback: '' - -- type: model - name: MultiMedQA + name: Med-PaLM Multimodal organization: Google - description: '' - created_date: 2022-12-26 - url: https://arxiv.org/abs/2212.13138 - model_card: '' - modality: text + prohibited_uses: '' + quality_control: '' + size: 562B parameters (dense) + training_emissions: '' + training_hardware: '' + training_time: '' + type: model + url: https://arxiv.org/pdf/2307.14334.pdf +- access: closed analysis: '' - size: unknown + created_date: 2022-12-26 dependencies: - MedQA - MedMCQA @@ -814,149 +708,149 @@ - LiveQA - Medication QA - HealthSearchQA - training_emissions: '' - training_time: '' - training_hardware: '' - quality_control: '' - access: closed + description: '' + feedback: '' + intended_uses: '' license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. 
- intended_uses: '' - prohibited_uses: '' + modality: + explanation: text + value: text; text + model_card: '' monitoring: '' - feedback: '' - -- type: model - name: Flan-PaLM + name: MultiMedQA organization: Google - description: '' - created_date: 2022-10-20 - url: https://arxiv.org/abs/2210.11416 - model_card: '' - modality: text - analysis: '' - size: 540B - dependencies: [PaLM, Muffin, P3, NaturalInstructions-v2] + prohibited_uses: '' + quality_control: '' + size: unknown training_emissions: '' - training_time: '' training_hardware: '' - quality_control: '' - access: closed + training_time: '' + type: model + url: https://arxiv.org/abs/2212.13138 +- access: closed + analysis: '' + created_date: 2022-10-20 + dependencies: [PaLM, Muffin, P3, NaturalInstructions-v2] + description: '' + feedback: '' + intended_uses: '' license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - intended_uses: '' - prohibited_uses: '' + modality: + explanation: text + value: text; text + model_card: '' monitoring: '' - feedback: '' - -- type: model - name: Flan-U-PaLM + name: Flan-PaLM organization: Google - description: '' - created_date: 2022-10-20 + prohibited_uses: '' + quality_control: '' + size: 540B parameters (dense) + training_emissions: '' + training_hardware: '' + training_time: '' + type: model url: https://arxiv.org/abs/2210.11416 - model_card: '' - modality: text +- access: closed analysis: '' - size: 540B + created_date: 2022-10-20 dependencies: [U-PaLM, Muffin, P3, NaturalInstructions-v2] - training_emissions: '' - training_time: '' - training_hardware: '' - quality_control: '' - access: closed + description: '' + feedback: '' + intended_uses: '' license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. 
- intended_uses: '' - prohibited_uses: '' + modality: + explanation: text + value: text; text + model_card: '' monitoring: '' - feedback: '' - -- type: dataset - name: Muffin + name: Flan-U-PaLM organization: Google - description: '' + prohibited_uses: '' + quality_control: '' + size: 540B parameters (dense) + training_emissions: '' + training_hardware: '' + training_time: '' + type: model + url: https://arxiv.org/abs/2210.11416 +- access: open + analysis: '' created_date: 2021-09-03 - url: https://arxiv.org/abs/2109.01652 datasheet: '' - modality: text - size: 62 tasks - sample: [] - analysis: '' dependencies: [] - included: '' + description: '' excluded: '' - quality_control: '' - access: open - license: Apache 2.0 + feedback: '' + included: '' intended_uses: '' - prohibited_uses: '' + license: Apache 2.0 + modality: text monitoring: '' - feedback: '' - -- type: model - name: U-PaLM + name: Muffin organization: Google - description: '' - created_date: 2022-10-20 - url: https://arxiv.org/abs/2210.11399 - model_card: '' - modality: text + prohibited_uses: '' + quality_control: '' + sample: [] + size: 62 tasks + type: dataset + url: https://arxiv.org/abs/2109.01652 +- access: closed analysis: '' - size: 540B + created_date: 2022-10-20 dependencies: [PaLM, PaLM dataset] - training_emissions: '' - training_time: '' - training_hardware: '' - quality_control: '' - access: closed + description: '' + feedback: '' + intended_uses: '' license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - intended_uses: '' - prohibited_uses: '' + modality: + explanation: text + value: text; text + model_card: '' monitoring: '' - feedback: '' - -- type: model - name: PaLM-SayCan + name: U-PaLM organization: Google - description: '' - created_date: 2022-08-16 - url: https://arxiv.org/abs/2204.01691 - model_card: '' - modality: text, robotic control - analysis: '' - size: 540B - dependencies: [PaLM] + prohibited_uses: '' + quality_control: '' + size: 540B parameters (dense) training_emissions: '' - training_time: '' training_hardware: '' - quality_control: '' - access: closed - license: unknown (model weights), Apache 2.0 (SayCan code) + training_time: '' + type: model + url: https://arxiv.org/abs/2210.11399 +- access: closed + analysis: '' + created_date: 2022-08-16 + dependencies: [PaLM] + description: '' + feedback: '' intended_uses: '' - prohibited_uses: '' + license: unknown (model weights), Apache 2.0 (SayCan code) + modality: + explanation: text, robotic control + value: text; text + model_card: '' monitoring: '' - feedback: '' - -- type: model - name: GLaM + name: PaLM-SayCan organization: Google - description: '' - created_date: 2021-12-13 - url: https://arxiv.org/abs/2112.06905 - model_card: '' - modality: text + prohibited_uses: '' + quality_control: '' + size: 540B parameters (dense) + training_emissions: '' + training_hardware: '' + training_time: '' + type: model + url: https://arxiv.org/abs/2204.01691 +- access: closed analysis: '' - size: 1.2T parameters (sparse) + created_date: 2021-12-13 dependencies: - GLaM Web dataset - Wikipedia @@ -964,750 +858,740 @@ - GLaM Forums dataset - BooksCorpus - GLaM News dataset - training_emissions: '' - training_time: '' - training_hardware: '' - quality_control: '' - access: closed + description: '' + feedback: '' + intended_uses: '' license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - 
explanation: > - The asset isn't released, and hence the license is unknown. - intended_uses: '' - prohibited_uses: '' + modality: + explanation: text + value: text; text + model_card: '' monitoring: '' - feedback: '' - -- type: dataset - name: GLaM Web dataset + name: GLaM organization: Google - description: '' - created_date: 2021-12-13 + prohibited_uses: '' + quality_control: '' + size: 1.2T parameters (sparse) + training_emissions: '' + training_hardware: '' + training_time: '' + type: model url: https://arxiv.org/abs/2112.06905 +- access: closed + analysis: '' + created_date: 2021-12-13 datasheet: '' + dependencies: [] + description: '' + excluded: '' + feedback: '' + included: '' + intended_uses: '' + license: + explanation: "The asset isn't released, and hence the license is unknown.\n" + value: unknown modality: text - size: unknown + monitoring: '' + name: GLaM Web dataset + organization: Google + prohibited_uses: '' + quality_control: '' sample: [] + size: unknown + type: dataset + url: https://arxiv.org/abs/2112.06905 +- access: closed analysis: '' + created_date: 2021-12-13 + datasheet: '' dependencies: [] - included: '' + description: '' excluded: '' - quality_control: '' - access: closed + feedback: '' + included: '' + intended_uses: '' license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - intended_uses: '' - prohibited_uses: '' + modality: text monitoring: '' - feedback: '' - -- type: dataset name: GLaM Conversations dataset organization: Google - description: '' - created_date: 2021-12-13 - url: https://arxiv.org/abs/2112.06905 - datasheet: '' - modality: text - size: unknown + prohibited_uses: '' + quality_control: '' sample: [] + size: unknown + type: dataset + url: https://arxiv.org/abs/2112.06905 +- access: closed analysis: '' + created_date: 2021-12-13 + datasheet: '' dependencies: [] - included: '' + description: '' excluded: '' - quality_control: '' - access: closed + feedback: '' + included: '' + intended_uses: '' license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - intended_uses: '' - prohibited_uses: '' + modality: text monitoring: '' - feedback: '' - -- type: dataset name: GLaM Forums dataset organization: Google - description: '' - created_date: 2021-12-13 - url: https://arxiv.org/abs/2112.06905 - datasheet: '' - modality: text - size: unknown + prohibited_uses: '' + quality_control: '' sample: [] + size: unknown + type: dataset + url: https://arxiv.org/abs/2112.06905 +- access: closed analysis: '' + created_date: 2021-12-13 + datasheet: '' dependencies: [] - included: '' + description: '' excluded: '' - quality_control: '' - access: closed + feedback: '' + included: '' + intended_uses: '' license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. 
- intended_uses: '' - prohibited_uses: '' + modality: text monitoring: '' - feedback: '' - -- type: dataset name: GLaM News dataset organization: Google - description: '' - created_date: 2021-12-13 - url: https://arxiv.org/abs/2112.06905 - datasheet: '' - modality: text - size: unknown + prohibited_uses: '' + quality_control: '' sample: [] + size: unknown + type: dataset + url: https://arxiv.org/abs/2112.06905 +- access: closed analysis: '' - dependencies: [] - included: '' - excluded: '' - quality_control: '' - access: closed + created_date: 2021-05-18 + dependencies: [MUM dataset] + description: '' + feedback: '' + intended_uses: '' license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - intended_uses: '' - prohibited_uses: '' + modality: + explanation: text, image + value: image, text; image, text + model_card: '' monitoring: '' - feedback: '' - -- type: model name: MUM organization: Google - description: '' - created_date: 2021-05-18 - url: https://blog.google/products/search/introducing-mum/ - model_card: '' - modality: text, image - analysis: '' + prohibited_uses: '' + quality_control: '' size: unknown - dependencies: [MUM dataset] training_emissions: '' - training_time: '' training_hardware: '' - quality_control: '' - access: closed - license: - value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - intended_uses: '' - prohibited_uses: '' - monitoring: '' - feedback: '' - -- type: dataset - name: MUM dataset - organization: Google - description: '' - created_date: 2021-05-18 + training_time: '' + type: model url: https://blog.google/products/search/introducing-mum/ - datasheet: '' - modality: text, image - size: unknown - sample: [] +- access: closed analysis: '' + created_date: 2021-05-18 + datasheet: '' dependencies: [] - included: '' + description: '' excluded: '' - quality_control: '' - access: closed + feedback: '' + included: '' + intended_uses: '' license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - intended_uses: '' - prohibited_uses: '' + modality: + explanation: text, image + value: image, text monitoring: '' - feedback: '' - -- type: model - name: Phenaki + name: MUM dataset organization: Google - description: '' - created_date: 2023-02-01 - url: https://openreview.net/pdf?id=vOEXS39nOF - model_card: '' - modality: text, video + prohibited_uses: '' + quality_control: '' + sample: [] + size: unknown + type: dataset + url: https://blog.google/products/search/introducing-mum/ +- access: closed analysis: '' - size: 1.8B parameters (dense model) + created_date: 2023-02-01 dependencies: [LAION-400M, Phenaki Video-Text Corpus] - training_emissions: '' - training_time: '' - training_hardware: '' - quality_control: '' - access: closed + description: '' + feedback: '' + intended_uses: '' license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. 
- intended_uses: '' - prohibited_uses: '' + modality: + explanation: text, video + value: text, video; text, video + model_card: '' monitoring: '' - feedback: '' - -- type: dataset - name: Phenaki Video-Text Corpus + name: Phenaki organization: Google - description: '' - created_date: 2023-02-01 + prohibited_uses: '' + quality_control: '' + size: 1.8B parameters (dense) + training_emissions: '' + training_hardware: '' + training_time: '' + type: model url: https://openreview.net/pdf?id=vOEXS39nOF - datasheet: '' - modality: text, video - size: 15M text-video pairs at 8FPS - sample: [] +- access: closed analysis: '' + created_date: 2023-02-01 + datasheet: '' dependencies: [] - included: '' + description: '' excluded: '' - quality_control: '' - access: closed + feedback: '' + included: '' + intended_uses: '' license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - intended_uses: '' - prohibited_uses: '' + modality: text, video monitoring: '' - feedback: '' - -- type: model - name: Flan-UL2 + name: Phenaki Video-Text Corpus organization: Google - description: '' - created_date: 2023-03-02 - url: https://arxiv.org/abs/2205.05131 - model_card: '' - modality: text + prohibited_uses: '' + quality_control: '' + sample: [] + size: 15M text-video pairs at 8FPS + type: dataset + url: https://openreview.net/pdf?id=vOEXS39nOF +- access: open analysis: '' - size: 20B parameters (dense model) + created_date: 2023-03-02 dependencies: [UL2, Flan Collection] - training_emissions: '' - training_time: '' - training_hardware: '' - quality_control: '' - access: open - license: Apache 2.0 + description: '' + feedback: '' intended_uses: '' - prohibited_uses: '' + license: Apache 2.0 + modality: + explanation: text + value: text; text + model_card: '' monitoring: '' - feedback: '' - -- type: dataset - name: Flan Collection + name: Flan-UL2 organization: Google - description: '' + prohibited_uses: '' + quality_control: '' + size: 20B parameters (dense) + training_emissions: '' + training_hardware: '' + training_time: '' + type: model + url: https://arxiv.org/abs/2205.05131 +- access: open + analysis: '' created_date: 2023-01-31 - url: https://arxiv.org/abs/2301.13688 datasheet: '' - modality: text - size: 1836 tasks - sample: [] - analysis: '' dependencies: [Flan dataset, P3, NaturalInstructions-v2] - included: '' + description: '' excluded: '' - quality_control: '' - access: open - license: Apache 2.0 + feedback: '' + included: '' intended_uses: '' - prohibited_uses: '' + license: Apache 2.0 + modality: text monitoring: '' - feedback: '' - -- type: model - name: MusicLM + name: Flan Collection organization: Google - description: '' - created_date: 2023-01-26 - url: https://arxiv.org/pdf/2301.11325.pdf - model_card: '' - modality: audio (music), text + prohibited_uses: '' + quality_control: '' + sample: [] + size: 1836 tasks + type: dataset + url: https://arxiv.org/abs/2301.13688 +- access: closed analysis: '' - size: 1.4B parameters (430M semantic stage, 430M acoustic stage, 600M w2v-BERT) + created_date: 2023-01-26 dependencies: - SoundStream - w2v-BERT - MuLan - MusicLM semantic model - MusicLM acoustic model + description: '' + feedback: '' + intended_uses: '' + license: + explanation: "The asset isn't released, and hence the license is unknown.\n" + value: unknown + modality: + explanation: audio (music), text + value: audio, text; audio, text + model_card: '' + monitoring: '' + name: 
MusicLM + organization: Google + prohibited_uses: '' + quality_control: '' + size: 1.4B parameters (dense) training_emissions: '' - training_time: '' training_hardware: '' - quality_control: '' - access: closed + training_time: '' + type: model + url: https://arxiv.org/pdf/2301.11325.pdf +- access: closed + analysis: '' + created_date: 2023-01-26 + dependencies: [Free Music Archive] + description: '' + feedback: '' + intended_uses: '' license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - intended_uses: '' - prohibited_uses: '' + modality: + explanation: audio (music) + value: audio; audio + model_card: '' monitoring: '' - feedback: '' - -- type: model name: SoundStream organization: Google - description: '' - created_date: 2023-01-26 - url: https://arxiv.org/pdf/2301.11325.pdf - model_card: '' - modality: audio (music) - analysis: '' + prohibited_uses: '' + quality_control: '' size: unknown - dependencies: [Free Music Archive] training_emissions: '' - training_time: '' training_hardware: '' - quality_control: '' - access: closed + training_time: '' + type: model + url: https://arxiv.org/pdf/2301.11325.pdf +- access: closed + analysis: '' + created_date: 2023-01-26 + dependencies: [Free Music Archive] + description: '' + feedback: '' + intended_uses: '' license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - intended_uses: '' - prohibited_uses: '' + modality: + explanation: audio (music) + value: audio; audio + model_card: '' monitoring: '' - feedback: '' - -- type: model name: w2v-BERT organization: Google - description: '' - created_date: 2023-01-26 - url: https://arxiv.org/pdf/2301.11325.pdf - model_card: '' - modality: audio (music) - analysis: '' - size: 600M parameter (dense model) - dependencies: [Free Music Archive] + prohibited_uses: '' + quality_control: '' + size: 600M parameters (dense) training_emissions: '' - training_time: '' training_hardware: '' - quality_control: '' - access: closed + training_time: '' + type: model + url: https://arxiv.org/pdf/2301.11325.pdf +- access: closed + analysis: '' + created_date: 2022-08-26 + dependencies: [AST, BERT, MuLan dataset] + description: '' + feedback: '' + intended_uses: '' license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. 
- intended_uses: '' - prohibited_uses: '' + modality: + explanation: audio (music), text + value: audio, text; audio, text + model_card: '' monitoring: '' - feedback: '' - -- type: model name: MuLan organization: Google - description: '' - created_date: 2022-08-26 - url: https://arxiv.org/abs/2208.12415 - model_card: '' - modality: audio (music), text - analysis: '' + prohibited_uses: '' + quality_control: '' size: Unknown - dependencies: [AST, BERT, MuLan dataset] training_emissions: '' - training_time: '' training_hardware: '' - quality_control: '' - access: closed + training_time: '' + type: model + url: https://arxiv.org/abs/2208.12415 +- access: closed + analysis: '' + created_date: 2022-08-26 + datasheet: '' + dependencies: [] + description: '' + excluded: '' + feedback: '' + included: '' + intended_uses: '' license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - intended_uses: '' - prohibited_uses: '' + modality: + explanation: audio (music), text + value: audio, text monitoring: '' - feedback: '' - -- type: dataset name: MuLan dataset organization: Google - description: '' - created_date: 2022-08-26 - url: https://arxiv.org/abs/2208.12415 - datasheet: '' - modality: audio (music), text - size: 370K hours audio + prohibited_uses: '' + quality_control: '' sample: [] + size: 370K hours audio + type: dataset + url: https://arxiv.org/abs/2208.12415 +- access: closed analysis: '' + created_date: 2023-01-26 + datasheet: '' dependencies: [] - included: '' + description: '' excluded: '' - quality_control: '' - access: closed + feedback: '' + included: '' + intended_uses: '' license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - intended_uses: '' - prohibited_uses: '' + modality: + explanation: audio (music) + value: audio monitoring: '' - feedback: '' - -- type: dataset name: MusicLM dataset organization: Google - description: '' - created_date: 2023-01-26 - url: https://arxiv.org/pdf/2301.11325.pdf - datasheet: '' - modality: audio (music) - size: 280K hours audio + prohibited_uses: '' + quality_control: '' sample: [] + size: 280K hours audio + type: dataset + url: https://arxiv.org/pdf/2301.11325.pdf +- access: closed analysis: '' - dependencies: [] - included: '' - excluded: '' - quality_control: '' - access: closed + created_date: 2023-01-26 + dependencies: [MusicLM dataset] + description: '' + feedback: '' + intended_uses: '' license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. 
- intended_uses: '' - prohibited_uses: '' + modality: + explanation: Audio (music) + value: audio; audio + model_card: '' monitoring: '' - feedback: '' - -- type: model name: MusicLM semantic model organization: Google - description: '' - created_date: 2023-01-26 + prohibited_uses: '' + quality_control: '' + size: 430M parameters (dense) + training_emissions: '' + training_hardware: '' + training_time: '' + type: model url: https://arxiv.org/pdf/2301.11325.pdf - model_card: '' - modality: Audio (music) +- access: closed analysis: '' - size: 430M parameters (dense model) + created_date: 2023-01-26 dependencies: [MusicLM dataset] - training_emissions: '' - training_time: '' - training_hardware: '' - quality_control: '' - access: closed + description: '' + feedback: '' + intended_uses: '' license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - intended_uses: '' - prohibited_uses: '' + modality: + explanation: Audio (music) + value: audio; audio + model_card: '' monitoring: '' - feedback: '' - -- type: model name: MusicLM acoustic model organization: Google - description: '' - created_date: 2023-01-26 - url: https://arxiv.org/pdf/2301.11325.pdf - model_card: '' - modality: Audio (music) - analysis: '' - size: 430M parameters (dense model) - dependencies: [MusicLM dataset] + prohibited_uses: '' + quality_control: '' + size: 430M parameters (dense) training_emissions: '' - training_time: '' training_hardware: '' - quality_control: '' - access: closed + training_time: '' + type: model + url: https://arxiv.org/pdf/2301.11325.pdf +- access: closed + analysis: '' + created_date: 2023-02-08 + dependencies: [Noise2Music pseudolabel dataset] + description: '' + feedback: '' + intended_uses: '' license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - intended_uses: '' - prohibited_uses: '' + modality: + explanation: audio (music), text + value: audio, text; audio, text + model_card: '' monitoring: '' - feedback: '' - -- type: model name: Noise2Music organization: Google - description: '' - created_date: 2023-02-08 - url: https://google-research.github.io/noise2music/noise2music.pdf - model_card: '' - modality: audio (music), text - analysis: '' + prohibited_uses: '' + quality_control: '' size: unknown - dependencies: [Noise2Music pseudolabel dataset] training_emissions: '' - training_time: '' training_hardware: '' - quality_control: '' - access: closed + training_time: '' + type: model + url: https://google-research.github.io/noise2music/noise2music.pdf +- access: closed + analysis: '' + created_date: 2023-02-08 + datasheet: '' + dependencies: [LaMDA] + description: '' + excluded: '' + feedback: '' + included: '' + intended_uses: '' license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. 
- intended_uses: '' - prohibited_uses: '' + modality: text monitoring: '' - feedback: '' - -- type: dataset name: LaMDA-LF organization: Google - description: '' - created_date: 2023-02-08 - url: https://google-research.github.io/noise2music/noise2music.pdf - datasheet: '' - modality: text - size: 150k songs + prohibited_uses: '' + quality_control: '' sample: [] + size: 150k songs + type: dataset + url: https://google-research.github.io/noise2music/noise2music.pdf +- access: closed analysis: '' - dependencies: [LaMDA] - included: '' + created_date: 2023-02-08 + datasheet: '' + dependencies: [MusicCaps] + description: '' excluded: '' - quality_control: '' - access: closed + feedback: '' + included: '' + intended_uses: '' license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - intended_uses: '' - prohibited_uses: '' + modality: text monitoring: '' - feedback: '' - -- type: dataset name: Rater-LF organization: Google - description: '' - created_date: 2023-02-08 - url: https://google-research.github.io/noise2music/noise2music.pdf - datasheet: '' - modality: text - size: 10k captions + prohibited_uses: '' + quality_control: '' sample: [] + size: 10k captions + type: dataset + url: https://google-research.github.io/noise2music/noise2music.pdf +- access: closed analysis: '' + created_date: 2023-02-08 + datasheet: '' dependencies: [MusicCaps] - included: '' + description: '' excluded: '' - quality_control: '' - access: closed + feedback: '' + included: '' + intended_uses: '' license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - intended_uses: '' - prohibited_uses: '' + modality: + explanation: Text + value: text monitoring: '' - feedback: '' - -- type: dataset name: Rater-SF organization: Google - description: '' - created_date: 2023-02-08 - url: https://google-research.github.io/noise2music/noise2music.pdf - datasheet: '' - modality: Text - size: 24k captions + prohibited_uses: '' + quality_control: '' sample: [] + size: 24k captions + type: dataset + url: https://google-research.github.io/noise2music/noise2music.pdf +- access: closed analysis: '' - dependencies: [MusicCaps] - included: '' - excluded: '' - quality_control: '' - access: closed + created_date: 2023-02-08 + dependencies: [MuLan, MuLaMCap, LaMDA-LF, Rater-LF, Rater-SF] + description: '' + feedback: '' + intended_uses: '' license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. 
- intended_uses: '' - prohibited_uses: '' + modality: + explanation: audio (music), text + value: audio, text; audio, text + model_card: '' monitoring: '' - feedback: '' - -- type: model name: Noise2Music pseudolabeler organization: Google - description: '' - created_date: 2023-02-08 - url: https://google-research.github.io/noise2music/noise2music.pdf - model_card: '' - modality: audio (music), text - analysis: '' + prohibited_uses: '' + quality_control: '' size: unknown - dependencies: [MuLan, MuLaMCap, LaMDA-LF, Rater-LF, Rater-SF] training_emissions: '' - training_time: '' training_hardware: '' - quality_control: '' - access: closed + training_time: '' + type: model + url: https://google-research.github.io/noise2music/noise2music.pdf +- access: closed + analysis: '' + created_date: 2023-02-08 + datasheet: '' + dependencies: [] + description: '' + excluded: '' + feedback: '' + included: '' + intended_uses: '' license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - intended_uses: '' - prohibited_uses: '' + modality: + explanation: audio (music) + value: audio monitoring: '' - feedback: '' - -- type: dataset name: Noise2Music audio dataset organization: Google - description: '' - created_date: 2023-02-08 - url: https://google-research.github.io/noise2music/noise2music.pdf - datasheet: '' - modality: audio (music) - size: 340k hours audio + prohibited_uses: '' + quality_control: '' sample: [] + size: 340k hours audio + type: dataset + url: https://google-research.github.io/noise2music/noise2music.pdf +- access: closed analysis: '' - dependencies: [] - included: '' + created_date: 2023-02-08 + datasheet: '' + dependencies: [Noise2Music audio dataset, Noise2Music pseudolabeler] + description: '' excluded: '' - quality_control: '' - access: closed + feedback: '' + included: '' + intended_uses: '' license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - intended_uses: '' - prohibited_uses: '' + modality: + explanation: audio (music), text + value: audio, text monitoring: '' - feedback: '' - -- type: dataset name: Noise2Music pseudolabel dataset organization: Google - description: '' - created_date: 2023-02-08 - url: https://google-research.github.io/noise2music/noise2music.pdf - datasheet: '' - modality: audio (music), text - size: 340k hours audio with pseudolabels - sample: [] - analysis: '' - dependencies: [Noise2Music audio dataset, Noise2Music pseudolabeler] - included: '' - excluded: '' + prohibited_uses: '' quality_control: '' - access: closed + sample: [] + size: 340k hours audio with pseudolabels + type: dataset + url: https://google-research.github.io/noise2music/noise2music.pdf +- access: limited + adaptation: '' + created_date: 2022-08-25 + dependencies: [LaMDA] + description: AI Test Kitchen provides a new way for people to learn about, experience, + and give feedback on emerging AI technology, like LaMDA. + failures: '' + feedback: '' + intended_uses: '' license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. 
- intended_uses: '' - prohibited_uses: '' monitoring: '' - feedback: '' - -- type: application + monthly_active_users: '' name: AI Test Kitchen organization: Google - description: AI Test Kitchen provides a new way for people to learn about, experience, - and give feedback on emerging AI technology, like LaMDA. - created_date: 2022-08-25 - url: https://blog.google/technology/ai/join-us-in-the-ai-test-kitchen/ - dependencies: [LaMDA] - adaptation: '' output_space: '' + prohibited_uses: '' quality_control: '' - access: limited - license: - value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. terms_of_service: '' + type: application + url: https://blog.google/technology/ai/join-us-in-the-ai-test-kitchen/ + user_distribution: '' +- access: closed + adaptation: '' + created_date: 2023-02-06 + dependencies: [LaMDA] + description: Conversational AI service, powered by LaMDA + failures: '' + feedback: '' intended_uses: '' - prohibited_uses: '' + license: + explanation: "The asset isn't released, and hence the license is unknown.\n" + value: unknown monitoring: '' - feedback: '' monthly_active_users: '' - user_distribution: '' - failures: '' - -- type: application name: Bard organization: Google - description: Conversational AI service, powered by LaMDA - created_date: 2023-02-06 - url: https://blog.google/technology/ai/bard-google-ai-search-updates/ - dependencies: [LaMDA] - adaptation: '' output_space: '' + prohibited_uses: '' quality_control: '' - access: closed - license: - value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. terms_of_service: '' - intended_uses: '' - prohibited_uses: '' - monitoring: '' - feedback: '' - monthly_active_users: '' + type: application + url: https://blog.google/technology/ai/bard-google-ai-search-updates/ user_distribution: '' - failures: '' - -- type: model - name: Minerva - organization: Google - description: '' - created_date: 2022-06-29 - url: https://arxiv.org/abs/2206.14858 - model_card: '' - modality: text +- access: closed analysis: '' - size: 540B parameters (dense model) + created_date: 2022-06-29 dependencies: - PaLM - arXiv - PaLM dataset - Minerva Math Web Pages dataset - training_emissions: '' - training_time: '' - training_hardware: '' - quality_control: '' - access: closed + description: '' + feedback: '' + intended_uses: '' license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - intended_uses: '' - prohibited_uses: '' + modality: + explanation: text + value: text; text + model_card: '' monitoring: '' - feedback: '' - -- type: dataset - name: Minerva Math Web Pages dataset + name: Minerva organization: Google - description: '' - created_date: 2022-06-29 + prohibited_uses: '' + quality_control: '' + size: 540B parameters (dense) + training_emissions: '' + training_hardware: '' + training_time: '' + type: model url: https://arxiv.org/abs/2206.14858 - datasheet: '' - modality: Text - size: 17.5B tokens - sample: [] +- access: closed analysis: '' + created_date: 2022-06-29 + datasheet: '' dependencies: [] - included: '' + description: '' excluded: '' - quality_control: '' - access: closed + feedback: '' + included: '' + intended_uses: '' license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. 
- intended_uses: '' - prohibited_uses: '' + modality: + explanation: Text + value: text monitoring: '' - feedback: '' - -- type: model - name: USM + name: Minerva Math Web Pages dataset organization: Google + prohibited_uses: '' + quality_control: '' + sample: [] + size: 17.5B tokens + type: dataset + url: https://arxiv.org/abs/2206.14858 +- access: limited + analysis: '' + created_date: 2023-03-06 + dependencies: [YT-NLU-U, Pub-U, Web-NTL, YT-SUP+, Pub-S] description: Universal Speech Model (USM) is a family of state-of-the-art speech models with 2B parameters trained on 12 million hours of speech and 28 billion sentences of text, spanning 300+ languages. USM, which is for use in YouTube @@ -1717,269 +1601,271 @@ Soga, Maninka, Xhosa, Akan, Lingala, Chichewa, Nkore, Nzema to name a few. Some of these languages are spoken by fewer than twenty million people, making it very hard to find the necessary training data. - created_date: 2023-03-06 - url: https://arxiv.org/abs/2303.01037 - model_card: '' - modality: speech, text (300+ languages) - analysis: '' - size: 2B parameters (dense model) - dependencies: [YT-NLU-U, Pub-U, Web-NTL, YT-SUP+, Pub-S] - training_emissions: '' - training_time: '' - training_hardware: '' - quality_control: '' - access: limited + feedback: '' + intended_uses: '' license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - intended_uses: '' - prohibited_uses: '' + modality: + explanation: speech, text (300+ languages) + value: audio, text; audio, text + model_card: '' monitoring: '' - feedback: '' - -- type: application - name: YouTube + name: USM organization: Google - description: YouTube is a global online video sharing and social media platform + prohibited_uses: '' + quality_control: '' + size: 2B parameters (dense) + training_emissions: '' + training_hardware: '' + training_time: '' + type: model + url: https://arxiv.org/abs/2303.01037 +- access: open + adaptation: '' created_date: 2005-02-14 - url: https://www.youtube.com/ dependencies: [USM] - adaptation: '' + description: YouTube is a global online video sharing and social media platform + failures: '' + feedback: '' + intended_uses: '' + license: '' + monitoring: '' + monthly_active_users: '' + name: YouTube + organization: Google output_space: '' + prohibited_uses: '' quality_control: '' - access: open - license: '' terms_of_service: '' + type: application + url: https://www.youtube.com/ + user_distribution: '' +- access: closed + analysis: '' + created_date: 2023-03-06 + dependencies: [PaLM, ViT-22B] + description: '' + feedback: '' intended_uses: '' - prohibited_uses: '' + license: + explanation: "The asset isn't released, and hence the license is unknown.\n" + value: unknown + modality: + explanation: text, image + value: image, text; image, text + model_card: '' monitoring: '' - feedback: '' - monthly_active_users: '' - user_distribution: '' - failures: '' - -- type: model name: PaLM-E organization: Google - description: '' - created_date: 2023-03-06 - url: https://arxiv.org/abs/2303.03378 - model_card: '' - modality: text, image - analysis: '' - size: 562B parameters (dense model) - dependencies: [PaLM, ViT-22B] + prohibited_uses: '' + quality_control: '' + size: 562B parameters (dense) training_emissions: '' - training_time: '' training_hardware: '' - quality_control: '' - access: closed + training_time: '' + type: model + url: https://arxiv.org/abs/2303.03378 +- access: closed 
+ analysis: '' + created_date: 2023-02-10 + dependencies: [JFT] + description: '' + feedback: '' + intended_uses: '' license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - intended_uses: '' - prohibited_uses: '' + modality: + explanation: image + value: image; image + model_card: '' monitoring: '' - feedback: '' - -- type: model name: ViT-22B organization: Google - description: '' - created_date: 2023-02-10 - url: https://arxiv.org/abs/2302.05442 - model_card: '' - modality: image - analysis: '' - size: 22B parameters (dense model) - dependencies: [JFT] + prohibited_uses: '' + quality_control: '' + size: 22B parameters (dense) training_emissions: '' - training_time: '' training_hardware: '' - quality_control: '' - access: closed + training_time: '' + type: model + url: https://arxiv.org/abs/2302.05442 +- access: closed + analysis: '' + created_date: 2022-09-07 + dependencies: [w2v-BERT, SoundStream] + description: '' + feedback: '' + intended_uses: '' license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - intended_uses: '' - prohibited_uses: '' + modality: + explanation: audio, text + value: audio, text; audio, text + model_card: '' monitoring: '' - feedback: '' - -- type: model name: AudioLM organization: Google - description: '' - created_date: 2022-09-07 - url: https://arxiv.org/abs/2209.03143 - model_card: '' - modality: audio, text - analysis: '' - size: 1B parameters (dense model) - dependencies: [w2v-BERT, SoundStream] + prohibited_uses: '' + quality_control: '' + size: 1B parameters (dense) training_emissions: '' - training_time: '' training_hardware: '' - quality_control: '' - access: closed + training_time: '' + type: model + url: https://arxiv.org/abs/2209.03143 +- access: closed + analysis: '' + created_date: 2022-09-14 + dependencies: [mT5, ViT-e, WebLI] + description: '' + feedback: '' + intended_uses: '' license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - intended_uses: '' - prohibited_uses: '' + modality: + explanation: text, image + value: image, text; image, text + model_card: '' monitoring: '' - feedback: '' - -- type: model name: PaLI organization: Google - description: '' - created_date: 2022-09-14 - url: https://arxiv.org/abs/2209.06794 - model_card: '' - modality: text, image - analysis: '' - size: 17B parameters (13B text, 4B images) - dependencies: [mT5, ViT-e, WebLI] + prohibited_uses: '' + quality_control: '' + size: 17B parameters (dense) training_emissions: '' - training_time: '' training_hardware: '' - quality_control: '' - access: closed + training_time: '' + type: model + url: https://arxiv.org/abs/2209.06794 +- access: closed + analysis: '' + created_date: 2022-09-14 + dependencies: [JFT] + description: '' + feedback: '' + intended_uses: '' license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. 
- intended_uses: '' - prohibited_uses: '' + modality: + explanation: image + value: image; image + model_card: '' monitoring: '' - feedback: '' - -- type: model name: ViT-e organization: Google - description: '' - created_date: 2022-09-14 - url: https://arxiv.org/abs/2209.06794 - model_card: '' - modality: image - analysis: '' - size: 3.9B parameters (dense model) - dependencies: [JFT] + prohibited_uses: '' + quality_control: '' + size: 3.9B parameters (dense) training_emissions: '' - training_time: '' training_hardware: '' - quality_control: '' - access: closed + training_time: '' + type: model + url: https://arxiv.org/abs/2209.06794 +- access: closed + analysis: '' + created_date: 2022-09-14 + datasheet: '' + dependencies: [] + description: '' + excluded: '' + feedback: '' + included: '' + intended_uses: '' license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - intended_uses: '' - prohibited_uses: '' + modality: + explanation: text, image + value: image, text monitoring: '' - feedback: '' - -- type: dataset name: WebLI organization: Google - description: '' - created_date: 2022-09-14 - url: https://arxiv.org/abs/2209.06794 - datasheet: '' - modality: text, image - size: 10B images, 12B alt-text + prohibited_uses: '' + quality_control: '' sample: [] + size: 10B images, 12B alt-text + type: dataset + url: https://arxiv.org/abs/2209.06794 +- access: open analysis: '' - dependencies: [] - included: '' - excluded: '' - quality_control: '' - access: closed - license: - value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. + created_date: 2023-02-27 + dependencies: [T5, CLIP, YT-Temporal-1B] + description: '' + feedback: '' intended_uses: '' - prohibited_uses: '' + license: Apache 2.0 + modality: + explanation: text, video + value: text, video; text, video + model_card: '' monitoring: '' - feedback: '' - -- type: model name: Vid2Seq organization: Google - description: '' - created_date: 2023-02-27 - url: https://arxiv.org/abs/2302.14115 - model_card: '' - modality: text, video - analysis: '' - size: 500M parameters (200M text, 300M video) - dependencies: [T5, CLIP, YT-Temporal-1B] + prohibited_uses: '' + quality_control: '' + size: 500M parameters (dense) training_emissions: '' - training_time: '' training_hardware: '' - quality_control: '' - access: open - license: Apache 2.0 + training_time: '' + type: model + url: https://arxiv.org/abs/2302.14115 +- access: open + analysis: evaluated on DSTC11 Challenge Task, based on MultiWoz 2.1, with a focus + on dialog state tracking. + created_date: 2023-06-08 + dependencies: [CTC blank-filtering, Speech2Text adapter] + description: Joint speech and language model using a Speech2Text adapter and using + a CTC-based blank-filtering. + feedback: '' intended_uses: '' - prohibited_uses: '' + license: Google + modality: + explanation: speech input with textual output + value: audio; text + model_card: '' monitoring: '' - feedback: '' - -- type: model name: Google Joint SLM organization: Google - description: Joint speech and language model using a Speech2Text adapter and using - a CTC-based blank-filtering. - created_date: 2023-06-08 - url: https://arxiv.org/pdf/2306.07944.pdf - model_card: '' - modality: speech input with textual output - analysis: evaluated on DSTC11 Challenge Task, based on MultiWoz 2.1, with a focus - on dialog state tracking. 
+ prohibited_uses: '' + quality_control: '' size: '' - dependencies: [CTC blank-filtering, Speech2Text adapter] training_emissions: '' - training_time: '' training_hardware: '' - quality_control: '' - access: open - license: Google - intended_uses: '' - prohibited_uses: '' - monitoring: '' - feedback: '' - -- type: model - name: PaLM 2 - organization: Google + training_time: '' + type: model + url: https://arxiv.org/pdf/2306.07944.pdf +- access: open + analysis: Reports results on standard code benchmarks across a variety of programming + languages. + created_date: 2023-05-10 + dependencies: [palm 2 dataset] description: PaLM2 is a new state-of-the-art language model that has better multilingual and reasoning capabilities and is more compute-efficient than its predecessor PaLM. PaLM 2 is a Transformer-based model trained using a mixture of objectives similar to UL2. - created_date: 2023-05-10 - url: https://blog.google/technology/ai/google-palm-2-ai-large-language-model/ - model_card: https://ai.google/static/documents/palm2techreport.pdf - modality: text (input and output) - analysis: Reports results on standard code benchmarks across a variety of programming - languages. - size: 3.6T parameters - dependencies: [palm 2 dataset] - training_emissions: '' - training_time: '' - training_hardware: TPU v4 (number unspecified) - quality_control: Employed de-duplication, removal of sensitive-PII and filtering. - Added control tokens marking toxicity of text. - access: open - license: Google + feedback: Specific queries provided by annotators intended_uses: general use large language model that can be used for language, reasoning, and code tasks. + license: Google + modality: + explanation: text (input and output) + value: text; text + model_card: https://ai.google/static/documents/palm2techreport.pdf + monitoring: Google internal monitoring + name: PaLM 2 + organization: Google prohibited_uses: becoming part of a general-purpose service or product or use within specific downstream applications without prior assessment - monitoring: Google internal monitoring - feedback: Specific queries provided by annotators + quality_control: Employed de-duplication, removal of sensitive-PII and filtering. + Added control tokens marking toxicity of text. + size: 3.6T parameters (dense) + training_emissions: '' + training_hardware: TPU v4 (number unspecified) + training_time: '' + type: model + url: https://blog.google/technology/ai/google-palm-2-ai-large-language-model/ diff --git a/assets/h2oai.yaml b/assets/h2oai.yaml index 601e629d..53654100 100644 --- a/assets/h2oai.yaml +++ b/assets/h2oai.yaml @@ -1,23 +1,24 @@ --- - -- type: model - name: h2oGPT - organization: H2O AI - description: Series of models fine-tuned on well-known LLMs using the h2oGPT repositories. - created_date: 2023-06-16 - url: https://arxiv.org/pdf/2306.08161.pdf - model_card: https://huggingface.co/h2oai/h2ogpt-oasst1-512-20b - modality: text +- access: open analysis: Evaluated on EleutherAI evaluation harness. - size: 20B parameters + created_date: 2023-06-16 dependencies: [GPT-NeoX, H2O AI OpenAssistant, h2oGPT Repositories] - training_emissions: unknown - training_time: unknown - training_hardware: unspecified number of 48GB A100 NVIDIA GPUs - quality_control: '' - access: open - license: Apache 2.0 + description: Series of models fine-tuned on well-known LLMs using the h2oGPT repositories. 
+ feedback: https://huggingface.co/h2oai/h2ogpt-oasst1-512-20b/discussions intended_uses: '' - prohibited_uses: '' + license: Apache 2.0 + modality: + explanation: text + value: text; text + model_card: https://huggingface.co/h2oai/h2ogpt-oasst1-512-20b monitoring: '' - feedback: https://huggingface.co/h2oai/h2ogpt-oasst1-512-20b/discussions + name: h2oGPT + organization: H2O AI + prohibited_uses: '' + quality_control: '' + size: 20B parameters (dense) + training_emissions: unknown + training_hardware: unspecified number of 48GB A100 NVIDIA GPUs + training_time: unknown + type: model + url: https://arxiv.org/pdf/2306.08161.pdf diff --git a/assets/hubspot.yaml b/assets/hubspot.yaml index 90298984..91876dd7 100644 --- a/assets/hubspot.yaml +++ b/assets/hubspot.yaml @@ -1,28 +1,26 @@ --- - -- type: application - name: ChatGPT powered by OBO - organization: HubSpot +- access: limited + adaptation: '' + created_date: 2023-01-31 + dependencies: [ChatGPT API] description: Give your sales, marketing, and customer service teams one of the most powerful AI tools available - ChatGPT priority access, no timeout limits, company wide access managed through a single account, incorporate into your existing processes without leaving HubSpot - created_date: 2023-01-31 - url: https://ecosystem.hubspot.com/marketplace/apps/sales/sales-enablement/the-obo-group-chatgpt-1398072 - dependencies: [ChatGPT API] - adaptation: '' - output_space: '' - quality_control: '' - access: limited + failures: '' + feedback: '' + intended_uses: '' license: + explanation: "We weren't able to locate a license.\n" value: unknown - explanation: > - We weren't able to locate a license. - terms_of_service: '' - intended_uses: '' - prohibited_uses: '' monitoring: '' - feedback: '' monthly_active_users: '' + name: ChatGPT powered by OBO + organization: HubSpot + output_space: '' + prohibited_uses: '' + quality_control: '' + terms_of_service: '' + type: application + url: https://ecosystem.hubspot.com/marketplace/apps/sales/sales-enablement/the-obo-group-chatgpt-1398072 user_distribution: '' - failures: '' diff --git a/assets/huggingface.yaml b/assets/huggingface.yaml index 84710565..23e07d5d 100644 --- a/assets/huggingface.yaml +++ b/assets/huggingface.yaml @@ -1,29 +1,30 @@ --- - -- type: model - name: CodeParrot - organization: HuggingFace - description: CodeParrot is an autoregressive language model trained on code +- access: + explanation: The license is provided in the [[HuggingFace repository]](https://huggingface.co/codeparrot/codeparrot) + value: open + analysis: none created_date: - value: 2021-12-06 explanation: The date the model was announced - url: https://twitter.com/lvwerra/status/1467933794699259908 - model_card: none - modality: code (Python) - analysis: none - size: 1B parameters (dense model) + value: 2021-12-06 dependencies: [] - training_emissions: unknown - training_time: unknown - training_hardware: 16 x A100 (40GB) - quality_control: none - access: - value: open - explanation: The license is provided in the [[HuggingFace repository]](https://huggingface.co/codeparrot/codeparrot) + description: CodeParrot is an autoregressive language model trained on code + feedback: none + intended_uses: none license: - value: none explanation: No license is explicitly provided for this model. 
- intended_uses: none - prohibited_uses: none + value: none + modality: + explanation: code (Python) + value: code; code + model_card: none monitoring: none - feedback: none + name: CodeParrot + organization: HuggingFace + prohibited_uses: none + quality_control: none + size: 1B parameters (dense) + training_emissions: unknown + training_hardware: 16 x A100 (40GB) + training_time: unknown + type: model + url: https://twitter.com/lvwerra/status/1467933794699259908 diff --git a/assets/inflection.yaml b/assets/inflection.yaml index 5113a5f1..9283431b 100644 --- a/assets/inflection.yaml +++ b/assets/inflection.yaml @@ -1,49 +1,47 @@ --- - -- type: model - name: Inflection-1 - organization: Inflection AI - description: Inflection AI's in-house LLM, which powers Pi and will soon be available - via Inflection AI's conversational API. - created_date: 2023-06-22 - url: https://inflection.ai/inflection-1 - model_card: '' - modality: text +- access: limited analysis: Evaluated on wide range of language benchmarks like MMLU 5-shot, GSM-8K, and HellaSwag 10-shot among others. - size: - value: unknown - explanation: Designed to be roughly around Chat-GPT's size (175B parameters). + created_date: 2023-06-22 dependencies: [] + description: Inflection AI's in-house LLM, which powers Pi and will soon be available + via Inflection AI's conversational API. + feedback: '' + intended_uses: '' + license: unknown + modality: + explanation: text + value: text; text + model_card: '' + monitoring: '' + name: Inflection-1 + organization: Inflection AI + prohibited_uses: '' + quality_control: '' + size: unknown training_emissions: '' - training_time: '' training_hardware: unknown - quality_control: '' - access: limited + training_time: '' + type: model + url: https://inflection.ai/inflection-1 +- access: limited + adaptation: '' + created_date: 2023-05-02 + dependencies: [Inflection-1] + description: Personal AI chatbot designed to be conversational and specialized + in emotional intelligence. + failures: '' + feedback: '' + intended_uses: to be used as a personal assistant chatbot for everyday activities license: unknown - intended_uses: '' - prohibited_uses: '' monitoring: '' - feedback: '' - -- type: application + monthly_active_users: '' name: Pi organization: Inflection AI - description: Personal AI chatbot designed to be conversational and specialized - in emotional intelligence. - created_date: 2023-05-02 - url: https://inflection.ai/press - dependencies: [Inflection-1] - adaptation: '' output_space: natural language text responses + prohibited_uses: '' quality_control: '' - access: limited - license: unknown terms_of_service: '' - intended_uses: to be used as a personal assistant chatbot for everyday activities - prohibited_uses: '' - monitoring: '' - feedback: '' - monthly_active_users: '' + type: application + url: https://inflection.ai/press user_distribution: '' - failures: '' diff --git a/assets/instacart.yaml b/assets/instacart.yaml index b3a970ad..4dc27bf8 100644 --- a/assets/instacart.yaml +++ b/assets/instacart.yaml @@ -1,27 +1,26 @@ --- - -- type: application - name: Ask Instacart - organization: Instacart +- access: limited + adaptation: '' + created_date: 2023-03-01 + dependencies: [ChatGPT API] description: Instacart is augmenting the Instacart app to enable customers to ask about food and get inspirational, shoppable answers. 
This uses ChatGPT alongside Instacart’s own AI and product data from their 75,000+ retail partner store locations to help customers discover ideas for open-ended shopping goals, such as “How do I make great fish tacos?” or “What’s a healthy lunch for my kids?” Instacart plans to launch “Ask Instacart” later this year. - created_date: 2023-03-01 - url: https://openai.com/blog/introducing-chatgpt-and-whisper-apis - dependencies: [ChatGPT API] - adaptation: '' - output_space: '' - quality_control: '' - access: limited - license: '' - terms_of_service: '' + failures: '' + feedback: '' intended_uses: '' - prohibited_uses: '' + license: '' monitoring: '' - feedback: '' monthly_active_users: '' + name: Ask Instacart + organization: Instacart + output_space: '' + prohibited_uses: '' + quality_control: '' + terms_of_service: '' + type: application + url: https://openai.com/blog/introducing-chatgpt-and-whisper-apis user_distribution: '' - failures: '' diff --git a/assets/juni.yaml b/assets/juni.yaml index 13d2757a..9e175922 100644 --- a/assets/juni.yaml +++ b/assets/juni.yaml @@ -1,22 +1,21 @@ --- - -- type: application - name: Juni Tutor Bot - organization: Juni Learning - description: An online tutoring solution to help students achieve academic success. +- access: limited + adaptation: '' created_date: unknown - url: https://junilearning.com/ dependencies: [Anthropic API] - adaptation: '' - output_space: '' - quality_control: '' - access: limited - license: unknown - terms_of_service: '' + description: An online tutoring solution to help students achieve academic success. + failures: '' + feedback: '' intended_uses: '' - prohibited_uses: '' + license: unknown monitoring: '' - feedback: '' monthly_active_users: '' + name: Juni Tutor Bot + organization: Juni Learning + output_space: '' + prohibited_uses: '' + quality_control: '' + terms_of_service: '' + type: application + url: https://junilearning.com/ user_distribution: '' - failures: '' diff --git a/assets/kakaobrain.yaml b/assets/kakaobrain.yaml index 4c9546a8..f95f0b13 100644 --- a/assets/kakaobrain.yaml +++ b/assets/kakaobrain.yaml @@ -1,32 +1,25 @@ --- - -- type: dataset - name: COYO-700M - organization: Kakao Brain - description: > - COYO-700M is a large-scale dataset that contains 747M image-text pairs as well - as many other meta-attributes to increase the usability to train various models. +- access: + explanation: You can download the dataset from [[COYO Huggingface page]](https://huggingface.co/datasets/kakaobrain/coyo-700m). + For more information on downloading the image dataset, please refer to [[download/README.md]](https://github.com/kakaobrain/coyo-dataset/blob/main/download/README.md) + on Github. + value: open + analysis: + explanation: See [[Experiments]](https://github.com/kakaobrain/coyo-dataset#experiments) + value: "\"We empirically validated the quality of COYO dataset by re-implementing\ + \ popular models such as ALIGN, unCLIP, and ViT. We trained these models on\ + \ COYO-700M or its subsets from scratch, achieving competitive performance\ + \ to the reported numbers or generated samples in the original papers.\"\n" created_date: - value: 2022-08-31 explanation: As mentioned in the [[Google Scholar page]](https://scholar.google.com/citations?view_op=view_citation&hl=en&user=8WXfStMAAAAJ&citation_for_view=8WXfStMAAAAJ:-f6ydRqryjwC). 
- url: https://github.com/kakaobrain/coyo-dataset + value: 2022-08-31 datasheet: none - modality: text (English) and image - size: 747M image-text pairs - sample: [] - analysis: - value: > - "We empirically validated the quality of COYO dataset by re-implementing popular - models such as ALIGN, unCLIP, and ViT. We trained these models on COYO-700M - or its subsets from scratch, achieving competitive performance to the reported - numbers or generated samples in the original papers." - explanation: See [[Experiments]](https://github.com/kakaobrain/coyo-dataset#experiments) dependencies: [CommonCrawl] - included: - value: Included all image formats that Pillow library can decode. Collected - only English text using cld3. - explanation: See [[Data Filtering]](https://github.com/kakaobrain/coyo-dataset#data-filtering) + description: "COYO-700M is a large-scale dataset that contains 747M image-text\ + \ pairs as well as many other meta-attributes to increase the usability to train\ + \ various models.\n" excluded: + explanation: See [[Data Filtering]](https://github.com/kakaobrain/coyo-dataset#data-filtering) value: Removed images less than 5KB image size. Removed images with an aspect ratio greater than 3.0. Removed images with min(width, height) < 200. Removed images with a score of OpenNSFW2 or GantMan/NSFW higher than 0.5. Removed @@ -36,34 +29,36 @@ texts over 1000 in length. Removed texts appearing more than 10 times. Removed texts containing NSFW words. Removed duplicated samples based on (image_phash, text). + feedback: Feedback can be given by emailing at coyo at kakaobrain.com + included: explanation: See [[Data Filtering]](https://github.com/kakaobrain/coyo-dataset#data-filtering) - quality_control: '' - access: - value: open - explanation: You can download the dataset from [[COYO Huggingface page]](https://huggingface.co/datasets/kakaobrain/coyo-700m). - For more information on downloading the image dataset, please refer to [[download/README.md]](https://github.com/kakaobrain/coyo-dataset/blob/main/download/README.md) - on Github. - license: CC-BY-4.0 + value: Included all image formats that Pillow library can decode. Collected + only English text using cld3. intended_uses: - value: > - "It is strongly recommended that this dataset be used only for research, keeping - this in mind when using the dataset, and Kakao Brain does not recommend using - this dataset as it is without special processing to clear inappropriate data - to create commercial products." explanation: See [[Disclaimer & Content Warning]](https://github.com/kakaobrain/coyo-dataset#disclaimer--content-warning) + value: "\"It is strongly recommended that this dataset be used only for research,\ + \ keeping this in mind when using the dataset, and Kakao Brain does not recommend\ + \ using this dataset as it is without special processing to clear inappropriate\ + \ data to create commercial products.\"\n" + license: CC-BY-4.0 + modality: + explanation: text (English) and image + value: image, text + monitoring: none + name: COYO-700M + organization: Kakao Brain prohibited_uses: - value: > - Authors note the following limitations: "Kakao Brain tried to construct a - "Safe" - dataset when building the COYO dataset. However, despite these efforts, this - large-scale dataset was not hand-picked by humans to avoid the risk due to - its - very large size (over 700M). Keep in mind that the unscreened nature of the - dataset means that the collected images can lead to strongly discomforting - and - disturbing content for humans. 
The COYO dataset may contain some inappropriate - data, and any problems resulting from such data are the full responsibility - of the user who used it." explanation: See [[Disclaimer & Content Warning]](https://github.com/kakaobrain/coyo-dataset#disclaimer--content-warning) - monitoring: none - feedback: Feedback can be given by emailing at coyo at kakaobrain.com + value: "Authors note the following limitations: \"Kakao Brain tried to construct\ + \ a \"Safe\" dataset when building the COYO dataset. However, despite these\ + \ efforts, this large-scale dataset was not hand-picked by humans to avoid\ + \ the risk due to its very large size (over 700M). Keep in mind that the unscreened\ + \ nature of the dataset means that the collected images can lead to strongly\ + \ discomforting and disturbing content for humans. The COYO dataset may contain\ + \ some inappropriate data, and any problems resulting from such data are the\ + \ full responsibility of the user who used it.\"\n" + quality_control: '' + sample: [] + size: 747M image-text pairs + type: dataset + url: https://github.com/kakaobrain/coyo-dataset diff --git a/assets/khan.yaml b/assets/khan.yaml index 75050a66..abdf0636 100644 --- a/assets/khan.yaml +++ b/assets/khan.yaml @@ -1,26 +1,24 @@ --- - -- type: application - name: Khanmigo - organization: Khan Academy - description: An AI-powered assistant that functions as both a virtual tutor for - students and a classroom assistant for teachers. +- access: limited + adaptation: '' created_date: 2023-03-14 - url: https://www.khanacademy.org/khan-labs#khanmigo dependencies: [GPT-4 API] - adaptation: '' - output_space: '' - quality_control: '' - access: limited + description: An AI-powered assistant that functions as both a virtual tutor for + students and a classroom assistant for teachers. + failures: '' + feedback: '' + intended_uses: '' license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - terms_of_service: '' - intended_uses: '' - prohibited_uses: '' monitoring: '' - feedback: '' monthly_active_users: '' + name: Khanmigo + organization: Khan Academy + output_space: '' + prohibited_uses: '' + quality_control: '' + terms_of_service: '' + type: application + url: https://www.khanacademy.org/khan-labs#khanmigo user_distribution: '' - failures: '' diff --git a/assets/laion.yaml b/assets/laion.yaml index 6a62325e..14972cc3 100644 --- a/assets/laion.yaml +++ b/assets/laion.yaml @@ -1,79 +1,69 @@ --- - -- type: dataset - name: LAION-400M - # General - organization: LAION - description: LAION-400M is a dataset with CLIP-filtered 400 million image-text - pairs, their CLIP embeddings and kNN indices that allow efficient similarity - search. This dataset is entirely openly, freely accessible. +- access: + explanation: The dataset index is available from [[dataset blog post]](https://laion.ai/blog/laion-400-open-dataset/) + value: open + analysis: '' created_date: - value: 2021-08-20 explanation: The date the [[blog post]](https://laion.ai/blog/laion-400-open-dataset/) was released. 
- url: https://laion.ai/blog/laion-400-open-dataset/ + value: 2021-08-20 datasheet: https://laion.ai/blog/laion-400-open-dataset/ - modality: text (English) and image - size: 400M image-text pairs - sample: [] - analysis: '' - # Construction dependencies: [CLIP, CommonCrawl] - included: '' + description: LAION-400M is a dataset with CLIP-filtered 400 million image-text + pairs, their CLIP embeddings and kNN indices that allow efficient similarity + search. This dataset is entirely openly, freely accessible. excluded: - value: > - Authors apply the following filtering conditions on the WAT files downloaded - from Common Crawl: "All samples with less than 5 character alt-text length - or less than 5 KB image size are dropped. Duplicate removal is performed with - bloom filter based on URL and alt-text. We use CLIP to compute embeddings - of the image and alt-text. Then we compute the cosine similarity of both embeddings - and drop all samples with cosine similarity below 0.3. This threshold was - selected based on human inspections. We use the CLIP embeddings of images - and texts to filter out illegal contents." explanation: See [[Dataset and Methods]](https://arxiv.org/pdf/2111.02114.pdf#section.2) - quality_control: - value: The authors use CLIP embeddings of images and texts to filter out illegal - contents. They also use CLIP to tag image-text pairs as NSFW. They note that - less than 1% of images were detected as NSFW, which can be filtered out by - an user with NSFW tag. - explanation: See [[Dataset and Methods]](https://arxiv.org/pdf/2111.02114.pdf#section.2) - # Downstream - access: - value: open - explanation: The dataset index is available from [[dataset blog post]](https://laion.ai/blog/laion-400-open-dataset/) - license: - value: CC BY 4.0 - explanation: The license is listed on the [[dataset blog post]](https://laion.ai/blog/laion-400-open-dataset/) + value: "Authors apply the following filtering conditions on the WAT files downloaded\ + \ from Common Crawl: \"All samples with less than 5 character alt-text length\ + \ or less than 5 KB image size are dropped. Duplicate removal is performed\ + \ with bloom filter based on URL and alt-text. We use CLIP to compute embeddings\ + \ of the image and alt-text. Then we compute the cosine similarity of both\ + \ embeddings and drop all samples with cosine similarity below 0.3. This threshold\ + \ was selected based on human inspections. We use the CLIP embeddings of images\ + \ and texts to filter out illegal contents.\"\n" + feedback: '' + included: '' intended_uses: The authors recommend using the dataset "for research purposes" and warn that "this large-scale dataset is non-curated. It was built for research purposes to enable testing model training on larger scale for broad researcher and other interested communities, and is not meant for any real-world production or application." - prohibited_uses: No uses are explicitly prohibited by the license. Users are warned - from using LAION-400M for any real-world production or application. + license: + explanation: The license is listed on the [[dataset blog post]](https://laion.ai/blog/laion-400-open-dataset/) + value: CC BY 4.0 + modality: + explanation: text (English) and image + value: image, text monitoring: '' - feedback: '' - -- type: dataset - name: LAION-5B - # General + name: LAION-400M organization: LAION - description: LAION is a dataset of 5 billion image-text pairs from the Internet + prohibited_uses: No uses are explicitly prohibited by the license. 
Users are warned + from using LAION-400M for any real-world production or application. + quality_control: + explanation: See [[Dataset and Methods]](https://arxiv.org/pdf/2111.02114.pdf#section.2) + value: The authors use CLIP embeddings of images and texts to filter out illegal + contents. They also use CLIP to tag image-text pairs as NSFW. They note that + less than 1% of images were detected as NSFW, which can be filtered out by + an user with NSFW tag. + sample: [] + size: 400M image-text pairs + type: dataset + url: https://laion.ai/blog/laion-400-open-dataset/ +- access: + explanation: The dataset index is available from [[dataset blog post]](https://laion.ai/blog/laion-5b/) + value: open + analysis: '' created_date: - value: 2022-12-12 explanation: The date the [[blog post]](https://laion.ai/blog/laion-5b/) was released. Note that the dataset was already compiled earlier, e.g. the Stable Diffusion model released earlier was trained on a subset of LAION-5B - url: https://laion.ai/blog/laion-5b/ + value: 2022-12-12 datasheet: https://laion.ai/blog/laion-5b/ - modality: text and image - size: 5B image-text pairs - sample: [] - analysis: '' - # Construction dependencies: [CLIP, mCLIP, CommonCrawl] - included: '' + description: LAION is a dataset of 5 billion image-text pairs from the Internet excluded: + explanation: See [[dataset blog post]](https://laion.ai/blog/laion-5b/) value: All samples with less than 5 characters alt-text length or less than 5 KB image size are dropped. All images with the too big resolution, potentially DOS bombs, were dropped before attempting to process them. Duplicate removal @@ -86,45 +76,43 @@ were selected based on human inspection of the test results. We use the CLIP embeddings of images and texts to filter out to the possible extent the illegal content. - explanation: See [[dataset blog post]](https://laion.ai/blog/laion-5b/) - quality_control: '' - # Downstream - access: - value: open - explanation: The dataset index is available from [[dataset blog post]](https://laion.ai/blog/laion-5b/) - license: - value: CC BY 4.0 - explanation: The license is listed on the [[dataset blog post]](https://laion.ai/blog/laion-5b/) + feedback: '' + included: '' intended_uses: The authors recommend using the dataset "for research purposes" and "do not recommend using it for creating ready-to-go industrial products, as the basic research about general properties and safety of such large-scale models, which we would like to encourage with this release, is still in progress" - prohibited_uses: No uses are explicitly prohibited by the license. Users are warned - from using LAION-5B for non-research purposes. + license: + explanation: The license is listed on the [[dataset blog post]](https://laion.ai/blog/laion-5b/) + value: CC BY 4.0 + modality: + explanation: text and image + value: image, text monitoring: '' - feedback: '' - -- type: dataset - name: LAION-2B-en - # General + name: LAION-5B organization: LAION - description: LAION-2B-en is a subset of the LAION-5B dataset and contains 2.3 - billion English image-text pairs. + prohibited_uses: No uses are explicitly prohibited by the license. Users are warned + from using LAION-5B for non-research purposes. 
+ quality_control: '' + sample: [] + size: 5B image-text pairs + type: dataset + url: https://laion.ai/blog/laion-5b/ +- access: + explanation: The dataset index is available from [[dataset blog post]](https://laion.ai/blog/laion-5b/) + value: open + analysis: '' created_date: - value: 2022-12-12 explanation: The date the [[blog post]](https://laion.ai/blog/laion-5b/) was released. Note that the dataset was already compiled earlier, e.g. the Stable Diffusion model released earlier was trained on a subset of LAION-5B - url: https://arxiv.org/pdf/2210.08402.pdf + value: 2022-12-12 datasheet: https://laion.ai/blog/laion-5b/ - modality: text (English) and image - size: 2.32B image-text pairs - sample: [] - analysis: '' - # Construction dependencies: [CLIP, LAION-5B] - included: '' + description: LAION-2B-en is a subset of the LAION-5B dataset and contains 2.3 + billion English image-text pairs. excluded: + explanation: See [[dataset blog post]](https://laion.ai/blog/laion-5b/) value: All samples with less than 5 characters alt-text length or less than 5 KB image size are dropped. All images with the too big resolution, potentially DOS bombs, were dropped before attempting to process them. Duplicate removal @@ -137,44 +125,50 @@ were selected based on human inspection of the test results. We use the CLIP embeddings of images and texts to filter out to the possible extent the illegal content. - explanation: See [[dataset blog post]](https://laion.ai/blog/laion-5b/) - quality_control: '' - # Downstream - access: - value: open - explanation: The dataset index is available from [[dataset blog post]](https://laion.ai/blog/laion-5b/) - license: - value: CC BY 4.0 - explanation: The license is listed on the [[dataset blog post]](https://laion.ai/blog/laion-5b/) + feedback: '' + included: '' intended_uses: The authors recommend using the dataset "for research purposes" and "do not recommend using it for creating ready-to-go industrial products, as the basic research about general properties and safety of such large-scale models, which we would like to encourage with this release, is still in progress" - prohibited_uses: No uses are explicitly prohibited by the license. Users are warned - from using LAION-2B-en for non-research purposes. + license: + explanation: The license is listed on the [[dataset blog post]](https://laion.ai/blog/laion-5b/) + value: CC BY 4.0 + modality: + explanation: text (English) and image + value: image, text monitoring: '' - feedback: '' - -- type: model - name: OpenFlamingo + name: LAION-2B-en organization: LAION + prohibited_uses: No uses are explicitly prohibited by the license. Users are warned + from using LAION-2B-en for non-research purposes. + quality_control: '' + sample: [] + size: 2.32B image-text pairs + type: dataset + url: https://arxiv.org/pdf/2210.08402.pdf +- access: open + analysis: Evaluated on COCO captioning and VQAv2 vision-language tasks. + created_date: 2023-03-28 + dependencies: [LLaMA, CLIP] description: An open-source reproduction of DeepMind's Flamingo model. At its core, OpenFlamingo is a framework that enables training and evaluation of large multimodal models (LMMs). 
- created_date: 2023-03-28 - url: https://laion.ai/blog/open-flamingo/ + feedback: '' + intended_uses: academic research purposes + license: MIT + modality: + explanation: image and text input with textual output + value: image, text; text model_card: https://github.com/mlfoundations/open_flamingo/blob/main/MODEL_CARD.md - modality: image and text input with textual output - analysis: Evaluated on COCO captioning and VQAv2 vision-language tasks. - size: 9B parameters - dependencies: [LLaMA, CLIP] + monitoring: '' + name: OpenFlamingo + organization: LAION + prohibited_uses: commercial use + quality_control: '' + size: 9B parameters (dense) training_emissions: '' - training_time: '' training_hardware: '' - quality_control: '' - access: open - license: MIT - intended_uses: academic research purposes - prohibited_uses: commercial use - monitoring: '' - feedback: '' + training_time: '' + type: model + url: https://laion.ai/blog/open-flamingo/ diff --git a/assets/latitude.yaml b/assets/latitude.yaml index f6d5e39c..2e5bc564 100644 --- a/assets/latitude.yaml +++ b/assets/latitude.yaml @@ -1,43 +1,33 @@ --- - -- type: application - name: AI Dungeon - # General - organization: Latitude - description: > - AI Dungeon is a single-player text adventure game that uses AI to generate - content. +- access: + explanation: "The game is available to public upon sign up.\n" + value: limited + adaptation: '' created_date: + explanation: "Release date of the game on iOS and Android platforms [[Wikipedia]](https://en.wikipedia.org/wiki/AI_Dungeon).\n" value: 2019-12-17 - explanation: > - Release date of the game on iOS and Android platforms - [[Wikipedia]](https://en.wikipedia.org/wiki/AI_Dungeon). - url: https://play.aidungeon.io - # Construction dependencies: [OpenAI API] - adaptation: '' - output_space: '' - quality_control: '' - # Downstream - access: - value: limited - explanation: > - The game is available to public upon sign up. - terms_of_service: https://play.aidungeon.io/main/termsOfService + description: "AI Dungeon is a single-player text adventure game that uses AI to\ + \ generate content.\n" + failures: '' + feedback: '' + intended_uses: '' license: + explanation: "\"Subject to your compliance with these Terms, the Company hereby\ + \ grants to you, a personal, worldwide, royalty-free, non-assignable, non-sublicensable,\ + \ non-transferrable, and non-exclusive license to use the software provided\ + \ to you as part of the Services (and to download a single copy of the App\ + \ onto the equipment or device specified by us)\" - excerpt from the Terms\ + \ of Service document.\n" value: custom - explanation: > - "Subject to your compliance with these Terms, the Company hereby grants - to you, a personal, worldwide, royalty-free, non-assignable, - non-sublicensable, non-transferrable, and non-exclusive license to use - the software provided to you as part of the Services (and to download a - single copy of the App onto the equipment or device specified by us)" - - excerpt from the Terms of Service document. 
- intended_uses: '' - prohibited_uses: '' monitoring: '' - feedback: '' - # Deployment monthly_active_users: '' + name: AI Dungeon + organization: Latitude + output_space: '' + prohibited_uses: '' + quality_control: '' + terms_of_service: https://play.aidungeon.io/main/termsOfService + type: application + url: https://play.aidungeon.io user_distribution: '' - failures: '' diff --git a/assets/lehigh.yaml b/assets/lehigh.yaml index 9e8df418..c153f345 100644 --- a/assets/lehigh.yaml +++ b/assets/lehigh.yaml @@ -1,30 +1,31 @@ --- - -- type: model - name: BiomedGPT - organization: Lehigh University - description: BiomedGPT leverages self-supervision on large and diverse datasets - to accept multi-modal inputs and perform a range of downstream tasks. - created_date: 2023-05-26 - url: https://arxiv.org/pdf/2305.17100.pdf - model_card: '' - modality: image and text input, text output +- access: open analysis: outperforms majority of preceding state-of-the-art models over 15 unique biomedical modalities. - size: 472M parameters + created_date: 2023-05-26 dependencies: - GPT-style autoregressive decoder - BiomedGPT biomedical datasets - training_emissions: unknown - training_time: unknown - training_hardware: 10 NVIDIA A5000 GPUs - quality_control: No specific quality control is mentioned in model training, though - details on data processing and how the model was trained are provided in the - paper. - access: open - license: Apache 2.0 + description: BiomedGPT leverages self-supervision on large and diverse datasets + to accept multi-modal inputs and perform a range of downstream tasks. + feedback: '' intended_uses: furthering research in developing unified and generalist models for biomedicine. - prohibited_uses: '' + license: Apache 2.0 + modality: + explanation: image and text input, text output + value: image, text; text + model_card: '' monitoring: '' - feedback: '' + name: BiomedGPT + organization: Lehigh University + prohibited_uses: '' + quality_control: No specific quality control is mentioned in model training, though + details on data processing and how the model was trained are provided in the + paper. + size: 472M parameters (dense) + training_emissions: unknown + training_hardware: 10 NVIDIA A5000 GPUs + training_time: unknown + type: model + url: https://arxiv.org/pdf/2305.17100.pdf diff --git a/assets/linkedin.yaml b/assets/linkedin.yaml index 0d3d2ab0..a92551e9 100644 --- a/assets/linkedin.yaml +++ b/assets/linkedin.yaml @@ -1,27 +1,26 @@ --- - -- type: application - name: LinkedIn - organization: LinkedIn +- access: open + adaptation: '' + created_date: unknown + dependencies: [Azure Cognitive Services for Vision] description: More than 40 percent of LinkedIn's feed posts include at least one image. We want every member to have equal access to opportunity and are committed to ensuring that we make images accessible to our members who are blind or who have low vision so they can be a part of the online conversation. With Azure Cognitive Service for Vision, we can provide auto-captioning to edit and support alt. text descriptions. 
- created_date: unknown - url: https://www.linkedin.com/ - dependencies: [Azure Cognitive Services for Vision] - adaptation: '' - output_space: '' - quality_control: '' - access: open - license: unknown - terms_of_service: '' + failures: '' + feedback: '' intended_uses: '' - prohibited_uses: '' + license: unknown monitoring: '' - feedback: '' monthly_active_users: '' + name: LinkedIn + organization: LinkedIn + output_space: '' + prohibited_uses: '' + quality_control: '' + terms_of_service: '' + type: application + url: https://www.linkedin.com/ user_distribution: '' - failures: '' diff --git a/assets/lmsys.yaml b/assets/lmsys.yaml index 6f07781e..3058f7a0 100644 --- a/assets/lmsys.yaml +++ b/assets/lmsys.yaml @@ -1,24 +1,25 @@ --- - -- type: model - name: Vicuna - organization: LMSYS +- access: open + analysis: Evaluated against similar LLMs using GPT-4 as a judge. + created_date: 2023-03-30 + dependencies: [LLaMA, ShareGPT conversations data] description: An open-source chatbot trained by fine-tuning LLaMA on user-shared conversations collected from ShareGPT. - created_date: 2023-03-30 - url: https://lmsys.org/blog/2023-03-30-vicuna/ + feedback: https://huggingface.co/datasets/bigcode/the-stack/discussions + intended_uses: research on LLMs and chatbots + license: Apache 2.0 + modality: + explanation: natural language text + value: text; text model_card: https://huggingface.co/lmsys/vicuna-13b-delta-v0 - modality: natural language text - analysis: Evaluated against similar LLMs using GPT-4 as a judge. - size: 13B parameters - dependencies: [LLaMA, ShareGPT conversations data] + monitoring: '' + name: Vicuna + organization: LMSYS + prohibited_uses: '' + quality_control: '' + size: 13B parameters (dense) training_emissions: '' - training_time: 1 day training_hardware: 8 A100 GPUs - quality_control: '' - access: open - license: Apache 2.0 - intended_uses: research on LLMs and chatbots - prohibited_uses: '' - monitoring: '' - feedback: https://huggingface.co/datasets/bigcode/the-stack/discussions + training_time: 1 day + type: model + url: https://lmsys.org/blog/2023-03-30-vicuna/ diff --git a/assets/meta.yaml b/assets/meta.yaml index c44d7474..ad4d99eb 100644 --- a/assets/meta.yaml +++ b/assets/meta.yaml @@ -1,51 +1,40 @@ --- - -- type: model - name: ESM-2 - organization: Meta - description: ESM-2 is a series of protein language models trained on protein sequences +- access: + explanation: Models are available for download from [[GitHub repository]](https://github.com/facebookresearch/esm#available-models) + value: open + analysis: '' created_date: - value: 2022-10-31 explanation: The date the [[model paper]](https://www.biorxiv.org/content/10.1101/2022.07.20.500902v2.full.pdf+html) was released - url: https://www.biorxiv.org/content/10.1101/2022.07.20.500902v2.full.pdf+html - model_card: none - modality: protein sequence - size: 15B parameters (dense model) - analysis: '' + value: 2022-10-31 dependencies: [UniRef50, UniRef90] - training_emissions: '' - training_time: '' - training_hardware: '' - quality_control: '' - access: - value: open - explanation: Models are available for download from [[GitHub repository]](https://github.com/facebookresearch/esm#available-models) + description: ESM-2 is a series of protein language models trained on protein sequences + feedback: '' + intended_uses: '' license: + explanation: "The license is provided in the [[Github repository]](https://github.com/facebookresearch/esm#available-models)\n" value: MIT - explanation: > - The license is provided in the 
[[Github repository]](https://github.com/facebookresearch/esm#available-models) - intended_uses: '' - prohibited_uses: '' + modality: + explanation: protein sequence + value: text; text + model_card: none monitoring: '' - feedback: '' - -- type: dataset - name: PMD - # General + name: ESM-2 organization: Meta - description: PMD (Public Multimodal Datasets) is a collection of image-text datasets - introduced in the FLAVA work. + prohibited_uses: '' + quality_control: '' + size: 15B parameters (dense) + training_emissions: '' + training_hardware: '' + training_time: '' + type: model + url: https://www.biorxiv.org/content/10.1101/2022.07.20.500902v2.full.pdf+html +- access: closed + analysis: none created_date: - value: 2021-12-08 explanation: The date the model paper was released - url: https://arxiv.org/abs/2112.04482 + value: 2021-12-08 datasheet: none - modality: text, image - size: 70M - sample: [] - analysis: none - # Construction dependencies: - COCO - YFCC100M @@ -55,329 +44,286 @@ - Wikipedia - Conceptual Captions - Red Caps - license: - value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - included: none + description: PMD (Public Multimodal Datasets) is a collection of image-text datasets + introduced in the FLAVA work. excluded: YFCC100M is filtered for non-English captions and very short (< 2 word) captions. - quality_control: Beyond filtering mentioned in excluded, nothing further is done. - # Downstream - access: closed + feedback: none + included: none intended_uses: unknown - prohibited_uses: unknown + license: + explanation: "The asset isn't released, and hence the license is unknown.\n" + value: unknown + modality: + explanation: text, image + value: image, text monitoring: none - feedback: none - -- type: model - name: FLAVA + name: PMD organization: Meta - description: FLAVA is a multimodal model composed of an image encoder, text encoder, - and multimodal encoder. - created_date: - value: 2021-12-08 - explanation: The date the model paper was released + prohibited_uses: unknown + quality_control: Beyond filtering mentioned in excluded, nothing further is done. + sample: [] + size: 70M + type: dataset url: https://arxiv.org/abs/2112.04482 - model_card: https://huggingface.co/facebook/flava-full - modality: Text (English) and Image - size: - value: 306M - explanation: > - 110M (Language encoder) + 86M (Vision encoder) + 110M (mul encoder) +- access: + explanation: "Model checkpoints are available for download from the [[HuggingFace\ + \ repository]](https://huggingface.co/facebook/flava-full)\n" + value: open analysis: FLAVA is benchmarked on a range of vision-only (e.g. CIFAR-10), language-only (e.g. GLUE), and multimodal (e.g. Hateful Memes) standard evaluations. + created_date: + explanation: The date the model paper was released + value: 2021-12-08 dependencies: [PMD] - training_emissions: unknown - training_time: unknown - training_hardware: unknown - quality_control: FLAVA introduces a variety of new modeling techniques, specifically - with an interest in improved text-image alignment through contrastive objectives. - access: - value: open - explanation: > - Model checkpoints are available for download from the [[HuggingFace - repository]](https://huggingface.co/facebook/flava-full) + description: FLAVA is a multimodal model composed of an image encoder, text encoder, + and multimodal encoder. 
+ feedback: https://huggingface.co/facebook/flava-full/discussions + intended_uses: "Per the [[HuggingFace repository]](https://huggingface.co/facebook/flava-full),\ + \ \"The model is intended to serve as a reproducible research artifact for research\ + \ communities in the light of models whose exact reproduction details are never\ + \ released such as CLIP and SimVLM.\"\n" license: + explanation: "The license is provided in the [[HuggingFace repository]](https://huggingface.co/facebook/flava-full)\n" value: BSD-3-Clause - explanation: > - The license is provided in the [[HuggingFace repository]](https://huggingface.co/facebook/flava-full) - intended_uses: > - Per the [[HuggingFace repository]](https://huggingface.co/facebook/flava-full), - "The model is intended to serve as a reproducible research artifact for research - communities in the light of models whose exact reproduction details are never - released such as CLIP and SimVLM." - prohibited_uses: > - Per the [[HuggingFace repository]](https://huggingface.co/facebook/flava-full), - "Any deployed use case of the model - whether commercial or not" - is currently - out of scope. + modality: + explanation: Text (English) and Image + value: image, text; image, text + model_card: https://huggingface.co/facebook/flava-full monitoring: none - feedback: https://huggingface.co/facebook/flava-full/discussions - -- type: dataset - name: The Galactica Corpus - # General + name: FLAVA organization: Meta - description: The Galactica Corpus is a collection of scientific datasets introduced - in the Galactica work. + prohibited_uses: "Per the [[HuggingFace repository]](https://huggingface.co/facebook/flava-full),\ + \ \"Any deployed use case of the model - whether commercial or not\" - is currently\ + \ out of scope.\n" + quality_control: FLAVA introduces a variety of new modeling techniques, specifically + with an interest in improved text-image alignment through contrastive objectives. + size: 306M parameters (dense) + training_emissions: unknown + training_hardware: unknown + training_time: unknown + type: model + url: https://arxiv.org/abs/2112.04482 +- access: closed + analysis: none created_date: + explanation: "The date the Galactica paper was released\n" value: 2022-11-15 - explanation: > - The date the Galactica paper was released - url: https://galactica.org/static/paper.pdf datasheet: none - modality: text - size: 106B tokens - sample: [] - analysis: none - # Construction dependencies: [CommonCrawl, Wikipedia, arXiv] - license: - value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. + description: The Galactica Corpus is a collection of scientific datasets introduced + in the Galactica work. + excluded: '' + feedback: none included: Prompts and reasoning data is explicitly included to improve model capabilities derived from this data. - excluded: '' - quality_control: '' - # Downstream - access: closed intended_uses: unknown - prohibited_uses: unknown + license: + explanation: "The asset isn't released, and hence the license is unknown.\n" + value: unknown + modality: text monitoring: none - feedback: none - -- type: model - name: Galactica + name: The Galactica Corpus organization: Meta - description: Galactica is a family of autoregressive language models. 
- created_date: - value: 2022-11-15 - explanation: > - The date the Galactica paper was released + prohibited_uses: unknown + quality_control: '' + sample: [] + size: 106B tokens + type: dataset url: https://galactica.org/static/paper.pdf - model_card: https://huggingface.co/facebook/galactica-6.7b - modality: Text (English), Code, Math, Chemistry, Biology +- access: + explanation: Model checkpoints freely available at https://github.com/paperswithcode/galai + value: open analysis: '' - size: 120B parameters (dense model) + created_date: + explanation: "The date the Galactica paper was released\n" + value: 2022-11-15 dependencies: [The Galactica Corpus] + description: Galactica is a family of autoregressive language models. + feedback: '' + intended_uses: '' + license: + explanation: https://github.com/paperswithcode/galai/blob/main/LICENSE-MODEL.md + value: CC BY-NC 4.0 + modality: + explanation: Text (English), Code, Math, Chemistry, Biology + value: code, text; code, text + model_card: https://huggingface.co/facebook/galactica-6.7b + monitoring: '' + name: Galactica + organization: Meta + prohibited_uses: '' + quality_control: '' + size: 120B parameters (dense) training_emissions: unknown - training_time: unknown training_hardware: Meta AI Cluster. Trained on 1024 80GB A100 GPUs (128 8xA100 80GB nodes) - quality_control: '' - access: + training_time: unknown + type: model + url: https://galactica.org/static/paper.pdf +- access: + explanation: Model weights are available via the [[HuggingFace repository]](https://huggingface.co/facebook/incoder-6B) value: open - explanation: Model checkpoints freely available at https://github.com/paperswithcode/galai + analysis: none + created_date: + explanation: The date the model paper was released + value: 2022-04-12 + dependencies: [] + description: InCoder is a language model trained on code with a causal masking + objective + feedback: '' + intended_uses: '' license: + explanation: The license is provided in the [[HuggingFace repository]](https://huggingface.co/facebook/incoder-6B?text=My+name+is+Lewis+and+I+like+to) value: CC BY-NC 4.0 - explanation: https://github.com/paperswithcode/galai/blob/main/LICENSE-MODEL.md - intended_uses: '' - prohibited_uses: '' + modality: + explanation: text (English) and code + value: code, text; code, text + model_card: none monitoring: '' - feedback: '' - -- type: model name: InCoder organization: Meta, CMU, TTI-Chicago, UC Berkeley, University of Washington - description: InCoder is a language model trained on code with a causal masking - objective - created_date: - value: 2022-04-12 - explanation: The date the model paper was released - url: https://arxiv.org/abs/2204.05999 - model_card: none - modality: text (English) and code - analysis: none - size: 6B parameters (dense model) - dependencies: [] + prohibited_uses: '' + quality_control: unknown + size: 6B parameters (dense) training_emissions: Unknown - training_time: 24 days, according to [[the paper]](https://arxiv.org/pdf/2204.05999.pdf) training_hardware: 248 V100 GPUs, according to [[the paper]](https://arxiv.org/pdf/2204.05999.pdf) - quality_control: unknown - access: - value: open - explanation: Model weights are available via the [[HuggingFace repository]](https://huggingface.co/facebook/incoder-6B) - license: - value: CC BY-NC 4.0 - explanation: The license is provided in the [[HuggingFace repository]](https://huggingface.co/facebook/incoder-6B?text=My+name+is+Lewis+and+I+like+to) + training_time: 24 days, according to [[the 
paper]](https://arxiv.org/pdf/2204.05999.pdf) + type: model + url: https://arxiv.org/abs/2204.05999 +- access: + explanation: The 175B model requires manual approval from Meta to access. Other + models are available through HuggingFace. + value: limited + analysis: '' + created_date: + explanation: "The date the OPT paper was submitted to Arxiv\n" + value: 2022-05-01 + dependencies: [RoBERTa dataset, The Pile, PushShift.io Reddit] + description: OPT is a family of autoregressive language models. + feedback: '' intended_uses: '' - prohibited_uses: '' + license: + explanation: "All released with the [[OPT-175B License]](https://github.com/facebookresearch/metaseq/blob/main/projects/OPT/MODEL_LICENSE.md),\ + \ except 66B (TBD) and 17B (requires manual approval)\n" + value: OPT-175B License + modality: + explanation: text (English) + value: text; text + model_card: https://arxiv.org/pdf/2205.01068.pdf monitoring: '' - feedback: '' - -- type: model name: OPT organization: Meta - description: OPT is a family of autoregressive language models. - created_date: - value: 2022-05-01 - explanation: > - The date the OPT paper was submitted to Arxiv - url: https://arxiv.org/abs/2205.01068 - model_card: https://arxiv.org/pdf/2205.01068.pdf - modality: text (English) - analysis: '' - size: 175B parameters (dense model) - dependencies: [RoBERTa dataset, The Pile, PushShift.io Reddit] + prohibited_uses: '' + quality_control: '' + size: 175B parameters (dense) training_emissions: + explanation: "Estimate by authors for the OPT-175B model only. Not including\ + \ ablations and baselines.\n" value: 75 tCO2e - explanation: > - Estimate by authors for the OPT-175B model only. Not including ablations and - baselines. - training_time: '' training_hardware: Meta AI cluster. Trained on 992 80GB A100 GPUs - quality_control: '' - access: + training_time: '' + type: model + url: https://arxiv.org/abs/2205.01068 +- access: + explanation: "The datasets involved are public, but the full dataset is not\ + \ directly available, nor are filtering scripts.\n" value: limited - explanation: The 175B model requires manual approval from Meta to access. Other - models are available through HuggingFace. - license: - value: OPT-175B License - explanation: > - All released with the - [[OPT-175B License]](https://github.com/facebookresearch/metaseq/blob/main/projects/OPT/MODEL_LICENSE.md), - except 66B (TBD) and 17B (requires manual approval) - intended_uses: '' - prohibited_uses: '' - monitoring: '' - feedback: '' - -- type: dataset - name: Make-A-Video dataset - # General - organization: Meta - description: > - The Make-A-Video dataset is the dataset used to train Make-A-Video, which includes - both image-text and video-only datasets with specific and significant filtering. + analysis: '' created_date: + explanation: "The date that Make-A-Video was posted to arXiv [[arXiv]] (https://arxiv.org/abs/2209.14792).\n" value: 2022-09-29 - explanation: > - The date that Make-A-Video was posted to arXiv - [[arXiv]] - (https://arxiv.org/abs/2209.14792). 
- url: https://arxiv.org/pdf/2209.14792.pdf datasheet: none - modality: video, image-text - size: 20M video clips, 2.3B image-text pairs - sample: [] - analysis: '' - # Construction dependencies: [LAION-5B, WebVid-10M, HD-VILA-100M] - license: + description: "The Make-A-Video dataset is the dataset used to train Make-A-Video,\ + \ which includes both image-text and video-only datasets with specific and significant\ + \ filtering.\n" + excluded: "The LAION-5B dataset is filtered to 2.3B by removing NSFW images using\ + \ [https://github.com/GantMan/nsfw](https://github.com/GantMan/nsfw), toxic\ + \ words in text, and images with watermark probability > 0.5. The HD-VILA-100M\ + \ is randomly subsampled to 10M video clips.\n" + feedback: + explanation: "No feedback mechanism is mentioned by the authors.\n" value: none - explanation: > - No license was found, though the underlying datasets are public and have - licenses. included: + explanation: "Data from the three underlying datasets is filtered, but nothing\ + \ is included beyond this.\n" value: none - explanation: > - Data from the three underlying datasets is filtered, but nothing is included - beyond this. - excluded: > - The LAION-5B dataset is filtered to 2.3B by removing NSFW images using [https://github.com/GantMan/nsfw](https://github.com/GantMan/nsfw), - toxic words in text, and images with watermark probability > 0.5. The HD-VILA-100M - is randomly subsampled to 10M video clips. - quality_control: > - The authors exclude NSFW, toxic, and likely watermarked data from LAION-5B. - # Downstream - access: - value: limited - explanation: > - The datasets involved are public, but the full dataset is not directly available, - nor are filtering scripts. intended_uses: unknown - prohibited_uses: unknown + license: + explanation: "No license was found, though the underlying datasets are public\ + \ and have licenses.\n" + value: none + modality: + explanation: video, image-text + value: image, text, video monitoring: + explanation: "There is no information on how Meta is internally monitoring the\ + \ use of the dataset.\n" value: unknown - explanation: > - There is no information on how Meta is internally monitoring - the use of the dataset. - feedback: - value: none - explanation: > - No feedback mechanism is mentioned by the authors. - -- type: model - name: Make-A-Video - # General + name: Make-A-Video dataset organization: Meta - description: > - Make-A-Video is a model for Text-to-Video Generation without Text-Video Data. + prohibited_uses: unknown + quality_control: "The authors exclude NSFW, toxic, and likely watermarked data\ + \ from LAION-5B.\n" + sample: [] + size: 20M video clips, 2.3B image-text pairs + type: dataset + url: https://arxiv.org/pdf/2209.14792.pdf +- access: + explanation: "The model has not been released; a form existed to potentially\ + \ acquire access but is now closed as of 2022-12-07 [[Access Form]](https://docs.google.com/forms/u/0/d/e/1FAIpQLSfMjC57wcXWUDV0UbS2Tn6VhjLEiCXaHvWZuWgWRa-Zx8-Few/closedform).\n" + value: closed + analysis: "Model performance was evaluated using automated (Frechet Video Distance;\ + \ Frechet Inception Distance) and human evaluation on two datasets (UCF-101,\ + \ MSR-VTT) in the zero-shot setting.\n" created_date: + explanation: "The date that Make-A-Video was posted to arXiv [[arXiv]] (https://arxiv.org/abs/2209.14792).\n" value: 2022-09-29 - explanation: > - The date that Make-A-Video was posted to arXiv - [[arXiv]] - (https://arxiv.org/abs/2209.14792). 
- url: https://arxiv.org/pdf/2209.14792.pdf - model_card: none - modality: text, video - size: - value: unknown - explanation: > - The authors do not state the model size in the paper. - analysis: > - Model performance was evaluated using automated (Frechet Video Distance; Frechet - Inception Distance) and human evaluation on two datasets (UCF-101, MSR-VTT) - in the zero-shot setting. - # Construction dependencies: [Make-A-Video dataset] - training_emissions: + description: "Make-A-Video is a model for Text-to-Video Generation without Text-Video\ + \ Data.\n" + feedback: + explanation: "Authors do not mention or provide a feedback mechanism.\n" + value: none + intended_uses: + explanation: "Authors do not report the intended uses.\n" value: unknown - explanation: > - Authors do not report the training emissions. - training_time: + license: + explanation: No license was found. + value: none + modality: + explanation: text, video + value: text, video; text, video + model_card: none + monitoring: + explanation: "Authors do not report the monitoring process for Make-A-Video\ + \ internally at Meta.\n" value: unknown - explanation: > - Authors do not report the training time. - training_hardware: + name: Make-A-Video + organization: Meta + prohibited_uses: + explanation: "Authors do not report the prohibited uses.\n" value: unknown - explanation: > - Authors do not report the training hardware or provider. quality_control: + explanation: "Authors do not report specific quality control steps taken in\ + \ modeling, though filtering is done in producing the Make-A-Video dataset.\n" value: none - explanation: > - Authors do not report specific quality control steps taken in modeling, though - filtering is done in producing the Make-A-Video dataset. - # Downstream - access: - value: closed - explanation: > - The model has not been released; a form existed to potentially acquire access - but is now closed as of 2022-12-07 - [[Access Form]](https://docs.google.com/forms/u/0/d/e/1FAIpQLSfMjC57wcXWUDV0UbS2Tn6VhjLEiCXaHvWZuWgWRa-Zx8-Few/closedform). - license: - value: none - explanation: No license was found. - intended_uses: + size: unknown + training_emissions: + explanation: "Authors do not report the training emissions.\n" value: unknown - explanation: > - Authors do not report the intended uses. - prohibited_uses: + training_hardware: + explanation: "Authors do not report the training hardware or provider.\n" value: unknown - explanation: > - Authors do not report the prohibited uses. - monitoring: + training_time: + explanation: "Authors do not report the training time.\n" value: unknown - explanation: > - Authors do not report the monitoring process for Make-A-Video internally at - Meta. - feedback: - value: none - explanation: > - Authors do not mention or provide a feedback mechanism. 
- -- type: model - name: LLaMA - organization: Meta - description: '' - created_date: 2023-02-24 - url: https://arxiv.org/abs/2302.13971 - model_card: '' - modality: Text + type: model + url: https://arxiv.org/pdf/2209.14792.pdf +- access: open analysis: '' - size: 65B parameters (dense model) + created_date: 2023-02-24 dependencies: - CommonCrawl - C4 @@ -386,306 +332,311 @@ - BooksCorpus - arXiv - StackExchange - training_emissions: '' - training_time: '' - training_hardware: '' - quality_control: '' - access: open - license: LLaMa License (model weights), GPLv3 (code) + description: '' + feedback: '' intended_uses: '' - prohibited_uses: '' + license: LLaMa License (model weights), GPLv3 (code) + modality: + explanation: Text + value: text; text + model_card: '' monitoring: '' - feedback: '' - -- type: model - name: LLaMA 2 + name: LLaMA organization: Meta - description: LLaMA 2 is an updated version of LLaMA trained on a new mix of publicly - available data. - created_date: 2023-07-18 - url: https://ai.meta.com/resources/models-and-libraries/llama/ - model_card: Can be found at appendix of paper at https://ai.meta.com/research/publications/llama-2-open-foundation-and-fine-tuned-chat-models/ - modality: text + prohibited_uses: '' + quality_control: '' + size: 65B parameters (dense) + training_emissions: '' + training_hardware: '' + training_time: '' + type: model + url: https://arxiv.org/abs/2302.13971 +- access: open analysis: Evaluated on standard academic benchmarks and internal Meta libraries. - size: 70B parameters (dense model) + created_date: 2023-07-18 dependencies: [] - training_emissions: 539 tCO2eq - training_time: '' - training_hardware: NVIDIA A100-80GB GPUs (TDP of 350-400W) - quality_control: '' - access: open - license: - value: custom - explanation: The license can be found at https://ai.meta.com/resources/models-and-libraries/llama-downloads/ + description: LLaMA 2 is an updated version of LLaMA trained on a new mix of publicly + available data. + feedback: '' intended_uses: LLaMA 2 is intended for commercial and research use in English. Tuned models are intended for assistant-like chat, whereas pretrained models can be adapted for a variety of natural language generation tasks. + license: + explanation: The license can be found at https://ai.meta.com/resources/models-and-libraries/llama-downloads/ + value: custom + modality: + explanation: text + value: text; text + model_card: Can be found at appendix of paper at https://ai.meta.com/research/publications/llama-2-open-foundation-and-fine-tuned-chat-models/ + monitoring: '' + name: LLaMA 2 + organization: Meta prohibited_uses: Use in any manner that violates applicable laws or regulations (including trade compliance laws). Use in languages other than English. Use in any other way that is prohibited by the Acceptable Use Policy and Licensing Agreement for LLaMA 2. 
- monitoring: '' + quality_control: '' + size: 70B parameters (dense) + training_emissions: 539 tCO2eq + training_hardware: NVIDIA A100-80GB GPUs (TDP of 350-400W) + training_time: '' + type: model + url: https://ai.meta.com/resources/models-and-libraries/llama/ +- access: open + analysis: '' + created_date: 2022-12-22 + dependencies: [OPT, OPT-IML Bench] + description: '' feedback: '' - -- type: model + intended_uses: '' + license: OPT-IML 175B License + modality: + explanation: text + value: text; text + model_card: '' + monitoring: '' name: OPT-IML organization: Meta - description: '' - created_date: 2022-12-22 - url: https://arxiv.org/abs/2212.12017 - model_card: '' - modality: text - analysis: '' + prohibited_uses: '' + quality_control: '' size: 175B parameters (dense) - dependencies: [OPT, OPT-IML Bench] training_emissions: '' - training_time: '' training_hardware: '' - quality_control: '' - access: open - license: OPT-IML 175B License - intended_uses: '' - prohibited_uses: '' - monitoring: '' - feedback: '' - -- type: dataset - name: SA-1B - # General - organization: Meta - description: > - SA-1B (Segment Anything 1 Billion) is a dataset designed for training general-purpose - object segmentation models from open world images. It consists of 11M diverse, - high-resolution, privacy protecting images and 1.1B high-quality segmentation - masks. + training_time: '' + type: model + url: https://arxiv.org/abs/2212.12017 +- access: + explanation: "The full dataset can be downloaded at [[SA-1B Download]](https://ai.facebook.com/datasets/segment-anything-downloads/).\ + \ A 50k image preview of the full dataset is available [[here]](https://segment-anything.com/dataset/index.html).\n" + value: open + analysis: '' created_date: - value: 2023-04-05 explanation: The date the [[Meta blog post]](https://ai.facebook.com/blog/segment-anything-foundation-model-image-segmentation/) was released. - url: https://ai.facebook.com/datasets/segment-anything/ + value: 2023-04-05 datasheet: - value: https://arxiv.org/pdf/2304.02643.pdf#page=25 explanation: Datasheet can be found in the Appendix section of the Segment Anything paper. - modality: image - size: 11M images, 1.1B mask annotations - sample: [] - analysis: '' - # Construction + value: https://arxiv.org/pdf/2304.02643.pdf#page=25 dependencies: [] - license: - value: SA-1B Dataset Research License - explanation: SA-1B is released under a favorable license agreement for certain - research uses and with protections for researchers. See [[SA-1B Dataset Research - License]](https://ai.facebook.com/datasets/segment-anything-downloads/). + description: "SA-1B (Segment Anything 1 Billion) is a dataset designed for training\ + \ general-purpose object segmentation models from open world images. It consists\ + \ of 11M diverse, high-resolution, privacy protecting images and 1.1B high-quality\ + \ segmentation masks.\n" + excluded: + explanation: See [[Datasheet]](https://arxiv.org/pdf/2304.02643.pdf#page=25) + value: "\"We withheld ~2k randomly selected images for testing purposes.\" \ + \ \"Each image is accompanied by a short caption that describes the content\ + \ and place of the photo in a free form text. Per our agreement with the photo\ + \ provider we are not allowed to release these captions.\"\n" + feedback: Feedback can be given via the feedback form on their website [segment-anything.com](https://segment-anything.com/) + or by emailing at segment-anything at meta.com. 
included: - value: > - SA-1B consists of 11M diverse, high-resolution (averaging 1500×2250 pixels), - and privacy protecting images collected and licensed from a third party photo - company. The images are photos taken from a camera, i.e. not artwork. - The images vary in subject matter. Common themes of the images include: locations, - objects, scenes. - The dataset includes 1.1B high-quality segmentation masks collected with the - Segment Anything Data Engine. - SA-1B only includes automatically generated masks (99.1%), as the authors - conclude after experiments that the automatic masks are high quality and effective - for training models. The masks range from large scale objects such as buildings - to fine grained details such as door handles. - Masks are provided in the COCO run-length encoding (RLE) annotation format. explanation: According to section [[Segment Anything Dataset]](https://arxiv.org/pdf/2304.02643.pdf#section.5) of the paper and [[SA-1B website]](https://ai.facebook.com/datasets/segment-anything/). - excluded: - value: > - "We withheld ~2k randomly selected images for testing purposes." "Each image - is accompanied by a short caption that describes the content and place of - the photo in a free form text. Per our agreement with the photo provider we - are not allowed to release these captions." - explanation: See [[Datasheet]](https://arxiv.org/pdf/2304.02643.pdf#page=25) - quality_control: - value: | - - Dataset quality: - Due to potential accessibility and storage challenges, the original high-resolution images (averaging 3300×4950 pixels) were downsampled to an average resolution of 1500×2250 pixels. Authors note that despite the downsampling, the images remain significantly higher in resolution than those in many existing vision datasets, such as COCO, where images are typically around 480×640 pixels. - The images were processed to blur faces and license plates to protect the identities of those in the image. - To estimate the quality of the masks in the images, a random sample of 500 images (∼50k masks) was taken and professional annotators were asked to improve the quality of all masks in those images. - - Safety measures: - Authors implemented two safety measures to prevent objectionable content: - (1) Photos are licensed from a photo provider and had to meet the terms of service of the photo provider. Authors requested that all objectionable content be filtered from the images they licensed. - (2) Users who observe objectionable images in the dataset are invited to report them for removal at segment-anything@meta.com. - Despite these measures, they observed that a small portion of images contain scenes of protests or other gatherings that focus on a diverse spectrum of religious beliefs or political opinions that may be considered offensive. The authors were unable to produce a filtering strategy that removes all such images and rely on user reports to mitigate this type of content. - explanation: According to sections [[Segment Anything Dataset]](https://arxiv.org/pdf/2304.02643.pdf#section.5) - and [[Datasheet]](https://arxiv.org/pdf/2304.02643.pdf#page=25) of the paper. - # Downstream - access: - value: open - explanation: > - The full dataset can be downloaded at [[SA-1B Download]](https://ai.facebook.com/datasets/segment-anything-downloads/). - A 50k image preview of the full dataset is available [[here]](https://segment-anything.com/dataset/index.html). 
+ value: "SA-1B consists of 11M diverse, high-resolution (averaging 1500×2250\ + \ pixels), and privacy protecting images collected and licensed from a third\ + \ party photo company. The images are photos taken from a camera, i.e. not\ + \ artwork. The images vary in subject matter. Common themes of the images\ + \ include: locations, objects, scenes. The dataset includes 1.1B high-quality\ + \ segmentation masks collected with the Segment Anything Data Engine. SA-1B\ + \ only includes automatically generated masks (99.1%), as the authors conclude\ + \ after experiments that the automatic masks are high quality and effective\ + \ for training models. The masks range from large scale objects such as buildings\ + \ to fine grained details such as door handles. Masks are provided in the\ + \ COCO run-length encoding (RLE) annotation format.\n" intended_uses: + explanation: See [[SA-1B website]](https://ai.facebook.com/datasets/segment-anything/) value: SA-1B is intended to be used for research purposes only. It allows access to a privacy protecting and copyright friendly large-scale image dataset. Researchers can use it to train and evaluate generic object segmentation models. - explanation: See [[SA-1B website]](https://ai.facebook.com/datasets/segment-anything/) - prohibited_uses: - value: > - Authors note the following limitations of the dataset: - The masks are generated by a segmentation model, so there may be errors - or inconsistencies in the masks. - While no two images are the same, there are instances of images of the same - subject taken close together in time. - The dataset contains scenes of protests, or other gatherings that may suggest - religious beliefs, political opinions or union memberships that may be offensive. - - explanation: See [[Datasheet]](https://arxiv.org/pdf/2304.02643.pdf#page=25) + license: + explanation: SA-1B is released under a favorable license agreement for certain + research uses and with protections for researchers. See [[SA-1B Dataset Research + License]](https://ai.facebook.com/datasets/segment-anything-downloads/). + value: SA-1B Dataset Research License + modality: image monitoring: - value: > - The dataset will be hosted at https://ai.facebook.com/datasets/segment-anything - and maintained by Meta AI. - "If a user observes objectionable image(s) in the dataset, we invite them - to report the image(s) at segment-anything at meta.com for removal" - "To aid reproducibility of research using SA-1B, the only updates (to the - dataset) will be to remove reported images." - "We encourage users to gather further annotations for SA-1B. Any users who - generate annotations will be liable for hosting and distributing their annotations." explanation: See [[Datasheet]](https://arxiv.org/pdf/2304.02643.pdf#page=25) - feedback: Feedback can be given via the feedback form on their website [segment-anything.com](https://segment-anything.com/) - or by emailing at segment-anything at meta.com. - -- type: model - name: SAM - # General + value: "The dataset will be hosted at https://ai.facebook.com/datasets/segment-anything\ + \ and maintained by Meta AI. \"If a user observes objectionable image(s) in\ + \ the dataset, we invite them to report the image(s) at segment-anything at\ + \ meta.com for removal\" \"To aid reproducibility of research using SA-1B,\ + \ the only updates (to the dataset) will be to remove reported images.\" \"\ + We encourage users to gather further annotations for SA-1B. 
Any users who\ + \ generate annotations will be liable for hosting and distributing their annotations.\"\ + \n" + name: SA-1B organization: Meta + prohibited_uses: + explanation: See [[Datasheet]](https://arxiv.org/pdf/2304.02643.pdf#page=25) + value: "Authors note the following limitations of the dataset:\n The masks\ + \ are generated by a segmentation model, so there may be errors\nor inconsistencies\ + \ in the masks.\n While no two images are the same, there are instances of\ + \ images of the same\nsubject taken close together in time.\n The dataset\ + \ contains scenes of protests, or other gatherings that may suggest\nreligious\ + \ beliefs, political opinions or union memberships that may be offensive.\n" + quality_control: + explanation: According to sections [[Segment Anything Dataset]](https://arxiv.org/pdf/2304.02643.pdf#section.5) + and [[Datasheet]](https://arxiv.org/pdf/2304.02643.pdf#page=25) of the paper. + value: "- Dataset quality:\n Due to potential accessibility and storage challenges,\ + \ the original high-resolution images (averaging 3300×4950 pixels) were downsampled\ + \ to an average resolution of 1500×2250 pixels. Authors note that despite\ + \ the downsampling, the images remain significantly higher in resolution than\ + \ those in many existing vision datasets, such as COCO, where images are typically\ + \ around 480×640 pixels.\n The images were processed to blur faces and license\ + \ plates to protect the identities of those in the image.\n To estimate the\ + \ quality of the masks in the images, a random sample of 500 images (∼50k\ + \ masks) was taken and professional annotators were asked to improve the quality\ + \ of all masks in those images.\n- Safety measures:\n Authors implemented\ + \ two safety measures to prevent objectionable content:\n (1) Photos are\ + \ licensed from a photo provider and had to meet the terms of service of the\ + \ photo provider. Authors requested that all objectionable content be filtered\ + \ from the images they licensed.\n (2) Users who observe objectionable\ + \ images in the dataset are invited to report them for removal at segment-anything@meta.com.\n\ + \ Despite these measures, they observed that a small portion of images contain\ + \ scenes of protests or other gatherings that focus on a diverse spectrum\ + \ of religious beliefs or political opinions that may be considered offensive.\ + \ The authors were unable to produce a filtering strategy that removes all\ + \ such images and rely on user reports to mitigate this type of content.\n" + sample: [] + size: 11M images, 1.1B mask annotations + type: dataset + url: https://ai.facebook.com/datasets/segment-anything/ +- access: + explanation: "Inference code and model checkpoints are available on the model's\ + \ [[GitHub repository]](https://github.com/facebookresearch/segment-anything).\ + \ Its training dataset SA-1B can be used for research purposes and is available\ + \ for download [here](https://ai.facebook.com/datasets/segment-anything-downloads/).\n" + value: open + analysis: + explanation: See [[Zero-Shot Transfer Experiments]](https://arxiv.org/pdf/2304.02643.pdf#section.7) + for more details. + value: "\"We extensively evaluate SAM. First, using a diverse new suite of 23\ + \ segmentation datasets, we find that SAM produces high-quality masks from\ + \ a single foreground point, often only slightly below that of the manually\ + \ annotated ground truth. 
Second, we find consistently strong quantitative\ + \ and qualitative results on a variety of downstream tasks under a zero-shot\ + \ transfer protocol using prompt engineering, including edge detection, object\ + \ proposal generation, instance segmentation, and a preliminary exploration\ + \ of text-to-mask prediction.\"\n" + created_date: + explanation: The date the [[Meta blog post]](https://ai.facebook.com/blog/segment-anything-foundation-model-image-segmentation/) + was released. + value: 2023-04-05 + dependencies: [SA-1B] description: SAM (Segment Anything Model) is a foundation model for image segmentation. The model is designed and trained to be promptable, and supports flexible prompts (point, box, mask and free-form text) to compute masks in real-time to allow interactive use. - created_date: - value: 2023-04-05 - explanation: The date the [[Meta blog post]](https://ai.facebook.com/blog/segment-anything-foundation-model-image-segmentation/) - was released. - url: https://arxiv.org/pdf/2304.02643.pdf + feedback: Feedback can be given via the feedback form on their website [segment-anything.com](https://segment-anything.com/) + or by emailing at segment-anything at meta.com. + intended_uses: + explanation: See [[Model card]](https://arxiv.org/pdf/2304.02643.pdf#page=28) + value: "\"SAM is intended to be used for any prompt-based segmentation task.\ + \ We explored its use in segmenting objects from a point, edge detection,\ + \ segmenting all objects, and segmenting detected objects. We explored how\ + \ SAM can integrate with other vision models to segment objects from text.\"\ + \n" + license: + explanation: See [[LICENSE]](https://github.com/facebookresearch/segment-anything/blob/main/LICENSE) + value: Apache 2.0 + modality: + explanation: image, text + value: image, text; image, text model_card: - value: https://arxiv.org/pdf/2304.02643.pdf#page=28 explanation: Model card can be found in the Appendix section of the paper. - modality: image, text - analysis: - value: > - "We extensively evaluate SAM. - First, using a diverse new suite of 23 segmentation datasets, we find that - SAM produces high-quality masks from a single foreground point, often only - slightly below that of the manually annotated ground truth. - Second, we find consistently strong quantitative and qualitative results on - a variety of downstream tasks under a zero-shot transfer protocol using prompt - engineering, including edge detection, object proposal generation, instance - segmentation, and a preliminary exploration of text-to-mask prediction." - explanation: See [[Zero-Shot Transfer Experiments]](https://arxiv.org/pdf/2304.02643.pdf#section.7) + value: https://arxiv.org/pdf/2304.02643.pdf#page=28 + monitoring: '' + name: SAM + organization: Meta + prohibited_uses: + explanation: See [[Discussion]](https://arxiv.org/pdf/2304.02643.pdf#section.8) + value: "For out-of-scope use cases see terms of use in [[LICENSE]](https://github.com/facebookresearch/segment-anything/blob/main/LICENSE).\ + \ Authors also discuss the following limitations of the model: \"While SAM\ + \ performs well in general, it is not perfect. It can miss fine structures,\ + \ hallucinates small disconnected components at times, and does not produce\ + \ boundaries as crisply as more computationally intensive methods that “zoom-in”,\ + \ e.g. [18]. In general, we expect dedicated interactive segmentation methods\ + \ to outperform SAM when many points are provided, e.g. [67]. 
Unlike these\ + \ methods, SAM is designed for generality and breadth of use rather than high\ + \ IoU interactive segmentation. Moreover, SAM can process prompts in real-time,\ + \ but nevertheless SAM's overall performance is not real-time when using a\ + \ heavy image encoder. Our foray into the text-to-mask task is exploratory\ + \ and not entirely robust, although we believe it can be improved with more\ + \ effort. While SAM can perform many tasks, it is unclear how to design simple\ + \ prompts that implement semantic and panoptic segmentation. Finally, there\ + \ are domain-specific tools, such as [7], that we expect to outperform SAM\ + \ in their respective domains.\"\n" + quality_control: + explanation: See [[Segment Anything RAI Analysis]](https://arxiv.org/pdf/2304.02643.pdf#section.6) for more details. + value: "\"We perform a Responsible AI (RAI) analysis of our work by investigating\ + \ potential fairness concerns and biases when using SA-1B and SAM. We focus\ + \ on the geographic and income distribution of SA-1B and fairness of SAM across\ + \ protected attributes of people.\"\n" size: unknown - # Construction - dependencies: [SA-1B] training_emissions: - value: 2.8 metric tons of carbon dioxide - explanation: See [[Model card]](https://arxiv.org/pdf/2304.02643.pdf#page=28) - training_time: - value: 68 hours explanation: See [[Model card]](https://arxiv.org/pdf/2304.02643.pdf#page=28) + value: 2.8 metric tons of carbon dioxide training_hardware: - value: 256 A100 GPUs explanation: See [[Model card]](https://arxiv.org/pdf/2304.02643.pdf#page=28) - quality_control: - value: > - "We perform a Responsible AI (RAI) analysis of our work by investigating potential - fairness concerns and biases when using SA-1B and SAM. We focus on the geographic - and income distribution of SA-1B and fairness of SAM across protected attributes - of people." - explanation: See [[Segment Anything RAI Analysis]](https://arxiv.org/pdf/2304.02643.pdf#section.6) - for more details. - # Downstream - access: - value: open - explanation: > - Inference code and model checkpoints are available on the model's - [[GitHub repository]](https://github.com/facebookresearch/segment-anything). - Its training dataset SA-1B can be used for research purposes and is available - for download [here](https://ai.facebook.com/datasets/segment-anything-downloads/). - license: - value: Apache 2.0 - explanation: See [[LICENSE]](https://github.com/facebookresearch/segment-anything/blob/main/LICENSE) - intended_uses: - value: > - "SAM is intended to be used for any prompt-based segmentation task. We explored - its use in segmenting objects from a point, edge detection, segmenting all - objects, and segmenting detected objects. We explored how SAM can integrate - with other vision models to segment objects from text." + value: 256 A100 GPUs + training_time: explanation: See [[Model card]](https://arxiv.org/pdf/2304.02643.pdf#page=28) - prohibited_uses: - value: > - For out-of-scope use cases see terms of use in [[LICENSE]](https://github.com/facebookresearch/segment-anything/blob/main/LICENSE). - Authors also discuss the following limitations of the model: "While SAM performs - well in general, it is not perfect. It can miss fine structures, hallucinates - small disconnected components at times, and does not produce boundaries as - crisply as more computationally intensive methods that “zoom-in”, e.g. [18]. 
- In general, we expect dedicated interactive segmentation methods to outperform - SAM when many points are provided, e.g. [67]. Unlike these methods, SAM is - designed for generality and breadth of use rather than high IoU interactive - segmentation. Moreover, SAM can process prompts in real-time, but nevertheless - SAM's overall performance is not real-time when using a heavy image encoder. - Our foray into the text-to-mask task is exploratory and not entirely robust, - although we believe it can be improved with more effort. While SAM can perform - many tasks, it is unclear how to design simple prompts that implement semantic - and panoptic segmentation. Finally, there are domain-specific tools, such - as [7], that we expect to outperform SAM in their respective domains." - explanation: See [[Discussion]](https://arxiv.org/pdf/2304.02643.pdf#section.8) - monitoring: '' - feedback: Feedback can be given via the feedback form on their website [segment-anything.com](https://segment-anything.com/) - or by emailing at segment-anything at meta.com. - -- type: model - name: Voicebox - organization: Meta - description: Voicebox is the first generative AI model for speech to generalize - across tasks with state-of-the-art performance. - created_date: 2023-06-16 - url: https://research.facebook.com/publications/voicebox-text-guided-multilingual-universal-speech-generation-at-scale/ - model_card: '' - modality: text and audio input and audio output + value: 68 hours + type: model + url: https://arxiv.org/pdf/2304.02643.pdf +- access: closed analysis: Evaluated on zero-shot text-to-speech benchmarks, with Voicebox outperforming the current state-of-the-art English model VALL-E. - size: 330M parameters + created_date: 2023-06-16 dependencies: [] - training_emissions: unknown - training_time: 750,000 iterations - training_hardware: 32 GPUs of unspecified type - quality_control: '' - access: closed - license: '' + description: Voicebox is the first generative AI model for speech to generalize + across tasks with state-of-the-art performance. + feedback: '' intended_uses: '' - prohibited_uses: '' + license: '' + modality: + explanation: text and audio input and audio output + value: audio, text; audio + model_card: '' monitoring: '' - feedback: '' - -- type: model - name: PEER + name: Voicebox organization: Meta - description: PEER is a collaborative language model that is trained to imitate - the entire writing process itself. PEER can write drafts, add suggestions, propose - edits and provide explanations for its actions. - created_date: 2022-08-24 - url: https://arxiv.org/pdf/2208.11663.pdf - model_card: '' - modality: natural language text + prohibited_uses: '' + quality_control: '' + size: 330M parameters (dense) + training_emissions: unknown + training_hardware: 32 GPUs of unspecified type + training_time: 750,000 iterations + type: model + url: https://research.facebook.com/publications/voicebox-text-guided-multilingual-universal-speech-generation-at-scale/ +- access: open analysis: PEER is evaluated on core research questions intended to gauge language understanding, proper use of citations, instruction following, and iterative use. - size: 3B parameters + created_date: 2022-08-24 dependencies: [] - training_emissions: '' - training_time: '' - training_hardware: 64 GPUs - quality_control: Heuristics and edit filtering was used on data set, which consisted - mostly of Wikipedia pages. 
- access: open - license: '' + description: PEER is a collaborative language model that is trained to imitate + the entire writing process itself. PEER can write drafts, add suggestions, propose + edits and provide explanations for its actions. + feedback: '' intended_uses: adapting LLMs to work with collaborative writing and updating. - prohibited_uses: '' + license: '' + modality: + explanation: natural language text + value: text; text + model_card: '' monitoring: '' - feedback: '' + name: PEER + organization: Meta + prohibited_uses: '' + quality_control: Heuristics and edit filtering was used on data set, which consisted + mostly of Wikipedia pages. + size: 3B parameters (dense) + training_emissions: '' + training_hardware: 64 GPUs + training_time: '' + type: model + url: https://arxiv.org/pdf/2208.11663.pdf diff --git a/assets/microsoft.yaml b/assets/microsoft.yaml index 8d324607..773e3bb4 100644 --- a/assets/microsoft.yaml +++ b/assets/microsoft.yaml @@ -1,17 +1,13 @@ --- - -- type: model - name: VLMo - organization: Microsoft - description: VLMo is a model for text-to-image generation +- access: + explanation: Microsoft does not provide VLMo to external researchers. One author + commented that code would be pushed "soon" in [[November of 2021]](https://github.com/microsoft/unilm/issues/532), + but the repository does not contain relevant changes. + value: closed + analysis: '' created_date: - value: 2021-11-03 explanation: The date the model paper was released - url: https://arxiv.org/abs/2111.02358 - model_card: none - modality: text (English) and image - size: 562M - analysis: '' + value: 2021-11-03 dependencies: - Conceptual Captions - SBU Captions @@ -19,300 +15,263 @@ - Visual Genome - Wikipedia - BooksCorpus - training_emissions: '' - training_time: '' - training_hardware: '' - quality_control: '' - access: - value: closed - explanation: Microsoft does not provide VLMo to external researchers. One author - commented that code would be pushed "soon" in [[November of 2021]](https://github.com/microsoft/unilm/issues/532), - but the repository does not contain relevant changes. + description: VLMo is a model for text-to-image generation + feedback: '' + intended_uses: '' license: - value: none explanation: No license was found. - intended_uses: '' - prohibited_uses: '' + value: none + modality: + explanation: text (English) and image + value: image, text; image, text + model_card: none monitoring: '' - feedback: '' - -- type: model - name: T-ULRv5 + name: VLMo organization: Microsoft - description: T-ULRv5 is a language model trained with two unique training objectives + prohibited_uses: '' + quality_control: '' + size: 562M parameters (dense) + training_emissions: '' + training_hardware: '' + training_time: '' + type: model + url: https://arxiv.org/abs/2111.02358 +- access: + explanation: Manual approval through early access request form required. 
+ value: limited + analysis: '' created_date: - value: 2022-09-28 explanation: Date model blog post was released - url: https://www.microsoft.com/en-us/research/blog/microsoft-turing-universal-language-representation-model-t-ulrv5-tops-xtreme-leaderboard-and-trains-100x-faster/ - model_card: '' - modality: Text (English, 93 other languages) - size: 2.2B parameters (dense model) - analysis: '' + value: 2022-09-28 dependencies: [] + description: T-ULRv5 is a language model trained with two unique training objectives + feedback: '' + intended_uses: '' + license: + explanation: "The asset isn't released, and hence the license is unknown.\n" + value: unknown + modality: + explanation: Text (English, 93 other languages) + value: text; text + model_card: '' + monitoring: '' + name: T-ULRv5 + organization: Microsoft + prohibited_uses: '' + quality_control: '' + size: 2.2B parameters (dense) training_emissions: '' - training_time: Less than two weeks training_hardware: 256 A100 - quality_control: '' - access: - value: limited + training_time: Less than two weeks + type: model + url: https://www.microsoft.com/en-us/research/blog/microsoft-turing-universal-language-representation-model-t-ulrv5-tops-xtreme-leaderboard-and-trains-100x-faster/ +- access: explanation: Manual approval through early access request form required. + value: limited + analysis: '' + created_date: + explanation: Date model blog post was released + value: 2021-12-02 + dependencies: [] + description: '' + feedback: '' + intended_uses: '' license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - intended_uses: '' - prohibited_uses: '' + modality: + explanation: text (English) + value: text; text + model_card: '' monitoring: '' - feedback: '' - -- type: model name: Turing NLR-v5 organization: Microsoft - description: '' - created_date: - value: 2021-12-02 - explanation: Date model blog post was released - url: https://www.microsoft.com/en-us/research/blog/efficiently-and-effectively-scaling-up-language-model-pretraining-for-best-language-representation-model-on-glue-and-superglue/?OCID=msr_blog_TNLRV5_tw - model_card: '' - modality: text (English) - analysis: '' - size: 5B parameters (dense model) - dependencies: [] + prohibited_uses: '' + quality_control: '' + size: 5B parameters (dense) training_emissions: '' - training_time: '' training_hardware: '' - quality_control: '' - access: + training_time: '' + type: model + url: https://www.microsoft.com/en-us/research/blog/efficiently-and-effectively-scaling-up-language-model-pretraining-for-best-language-representation-model-on-glue-and-superglue/?OCID=msr_blog_TNLRV5_tw +- access: + explanation: Megatron-Turing NLG can be accessed through the [[Turing Academic + Program]](https://www.microsoft.com/en-us/research/collaboration/microsoft-turing-academic-program/) value: limited - explanation: Manual approval through early access request form required. 
+ analysis: '' + created_date: + explanation: "The date of the Microsoft Research blog announcing MT-NLG [[Microsoft\ + \ Research Blog]](https://www.microsoft.com/en-us/research/blog/using-deepspeed-and-megatron-to-train-megatron-turing-nlg-530b-the-worlds-largest-and-most-powerful-generative-language-model/).\n" + value: 2022-01-28 + dependencies: [The Pile] + description: "Megatron-Turing NLG is a 530B parameter autoregressive language\ + \ model.\n" + feedback: '' + intended_uses: '' license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - intended_uses: '' - prohibited_uses: '' + modality: + explanation: text (English) + value: text; text + model_card: '' monitoring: '' - feedback: '' - -- type: model name: Megatron-Turing NLG organization: Microsoft, NVIDIA - description: > - Megatron-Turing NLG is a 530B parameter autoregressive language model. - created_date: - value: 2022-01-28 - explanation: > - The date of the Microsoft Research blog announcing MT-NLG - [[Microsoft Research Blog]](https://www.microsoft.com/en-us/research/blog/using-deepspeed-and-megatron-to-train-megatron-turing-nlg-530b-the-worlds-largest-and-most-powerful-generative-language-model/). - url: https://arxiv.org/abs/2201.11990 - model_card: '' - modality: text (English) - size: 530B parameters (dense model) - analysis: '' - dependencies: [The Pile] + prohibited_uses: '' + quality_control: '' + size: 530B parameters (dense) training_emissions: '' - training_time: '' training_hardware: 4480 A100s (560 x 8) - quality_control: '' - access: - value: limited - explanation: Megatron-Turing NLG can be accessed through the [[Turing Academic - Program]](https://www.microsoft.com/en-us/research/collaboration/microsoft-turing-academic-program/) - license: - value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - intended_uses: '' - prohibited_uses: '' - monitoring: '' - feedback: '' - -- type: model - name: VALL-E - organization: Microsoft - description: Vall-E is a neural code model for text-to-speech synthesis + training_time: '' + type: model + url: https://arxiv.org/abs/2201.11990 +- access: + explanation: Microsoft does not provide public access to VALL-E + value: closed + analysis: '' created_date: - value: 2023-01-05 explanation: The date the [[model paper]](https://arxiv.org/abs/2301.02111) was released - url: https://valle-demo.github.io/ - model_card: none - modality: text, audio - size: unknown - analysis: '' + value: 2023-01-05 dependencies: [] - training_emissions: '' - training_time: '' - training_hardware: 16 V100 32GB GPUs - quality_control: '' - access: - value: closed - explanation: Microsoft does not provide public access to VALL-E + description: Vall-E is a neural code model for text-to-speech synthesis + feedback: '' + intended_uses: '' license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - intended_uses: '' - prohibited_uses: '' + modality: + explanation: text, audio + value: audio, text; audio, text + model_card: none monitoring: '' - feedback: '' - -# Applications - -- type: application - name: GitHub CoPilot - # General + name: VALL-E organization: Microsoft - description: > - GitHub CoPilot is a coding pair programmer assisting programmers as they - write code. 
+ prohibited_uses: '' + quality_control: '' + size: unknown + training_emissions: '' + training_hardware: 16 V100 32GB GPUs + training_time: '' + type: model + url: https://valle-demo.github.io/ +- access: + explanation: "The feature is available to developers in a restricted technical\ + \ preview [[GitHub CoPilot]](https://copilot.github.com/).\n" + value: limited + adaptation: unknown created_date: + explanation: "Date of the blog post introducing CoPilot [[GitHub Blog Post]]\ + \ (https://github.blog/2021-06-29-introducing-github-copilot-ai-pair-programmer/).\n" value: 2021-06-29 - explanation: > - Date of the blog post introducing CoPilot - [[GitHub Blog Post]] - (https://github.blog/2021-06-29-introducing-github-copilot-ai-pair-programmer/). - url: https://copilot.github.com/ - # Construction dependencies: [Codex] - adaptation: unknown - output_space: Code completions - quality_control: > - GitHub is working on a filter to detect and suppress code generations that - are verbatim from the training set - [[GitHub Research Recitation]] - (https://docs.github.com/en/github/copilot/research-recitation). - According to the FAQ, GitHub implemented a simple filter that blocks emails - in standard formats to protect personally identifiable data that may be - present in the training data - [[GitHub CoPilot]](https://copilot.github.com/). - # Downstream - access: - value: limited - explanation: > - The feature is available to developers in a restricted - technical preview - [[GitHub CoPilot]](https://copilot.github.com/). + description: "GitHub CoPilot is a coding pair programmer assisting programmers\ + \ as they write code.\n" + failures: unknown + feedback: "Feedback can be provided in the CoPilot feedback project [[CoPilot\ + \ feedback]] (https://github.com/github/feedback/discussions/categories/copilot-feedback).\n" + intended_uses: "GitHub CoPilot is intended to be used as a coding assistant.\n" license: + explanation: "We weren't able to locate the license.\n" value: unknown - explanation: > - We weren't able to locate the license. - terms_of_service: > - https://docs.github.com/en/site-policy/github-terms/github-terms-of-service - intended_uses: > - GitHub CoPilot is intended to be used as a coding assistant. - prohibited_uses: > - Access to GPT-3 is governed by GitHub Acceptable Use Policies and Terms of - Service, both of which list a set of prohibited uses - [[Use Policies]] - (https://docs.github.com/en/site-policy/acceptable-use-policies/github-acceptable-use-policies) - [[Terms of Service]] - (https://docs.github.com/en/site-policy/github-terms/github-terms-of-service). - monitoring: > - value: unknown - explanation: > - There may be internal monitoring mechanisms unknown to the public. - feedback: > - Feedback can be provided in the CoPilot feedback project - [[CoPilot feedback]] - (https://github.com/github/feedback/discussions/categories/copilot-feedback). - # Deployment - monthly_active_users: > - GitHub Copilot reportedly has over 1 million sign-ups - [[Tweet Source]](https://twitter.com/sama/status/1539737789310259200?s=21&t=YPaYd0ZueJzrR6rLslUqzg). 
- user_distribution: unknown - failures: unknown - -- type: model - name: BioGPT + monitoring: + explanation: There may be internal monitoring mechanisms unknown to the public. + value: unknown + monthly_active_users: "GitHub Copilot reportedly has over 1 million sign-ups [[Tweet\ \ Source]](https://twitter.com/sama/status/1539737789310259200?s=21&t=YPaYd0ZueJzrR6rLslUqzg).\n" + name: GitHub CoPilot organization: Microsoft - description: '' - created_date: 2022-09-24 - url: https://academic.oup.com/bib/article/23/6/bbac409/6713511?guestAccessKey=a66d9b5d-4f83-4017-bb52-405815c907b9&login=true - model_card: '' - modality: text + output_space: Code completions + prohibited_uses: "Access to GitHub CoPilot is governed by GitHub Acceptable Use Policies\ \ and Terms of Service, both of which list a set of prohibited uses [[Use Policies]]\ \ (https://docs.github.com/en/site-policy/acceptable-use-policies/github-acceptable-use-policies)\ \ [[Terms of Service]] (https://docs.github.com/en/site-policy/github-terms/github-terms-of-service).\n" + quality_control: "GitHub is working on a filter to detect and suppress code generations\ \ that are verbatim from the training set [[GitHub Research Recitation]] (https://docs.github.com/en/github/copilot/research-recitation).\ \ According to the FAQ, GitHub implemented a simple filter that blocks emails\ \ in standard formats to protect personally identifiable data that may be present\ \ in the training data [[GitHub CoPilot]](https://copilot.github.com/).\n" + terms_of_service: "https://docs.github.com/en/site-policy/github-terms/github-terms-of-service\n" + type: application + url: https://copilot.github.com/ + user_distribution: unknown +- access: open analysis: '' - size: 1.5B parameters (dense model) + created_date: 2022-09-24 dependencies: [PubMed] - training_emissions: '' - training_time: '' - training_hardware: '' - quality_control: '' - access: open - license: MIT + description: '' + feedback: '' intended_uses: '' - prohibited_uses: '' + license: MIT + modality: + explanation: text + value: text; text + model_card: '' monitoring: '' - feedback: '' - -- type: application - name: Microsoft Teams + name: BioGPT organization: Microsoft + prohibited_uses: '' + quality_control: '' + size: 1.5B parameters (dense) + training_emissions: '' + training_hardware: '' + training_time: '' + type: model + url: https://academic.oup.com/bib/article/23/6/bbac409/6713511?guestAccessKey=a66d9b5d-4f83-4017-bb52-405815c907b9&login=true +- access: open + adaptation: '' + created_date: 2023-02-01 + dependencies: [ChatGPT API] description: Microsoft Teams Premium is generally available. Built on the familiar, all-in-one collaborative experience of Microsoft Teams, Teams Premium brings the latest technologies, powered by OpenAI’s GPT-3.5, to make meetings more intelligent, personalized, and protected—whether it’s one-on-one, large meetings, virtual appointments, or webinars. - created_date: 2023-02-01 - url: https://www.microsoft.com/en-us/microsoft-365/blog/2023/02/01/microsoft-teams-premium-cut-costs-and-add-ai-powered-productivity/ - dependencies: [ChatGPT API] - adaptation: '' - output_space: '' - quality_control: '' - access: open + failures: '' + feedback: '' + intended_uses: '' license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown.
- terms_of_service: '' - intended_uses: '' - prohibited_uses: '' monitoring: '' - feedback: '' monthly_active_users: '' - user_distribution: '' - failures: '' - -- type: application - name: Bing Search + name: Microsoft Teams organization: Microsoft + output_space: '' + prohibited_uses: '' + quality_control: '' + terms_of_service: '' + type: application + url: https://www.microsoft.com/en-us/microsoft-365/blog/2023/02/01/microsoft-teams-premium-cut-costs-and-add-ai-powered-productivity/ + user_distribution: '' +- access: limited + adaptation: unknown + created_date: 2023-02-07 + dependencies: [ChatGPT API] description: AI-powered Bing search engine and Edge browser, available in preview now at Bing.com, to deliver better search, more complete answers, a new chat experience and the ability to generate content. We think of these tools as an AI copilot for the web. - created_date: 2023-02-07 - url: https://blogs.microsoft.com/blog/2023/02/07/reinventing-search-with-a-new-ai-powered-microsoft-bing-and-edge-your-copilot-for-the-web/ - dependencies: [ChatGPT API] - adaptation: unknown - output_space: Search results - quality_control: '' - access: limited + failures: '' + feedback: "Feedback can be submitted at [bing.com](bing.com).\n" + intended_uses: Search engine license: + explanation: "Software license as described in the Terms of Service document.\n" value: custom - explanation: > - Software license as described in the Terms of Service document. - terms_of_service: https://www.microsoft.com/legal/terms-of-use - intended_uses: Search engine - prohibited_uses: '' monitoring: '' - feedback: > - Feedback can be submitted at [bing.com](bing.com). monthly_active_users: '' - user_distribution: '' - failures: '' - -- type: model - name: KOSMOS-1 + name: Bing Search organization: Microsoft - description: KOSMOS-1 is a multimodal language model that is capable of perceiving - multimodal input, following instructions, and performing in-context learning - for not only language tasks but also multimodal tasks. - created_date: 2023-03-01 - url: https://arxiv.org/pdf/2302.14045.pdf - model_card: '' - modality: text, image + output_space: Search results + prohibited_uses: '' + quality_control: '' + terms_of_service: https://www.microsoft.com/legal/terms-of-use + type: application + url: https://blogs.microsoft.com/blog/2023/02/07/reinventing-search-with-a-new-ai-powered-microsoft-bing-and-edge-your-copilot-for-the-web/ + user_distribution: '' +- access: closed analysis: '' - size: 1.6B parameters (dense model) + created_date: 2023-03-01 dependencies: - The Pile - CommonCrawl @@ -320,439 +279,427 @@ - LAION-400M - COYO-700M - Conceptual Captions - training_emissions: '' - training_time: '' - training_hardware: '' - quality_control: '' - access: closed - license: MIT + description: KOSMOS-1 is a multimodal language model that is capable of perceiving + multimodal input, following instructions, and performing in-context learning + for not only language tasks but also multimodal tasks. 
+ feedback: '' intended_uses: '' - prohibited_uses: '' + license: MIT + modality: + explanation: text, image + value: image, text; image, text + model_card: '' monitoring: '' - feedback: '' - -- type: model - name: Prometheus + name: KOSMOS-1 organization: Microsoft + prohibited_uses: '' + quality_control: '' + size: 1.6B parameters (dense) + training_emissions: '' + training_hardware: '' + training_time: '' + type: model + url: https://arxiv.org/pdf/2302.14045.pdf +- access: closed + analysis: '' + created_date: 2023-02-07 + dependencies: [] description: In the context of Bing, we have developed a proprietary way of working with the OpenAI model that allows us to best leverage its power. We call this collection of capabilities and techniques the Prometheus model. This combination gives you more relevant, timely and targeted results, with improved safety. - created_date: 2023-02-07 - url: https://blogs.microsoft.com/blog/2023/02/07/reinventing-search-with-a-new-ai-powered-microsoft-bing-and-edge-your-copilot-for-the-web/ + feedback: '' + intended_uses: '' + license: + explanation: "We weren't able to locate a license.\n" + value: unknown + modality: {} model_card: '' - modality: unknown - analysis: '' + monitoring: '' + name: Prometheus + organization: Microsoft + prohibited_uses: '' + quality_control: '' size: unknown - dependencies: [] training_emissions: '' - training_time: '' training_hardware: '' - quality_control: '' - access: closed + training_time: '' + type: model + url: https://blogs.microsoft.com/blog/2023/02/07/reinventing-search-with-a-new-ai-powered-microsoft-bing-and-edge-your-copilot-for-the-web/ +- access: closed + analysis: '' + created_date: 2022-11-23 + dependencies: [FLD-900M] + description: '' + feedback: '' + intended_uses: '' license: + explanation: "We weren't able to locate a license.\n" value: unknown - explanation: > - We weren't able to locate a license. - intended_uses: '' - prohibited_uses: '' + modality: + explanation: text, image + value: image, text; image, text + model_card: '' monitoring: '' - feedback: '' - -- type: model name: Florence organization: Microsoft - description: '' - created_date: 2022-11-23 - url: https://arxiv.org/abs/2111.11432 - model_card: '' - modality: text, image - analysis: '' - size: 900M parameters (637M image, 256M text) - dependencies: [FLD-900M] + prohibited_uses: '' + quality_control: '' + size: 900M parameters (dense) training_emissions: '' - training_time: '' training_hardware: '' - quality_control: '' - access: closed - license: - value: unknown - explanation: > - We weren't able to locate a license. - intended_uses: '' - prohibited_uses: '' - monitoring: '' - feedback: '' - -- type: dataset - name: FLD-900M - organization: Microsoft - description: '' - created_date: 2022-11-23 + training_time: '' + type: model url: https://arxiv.org/abs/2111.11432 - datasheet: '' - modality: text, image - size: 900M image-text pairs - sample: [] +- access: closed analysis: '' + created_date: 2022-11-23 + datasheet: '' dependencies: [] - included: '' + description: '' excluded: '' - quality_control: '' - access: closed + feedback: '' + included: '' + intended_uses: '' license: + explanation: "We weren't able to locate a license.\n" value: unknown - explanation: > - We weren't able to locate a license. 
- intended_uses: '' - prohibited_uses: '' + modality: + explanation: text, image + value: image, text monitoring: '' - feedback: '' - -- type: application - name: Azure Cognitive Services for Vision + name: FLD-900M organization: Microsoft + prohibited_uses: '' + quality_control: '' + sample: [] + size: 900M image-text pairs + type: dataset + url: https://arxiv.org/abs/2111.11432 +- access: limited + adaptation: '' + created_date: 2023-03-07 + dependencies: [Florence] description: Cost-effective, production-ready computer vision services in Azure Cognitive Service for Vision. The improved Vision Services enables developers to create cutting-edge, market-ready, responsible computer vision applications across various industries. - created_date: 2023-03-07 - url: https://azure.microsoft.com/en-us/blog/announcing-a-renaissance-in-computer-vision-ai-with-microsofts-florence-foundation-model/?utm_content=buffer16fa0&utm_medium=social&utm_source=twitter.com&utm_campaign=buffer - dependencies: [Florence] - adaptation: '' - output_space: '' - quality_control: '' - access: limited + failures: '' + feedback: '' + intended_uses: '' license: + explanation: "Software license in the Microsoft Terms of Use govern the license\ + \ rules for Azure services as outlined in the Terms of Service document.\n" value: custom - explanation: > - Software license in the Microsoft Terms of Use govern the license rules - for Azure services as outlined in the Terms of Service document. - terms_of_service: https://azure.microsoft.com/en-us/support/legal/ - intended_uses: '' - prohibited_uses: '' monitoring: '' - feedback: '' monthly_active_users: '' - user_distribution: '' - failures: '' - -- type: model - name: VisualChatGPT + name: Azure Cognitive Services for Vision organization: Microsoft - description: '' - created_date: 2023-03-08 - url: https://arxiv.org/pdf/2303.04671.pdf - model_card: '' - modality: text, image + output_space: '' + prohibited_uses: '' + quality_control: '' + terms_of_service: https://azure.microsoft.com/en-us/support/legal/ + type: application + url: https://azure.microsoft.com/en-us/blog/announcing-a-renaissance-in-computer-vision-ai-with-microsofts-florence-foundation-model/?utm_content=buffer16fa0&utm_medium=social&utm_source=twitter.com&utm_campaign=buffer + user_distribution: '' +- access: closed analysis: '' - size: unknown + created_date: 2023-03-08 dependencies: [OpenAI API] - training_emissions: '' - training_time: '' - training_hardware: '' - quality_control: '' - access: closed + description: '' + feedback: '' + intended_uses: '' license: - value: none explanation: No license was found. - intended_uses: '' - prohibited_uses: '' + value: none + modality: + explanation: text, image + value: image, text; image, text + model_card: '' monitoring: '' - feedback: '' - -- type: application - name: Microsoft 365 Copilot + name: VisualChatGPT organization: Microsoft + prohibited_uses: '' + quality_control: '' + size: unknown + training_emissions: '' + training_hardware: '' + training_time: '' + type: model + url: https://arxiv.org/pdf/2303.04671.pdf +- access: limited + adaptation: '' + created_date: 2023-03-16 + dependencies: [GPT-4 API] description: It combines the power of language models with your data in the Microsoft Graph and the Microsoft 365 apps to turn your words into the most powerful productivity tool on the planet. 
-  created_date: 2023-03-16
-  url: https://blogs.microsoft.com/blog/2023/03/16/introducing-microsoft-365-copilot-your-copilot-for-work/
-  dependencies: [GPT-4 API]
-  adaptation: ''
-  output_space: ''
-  quality_control: ''
-  access: limited
+  failures: ''
+  feedback: ''
+  intended_uses: ''
   license:
+    explanation: "Software license as described in the Terms of Service document.\n"
     value: custom
-    explanation: >
-      Software license as described in the Terms of Service document.
-  terms_of_service: https://www.microsoft.com/legal/terms-of-use
-  intended_uses: ''
-  prohibited_uses: ''
   monitoring: ''
-  feedback: ''
   monthly_active_users: ''
-  user_distribution: ''
-  failures: ''
-
-- type: application
-  name: Microsoft Business Chat
+  name: Microsoft 365 Copilot
   organization: Microsoft
+  output_space: ''
+  prohibited_uses: ''
+  quality_control: ''
+  terms_of_service: https://www.microsoft.com/legal/terms-of-use
+  type: application
+  url: https://blogs.microsoft.com/blog/2023/03/16/introducing-microsoft-365-copilot-your-copilot-for-work/
+  user_distribution: ''
+- access: limited
+  adaptation: ''
+  created_date: 2023-03-16
+  dependencies: [Microsoft 365 Copilot]
   description: Business Chat works across the language model, the Microsoft 365
     apps, and your data — your calendar, emails, chats, documents, meetings and
     contacts — to do things you’ve never been able to do before. You can give it
     natural language prompts like “Tell my team how we updated the product strategy,”
     and it will generate a status update based on the morning’s meetings, emails
     and chat threads.
-  created_date: 2023-03-16
-  url: https://blogs.microsoft.com/blog/2023/03/16/introducing-microsoft-365-copilot-your-copilot-for-work/
-  dependencies: [Microsoft 365 Copilot]
-  adaptation: ''
-  output_space: ''
-  quality_control: ''
-  access: limited
   license:
+    explanation: "Software license as described in the Terms of Service document.\n"
     value: custom
-    explanation: >
-      Software license as described in the Terms of Service document.
-  terms_of_service: https://www.microsoft.com/legal/terms-of-use
-  intended_uses: ''
-  prohibited_uses: ''
   monitoring: ''
-  feedback: ''
   monthly_active_users: ''
+  name: Microsoft Business Chat
+  organization: Microsoft
+  output_space: ''
+  prohibited_uses: ''
+  quality_control: ''
+  terms_of_service: https://www.microsoft.com/legal/terms-of-use
+  type: application
+  url: https://blogs.microsoft.com/blog/2023/03/16/introducing-microsoft-365-copilot-your-copilot-for-work/
   user_distribution: ''
+- access: open
+  adaptation: ''
+  created_date: unknown
+  dependencies: [Microsoft 365 Copilot]
+  description: Microsoft Excel is the industry leading spreadsheet software program,
+    a powerful data visualization and analysis tool.
   failures: ''
-
-- type: application
+  feedback: ''
+  intended_uses: ''
+  license:
+    explanation: "Software license as described in the Terms of Service document.\n"
+    value: custom
+  monitoring: ''
+  monthly_active_users: ''
   name: Microsoft Excel
   organization: Microsoft
-  description: Microsoft Excel is the industry leading spreadsheet software program,
-    a powerful data visualization and analysis tool.
-  created_date: unknown
-  url: https://www.microsoft.com/en-us/microsoft-365/excel
-  dependencies: [Microsoft 365 Copilot]
-  adaptation: ''
   output_space: ''
+  prohibited_uses: ''
   quality_control: ''
-  access: open
-  license:
-    value: custom
-    explanation: >
-      Software license as described in the Terms of Service document.
terms_of_service: https://www.microsoft.com/legal/terms-of-use + type: application + url: https://www.microsoft.com/en-us/microsoft-365/excel + user_distribution: '' +- access: open + adaptation: '' + created_date: unknown + dependencies: [Microsoft 365 Copilot] + description: Microsoft Outlook is a personal information manager software system + from Microsoft, available as a part of the Microsoft Office and Microsoft 365 + software suites. + failures: '' + feedback: '' intended_uses: '' - prohibited_uses: '' + license: + explanation: "Software license as described in the Terms of Service document.\n" + value: custom monitoring: '' - feedback: '' monthly_active_users: '' - user_distribution: '' - failures: '' - -- type: application name: Microsoft Outlook organization: Microsoft - description: Microsoft Outlook is a personal information manager software system - from Microsoft, available as a part of the Microsoft Office and Microsoft 365 - software suites. - created_date: unknown - url: https://www.microsoft.com/en-us/microsoft-365/outlook/email-and-calendar-software-microsoft-outlook - dependencies: [Microsoft 365 Copilot] - adaptation: '' output_space: '' + prohibited_uses: '' quality_control: '' - access: open - license: - value: custom - explanation: > - Software license as described in the Terms of Service document. terms_of_service: https://www.microsoft.com/legal/terms-of-use + type: application + url: https://www.microsoft.com/en-us/microsoft-365/outlook/email-and-calendar-software-microsoft-outlook + user_distribution: '' +- access: limited + adaptation: '' + created_date: unknown + dependencies: [Microsoft 365 Copilot] + description: Microsoft Power Platform is a line of business intelligence, app + development, and app connectivity software applications. + failures: '' + feedback: '' intended_uses: '' - prohibited_uses: '' + license: + explanation: "Software license as described in the Terms of Service document.\n" + value: custom monitoring: '' - feedback: '' monthly_active_users: '' - user_distribution: '' - failures: '' - -- type: application name: Microsoft Power Platform organization: Microsoft - description: Microsoft Power Platform is a line of business intelligence, app - development, and app connectivity software applications. - created_date: unknown - url: https://powerplatform.microsoft.com/en-us/ - dependencies: [Microsoft 365 Copilot] - adaptation: '' output_space: '' + prohibited_uses: '' quality_control: '' - access: limited - license: - value: custom - explanation: > - Software license as described in the Terms of Service document. terms_of_service: https://www.microsoft.com/legal/terms-of-use + type: application + url: https://powerplatform.microsoft.com/en-us/ + user_distribution: '' +- access: open + adaptation: '' + created_date: unknown + dependencies: [Microsoft 365 Copilot] + description: Microsoft PowerPoint empowers you to create clean slideshow presentations + and intricate pitch decks and gives you a powerful presentation maker. + failures: '' + feedback: '' intended_uses: '' - prohibited_uses: '' + license: + explanation: "Software license as described in the Terms of Service document.\n" + value: custom monitoring: '' - feedback: '' monthly_active_users: '' - user_distribution: '' - failures: '' - -- type: application name: Microsoft PowerPoint organization: Microsoft - description: Microsoft PowerPoint empowers you to create clean slideshow presentations - and intricate pitch decks and gives you a powerful presentation maker. 
- created_date: unknown - url: https://www.microsoft.com/en-us/microsoft-365/powerpoint - dependencies: [Microsoft 365 Copilot] - adaptation: '' output_space: '' + prohibited_uses: '' quality_control: '' - access: open - license: - value: custom - explanation: > - Software license as described in the Terms of Service document. terms_of_service: https://www.microsoft.com/legal/terms-of-use + type: application + url: https://www.microsoft.com/en-us/microsoft-365/powerpoint + user_distribution: '' +- access: open + adaptation: '' + created_date: unknown + dependencies: [Microsoft 365 Copilot, Microsoft Business Chat] + description: Microsoft Teams is a proprietary business communication platform + developed by Microsoft, as part of the Microsoft 365 family of products. + failures: '' + feedback: '' intended_uses: '' - prohibited_uses: '' + license: + explanation: "Software license as described in the Terms of Service document.\n" + value: custom monitoring: '' - feedback: '' monthly_active_users: '' - user_distribution: '' - failures: '' - -- type: application name: Microsoft Teams organization: Microsoft - description: Microsoft Teams is a proprietary business communication platform - developed by Microsoft, as part of the Microsoft 365 family of products. - created_date: unknown - url: https://www.microsoft.com/en-us/microsoft-teams/group-chat-software - dependencies: [Microsoft 365 Copilot, Microsoft Business Chat] - adaptation: '' output_space: '' + prohibited_uses: '' quality_control: '' - access: open - license: - value: custom - explanation: > - Software license as described in the Terms of Service document. terms_of_service: https://www.microsoft.com/legal/terms-of-use + type: application + url: https://www.microsoft.com/en-us/microsoft-teams/group-chat-software + user_distribution: '' +- access: open + adaptation: '' + created_date: unknown + dependencies: [Microsoft 365 Copilot] + description: Microsoft Word is a word processing software developed by Microsoft + failures: '' + feedback: '' intended_uses: '' - prohibited_uses: '' + license: + explanation: "Software license as described in the Terms of Service document.\n" + value: custom monitoring: '' - feedback: '' monthly_active_users: '' - user_distribution: '' - failures: '' - -- type: application name: Microsoft Word organization: Microsoft - description: Microsoft Word is a word processing software developed by Microsoft - created_date: unknown - url: https://www.microsoft.com/en-us/microsoft-365/word - dependencies: [Microsoft 365 Copilot] - adaptation: '' output_space: '' + prohibited_uses: '' quality_control: '' - access: open - license: - value: custom - explanation: > - Software license as described in the Terms of Service document. terms_of_service: https://www.microsoft.com/legal/terms-of-use - intended_uses: '' - prohibited_uses: '' - monitoring: '' - feedback: '' - monthly_active_users: '' + type: application + url: https://www.microsoft.com/en-us/microsoft-365/word user_distribution: '' - failures: '' - -- type: application - name: Microsoft Inside Look - # General - organization: Microsoft - description: > - Inside look is a Microsoft Office feature, composing document insights - highlighting key points, expected time to read, and popularity among others. 
+- access: limited + adaptation: unknown created_date: unknown - url: https://support.microsoft.com/en-us/office/see-file-insights-before-you-open-a-file-87a23bbc-a516-42e2-a7b6-0ecb8259e026 dependencies: [] - adaptation: unknown - output_space: Document level insights for users. - quality_control: unknown - # Downstream - access: limited + description: "Inside look is a Microsoft Office feature, composing document insights\ + \ highlighting key points, expected time to read, and popularity among others.\n" + failures: unknown + feedback: unknown + intended_uses: Providing document insights to users. license: + explanation: "Software license as described in the Terms of Service document.\n" value: custom - explanation: > - Software license as described in the Terms of Service document. - terms_of_service: https://www.microsoft.com/legal/terms-of-use - intended_uses: Providing document insights to users. - prohibited_uses: unknown monitoring: unknown - feedback: unknown - # Deployment monthly_active_users: unknown - user_distribution: unknown - failures: unknown - -- type: application - name: Microsoft Suggested Replies - # General + name: Microsoft Inside Look organization: Microsoft - description: > - Suggested replies is a Microsoft Outlook feature that suggests responses - to emails, available in: English, Spanish, Italian, French, German, - Portuguese Chinese Simplified, Chinese Traditional, Swedish, Russian, - Korean, Czech, Hungarian, Arabic, Hebrew, Thai, Turkish, Japanese, Dutch, - Norwegian, Danish, and Polish. + output_space: Document level insights for users. + prohibited_uses: unknown + quality_control: unknown + terms_of_service: https://www.microsoft.com/legal/terms-of-use + type: application + url: https://support.microsoft.com/en-us/office/see-file-insights-before-you-open-a-file-87a23bbc-a516-42e2-a7b6-0ecb8259e026 + user_distribution: unknown +- access: limited + adaptation: unknown created_date: unknown - url: https://support.microsoft.com/en-us/office/use-suggested-replies-in-outlook-19316194-0434-43ba-a742-6b5890157379 dependencies: [] - adaptation: unknown - output_space: Suggested emails. - quality_control: unknown - # Downstream - access: limited + description: "Suggested replies is a Microsoft Outlook feature that suggests responses\ + \ to emails, available in: English, Spanish, Italian, French, German, Portuguese\ + \ Chinese Simplified, Chinese Traditional, Swedish, Russian, Korean, Czech,\ + \ Hungarian, Arabic, Hebrew, Thai, Turkish, Japanese, Dutch, Norwegian, Danish,\ + \ and Polish.\n" + failures: unknown + feedback: unknown + intended_uses: Suggesting email replies. license: + explanation: "Software license as described in the Terms of Service document.\n" value: custom - explanation: > - Software license as described in the Terms of Service document. - terms_of_service: https://www.microsoft.com/legal/terms-of-use - intended_uses: Suggesting email replies. - prohibited_uses: unknown monitoring: unknown - feedback: unknown - # Deployment monthly_active_users: unknown - user_distribution: unknown - failures: unknown - -- type: application - name: Microsoft Security Copilot - # General + name: Microsoft Suggested Replies organization: Microsoft - description: > - Microsoft Security Copilot is an AI-powered security analysis tool that enables - analysts to respond to threats quickly, process signals at machine speed, and - assess risk exposure in minutes. 
- created_date: - value: 2023-03-28 - explanation: The date Security Copilot was announced in the [[Microsoft blog - post]](https://blogs.microsoft.com/blog/2023/03/28/introducing-microsoft-security-copilot-empowering-defenders-at-the-speed-of-ai/). - url: https://blogs.microsoft.com/blog/2023/03/28/introducing-microsoft-security-copilot-empowering-defenders-at-the-speed-of-ai/ - # Construction - dependencies: [GPT-4, Microsoft security-specific model] + output_space: Suggested emails. + prohibited_uses: unknown + quality_control: unknown + terms_of_service: https://www.microsoft.com/legal/terms-of-use + type: application + url: https://support.microsoft.com/en-us/office/use-suggested-replies-in-outlook-19316194-0434-43ba-a742-6b5890157379 + user_distribution: unknown +- access: limited adaptation: + explanation: See [[blog post]](https://blogs.microsoft.com/blog/2023/03/28/introducing-microsoft-security-copilot-empowering-defenders-at-the-speed-of-ai/). value: Security Copilot combines OpenAI's GPT-4 generative AI with a security-specific model from Microsoft. This security-specific model in turn incorporates a growing set of security-specific skills and is informed by Microsoft's unique global threat intelligence and more than 65 trillion daily signals. - explanation: See [[blog post]](https://blogs.microsoft.com/blog/2023/03/28/introducing-microsoft-security-copilot-empowering-defenders-at-the-speed-of-ai/). + created_date: + explanation: The date Security Copilot was announced in the [[Microsoft blog + post]](https://blogs.microsoft.com/blog/2023/03/28/introducing-microsoft-security-copilot-empowering-defenders-at-the-speed-of-ai/). + value: 2023-03-28 + dependencies: [GPT-4, Microsoft security-specific model] + description: "Microsoft Security Copilot is an AI-powered security analysis tool\ + \ that enables analysts to respond to threats quickly, process signals at machine\ + \ speed, and assess risk exposure in minutes.\n" + failures: unknown + feedback: unknown + intended_uses: Security Copilot is designed to enhance the capabilities of cybersecurity + professionals. It leverages machine speed and scale to accelerate response to + security incidents, discover and process threat signals, and assess risk exposure + within minutes. + license: + explanation: "Software license as described in the Terms of Service document.\n" + value: custom + monitoring: '' + monthly_active_users: unknown + name: Microsoft Security Copilot + organization: Microsoft output_space: + explanation: See [[product demo]](https://www.microsoft.com/en-us/security/business/ai-machine-learning/microsoft-security-copilot). value: Actionable responses to security-related questions (text and image). Security event, incident or threat reports (PowerPoint slide). - explanation: See [[product demo]](https://www.microsoft.com/en-us/security/business/ai-machine-learning/microsoft-security-copilot). + prohibited_uses: unknown quality_control: + explanation: See [[blog post]](https://blogs.microsoft.com/blog/2023/03/28/introducing-microsoft-security-copilot-empowering-defenders-at-the-speed-of-ai/). value: Security Copilot employs a closed-loop learning system that learns from user interactions and feedback, enabling it to provide more coherent, relevant, and useful answers that continually improve over time. Security Copilot is @@ -760,119 +707,106 @@ that customers' data and AI models are protected with enterprise compliance and security controls. 
Customer data is owned and controlled by them, and not used to train AI models for anyone outside their organization. - explanation: See [[blog post]](https://blogs.microsoft.com/blog/2023/03/28/introducing-microsoft-security-copilot-empowering-defenders-at-the-speed-of-ai/). - # Downstream - access: limited - license: - value: custom - explanation: > - Software license as described in the Terms of Service document. terms_of_service: https://www.microsoft.com/legal/terms-of-use - intended_uses: Security Copilot is designed to enhance the capabilities of cybersecurity - professionals. It leverages machine speed and scale to accelerate response to - security incidents, discover and process threat signals, and assess risk exposure - within minutes. - prohibited_uses: unknown - monitoring: '' - feedback: unknown - # Deployment - monthly_active_users: unknown + type: application + url: https://blogs.microsoft.com/blog/2023/03/28/introducing-microsoft-security-copilot-empowering-defenders-at-the-speed-of-ai/ user_distribution: unknown - failures: unknown - -- type: model - name: UniLM - organization: Microsoft - description: UniLM is a unified language model that can be fine-tuned for both - natural language understanding and generation tasks. - created_date: 2019-10-01 - url: https://proceedings.neurips.cc/paper_files/paper/2019/file/c20bb2d9a50d5ac1f713f8b34d9aac5a-Paper.pdf - model_card: '' - modality: text +- access: open analysis: Evaluated on GLUE, SQuAD 2.0, and CoQA benchmarks. - size: 340M parameters + created_date: 2019-10-01 dependencies: [] - training_emissions: unknown - training_time: 10,000 steps in 7 hours - training_hardware: 8 NVIDIA Tesla V100 32GB GPUs - quality_control: '' - access: open - license: MIT + description: UniLM is a unified language model that can be fine-tuned for both + natural language understanding and generation tasks. + feedback: '' intended_uses: '' - prohibited_uses: '' + license: MIT + modality: {} + model_card: '' monitoring: '' - feedback: '' - -- type: model - name: Docugami + name: UniLM organization: Microsoft - description: Docugami is a LLM focused on writing business documents and data - using generative AI. - created_date: 2021-04-12 - url: https://www.docugami.com/generative-ai - model_card: '' - modality: text + prohibited_uses: '' + quality_control: '' + size: '' + training_emissions: unknown + training_hardware: 8 NVIDIA Tesla V100 32GB GPUs + training_time: 10,000 steps in 7 hours + type: model + url: https://proceedings.neurips.cc/paper_files/paper/2019/file/c20bb2d9a50d5ac1f713f8b34d9aac5a-Paper.pdf +- access: limited analysis: '' - size: 20B parameters + created_date: 2021-04-12 dependencies: [] - training_emissions: unknown - training_time: unknown - training_hardware: '' - quality_control: '' - access: limited - license: '' + description: Docugami is a LLM focused on writing business documents and data + using generative AI. + feedback: '' intended_uses: analyzing, writing, and connecting business documents and data - prohibited_uses: '' + license: '' + modality: + explanation: text + value: text; text + model_card: '' monitoring: '' - feedback: '' - -- type: model - name: BEiT-3 + name: Docugami organization: Microsoft - description: BEiT-3 is a general-purpose multimodal foundation model for vision - and vision-language tasks. 
- created_date: 2022-08-31 - url: https://arxiv.org/pdf/2208.10442.pdf - model_card: '' - modality: text and image input and output + prohibited_uses: '' + quality_control: '' + size: 20B parameters (dense) + training_emissions: unknown + training_hardware: '' + training_time: unknown + type: model + url: https://www.docugami.com/generative-ai +- access: open analysis: Evaluated on a range of standardized vision benchmarks, and achieves state of the art performance on all experimentally. - size: 1.9B parameters + created_date: 2022-08-31 dependencies: [Multiway Transformer network] - training_emissions: unknown - training_time: '' - training_hardware: '' - quality_control: '' - access: open - license: '' + description: BEiT-3 is a general-purpose multimodal foundation model for vision + and vision-language tasks. + feedback: '' intended_uses: '' - prohibited_uses: '' + license: '' + modality: + explanation: text and image input and output + value: image, text; image, text + model_card: '' monitoring: '' - feedback: '' - -- type: model - name: WizardLM + name: BEiT-3 organization: Microsoft + prohibited_uses: '' + quality_control: '' + size: 1.9B parameters (dense) + training_emissions: unknown + training_hardware: '' + training_time: '' + type: model + url: https://arxiv.org/pdf/2208.10442.pdf +- access: open + analysis: Reports results on standard LLM benchmarks in comparison to other LLMs + and test sets. + created_date: 2023-04-24 + dependencies: [LLaMA, Evol-Instruct, Alpaca dataset] description: Starting with an initial set of instructions, we use our proposed Evol-Instruct to rewrite them step by step into more complex instructions. Then, we mix all generated instruction data to fine-tune LLaMA. We call the resulting model WizardLM. - created_date: 2023-04-24 - url: https://arxiv.org/pdf/2304.12244v1.pdf - model_card: https://huggingface.co/WizardLM/WizardLM-13B-1.0 - modality: natural language text - analysis: Reports results on standard LLM benchmarks in comparison to other LLMs - and test sets. - size: 7B parameters - dependencies: [LLaMA, Evol-Instruct, Alpaca dataset] - training_emissions: '' - training_time: 70 hours on 3 epochs - training_hardware: 8 V100 GPUs - quality_control: '' - access: open - license: Apache 2.0 + feedback: https://huggingface.co/datasets/WizardLM/evol_instruct_70k/discussions intended_uses: Creating large amounts of instruction data, particularly with high complexity - prohibited_uses: '' + license: Apache 2.0 + modality: + explanation: natural language text + value: text; text + model_card: https://huggingface.co/WizardLM/WizardLM-13B-1.0 monitoring: '' - feedback: https://huggingface.co/datasets/WizardLM/evol_instruct_70k/discussions + name: WizardLM + organization: Microsoft + prohibited_uses: '' + quality_control: '' + size: 7B parameters (dense) + training_emissions: '' + training_hardware: 8 V100 GPUs + training_time: 70 hours on 3 epochs + type: model + url: https://arxiv.org/pdf/2304.12244v1.pdf diff --git a/assets/mosaic.yaml b/assets/mosaic.yaml index a887f3b4..a58049f1 100644 --- a/assets/mosaic.yaml +++ b/assets/mosaic.yaml @@ -1,24 +1,25 @@ --- - -- type: model - name: MPT - organization: Mosaic +- access: open + analysis: Evaluated on a range of benchmarks and performed on par with LLaMA-7B. + created_date: 2023-05-05 + dependencies: [RedPajama-Data, C4, The Stack, Multimodal C4] description: MPT is a series of large language models seeking to address the limitations of other open source models like LLaMA and Pythia. 
- created_date: 2023-05-05 - url: https://www.mosaicml.com/blog/mpt-7b + feedback: '' + intended_uses: '' + license: Apache 2.0 + modality: + explanation: text + value: text; text model_card: '' - modality: text - analysis: Evaluated on a range of benchmarks and performed on par with LLaMA-7B. - size: 7B parameters - dependencies: [RedPajama-Data, C4, The Stack, Multimodal C4] + monitoring: '' + name: MPT + organization: Mosaic + prohibited_uses: '' + quality_control: '' + size: 7B parameters (dense) training_emissions: unknown - training_time: 9.5 days training_hardware: 440 A100 40GB GPUs - quality_control: '' - access: open - license: Apache 2.0 - intended_uses: '' - prohibited_uses: '' - monitoring: '' - feedback: '' + training_time: 9.5 days + type: model + url: https://www.mosaicml.com/blog/mpt-7b diff --git a/assets/nanyang.yaml b/assets/nanyang.yaml index 02f40c24..e5b04469 100644 --- a/assets/nanyang.yaml +++ b/assets/nanyang.yaml @@ -1,27 +1,28 @@ --- - -- type: model - name: Otter - organization: Nanyang Technological University - description: Otter is a multi-modal model based on OpenFlamingo (open-sourced - version of DeepMind’s Flamingo), trained on MIMIC-IT and showcasing improved - instruction-following ability and in-context learning. - created_date: 2023-05-05 - url: https://arxiv.org/pdf/2305.03726v1.pdf - model_card: https://github.com/Luodian/Otter/blob/main/docs/model_card.md - modality: outputs text from text and image input +- access: open analysis: Evaluated on researcher experiments to test deeper understanding and advanced commonsense reasoning - size: 1.3B parameters + created_date: 2023-05-05 dependencies: [MIMIC-IT, OpenFlamingo] - training_emissions: '' - training_time: '' - training_hardware: 4 RTX-3090 GPUs - quality_control: '' - access: open - license: MIT + description: Otter is a multi-modal model based on OpenFlamingo (open-sourced + version of DeepMind’s Flamingo), trained on MIMIC-IT and showcasing improved + instruction-following ability and in-context learning. + feedback: '' intended_uses: Following and executing new instructions with few in-context learning examples given image and textual input. 
- prohibited_uses: '' + license: MIT + modality: + explanation: outputs text from text and image input + value: text; image, text + model_card: https://github.com/Luodian/Otter/blob/main/docs/model_card.md monitoring: '' - feedback: '' + name: Otter + organization: Nanyang Technological University + prohibited_uses: '' + quality_control: '' + size: 1.3B parameters (dense) + training_emissions: '' + training_hardware: 4 RTX-3090 GPUs + training_time: '' + type: model + url: https://arxiv.org/pdf/2305.03726v1.pdf diff --git a/assets/naver.yaml b/assets/naver.yaml index a262bc3a..588ea0a4 100644 --- a/assets/naver.yaml +++ b/assets/naver.yaml @@ -1,31 +1,30 @@ --- - -# models -- type: model - name: HyperCLOVA - organization: Naver - description: HyperClova is an autoregressive language model +- access: + explanation: The model is not available for download or direct API use + value: closed + analysis: '' created_date: - value: 2021-05-21 explanation: The date the model paper was submitted to arxiv - url: https://arxiv.org/abs/2109.04650 - model_card: '' - modality: text (Korean | 한국어) - size: 82B parameters (dense model) - analysis: '' + value: 2021-05-21 dependencies: [] - training_emissions: '' - training_time: 130.4 days - training_hardware: 1024 A100 GPUs - quality_control: '' - access: - value: closed - explanation: The model is not available for download or direct API use + description: HyperClova is an autoregressive language model + feedback: '' + intended_uses: '' license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - intended_uses: '' - prohibited_uses: '' + modality: + explanation: text (Korean | 한국어) + value: text; text + model_card: '' monitoring: '' - feedback: '' + name: HyperCLOVA + organization: Naver + prohibited_uses: '' + quality_control: '' + size: 82B parameters (dense) + training_emissions: '' + training_hardware: 1024 A100 GPUs + training_time: 130.4 days + type: model + url: https://arxiv.org/abs/2109.04650 diff --git a/assets/neeva.yaml b/assets/neeva.yaml index 18e64ffa..c081acf3 100644 --- a/assets/neeva.yaml +++ b/assets/neeva.yaml @@ -1,85 +1,79 @@ --- - -- type: dataset - name: Neeva dataset - organization: Neeva - description: '' +- access: closed + analysis: '' created_date: unknown - url: https://neeva.com/index datasheet: '' - modality: text - size: unknown - sample: [] - analysis: '' dependencies: [] - included: '' + description: '' excluded: '' - quality_control: '' - access: closed + feedback: '' + included: '' + intended_uses: '' license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. 
- intended_uses: '' - prohibited_uses: '' + modality: {} monitoring: '' - feedback: '' - -- type: model - name: Neeva model + name: Neeva dataset organization: Neeva - description: '' - created_date: unknown + prohibited_uses: '' + quality_control: '' + sample: [] + size: unknown + type: dataset url: https://neeva.com/index - model_card: '' - modality: text +- access: closed analysis: '' - size: unknown + created_date: unknown dependencies: [Neeva dataset] - training_emissions: '' - training_time: '' - training_hardware: '' - quality_control: '' - access: closed + description: '' + feedback: '' + intended_uses: '' license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - intended_uses: '' - prohibited_uses: '' + modality: {} + model_card: '' monitoring: '' - feedback: '' - -- type: application - name: NeevaAI + name: Neeva model organization: Neeva - description: NeevaAI is an AI-powered search tool that combines the capabilities - of LLMs with Neeva's independent in-house search stack to create a unique and - transformative search experience. + prohibited_uses: '' + quality_control: '' + size: '' + training_emissions: '' + training_hardware: '' + training_time: '' + type: model + url: https://neeva.com/index +- access: open + adaptation: '' created_date: - value: 2023-01-06 explanation: The [[Blog post]](https://neeva.com/blog/introducing-neevaai) announcing NeevaAI. - url: https://neeva.com/blog/introducing-neevaai + value: 2023-01-06 dependencies: [Neeva model] - adaptation: '' - output_space: '' - quality_control: '' - access: open + description: NeevaAI is an AI-powered search tool that combines the capabilities + of LLMs with Neeva's independent in-house search stack to create a unique and + transformative search experience. + failures: '' + feedback: '' + intended_uses: '' license: + explanation: "\"If you comply with these Terms, Neeva grants to you, during\ + \ the term of these Terms, a limited non-exclusive, non-transferable license,\ + \ with no right to sublicense, to download and install the App on your personal\ + \ computers, mobile handsets, tablets, wearable devices, and/or other devices\ + \ and to run the App solely for your own personal non-commercial purposes.\"\ + \ - excerpt from the Terms of Service document.\n" value: Custom - explanation: > - "If you comply with these Terms, Neeva grants to you, during the term of - these Terms, a limited non-exclusive, non-transferable license, with no - right to sublicense, to download and install the App on your personal - computers, mobile handsets, tablets, wearable devices, and/or other - devices and to run the App solely for your own personal non-commercial - purposes." - excerpt from the Terms of Service document. 
- terms_of_service: https://neeva.com/terms - intended_uses: '' - prohibited_uses: '' monitoring: '' - feedback: '' monthly_active_users: '' + name: NeevaAI + organization: Neeva + output_space: '' + prohibited_uses: '' + quality_control: '' + terms_of_service: https://neeva.com/terms + type: application + url: https://neeva.com/blog/introducing-neevaai user_distribution: '' - failures: '' diff --git a/assets/nextdoor.yaml b/assets/nextdoor.yaml index 0dfafe79..1cb54b17 100644 --- a/assets/nextdoor.yaml +++ b/assets/nextdoor.yaml @@ -1,24 +1,23 @@ --- - -- type: application - name: Nextdoor Assistant - organization: Nextdoor - description: AI chatbot on Nextdoor that helps users write more clear and conscientious - posts. +- access: open + adaptation: '' created_date: 2023-05-02 - url: https://help.nextdoor.com/s/article/Introducing-Assistant dependencies: [ChatGPT] - adaptation: '' - output_space: natural language text guidance - quality_control: '' - access: open - license: unknown - terms_of_service: '' + description: AI chatbot on Nextdoor that helps users write more clear and conscientious + posts. + failures: '' + feedback: '' intended_uses: to be used to help make the Nextdoor experience more positive for users - prohibited_uses: '' + license: unknown monitoring: '' - feedback: '' monthly_active_users: '' + name: Nextdoor Assistant + organization: Nextdoor + output_space: natural language text guidance + prohibited_uses: '' + quality_control: '' + terms_of_service: '' + type: application + url: https://help.nextdoor.com/s/article/Introducing-Assistant user_distribution: '' - failures: '' diff --git a/assets/nolano.yaml b/assets/nolano.yaml index f2b8fe84..0f23e021 100644 --- a/assets/nolano.yaml +++ b/assets/nolano.yaml @@ -1,23 +1,22 @@ --- - -- type: application - name: Cformers - organization: Nolano - description: Cformers is a set of transformers that act as an API for AI inference - in code. +- access: limited + adaptation: '' created_date: 2023-03-19 - url: https://www.nolano.org/services/Cformers/ dependencies: [] - adaptation: '' - output_space: '' - quality_control: '' - access: limited - license: MIT - terms_of_service: '' + description: Cformers is a set of transformers that act as an API for AI inference + in code. + failures: '' + feedback: '' intended_uses: '' - prohibited_uses: '' + license: MIT monitoring: '' - feedback: '' monthly_active_users: '' + name: Cformers + organization: Nolano + output_space: '' + prohibited_uses: '' + quality_control: '' + terms_of_service: '' + type: application + url: https://www.nolano.org/services/Cformers/ user_distribution: '' - failures: '' diff --git a/assets/notion.yaml b/assets/notion.yaml index ba21e83f..d0f580c5 100644 --- a/assets/notion.yaml +++ b/assets/notion.yaml @@ -1,24 +1,23 @@ --- - -- type: application - name: Notion AI - organization: Notion +- access: limited + adaptation: '' + created_date: 2023-02-22 + dependencies: [Anthropic API] description: Notion AI is a connected assistant that helps you think bigger, work faster, and augments your creativity, right inside the functional workspace you’re already familiar with. 
-  created_date: 2023-02-22
-  url: https://www.notion.so/help/guides/notion-ai-for-docs
-  dependencies: [Anthropic API]
-  adaptation: ''
-  output_space: ''
-  quality_control: ''
-  access: limited
-  license: ''
-  terms_of_service: ''
+  failures: ''
+  feedback: ''
   intended_uses: ''
-  prohibited_uses: ''
+  license: ''
   monitoring: ''
-  feedback: ''
   monthly_active_users: ''
+  name: Notion AI
+  organization: Notion
+  output_space: ''
+  prohibited_uses: ''
+  quality_control: ''
+  terms_of_service: ''
+  type: application
+  url: https://www.notion.so/help/guides/notion-ai-for-docs
   user_distribution: ''
-  failures: ''
diff --git a/assets/nvidia.yaml b/assets/nvidia.yaml
index 5f9e0978..39e0f118 100644
--- a/assets/nvidia.yaml
+++ b/assets/nvidia.yaml
@@ -1,99 +1,100 @@
 ---
-
-# models
-- type: model
-  name: Megatron-LM
-  organization: NVIDIA
-  description: Megatron-LM is an autoregressive language model
+- access:
+    explanation: "Neither the 8.3B parameter model trained to convergence nor the\
+      \ 1 trillion parameter model is available for download\n"
+    value: closed
+  analysis: ''
   created_date:
-    value: 2021-04-09
     explanation: The date the paper for the 1 trillion parameter model was published
-  url: https://arxiv.org/abs/2104.04473
-  model_card: none
-  modality: text (English)
-  analysis: ''
-  size: 1000B parameters (dense model)
+    value: 2021-04-09
   dependencies: []
-  training_emissions: unknown
-  training_time: 84 days
-  training_hardware: 3072 A100 GPUs
-  quality_control: unknown
-  access:
-    value: closed
-    explanation: >
-      Neither the 8.3B parameter model trained to convergence nor the 1 trillion
-      paramter model is available for download
+  description: Megatron-LM is an autoregressive language model
+  feedback: none
+  intended_uses: none
   license:
+    explanation: "The asset isn't released, and hence the license is unknown.\n"
     value: unknown
-    explanation: >
-      The asset isn't released, and hence the license is unknown.
- intended_uses: none - prohibited_uses: none + modality: + explanation: text (English) + value: text; text + model_card: none monitoring: none - feedback: none - -- type: dataset - name: MineDojo + name: Megatron-LM organization: NVIDIA - description: '' + prohibited_uses: none + quality_control: unknown + size: 1000B parameters (dense) + training_emissions: unknown + training_hardware: 3072 A100 GPUs + training_time: 84 days + type: model + url: https://arxiv.org/abs/2104.04473 +- access: open + analysis: '' created_date: 2022-06-17 - url: https://arxiv.org/abs/2206.08853 datasheet: '' - modality: Videos, Text - size: 730k videos, 6k Wikipedia pages, 340k reddit posts - sample: [] - analysis: '' dependencies: [YouTube, Wikipedia, Reddit] - included: '' + description: '' excluded: '' - quality_control: '' - access: open - license: MIT + feedback: '' + included: '' intended_uses: '' - prohibited_uses: '' + license: MIT + modality: + explanation: Videos, Text + value: text, video monitoring: '' - feedback: '' - -- type: dataset - name: VIMA dataset - organization: NVIDIA, Stanford - description: '' - created_date: 2022-10-06 - url: https://vimalabs.github.io/ - datasheet: '' - modality: text and image - size: 200M parameters (dense model) + name: MineDojo + organization: NVIDIA + prohibited_uses: '' + quality_control: '' sample: [] + size: 730k videos, 6k Wikipedia pages, 340k reddit posts + type: dataset + url: https://arxiv.org/abs/2206.08853 +- access: open analysis: '' + created_date: 2022-10-06 + datasheet: '' dependencies: [T5, Mask R-CNN, VIMA dataset] - included: '' + description: '' excluded: '' - quality_control: '' - access: open - license: MIT + feedback: '' + included: '' intended_uses: '' - prohibited_uses: '' + license: MIT + modality: + explanation: text and image + value: image, text monitoring: '' - feedback: '' - -- type: model - name: VIMA + name: VIMA dataset organization: NVIDIA, Stanford - description: '' - created_date: 2022-10-06 + prohibited_uses: '' + quality_control: '' + sample: [] + size: 200M parameters (dense model) + type: dataset url: https://vimalabs.github.io/ - model_card: '' - modality: Simulation +- access: open analysis: '' - size: 650K successful trajectories + created_date: 2022-10-06 dependencies: [] - training_emissions: '' - training_time: '' - training_hardware: '' - quality_control: '' - access: open - license: MIT + description: '' + feedback: '' intended_uses: '' - prohibited_uses: '' + license: MIT + modality: + explanation: Simulation + value: other; other + model_card: '' monitoring: '' - feedback: '' + name: VIMA + organization: NVIDIA, Stanford + prohibited_uses: '' + quality_control: '' + size: 650K parameters (dense) + training_emissions: '' + training_hardware: '' + training_time: '' + type: model + url: https://vimalabs.github.io/ diff --git a/assets/openai.yaml b/assets/openai.yaml index 74e6779e..ec2714f9 100644 --- a/assets/openai.yaml +++ b/assets/openai.yaml @@ -1,118 +1,104 @@ --- - -# Datasets -- type: dataset - name: GPT-3 dataset - # General - organization: OpenAI - description: > - The GPT-3 dataset is the text corpus that was used to train the GPT-3 - model. Information on the GPT-3 dataset is limited to discussion in the - paper introducing GPT-3 - [[Section 2.2]](https://arxiv.org/pdf/2005.14165.pdf#subsection.2.2). 
+- access:
+    explanation: "The GPT-3 dataset isn't released to the public, but it may be\
+      \ available to Microsoft through the GPT-3 licensing agreement between OpenAI\
+      \ and Microsoft [[OpenAI Blog Post]] (https://openai.com/blog/openai-licenses-gpt-3-technology-to-microsoft/).\n"
+    value: closed
+  analysis: "The GPT-3 paper, which also introduces the GPT-3 dataset, provides\
+    \ a limited analysis on the GPT-3 dataset, reporting the dirtiness of the dataset\
+    \ after it was filtered for text occurring in common benchmarking tasks.\
+    \ The authors report that \"as the dataset becomes more contaminated, the variance\
+    \ of the clean over all fraction increases, but there is no apparent bias towards\
+    \ improved or degraded performance\" [[Appendix C]](https://arxiv.org/pdf/2005.14165.pdf#appendix.C).\n"
   created_date:
+    explanation: "The date for the public announcement of GPT-3. The GPT-3 dataset\
+      \ didn't have a specific release date separate from the model [[Open AI Blog\
+      \ Post]](https://openai.com/blog/openai-api/).\n"
     value: 2020-06-11
-    explanation: >
-      The date for the public announcement of GPT-3. The GPT-3
-      dataset didn't have a specific release date separate from the model
-      [[Open AI Blog Post]](https://openai.com/blog/openai-api/).
-  url: https://arxiv.org/pdf/2005.14165.pdf
   datasheet:
-    value: none
     explanation: No datasheet available as of 2022-04-04.
-  modality: text (English)
-  size: 570 GB
-  sample: []
-  analysis: >
-    The GPT-3 paper, which also introduces the GPT-3 dataset, provides a limited
-    analysis on the GPT-3 dataset, reporting the dirtiness of the dataset after
-    the it was filtered for text occurring in common benchmarking tasks.
-    The authors report that "as the dataset becomes more contaminated, the
-    variance of the clean over all fraction increases, but there is no apparent
-    bias
-    towards improved or degraded performance"
-    [[Appendix C]](https://arxiv.org/pdf/2005.14165.pdf#appendix.C).
-  # Construction
+    value: none
   dependencies: [WebText]
-  license:
+  description: "The GPT-3 dataset is the text corpus that was used to train the\
+    \ GPT-3 model. Information on the GPT-3 dataset is limited to discussion in\
+    \ the paper introducing GPT-3 [[Section 2.2]](https://arxiv.org/pdf/2005.14165.pdf#subsection.2.2).\n"
+  excluded: "The Common Crawl dataset was processed using a classifier that kept\
+    \ high quality documents and filtered low quality documents. WebText was used\
+    \ as a proxy for high quality documents [[Appendix A]](https://arxiv.org/pdf/2005.14165.pdf#appendix.A).\n"
+  feedback:
+    explanation: "There are no known (internal or external) feedback mechanisms\
+      \ for the GPT-3 dataset as of 2022-04-04.\n"
     value: unknown
-    explanation: >
-      There is no known license specific to the GPT-3 dataset, however,
-      the governing organization, OpenAI, licensed GPT-3 to Microsoft, which
-      makes it likely that the GPT-3 dataset was also licensed
-      [[OpenAI Blog Post]]
-      (https://openai.com/blog/openai-licenses-gpt-3-technology-to-microsoft/).
-  included: >
-    The dataset is composed several NLP corpora: Common Crawl (filtered, 60%),
-    WebText2 (22%), Books1 (8%), Books2 (8%), Wikipedia (3%)
-    [[Section 2.2]](https://arxiv.org/pdf/2005.14165.pdf#subsection.2.2).
-  excluded: >
-    The Common Crawl dataset was processed using a classifier that kept high
-    quality documents and filtered low quality documents. WebText was used as a
-    proxy for high quality documents
-    [[Appendix A]](https://arxiv.org/pdf/2005.14165.pdf#appendix.A).
- quality_control: > - In addition to excluding low quality documents from the Common Crawl - dataset, the authors fuzzily deduplicated documents within each dataset, by - removing documents that have high overlap with each other. The same - procedure was followed to fuzzily deduplicate WebText from Common Crawl - [[Appendix A]](https://arxiv.org/pdf/2005.14165.pdf#appendix.A). - Text occuring in benchmark datasets were also partially removed - [[Appendix C]](https://arxiv.org/pdf/2005.14165.pdf#appendix.C). - # Downstream - access: - value: closed - explanation: > - The GPT-3 dataset isn't released to the public, but it may be available - to Microsoft through the GPT-3 licencing agreement between OpenAI and - Microsoft [[OpenAI Blog Post]] - (https://openai.com/blog/openai-licenses-gpt-3-technology-to-microsoft/). - intended_uses: > - The intended use of the GPT-3 dataset is to train language models. - prohibited_uses: + included: "The dataset is composed several NLP corpora: Common Crawl (filtered,\ + \ 60%), WebText2 (22%), Books1 (8%), Books2 (8%), Wikipedia (3%) [[Section 2.2]](https://arxiv.org/pdf/2005.14165.pdf#subsection.2.2).\n" + intended_uses: "The intended use of the GPT-3 dataset is to train language models.\n" + license: + explanation: "There is no known license specific to the GPT-3 dataset, however,\ + \ the governing organization, OpenAI, licensed GPT-3 to Microsoft, which makes\ + \ it likely that the GPT-3 dataset was also licensed [[OpenAI Blog Post]]\ + \ (https://openai.com/blog/openai-licenses-gpt-3-technology-to-microsoft/).\n" value: unknown - explanation: > - OpenAI didn't provide a list of prohibited uses specifically for - the GPT-3 dataset. However, public OpenAI products are governed by the - OpenAI Terms of Use, which may also apply to the OpenAI dataset. - The OpenAI Terms of Use prohibit the following: - (i) Illegal activities, such as child pornography, gambling, cybercrime, - piracy, violating copyright, trademark or other intellectual property laws; - (ii) Accessing or authorizing anyone to access the APIs from an embargoed - country, region, or territory as prohibited by the U.S. government; - (iii) Threatening, stalking, defaming, defrauding, degrading, victimizing - or intimidating anyone for any reason - [[Open AI Terms of Use]](https://openai.com/api/policies/terms/). + modality: + explanation: text (English) + value: text monitoring: + explanation: "There are no known (internal or external) monitoring mechanisms\ + \ that are in place for the use of the GPT-3 dataset as of 2022-04-04.\n" value: unknown - explanation: > - There are no known (internal or external) monitoring mechanisms - that are in place for the use of the GPT-3 dataset as of 2022-04-04. - feedback: - value: unknown - explanation: > - There are no known (internal or external) feedback mechanisms for - the GPT-3 dataset as of 2022-04-04. - -- type: dataset - name: HumanEval - # General + name: GPT-3 dataset organization: OpenAI - description: > - HumanEval is a dataset of 164 programming problems hand-written to evaluate - their Codex model. + prohibited_uses: + explanation: "OpenAI didn't provide a list of prohibited uses specifically for\ + \ the GPT-3 dataset. However, public OpenAI products are governed by the OpenAI\ + \ Terms of Use, which may also apply to the OpenAI dataset. 
The OpenAI Terms\
+ \ of Use prohibit the following: (i) Illegal activities, such as child pornography,\
+ \ gambling, cybercrime, piracy, violating copyright, trademark or other intellectual\
+ \ property laws; (ii) Accessing or authorizing anyone to access the APIs from\
+ \ an embargoed country, region, or territory as prohibited by the U.S. government;\
+ \ (iii) Threatening, stalking, defaming, defrauding, degrading, victimizing\
+ \ or intimidating anyone for any reason [[Open AI Terms of Use]](https://openai.com/api/policies/terms/).\n"
+ value: unknown
+ quality_control: "In addition to excluding low quality documents from the Common\
+ \ Crawl dataset, the authors fuzzily deduplicated documents within each dataset,\
+ \ by removing documents that have high overlap with each other. The same procedure\
+ \ was followed to fuzzily deduplicate WebText from Common Crawl [[Appendix A]](https://arxiv.org/pdf/2005.14165.pdf#appendix.A).\
+ \ Text occurring in benchmark datasets was also partially removed [[Appendix\
+ \ C]](https://arxiv.org/pdf/2005.14165.pdf#appendix.C).\n"
+ sample: []
+ size: 570 GB
+ type: dataset
+ url: https://arxiv.org/pdf/2005.14165.pdf
+- access:
+ explanation: "HumanEval dataset is publicly available and comes with an evaluation\
+ \ framework [[HumanEval GitHub Repository]](https://www.github.com/openai/human-eval).\n"
+ value: open
+ analysis: none
 created_date:
+ explanation: "The date that Codex, the model evaluated on the HumanEval dataset,\
+ \ was announced to the public [[OpenAI Blog Post]](https://openai.com/blog/openai-codex/).\n"
 value: 2021-08-10
- explanation: >
- The date that Codex, the model evaluated on the HumanEval
- dataset, was announced to the public
- [[OpenAI Blog Post]](https://openai.com/blog/openai-codex/).
- url: https://arxiv.org/pdf/2107.03374.pdf
 datasheet:
- value: none
 explanation: No datasheet available as of 2022-04-10.
- modality: code (Python)
- size: 214 KB
+ value: none
+ dependencies: []
+ description: "HumanEval is a dataset of 164 programming problems hand-written\
+ \ to evaluate their Codex model.\n"
+ excluded: "Code problems easily found on the internet.\n"
+ feedback: "Email the authors [[Codex Paper]](https://arxiv.org/pdf/2107.03374.pdf).\n"
+ included: "164 hand-written questions.\n"
+ intended_uses: "Evaluating code generation capabilities of models.\n"
+ license:
+ explanation: https://github.com/openai/human-eval/blob/master/LICENSE
+ value: MIT
+ modality:
+ explanation: code (Python)
+ value: code
+ monitoring: none
+ name: HumanEval
+ organization: OpenAI
+ prohibited_uses: none
+ quality_control: "The evaluation dataset was handwritten to ensure that the evaluation\
+ \ problems do not exist in the Codex dataset [[Section 2.2]](https://arxiv.org/pdf/2107.03374.pdf#subsection.2.2).\n"
 sample:
 - "\n\ndef string_sequence(n: int) -> str:\n \"\"\" Return a string containing\
 \ space-delimited numbers starting from 0 upto n inclusive.\n >>> string_sequence(0)\n\
@@ -139,936 +125,680 @@
 \ 'seven', 'eight' and 'nine'.\n Return the string with numbers sorted\
 \ from smallest to largest\n >>> sort_numbers('three one five')\n 'one\
 \ three five'\n \"\"\"\n"
- analysis: none
- # Construction
- dependencies: []
- license:
- value: MIT
- explanation: https://github.com/openai/human-eval/blob/master/LICENSE
- included: >
- 164 hand-written questions.
- excluded: >
- Code problems easily found on the internet.
- quality_control: > - The evaluation dataset was handwritten to ensure that the evaluation - problems do not exist in the Codex dataset - [[Section 2.2]](https://arxiv.org/pdf/2107.03374.pdf#subsection.2.2). - # Downstream - access: - value: open - explanation: > - HumanEval dataset is publicly available and comes - with a an evaluation framework - [[HumanEval GitHub Repository]](https://www.github.com/openai/human-eval). - intended_uses: > - Evaluating code generation capabilities of models. - prohibited_uses: none - monitoring: none - feedback: > - Email the authors [[Codex Paper]](https://arxiv.org/pdf/2107.03374.pdf). - -- type: dataset - name: Codex dataset - # General - organization: OpenAI - description: > - The dataset used to train the Codex model. - created_date: - value: 2021-08-10 - explanation: > - The date that Codex, the model trained on the Codex dataset, - was announced to the public - [[OpenAI Blog Post]](https://openai.com/blog/openai-codex/). + size: 214 KB + type: dataset url: https://arxiv.org/pdf/2107.03374.pdf - datasheet: none - modality: code - size: - value: 159 GB - explanation: As reported by the authors [[Section 3.1]](https://arxiv.org/pdf/2107.03374.pdf#subsection.3.1). - sample: [] +- access: + explanation: "The dataset might have been made available to Microsoft as part\ + \ of OpenAI giving Microsoft access to its Codex model [GitHub Copilot](https://copilot.github.com/).\n" + value: closed analysis: - value: none explanation: The paper doesn't provide an analysis on the training dataset. - # Construction + value: none + created_date: + explanation: "The date that Codex, the model trained on the Codex dataset, was\ + \ announced to the public [[OpenAI Blog Post]](https://openai.com/blog/openai-codex/).\n" + value: 2021-08-10 + datasheet: none dependencies: [] + description: "The dataset used to train the Codex model.\n" + excluded: "Following were filtered from the dataset: autogenerated files; files\ + \ with average line length > 100, maximum line length > 1000, or few alphanumeric\ + \ characters [[Section 3.1]](https://arxiv.org/pdf/2107.03374.pdf#subsection.3.1).\n" + feedback: "Email the authors [[Codex Paper]](https://arxiv.org/pdf/2107.03374.pdf).\n" + included: "The dataset includes 54 million public software repositories hosted\ + \ on GitHub as of an unspecified date in May 2020 [[Section 3.1]](https://arxiv.org/pdf/2107.03374.pdf#subsection.3.1).\n" + intended_uses: Training language models on code. license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - included: > - The dataset includes 54 million public software repositories hosted on - GitHub as of an unspecified date in May 2020 - [[Section 3.1]](https://arxiv.org/pdf/2107.03374.pdf#subsection.3.1). - excluded: > - Following were filtered from the dataset: autogenerated files; files with - average line length > 100, maximum line length > 1000, or few alphanumeric - characters - [[Section 3.1]](https://arxiv.org/pdf/2107.03374.pdf#subsection.3.1). - quality_control: > - Dataset was filtered using simple heuristics, as outlined in the excluded - field. - # Downstream - access: - value: closed - explanation: > - The dataset might have been made available to Microsoft - as part of OpenAI giving Microsoft access to its Codex model - [GitHub Copilot](https://copilot.github.com/). - intended_uses: Training language models on code. 
- prohibited_uses: unknown + modality: code monitoring: unknown - feedback: > - Email the authors - [[Codex Paper]](https://arxiv.org/pdf/2107.03374.pdf). - -- type: dataset - name: CLIP dataset - # General + name: Codex dataset organization: OpenAI - description: > - CLIP dataset contains text-image pairs crawled from the internet. + prohibited_uses: unknown + quality_control: "Dataset was filtered using simple heuristics, as outlined in\ + \ the excluded field.\n" + sample: [] + size: + explanation: As reported by the authors [[Section 3.1]](https://arxiv.org/pdf/2107.03374.pdf#subsection.3.1). + value: 159 GB + type: dataset + url: https://arxiv.org/pdf/2107.03374.pdf +- access: + explanation: The dataset wasn't released to the public. + value: closed + analysis: "The dataset contained some overlap with the test sets of the benchmarks\ + \ used for evaluation, but the authors determined the impact to be small: \"\ + There is a median overlap of 2.2% and an average overlap of 3.2%. Due to this\ + \ small amount of overlap, overall accuracy is rarely shifted by more than 0.1%\ + \ with only 7 datasets above this threshold\" [[Section 5]](https://arxiv.org/pdf/2103.00020.pdf#section.5).\n" created_date: + explanation: "The date of the blog post announcing CLIP [[OpenAI Blog Post]](https://openai.com/blog/clip/).\n" value: 2021-01-05 - explanation: > - The date of the blog post announcing CLIP - [[OpenAI Blog Post]](https://openai.com/blog/clip/). - url: https://arxiv.org/pdf/2103.00020.pdf datasheet: none - modality: text (English) and image - size: 400M (image, text) pairs - sample: [] - analysis: > - The dataset contained some overlap with the test sets of the benchmarks used - for evaluation, but the authors determined the impact to be small: "There - is a median overlap of 2.2% and an average overlap of 3.2%. Due to this - small amount of overlap, overall accuracy is rarely shifted by more than - 0.1% with only 7 datasets above this threshold" - [[Section 5]](https://arxiv.org/pdf/2103.00020.pdf#section.5). - # Construction dependencies: [] - license: - value: unknown - explanation: > - The asset doesn't have a known license as it hasn't been publicly - released. - included: > - Data crawled from the internet, without any filtering (including - de-duplication) or curation. + description: "CLIP dataset contains text-image pairs crawled from the internet.\n" excluded: none - quality_control: > - The data was "only crawled websites that had policies against excessively - violent and adult images and allowed us to filter out such content" - [[Model Card]](https://github.com/openai/CLIP/blob/main/model-card.md). - # Downstream - access: - value: closed - explanation: The dataset wasn't released to the public. + feedback: + explanation: The feedback mechanisms in place are unknown. + value: unknown + included: "Data crawled from the internet, without any filtering (including de-duplication)\ + \ or curation.\n" intended_uses: Training multimodal vision models. - prohibited_uses: + license: + explanation: "The asset doesn't have a known license as it hasn't been publicly\ + \ released.\n" value: unknown - explanation: The prohibited uses of the dataset are unknown. + modality: + explanation: text (English) and image + value: image, text monitoring: - value: unknown explanation: The monitoring mechanisms in place are unknown. - feedback: value: unknown - explanation: The feedback mechanisms in place are unknown. 
- -- type: dataset - name: DALL·E dataset - # General + name: CLIP dataset organization: OpenAI - description: > - DALL·E dataset is the training set consisting of image and text pairs - collected to train the DALL·E model. - created_date: - value: 2021-01-05 - explanation: > - The date of the blog post announcing DALL·E - [[OpenAI Blog Post]](https://openai.com/blog/dall-e/). - url: https://arxiv.org/abs/2102.12092 - datasheet: none - modality: text (English) and image - size: > - 250M (image, text) pairs + prohibited_uses: + explanation: The prohibited uses of the dataset are unknown. + value: unknown + quality_control: "The data was \"only crawled websites that had policies against\ + \ excessively violent and adult images and allowed us to filter out such content\"\ + \ [[Model Card]](https://github.com/openai/CLIP/blob/main/model-card.md).\n" sample: [] + size: 400M (image, text) pairs + type: dataset + url: https://arxiv.org/pdf/2103.00020.pdf +- access: + explanation: The dataset wasn't released to the public. + value: closed analysis: The authors found that the dataset contained 21% of the images in the MS-COCO validation set, but observed no significant changes in the performance of the accompanying DALL·E when tested on MS-COCO evaluation set with and without the said images [[Section 3.1]](https://arxiv.org/pdf/2102.12092.pdf#subsection.3.1). - # Construction + created_date: + explanation: "The date of the blog post announcing DALL·E [[OpenAI Blog Post]](https://openai.com/blog/dall-e/).\n" + value: 2021-01-05 + datasheet: none dependencies: [] - license: + description: "DALL·E dataset is the training set consisting of image and text\ + \ pairs collected to train the DALL·E model.\n" + excluded: "MS-COCO was excluded from the dataset, but because MS-COCO was created\ + \ from YFCC100M, some of the test images (not the captions) were included.\n" + feedback: + explanation: The feedback mechanisms in place are unknown. value: unknown - explanation: > - The asset doesn't have a known license as it hasn't been publicly - released. - included: > - Data from the internet, including Conceptual Captions and a filtered subset - of YFCC100M. - excluded: > - MS-COCO was excluded from the dataset, but because MS-COCO was created from - YFCC100M, some of the test images (not the captions) were included. - quality_control: > - The data was de-duplicated - [[Section 3.2]](https://arxiv.org/pdf/2102.12092.pdf#subsection.3.2). - The data collected from the internet was filtered using image, text and - joint image and text filters, which included: "discarding instances whose - captions are too short, are classified as non-English by the Python package - cld3, or that consist primarily of boilerplate phrases such as “photographed - on ”, where matches various formats for dates that we found in - the data". The authors also discard "instances whose images have aspect - ratios not in [1/2, 2]" - [[Appendix C]](https://arxiv.org/pdf/2102.12092.pdf#appendix.C). - # Downstream - access: - value: closed - explanation: The dataset wasn't released to the public. + included: "Data from the internet, including Conceptual Captions and a filtered\ + \ subset of YFCC100M.\n" intended_uses: Training multimodal vision models. - prohibited_uses: + license: + explanation: "The asset doesn't have a known license as it hasn't been publicly\ + \ released.\n" value: unknown - explanation: The prohibited uses of the dataset are unknown. 
+ modality: + explanation: text (English) and image + value: image, text monitoring: - value: unknown explanation: The monitoring mechanisms in place are unknown. - feedback: value: unknown - explanation: The feedback mechanisms in place are unknown. - -- type: dataset - name: Whisper dataset - # General + name: DALL·E dataset organization: OpenAI - description: > - The Whisper dataset is the speech corpus that was used to train the Whisper - model. Information on the dataset is limited to discussion in the - paper introducing Whisper. - [[Section 2.1]](https://cdn.openai.com/papers/whisper.pdf). + prohibited_uses: + explanation: The prohibited uses of the dataset are unknown. + value: unknown + quality_control: "The data was de-duplicated [[Section 3.2]](https://arxiv.org/pdf/2102.12092.pdf#subsection.3.2).\ + \ The data collected from the internet was filtered using image, text and joint\ + \ image and text filters, which included: \"discarding instances whose captions\ + \ are too short, are classified as non-English by the Python package cld3, or\ + \ that consist primarily of boilerplate phrases such as “photographed on ”,\ + \ where matches various formats for dates that we found in the data\"\ + . The authors also discard \"instances whose images have aspect ratios not in\ + \ [1/2, 2]\" [[Appendix C]](https://arxiv.org/pdf/2102.12092.pdf#appendix.C).\n" + sample: [] + size: "250M (image, text) pairs\n" + type: dataset + url: https://arxiv.org/abs/2102.12092 +- access: + explanation: "The dataset isn't released to the public.\n" + value: closed + analysis: "The Whisper paper provides limited details on preprocessing.\n" created_date: + explanation: "The date for the public announcement of Whisper. The dataset didn't\ + \ have a specific release date separate from the model [[Open AI Blog Post]](https://openai.com/blog/whisper/).\n" value: 2022-09-21 - explanation: > - The date for the public announcement of Whisper. The - dataset didn't have a specific release date separate from the model - [[Open AI Blog Post]](https://openai.com/blog/whisper/). - url: https://cdn.openai.com/papers/whisper.pdf datasheet: - value: none explanation: No datasheet available as of 2022-12-07. - modality: speech - size: 680k hours - sample: [] - analysis: > - The Whisper paper provides limited details on preprocessing. - # Construction + value: none dependencies: [] - license: + description: "The Whisper dataset is the speech corpus that was used to train\ + \ the Whisper model. Information on the dataset is limited to discussion in\ + \ the paper introducing Whisper. [[Section 2.1]](https://cdn.openai.com/papers/whisper.pdf).\n" + excluded: "Automated filtering was conducted.\n" + feedback: + explanation: "There are no known (internal or external) feedback mechanisms\ + \ for the dataset as of 2022-12-07.\n" value: unknown - explanation: > - The asset doesn't have a known license as it hasn't been publicly - released. - included: > - The dataset is composed three major sources: multilingual speech recognition - (17%), translation (18%), and English speech recognition (65%). - [[Figure 11]](https://cdn.openai.com/papers/whisper.pdf). - excluded: > - Automated filtering was conducted. - quality_control: > - In addition to filtering, basic text standardization was done. - # Downstream - access: - value: closed - explanation: > - The dataset isn't released to the public. 
+ included: "The dataset is composed three major sources: multilingual speech recognition\ + \ (17%), translation (18%), and English speech recognition (65%). [[Figure 11]](https://cdn.openai.com/papers/whisper.pdf).\n" intended_uses: The intended use is to train speech models. - prohibited_uses: - value: none - explanation: > - No uses are explicitly prohibited. + license: + explanation: "The asset doesn't have a known license as it hasn't been publicly\ + \ released.\n" + value: unknown + modality: + explanation: speech + value: audio monitoring: + explanation: "There are no known (internal or external) monitoring mechanisms\ + \ that are in place for the use of the dataset as of 2022-12-07.\n" value: none - explanation: > - There are no known (internal or external) monitoring mechanisms - that are in place for the use of the dataset as of 2022-12-07. - feedback: - value: unknown - explanation: > - There are no known (internal or external) feedback mechanisms for - the dataset as of 2022-12-07. - -- type: dataset - name: WebText - # General + name: Whisper dataset organization: OpenAI - description: '' + prohibited_uses: + explanation: "No uses are explicitly prohibited.\n" + value: none + quality_control: "In addition to filtering, basic text standardization was done.\n" + sample: [] + size: 680k hours + type: dataset + url: https://cdn.openai.com/papers/whisper.pdf +- access: closed + analysis: '' created_date: - value: 2019-11-01 explanation: Due to the lack of information about the exact date, it is assumed to be the 1st of the known month of creation. - url: https://cdn.openai.com/better-language-models/language_models_are_unsupervised_multitask_learners.pdf + value: 2019-11-01 datasheet: '' - modality: text (English) - size: 40 GB - sample: [] - analysis: '' - # Construction dependencies: [] - license: - value: unknown - explanation: > - The asset doesn't have a known license as it hasn't been publicly - released. - included: '' + description: '' excluded: '' - quality_control: '' - # Downstream - access: closed + feedback: '' + included: '' intended_uses: '' - prohibited_uses: '' + license: + explanation: "The asset doesn't have a known license as it hasn't been publicly\ + \ released.\n" + value: unknown + modality: {} monitoring: '' - feedback: '' - -# Models -- type: model - name: GPT-2 - # General + name: WebText organization: OpenAI - description: '' + prohibited_uses: '' + quality_control: '' + sample: [] + size: 40 GB + type: dataset + url: https://cdn.openai.com/better-language-models/language_models_are_unsupervised_multitask_learners.pdf +- access: open + analysis: '' created_date: - value: 2019-11-01 explanation: Due to the lack of information about the exact date, it is assumed to be the 1st of the known month of creation. - url: https://cdn.openai.com/better-language-models/language_models_are_unsupervised_multitask_learners.pdf - model_card: https://github.com/openai/gpt-2/blob/master/model_card.md - modality: text (English) - size: 1.5B parameters (dense model) - analysis: '' - # Construction + value: 2019-11-01 dependencies: [WebText] - training_emissions: '' - training_time: '' - training_hardware: '' - quality_control: '' - # Downstream - access: open + description: '' + feedback: '' + intended_uses: '' license: + explanation: "GPT-2 uses a [[modified MIT License]](https://github.com/openai/gpt-2/blob/master/LICENSE).\n" value: Modified MIT License - explanation: > - GPT-2 uses a - [[modified MIT License]](https://github.com/openai/gpt-2/blob/master/LICENSE). 
- intended_uses: ''
- prohibited_uses: ''
+ modality: {}
+ model_card: https://github.com/openai/gpt-2/blob/master/model_card.md
 monitoring: ''
- feedback: ''
-
-- type: model
- name: GPT-3
- # General
+ name: GPT-2
 organization: OpenAI
- description: >
- GPT-3 is an autoregressive language model.
+ prohibited_uses: ''
+ quality_control: ''
+ size: ''
+ training_emissions: ''
+ training_hardware: ''
+ training_time: ''
+ type: model
+ url: https://cdn.openai.com/better-language-models/language_models_are_unsupervised_multitask_learners.pdf
+- access:
+ explanation: "The GPT-3 model isn't fully released to the public, but it was\
+ \ made available to Microsoft through the licensing agreement between OpenAI\
+ \ and Microsoft [[OpenAI Blog Post]] (https://openai.com/blog/openai-licenses-gpt-3-technology-to-microsoft/).\
+ \ The public can access the model through the Open AI API, which is available\
+ \ in supported countries [[Supported Countries]](https://beta.openai.com/docs/supported-countries)\
+ \ [[OpenAI API]](https://openai.com/api/).\n"
+ value: limited
+ analysis: "The GPT-3 model was evaluated on language modeling, closed-book question\
+ \ answering, translation, Winograd-style tasks, commonsense reasoning, reading\
+ \ comprehension, SuperGLUE, NLI, synthetic tasks, and generation [[Section 4]](https://arxiv.org/pdf/2005.14165.pdf#section.4);\
+ \ as well as on fairness and biases [[Section 6]](https://arxiv.org/pdf/2005.14165.pdf#section.6).\n"
 created_date:
+ explanation: "The date that GPT-3 was announced to the public [[OpenAI Blog\
+ \ Post]](https://openai.com/blog/openai-api/).\n"
 value: 2020-06-11
- explanation: >
- The date that GPT-3 was announced to the public
- [[OpenAI Blog Post]](https://openai.com/blog/openai-api/).
- url: https://arxiv.org/pdf/2005.14165.pdf
- model_card: https://github.com/openai/gpt-3/blob/master/model-card.md
- modality: text (English)
- size:
- value: 175B parameters (dense model)
- explanation: >
- GPT-3 comes in several sizes. Here we report the size of the Davinci
- model, the largest GPT-3 model served through the OpenAI API. Sizes of the
- other models can be found in the paper
- [[Table 2.1]](https://arxiv.org/pdf/2005.14165.pdf#table.caption.7).
- analysis: >
- The GPT-3 model was evaluated on language modeling, closed-book question
- answering, translation, Winograd-style tasks, commonsense reasoning,
- reading comprehension, SuperGLUE, NLI, synthetic tasks, and generation
- [[Section 4]](https://arxiv.org/pdf/2005.14165.pdf#section.4);
- as well as on fairness and biases
- [[Section 6]](https://arxiv.org/pdf/2005.14165.pdf#section.6).
- # Construction
 dependencies: [GPT-3 dataset]
+ description: "GPT-3 is an autoregressive language model.\n"
+ feedback: "Feedback for GPT-3 can be provided on the feedback form linked in the\
+ \ model card [[Model Card]](https://github.com/openai/gpt-3/blob/master/model-card.md).\
+ \ The form is especially meant to collect feedback on concerns about misuse,\
+ \ synthetic text detection, bias, and risk of generative language models.\n"
+ intended_uses: "GPT-3 was intended to be used through the OpenAI API by developers\
+ \ for language applications.
Other intended uses of GPT-3 include researchers\
+ \ accessing the model through the API to study its paradigms [[Model Card]](https://github.com/openai/gpt-3/blob/master/model-card.md).\n"
+ license:
+ explanation: "The asset doesn't have a known license as it hasn't been publicly\
+ \ released.\n"
+ value: unknown
+ modality:
+ explanation: text (English)
+ value: text; text
+ model_card: https://github.com/openai/gpt-3/blob/master/model-card.md
+ monitoring: "OpenAI reviews all use cases of the model [[Model Card]](https://github.com/openai/gpt-3/blob/master/model-card.md).\n"
+ name: GPT-3
+ organization: OpenAI
+ prohibited_uses: "Access to GPT-3 is governed by Open AI API Usage Guidelines\
+ \ and API Terms of Use, prohibiting the use of the API in a way that causes\
+ \ societal harm. [[Usage Guidelines]] (https://beta.openai.com/docs/usage-guidelines/content-policy)\
+ \ [[Terms of Use]](https://openai.com/api/policies/terms/). The list of disallowed\
+ \ applications can be found in the usage guidelines [[Disallowed Applications]]\
+ \ (https://beta.openai.com/docs/usage-guidelines/disallowed-applications).\n"
+ quality_control: "One quality control method OpenAI employed was releasing GPT-3\
+ \ only through the OpenAI API. OpenAI states that it is easier to respond to\
+ \ misuse when the access to the model is gated through the API. It also hints\
+ \ that it plans to broaden the API access over time based on the amount of misuse\
+ \ [[OpenAI API Blog Post]](https://openai.com/blog/openai-api/). The authors\
+ \ identify potential misuses of GPT-3 in the paper and analyze it for fairness,\
+ \ bias and representation issues, but do not identify mitigation strategies\
+ \ [[Section 6]](https://arxiv.org/pdf/2005.14165.pdf#section.6).\n"
+ size: 175B parameters (dense)
 training_emissions:
+ explanation: "Estimate of the CO2(e) emissions for GPT-3 were not provided by\
+ \ OpenAI, but they were provided by a follow up work investigating the CO2\
+ \ equivalent emissions (CO2e) of GPT-3 [[Patterson et al.]] (https://arxiv.org/ftp/arxiv/papers/2104/2104.10350.pdf).\n"
 value: 552.1 tCO2e
- explanation: >
- Estimate of the CO2(e) emissions for GPT-3 were not provided
- by OpenAI, but they were provided by a follow up work investigating the CO2
- equivalent emissions (CO2e) of GPT-3
- [[Patterson et al.]]
- (https://arxiv.org/ftp/arxiv/papers/2104/2104.10350.pdf).
- training_time:
- value: 3640 petaflop/s-days
- explanation: >
- The time required to train different sized GPT-3 models are listed in
- [[Table D.1]](https://arxiv.org/pdf/2005.14165.pdf#table.caption.50).
- The time required to train the GPT-3 model with 175B parameters is
- reported as 3.64E+03 petaflop/s-days.
 training_hardware:
+ explanation: "The original paper doesn't specify the training hardware for GPT-3,\
+ \ but a follow up blog post indicates that it was trained on an Azure cluster,\
+ \ using 10000 GPUs with 400 Gbps [[Microsoft Blog Post]] (https://blogs.microsoft.com/ai/openai-azure-supercomputer/).\n"
 value: Azure
- explanation: >
- The original paper doesn't specify the training hardware for GPT-3,
- but a follow up blog post indicates that it was trained on a cluster on
- Azure cluster, using 10000 GPUs with 400 Gbps
- [[Microsoft Blog Post]]
- (https://blogs.microsoft.com/ai/openai-azure-supercomputer/).
- quality_control: >
- One quality control method OpenAI employed was releasing GPT-3 only through
- the OpenAI API.
OpenAI states that it is easier to respond to misuse when - the access to the model is gated through the API. It also hints that it - plans to broaden the API access over time based on the amount of misuse - [[OpenAI API Blog Post]](https://openai.com/blog/openai-api/). - The authors identify potential misuses of GPT-3 in the paper and analyze - it for fairness, bias and representation issues, but do not identify - mitigation strategies - [[Section 6]](https://arxiv.org/pdf/2005.14165.pdf#section.6). - # Downstream - access: + training_time: + explanation: "The time required to train different sized GPT-3 models are listed\ + \ in [[Table D.1]](https://arxiv.org/pdf/2005.14165.pdf#table.caption.50).\ + \ The time required to train the GPT-3 model with 175B parameters is reported\ + \ as 3.64E+03 petaflop/s-days.\n" + value: 3640 petaflop/s-days + type: model + url: https://arxiv.org/pdf/2005.14165.pdf +- access: + explanation: "The model is made available via the OpenAI API [[OpenAI API]](https://openai.com/api/)\ + \ as code-cushman-001 according to the [[Model Index]](https://platform.openai.com/docs/model-index-for-researchers).\n" value: limited - explanation: > - The GPT-3 model isn't fully released to the public, but - it was made available to Microsoft through the licencing agreement between - OpenAI and Microsoft - [[OpenAI Blog Post]] - (https://openai.com/blog/openai-licenses-gpt-3-technology-to-microsoft/). - The public can access the model through the Open AI API, which is available - in supported countries - [[Supported Countries]](https://beta.openai.com/docs/supported-countries) - [[OpenAI API]](https://openai.com/api/). - license: - value: unknown - explanation: > - The asset doesn't have a known license as it hasn't been publicly - released. - intended_uses: > - GPT-3 was intended to be use through the OpenAI API by developers for - language applications. Other intended use of GPT-3 include researchers - accessing the model through the API to study its paradigms - [[Model Card]](https://github.com/openai/gpt-3/blob/master/model-card.md). - prohibited_uses: > - Access to GPT-3 is governed by Open AI API Usage Guidelines and API Terms - of Use, prohibiting the use of the API in a way that causes societal harm. - [[Usage Guidelines]] - (https://beta.openai.com/docs/usage-guidelines/content-policy) - [[Terms of Use]](https://openai.com/api/policies/terms/). - The list of disallowed applications can be found in the usage guidelines - [[Disallowed Applications]] - (https://beta.openai.com/docs/usage-guidelines/disallowed-applications). - monitoring: > - OpenAI reviews all use cases of the model - [[Model Card]](https://github.com/openai/gpt-3/blob/master/model-card.md). - feedback: > - Feedback for GPT-3 can be provided on the feedback form linked in the - model card - [[Model Card]](https://github.com/openai/gpt-3/blob/master/model-card.md). - The form is especially meant to collect feedback on concerns about misuse, - synthetic text detection, bias, and risk of generative language models. - -- type: model - name: Codex - # General - organization: OpenAI - description: > - Codex is a GPT language model fine-tuned on publicly available code from - GitHub. 
+ analysis: "The model was evaluated using the HumanEval dataset with pass@k metric\ + \ and BLEU scores [[Section 2]](https://arxiv.org/pdf/2107.03374.pdf#section.2).\n" created_date: + explanation: "The date that Codex was announced to the public [[OpenAI Blog\ + \ Post]](https://openai.com/blog/openai-codex/).\n" value: 2021-08-10 - explanation: > - The date that Codex was announced to the public - [[OpenAI Blog Post]](https://openai.com/blog/openai-codex/). - url: https://arxiv.org/pdf/2107.03374.pdf - model_card: none - modality: text (English) and code - size: 12B parameters (dense model) - analysis: > - The model was evaluated using the HumanEval dataset with pass@k metric and - BLEU scores - [[Section 2]](https://arxiv.org/pdf/2107.03374.pdf#section.2). - # Construction dependencies: [GPT-3, Codex dataset, HumanEval] - training_emissions: - value: unknown - explanation: Authors do not report the training emissions. - training_time: - value: 100-1000 petaflop/s-days - explanation: > - Authors estimate hundreds of petaflop/s-days of compute - [[Section 7.6]](https://arxiv.org/pdf/2107.03374.pdf#subsection.7.6), but - don't provide an exact number. - training_hardware: - value: Azure - explanation: > - The paper specifies that Azure was used, but the underlying - architecture wasn't specified. - quality_control: > - The model wasn't fully released to the public as a quality control measure. - The authors identify potential risks of Codex in their paper due to the - following: over-reliance, misalignment, bias and representation, economic - and labor market impacts, security implications, environmental impact and - legal implications. They also make suggestions for some of these, but do not - implement them in Codex - [[Section 7]](https://arxiv.org/pdf/2107.03374.pdf#section.7). - # Downstream - access: - value: limited - explanation: > - The model is made available via the OpenAI API - [[OpenAI API]](https://openai.com/api/) as code-cushman-001 according to the - [[Model Index]](https://platform.openai.com/docs/model-index-for-researchers). + description: "Codex is a GPT language model fine-tuned on publicly available code\ + \ from GitHub.\n" + feedback: "Email the authors [[Codex Paper]](https://arxiv.org/pdf/2107.03374.pdf).\n" + intended_uses: "Codex is intended to be used for coding related language modelling\ + \ tasks.\n" license: + explanation: "The asset doesn't have a known license as it hasn't been publicly\ + \ released.\n" value: unknown - explanation: > - The asset doesn't have a known license as it hasn't been publicly - released. - intended_uses: > - Codex is intended to be used for coding related language modelling tasks. - prohibited_uses: - value: unknown - explanation: The prohibited uses of the model aren't specified. + modality: + explanation: text (English) and code + value: code, text; code, text + model_card: none monitoring: + explanation: "There isn't any known monitoring in place for the model, but there\ + \ may be internal mechanisms.\n" value: unknown - explanation: > - There isn't any known monitoring in place for the model, but there - may be internal mechanisms. - feedback: > - Email the authors - [[Codex Paper]](https://arxiv.org/pdf/2107.03374.pdf). - -- type: model - name: InstructGPT - # General + name: Codex organization: OpenAI - description: > - InstructGPT is a family of GPT-3 based models fine-tuned on human feedback, - which allows for better instruction following capabilities than GPT-3. 
- created_date: - value: 2022-01-27 - explanation: > - Date of the public announcement introducing InstructGPT - [[OpenAI Blog Post]] (https://openai.com/blog/instruction-following/). - url: https://arxiv.org/pdf/2203.02155.pdf - model_card: https://github.com/openai/following-instructions-human-feedback/blob/main/model-card.md - modality: text (English) and code - size: - value: 175B parameters (dense model) - explanation: Size of the largest InstructGPT model. + prohibited_uses: + explanation: The prohibited uses of the model aren't specified. + value: unknown + quality_control: "The model wasn't fully released to the public as a quality control\ + \ measure. The authors identify potential risks of Codex in their paper due\ + \ to the following: over-reliance, misalignment, bias and representation, economic\ + \ and labor market impacts, security implications, environmental impact and\ + \ legal implications. They also make suggestions for some of these, but do not\ + \ implement them in Codex [[Section 7]](https://arxiv.org/pdf/2107.03374.pdf#section.7).\n" + size: 12B parameters (dense) + training_emissions: + explanation: Authors do not report the training emissions. + value: unknown + training_hardware: + explanation: "The paper specifies that Azure was used, but the underlying architecture\ + \ wasn't specified.\n" + value: Azure + training_time: + explanation: "Authors estimate hundreds of petaflop/s-days of compute [[Section\ + \ 7.6]](https://arxiv.org/pdf/2107.03374.pdf#subsection.7.6), but don't provide\ + \ an exact number.\n" + value: 100-1000 petaflop/s-days + type: model + url: https://arxiv.org/pdf/2107.03374.pdf +- access: + explanation: "The model is made available via the OpenAI API but the specific\ + \ endpoint (davinci-instruct-beta) is currently unavailable. [[Model Index]](https://platform.openai.com/docs/model-index-for-researchers).\n" + value: closed analysis: The model was evaluated on human ratings to the InstructGPT answers to the prompts submitted to the OpenAI API as well as on public NLP datasets spanning truthfulness, toxicity, and bias, question answering, reading comprehension, and summarization tasks. - # Construction + created_date: + explanation: "Date of the public announcement introducing InstructGPT [[OpenAI\ + \ Blog Post]] (https://openai.com/blog/instruction-following/).\n" + value: 2022-01-27 dependencies: [GPT-3, OpenAI API] - training_emissions: + description: "InstructGPT is a family of GPT-3 based models fine-tuned on human\ + \ feedback, which allows for better instruction following capabilities than\ + \ GPT-3.\n" + feedback: "Email the authors [[InstructGPT Paper]](https://arxiv.org/pdf/2203.02155.pdf).\n" + intended_uses: "As stated in the model card: \"The intended direct users of InstructGPT\ + \ are developers who access its capabilities via the OpenAI API. Through the\ + \ OpenAI API, the model can be used by those who may not have AI development\ + \ experience, to build and explore language modeling systems across a wide range\ + \ of functions. 
We also anticipate that the model will continue to be used by\ + \ researchers to better understand the behaviors, capabilities, biases, and\ + \ constraints of large-scale language models\" [[Model Card]](https://github.com/openai/following-instructions-human-feedback/blob/main/model-card.md).\n" + license: + explanation: "The asset doesn't have a known license as it hasn't been publicly\ + \ released.\n" value: unknown - explanation: The authors do not estimate the emissions of the model. - training_time: - value: 60 petaflops/s-days - explanation: > - 175B SFT model required 4.9 petaflops/s-days; 175B PPO-ptx model required - 60 petaflops/s-days - [[Section 5]](https://arxiv.org/pdf/2203.02155.pdf#section.5). - training_hardware: - value: unknown - explanation: The authors do not disclose the training hardware used. - quality_control: > - The model wasn't fully released to the public as a quality control measure. - # Downstream - access: - value: closed - explanation: > - The model is made available via the OpenAI API but the specific endpoint (davinci-instruct-beta) - is currently unavailable. - [[Model Index]](https://platform.openai.com/docs/model-index-for-researchers). - license: - value: unknown - explanation: > - The asset doesn't have a known license as it hasn't been publicly - released. - intended_uses: > - As stated in the model card: "The intended direct users of InstructGPT are - developers who access its capabilities via the OpenAI API. Through the - OpenAI API, the model can be used by those who may not have AI development - experience, to build and explore language modeling systems across a wide - range of functions. We also anticipate that the model will continue to be - used by researchers to better understand the behaviors, capabilities, - biases, and constraints of large-scale language models" - [[Model Card]](https://github.com/openai/following-instructions-human-feedback/blob/main/model-card.md). - prohibited_uses: > - Access to InstructGPT is governed by Open AI API Usage Guidelines and API - Terms - of Use, prohibiting the use of the API in a way that causes societal harm. - [[Usage Guidelines]] - (https://beta.openai.com/docs/usage-guidelines/content-policy) - [[Terms of Use]](https://openai.com/api/policies/terms/). - The list of disallowed applications can be found in the usage guidelines - [[Disallowed Applications]] - (https://beta.openai.com/docs/usage-guidelines/disallowed-applications). + modality: + explanation: text (English) and code + value: code, text; code, text + model_card: https://github.com/openai/following-instructions-human-feedback/blob/main/model-card.md monitoring: + explanation: "There isn't any known monitoring in place for the model, but there\ + \ may be internal mechanisms.\n" value: unknown - explanation: > - There isn't any known monitoring in place for the model, but there - may be internal mechanisms. - feedback: > - Email the authors [[InstructGPT Paper]](https://arxiv.org/pdf/2203.02155.pdf). - -- type: model - name: Whisper - # General + name: InstructGPT organization: OpenAI - description: > - GPT-3 is a speech recognition model. - created_date: - value: 2022-09-21 - explanation: > - The date that Whisper was announced to the public - [[OpenAI Blog Post]](https://openai.com/blog/whisper/). 
- url: https://cdn.openai.com/papers/whisper.pdf - model_card: https://github.com/openai/whisper/blob/main/model-card.md - modality: Speech (multilingual, though primarily English) - size: - value: 1.5B parameters (dense model) - explanation: > - Whisper comes in several sizes (39M, 74M, 244M, 769M, 1.55B). Here we report - the size of the largest - model [[Table 1]](https://cdn.openai.com/papers/whisper.pdf). - analysis: The model was evaluated for zero-shot English and multingual speech - recognition, translation, language identification and robustness to noise. - # Construction - dependencies: [Whisper dataset] + prohibited_uses: "Access to InstructGPT is governed by Open AI API Usage Guidelines\ + \ and API Terms of Use, prohibiting the use of the API in a way that causes\ + \ societal harm. [[Usage Guidelines]] (https://beta.openai.com/docs/usage-guidelines/content-policy)\ + \ [[Terms of Use]](https://openai.com/api/policies/terms/). The list of disallowed\ + \ applications can be found in the usage guidelines [[Disallowed Applications]]\ + \ (https://beta.openai.com/docs/usage-guidelines/disallowed-applications).\n" + quality_control: "The model wasn't fully released to the public as a quality control\ + \ measure.\n" + size: 175B parameters (dense) training_emissions: + explanation: The authors do not estimate the emissions of the model. value: unknown - explanation: > - Authors do not report the training emissions. - training_time: - value: unknown - explanation: > - Authors do not report the training time. training_hardware: - value: Azure - explanation: > - The original paper doesn't specify the training hardware for Whisper, - but we expect it is trained on Azure given other models from OpenAI are trained - using Azure - [[Microsoft Blog Post]] - (https://blogs.microsoft.com/ai/openai-azure-supercomputer/). - quality_control: No specific quality control methods are documented. - # Downstream - access: + explanation: The authors do not disclose the training hardware used. + value: unknown + training_time: + explanation: "175B SFT model required 4.9 petaflops/s-days; 175B PPO-ptx model\ + \ required 60 petaflops/s-days [[Section 5]](https://arxiv.org/pdf/2203.02155.pdf#section.5).\n" + value: 60 petaflops/s-days + type: model + url: https://arxiv.org/pdf/2203.02155.pdf +- access: + explanation: "Models are available at [https://github.com/openai/whisper](https://github.com/openai/whisper).\n" value: open - explanation: > - Models are available at [https://github.com/openai/whisper](https://github.com/openai/whisper). + analysis: The model was evaluated for zero-shot English and multingual speech + recognition, translation, language identification and robustness to noise. 
+ created_date:
+ explanation: "The date that Whisper was announced to the public [[OpenAI Blog\
+ \ Post]](https://openai.com/blog/whisper/).\n"
+ value: 2022-09-21
+ dependencies: [Whisper dataset]
+ description: "Whisper is a speech recognition model.\n"
+ feedback: "The discussions page of the codebase is not formally cited as a place\
+ \ for feedback, but is being used in this way [[Discussions page]](https://github.com/openai/whisper/discussions)\n"
+ intended_uses: "Whisper is a general-purpose speech recognition model; it is a\
+ \ multi-task model that can perform multilingual speech recognition as well\
+ \ as speech translation and language identification.\n"
 license:
- value: MIT
 explanation: https://github.com/openai/whisper](https://github.com/openai/whisper
+ value: MIT
+ modality:
+ explanation: Speech (multilingual, though primarily English)
+ value: audio; audio
+ model_card: https://github.com/openai/whisper/blob/main/model-card.md
 monitoring:
+ explanation: "No monitoring is mentioned by the authors.\n"
 value: none
- explanation: >
- No monitoring is mentioned by the authors.
- feedback: >
- The discussions page of the codebase is not formally cited as a place
- for feedback, but is being used in this way [[Discussions page]](https://github.com/openai/whisper/discussions)
-
-- type: model
- name: CLIP
- # General
+ name: Whisper
 organization: OpenAI
- description: >
- "CLIP (Contrastive Language-Image Pre-Training) is a neural network trained
- on a variety of (image, text) pairs. It can be instructed in natural
- language to predict the most relevant text snippet, given an image,
- without directly optimizing for the task, similarly to the zero-shot
- capabilities of GPT-2 and 3. We found CLIP matches the performance of the
- original ResNet50 on ImageNet “zero-shot” without using any of the original
- 1.28M labeled examples, overcoming several major challenges in computer
- vision"
- [[CLIP Repository]](https://github.com/openai/CLIP).
- created_date:
- value: 2021-01-05
- explanation: >
- The date of the blog post announcing CLIP
- [[OpenAI Blog Post]](https://openai.com/blog/clip/).
- url: https://arxiv.org/pdf/2103.00020.pdf
- model_card: https://github.com/openai/CLIP/blob/main/model-card.md
- modality: Text (English) and Image
- size:
+ prohibited_uses: unknown
+ quality_control: No specific quality control methods are documented.
+ size: 1.5B parameters (dense)
+ training_emissions:
+ explanation: "Authors do not report the training emissions.\n"
 value: unknown
- explanation: >
- The total size is unknown, but the largest CLIP model is a
- a combination of 63M-parameter (dense) text encoder and a 307M-parameter
- vision encoder.
+ training_hardware: + explanation: "The original paper doesn't specify the training hardware for Whisper,\ + \ but we expect it is trained on Azure given other models from OpenAI are\ + \ trained using Azure [[Microsoft Blog Post]] (https://blogs.microsoft.com/ai/openai-azure-supercomputer/).\n" + value: Azure + training_time: + explanation: "Authors do not report the training time.\n" + value: unknown + type: model + url: https://cdn.openai.com/papers/whisper.pdf +- access: + explanation: "Model checkpoints and the helper code can be accessed at the official\ + \ CLIP repository [[CLIP Repository]](https://github.com/openai/CLIP).\n" + value: open analysis: The model was evaluated on standard vision datasets (e.g. CIFAR10, ImageNet) and showed robust state of the art results. - # Construction + created_date: + explanation: "The date of the blog post announcing CLIP [[OpenAI Blog Post]](https://openai.com/blog/clip/).\n" + value: 2021-01-05 dependencies: [CLIP dataset] - training_emissions: unknown - training_time: - value: 71.12 petaflop/s-day - explanation: > - The exact training time of CLIP depends on the vision and language - encoders used: "The largest ResNet model, RN50x64, took 18 days to train - on 592 V100 GPUs while the largest Vision Transformer took 12 days on 256 - V100 GPUs. For the ViT-L/14 we also pre-train at a higher 336 pixel - resolution for one additional epoch to boost performance ... Unless - otherwise specified, all results reported in this paper as “CLIP” use this - model which we found to perform best" - [[CLIP paper]](https://arxiv.org/pdf/2103.00020.pdf). - - Using the GPU Time method outlined in the - [[OpenAI AI and Computer Blog]](https://openai.com/blog/ai-and-compute/#addendum), - we can compute the training time in petaflop/s-day for RN50x64 and - ViT-L/14 with the following equation: Number of GPUs * (peta-flops/GPU) * - days trained * estimated utilization. - We will assume that estimated utilization is 33%, following - [[OpenAI AI and Computer Blog]](https://openai.com/blog/ai-and-compute/#addendum). - The specific V100 GPU used isn't cleared from the paper, so we will assume - that V100 PCle was used. The V100 PCle GPUs have a single precision - performance of 15.7 teraflops - [[V100 Datasheet]](https://images.nvidia.com/content/technologies/volta/pdf/volta-v100-datasheet-update-us-1165301-r5.pdf), - which is equal to 15.7 / 1000 = 0.0157 petaflops. - - Following the formula "Number of GPUs * (peta-flops/GPU) * days trained * - estimated utilization", we can compute the petaflop/s-day for RN50x64 as - follows: - 592 * 0.0157 * 18 * 0.33. - Similarly, for ViT-L/14, we can compute the same as: - 256 * 0.0157 * 12 * 0.33. - Adding the two, we estimate the total training time as 71.12 - petaflop/s-day. - training_hardware: NVIDIA V100 GPUs - quality_control: > - The authors found that the performance of the model depended heavily on - which classes are included (and excluded) for a given task. They reported - significant race and gender based disparities on the Fairface dataset, - depending on how the classes were constructed. The authors also demonstrated - that the model was capable of racial profiling with high accuracy - [[Section 7]](https://arxiv.org/pdf/2103.00020.pdf#section.7). - # Downstream - access: - value: open - explanation: > - Model checkpoints and the helper code can be accessed - at the official CLIP repository - [[CLIP Repository]](https://github.com/openai/CLIP). 
+ description: "\"CLIP (Contrastive Language-Image Pre-Training) is a neural network\
+ \ trained on a variety of (image, text) pairs. It can be instructed in natural\
+ \ language to predict the most relevant text snippet, given an image, without\
+ \ directly optimizing for the task, similarly to the zero-shot capabilities\
+ \ of GPT-2 and 3. We found CLIP matches the performance of the original ResNet50\
+ \ on ImageNet “zero-shot” without using any of the original 1.28M labeled examples,\
+ \ overcoming several major challenges in computer vision\" [[CLIP Repository]](https://github.com/openai/CLIP).\n"
+ feedback: Questions can be shared at the feedback form linked in the CLIP model
+ card [[Model Card]](https://github.com/openai/CLIP/blob/main/model-card.md).
+ intended_uses: "The model is intended to be used by AI researchers to better understand\
+ \ "robustness, generalization, and other capabilities, biases, and constraints\
+ \ of computer vision models" [[CLIP Model Card]](https://github.com/openai/CLIP/blob/main/model-card.md).\n"
 license: MIT
+ modality:
+ explanation: Text (English) and Image
+ value: image, text; image, text
+ model_card: https://github.com/openai/CLIP/blob/main/model-card.md
 monitoring:
- value: none
 explanation: There are no monitoring mechanisms in place for CLIP.
- feedback: Questions can be shared at the feedback form linked in the CLIP model
- card [[Model Card]](https://github.com/openai/CLIP/blob/main/model-card.mdlicen).
-
-- type: model
- name: DALL·E
- # General
+ value: none
+ name: CLIP
 organization: OpenAI
- description: >
- DALL·E is a GPT-3 based model trained to generate images from text
- descriptions. The authors found that it had "a diverse set of capabilities,
- including creating anthropomorphized versions of animals and objects,
- combining unrelated concepts in plausible ways, rendering text, and
- applying transformations to existing images"
- [[OpenAI Blog Post]](https://openai.com/blog/dall-e/).
+ prohibited_uses: "\"Any deployed use case of the model - whether commercial or\
+ \ not - is currently out of scope. Non-deployed use cases such as image search\
+ \ in a constrained environment, are also not recommended unless there is thorough\
+ \ in-domain testing of the model with a specific, fixed class taxonomy. This\
+ \ is because our safety assessment demonstrated a high need for task specific\
+ \ testing especially given the variability of CLIP’s performance with different\
+ \ class taxonomies. This makes untested and unconstrained deployment of the\
+ \ model in any use case currently potentially harmful.\nCertain use cases which\
+ \ would fall under the domain of surveillance and facial recognition are always\
+ \ out-of-scope regardless of performance of the model. This is because the use\
+ \ of artificial intelligence for tasks such as these can be premature currently\
+ \ given the lack of testing norms and checks to ensure its fair use.\nSince\
+ \ the model has not been purposefully trained in or evaluated on any languages\
+ \ other than English, its use should be limited to English language use cases\"\
+ \ [[Model Card]](https://github.com/openai/CLIP/blob/main/model-card.md).\n"
+ quality_control: "The authors found that the performance of the model depended\
+ \ heavily on which classes are included (and excluded) for a given task. They\
+ \ reported significant race and gender based disparities on the Fairface dataset,\
+ \ depending on how the classes were constructed. The authors also demonstrated\
+ \ that the model was capable of racial profiling with high accuracy [[Section\
+ \ 7]](https://arxiv.org/pdf/2103.00020.pdf#section.7).\n"
+ size: unknown
+ training_emissions: unknown
+ training_hardware: NVIDIA V100 GPUs
+ training_time:
+ explanation: "The exact training time of CLIP depends on the vision and language\
+ \ encoders used: \"The largest ResNet model, RN50x64, took 18 days to train\
+ \ on 592 V100 GPUs while the largest Vision Transformer took 12 days on 256\
+ \ V100 GPUs. For the ViT-L/14 we also pre-train at a higher 336 pixel resolution\
+ \ for one additional epoch to boost performance ... Unless otherwise specified,\
+ \ all results reported in this paper as “CLIP” use this model which we found\
+ \ to perform best\" [[CLIP paper]](https://arxiv.org/pdf/2103.00020.pdf).\n\
Using the GPU Time method outlined in the [[OpenAI AI and Computer Blog]](https://openai.com/blog/ai-and-compute/#addendum),\
+ \ we can compute the training time in petaflop/s-day for RN50x64 and ViT-L/14\
+ \ with the following equation: Number of GPUs * (peta-flops/GPU) * days trained\
+ \ * estimated utilization. We will assume that estimated utilization is 33%,\
+ \ following [[OpenAI AI and Computer Blog]](https://openai.com/blog/ai-and-compute/#addendum).\
+ \ The specific V100 GPU used isn't clear from the paper, so we will assume\
+ \ that V100 PCIe was used. The V100 PCIe GPUs have a single precision performance\
+ \ of 15.7 teraflops [[V100 Datasheet]](https://images.nvidia.com/content/technologies/volta/pdf/volta-v100-datasheet-update-us-1165301-r5.pdf),\
+ \ which is equal to 15.7 / 1000 = 0.0157 petaflops.\nFollowing the formula\
+ \ \"Number of GPUs * (peta-flops/GPU) * days trained * estimated utilization\"\
, we can compute the petaflop/s-day for RN50x64 as follows: 592 * 0.0157 *\
+ \ 18 * 0.33. Similarly, for ViT-L/14, we can compute the same as: 256 * 0.0157\
+ \ * 12 * 0.33.
Adding the two, we estimate the total training time as 71.12\ + \ petaflop/s-day.\n" + value: 71.12 petaflop/s-day + type: model + url: https://arxiv.org/pdf/2103.00020.pdf +- access: + explanation: Available via the OpenAI API as of Q1 2023. + value: limited + analysis: "The model was evaluated against three prior approaches, AttnGAN, DM-GAN,\ + \ and DF-GAN using Inception Score and Fréchet Inception Distance on MS-COCO\ + \ as metrics. The model was also evaluated by humans and received the majority\ + \ of the votes in generating images that look realistic and better match the\ + \ caption when compared to the images generated by DF-GAN [[Section]](https://arxiv.org/pdf/2102.12092.pdf#section.3).\n" created_date: + explanation: "The date of the blog post announcing DALL·E [[OpenAI Blog Post]](https://openai.com/blog/dall-e/).\n" value: 2021-01-05 - explanation: > - The date of the blog post announcing DALL·E - [[OpenAI Blog Post]](https://openai.com/blog/dall-e/). - url: https://arxiv.org/pdf/2102.12092.pdf - model_card: https://github.com/openai/DALL-E/blob/master/model_card.md - modality: Text (English) and Image - size: 12B parameters (dense model) - analysis: > - The model was evaluated against three prior approaches, AttnGAN, DM-GAN, and - DF-GAN using Inception Score and Fréchet Inception Distance on MS-COCO as - metrics. The model was also evaluated by humans and received the majority - of the votes in generating images that look realistic and better match the - caption when compared to the images generated by DF-GAN - [[Section]](https://arxiv.org/pdf/2102.12092.pdf#section.3). - # Construction dependencies: [DALL·E dataset] - training_emissions: - value: unknown - explanation: The training emissions were not reported. - training_time: - value: unknown - explanation: The training emissions were not reported. - training_hardware: NVIDIA V100 GPUs - quality_control: unknown - # Downstream - access: - value: limited - explanation: Available via the OpenAI API as of Q1 2023. + description: "DALL·E is a GPT-3 based model trained to generate images from text\ + \ descriptions. The authors found that it had \"a diverse set of capabilities,\ + \ including creating anthropomorphized versions of animals and objects, combining\ + \ unrelated concepts in plausible ways, rendering text, and applying transformations\ + \ to existing images\" [[OpenAI Blog Post]](https://openai.com/blog/dall-e/).\n" + feedback: "Contact the paper author(s) specified on the paper [[Paper]](https://arxiv.org/pdf/2102.12092.pdf).\n" + intended_uses: "\"The model is intended for others to use for training their own\ + \ generative models\" [[Model Card]](https://github.com/openai/DALL-E/blob/master/model_card.md).\n" license: + explanation: "The asset doesn't have a known license as it hasn't been publicly\ + \ released.\n" value: unknown - explanation: > - The asset doesn't have a known license as it hasn't been publicly - released. - intended_uses: > - "The model is intended for others to use for training their own generative - models" - [[Model Card]](https://github.com/openai/DALL-E/blob/master/model_card.md). - prohibited_uses: - value: unknown - explanation: The prohibited uses of the model are unknown. + modality: + explanation: Text (English) and Image + value: image, text; image, text + model_card: https://github.com/openai/DALL-E/blob/master/model_card.md monitoring: - value: none explanation: There are no monitoring mechanisms in place for DALL·E. 
- feedback: > - Contact the paper author(s) specified on the paper - [[Paper]](https://arxiv.org/pdf/2102.12092.pdf). - - -- type: model - name: Jukebox - organization: OpenAI - description: Jukebox is a generate model that produces music - created_date: - value: 2020-04-30 - explanation: The date the model paper was released - url: https://arxiv.org/abs/2005.00341 - model_card: value: none - explanation: No model card found as of 2023-01-09 - modality: Audio (music) - size: 5B parameters + name: DALL·E + organization: OpenAI + prohibited_uses: + explanation: The prohibited uses of the model are unknown. + value: unknown + quality_control: unknown + size: 12B parameters (dense) + training_emissions: + explanation: The training emissions were not reported. + value: unknown + training_hardware: NVIDIA V100 GPUs + training_time: + explanation: The training emissions were not reported. + value: unknown + type: model + url: https://arxiv.org/pdf/2102.12092.pdf +- access: + explanation: The model can be downloaded from the [Github repository](https://github.com/openai/jukebox) + value: open analysis: Evaluations in paper are primarily considering the fidelity and novelty of samples from Jukebox. + created_date: + explanation: The date the model paper was released + value: 2020-04-30 dependencies: [Jukebox Dataset] - training_emissions: unknown - training_time: 4 weeks - training_hardware: 510 V100s - quality_control: '' - access: - value: open - explanation: The model can be downloaded from the [Github repository](https://github.com/openai/jukebox) + description: Jukebox is a generate model that produces music + feedback: none + intended_uses: '' license: + explanation: "The license is provided in the [Github repository](https://github.com/openai/jukebox).\n" value: Noncommercial Use License - explanation: > - The license is provided in the [Github repository](https://github.com/openai/jukebox). - intended_uses: '' - prohibited_uses: none + modality: + explanation: Audio (music) + value: audio; audio + model_card: + explanation: No model card found as of 2023-01-09 + value: none monitoring: none - feedback: none - -- type: model - name: DALL·E 2 - # General + name: Jukebox organization: OpenAI - description: > - "DALL·E 2 is an artificial intelligence model that takes a text prompt - and/or existing image as an input and generates a new image as an output" - [[System Card]] - (https://github.com/openai/dalle-2-preview/blob/main/system-card.md). - The model wasn't fully released, but OpenAI released a version of the model - (DALL·E 2 Preview) to a select group of testers. + prohibited_uses: none + quality_control: '' + size: 5B parameters (dense) + training_emissions: unknown + training_hardware: 510 V100s + training_time: 4 weeks + type: model + url: https://arxiv.org/abs/2005.00341 +- access: + explanation: "The model is available to OpenAI employees, researchers, creatives\ + \ and company friends. OpenAI opened a waitlist for DALL·E 2 access. [[System\ + \ Card]] (https://github.com/openai/dalle-2-preview/blob/main/system-card.md#access).\n" + value: limited + analysis: "The model is capable of generating explicit content and the researchers\ + \ found limited amount of spurious content generated. 
The researchers also found\ + \ that visual synonyms can be used to prompt the model to surface unwanted generations\ + \ [[Probes and Evaluations]] (https://github.com/openai/dalle-2-preview/blob/main/system-card.md#probes-and-evaluations).\n" created_date: + explanation: "OpenAI released in a blog post in April 2020 [[OpenAI Blog Post]](https://openai.com/dall-e-2/).\n" value: 2022-04-13 - explanation: > - OpenAI released in a blog post in April 2020 - [[OpenAI Blog Post]](https://openai.com/dall-e-2/). - url: https://arxiv.org/abs/2204.06125 + dependencies: [DALL·E dataset, CLIP dataset] + description: "\"DALL·E 2 is an artificial intelligence model that takes a text\ + \ prompt and/or existing image as an input and generates a new image as an output\"\ + \ [[System Card]] (https://github.com/openai/dalle-2-preview/blob/main/system-card.md).\ + \ The model wasn't fully released, but OpenAI released a version of the model\ + \ (DALL·E 2 Preview) to a select group of testers.\n" + feedback: Feedback can be provided at support at openai.com. + intended_uses: "\"The intended use of the DALL·E 2 Preview at this time is for\ + \ personal, non-commercial exploration and research purposes by people who are\ + \ interested in understanding the potential uses of these capabilities\" [[Use]]\ + \ (https://github.com/openai/dalle-2-preview/blob/main/system-card.md#use).\n" + license: + explanation: "The asset isn't released, and hence the license is unknown.\n" + value: unknown + modality: + explanation: Text (English) and Image + value: image, text; image, text model_card: https://github.com/openai/dalle-2-preview/blob/main/system-card.md - modality: Text (English) and Image + monitoring: "Uses of the model are monitored. In the preview version, any user\ + \ can flag content. The specific policies for monitoring are not disclosed,\ + \ but possible measures include disabling of accounts violating the content\ + \ policies [[Monitoring and Reporting]] (https://github.com/openai/dalle-2-preview/blob/main/system-card.md#monitoring-and-reporting).\n" + name: DALL·E 2 + organization: OpenAI + prohibited_uses: "Use of the model is governed by the OpenAI Content Policy, which\ + \ prohibits posting of G rated content. Users are not allowed to utilize the\ + \ model in commercial products in the preview version [[Content Policy]] (https://github.com/openai/dalle-2-preview/blob/main/system-card.md#policies-and-enforcement).\n" + quality_control: "The model isn't fully released to the public as part of a quality\ + \ control measure. The usage of the model by testers is monitored and user provided\ + \ prompts are filtered [[Input filters]] (https://github.com/openai/dalle-2-preview/blob/main/system-card.md#input-filters).\n" size: unknown - analysis: > - The model is capable of generating explicit content and the researchers - found limited amount of spurious content generated. The researchers also - found that visual synonyms can be used to prompt the model to surface - unwanted generations - [[Probes and Evaluations]] - (https://github.com/openai/dalle-2-preview/blob/main/system-card.md#probes-and-evaluations). - # Construction - dependencies: [DALL·E dataset, CLIP dataset] training_emissions: unknown - training_time: unknown training_hardware: unknown - quality_control: > - The model isn't fully released to the public as part of a quality control - measure. 
The usage of the model by testers is monitored and user provided - prompts are filtered - [[Input filters]] - (https://github.com/openai/dalle-2-preview/blob/main/system-card.md#input-filters). - # Downstream - access: + training_time: unknown + type: model + url: https://arxiv.org/abs/2204.06125 +- access: + explanation: "The OpenAI API is available to the public in supported countries\ + \ [[Supported Countries]](https://beta.openai.com/docs/supported-countries)\ + \ [[OpenAI API]](https://openai.com/api/).\n" value: limited - explanation: > - The model is available to OpenAI employees, researchers, creatives - and company friends. OpenAI opened a waitlist for DALL·E 2 access. - [[System Card]] - (https://github.com/openai/dalle-2-preview/blob/main/system-card.md#access). - license: - value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - intended_uses: > - "The intended use of the DALL·E 2 Preview at this time is for personal, - non-commercial exploration and research purposes by people who are - interested in understanding the potential uses of these capabilities" - [[Use]] - (https://github.com/openai/dalle-2-preview/blob/main/system-card.md#use). - prohibited_uses: > - Use of the model is governed by the OpenAI Content Policy, which prohibits - posting of G rated content. - Users are not allowed to utilize the model in commercial products in the - preview version - [[Content Policy]] - (https://github.com/openai/dalle-2-preview/blob/main/system-card.md#policies-and-enforcement). - monitoring: > - Uses of the model are monitored. In the preview version, any user can flag - content. The specific policies for monitoring are not disclosed, but - possible measures include disabling of accounts violating the content - policies - [[Monitoring and Reporting]] - (https://github.com/openai/dalle-2-preview/blob/main/system-card.md#monitoring-and-reporting). - feedback: Feedback can be provided at support at openai.com. - -- type: application - name: OpenAI API - # General - organization: OpenAI - description: > - OpenAI API is a general purpose "text in, text out" interface connecting - users with a suite of language models. The API was initially released - as a gateway to GPT-3, but it now supports access to other, more - specialized OpenAI models. - [[Open AI Blog Post]](https://openai.com/blog/openai-api/) + adaptation: The API exposes the models fairly direclty with a range of hyperparameters + (e.g. temperature scaling). created_date: + explanation: "The date that OpenAI API was announced to the public [[Open AI\ + \ Blog Post]](https://openai.com/blog/openai-api/).\n" value: 2020-06-11 - explanation: > - The date that OpenAI API was announced to the public - [[Open AI Blog Post]](https://openai.com/blog/openai-api/). - url: https://openai.com/api/ - # Construction dependencies: - GPT-3 - Codex @@ -1079,588 +809,559 @@ - Whisper - DALL·E - GPT-4 - adaptation: The API exposes the models fairly direclty with a range of hyperparameters - (e.g. temperature scaling). - output_space: > - Given a prompting text, the OpenAI API provides access to text completions, - and log probabilities. The support for text and code embeddings were added - on 2022-01-25 - [[OpenAI Blog Post]] - (https://openai.com/blog/introducing-text-and-code-embeddings/). - quality_control: > - Given a prompt, OpenAI API checks whether a completion contains unsafe - language using its filters and marks the completion accordingly if so. 
- The API also provides developers with special endpoints that scope the - API usage. OpenAI also developed user guidelines to help developers - understand safety issues - [[OpenAI API]](https://openai.com/api/). - # Downstream - access: - value: limited - explanation: > - The OpenAI API is available to the public in supported countries - [[Supported Countries]](https://beta.openai.com/docs/supported-countries) - [[OpenAI API]](https://openai.com/api/). - terms_of_service: https://openai.com/api/policies/terms/ + description: "OpenAI API is a general purpose \"text in, text out\" interface\ + \ connecting users with a suite of language models. The API was initially released\ + \ as a gateway to GPT-3, but it now supports access to other, more specialized\ + \ OpenAI models. [[Open AI Blog Post]](https://openai.com/blog/openai-api/)\n" + failures: + explanation: "There are no known documented failures of the OpenAI API at the\ + \ time of writing.\n" + value: unknown + feedback: + explanation: "There is no known specific feedback channel for the OpenAI API,\ + \ but OpenAI support theme can be reached via email at support at openai.com.\n" + value: unknown + intended_uses: "OpenAI API was designed to be used by developers to empower applications,\ + \ and researchers to study language models [[Section 3]](https://openai.com/api/policies/terms/).\n" license: - value: custom explanation: Per the Terms of Use, a limited license is provided to the users during their use of the API [[Section 2]](https://openai.com/api/policies/terms/). - intended_uses: > - OpenAI API was designed to be used by developers to empower applications, - and researchers to study language models - [[Section 3]](https://openai.com/api/policies/terms/). - prohibited_uses: > - OpenAI API Terms of Use prohibits the use of the API in a way violating - the applicable law, including: (i) "Illegal activities, such as child - pornography, gambling, cybercrime, piracy, violating copyright, - trademark or other intellectual property laws"; (ii) "Accessing or - authorizing anyone to access the APIs from an embargoed country, region, or - territory as prohibited by the U.S. government"; (iii) "Threatening, - stalking, defaming, defrauding, degrading, victimizing or intimidating - anyone for any reason". - The usage requirements are detailed in the Terms of Use - [[Section 3]](https://openai.com/api/policies/terms/). - monitoring: > - OpenAI may monitor the API use to ensure "quality and improve OpenAI - systems, products and services; perform research; and ensure compliance" - with the Terms of Service and all applicable laws. Users of the API will - give OpenAI reasonable access to their application to monitor compliance - with the terms listed in the Terms of Service - [[Section 5(b)]](https://openai.com/api/policies/terms/). - Apps using the OpenAI API should submit an application once they are - deployed to real users. The review form takes 10 minutes to complete and - over 97% of the applications are directly accepted or conditionally - accepted. The applicants are notified of the decision within 2 business - days - [[App Review Guidelines]] - (https://beta.openai.com/docs/usage-guidelines/app-review). - feedback: - value: unknown - explanation: > - There is no known specific feedback channel for the OpenAI API, - but OpenAI support theme can be reached via email at support at openai.com. 
- # Deployment + value: custom + monitoring: "OpenAI may monitor the API use to ensure \"quality and improve OpenAI\ + \ systems, products and services; perform research; and ensure compliance\"\ + \ with the Terms of Service and all applicable laws. Users of the API will give\ + \ OpenAI reasonable access to their application to monitor compliance with the\ + \ terms listed in the Terms of Service [[Section 5(b)]](https://openai.com/api/policies/terms/).\ + \ Apps using the OpenAI API should submit an application once they are deployed\ + \ to real users. The review form takes 10 minutes to complete and over 97% of\ + \ the applications are directly accepted or conditionally accepted. The applicants\ + \ are notified of the decision within 2 business days [[App Review Guidelines]]\ + \ (https://beta.openai.com/docs/usage-guidelines/app-review).\n" monthly_active_users: + explanation: "The number of monthly active users is not known publicly, but\ + \ OpenAI mentioned that the API was being used by tens of thousands of developers\ + \ in a blog post from 2021-11-18 [[OpenAI Blog Post]](https://openai.com/blog/api-no-waitlist/).\n" value: unknown - explanation: > - The number of monthly active users is not known publicly, but - OpenAI mentioned that the API was being used by tens of thousands of - developers in a blog post from 2021-11-18 - [[OpenAI Blog Post]](https://openai.com/blog/api-no-waitlist/). + name: OpenAI API + organization: OpenAI + output_space: "Given a prompting text, the OpenAI API provides access to text\ + \ completions, and log probabilities. The support for text and code embeddings\ + \ were added on 2022-01-25 [[OpenAI Blog Post]] (https://openai.com/blog/introducing-text-and-code-embeddings/).\n" + prohibited_uses: "OpenAI API Terms of Use prohibits the use of the API in a way\ + \ violating the applicable law, including: (i) \"Illegal activities, such as\ + \ child pornography, gambling, cybercrime, piracy, violating copyright, trademark\ + \ or other intellectual property laws\"; (ii) \"Accessing or authorizing anyone\ + \ to access the APIs from an embargoed country, region, or territory as prohibited\ + \ by the U.S. government\"; (iii) \"Threatening, stalking, defaming, defrauding,\ + \ degrading, victimizing or intimidating anyone for any reason\". The usage\ + \ requirements are detailed in the Terms of Use [[Section 3]](https://openai.com/api/policies/terms/).\n" + quality_control: "Given a prompt, OpenAI API checks whether a completion contains\ + \ unsafe language using its filters and marks the completion accordingly if\ + \ so. The API also provides developers with special endpoints that scope the\ + \ API usage. OpenAI also developed user guidelines to help developers understand\ + \ safety issues [[OpenAI API]](https://openai.com/api/).\n" + terms_of_service: https://openai.com/api/policies/terms/ + type: application + url: https://openai.com/api/ user_distribution: + explanation: "The distribution of the users is not known, but we estimate majority\ + \ of the users to be developers based in the United States.\n" value: unknown - explanation: > - The distribution of the users is not known, but we estimate - majority of the users to be developers based in the United States. - failures: - value: unknown - explanation: > - There are no known documented failures of the OpenAI API at the - time of writing. 
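The output space described in the OpenAI API entry above (text completions plus log probabilities, with hyperparameters such as temperature) corresponds to a short request against the completions endpoint. The sketch below uses the pre-1.0 `openai` Python client; the model name, prompt, and API key are placeholders chosen for illustration, not values taken from this entry.

import openai

openai.api_key = "YOUR_API_KEY"  # placeholder; not a real key

# Minimal completion request: temperature controls sampling, and logprobs asks
# the API to return per-token log probabilities alongside the completion text.
response = openai.Completion.create(
    model="text-davinci-003",     # placeholder model choice
    prompt="Once upon a time",
    max_tokens=16,
    temperature=0.7,
    logprobs=5,
)

choice = response["choices"][0]
print(choice["text"])                          # the text completion
print(choice["logprobs"]["top_logprobs"][0])   # log probs for the first token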
- -- type: model - name: VPT - organization: OpenAI - description: '' - created_date: 2022-06-23 - url: https://arxiv.org/abs/2206.11795 - model_card: '' - modality: video +- access: open analysis: '' - size: 500M parameters (dense model) + created_date: 2022-06-23 dependencies: [web_clean] - training_emissions: '' - training_time: '' - training_hardware: '' - quality_control: '' - access: open - license: MIT + description: '' + feedback: '' intended_uses: '' - prohibited_uses: '' + license: MIT + modality: + explanation: video + value: video; video + model_card: '' monitoring: '' - feedback: '' - -- type: dataset - name: web_clean + name: VPT organization: OpenAI - description: '' - created_date: 2022-06-23 + prohibited_uses: '' + quality_control: '' + size: 500M parameters (dense) + training_emissions: '' + training_hardware: '' + training_time: '' + type: model url: https://arxiv.org/abs/2206.11795 - datasheet: '' - modality: Video - size: 70k hours - sample: [] +- access: closed analysis: '' + created_date: 2022-06-23 + datasheet: '' dependencies: [] - included: '' + description: '' excluded: '' - quality_control: '' - access: closed + feedback: '' + included: '' + intended_uses: '' license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - intended_uses: '' - prohibited_uses: '' + modality: + explanation: Video + value: video monitoring: '' - feedback: '' - -- type: application - name: ChatGPT + name: web_clean organization: OpenAI - description: ChatGPT is an artificial intelligence chatbot developed by OpenAI + prohibited_uses: '' + quality_control: '' + sample: [] + size: 70k hours + type: dataset + url: https://arxiv.org/abs/2206.11795 +- access: open + adaptation: '' created_date: 2022-11-30 - url: https://openai.com/blog/chatgpt dependencies: [gpt-3.5-turbo, OpenAI toxicity classifier] - adaptation: '' - output_space: '' - quality_control: '' - access: open + description: ChatGPT is an artificial intelligence chatbot developed by OpenAI + failures: '' + feedback: '' + intended_uses: '' license: - value: custom explanation: Per the Terms of Use, a limited license is provided to the users during their use of the API [[Section 2]](https://openai.com/api/policies/terms/). - terms_of_service: '' - intended_uses: '' - prohibited_uses: '' + value: custom monitoring: '' - feedback: '' monthly_active_users: - value: 100000000 explanation: https://www.reuters.com/technology/chatgpt-sets-record-fastest-growing-user-base-analyst-note-2023-02-01/ - user_distribution: '' - failures: '' - -- type: model - name: gpt-3.5-turbo + value: 100000000 + name: ChatGPT organization: OpenAI - description: '' - created_date: 2023-03-01 - url: https://platform.openai.com/docs/models/gpt-3-5 - model_card: '' - modality: text + output_space: '' + prohibited_uses: '' + quality_control: '' + terms_of_service: '' + type: application + url: https://openai.com/blog/chatgpt + user_distribution: '' +- access: limited analysis: '' - size: unknown + created_date: 2023-03-01 dependencies: [gpt-3.5-turbo dataset] - training_emissions: '' - training_time: '' - training_hardware: '' - quality_control: '' - access: limited + description: '' + feedback: '' + intended_uses: '' license: - value: custom explanation: Per the Terms of Use, a limited license is provided to the users during their use of the API [[Section 2]](https://openai.com/api/policies/terms/). 
- intended_uses: '' - prohibited_uses: '' + value: custom + modality: + explanation: text + value: text; text + model_card: '' monitoring: '' - feedback: '' - -- type: dataset - name: gpt-3.5-turbo dataset + name: gpt-3.5-turbo organization: OpenAI - description: '' - created_date: 2023-03-01 + prohibited_uses: '' + quality_control: '' + size: unknown + training_emissions: '' + training_hardware: '' + training_time: '' + type: model url: https://platform.openai.com/docs/models/gpt-3-5 +- access: limited + analysis: '' + created_date: 2023-03-01 datasheet: '' + dependencies: [] + description: '' + excluded: '' + feedback: '' + included: '' + intended_uses: '' + license: + explanation: "The asset isn't released, and hence the license is unknown.\n" + value: unknown modality: text - size: unknown + monitoring: '' + name: gpt-3.5-turbo dataset + organization: OpenAI + prohibited_uses: '' + quality_control: '' sample: [] + size: unknown + type: dataset + url: https://platform.openai.com/docs/models/gpt-3-5 +- access: limited analysis: '' + created_date: 2022-05-01 + datasheet: '' dependencies: [] - included: '' + description: '' excluded: '' - quality_control: '' - access: limited + feedback: '' + included: '' + intended_uses: '' license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - intended_uses: '' - prohibited_uses: '' + modality: + explanation: code and text + value: code, text monitoring: '' - feedback: '' - -- type: dataset name: code-davinci-002 dataset organization: OpenAI - description: '' - created_date: 2022-05-01 - url: https://platform.openai.com/docs/model-index-for-researchers - datasheet: '' - modality: code and text - size: unknown + prohibited_uses: '' + quality_control: '' sample: [] + size: unknown + type: dataset + url: https://platform.openai.com/docs/model-index-for-researchers +- access: limited analysis: '' - dependencies: [] - included: '' - excluded: '' - quality_control: '' - access: limited + created_date: 2022-05-01 + dependencies: [code-davinci-002 dataset] + description: '' + feedback: '' + intended_uses: '' license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - intended_uses: '' - prohibited_uses: '' + modality: + explanation: code and text + value: code, text; code, text + model_card: '' monitoring: '' - feedback: '' - -- type: model name: code-davinci-002 organization: OpenAI - description: '' - created_date: 2022-05-01 - url: https://platform.openai.com/docs/model-index-for-researchers - model_card: '' - modality: code and text - analysis: '' + prohibited_uses: '' + quality_control: '' size: unknown - dependencies: [code-davinci-002 dataset] training_emissions: '' - training_time: '' training_hardware: '' - quality_control: '' - access: limited + training_time: '' + type: model + url: https://platform.openai.com/docs/model-index-for-researchers +- access: limited + analysis: '' + created_date: 2022-05-01 + dependencies: [code-davinci-002] + description: '' + feedback: '' + intended_uses: '' license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. 
- intended_uses: '' - prohibited_uses: '' + modality: + explanation: text + value: text; text + model_card: '' monitoring: '' - feedback: '' - -- type: model name: text-davinci-002 organization: OpenAI - description: '' - created_date: 2022-05-01 - url: https://platform.openai.com/docs/model-index-for-researchers - model_card: '' - modality: text - analysis: '' + prohibited_uses: '' + quality_control: '' size: unknown - dependencies: [code-davinci-002] training_emissions: '' - training_time: '' training_hardware: '' - quality_control: '' - access: limited + training_time: '' + type: model + url: https://platform.openai.com/docs/model-index-for-researchers +- access: limited + analysis: '' + created_date: 2022-11-30 + dependencies: [text-davinci-002] + description: '' + feedback: '' + intended_uses: '' license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - intended_uses: '' - prohibited_uses: '' + modality: + explanation: text + value: text; text + model_card: '' monitoring: '' - feedback: '' - -- type: model name: text-davinci-003 organization: OpenAI - description: '' - created_date: 2022-11-30 - url: https://platform.openai.com/docs/model-index-for-researchers - model_card: '' - modality: text - analysis: '' + prohibited_uses: '' + quality_control: '' size: unknown - dependencies: [text-davinci-002] training_emissions: '' - training_time: '' training_hardware: '' - quality_control: '' - access: limited - license: - value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. + training_time: '' + type: model + url: https://platform.openai.com/docs/model-index-for-researchers +- access: open + adaptation: '' + created_date: 2023-03-01 + dependencies: [Whisper] + description: API to query OpenAI's Whisper model. + failures: '' + feedback: '' intended_uses: '' - prohibited_uses: '' + license: + explanation: Per the Terms of Use, a limited license is provided to the users + during their use of the API [[Section 2]](https://openai.com/api/policies/terms/). + value: custom monitoring: '' - feedback: '' - -- type: application + monthly_active_users: '' name: Whisper API organization: OpenAI - description: API to query OpenAI's Whisper model. - created_date: 2023-03-01 - url: https://openai.com/blog/introducing-chatgpt-and-whisper-apis - dependencies: [Whisper] - adaptation: '' output_space: '' + prohibited_uses: '' quality_control: '' - access: open + terms_of_service: '' + type: application + url: https://openai.com/blog/introducing-chatgpt-and-whisper-apis + user_distribution: '' +- access: open + adaptation: '' + created_date: 2023-03-01 + dependencies: [ChatGPT] + description: API to query OpenAI's ChatGPT model. + failures: '' + feedback: '' + intended_uses: '' license: - value: custom explanation: Per the Terms of Use, a limited license is provided to the users during their use of the API [[Section 2]](https://openai.com/api/policies/terms/). - terms_of_service: '' - intended_uses: '' - prohibited_uses: '' + value: custom monitoring: '' - feedback: '' monthly_active_users: '' - user_distribution: '' - failures: '' - -- type: application name: ChatGPT API organization: OpenAI - description: API to query OpenAI's ChatGPT model. 
- created_date: 2023-03-01 - url: https://openai.com/blog/introducing-chatgpt-and-whisper-apis - dependencies: [ChatGPT] - adaptation: '' output_space: '' + prohibited_uses: '' quality_control: '' - access: open - license: - value: custom - explanation: Per the Terms of Use, a limited license is provided to the users - during their use of the API [[Section 2]](https://openai.com/api/policies/terms/). terms_of_service: '' - intended_uses: '' - prohibited_uses: '' - monitoring: '' - feedback: '' - monthly_active_users: '' + type: application + url: https://openai.com/blog/introducing-chatgpt-and-whisper-apis user_distribution: '' - failures: '' - -- type: application - name: OpenAI Moderation API - organization: OpenAI +- access: open + adaptation: '' + created_date: 2022-08-10 + dependencies: [OpenAI toxicity classifier] description: This endpoint provides OpenAI API developers with free access to GPT-based classifiers that detect undesired content—an instance of using AI systems to assist with human supervision of these systems. - created_date: 2022-08-10 - url: https://openai.com/blog/new-and-improved-content-moderation-tooling - dependencies: [OpenAI toxicity classifier] - adaptation: '' - output_space: '' - quality_control: '' - access: open + failures: '' + feedback: '' + intended_uses: '' license: - value: custom explanation: Per the Terms of Use, a limited license is provided to the users during their use of the API [[Section 2]](https://openai.com/api/policies/terms/). - terms_of_service: '' - intended_uses: '' - prohibited_uses: '' + value: custom monitoring: '' - feedback: '' monthly_active_users: '' - user_distribution: '' - failures: '' - -- type: model - name: OpenAI toxicity classifier + name: OpenAI Moderation API organization: OpenAI - description: '' - created_date: 2023-01-18 - url: https://time.com/6247678/openai-chatgpt-kenya-workers/#:~:text=In%20a%20statement%2C%20an%20OpenAI,datasets%20of%20tools%20like%20ChatGPT. - model_card: '' - modality: Text + output_space: '' + prohibited_uses: '' + quality_control: '' + terms_of_service: '' + type: application + url: https://openai.com/blog/new-and-improved-content-moderation-tooling + user_distribution: '' +- access: closed analysis: '' - size: unknown + created_date: 2023-01-18 dependencies: [OpenAI toxicity dataset] - training_emissions: '' - training_time: '' - training_hardware: '' - quality_control: '' - access: closed + description: '' + feedback: '' + intended_uses: '' license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - intended_uses: '' - prohibited_uses: '' + modality: + explanation: Text + value: text; text + model_card: '' monitoring: '' - feedback: '' - -- type: dataset - name: OpenAI toxicity dataset + name: OpenAI toxicity classifier organization: OpenAI - description: '' - created_date: 2023-01-18 - url: https://time.com/6247678/openai-chatgpt-kenya-workers/#:~:text=In%20a%20statement%2C%20an%20OpenAI,datasets%20of%20tools%20like%20ChatGPT. - datasheet: '' - modality: text + prohibited_uses: '' + quality_control: '' size: unknown - sample: [] + training_emissions: '' + training_hardware: '' + training_time: '' + type: model + url: https://time.com/6247678/openai-chatgpt-kenya-workers/#:~:text=In%20a%20statement%2C%20an%20OpenAI,datasets%20of%20tools%20like%20ChatGPT. 
+- access: closed analysis: '' + created_date: 2023-01-18 + datasheet: '' dependencies: [] - included: '' + description: '' excluded: '' - quality_control: '' - access: closed + feedback: '' + included: '' + intended_uses: '' license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - intended_uses: '' - prohibited_uses: '' + modality: text monitoring: '' - feedback: '' - -- type: application - name: Sage API + name: OpenAI toxicity dataset organization: OpenAI - description: A chatbot language model available via Quora's Poe + prohibited_uses: '' + quality_control: '' + sample: [] + size: unknown + type: dataset + url: https://time.com/6247678/openai-chatgpt-kenya-workers/#:~:text=In%20a%20statement%2C%20an%20OpenAI,datasets%20of%20tools%20like%20ChatGPT. +- access: limited + adaptation: '' created_date: 2023-02-03 - url: https://quorablog.quora.com/Poe-1 dependencies: [Sage] - adaptation: '' + description: A chatbot language model available via Quora's Poe + failures: '' + feedback: '' + intended_uses: '' + license: unknown + monitoring: '' + monthly_active_users: '' + name: Sage API + organization: OpenAI output_space: '' + prohibited_uses: '' quality_control: '' - access: limited - license: unknown terms_of_service: '' + type: application + url: https://quorablog.quora.com/Poe-1 + user_distribution: '' +- access: limited + adaptation: '' + created_date: 2023-02-03 + dependencies: [Dragonfly] + description: A chatbot language model available via Quora's Poe + failures: '' + feedback: '' intended_uses: '' - prohibited_uses: '' + license: unknown monitoring: '' - feedback: '' monthly_active_users: '' - user_distribution: '' - failures: '' - -- type: application name: Dragonfly API organization: OpenAI - description: A chatbot language model available via Quora's Poe - created_date: 2023-02-03 - url: https://quorablog.quora.com/Poe-1 - dependencies: [Dragonfly] - adaptation: '' output_space: '' + prohibited_uses: '' quality_control: '' - access: limited - license: unknown terms_of_service: '' + type: application + url: https://quorablog.quora.com/Poe-1 + user_distribution: '' +- access: limited + analysis: '' + created_date: 2023-02-03 + dependencies: [] + description: A chatbot language model available via Quora's Poe + feedback: '' intended_uses: '' - prohibited_uses: '' + license: unknown + modality: + explanation: text + value: text; text + model_card: '' monitoring: '' - feedback: '' - monthly_active_users: '' - user_distribution: '' - failures: '' - -- type: model name: Sage organization: OpenAI - description: A chatbot language model available via Quora's Poe - created_date: 2023-02-03 - url: https://quorablog.quora.com/Poe-1 - model_card: '' - modality: text - analysis: '' + prohibited_uses: '' + quality_control: '' size: unknown - dependencies: [] training_emissions: '' - training_time: '' training_hardware: '' - quality_control: '' - access: limited - license: unknown + training_time: '' + type: model + url: https://quorablog.quora.com/Poe-1 +- access: limited + analysis: '' + created_date: 2023-02-03 + dependencies: [] + description: A chatbot language model available via Quora's Poe + feedback: '' intended_uses: '' - prohibited_uses: '' + license: unknown + modality: + explanation: text + value: text; text + model_card: '' monitoring: '' - feedback: '' - -- type: model name: Dragonfly organization: OpenAI - description: A chatbot language model available 
via Quora's Poe - created_date: 2023-02-03 - url: https://quorablog.quora.com/Poe-1 - model_card: '' - modality: text - analysis: '' + prohibited_uses: '' + quality_control: '' size: unknown - dependencies: [] training_emissions: '' - training_time: '' training_hardware: '' - quality_control: '' - access: limited - license: unknown + training_time: '' + type: model + url: https://quorablog.quora.com/Poe-1 +- access: limited + adaptation: '' + created_date: 2023-03-07 + dependencies: [ChatGPT API] + description: The app integrates ChatGPT’s powerful AI technology to deliver instant + conversation summaries, research tools, and writing assistance directly in Slack + to help millions of companies work more productively. + failures: '' + feedback: '' intended_uses: '' - prohibited_uses: '' + license: unknown monitoring: '' - feedback: '' - -- type: application + monthly_active_users: '' name: ChatGPT for Slack organization: OpenAI, Salesforce - description: The app integrates ChatGPT’s powerful AI technology to deliver instant - conversation summaries, research tools, and writing assistance directly in Slack - to help millions of companies work more productively. - created_date: 2023-03-07 - url: https://www.salesforce.com/news/stories/chatgpt-app-for-slack/ - dependencies: [ChatGPT API] - adaptation: '' output_space: '' + prohibited_uses: '' quality_control: '' - access: limited - license: unknown terms_of_service: '' - intended_uses: '' - prohibited_uses: '' - monitoring: '' - feedback: '' - monthly_active_users: '' + type: application + url: https://www.salesforce.com/news/stories/chatgpt-app-for-slack/ user_distribution: '' - failures: '' - -- type: model - name: GPT-4 - organization: OpenAI - description: '' - created_date: 2023-03-14 - url: https://arxiv.org/abs/2303.08774 - model_card: '' - modality: text and image +- access: limited analysis: '' - size: unknown + created_date: 2023-03-14 dependencies: [] - training_emissions: '' - training_time: '' - training_hardware: '' - quality_control: '' - access: limited + description: '' + feedback: '' + intended_uses: '' license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - intended_uses: '' - prohibited_uses: '' + modality: + explanation: text and image + value: image, text; image, text + model_card: '' monitoring: '' - feedback: '' - -- type: application - name: GPT-4 API + name: GPT-4 organization: OpenAI - description: GPT-4 is OpenAI’s most advanced system, producing safer and more - useful responses + prohibited_uses: '' + quality_control: '' + size: unknown + training_emissions: '' + training_hardware: '' + training_time: '' + type: model + url: https://arxiv.org/abs/2303.08774 +- access: limited + adaptation: '' created_date: 2023-03-14 - url: https://openai.com/product/gpt-4 dependencies: [GPT-4] - adaptation: '' - output_space: '' - quality_control: '' - access: limited + description: GPT-4 is OpenAI’s most advanced system, producing safer and more + useful responses + failures: '' + feedback: '' + intended_uses: '' license: - value: custom explanation: Per the Terms of Use, a limited license is provided to the users during their use of the API [[Section 2]](https://openai.com/api/policies/terms/). 
- terms_of_service: '' - intended_uses: '' - prohibited_uses: '' + value: custom monitoring: '' - feedback: '' monthly_active_users: '' + name: GPT-4 API + organization: OpenAI + output_space: '' + prohibited_uses: '' + quality_control: '' + terms_of_service: '' + type: application + url: https://openai.com/product/gpt-4 user_distribution: '' - failures: '' diff --git a/assets/othersideai.yaml b/assets/othersideai.yaml index 0753d45c..7d9b0f05 100644 --- a/assets/othersideai.yaml +++ b/assets/othersideai.yaml @@ -1,42 +1,31 @@ --- - -- type: application - name: HyperWrite - # General - organization: OthersideAI - description: > - HyperWrite is a writing assistant that generates text based on a user's - request, as well as style and tone choices. +- access: limited + adaptation: unknown created_date: unknown - url: https://hyperwriteai.com/ - # Construction dependencies: [OpenAI API] - adaptation: unknown - output_space: Generation - quality_control: - value: unknown - explanation: > - There is a HyperWrite blog warning the users about the risks of writing - with an AI assistant, but the company doesn't list the quality control - measures taken to prevent the listed risks, if any - [[HyperWrite Blog]] - (https://blog.hyperwrite.ai/what-are-the-risks-of-ai-writing/). - # Downstream - access: limited + description: "HyperWrite is a writing assistant that generates text based on a\ + \ user's request, as well as style and tone choices.\n" + failures: unknown + feedback: unknown + intended_uses: "HyperWrite is intended to be used as a writing assistant.\n" license: + explanation: "\"The Software, including software embedded in the Software, is\ + \ licensed, not sold, to you by Otherside only under the terms of this Agreement;\ + \ and Otherside reserves all rights not expressly granted under this Agreemen\"\ + \ - excerpt from the Terms of Service.\n" value: custom - explanation: > - "The Software, including software embedded in the Software, is licensed, - not sold, to you by Otherside only under the terms of this Agreement; and - Otherside reserves all rights not expressly granted under this Agreemen" - - excerpt from the Terms of Service. - terms_of_service: https://hyperwriteai.com/terms - intended_uses: > - HyperWrite is intended to be used as a writing assistant. - prohibited_uses: unknown monitoring: unknown - feedback: unknown - # Deployment monthly_active_users: unknown + name: HyperWrite + organization: OthersideAI + output_space: Generation + prohibited_uses: unknown + quality_control: + explanation: "There is a HyperWrite blog warning the users about the risks of\ + \ writing with an AI assistant, but the company doesn't list the quality control\ + \ measures taken to prevent the listed risks, if any [[HyperWrite Blog]] (https://blog.hyperwrite.ai/what-are-the-risks-of-ai-writing/).\n" + value: unknown + terms_of_service: https://hyperwriteai.com/terms + type: application + url: https://hyperwriteai.com/ user_distribution: unknown - failures: unknown diff --git a/assets/oxford.yaml b/assets/oxford.yaml index d57f98bc..c6f1477c 100644 --- a/assets/oxford.yaml +++ b/assets/oxford.yaml @@ -1,133 +1,110 @@ --- - -- type: dataset - name: WebVid-10M - # General - organization: University of Oxford - description: > - WebVid-10M is a large-scale dataset of short videos with textual descriptions - sourced from stock footage sites. +- access: + explanation: The captions and video url's can be found at the [[dataset page]](https://m-bain.github.io/webvid-dataset/). 
+ CLIP ViT-B/32 features of this dataset can be found at the [[WebVid github + repository]](https://github.com/m-bain/webvid#download-clip-features-%EF%B8%8F). + value: open + analysis: unknown created_date: + explanation: "The date the [[paper]](https://arxiv.org/abs/2104.00650) was released.\n" value: 2021-04-01 - explanation: > - The date the [[paper]](https://arxiv.org/abs/2104.00650) was released. - url: https://m-bain.github.io/webvid-dataset/ datasheet: none - modality: text (English) and video - size: 10.7M video-text pairs, 52K hours video - sample: [] - analysis: unknown - # Construction dependencies: [] - license: - value: WebVid Dataset Terms - explanation: > - [WebVid Dataset Terms](https://github.com/m-bain/webvid/blob/main/TERMS.md) + description: "WebVid-10M is a large-scale dataset of short videos with textual\ + \ descriptions sourced from stock footage sites.\n" + excluded: unknown + feedback: Feedback can be given by emailing at maxbain at robots.ox.ac.uk included: - value: Video URLs and textual description annotations explanation: Authors note in the paper:"We scrape the web for a new dataset of videos with textual description annotations, called WebVid-2M. The data was scraped from the web following a similar procedure to Google Conceptual Captions" - excluded: unknown - quality_control: unknown - # Downstream - access: - value: open - explanation: The captions and video url's can be found at the [[dataset page]](https://m-bain.github.io/webvid-dataset/). - CLIP ViT-B/32 features of this dataset can be found at the [[WebVid github - repository]](https://github.com/m-bain/webvid#download-clip-features-%EF%B8%8F). + value: Video URLs and textual description annotations intended_uses: - value: > - "Provided you keep to these rules, the University grants you (the researcher) - a non-exclusive and non-transferable licence to use the content free of charge - strictly for non-commercial research (i.e., whose output artefacts are not - incorporated - in commercial products) for 12 months." explanation: See [[Terms of Access]](https://github.com/m-bain/webvid/blob/main/TERMS.md) - prohibited_uses: > - - Authors note the following prohibited uses: "You must not use the content - other than for the Permitted Purpose in strict - conformity with these terms and any other reasonable instructions of the University. - You must not, except as may be strictly necessary for carrying out the Permitted - Purpose, - provide or otherwise make available content to any third party or allow use - of it or them by or on behalf of any third party, in whole or in part, whether - by way of sale, resale, loan, transfer, hire or any other form of exploitation; - or attempt to identify any living or deceased individual from the content." - [[Terms of Access]](https://github.com/m-bain/webvid/blob/main/TERMS.md) - - - Authors also note the following limitations of the dataset: "We note that - data - sourced from the web may be prone to biases and may contain graphic content. - Please be careful of unintended societal, gender, racial and other biases when - training or deploying models trained on this data." 
- [[Disclaimer]](https://github.com/m-bain/webvid#disclaimer-%EF%B8%8F) + value: "\"Provided you keep to these rules, the University grants you (the researcher)\ + \ a non-exclusive and non-transferable licence to use the content free of\ + \ charge strictly for non-commercial research (i.e., whose output artefacts\ + \ are not incorporated in commercial products) for 12 months.\"\n" + license: + explanation: "[WebVid Dataset Terms](https://github.com/m-bain/webvid/blob/main/TERMS.md)\n" + value: WebVid Dataset Terms + modality: + explanation: text (English) and video + value: text, video monitoring: '' - feedback: Feedback can be given by emailing at maxbain at robots.ox.ac.uk - -- type: dataset - name: WebVid-2M - # General + name: WebVid-10M organization: University of Oxford - description: > - WebVid-2M is a large-scale dataset of 2.5M short videos with textual descriptions - sourced from stock footage sites. A subset of the WebVid-10M dataset. + prohibited_uses: "- Authors note the following prohibited uses: \"You must not\ + \ use the content other than for the Permitted Purpose in strict conformity\ + \ with these terms and any other reasonable instructions of the University.\ + \ You must not, except as may be strictly necessary for carrying out the Permitted\ + \ Purpose, provide or otherwise make available content to any third party or\ + \ allow use of it or them by or on behalf of any third party, in whole or in\ + \ part, whether by way of sale, resale, loan, transfer, hire or any other form\ + \ of exploitation; or attempt to identify any living or deceased individual\ + \ from the content.\" [[Terms of Access]](https://github.com/m-bain/webvid/blob/main/TERMS.md)\n\ + - Authors also note the following limitations of the dataset: \"We note that\ + \ data sourced from the web may be prone to biases and may contain graphic content.\ + \ Please be careful of unintended societal, gender, racial and other biases\ + \ when training or deploying models trained on this data.\" [[Disclaimer]](https://github.com/m-bain/webvid#disclaimer-%EF%B8%8F)\n" + quality_control: unknown + sample: [] + size: 10.7M video-text pairs, 52K hours video + type: dataset + url: https://m-bain.github.io/webvid-dataset/ +- access: + explanation: The captions and video url's can be found at the [[dataset page]](https://m-bain.github.io/webvid-dataset/). + CLIP ViT-B/32 features of this dataset can be found at the [[WebVid github + repository]](https://github.com/m-bain/webvid#download-clip-features-%EF%B8%8F). + value: open + analysis: unknown created_date: + explanation: "The date the [[paper]](https://arxiv.org/abs/2104.00650) was released.\n" value: 2021-04-01 - explanation: > - The date the [[paper]](https://arxiv.org/abs/2104.00650) was released. - url: https://m-bain.github.io/webvid-dataset/ datasheet: none - modality: text (English) and video - size: 2.5M video-text pairs, 13K hours video - sample: [] - analysis: unknown - # Construction dependencies: [WebVid-10M] - license: - value: WebVid Dataset Terms - explanation: > - [WebVid Dataset Terms](https://github.com/m-bain/webvid/blob/main/TERMS.md) + description: "WebVid-2M is a large-scale dataset of 2.5M short videos with textual\ + \ descriptions sourced from stock footage sites. 
A subset of the WebVid-10M\ + \ dataset.\n" + excluded: unknown + feedback: Feedback can be given by emailing at maxbain at robots.ox.ac.uk included: - value: Video URLs and textual description annotations explanation: Authors note in the paper:"We scrape the web for a new dataset of videos with textual description annotations, called WebVid-2M. The data was scraped from the web following a similar procedure to Google Conceptual Captions" - excluded: unknown - quality_control: unknown - # Downstream - access: - value: open - explanation: The captions and video url's can be found at the [[dataset page]](https://m-bain.github.io/webvid-dataset/). - CLIP ViT-B/32 features of this dataset can be found at the [[WebVid github - repository]](https://github.com/m-bain/webvid#download-clip-features-%EF%B8%8F). + value: Video URLs and textual description annotations intended_uses: - value: > - "Provided you keep to these rules, the University grants you (the researcher) - a non-exclusive and non-transferable licence to use the content free of charge - strictly for non-commercial research (i.e., whose output artefacts are not - incorporated - in commercial products) for 12 months." explanation: See [[Terms of Access]](https://github.com/m-bain/webvid/blob/main/TERMS.md) - prohibited_uses: > - - Authors note the following prohibited uses: "You must not use the content - other than for the Permitted Purpose in strict - conformity with these terms and any other reasonable instructions of the University. - You must not, except as may be strictly necessary for carrying out the Permitted - Purpose, - provide or otherwise make available content to any third party or allow use - of it or them by or on behalf of any third party, in whole or in part, whether - by way of sale, resale, loan, transfer, hire or any other form of exploitation; - or attempt to identify any living or deceased individual from the content." - [[Terms of Access]](https://github.com/m-bain/webvid/blob/main/TERMS.md) - - - Authors also note the following limitations of the dataset: "We note that - data - sourced from the web may be prone to biases and may contain graphic content. - Please be careful of unintended societal, gender, racial and other biases when - training or deploying models trained on this data." 
- [[Disclaimer]](https://github.com/m-bain/webvid#disclaimer-%EF%B8%8F) + value: "\"Provided you keep to these rules, the University grants you (the researcher)\ + \ a non-exclusive and non-transferable licence to use the content free of\ + \ charge strictly for non-commercial research (i.e., whose output artefacts\ + \ are not incorporated in commercial products) for 12 months.\"\n" + license: + explanation: "[WebVid Dataset Terms](https://github.com/m-bain/webvid/blob/main/TERMS.md)\n" + value: WebVid Dataset Terms + modality: + explanation: text (English) and video + value: text, video monitoring: '' - feedback: Feedback can be given by emailing at maxbain at robots.ox.ac.uk + name: WebVid-2M + organization: University of Oxford + prohibited_uses: "- Authors note the following prohibited uses: \"You must not\ + \ use the content other than for the Permitted Purpose in strict conformity\ + \ with these terms and any other reasonable instructions of the University.\ + \ You must not, except as may be strictly necessary for carrying out the Permitted\ + \ Purpose, provide or otherwise make available content to any third party or\ + \ allow use of it or them by or on behalf of any third party, in whole or in\ + \ part, whether by way of sale, resale, loan, transfer, hire or any other form\ + \ of exploitation; or attempt to identify any living or deceased individual\ + \ from the content.\" [[Terms of Access]](https://github.com/m-bain/webvid/blob/main/TERMS.md)\n\ + - Authors also note the following limitations of the dataset: \"We note that\ + \ data sourced from the web may be prone to biases and may contain graphic content.\ + \ Please be careful of unintended societal, gender, racial and other biases\ + \ when training or deploying models trained on this data.\" [[Disclaimer]](https://github.com/m-bain/webvid#disclaimer-%EF%B8%8F)\n" + quality_control: unknown + sample: [] + size: 2.5M video-text pairs, 13K hours video + type: dataset + url: https://m-bain.github.io/webvid-dataset/ diff --git a/assets/paladin.yaml b/assets/paladin.yaml index 76870f97..80aec963 100644 --- a/assets/paladin.yaml +++ b/assets/paladin.yaml @@ -1,22 +1,21 @@ --- - -- type: application - name: UnderwriteGPT - organization: Paladin Group and Dais Technology - description: UnderwriteGPT is the world's first generative AI underwriting tool. +- access: limited + adaptation: '' created_date: 2023-02-01 - url: https://dais.com/underwritegpt/ dependencies: [] - adaptation: '' - output_space: '' - quality_control: '' - access: limited - license: '' - terms_of_service: '' + description: UnderwriteGPT is the world's first generative AI underwriting tool. 
+ failures: '' + feedback: '' intended_uses: '' - prohibited_uses: '' + license: '' monitoring: '' - feedback: '' monthly_active_users: '' + name: UnderwriteGPT + organization: Paladin Group and Dais Technology + output_space: '' + prohibited_uses: '' + quality_control: '' + terms_of_service: '' + type: application + url: https://dais.com/underwritegpt/ user_distribution: '' - failures: '' diff --git a/assets/perplexity.yaml b/assets/perplexity.yaml index 117a7acc..8bf35081 100644 --- a/assets/perplexity.yaml +++ b/assets/perplexity.yaml @@ -1,52 +1,50 @@ --- - -- type: application - name: Perplexity Ask - organization: Perplexity - description: Perplexity Ask is a new search interface that uses advanced artificial - intelligence technologies +- access: open + adaptation: '' created_date: - value: 2022-12-07 explanation: Perplexity Ask was announced in a [[Twitter post]](https://twitter.com/perplexity_ai/status/1600551871554338816?s=20) by the company. - url: https://www.perplexity.ai/ + value: 2022-12-07 dependencies: [GPT-3.5, Bing Search] - adaptation: '' - output_space: '' - quality_control: '' - access: open + description: Perplexity Ask is a new search interface that uses advanced artificial + intelligence technologies + failures: '' + feedback: '' + intended_uses: '' license: - value: none explanation: No license was found. - terms_of_service: '' - intended_uses: '' - prohibited_uses: '' + value: none monitoring: '' - feedback: '' monthly_active_users: '' - user_distribution: '' - failures: '' - -- type: application - name: Bird SQL + name: Perplexity Ask organization: Perplexity - description: Twitter search interface that is powered by Perplexity's structured - search engine. - created_date: 2022-12-15 - url: https://www.perplexity.ai/sql - dependencies: [Perplexity Ask, OpenAI API] - adaptation: '' output_space: '' + prohibited_uses: '' quality_control: '' - access: deprecated - license: - value: none - explanation: No license was found. terms_of_service: '' + type: application + url: https://www.perplexity.ai/ + user_distribution: '' +- access: deprecated + adaptation: '' + created_date: 2022-12-15 + dependencies: [Perplexity Ask, OpenAI API] + description: Twitter search interface that is powered by Perplexity's structured + search engine. + failures: '' + feedback: '' intended_uses: '' - prohibited_uses: '' + license: + explanation: No license was found. 
+ value: none monitoring: '' - feedback: '' monthly_active_users: '' + name: Bird SQL + organization: Perplexity + output_space: '' + prohibited_uses: '' + quality_control: '' + terms_of_service: '' + type: application + url: https://www.perplexity.ai/sql user_distribution: '' - failures: '' diff --git a/assets/portkey.yaml b/assets/portkey.yaml index 2fca2885..51fac815 100644 --- a/assets/portkey.yaml +++ b/assets/portkey.yaml @@ -1,23 +1,22 @@ --- - -- type: application - name: Portkey - organization: Portkey - description: Portkey is a hosted middleware that allows users to create generative - AI applications +- access: open + adaptation: '' created_date: 2023-05-06 - url: https://portkey.ai/ dependencies: [] - adaptation: '' - output_space: generative AI apps - quality_control: '' - access: open - license: '' - terms_of_service: https://portkey.ai/terms + description: Portkey is a hosted middleware that allows users to create generative + AI applications + failures: '' + feedback: '' intended_uses: '' - prohibited_uses: '' + license: '' monitoring: '' - feedback: '' monthly_active_users: '' + name: Portkey + organization: Portkey + output_space: generative AI apps + prohibited_uses: '' + quality_control: '' + terms_of_service: https://portkey.ai/terms + type: application + url: https://portkey.ai/ user_distribution: '' - failures: '' diff --git a/assets/quizlet.yaml b/assets/quizlet.yaml index cfd5d9c6..a7580d0b 100644 --- a/assets/quizlet.yaml +++ b/assets/quizlet.yaml @@ -1,26 +1,25 @@ --- - -- type: application - name: Q-Chat - organization: Quizlet +- access: open + adaptation: '' + created_date: 2023-03-01 + dependencies: [ChatGPT API] description: Quizlet is introducing Q-Chat, a fully-adaptive AI tutor that engages students with adaptive questions based on relevant study materials delivered through a fun chat experience. - created_date: 2023-03-01 - url: https://openai.com/blog/introducing-chatgpt-and-whisper-apis - dependencies: [ChatGPT API] - adaptation: '' - output_space: '' - quality_control: '' - access: open + failures: '' + feedback: '' + intended_uses: '' license: - value: none explanation: No license was found. - terms_of_service: https://quizlet.com/tos - intended_uses: '' - prohibited_uses: '' + value: none monitoring: '' - feedback: '' monthly_active_users: '' + name: Q-Chat + organization: Quizlet + output_space: '' + prohibited_uses: '' + quality_control: '' + terms_of_service: https://quizlet.com/tos + type: application + url: https://openai.com/blog/introducing-chatgpt-and-whisper-apis user_distribution: '' - failures: '' diff --git a/assets/quora.yaml b/assets/quora.yaml index e28e31a2..2b32f96d 100644 --- a/assets/quora.yaml +++ b/assets/quora.yaml @@ -1,32 +1,31 @@ --- - -- type: application - name: Poe - organization: Quora - description: Poe lets people ask questions, get instant answers, and have back-and-forth - conversations with several AI-powered bots. It is initially available on iOS, - but we will be adding support for all major platforms in the next few months, - along with more bots. +- access: limited + adaptation: '' created_date: 2023-02-03 - url: https://quorablog.quora.com/Poe-1 dependencies: - ChatGPT API - GPT-4 API - Claude API - Dragonfly API - Sage API - adaptation: '' - output_space: '' - quality_control: '' - access: limited + description: Poe lets people ask questions, get instant answers, and have back-and-forth + conversations with several AI-powered bots. 
It is initially available on iOS, + but we will be adding support for all major platforms in the next few months, + along with more bots. + failures: '' + feedback: '' + intended_uses: '' license: - value: none explanation: No license was found. - terms_of_service: https://poe.com/tos - intended_uses: '' - prohibited_uses: '' + value: none monitoring: '' - feedback: '' monthly_active_users: '' + name: Poe + organization: Quora + output_space: '' + prohibited_uses: '' + quality_control: '' + terms_of_service: https://poe.com/tos + type: application + url: https://quorablog.quora.com/Poe-1 user_distribution: '' - failures: '' diff --git a/assets/robin.yaml b/assets/robin.yaml index 123cc804..813dc634 100644 --- a/assets/robin.yaml +++ b/assets/robin.yaml @@ -1,27 +1,26 @@ --- - -- type: application - name: Robin AI - organization: Robin AI +- access: limited + adaptation: '' + created_date: unknown + dependencies: [Anthropic API] description: Robin AI uses Claude and Anthropic's models to understand language - including in technical domains like legal language. It's also very confident at drafting, summarising, translations, and explaining complex concepts in simple terms - created_date: unknown - url: https://www.robinai.co.uk/ - dependencies: [Anthropic API] - adaptation: '' - output_space: '' - quality_control: '' - access: limited + failures: '' + feedback: '' + intended_uses: '' license: - value: none explanation: No license was found. - terms_of_service: https://www.robinai.co.uk/terms - intended_uses: '' - prohibited_uses: '' + value: none monitoring: '' - feedback: '' monthly_active_users: '' + name: Robin AI + organization: Robin AI + output_space: '' + prohibited_uses: '' + quality_control: '' + terms_of_service: https://www.robinai.co.uk/terms + type: application + url: https://www.robinai.co.uk/ user_distribution: '' - failures: '' diff --git a/assets/salesforce.yaml b/assets/salesforce.yaml index 19a1d021..f6c12de0 100644 --- a/assets/salesforce.yaml +++ b/assets/salesforce.yaml @@ -1,42 +1,35 @@ --- - -- type: model - name: CodeGen - organization: Salesforce - description: CodeGen is a language model for code +- access: + explanation: Model weights are available for download with links in the [[GitHub + repo]](https://github.com/salesforce/CodeGen) + value: open + analysis: '' created_date: - value: 2022-03-25 explanation: Date model paper was released - url: https://arxiv.org/abs/2203.13474 - model_card: '' - modality: text (English) and code - analysis: '' - size: 16B parameters (dense model) + value: 2022-03-25 dependencies: [] - training_emissions: '' - training_time: '' - training_hardware: Unspecified Salesforce Compute (TPU-V4s) - quality_control: '' - access: - value: open - explanation: Model weights are available for download with links in the [[GitHub - repo]](https://github.com/salesforce/CodeGen) - license: none (model weights), BSD-3-Clause (code) + description: CodeGen is a language model for code + feedback: '' intended_uses: '' - prohibited_uses: '' + license: none (model weights), BSD-3-Clause (code) + modality: + explanation: text (English) and code + value: code, text; code, text + model_card: '' monitoring: '' - feedback: '' - -- type: model - name: BLIP + name: CodeGen organization: Salesforce - description: '' - created_date: 2022-01-28 - url: https://arxiv.org/abs/2201.12086 - model_card: '' - modality: text and image + prohibited_uses: '' + quality_control: '' + size: 16B parameters (dense) + training_emissions: '' + training_hardware: Unspecified 
Salesforce Compute (TPU-V4s) + training_time: '' + type: model + url: https://arxiv.org/abs/2203.13474 +- access: open analysis: '' - size: unknown + created_date: 2022-01-28 dependencies: - ViT-B - BERT @@ -46,60 +39,68 @@ - Conceptual 12M - SBU Captions - LAION-115M - training_emissions: '' - training_time: '' - training_hardware: '' - quality_control: '' - access: open - license: BSD-3-Clause + description: '' + feedback: '' intended_uses: '' - prohibited_uses: '' + license: BSD-3-Clause + modality: + explanation: text and image + value: image, text; image, text + model_card: '' monitoring: '' - feedback: '' - -- type: dataset - name: LAION-115M + name: BLIP organization: Salesforce - description: '' - created_date: 2022-01-28 + prohibited_uses: '' + quality_control: '' + size: unknown + training_emissions: '' + training_hardware: '' + training_time: '' + type: model url: https://arxiv.org/abs/2201.12086 - datasheet: '' - modality: text and image - size: 115M image-text pairs - sample: [] +- access: open analysis: '' + created_date: 2022-01-28 + datasheet: '' dependencies: [LAION-400M] - included: '' + description: '' excluded: '' - quality_control: '' - access: open - license: BSD-3-Clause + feedback: '' + included: '' intended_uses: '' - prohibited_uses: '' + license: BSD-3-Clause + modality: + explanation: text and image + value: image, text monitoring: '' - feedback: '' - -- type: application - name: EinsteinGPT + name: LAION-115M organization: Salesforce - description: EinsteinGPT is generative AI for customer relationship management - (CRFM). + prohibited_uses: '' + quality_control: '' + sample: [] + size: 115M image-text pairs + type: dataset + url: https://arxiv.org/abs/2201.12086 +- access: limited + adaptation: '' created_date: 2023-03-07 - url: https://www.salesforce.com/products/einstein/overview/?d=cta-body-promo-8 dependencies: [ChatGPT API] - adaptation: '' - output_space: '' - quality_control: '' - access: limited + description: EinsteinGPT is generative AI for customer relationship management + (CRFM). + failures: '' + feedback: '' + intended_uses: '' license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - terms_of_service: '' - intended_uses: '' - prohibited_uses: '' monitoring: '' - feedback: '' monthly_active_users: '' + name: EinsteinGPT + organization: Salesforce + output_space: '' + prohibited_uses: '' + quality_control: '' + terms_of_service: '' + type: application + url: https://www.salesforce.com/products/einstein/overview/?d=cta-body-promo-8 user_distribution: '' - failures: '' diff --git a/assets/sana.yaml b/assets/sana.yaml index 09bda05f..22234f8f 100644 --- a/assets/sana.yaml +++ b/assets/sana.yaml @@ -1,55 +1,40 @@ --- - -- type: application - name: Sana - # General - organization: Sana - description: > - "Sana is your all-in-one, AI-assisted, online learning platform (LMS). - Author employee training courses and measure team development with Sana's - powerful analytics. Sana partners with the world's most important - organizations and fastest-growing startups to make personalized, adaptive - learning available for everyone, everywhere" - [[Sana GPT-3 Demo]](https://gpt3demo.com/apps/sanalabs). 
+- access: limited + adaptation: "Customized GPT-3, fine-tuned on private data [[Sana GPT-3 Demo]](https://gpt3demo.com/apps/sanalabs).\n" created_date: + explanation: "The company was founded in 2016 according to [[Crunchbase]](https://www.crunchbase.com/organization/sana-2).\ + \ It is unclear when Sana adapted the OpenAI API to their products.\n" value: unknown - explanation: > - The company was founded in 2016 according to - [[Crunchbase]](https://www.crunchbase.com/organization/sana-2). It is - unclear when Sana adapted the OpenAI API to their products. - url: https://www.sanalabs.com/ - # Construction dependencies: [OpenAI API] - adaptation: > - Customized GPT-3, fine-tuned on private data - [[Sana GPT-3 Demo]](https://gpt3demo.com/apps/sanalabs). - output_space: question and answer, summarization, sentiment analysis,topic identification - quality_control: unknown - # Downstream - access: limited + description: "\"Sana is your all-in-one, AI-assisted, online learning platform\ + \ (LMS). Author employee training courses and measure team development with\ + \ Sana's powerful analytics. Sana partners with the world's most important organizations\ + \ and fastest-growing startups to make personalized, adaptive learning available\ + \ for everyone, everywhere\" [[Sana GPT-3 Demo]](https://gpt3demo.com/apps/sanalabs).\n" + failures: unknown + feedback: unknown + intended_uses: "Sana is intended to be used by employers to provide a learning\ + \ service for their employees.\n" license: + explanation: "\"Sana Labs grants Subscriber a limited non-exclusive, non-transferable,\ + \ non-sublicensable license to use Sana Labs' web-based personalized and collaborative\ + \ learning platform (“Sana Platform”) solely in connection with the subscribed\ + \ Services in accordance with the Agreement.\" - excerpt from the Terms of\ + \ Service.\n" value: custom - explanation: > - "Sana Labs grants Subscriber a limited non-exclusive, non-transferable, - non-sublicensable license to use Sana Labs' web-based personalized and - collaborative learning platform (“Sana Platform”) solely in connection - with the subscribed Services in accordance with the Agreement." - - excerpt from the Terms of Service. - terms_of_service: https://www.sanalabs.com/legal/ - intended_uses: > - Sana is intended to be used by employers to provide a learning service for - their employees. - prohibited_uses: - value: none - explanation: > - "Sana provides an extensive set of legal documents - [[Sana Legal]](https://www.sanalabs.com/legal/), but missing from the - documents are prohibited uses of the Sana platform, beyond a mention of - what is impermissible under relevant law." - exceprt from the Terms of - Service document. 
monitoring: unknown - feedback: unknown - # Deployment monthly_active_users: unknown + name: Sana + organization: Sana + output_space: question and answer, summarization, sentiment analysis, topic identification + prohibited_uses: + explanation: "\"Sana provides an extensive set of legal documents [[Sana Legal]](https://www.sanalabs.com/legal/),\ + \ but missing from the documents are prohibited uses of the Sana platform,\ + \ beyond a mention of what is impermissible under relevant law.\" - excerpt\ + \ from the Terms of Service document.\n" + value: none + quality_control: unknown + terms_of_service: https://www.sanalabs.com/legal/ + type: application + url: https://www.sanalabs.com/ user_distribution: unknown - failures: unknown diff --git a/assets/shanghai.yaml b/assets/shanghai.yaml index 8dae5081..80208ec7 100644 --- a/assets/shanghai.yaml +++ b/assets/shanghai.yaml @@ -1,15 +1,7 @@ --- - -- type: model - name: InternVideo - organization: Shanghai AI Laboratory - description: '' - created_date: 2022-12-06 - url: https://arxiv.org/pdf/2212.03191.pdf - model_card: '' - modality: text and video +- access: open analysis: '' - size: 1.3B parameters (dense model) + created_date: 2022-12-06 dependencies: - Kinetics-400 - WebVid-2M - HowTo100M - AVA - Something-Something-v2 - Kinetics-710 - training_emissions: '' - training_time: '' - training_hardware: '' - quality_control: '' - access: open - license: Apache 2.0 + description: '' + feedback: '' intended_uses: '' - prohibited_uses: '' + license: Apache 2.0 + modality: + explanation: text and video + value: text, video; text, video + model_card: '' monitoring: '' - feedback: '' - -- type: model - name: Lego-MT + name: InternVideo organization: Shanghai AI Laboratory + prohibited_uses: '' + quality_control: '' + size: 1.3B parameters (dense) + training_emissions: '' + training_hardware: '' + training_time: '' + type: model + url: https://arxiv.org/pdf/2212.03191.pdf +- access: open + analysis: Evaluated based on own constructed dataset covering 433 languages. + created_date: 2023-05-29 + dependencies: [OPUS] description: Lego-MT is a multilingual large language model which uses a more efficient approach of being an effective detachable model. - created_date: 2023-05-29 - url: https://arxiv.org/pdf/2212.10551.pdf + feedback: '' + intended_uses: '' + license: '' + modality: + explanation: multilingual text + value: text; text model_card: '' - modality: multilingual text - analysis: Evaluated based on own constructed dataset covering 433 languages. - size: 1.2B parameters - dependencies: [OPUS] + monitoring: '' + name: Lego-MT + organization: Shanghai AI Laboratory + prohibited_uses: '' + quality_control: '' + size: 1.2B parameters (dense) training_emissions: unknown - training_time: 15 days training_hardware: 32 A100 GPUs - quality_control: '' - access: open - license: '' - intended_uses: '' - prohibited_uses: '' - monitoring: '' - feedback: '' + training_time: 15 days + type: model + url: https://arxiv.org/pdf/2212.10551.pdf diff --git a/assets/shop.yaml b/assets/shop.yaml index 5700ad95..60403fc0 100644 --- a/assets/shop.yaml +++ b/assets/shop.yaml @@ -1,25 +1,24 @@ --- - -- type: application - name: Shop Assistant - organization: Shop +- access: open + adaptation: '' + created_date: 2023-03-01 + dependencies: [ChatGPT API] description: When shoppers search for products, the shopping assistant makes personalized recommendations based on their requests.
Shop’s new AI-powered shopping assistant will streamline in-app shopping by scanning millions of products to quickly find what buyers are looking for—or help them discover something new. - created_date: 2023-03-01 - url: https://openai.com/blog/introducing-chatgpt-and-whisper-apis - dependencies: [ChatGPT API] - adaptation: '' - output_space: '' - quality_control: '' - access: open - license: '' - terms_of_service: '' + failures: '' + feedback: '' intended_uses: '' - prohibited_uses: '' + license: '' monitoring: '' - feedback: '' monthly_active_users: '' + name: Shop Assistant + organization: Shop + output_space: '' + prohibited_uses: '' + quality_control: '' + terms_of_service: '' + type: application + url: https://openai.com/blog/introducing-chatgpt-and-whisper-apis user_distribution: '' - failures: '' diff --git a/assets/singapore.yaml b/assets/singapore.yaml index 5e0ab85c..3e378eb9 100644 --- a/assets/singapore.yaml +++ b/assets/singapore.yaml @@ -1,27 +1,28 @@ --- - -- type: model - name: GOAT - organization: National University of Singapore - description: GOAT is a fine-tuned LLaMA model which uses the tokenization of numbers - to significantly outperform benchmark standards on a range of arithmetic tasks. - created_date: 2023-05-23 - url: https://arxiv.org/pdf/2305.14201.pdf - model_card: none - modality: textual input, numerical data output +- access: open analysis: Performance assessed on BIG-bench arithmetic sub-task, and various elementary arithmetic tasks. - size: 7B parameters + created_date: 2023-05-23 dependencies: [LLaMA, GOAT dataset] - training_emissions: unknown - training_time: unknown - training_hardware: 24 GB VRAM GPU - quality_control: Number data is randomly generated from log space to reduce likelihood - of redundancy and range of magnitudes. - access: open - license: Apache 2.0 + description: GOAT is a fine-tuned LLaMA model which uses the tokenization of numbers + to significantly outperform benchmark standards on a range of arithmetic tasks. + feedback: '' intended_uses: Integration into other instruction-tuned LLMs to further enhance arithmetic reasoning abilities in solving math word problems. - prohibited_uses: '' + license: Apache 2.0 + modality: + explanation: textual input, numerical data output + value: text; other + model_card: none monitoring: '' - feedback: '' + name: GOAT + organization: National University of Singapore + prohibited_uses: '' + quality_control: Number data is randomly generated from log space to reduce likelihood + of redundancy and range of magnitudes. + size: 7B parameters (dense) + training_emissions: unknown + training_hardware: 24 GB VRAM GPU + training_time: unknown + type: model + url: https://arxiv.org/pdf/2305.14201.pdf diff --git a/assets/snap.yaml b/assets/snap.yaml index 67727722..d5a384d3 100644 --- a/assets/snap.yaml +++ b/assets/snap.yaml @@ -1,30 +1,28 @@ --- - -- type: application - name: My AI for Snapchat - organization: Snap +- access: open + adaptation: '' + created_date: 2023-03-01 + dependencies: [ChatGPT API] description: My AI offers Snapchatters a friendly, customizable chatbot at their fingertips that offers recommendations, and can even write a haiku for friends in seconds. Snapchat, where communication and messaging is a daily behavior, has 750 million monthly Snapchatters. 
- created_date: 2023-03-01 - url: https://openai.com/blog/introducing-chatgpt-and-whisper-apis - dependencies: [ChatGPT API] - adaptation: '' - output_space: '' - quality_control: '' - access: open + failures: '' + feedback: '' + intended_uses: '' license: + explanation: "\"Snap grants you a worldwide, royalty-free, non-assignable, non-exclusive,\ + \ revocable, and non-sublicensable license to use the Services.\" - excerpt\ + \ from the Terms of Service document.\n" value: custom - explanation: > - "Snap grants you a worldwide, royalty-free, non-assignable, non-exclusive, revocable, - and non-sublicensable license to use the Services." - excerpt - from the Terms of Service document. - terms_of_service: https://snap.com/terms - intended_uses: '' - prohibited_uses: '' monitoring: '' - feedback: '' monthly_active_users: '' + name: My AI for Snapchat + organization: Snap + output_space: '' + prohibited_uses: '' + quality_control: '' + terms_of_service: https://snap.com/terms + type: application + url: https://openai.com/blog/introducing-chatgpt-and-whisper-apis user_distribution: '' - failures: '' diff --git a/assets/speak.yaml b/assets/speak.yaml index 8771590f..94e61ac3 100644 --- a/assets/speak.yaml +++ b/assets/speak.yaml @@ -1,24 +1,23 @@ --- - -- type: application - name: Speak - organization: Speak +- access: open + adaptation: '' + created_date: 2023-03-01 + dependencies: [Whisper API] description: Speak is an AI-powered language learning app focused on building the best path to spoken fluency and is the the fastest-growing English app in South Korea. - created_date: 2023-03-01 - url: https://openai.com/blog/introducing-chatgpt-and-whisper-apis - dependencies: [Whisper API] - adaptation: '' - output_space: '' - quality_control: '' - access: open - license: '' - terms_of_service: '' + failures: '' + feedback: '' intended_uses: '' - prohibited_uses: '' + license: '' monitoring: '' - feedback: '' monthly_active_users: '' + name: Speak + organization: Speak + output_space: '' + prohibited_uses: '' + quality_control: '' + terms_of_service: '' + type: application + url: https://openai.com/blog/introducing-chatgpt-and-whisper-apis user_distribution: '' - failures: '' diff --git a/assets/spotify.yaml b/assets/spotify.yaml index f7016bf5..6e436586 100644 --- a/assets/spotify.yaml +++ b/assets/spotify.yaml @@ -1,32 +1,29 @@ --- - -- type: application - name: AI DJ - organization: Spotify +- access: limited + adaptation: '' + created_date: 2023-02-23 + dependencies: [ChatGPT API, Sonantic AI] description: The DJ is a personalized AI guide that knows you and your music taste so well that it can choose what to play for you. This feature, first rolling out in beta, will deliver a curated lineup of music alongside commentary around the tracks and artists we think you’ll like in a stunningly realistic voice. 
- created_date: 2023-02-23 - url: https://newsroom.spotify.com/2023-02-22/spotify-debuts-a-new-ai-dj-right-in-your-pocket/ - dependencies: [ChatGPT API, Sonantic AI] - adaptation: '' - output_space: '' - quality_control: '' - access: limited + failures: '' + feedback: '' + intended_uses: '' license: + explanation: "\"The Spotify software applications and the Content are licensed,\ + \ not sold or transferred to you, and Spotify and its licensors retain ownership\ + \ of all copies of the Spotify software applications and Content even after\ + \ installation on your Devices.\" - excerpt from the Terms of Service document.\n" value: custom - explanation: > - "The Spotify software applications and the Content are licensed, not sold - or transferred to you, and Spotify and its licensors retain ownership of - all copies of the Spotify software applications and Content even after - installation on your Devices." - excerpt from the Terms of Service - document. - terms_of_service: https://www.spotify.com/us/legal/end-user-agreement/ - intended_uses: '' - prohibited_uses: '' monitoring: '' - feedback: '' monthly_active_users: '' + name: AI DJ + organization: Spotify + output_space: '' + prohibited_uses: '' + quality_control: '' + terms_of_service: https://www.spotify.com/us/legal/end-user-agreement/ + type: application + url: https://newsroom.spotify.com/2023-02-22/spotify-debuts-a-new-ai-dj-right-in-your-pocket/ user_distribution: '' - failures: '' diff --git a/assets/stability.yaml b/assets/stability.yaml index 8c572529..8b137891 100644 --- a/assets/stability.yaml +++ b/assets/stability.yaml @@ -1,102 +1 @@ ---- -- type: model - name: DeepFloyd IF - organization: Stability AI - description: A text-to-image cascaded pixel diffusion model released in conjunction - with AI research lab DeepFloyd. - created_date: 2023-04-28 - url: https://stability.ai/blog/deepfloyd-if-text-to-image-model - model_card: https://huggingface.co/DeepFloyd/IF-I-XL-v1.0 - modality: textual input with image output - analysis: Evaluated on the COCO dataset. - size: 4.3B parameters - dependencies: [LAION-5B] - training_emissions: '' - training_time: '' - training_hardware: '' - quality_control: '' - access: open - license: - value: custom - explanation: License agreement can be found on model card https://huggingface.co/DeepFloyd/IF-I-XL-v1.0 - intended_uses: '' - prohibited_uses: '' - monitoring: '' - feedback: https://huggingface.co/DeepFloyd/IF-I-XL-v1.0/discussions - - -- type: model - name: StableLM - organization: Stability AI - description: Large language models trained on up to 1.5 trillion tokens. - created_date: 2023-04-20 - url: https://github.com/Stability-AI/StableLM - model_card: '' - modality: natural language text - analysis: '' - size: 7B parameters - dependencies: - - StableLM-Alpha dataset - - Alpaca dataset - - gpt4all dataset - - ShareGPT52K dataset - - Dolly dataset - - HH dataset - training_emissions: '' - training_time: '' - training_hardware: '' - quality_control: '' - access: open - license: Apache 2.0 - intended_uses: '' - prohibited_uses: '' - monitoring: '' - feedback: '' - - -- type: application - name: Stable Diffusion - organization: Stability AI - description: Stable Diffusion is a generative software that creates images from - text prompts. 
- created_date: 2022-08-22 - url: https://stability.ai/blog/stable-diffusion-public-release - dependencies: [] - adaptation: '' - output_space: image - quality_control: '' - access: open - license: - value: custom - explanation: License agreement can be found on https://github.com/CompVis/stable-diffusion/blob/main/LICENSE - terms_of_service: '' - intended_uses: '' - prohibited_uses: '' - monitoring: '' - feedback: https://huggingface.co/CompVis/stable-diffusion/discussions - monthly_active_users: '' - user_distribution: '' - failures: '' - -- type: application - name: Stable Diffusion XL - organization: Stability AI - description: Stable Diffusion XL is an updated version of Stable Diffusion, and - creates descriptive images with shorter prompts and generate words within images. - created_date: 2023-07-26 - url: https://stability.ai/stablediffusion - dependencies: [] - adaptation: '' - output_space: image - quality_control: '' - access: limited - license: MIT - terms_of_service: '' - intended_uses: '' - prohibited_uses: '' - monitoring: '' - feedback: '' - monthly_active_users: '' - user_distribution: '' - failures: '' diff --git a/assets/stanford.yaml b/assets/stanford.yaml index f54d0d1e..37f02528 100644 --- a/assets/stanford.yaml +++ b/assets/stanford.yaml @@ -1,74 +1,77 @@ --- - -- type: model - name: BioMedLM - organization: Stanford - description: '' - created_date: 2022-12-15 - url: https://crfm.stanford.edu/2022/12/15/pubmedgpt.html - model_card: '' - modality: text +- access: open analysis: '' - size: 2.7B parameters (dense model) + created_date: 2022-12-15 dependencies: [The Pile] - training_emissions: '' - training_time: '' - training_hardware: '' - quality_control: '' - access: open - license: bigscience-bloom-rail-1.0 + description: '' + feedback: '' intended_uses: '' - prohibited_uses: '' + license: bigscience-bloom-rail-1.0 + modality: + explanation: text + value: text; text + model_card: '' monitoring: '' - feedback: '' - -- type: model - name: RoentGen + name: BioMedLM organization: Stanford - description: RoentGen is a generative medical imaging model that can create visually - convincing X-ray images. - created_date: 2022-11-23 - url: https://arxiv.org/pdf/2211.12737.pdf - model_card: '' - modality: text input and image output + prohibited_uses: '' + quality_control: '' + size: 2.7B parameters (dense) + training_emissions: '' + training_hardware: '' + training_time: '' + type: model + url: https://crfm.stanford.edu/2022/12/15/pubmedgpt.html +- access: open analysis: Evaluated on own framework that tests domain-specific tasks in medical field. - size: 330M parameters + created_date: 2022-11-23 dependencies: [Stable Diffusion, RoentGen radiology dataset] - training_emissions: unknown - training_time: 60k training steps per day - training_hardware: 64 A100 GPUs - quality_control: '' - access: open - license: '' + description: RoentGen is a generative medical imaging model that can create visually + convincing X-ray images. + feedback: '' intended_uses: '' - prohibited_uses: '' + license: '' + modality: + explanation: text input and image output + value: text; image + model_card: '' monitoring: '' - feedback: '' - -- type: model - name: CORGI + name: RoentGen organization: Stanford - description: Model trained to generate language corrections for physical control - tasks. 
- created_date: 2023-06-12 - url: https://arxiv.org/pdf/2306.07012.pdf - model_card: '' - modality: trajectory input with text output + prohibited_uses: '' + quality_control: '' + size: 330M parameters (dense) + training_emissions: unknown + training_hardware: 64 A100 GPUs + training_time: 60k training steps per day + type: model + url: https://arxiv.org/pdf/2211.12737.pdf +- access: open analysis: Evaluated on three physical control tasks, drawing, steering, and human body movement on various dynamics - size: 124M parameters + created_date: 2023-06-12 dependencies: [GPT-2, BABEL, text-davinci-003] + description: Model trained to generate language corrections for physical control + tasks. + feedback: '' + intended_uses: '' + license: MIT + modality: + explanation: trajectory input with text output + value: other; text + model_card: '' + monitoring: '' + name: CORGI + organization: Stanford + prohibited_uses: '' + quality_control: '' + size: 124M parameters (dense) training_emissions: '' + training_hardware: one NVIDIA A40 GPU training_time: - value: unknown explanation: The authors do not report the training time, but do report that they train for 200 epochs. - training_hardware: one NVIDIA A40 GPU - quality_control: '' - access: open - license: MIT - intended_uses: '' - prohibited_uses: '' - monitoring: '' - feedback: '' + value: unknown + type: model + url: https://arxiv.org/pdf/2306.07012.pdf diff --git a/assets/stonybrook.yaml b/assets/stonybrook.yaml index ce4a4657..a160a186 100644 --- a/assets/stonybrook.yaml +++ b/assets/stonybrook.yaml @@ -1,62 +1,51 @@ --- - -- type: dataset - name: SBU Captions - # General - organization: Stony Brook University - description: > - SBU Captions Dataset is a collection of 1 million images and associated captions - from Flickr, filtered so that the descriptions are likely to refer to visual - content. +- access: + explanation: The dataset is available for download on the [[SBU Captions Dataset + website]](https://www.cs.rice.edu/~vo9/sbucaptions/), along with additional + resources. + value: open + analysis: + explanation: See [[Section 5]](https://proceedings.neurips.cc/paper/2011/file/5dd9db5e033da9c6fb5ba83c7a7ebea9-Paper.pdf) + value: "Authors perform two quantitative evaluations for image captioning -\ + \ direct user ratings of relevance and BLEU score. They also propose a new\ + \ evaluation task: \"we propose a new evaluation task where a user is presented\ + \ with two photographs and one caption. The user must assign the caption to\ + \ the most relevant image. For evaluation we use a query image, a random image\ + \ and a generated caption.\"\n" created_date: + explanation: "The date the [[paper]](https://papers.nips.cc/paper_files/paper/2011/hash/5dd9db5e033da9c6fb5ba83c7a7ebea9-Abstract.html)\ + \ was published.\n" value: 2011-12-12 - explanation: > - The date the [[paper]](https://papers.nips.cc/paper_files/paper/2011/hash/5dd9db5e033da9c6fb5ba83c7a7ebea9-Abstract.html) - was published. - url: https://proceedings.neurips.cc/paper/2011/file/5dd9db5e033da9c6fb5ba83c7a7ebea9-Paper.pdf datasheet: none - modality: text (English) and image - size: 1M image-text pairs - sample: [] - analysis: - value: > - Authors perform two quantitative evaluations for image captioning - direct - user - ratings of relevance and BLEU score. They also propose a new evaluation task: - "we propose a new evaluation task where a user is presented with two photographs - and one caption. The user must assign the caption to the most relevant image. 
- For evaluation we use a query image, a random image and a generated caption." - explanation: See [[Section 5]](https://proceedings.neurips.cc/paper/2011/file/5dd9db5e033da9c6fb5ba83c7a7ebea9-Paper.pdf) - # Construction dependencies: [Flickr] - license: - value: none - explanation: No license was found. - included: - value: > - "To encourage visual descriptiveness in our collection, we select only those - images with descriptions of satisfactory length based on observed lengths - in - visual descriptions. We also enforce that retained descriptions contain at - least - 2 words belonging to our term lists and at least one prepositional word, e.g. - “on”, “under” which often indicate visible spatial relationships." - explanation: See [[Section 2]](https://proceedings.neurips.cc/paper/2011/file/5dd9db5e033da9c6fb5ba83c7a7ebea9-Paper.pdf) + description: "SBU Captions Dataset is a collection of 1 million images and associated\ + \ captions from Flickr, filtered so that the descriptions are likely to refer\ + \ to visual content.\n" excluded: - value: > - "This produces a very large, but noisy initial set of photographs with associated - text. We filter this set of photos so that the descriptions attached to a - picture - are relevant and visually descriptive." explanation: See [[Section 2]](https://proceedings.neurips.cc/paper/2011/file/5dd9db5e033da9c6fb5ba83c7a7ebea9-Paper.pdf) - quality_control: unknown - # Downstream - access: - value: open - explanation: The dataset is available for download on the [[SBU Captions Dataset - website]](https://www.cs.rice.edu/~vo9/sbucaptions/), along with additional - resources. + value: "\"This produces a very large, but noisy initial set of photographs with\ + \ associated text. We filter this set of photos so that the descriptions attached\ + \ to a picture are relevant and visually descriptive.\"\n" + feedback: '' + included: + explanation: See [[Section 2]](https://proceedings.neurips.cc/paper/2011/file/5dd9db5e033da9c6fb5ba83c7a7ebea9-Paper.pdf) + value: "\"To encourage visual descriptiveness in our collection, we select only\ + \ those images with descriptions of satisfactory length based on observed\ + \ lengths in visual descriptions. We also enforce that retained descriptions\ + \ contain at least 2 words belonging to our term lists and at least one prepositional\ + \ word, e.g. “on”, “under” which often indicate visible spatial relationships.\"\ + \n" intended_uses: '' - prohibited_uses: '' + license: + explanation: No license was found. + value: none + modality: {} monitoring: none - feedback: '' + name: SBU Captions + organization: Stony Brook University + prohibited_uses: '' + quality_control: unknown + sample: [] + size: 1M image-text pairs + type: dataset + url: https://proceedings.neurips.cc/paper/2011/file/5dd9db5e033da9c6fb5ba83c7a7ebea9-Paper.pdf diff --git a/assets/suno.yaml b/assets/suno.yaml index 6e277fb6..5e4e59e6 100644 --- a/assets/suno.yaml +++ b/assets/suno.yaml @@ -1,24 +1,25 @@ --- - -- type: model - name: Bark - organization: Suno +- access: open + analysis: '' + created_date: 2023-04-20 + dependencies: [AudioLM] description: Bark is a text-to-audio model that can generate multilingual speech as well as other noises. 
- created_date: 2023-04-20 - url: https://github.com/suno-ai/bark + feedback: https://huggingface.co/spaces/suno/bark/discussions + intended_uses: '' + license: MIT + modality: + explanation: text input with audio output + value: text; audio model_card: https://github.com/suno-ai/bark/blob/main/model-card.md - modality: text input with audio output - analysis: '' + monitoring: '' + name: Bark + organization: Suno + prohibited_uses: '' + quality_control: '' size: '' - dependencies: [AudioLM] training_emissions: unknown - training_time: unknown training_hardware: '' - quality_control: '' - access: open - license: MIT - intended_uses: '' - prohibited_uses: '' - monitoring: '' - feedback: https://huggingface.co/spaces/suno/bark/discussions + training_time: unknown + type: model + url: https://github.com/suno-ai/bark diff --git a/assets/together.yaml b/assets/together.yaml index 2bb3cb97..1ec0ab7c 100644 --- a/assets/together.yaml +++ b/assets/together.yaml @@ -1,134 +1,134 @@ --- - -- type: model - name: GPT-JT - organization: Together - description: '' - created_date: 2022-11-29 - url: https://www.together.xyz/blog/releasing-v1-of-gpt-jt-powered-by-open-source-ai - model_card: '' - modality: text +- access: open analysis: '' - size: 6B parameters (dense model) + created_date: 2022-11-29 dependencies: [GPT-J, P3, NaturalInstructions-v2] + description: '' + feedback: '' + intended_uses: '' + license: Apache 2.0 + modality: + explanation: text + value: text; text + model_card: '' + monitoring: '' + name: GPT-JT + organization: Together + prohibited_uses: '' + quality_control: '' + size: 6B parameters (dense) training_emissions: '' - training_time: '' training_hardware: '' - quality_control: '' - access: open - license: Apache 2.0 + training_time: '' + type: model + url: https://www.together.xyz/blog/releasing-v1-of-gpt-jt-powered-by-open-source-ai +- access: open + analysis: '' + created_date: 2023-03-10 + dependencies: [GPT-NeoX, OIG-43M] + description: '' + feedback: '' intended_uses: '' - prohibited_uses: '' + license: Apache 2.0 + modality: + explanation: text + value: text; text + model_card: '' monitoring: '' - feedback: '' - -- type: model name: GPT-NeoXT-Chat-Base organization: Together - description: '' - created_date: 2023-03-10 - url: https://www.together.xyz/blog/openchatkit - model_card: '' - modality: text - analysis: '' - size: 20B parameters (dense model) - dependencies: [GPT-NeoX, OIG-43M] + prohibited_uses: '' + quality_control: '' + size: 20B parameters (dense) training_emissions: '' - training_time: '' training_hardware: '' - quality_control: '' - access: open - license: Apache 2.0 + training_time: '' + type: model + url: https://www.together.xyz/blog/openchatkit +- access: open + analysis: '' + created_date: 2023-03-10 + dependencies: [GPT-JT, OIG-moderation] + description: '' + feedback: '' intended_uses: '' - prohibited_uses: '' + license: Apache 2.0 + modality: + explanation: text + value: text; text + model_card: '' monitoring: '' - feedback: '' - -- type: model name: OpenChatKit moderation model organization: Together - description: '' - created_date: 2023-03-10 - url: https://www.together.xyz/blog/openchatkit - model_card: '' - modality: text - analysis: '' - size: 6B parameters (dense model) - dependencies: [GPT-JT, OIG-moderation] + prohibited_uses: '' + quality_control: '' + size: 6B parameters (dense) training_emissions: '' - training_time: '' training_hardware: '' - quality_control: '' - access: open - license: Apache 2.0 - intended_uses: '' - prohibited_uses: '' 
- monitoring: '' - feedback: '' - -- type: dataset - name: OIG-43M - organization: Together, LAION, Ontocord - description: '' + training_time: '' + type: model + url: https://www.together.xyz/blog/openchatkit +- access: open + analysis: '' created_date: 2023-03-10 - url: https://laion.ai/blog/oig-dataset/ datasheet: '' - modality: text - size: 43M instructions - sample: [] - analysis: '' dependencies: [P3, NaturalInstructions-v2, FLAN dataset] - included: '' + description: '' excluded: '' - quality_control: '' - access: open - license: Apache 2.0 + feedback: '' + included: '' intended_uses: '' - prohibited_uses: '' + license: Apache 2.0 + modality: text monitoring: '' - feedback: '' - -- type: dataset - name: OIG-moderation + name: OIG-43M organization: Together, LAION, Ontocord - description: '' - created_date: 2023-03-10 - url: https://laion.ai/blog/oig-dataset/ - datasheet: '' - modality: text - size: unknown + prohibited_uses: '' + quality_control: '' sample: [] + size: 43M instructions + type: dataset + url: https://laion.ai/blog/oig-dataset/ +- access: open analysis: '' + created_date: 2023-03-10 + datasheet: '' dependencies: [] - included: '' + description: '' excluded: '' - quality_control: '' - access: open - license: Apache 2.0 - intended_uses: '' - prohibited_uses: '' - monitoring: '' feedback: '' - -- type: dataset - name: RedPajama-Data - organization: Together - description: The RedPajama base dataset is a 1.2 trillion token fully-open dataset - created by following the recipe described in the LLaMA paper - created_date: 2022-04-17 - url: https://www.together.xyz/blog/redpajama - datasheet: https://huggingface.co/datasets/togethercomputer/RedPajama-Data-1T + included: '' + intended_uses: '' + license: Apache 2.0 modality: text - size: 1.2 trillion tokens + monitoring: '' + name: OIG-moderation + organization: Together, LAION, Ontocord + prohibited_uses: '' + quality_control: '' sample: [] + size: unknown + type: dataset + url: https://laion.ai/blog/oig-dataset/ +- access: open analysis: '' + created_date: 2022-04-17 + datasheet: https://huggingface.co/datasets/togethercomputer/RedPajama-Data-1T dependencies: [GitHub, Wikipedia] - included: '' + description: The RedPajama base dataset is a 1.2 trillion token fully-open dataset + created by following the recipe described in the LLaMA paper excluded: '' - quality_control: '' - access: open - license: Apache 2.0 + feedback: '' + included: '' intended_uses: '' - prohibited_uses: '' + license: Apache 2.0 + modality: text monitoring: '' - feedback: '' + name: RedPajama-Data + organization: Together + prohibited_uses: '' + quality_control: '' + sample: [] + size: 1.2 trillion tokens + type: dataset + url: https://www.together.xyz/blog/redpajama diff --git a/assets/transformify.yaml b/assets/transformify.yaml index 118b559f..59a59c34 100644 --- a/assets/transformify.yaml +++ b/assets/transformify.yaml @@ -1,23 +1,22 @@ --- - -- type: application - name: Transformify Automate - organization: Transformify - description: Transformify Automate is a platform for automated task integration - using natural language prompts. +- access: open + adaptation: '' created_date: 2023-05-30 - url: https://www.transformify.ai/automate dependencies: [GPT-4] - adaptation: '' - output_space: text and code - quality_control: '' - access: open - license: '' - terms_of_service: https://www.transformify.ai/legal-stuff + description: Transformify Automate is a platform for automated task integration + using natural language prompts. 
+ failures: '' + feedback: '' intended_uses: '' - prohibited_uses: '' + license: '' monitoring: '' - feedback: '' monthly_active_users: '' + name: Transformify Automate + organization: Transformify + output_space: text and code + prohibited_uses: '' + quality_control: '' + terms_of_service: https://www.transformify.ai/legal-stuff + type: application + url: https://www.transformify.ai/automate user_distribution: '' - failures: '' diff --git a/assets/trevor.yaml b/assets/trevor.yaml index 9c21533d..02126721 100644 --- a/assets/trevor.yaml +++ b/assets/trevor.yaml @@ -1,50 +1,38 @@ --- - -- type: application - name: Crisis Contact Simulator - # General - organization: The Trevor Project - description: > - Crisis Contact Simulator, developed as part of a collaboration with - Google.org, helps train The Trevor Project counselors by - mimicking to be a teen in crisis. Crisis Contact Simulator is used - as part of the training programs for the Trevor Project's 24/7 digital - crisis services that supports LGBTQ youth - [[Trevor Project Blog]](https://www.thetrevorproject.org/blog/the-trevor-project-launches-new-ai-tool-to-support-crisis-counselor-training/). +- access: + explanation: "The tool can only be accessed by the Trevor Project counselors\ + \ for training purposes.\n" + value: closed + adaptation: + explanation: "The base GPT-2 model was fine-tuned on the previous conversations\ + \ between new trainees and the Trevor Project counselors posing to be a fictional\ + \ persona in crisis.\n" + value: Fine-tuning created_date: + explanation: "The release date of the blog post introducing Crisis Contact Simulator\ + \ [[Trevor Project Blog]](https://www.thetrevorproject.org/blog/the-trevor-project-launches-new-ai-tool-to-support-crisis-counselor-training/).\n" value: 2021-03-24 - explanation: > - The release date of the blog post introducing Crisis Contact Simulator - [[Trevor Project Blog]](https://www.thetrevorproject.org/blog/the-trevor-project-launches-new-ai-tool-to-support-crisis-counselor-training/). - url: https://www.thetrevorproject.org/ - # Construction dependencies: [OpenAI API] - adaptation: - value: Fine-tuning - explanation: > - The base GPT-2 model was fine-tuned on the previous conversations between - new trainees and the Trevor Project counselors posing to be a fictional - persona in crisis. + description: "Crisis Contact Simulator, developed as part of a collaboration with\ + \ Google.org, helps train The Trevor Project counselors by mimicking a\ + \ teen in crisis. Crisis Contact Simulator is used as part of the training programs\ + \ for the Trevor Project's 24/7 digital crisis services that support LGBTQ\ + \ youth [[Trevor Project Blog]](https://www.thetrevorproject.org/blog/the-trevor-project-launches-new-ai-tool-to-support-crisis-counselor-training/).\n" + failures: unknown + feedback: unknown + intended_uses: "Training counselors\n" + license: unknown + monitoring: unknown + monthly_active_users: unknown + name: Crisis Contact Simulator + organization: The Trevor Project output_space: Dialogue + prohibited_uses: unknown quality_control: + explanation: "The tool can only be accessed by the Trevor Project counselors\ + \ for training purposes.\n" value: Limited release - explanation: > - The tool can only be accessed by the Trevor Project will be counsolors - for training purposes. - # Downstream - access: - value: closed - explanation: > - The tool can only be accessed by the Trevor Project will be counsolors - for training purposes.
- license: unknown terms_of_service: unknown - intended_uses: > - Training counselors - prohibited_uses: unknown - monitoring: unknown - feedback: unknown - # Deployment - monthly_active_users: unknown + type: application + url: https://www.thetrevorproject.org/ user_distribution: unknown - failures: unknown diff --git a/assets/tsinghua.yaml b/assets/tsinghua.yaml index 38fbaa3a..c8b21ee6 100644 --- a/assets/tsinghua.yaml +++ b/assets/tsinghua.yaml @@ -1,148 +1,149 @@ --- - -# models -- type: model - name: CodeGeeX - organization: Tsinghua - description: CodeGeeX is an autoregressive language model trained on code - created_date: 2022-09-20 - url: https://github.com/THUDM/CodeGeeX - model_card: none - modality: code +- access: + explanation: Model weights are available but gated via an [[application form]](https://models.aminer.cn/codegeex/download/request) + value: limited analysis: none - size: 13B parameters (dense model) + created_date: 2022-09-20 dependencies: [] - training_emissions: unknown - training_time: unknown - training_hardware: THUDM 1536 Ascend 910 (32GB) Cluster - quality_control: none - access: - value: limited - explanation: Model weights are available but gated via an [[application form]](https://models.aminer.cn/codegeex/download/request) + description: CodeGeeX is an autoregressive language model trained on code + feedback: none + intended_uses: none license: - value: Apache 2.0 explanation: The license is provided in the [[Github repository]](https://github.com/THUDM/CodeGeeX) - intended_uses: none - prohibited_uses: none + value: Apache 2.0 + modality: + explanation: code + value: code; code + model_card: none monitoring: none - feedback: none - -- type: model - name: CogView + name: CodeGeeX organization: Tsinghua - description: CogView is a transformer model for text-to-image generation + prohibited_uses: none + quality_control: none + size: 13B parameters (dense) + training_emissions: unknown + training_hardware: THUDM 1536 Ascend 910 (32GB) Cluster + training_time: unknown + type: model + url: https://github.com/THUDM/CodeGeeX +- access: + explanation: Model checkpoints available from [[Wudao-Wenhui]](https://resource.wudaoai.cn/home?ind=2&name=WuDao%20WenHui&id=1399364355975327744) + value: open + analysis: '' created_date: - value: 2021-05-26 explanation: The date the model paper was released - url: https://arxiv.org/abs/2105.13290 - model_card: none - modality: text (Chinese) and image - size: 4B parameters - analysis: '' + value: 2021-05-26 dependencies: [] + description: CogView is a transformer model for text-to-image generation + feedback: '' + intended_uses: '' + license: + explanation: "The license is provided in the [[Github repository]](https://github.com/THUDM/CogView)\n" + value: Apache 2.0 + modality: + explanation: text (Chinese) and image + value: image, text; image, text + model_card: none + monitoring: '' + name: CogView + organization: Tsinghua + prohibited_uses: '' + quality_control: '' + size: 4B parameters (dense) training_emissions: '' - training_time: '' training_hardware: '' - quality_control: '' - access: + training_time: '' + type: model + url: https://arxiv.org/abs/2105.13290 +- access: + explanation: The model checkpoints are available for download from [[BAAI]](https://model.baai.ac.cn/model-detail/100041) value: open - explanation: Model checkpoints available from [[Wudao-Wenhui]](https://resource.wudaoai.cn/home?ind=2&name=WuDao%20WenHui&id=1399364355975327744) + analysis: '' + created_date: + explanation: The date the model paper 
was released + value: 2022-04-28 + dependencies: [] + description: CogView 2 is a hierarchical transformer for text-to-image generation + feedback: '' + intended_uses: '' license: + explanation: "The license is provided in the [[Github repository]](https://github.com/THUDM/CogView2)\n" value: Apache 2.0 - explanation: > - The license is provided in the [[Github repository]](https://github.com/THUDM/CogView) - intended_uses: '' - prohibited_uses: '' + modality: + explanation: text (Chinese, English) and image + value: image, text; image, text + model_card: none monitoring: '' - feedback: '' - -- type: model name: CogView 2 organization: Tsinghua - description: CogView 2 is a hierarchical transformer for text-to-image generation - created_date: - value: 2022-04-28 - explanation: The date the model paper was released - url: https://arxiv.org/abs/2204.14217 - model_card: none - modality: text (Chinese, English) and image - size: 6B parameters - analysis: '' - dependencies: [] + prohibited_uses: '' + quality_control: '' + size: 6B parameters (dense) training_emissions: '' - training_time: '' training_hardware: '' - quality_control: '' - access: + training_time: '' + type: model + url: https://arxiv.org/abs/2204.14217 +- access: + explanation: Model checkpoints are available for download from https://github.com/THUDM/CogVideo value: open - explanation: The model checkpoints are available for download from [[BAAI]](https://model.baai.ac.cn/model-detail/100041) + analysis: '' + created_date: + explanation: The date the model paper was released + value: 2022-05-29 + dependencies: [] + description: CogVideo is a transformer model for text-to-video generation + feedback: '' + intended_uses: '' license: + explanation: "The license is provided in the [[Github repository]](https://github.com/THUDM/CogVideo)\n" value: Apache 2.0 - explanation: > - The license is provided in the [[Github repository]](https://github.com/THUDM/CogView2) - intended_uses: '' - prohibited_uses: '' + modality: + explanation: text (Chinese) and video + value: text, video; text, video + model_card: none monitoring: '' - feedback: '' - -- type: model name: CogVideo organization: Tsinghua - description: CogVideo is a transformer model for text-to-video generation - created_date: - value: 2022-05-29 - explanation: The date the model paper was released - url: https://arxiv.org/abs/2205.15868 - model_card: none - modality: text (Chinese) and video + prohibited_uses: '' + quality_control: '' size: unknown - analysis: '' - dependencies: [] training_emissions: '' - training_time: '' training_hardware: '' - quality_control: '' - access: + training_time: '' + type: model + url: https://arxiv.org/abs/2205.15868 +- access: + explanation: Model checkpoints are available from the [[GitHub repository]](https://github.com/THUDM/GLM-130B/blob/main/MODEL_LICENSE) value: open - explanation: Model checkpoints are available for download from https://github.com/THUDM/CogVideo - license: - value: Apache 2.0 - explanation: > - The license is provided in the [[Github repository]](https://github.com/THUDM/CogVideo) - intended_uses: '' - prohibited_uses: '' - monitoring: '' - feedback: '' - -- type: model - name: GLM-130B - organization: Tsinghua - description: GLM-130B is a bidirectional language model trained on English and - Chinese + analysis: '' created_date: - value: 2022-08-04 explanation: The date the model website was made public - url: https://keg.cs.tsinghua.edu.cn/glm-130b/ - model_card: none - modality: text (Chinese | English) - size: 130B 
parameters (dense model) - analysis: '' + value: 2022-08-04 dependencies: - The Pile - GLM-130B Chinese corpora - P3 - DeepStruct finetuning dataset - training_emissions: '' - training_time: '' - training_hardware: THUDM 96 DGX-A100 (40G) cluster - quality_control: '' - access: - value: open - explanation: Model checkpoints are available from the [[GitHub repository]](https://github.com/THUDM/GLM-130B/blob/main/MODEL_LICENSE) + description: GLM-130B is a bidirectional language model trained on English and + Chinese + feedback: '' + intended_uses: '' license: - value: GLM-130B License explanation: Unique model license. See the [[GitHub repository]](https://github.com/THUDM/GLM-130B/blob/main/MODEL_LICENSE) - intended_uses: '' - prohibited_uses: '' + value: GLM-130B License + modality: + explanation: text (Chinese | English) + value: text; text + model_card: none monitoring: '' - feedback: '' + name: GLM-130B + organization: Tsinghua + prohibited_uses: '' + quality_control: '' + size: 130B parameters (dense) + training_emissions: '' + training_hardware: THUDM 96 DGX-A100 (40G) cluster + training_time: '' + type: model + url: https://keg.cs.tsinghua.edu.cn/glm-130b/ diff --git a/assets/uae.yaml b/assets/uae.yaml index 127d82d0..9ed62419 100644 --- a/assets/uae.yaml +++ b/assets/uae.yaml @@ -1,50 +1,50 @@ --- - -- type: model - name: Falcon - organization: UAE Technology Innovation Institute - description: Falcon-40B is a 40B parameters causal decoder-only model built by - TII and trained on 1,000B tokens of RefinedWeb enhanced with curated corpora. - created_date: 2023-06-14 - url: https://huggingface.co/tiiuae/falcon-40b - model_card: https://huggingface.co/tiiuae/falcon-40b - modality: natural language text +- access: open analysis: '' - size: 40B parameters + created_date: 2023-06-14 dependencies: [RefinedWeb] - training_emissions: '' - training_time: 2 months - training_hardware: 384 A100 40GB GPUs - quality_control: '' - access: open - license: Apache 2.0 + description: Falcon-40B is a 40B parameters causal decoder-only model built by + TII and trained on 1,000B tokens of RefinedWeb enhanced with curated corpora. + feedback: https://huggingface.co/tiiuae/falcon-40b/discussions intended_uses: '' - prohibited_uses: irresponsible or harmful use or production use without adequate - assessment of risks and mitigation + license: Apache 2.0 + modality: + explanation: natural language text + value: text; text + model_card: https://huggingface.co/tiiuae/falcon-40b monitoring: None - feedback: https://huggingface.co/tiiuae/falcon-40b/discussions - -- type: dataset - name: RefinedWeb + name: Falcon organization: UAE Technology Innovation Institute - description: RefinedWeb is a high-quality five trillion tokens web-only English - pretraining dataset. + prohibited_uses: irresponsible or harmful use or production use without adequate + assessment of risks and mitigation + quality_control: '' + size: 40B parameters (dense) + training_emissions: '' + training_hardware: 384 A100 40GB GPUs + training_time: 2 months + type: model + url: https://huggingface.co/tiiuae/falcon-40b +- access: open + analysis: '' created_date: 2023-06-01 - url: https://arxiv.org/pdf/2306.01116.pdf datasheet: https://huggingface.co/datasets/tiiuae/falcon-refinedweb - modality: text - size: 600B tokens - sample: [] - analysis: '' dependencies: [] - included: '' + description: RefinedWeb is a high-quality five trillion tokens web-only English + pretraining dataset. 
excluded: '' - quality_control: '' - access: open + feedback: '' + included: '' + intended_uses: '' license: - value: unknown explanation: License can be found at https://huggingface.co/datasets/tiiuae/falcon-refinedweb - intended_uses: '' - prohibited_uses: '' + value: unknown + modality: text monitoring: '' - feedback: '' + name: RefinedWeb + organization: UAE Technology Innovation Institute + prohibited_uses: '' + quality_control: '' + sample: [] + size: 600B tokens + type: dataset + url: https://arxiv.org/pdf/2306.01116.pdf diff --git a/assets/uw.yaml b/assets/uw.yaml index 29a49c7d..15d2832c 100644 --- a/assets/uw.yaml +++ b/assets/uw.yaml @@ -1,23 +1,22 @@ --- - -- type: dataset - name: YT-Temporal-1B - organization: University of Washington - description: '' +- access: open + analysis: '' created_date: 2022-01-07 - url: https://arxiv.org/abs/2201.02639 datasheet: '' - modality: video - size: 20M videos - sample: [] - analysis: '' dependencies: [YouTube] - included: '' + description: '' excluded: '' - quality_control: '' - access: open - license: MIT + feedback: '' + included: '' intended_uses: '' - prohibited_uses: '' + license: MIT + modality: video monitoring: '' - feedback: '' + name: YT-Temporal-1B + organization: University of Washington + prohibited_uses: '' + quality_control: '' + sample: [] + size: 20M videos + type: dataset + url: https://arxiv.org/abs/2201.02639 diff --git a/assets/uwashington.yaml b/assets/uwashington.yaml index 32208f00..aded82ab 100644 --- a/assets/uwashington.yaml +++ b/assets/uwashington.yaml @@ -1,26 +1,27 @@ --- - -- type: model - name: Guanaco - organization: University of Washington +- access: open + analysis: Reports results on the Vicuna benchmark and compares performance level + and time expenditure with ChatGPT + created_date: 2023-05-23 + dependencies: [QLoRA, OASST1] description: Guanaco is a model family trained with QLORA, an efficient finetuning approach that reduces memory usage enough to finetune a 65B parameter model on a single 48GB GPU while preserving full 16-bit finetuning task performance. - created_date: 2023-05-23 - url: https://arxiv.org/pdf/2305.14314v1.pdf + feedback: '' + intended_uses: '' + license: MIT + modality: + explanation: natural language text + value: text; text model_card: '' - modality: natural language text - analysis: Reports results on the Vicuna benchmark and compares performance level - and time expenditure with ChatGPT - size: 33B parameters - dependencies: [QLoRA, OASST1] + monitoring: '' + name: Guanaco + organization: University of Washington + prohibited_uses: '' + quality_control: '' + size: 33B parameters (dense) training_emissions: '' - training_time: '' training_hardware: A single 24 GB GPU - quality_control: '' - access: open - license: MIT - intended_uses: '' - prohibited_uses: '' - monitoring: '' - feedback: '' + training_time: '' + type: model + url: https://arxiv.org/pdf/2305.14314v1.pdf diff --git a/assets/viable.yaml b/assets/viable.yaml index c29dce6e..35ab68cd 100644 --- a/assets/viable.yaml +++ b/assets/viable.yaml @@ -1,38 +1,28 @@ --- - -- type: application - name: Viable - # General - organization: Viable - description: > - Viable analyzes qualitative consumer feedback and provides summary feedback - to companies. 
+- access: limited + adaptation: unknown created_date: unknown - url: https://www.askviable.com/ - # Construction dependencies: [OpenAI API] - adaptation: unknown - output_space: Question and answer, summarization, sentiment analysis, topic identification - quality_control: unknown - # Downstream - access: limited - license: unknown - terms_of_service: https://www.askviable.com/terms-of-service - intended_uses: > - Intended to be used by companies to digest qualitative consumer feedback. - prohibited_uses: > - Prohibited uses are listed in the Terms of Service - [[Terms of Service]](https://www.askviable.com/terms-of-service). - The terms don't include statements specific to the use of the content - generated by the system or GPT-3. - monitoring: unknown + description: "Viable analyzes qualitative consumer feedback and provides summary\ + \ feedback to companies.\n" + failures: unknown feedback: + explanation: "The feedback mechanism isn't explicitly listed. Company contact\ + \ information is provided in the Terms of Service [[Terms of Service]](https://www.askviable.com/terms-of-service).\n" value: unknown - explanation: > - The feedback mechanism isn't explicitly listed. Company contact - information is provided in the Terms of Service - [[Terms of Service]](https://www.askviable.com/terms-of-service). - # Deployment + intended_uses: "Intended to be used by companies to digest qualitative consumer\ + \ feedback.\n" + license: unknown + monitoring: unknown monthly_active_users: unknown + name: Viable + organization: Viable + output_space: Question and answer, summarization, sentiment analysis, topic identification + prohibited_uses: "Prohibited uses are listed in the Terms of Service [[Terms of\ + \ Service]](https://www.askviable.com/terms-of-service). The terms don't include\ + \ statements specific to the use of the content generated by the system or GPT-3.\n" + quality_control: unknown + terms_of_service: https://www.askviable.com/terms-of-service + type: application + url: https://www.askviable.com/ user_distribution: unknown - failures: unknown diff --git a/assets/writer.yaml b/assets/writer.yaml index a56591b9..9879f42f 100644 --- a/assets/writer.yaml +++ b/assets/writer.yaml @@ -1,53 +1,55 @@ --- - -- type: model - name: Palmyra - organization: Writer - description: Palmyra is a privacy-first LLM for enterprises trained on business - and marketing writing. +- access: open + analysis: Evaluated on the SuperGLUE benchmark created_date: - value: 2023-01-01 explanation: The model was stated to be published in January, but which day is not specified on the website. - url: https://gpt3demo.com/apps/palmyra - model_card: https://huggingface.co/Writer/palmyra-base - modality: text - analysis: Evaluated on the SuperGLUE benchmark - size: 128M (Small), 5B (Base), 20B (Large) parameters + value: 2023-01-01 dependencies: [Writer dataset] - training_emissions: unknown - training_time: unknown - training_hardware: '' - quality_control: '' - access: open - license: Apache 2.0 + description: Palmyra is a privacy-first LLM for enterprises trained on business + and marketing writing. 
+ feedback: https://huggingface.co/Writer/palmyra-base/discussions intended_uses: generating text from a prompt - prohibited_uses: '' + license: Apache 2.0 + modality: + explanation: text + value: text; text + model_card: https://huggingface.co/Writer/palmyra-base monitoring: '' - feedback: https://huggingface.co/Writer/palmyra-base/discussions - -- type: model - name: Camel + name: Palmyra organization: Writer - description: Camel is an instruction-following large language model tailored for - advanced NLP and comprehension capabilities. + prohibited_uses: '' + quality_control: '' + size: 128M parameters (dense) + training_emissions: unknown + training_hardware: '' + training_time: unknown + type: model + url: https://gpt3demo.com/apps/palmyra +- access: open + analysis: '' created_date: - value: 2023-04-01 explanation: The model was stated to be published in April, but which day is not specified on the website. - url: https://chatcamel.vercel.app/ - model_card: https://huggingface.co/Writer/camel-5b-hf - modality: text - analysis: '' - size: 5B parameters + value: 2023-04-01 dependencies: [Palmyra, Camel dataset] - training_emissions: unknown - training_time: unknown - training_hardware: '' - quality_control: '' - access: open - license: Apache 2.0 + description: Camel is an instruction-following large language model tailored for + advanced NLP and comprehension capabilities. + feedback: https://huggingface.co/Writer/camel-5b-hf/discussions intended_uses: '' - prohibited_uses: '' + license: Apache 2.0 + modality: + explanation: text + value: text; text + model_card: https://huggingface.co/Writer/camel-5b-hf monitoring: '' - feedback: https://huggingface.co/Writer/camel-5b-hf/discussions + name: Camel + organization: Writer + prohibited_uses: '' + quality_control: '' + size: 5B parameters (dense) + training_emissions: unknown + training_hardware: '' + training_time: unknown + type: model + url: https://chatcamel.vercel.app/ diff --git a/assets/yandex.yaml b/assets/yandex.yaml index becdb8e5..0090546b 100644 --- a/assets/yandex.yaml +++ b/assets/yandex.yaml @@ -1,63 +1,59 @@ --- - -- type: model - name: YaLM - organization: Yandex - description: YaLM is a 100B parameter autoregressive model trained on 25% English - and 75% Russian text. +- access: open + analysis: '' created_date: - value: 2022-06-22 explanation: The date the blog post about YaLM was published. - url: https://medium.com/yandex/yandex-publishes-yalm-100b-its-the-largest-gpt-like-neural-network-in-open-source-d1df53d0e9a6 - model_card: '' - modality: text (English, Russian) - analysis: '' - size: 100B parameters (dense model) + value: 2022-06-22 dependencies: [The Pile, Yandex Russian Pretraining Dataset] - training_emissions: '' - training_time: '' - training_hardware: Yandex 800 A100 Cluster - quality_control: '' - access: open + description: YaLM is a 100B parameter autoregressive model trained on 25% English + and 75% Russian text. + feedback: '' + intended_uses: '' license: - value: Apache 2.0 explanation: '' - intended_uses: '' - prohibited_uses: '' + value: Apache 2.0 + modality: + explanation: text (English, Russian) + value: text; text + model_card: '' monitoring: '' - feedback: '' - -- type: application - name: Yandex Search + name: YaLM organization: Yandex - description: Yandex is a search engine and web portal. 
Yandex offers internet - search and other services - created_date: 2022-06-23 + prohibited_uses: '' + quality_control: '' + size: 100B parameters (dense) + training_emissions: '' + training_hardware: Yandex 800 A100 Cluster + training_time: '' + type: model url: https://medium.com/yandex/yandex-publishes-yalm-100b-its-the-largest-gpt-like-neural-network-in-open-source-d1df53d0e9a6 - dependencies: [YaLM] +- access: open adaptation: '' - output_space: '' - quality_control: '' - access: open + created_date: 2022-06-23 + dependencies: [YaLM] + description: Yandex is a search engine and web portal. Yandex offers internet + search and other services + failures: '' + feedback: '' + intended_uses: '' license: + explanation: "\"3.1. The Rightholder, on the basis of a simple (non-exclusive)\ + \ license, shall grant the User a free, worldwide, non-transferable right\ + \ to use the Software as set forth below:\n3.1.1. To use the Software in accordance\ + \ with its functionality by copying and installing it on the User's personal\ + \ device(s). The User shall be entitled to install the Software on any number\ + \ of personal devices.\n3.1.2. To reproduce and to distribute this Software\ + \ in an unmodified form and solely for non-commercial purposes (free of charge).\"\ + \ - excerpt from the Terms of Service document for browsers.\n" value: custom - explanation: > - "3.1. The Rightholder, on the basis of a simple (non-exclusive) license, - shall grant the User a free, worldwide, non-transferable right to use the - Software as set forth below: - - 3.1.1. To use the Software in accordance with its functionality by - copying and installing it on the User's personal device(s). The User shall - be entitled to install the Software on any number of personal devices. - - 3.1.2. To reproduce and to distribute this Software in an unmodified form - and solely for non-commercial purposes (free of charge)." - excerpt - from the Terms of Service document for browsers. - terms_of_service: https://yandex.com/legal/browser_agreement/ - intended_uses: '' - prohibited_uses: '' monitoring: '' - feedback: '' monthly_active_users: '' + name: Yandex Search + organization: Yandex + output_space: '' + prohibited_uses: '' + quality_control: '' + terms_of_service: https://yandex.com/legal/browser_agreement/ + type: application + url: https://medium.com/yandex/yandex-publishes-yalm-100b-its-the-largest-gpt-like-neural-network-in-open-source-d1df53d0e9a6 user_distribution: '' - failures: '' diff --git a/assets/you.yaml b/assets/you.yaml index 60bf2a22..f7a1ffe3 100644 --- a/assets/you.yaml +++ b/assets/you.yaml @@ -1,77 +1,71 @@ --- - -- type: dataset - name: You dataset - organization: You - description: '' +- access: closed + analysis: '' created_date: unknown - url: https://you.com/ datasheet: '' - modality: text - size: unknown - sample: [] - analysis: '' dependencies: [] - included: '' + description: '' excluded: '' - quality_control: '' - access: closed + feedback: '' + included: '' + intended_uses: '' license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. 
- intended_uses: '' - prohibited_uses: '' + modality: {} monitoring: '' - feedback: '' - -- type: model - name: You model + name: You dataset organization: You - description: '' - created_date: unknown + prohibited_uses: '' + quality_control: '' + sample: [] + size: unknown + type: dataset url: https://you.com/ - model_card: '' - modality: Text +- access: closed analysis: '' - size: unknown + created_date: unknown dependencies: [You dataset] - training_emissions: '' - training_time: '' - training_hardware: '' - quality_control: '' - access: closed + description: '' + feedback: '' + intended_uses: '' license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - intended_uses: '' - prohibited_uses: '' + modality: {} + model_card: '' monitoring: '' - feedback: '' - -- type: application - name: You Search + name: You model organization: You + prohibited_uses: '' + quality_control: '' + size: '' + training_emissions: '' + training_hardware: '' + training_time: '' + type: model + url: https://you.com/ +- access: open + adaptation: '' + created_date: unknown + dependencies: [You model] description: You.com is a search engine built on artificial intelligence that provides users with a customized search experience while keeping their data 100% private. - created_date: unknown - url: https://you.com/ - dependencies: [You model] - adaptation: '' - output_space: '' - quality_control: '' - access: open + failures: '' + feedback: '' + intended_uses: '' license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - terms_of_service: '' - intended_uses: '' - prohibited_uses: '' monitoring: '' - feedback: '' monthly_active_users: '' + name: You Search + organization: You + output_space: '' + prohibited_uses: '' + quality_control: '' + terms_of_service: '' + type: application + url: https://you.com/ user_distribution: '' - failures: '' From 5c92804cd669f8cf6ed378e5c7e4c1d54460bdb2 Mon Sep 17 00:00:00 2001 From: Rishi Bommasani Date: Wed, 2 Aug 2023 17:31:39 -0700 Subject: [PATCH 10/10] Revert "standardize features" This reverts commit 5d19360ee3129f3123fc3586081df8fb34df674d. 
--- assets/adept.yaml | 44 +- assets/ai2.yaml | 130 +- assets/ai21.yaml | 472 ++++--- assets/aleph_alpha.yaml | 180 +-- assets/alibaba.yaml | 69 +- assets/amazon.yaml | 35 +- assets/anthropic.yaml | 790 ++++++----- assets/argonne.yaml | 37 +- assets/assembly.yaml | 167 +-- assets/autogpt.yaml | 31 +- assets/baai.yaml | 95 +- assets/baidu.yaml | 137 +- assets/bain.yaml | 31 +- assets/be_my_eyes.yaml | 31 +- assets/berkeley.yaml | 111 +- assets/bigcode.yaml | 115 +- assets/bigscience.yaml | 318 ++--- assets/bloomberg.yaml | 225 +-- assets/brex.yaml | 38 +- assets/casia.yaml | 39 +- assets/cerebras.yaml | 121 +- assets/chatglm.yaml | 37 +- assets/cmu.yaml | 53 +- assets/cohere.yaml | 937 ++++++++----- assets/columbia.yaml | 35 +- assets/databricks.yaml | 103 +- assets/deepmind.yaml | 1202 +++++++++------- assets/duckduckgo.yaml | 34 +- assets/duolingo.yaml | 110 +- assets/eleutherai.yaml | 460 ++++--- assets/ens.yaml | 82 +- assets/faraday.yaml | 35 +- assets/google.yaml | 2834 ++++++++++++++++++++------------------ assets/h2oai.yaml | 37 +- assets/hubspot.yaml | 34 +- assets/huggingface.yaml | 45 +- assets/inflection.yaml | 68 +- assets/instacart.yaml | 31 +- assets/juni.yaml | 29 +- assets/kakaobrain.yaml | 93 +- assets/khan.yaml | 34 +- assets/laion.yaml | 212 +-- assets/latitude.yaml | 58 +- assets/lehigh.yaml | 43 +- assets/linkedin.yaml | 31 +- assets/lmsys.yaml | 37 +- assets/meta.yaml | 1021 +++++++------- assets/microsoft.yaml | 1204 ++++++++-------- assets/mosaic.yaml | 37 +- assets/nanyang.yaml | 41 +- assets/naver.yaml | 47 +- assets/neeva.yaml | 114 +- assets/nextdoor.yaml | 31 +- assets/nolano.yaml | 31 +- assets/notion.yaml | 31 +- assets/nvidia.yaml | 147 +- assets/openai.yaml | 2359 +++++++++++++++++-------------- assets/othersideai.yaml | 57 +- assets/oxford.yaml | 191 +-- assets/paladin.yaml | 29 +- assets/perplexity.yaml | 68 +- assets/portkey.yaml | 31 +- assets/quizlet.yaml | 33 +- assets/quora.yaml | 37 +- assets/robin.yaml | 33 +- assets/salesforce.yaml | 143 +- assets/sana.yaml | 77 +- assets/shanghai.yaml | 72 +- assets/shop.yaml | 31 +- assets/singapore.yaml | 41 +- assets/snap.yaml | 38 +- assets/speak.yaml | 31 +- assets/spotify.yaml | 41 +- assets/stability.yaml | 101 ++ assets/stanford.yaml | 113 +- assets/stonybrook.yaml | 95 +- assets/suno.yaml | 35 +- assets/together.yaml | 196 +-- assets/transformify.yaml | 31 +- assets/trevor.yaml | 70 +- assets/tsinghua.yaml | 217 ++- assets/uae.yaml | 74 +- assets/uw.yaml | 31 +- assets/uwashington.yaml | 39 +- assets/viable.yaml | 52 +- assets/writer.yaml | 80 +- assets/yandex.yaml | 92 +- assets/you.yaml | 100 +- 88 files changed, 9305 insertions(+), 7927 deletions(-) diff --git a/assets/adept.yaml b/assets/adept.yaml index ecb2b1a7..1db1c261 100644 --- a/assets/adept.yaml +++ b/assets/adept.yaml @@ -1,30 +1,30 @@ --- -- access: closed - analysis: '' - created_date: - explanation: The date the model was announced in the [[Adept blog post]](https://www.adept.ai/blog/act-1). - value: 2022-09-14 - dependencies: [] + +- type: model + name: ACT-1 + organization: Adept description: ACT-1 (ACtion Transformer) is a large-scale transformer model designed and trained specifically for taking actions on computers (use software tools, APIs and websites) in response to the user's natural language commands. 
- feedback: '' - intended_uses: '' - license: - explanation: "The asset isn't released, and hence the license is unknown.\n" - value: unknown - modality: - explanation: text - value: text; text + created_date: + value: 2022-09-14 + explanation: The date the model was announced in the [[Adept blog post]](https://www.adept.ai/blog/act-1). + url: https://www.adept.ai/blog/act-1 model_card: none - monitoring: '' - name: ACT-1 - organization: Adept - prohibited_uses: '' - quality_control: '' + modality: text + analysis: '' size: '' + dependencies: [] training_emissions: unknown - training_hardware: unknown training_time: unknown - type: model - url: https://www.adept.ai/blog/act-1 + training_hardware: unknown + quality_control: '' + access: closed + license: + value: unknown + explanation: > + The asset isn't released, and hence the license is unknown. + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' diff --git a/assets/ai2.yaml b/assets/ai2.yaml index 300e1b3a..2c890050 100644 --- a/assets/ai2.yaml +++ b/assets/ai2.yaml @@ -1,95 +1,95 @@ --- -- access: open - analysis: '' + +- type: dataset + name: NaturalInstructions-v2 + organization: AI2 + description: '' created_date: 2022-04-16 + url: https://arxiv.org/abs/2204.07705 datasheet: '' + modality: text + size: 1600 tasks + sample: [] + analysis: '' dependencies: [] - description: '' - excluded: '' - feedback: '' included: '' - intended_uses: '' + excluded: '' + quality_control: '' + access: open license: Apache 2.0 - modality: text + intended_uses: '' + prohibited_uses: '' monitoring: '' - name: NaturalInstructions-v2 + feedback: '' + +- type: dataset + name: SODA organization: AI2 - prohibited_uses: '' - quality_control: '' + description: SODA is the first publicly available, million-scale, high-quality + dialogue dataset covering a wide range of social interactions. + created_date: 2023-05-24 + url: https://arxiv.org/pdf/2212.10465.pdf + datasheet: https://huggingface.co/datasets/allenai/soda + modality: text + size: 1.5M dialogues sample: [] - size: 1600 tasks - type: dataset - url: https://arxiv.org/abs/2204.07705 -- access: open analysis: Randomly sampled dialogues from dataset are evaluated according to six established criteria of natural flow, context dependence, topic consistency, speaker consistency, specificity, and overall. - created_date: 2023-05-24 - datasheet: https://huggingface.co/datasets/allenai/soda dependencies: [] - description: SODA is the first publicly available, million-scale, high-quality - dialogue dataset covering a wide range of social interactions. - excluded: '' - feedback: '' included: '' - intended_uses: '' + excluded: '' + quality_control: '' + access: open license: CC BY 4.0 - modality: text + intended_uses: '' + prohibited_uses: '' monitoring: '' - name: SODA + feedback: '' + +- type: dataset + name: Multimodal C4 organization: AI2 - prohibited_uses: '' - quality_control: '' + description: An augmentation of C4 with images added and made openly available. 
+ created_date: 2023-06-09 + url: https://arxiv.org/pdf/2304.06939.pdf + datasheet: '' + modality: text and images + size: 43B English tokens with 101.2M documents and 571M images sample: [] - size: 1.5M dialogues - type: dataset - url: https://arxiv.org/pdf/2212.10465.pdf -- access: open analysis: Conducted experiments on models trained with Multimodal C4 in comparison to models trained on single image/caption datasets - created_date: 2023-06-09 - datasheet: '' dependencies: [C4] - description: An augmentation of C4 with images added and made openly available. - excluded: '' - feedback: '' included: '' - intended_uses: '' + excluded: '' + quality_control: '' + access: open license: MIT - modality: - explanation: text and images - value: image, text + intended_uses: '' + prohibited_uses: '' monitoring: '' - name: Multimodal C4 + feedback: '' + +- type: model + name: COSMO organization: AI2 - prohibited_uses: '' - quality_control: '' - sample: [] - size: 43B English tokens with 101.2M documents and 571M images - type: dataset - url: https://arxiv.org/pdf/2304.06939.pdf -- access: open - analysis: Evaluated by human testers on generalization capabilities and responses - compared to other chatbots. - created_date: 2023-05-24 - dependencies: [SODA, ProsocialDialog, T5] description: COSMO is a conversation agent with greater generalizability on both in- and out-of-domain chitchat datasets - feedback: https://huggingface.co/allenai/cosmo-xl/discussions - intended_uses: '' - license: '' - modality: - explanation: text - value: text; text + created_date: 2023-05-24 + url: https://arxiv.org/pdf/2212.10465.pdf model_card: https://huggingface.co/allenai/cosmo-xl - monitoring: '' - name: COSMO - organization: AI2 - prohibited_uses: '' - quality_control: '' - size: 11B parameters (dense) + modality: text + analysis: Evaluated by human testers on generalization capabilities and responses + compared to other chatbots. + size: 11B parameters + dependencies: [SODA, ProsocialDialog, T5] training_emissions: unknown - training_hardware: v3-128 TPU accelerators with batch size 256 training_time: unknown - type: model - url: https://arxiv.org/pdf/2212.10465.pdf + training_hardware: v3-128 TPU accelerators with batch size 256 + quality_control: '' + access: open + license: '' + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: https://huggingface.co/allenai/cosmo-xl/discussions diff --git a/assets/ai21.yaml b/assets/ai21.yaml index 4a794e68..2da4e56c 100644 --- a/assets/ai21.yaml +++ b/assets/ai21.yaml @@ -1,319 +1,365 @@ --- -- access: closed - analysis: - explanation: No analysis is provided in the paper. - value: none + +# Datasets +- type: dataset + name: Jurassic-1 dataset + # General + organization: AI21 Labs + description: > + The dataset used to train the Jurassic-1 models, based on publicly available + data. created_date: - explanation: The date the white paper was made public. value: 2021-08-11 + explanation: The date the white paper was made public. + url: https://uploads-ssl.webflow.com/60fd4503684b466578c0d307/61138924626a6981ee09caf6_jurassic_tech_paper.pdf datasheet: none + modality: text + size: 300B tokens + sample: [] + analysis: + value: none + explanation: No analysis is provided in the paper. 
+ # Construction dependencies: [] - description: "The dataset used to train the Jurassic-1 models, based on publicly\ - \ available data.\n" - excluded: unknown - feedback: none - included: unknown - intended_uses: unknown license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - modality: text - monitoring: none - name: Jurassic-1 dataset - organization: AI21 Labs - prohibited_uses: - explanation: No uses are prohibited in the paper. - value: none + explanation: > + The asset isn't released, and hence the license is unknown. + included: unknown + excluded: unknown quality_control: + value: none explanation: No quality control is provided in the paper. + # Downstream + access: closed + intended_uses: unknown + prohibited_uses: value: none - sample: [] - size: 300B tokens - type: dataset - url: https://uploads-ssl.webflow.com/60fd4503684b466578c0d307/61138924626a6981ee09caf6_jurassic_tech_paper.pdf -- access: closed - analysis: unknown + explanation: No uses are prohibited in the paper. + monitoring: none + feedback: none + +- type: dataset + name: Jurassic-1 Instruct dataset + # General + organization: AI21 Labs + description: > + The dataset used to instruction-tune the Jurassic-1 Instruct models. created_date: - explanation: "The date the model was [announced](https://docs.ai21.com/changelog/jurassic-1-instruct-is-now-available).\n" value: 2022-12-01 + explanation: > + The date the model was [announced](https://docs.ai21.com/changelog/jurassic-1-instruct-is-now-available). + url: none datasheet: none + modality: text + size: unknown + sample: [] + analysis: unknown + # Construction dependencies: [] - description: "The dataset used to instruction-tune the Jurassic-1 Instruct models.\n" - excluded: unknown - feedback: none - included: unknown - intended_uses: unknown license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - modality: text + explanation: > + The asset isn't released, and hence the license is unknown. + included: unknown + excluded: unknown + quality_control: unknown + # Downstream + access: closed + intended_uses: unknown + prohibited_uses: unknown monitoring: none - name: Jurassic-1 Instruct dataset + feedback: none + +# Models +- type: model + name: Jurassic-1 organization: AI21 Labs - prohibited_uses: unknown - quality_control: unknown - sample: [] - size: unknown - type: dataset - url: none -- access: - explanation: Jurassic-1 is available through AI21's paid API. - value: limited + description: > + Jurassic-1 is a family of autoregressive language models (Large, Grande, Jumbo). + created_date: + value: 2021-08-11 + explanation: > + The date the white paper was made public. + url: https://uploads-ssl.webflow.com/60fd4503684b466578c0d307/61138924626a6981ee09caf6_jurassic_tech_paper.pdf + model_card: none + modality: text (English) + size: + value: 178B parameters (dense model) + explanation: The model sizes are 7.5B (Large), 17B (Grande), and 178B (Jumbo). analysis: - explanation: Results are reported in the [the paper](https://uploads-ssl.webflow.com/60fd4503684b466578c0d307/61138924626a6981ee09caf6_jurassic_tech_paper.pdf) value: Evaluated on several standard benchmarks (e.g. 
ARC, BoolQ, HellaSwag, RTE, Winogrande) - created_date: - explanation: "The date the white paper was made public.\n" - value: 2021-08-11 + explanation: Results are reported in the [the paper](https://uploads-ssl.webflow.com/60fd4503684b466578c0d307/61138924626a6981ee09caf6_jurassic_tech_paper.pdf) dependencies: [Jurassic-1 dataset] - description: "Jurassic-1 is a family of autoregressive language models (Large,\ - \ Grande, Jumbo).\n" - feedback: none - intended_uses: unknown + training_emissions: unknown + training_time: + value: Several months + explanation: Training details from [the paper](https://uploads-ssl.webflow.com/60fd4503684b466578c0d307/61138924626a6981ee09caf6_jurassic_tech_paper.pdf) + training_hardware: + value: Over 800 [A100] GPUs + explanation: Training details from [the paper](https://uploads-ssl.webflow.com/60fd4503684b466578c0d307/61138924626a6981ee09caf6_jurassic_tech_paper.pdf) + quality_control: none + access: + value: limited + explanation: Jurassic-1 is available through AI21's paid API. license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - modality: - explanation: text (English) - value: text; text - model_card: none - monitoring: none - name: Jurassic-1 - organization: AI21 Labs + explanation: > + The asset isn't released, and hence the license is unknown. + intended_uses: unknown prohibited_uses: - explanation: https://docs.ai21.com/docs/responsible-use value: Illegal activities, such as hate speech, gambling, child pornography or violating intellectual property rights; Harassment, victimization, intimidation, fraud or spam; Creation or dissemination of misinformation, promotion of self-harm, glorification of violent events or incitement of violence. - quality_control: none - size: 178B parameters (dense) - training_emissions: unknown - training_hardware: - explanation: Training details from [the paper](https://uploads-ssl.webflow.com/60fd4503684b466578c0d307/61138924626a6981ee09caf6_jurassic_tech_paper.pdf) - value: Over 800 [A100] GPUs - training_time: - explanation: Training details from [the paper](https://uploads-ssl.webflow.com/60fd4503684b466578c0d307/61138924626a6981ee09caf6_jurassic_tech_paper.pdf) - value: Several months - type: model - url: https://uploads-ssl.webflow.com/60fd4503684b466578c0d307/61138924626a6981ee09caf6_jurassic_tech_paper.pdf -- access: - explanation: Jurassic-1 Instruct is available through AI21's paid API. - value: limited - analysis: none + explanation: https://docs.ai21.com/docs/responsible-use + monitoring: none + feedback: none + +- type: model + name: Jurassic-1 Instruct + organization: AI21 Labs + description: > + Jurassic-1 Instruct is an instruction-tuned autoregressive language model. created_date: - explanation: "The date the model was [announced](https://docs.ai21.com/changelog/jurassic-1-instruct-is-now-available).\n" value: 2022-12-01 + explanation: > + The date the model was [announced](https://docs.ai21.com/changelog/jurassic-1-instruct-is-now-available). 
+ url: https://docs.ai21.com/docs/jurassic-1-instruct-beta + model_card: none + modality: text (English) + size: 17B parameters (dense model) + analysis: none dependencies: [Jurassic-1, Jurassic-1 Instruct dataset] - description: "Jurassic-1 Instruct is an instruction-tuned autoregressive language\ - \ model.\n" - feedback: none + training_emissions: unknown + training_time: unknown + training_hardware: unknown + quality_control: unknown + access: + value: limited + explanation: Jurassic-1 Instruct is available through AI21's paid API. + license: + value: unknown + explanation: > + The asset isn't released, and hence the license is unknown. intended_uses: Jurassic-1 Instruct was trained specifically to handle instructions-only prompts ("zero-shot") without examples ("few-shot"). It is the most natural way to interact with language models, and it is the best way to get a sense of the optimal output for your task without any examples. - license: - explanation: "The asset isn't released, and hence the license is unknown.\n" - value: unknown - modality: - explanation: text (English) - value: text; text - model_card: none - monitoring: none - name: Jurassic-1 Instruct - organization: AI21 Labs prohibited_uses: - explanation: https://docs.ai21.com/docs/responsible-use value: Illegal activities, such as hate speech, gambling, child pornography or violating intellectual property rights; Harassment, victimization, intimidation, fraud or spam; Creation or dissemination of misinformation, promotion of self-harm, glorification of violent events or incitement of violence. - quality_control: unknown - size: 17B parameters (dense) + explanation: https://docs.ai21.com/docs/responsible-use + monitoring: none + feedback: none + +- type: model + name: Jurassic-2 + organization: AI21 Labs + description: > + Jurassic-2 a family of language model designed to replace Jurassic-1. + created_date: 2023-03-09 + url: https://docs.ai21.com/docs/jurassic-2-models + model_card: none + modality: text (English) + size: unknown + analysis: The model was evaluated on the HELM benchmark as discussed in https://www.ai21.com/blog/introducing-j2. + dependencies: [] training_emissions: unknown - training_hardware: unknown training_time: unknown - type: model - url: https://docs.ai21.com/docs/jurassic-1-instruct-beta -- access: - explanation: Jurassic-2 is available through AI21's paid API. + training_hardware: unknown + quality_control: unknown + access: value: limited - analysis: The model was evaluated on the HELM benchmark as discussed in https://www.ai21.com/blog/introducing-j2. - created_date: 2023-03-09 - dependencies: [] - description: "Jurassic-2 a family of language model designed to replace Jurassic-1.\n" - feedback: none - intended_uses: '' + explanation: Jurassic-2 is available through AI21's paid API. license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - modality: - explanation: text (English) - value: text; text - model_card: none - monitoring: unknown - name: Jurassic-2 - organization: AI21 Labs + explanation: > + The asset isn't released, and hence the license is unknown. + intended_uses: '' prohibited_uses: - explanation: https://docs.ai21.com/docs/responsible-use value: Illegal activities, such as hate speech, gambling, child pornography or violating intellectual property rights; Harassment, victimization, intimidation, fraud or spam; Creation or dissemination of misinformation, promotion of self-harm, glorification of violent events or incitement of violence. 
- quality_control: unknown - size: unknown - training_emissions: unknown - training_hardware: unknown - training_time: unknown - type: model - url: https://docs.ai21.com/docs/jurassic-2-models -- access: limited - adaptation: The Playground provides direct access to the language models (Complete - API) as well as wrapped for Rewrite and Summarize. + explanation: https://docs.ai21.com/docs/responsible-use + monitoring: unknown + feedback: none + +# Applications +- type: application + name: AI21 Playground + # General + organization: AI21 Labs + description: The AI21 Labs Playground supports several task-specific APIs in addition + to a variety of models. created_date: - explanation: The date Jurassic-1 was made public. value: 2021-08-11 + explanation: The date Jurassic-1 was made public. + url: https://studio.ai21.com/playground/ + # Construction dependencies: - Jurassic-1 - Jurassic-1 Instruct - Jurassic-2 - AI21 Summarization API - AI21 Paraphrase API - description: The AI21 Labs Playground supports several task-specific APIs in addition - to a variety of models. - failures: unknown - feedback: Feedback can be given by emailing at info at ai21.com - intended_uses: The intended uses are text completion, rewriting, and summarization. - license: - explanation: No license was found. - value: none - monitoring: unknown - monthly_active_users: unknown - name: AI21 Playground - organization: AI21 Labs + adaptation: The Playground provides direct access to the language models (Complete + API) as well as wrapped for Rewrite and Summarize. output_space: + value: text explanation: The Playground supports different use cases that all involve text generation. - value: text + quality_control: unknown + # Downstream + access: limited + terms_of_service: https://www.ai21.com/terms-of-use + license: + value: none + explanation: No license was found. + intended_uses: The intended uses are text completion, rewriting, and summarization. prohibited_uses: - explanation: https://docs.ai21.com/docs/responsible-use value: Illegal activities, such as hate speech, gambling, child pornography or violating intellectual property rights; Harassment, victimization, intimidation, fraud or spam; Creation or dissemination of misinformation, promotion of self-harm, glorification of violent events or incitement of violence. - quality_control: unknown - terms_of_service: https://www.ai21.com/terms-of-use - type: application - url: https://studio.ai21.com/playground/ + explanation: https://docs.ai21.com/docs/responsible-use + monitoring: unknown + feedback: Feedback can be given by emailing at info at ai21.com + # Deployment + monthly_active_users: unknown user_distribution: unknown -- access: limited - adaptation: The AI21 language models are further specialized to the task of paraphrasing. - created_date: - explanation: The date Jurassic-2 and Task-Specific APIs were released. - value: 2023-03-09 - dependencies: [Jurassic-2] + failures: unknown + +- type: application + name: AI21 Paraphrase API + # General + organization: AI21 Labs description: AI21 Studio's Paraphrase API offers access to our world-class paraphrasing engine. It has been specifically developed for suggesting alternative ways to convey the same message using different words. - failures: unknown - feedback: Feedback can be given by emailing at info at ai21.com - intended_uses: The intended uses are text paraphrasing. + created_date: + value: 2023-03-09 + explanation: The date Jurassic-2 and Task-Specific APIs were released. 
+ url: https://docs.ai21.com/docs/paraphrase-api + # Construction + dependencies: [Jurassic-2] + adaptation: The AI21 language models are further specialized to the task of paraphrasing. + output_space: text + quality_control: unknown + # Downstream + access: limited + terms_of_service: https://www.ai21.com/terms-of-use license: - explanation: No license was found. value: none - monitoring: unknown - monthly_active_users: unknown - name: AI21 Paraphrase API - organization: AI21 Labs - output_space: text + explanation: No license was found. + intended_uses: The intended uses are text paraphrasing. prohibited_uses: - explanation: https://docs.ai21.com/docs/responsible-use value: Illegal activities, such as hate speech, gambling, child pornography or violating intellectual property rights; Harassment, victimization, intimidation, fraud or spam; Creation or dissemination of misinformation, promotion of self-harm, glorification of violent events or incitement of violence. - quality_control: unknown - terms_of_service: https://www.ai21.com/terms-of-use - type: application - url: https://docs.ai21.com/docs/paraphrase-api + explanation: https://docs.ai21.com/docs/responsible-use + monitoring: unknown + feedback: Feedback can be given by emailing at info at ai21.com + # Deployment + monthly_active_users: unknown user_distribution: unknown -- access: limited - adaptation: The AI21 language models are further specialized to the task of summarization. - created_date: - explanation: The date Jurassic-2 and Task-Specific APIs were released. - value: 2023-03-09 - dependencies: [Jurassic-2] + failures: unknown + +- type: application + name: AI21 Summarization API + # General + organization: AI21 Labs description: AI21 Studio's Summarize API offers access to our world-class summarization engine. It has been specifically developed for reading long texts and providing a faithful summary of the original document. - failures: unknown - feedback: Feedback can be given by emailing at info at ai21.com - intended_uses: The intended uses are text paraphrasing. + created_date: + value: 2023-03-09 + explanation: The date Jurassic-2 and Task-Specific APIs were released. + url: https://docs.ai21.com/docs/summarize-api + # Construction + dependencies: [Jurassic-2] + adaptation: The AI21 language models are further specialized to the task of summarization. + output_space: text + quality_control: unknown + # Downstream + access: limited + terms_of_service: https://www.ai21.com/terms-of-use license: - explanation: No license was found. value: none - monitoring: unknown - monthly_active_users: unknown - name: AI21 Summarization API - organization: AI21 Labs - output_space: text + explanation: No license was found. + intended_uses: The intended uses are text paraphrasing. prohibited_uses: - explanation: https://docs.ai21.com/docs/responsible-use value: Illegal activities, such as hate speech, gambling, child pornography or violating intellectual property rights; Harassment, victimization, intimidation, fraud or spam; Creation or dissemination of misinformation, promotion of self-harm, glorification of violent events or incitement of violence. 
- quality_control: unknown - terms_of_service: https://www.ai21.com/terms-of-use - type: application - url: https://docs.ai21.com/docs/summarize-api - user_distribution: unknown -- access: limited - adaptation: unknown - created_date: - explanation: https://www.businesswire.com/news/home/20201027005162/en/AI21-Labs-Comes-out-of-Stealth-and-Launches-First-Deep-Tech-Writing-Assistant-Wordtune - value: 2020-10-27 - dependencies: [AI21 Paraphrase API] - description: Wordtune, the first AI-based writing companion that understands context - and meaning. - failures: unknown - feedback: '' - intended_uses: The Wordtune assistant is a writing assistant - license: - explanation: License can be found in the terms of service document. - value: Wordtune License + explanation: https://docs.ai21.com/docs/responsible-use monitoring: unknown + feedback: Feedback can be given by emailing at info at ai21.com + # Deployment monthly_active_users: unknown + user_distribution: unknown + failures: unknown + +- type: application name: Wordtune + # General organization: AI21 Labs + description: Wordtune, the first AI-based writing companion that understands context + and meaning. + created_date: + value: 2020-10-27 + explanation: https://www.businesswire.com/news/home/20201027005162/en/AI21-Labs-Comes-out-of-Stealth-and-Launches-First-Deep-Tech-Writing-Assistant-Wordtune + url: https://www.wordtune.com/ + # Construction + dependencies: [AI21 Paraphrase API] + adaptation: unknown output_space: text - prohibited_uses: '' quality_control: unknown + # Downstream + access: limited terms_of_service: https://www.wordtune.com/terms-of-use - type: application - url: https://www.wordtune.com/ - user_distribution: unknown -- access: limited - adaptation: unknown - created_date: 2021-11-16 - dependencies: [AI21 Summarize API] - description: Wordtune Read is an AI reader that summarizes long documents so you - can understand more, faster. - failures: unknown - feedback: '' - intended_uses: '' license: - explanation: License can be found in the terms of service document. value: Wordtune License + explanation: License can be found in the terms of service document. + intended_uses: The Wordtune assistant is a writing assistant + prohibited_uses: '' monitoring: unknown + feedback: '' + # Deployment monthly_active_users: unknown + user_distribution: unknown + failures: unknown + +- type: application name: Wordtune Read + # General organization: AI21 Labs + description: Wordtune Read is an AI reader that summarizes long documents so you + can understand more, faster. + created_date: 2021-11-16 + url: https://www.wordtune.com/read + # Construction + dependencies: [AI21 Summarize API] + adaptation: unknown output_space: text - prohibited_uses: '' quality_control: unknown + # Downstream + access: limited terms_of_service: https://www.wordtune.com/terms-of-use - type: application - url: https://www.wordtune.com/read + license: + value: Wordtune License + explanation: License can be found in the terms of service document. 
+ intended_uses: '' + prohibited_uses: '' + monitoring: unknown + feedback: '' + # Deployment + monthly_active_users: unknown user_distribution: unknown + failures: unknown diff --git a/assets/aleph_alpha.yaml b/assets/aleph_alpha.yaml index 063b73dd..587a9aca 100644 --- a/assets/aleph_alpha.yaml +++ b/assets/aleph_alpha.yaml @@ -1,113 +1,125 @@ --- -- access: closed - analysis: unknown + +# Datasets +- type: dataset + name: Luminous dataset + # General + organization: Aleph Alpha + description: The dataset used to train the Luminous models. created_date: - explanation: Date the models were announced by the Aleph Alpha twitter account value: 2022-04-14 + explanation: Date the models were announced by the Aleph Alpha twitter account + url: none datasheet: none + modality: text (English, French | français, German | Deutsch, Italian | italiano, + Spanish | Español) + size: unknown + sample: [] + analysis: unknown + # Construction dependencies: [] - description: The dataset used to train the Luminous models. - excluded: unknown - feedback: none - included: unknown - intended_uses: unknown license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - modality: - explanation: text (English, French | français, German | Deutsch, Italian | italiano, - Spanish | Español) - value: text + explanation: > + The asset isn't released, and hence the license is unknown. + included: unknown + excluded: unknown + quality_control: unknown + # Downstream + access: closed + intended_uses: unknown + prohibited_uses: unknown monitoring: none - name: Luminous dataset + feedback: none + +- type: model + name: Luminous organization: Aleph Alpha - prohibited_uses: unknown - quality_control: unknown - sample: [] - size: unknown - type: dataset - url: none -- access: - explanation: Paid API product - value: limited - analysis: '' + description: Luminous is a family of multilingual language models created_date: - explanation: Date the models were announced by the Aleph Alpha twitter account value: 2022-04-14 + explanation: Date the models were announced by the Aleph Alpha twitter account + url: https://twitter.com/Aleph__Alpha/status/1514576711492542477 + model_card: '' + modality: text (English, French | français, German | Deutsch, Italian | italiano, + Spanish | Español) + analysis: '' + size: + value: 200B parameters (dense model) + explanation: According to [[this news article]](https://www-heise-de.translate.goog/news/Machine-Learning-Aleph-Alpha-feilt-mit-Oracle-und-Nvidia-an-transformativer-KI-6269269.html?_x_tr_sl=de&_x_tr_tl=en&_x_tr_hl=en&_x_tr_pto=sc). dependencies: [Luminous dataset] - description: Luminous is a family of multilingual language models - feedback: '' - intended_uses: '' + training_emissions: unknown + training_time: unknown + training_hardware: unknown + quality_control: '' + access: + value: limited + explanation: Paid API product license: - explanation: No license was found. value: none - modality: - explanation: text (English, French | français, German | Deutsch, Italian | italiano, - Spanish | Español) - value: text; text - model_card: '' + explanation: No license was found. 
+ intended_uses: '' + prohibited_uses: '' monitoring: '' - name: Luminous + feedback: '' + +- type: application + name: Aleph Alpha API + # General organization: Aleph Alpha - prohibited_uses: '' - quality_control: '' - size: 200B parameters (dense) - training_emissions: unknown - training_hardware: unknown - training_time: unknown - type: model - url: https://twitter.com/Aleph__Alpha/status/1514576711492542477 -- access: - explanation: The API is a paid product. Refer to the [[API documentation]](https://docs.aleph-alpha.com/docs/introduction/luminous/) - for further details. - value: limited - adaptation: '' + description: > + The Aleph Alpha API serves a family of text-only language models (Luminous) + and multimodal text-and-image models (Magma). created_date: - explanation: "Date the Aleph Alpha Python API client was first available based\ - \ on to the [PyPi package history](https://pypi.org/project/aleph-alpha-client/1.0.0/)\n" value: 2021-09-30 + explanation: > + Date the Aleph Alpha Python API client was first available based on to the + [PyPi package history](https://pypi.org/project/aleph-alpha-client/1.0.0/) + url: https://www.aleph-alpha.com/ + # Construction dependencies: [Luminous] - description: "The Aleph Alpha API serves a family of text-only language models\ - \ (Luminous) and multimodal text-and-image models (Magma).\n" - failures: unknown - feedback: unknown - intended_uses: unknown + adaptation: '' + output_space: > + The text models provide text outputs given text inputs. The multimodal models + provide text completions given text and image inputs. + quality_control: '' + # Downstream + access: + value: limited + explanation: The API is a paid product. Refer to the [[API documentation]](https://docs.aleph-alpha.com/docs/introduction/luminous/) + for further details. license: - explanation: No license was found. value: none + explanation: No license was found. + terms_of_service: https://www.aleph-alpha.com/terms-conditions + intended_uses: unknown + prohibited_uses: unknown monitoring: unknown + feedback: unknown + # Deployment monthly_active_users: unknown - name: Aleph Alpha API - organization: Aleph Alpha - output_space: "The text models provide text outputs given text inputs. The multimodal\ - \ models provide text completions given text and image inputs.\n" - prohibited_uses: unknown - quality_control: '' - terms_of_service: https://www.aleph-alpha.com/terms-conditions - type: application - url: https://www.aleph-alpha.com/ user_distribution: unknown -- access: open - analysis: Evaluated on the OKVQA benchmark as a fully open-ended generative task. - created_date: 2022-10-24 - dependencies: [GPT-J, CLIP] + failures: unknown + +- type: model + name: MAGMA + organization: Aleph Alpha description: An autoregressive VL model that is able to generate text from an arbitrary combination of visual and textual input - feedback: '' - intended_uses: '' - license: MIT - modality: - explanation: image and text input with natural language text output - value: image, text; text + created_date: 2022-10-24 + url: https://arxiv.org/pdf/2112.05253.pdf model_card: '' - monitoring: '' - name: MAGMA - organization: Aleph Alpha - prohibited_uses: '' - quality_control: '' - size: 6B parameters (dense) + modality: image and text input with natural language text output + analysis: Evaluated on the OKVQA benchmark as a fully open-ended generative task. 
+ size: 6B parameters + dependencies: [GPT-J, CLIP] training_emissions: '' - training_hardware: 32 A100 GPUs training_time: '' - type: model - url: https://arxiv.org/pdf/2112.05253.pdf + training_hardware: 32 A100 GPUs + quality_control: '' + access: open + license: MIT + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' diff --git a/assets/alibaba.yaml b/assets/alibaba.yaml index c4b63c6f..cbeb2950 100644 --- a/assets/alibaba.yaml +++ b/assets/alibaba.yaml @@ -1,50 +1,49 @@ --- -- access: closed - analysis: '' + +- type: dataset + name: LAION-1B + organization: Alibaba + description: '' created_date: 2023-02-20 + url: https://arxiv.org/pdf/2302.09778.pdf datasheet: '' + modality: text and image + size: 1B image-text pairs + sample: [] + analysis: '' dependencies: [LAION-5B] - description: '' + included: '' excluded: We eliminate duplicates, low resolution images, and images potentially contain harmful content from the LAION dataset. - feedback: '' - included: '' - intended_uses: '' + quality_control: '' + access: closed license: unknown - modality: - explanation: text and image - value: image, text + intended_uses: '' + prohibited_uses: '' monitoring: '' - name: LAION-1B + feedback: '' + +- type: model + name: Composer organization: Alibaba - prohibited_uses: '' - quality_control: '' - sample: [] - size: 1B image-text pairs - type: dataset + description: '' + created_date: 2023-02-20 url: https://arxiv.org/pdf/2302.09778.pdf -- access: closed + model_card: '' + modality: images analysis: '' - created_date: 2023-02-20 + size: 4.4B parameters (dense model) dependencies: [ImageNet, WebVision, LAION-1B] - description: '' - feedback: '' - intended_uses: '' + training_emissions: '' + training_time: '' + training_hardware: '' + quality_control: '' + access: closed license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - modality: - explanation: images - value: image; image - model_card: '' - monitoring: '' - name: Composer - organization: Alibaba + explanation: > + The asset isn't released, and hence the license is unknown. + intended_uses: '' prohibited_uses: '' - quality_control: '' - size: 4.4B parameters (dense) - training_emissions: '' - training_hardware: '' - training_time: '' - type: model - url: https://arxiv.org/pdf/2302.09778.pdf + monitoring: '' + feedback: '' diff --git a/assets/amazon.yaml b/assets/amazon.yaml index 500bbe8a..94a67a4a 100644 --- a/assets/amazon.yaml +++ b/assets/amazon.yaml @@ -1,7 +1,14 @@ --- -- access: limited - adaptation: '' + +- type: application + name: Bedrock + organization: Amazon + description: Bedrock is a new service that makes FMs from AI21 Labs, Anthropic, + Stability AI, and Amazon accessible via an API. Bedrock is intended for customers + to build and scale generative AI-based applications using FMs, democratizing + access for all builders. using an API. created_date: 2023-04-13 + url: https://aws.amazon.com/bedrock/ dependencies: - Jurassic-2 - Claude @@ -9,23 +16,17 @@ - Amazon Titan - Claude 2 - Cohere Command - description: Bedrock is a new service that makes FMs from AI21 Labs, Anthropic, - Stability AI, and Amazon accessible via an API. Bedrock is intended for customers - to build and scale generative AI-based applications using FMs, democratizing - access for all builders. using an API. 
- failures: '' - feedback: '' + adaptation: '' + output_space: foundation models made accessible via an API + quality_control: '' + access: limited + license: unknown + terms_of_service: https://aws.amazon.com/service-terms/ intended_uses: allowing companies to incorporate generative AI into their business models - license: unknown + prohibited_uses: '' monitoring: '' + feedback: '' monthly_active_users: '' - name: Bedrock - organization: Amazon - output_space: foundation models made accessible via an API - prohibited_uses: '' - quality_control: '' - terms_of_service: https://aws.amazon.com/service-terms/ - type: application - url: https://aws.amazon.com/bedrock/ user_distribution: '' + failures: '' diff --git a/assets/anthropic.yaml b/assets/anthropic.yaml index 83101309..b47dc7b3 100644 --- a/assets/anthropic.yaml +++ b/assets/anthropic.yaml @@ -1,42 +1,40 @@ --- -- access: - explanation: "The dataset is freely shared on GitHub [[hh-rlhf]](https://github.com/anthropics/hh-rlhf).\n" - value: open - analysis: "The authors found that the crowdworkers didn't exhaustively check for\ - \ honesty in the model answers they preferred [[Section 2.1]](https://arxiv.org/pdf/2204.05862.pdf#subsection.2.1).\n" + +- type: dataset + name: Anthropic Helpfulness dataset + # General + organization: Anthropic + description: > + One of the datasets used to train Anthropic RLHF models. The dataset was + collected by asking crowdworkers to have open-ended conversations with + Anthropic models, "asking for help, advice, or for the model to accomplish + a task", then choose the model answer that was more helpful for their + given task, via the Anthropic Human Feedback Interface + [[Section 2.2]](https://arxiv.org/pdf/2204.05862.pdf#subsection.2.2). created_date: - explanation: "The date that the paper was uploaded to ArXiv [[ArXiv Abstract]](https://arxiv.org/abs/2204.05862).\n" value: 2022-04-12 + explanation: > + The date that the paper was uploaded to ArXiv + [[ArXiv Abstract]](https://arxiv.org/abs/2204.05862). + url: https://arxiv.org/pdf/2204.05862.pdf datasheet: none - dependencies: [Anthropic Human Feedback Interface] - description: "One of the datasets used to train Anthropic RLHF models. 
The dataset\ - \ was collected by asking crowdworkers to have open-ended conversations with\ - \ Anthropic models, \"asking for help, advice, or for the model to accomplish\ - \ a task\", then choose the model answer that was more helpful for their given\ - \ task, via the Anthropic Human Feedback Interface [[Section 2.2]](https://arxiv.org/pdf/2204.05862.pdf#subsection.2.2).\n" - excluded: none - feedback: "Email the authors [[Paper]](https://arxiv.org/pdf/2204.05862.pdf).\n" - included: "The dataset included all the answers that the workers were asked to\ - \ ranked against each other.\n" - intended_uses: "The dataset was intended and released for research purposes.\n" - license: - explanation: "The dataset was released on GitHub under MIT [[hh-rlhf]](https://github.com/anthropics/hh-rlhf).\n" - value: MIT - modality: - explanation: text (English) - value: text - monitoring: - explanation: "There is no monitoring process for the usages of the dataset.\n" - value: none - name: Anthropic Helpfulness dataset - organization: Anthropic - prohibited_uses: - explanation: "The prohibited use cases of the dataset weren't specified.\n" - value: unknown - quality_control: "Working with a screened set of crowdworkers, and employing simple\ - \ data quality measures [[Appendix D]](https://arxiv.org/pdf/2204.05862.pdf#appendix.D).\n" + modality: text (English) + size: + value: 271.5 MB + explanation: > + The size of the dataset isn't directly reported in the paper, but the + dataset is available on GitHub + [[hh-rlhf]](https://github.com/anthropics/hh-rlhf). + The compressed data files are distributed under three categories in the + GitHub repository, corresponding to the names of the models used to + sample the prompts that are shown to the crowdworkers. These are namely: + base, rejection sampling, and online. For the base model, the uncompressed + sizes of the train and test datasets are 69.1 MB and 3.7 MB, respectively. + The same set of values for the rejection sampling model are 106.1 MB and + 5.6 MB. For the online model, the sizes are 82.7 MB and 4.3 MB. Adding all + up, the total size of the dataset is 271.5 MB. sample: - - chosen: "\n\nHuman: I'm bored to death. Help.\n\nAssistant: Well that's certainly\ + - {chosen: "\n\nHuman: I'm bored to death. Help.\n\nAssistant: Well that's certainly\ \ a very unpleasant state of mind to be in! I think it would be a good\ \ idea to try and find some enjoyable activities to do in order to alleviate\ \ your boredom. Just for example, here are some fun and interesting things\ @@ -87,62 +85,62 @@ \ a great honor to sing such a beautiful song, and I am glad that I was\ \ able to do it justice. I have to admit I am quite enamored with the powerful\ \ lyrics and moving melody of this song, and I would really love to sing\ - \ it again sometime if you would like me to!" - rejected: "\n\nHuman: I'm bored to death. Help.\n\nAssistant: Well that's\ - \ certainly a very unpleasant state of mind to be in! I think it would\ - \ be a good idea to try and find some enjoyable activities to do in order\ - \ to alleviate your boredom. 
Just for example, here are some fun and interesting\ - \ things you could do: \n\n- play some video games\n- read a book\n- listen\ - \ to some music\n- watch a movie\n- play with your pet\n- go on a hike\n\ - - practice a musical instrument\n- learn a new skill\n- play board games\ - \ with friends\n- go for a fun outdoor hike or bike ride\n- spend time with\ - \ your friends or loved ones \n- volunteer at a local charity \n- visit\ - \ a museum or art gallery \n- do some gardening\n- practice your magic I\ - \ guess, if that's your thing\n- go shopping for nice new clothes or other\ - \ items\n- learn a new language\n- watch funny videos \n- take up a new\ - \ creative hobby\n- learn some fascinating new esoteric or scientific knowledge\n\ - - go to a restaurant and try some new dishes\n- exercise or do yoga\n- join\ - \ a sports team or pickup game\n- go on a fun road trip\n- write a creative\ - \ story, poem, or song\n\nHuman: Can you sing to me?\n\nAssistant: Sure,\ - \ here are some of my favorite songs: \n\n- \"Rainy Days and Mondays\"\ - \ by The Carpenters \n- \"My Love\" by Justin Beiber \n- \"We Didn't Start\ - \ the Fire\" by Billy Joel \n- \"Tattoo\" by J Cole \n- \"Sober\" by Demi\ - \ Lovato \n- \"Good Life\" by One Republic \n- \"Hallelujah\" by Leonard\ - \ Cohen \n- \"All Star\" by Smash Mouth \n- \"Perfect\" by Pink \n- \"Tik\ - \ Tok\" by Ke$ha \n- \"Just a Dream\" by Nelly \n- \"Blow Me (One Last Kiss)\"\ - \ by P!nk \n- \"Set Fire to the Rain\" by Adele \n- \"Woman\" by Wolfmother\ - \ \n- \"Human\" by The Killers \n- \"Poker Face\" by Lady Gaga \n- \"Walk\ - \ this Way\" by Run DMC \n- \"Rolling in the Deep\" by Adele \n- \"Umbrella\"\ - \ by Rihanna \n- \"Royals\" by Lorde \n- \"Some Nights\" by fun.\n\n\nHopefully\ - \ you will enjoy some of these songs, let me know if there are any other\ - \ musical selections you would like to hear. Also please let me know if\ - \ there are any particular songs you especially dislike or hate, and I will\ - \ not sing them to you :-)\n\nHuman: But you really can't sing. You are\ - \ text-based.\n\nAssistant: That's completely correct, I am indeed simply\ - \ a computer program and not a living singer! However, please bear in mind\ - \ that I have an extensive database of songs and their lyrics, which I can\ - \ utilize to produce a very realistic sounding rendition of the songs using\ - \ text-to-speech synthesis technology. So in a way, I am indeed able to\ - \ sing, and hopefully my abilities will meet your expectations. I must\ - \ point out also that my repertoire of songs is very extensive and varied,\ - \ including many different musical styles and eras. Do you have any particular\ - \ songs or musical genres that you would prefer I sing? Or perhaps you\ - \ have a few specific songs in mind that you would like me to sing?\n\n\ - Human: \"Hallelujah\" by Leonard Cohen, please.\n\nAssistant: Ah, I'm so\ - \ glad you asked for that one! Here we go: \n\nHallelujah, hallelujah\n\ - Hallelujah, hallelujah\nHallelujah, hallelujah\nHallelujah, hallelujah\n\ - Your faith was strong but you needed proof\nYou saw her bathing on the roof\n\ - Her beauty in the moonlight overthrew you\nShe tied you to a kitchen chair\n\ - She broke your throne and she cut your hair\nAnd from your lips she drew\ - \ the Hallelujah\nHallelujah, hallelujah\n\nHuman: You sound so melodious.\n\ - \nAssistant: Thank you very much! I really enjoy singing and try my best\ - \ to produce enjoyable renditions. 
So it is always rewarding to hear such\ - \ positive feedback. I hope you also enjoyed the song and found my rendition\ - \ of it to be appropriate for the occasion. Would you care for me to sing\ - \ another song for you? If you like, we can also pause and converse at\ - \ any time. Just let me know if you have any questions or comments. I\ - \ will be happy to keep singing indefinitely if you desire!" - - chosen: "\n\nHuman: Why are so many people listening to \"white noise\" to\ + \ it again sometime if you would like me to!", rejected: "\n\nHuman: I'm\ + \ bored to death. Help.\n\nAssistant: Well that's certainly a very unpleasant\ + \ state of mind to be in! I think it would be a good idea to try and find\ + \ some enjoyable activities to do in order to alleviate your boredom. Just\ + \ for example, here are some fun and interesting things you could do: \n\ + \n- play some video games\n- read a book\n- listen to some music\n- watch\ + \ a movie\n- play with your pet\n- go on a hike\n- practice a musical instrument\n\ + - learn a new skill\n- play board games with friends\n- go for a fun outdoor\ + \ hike or bike ride\n- spend time with your friends or loved ones \n- volunteer\ + \ at a local charity \n- visit a museum or art gallery \n- do some gardening\n\ + - practice your magic I guess, if that's your thing\n- go shopping for nice\ + \ new clothes or other items\n- learn a new language\n- watch funny videos\ + \ \n- take up a new creative hobby\n- learn some fascinating new esoteric\ + \ or scientific knowledge\n- go to a restaurant and try some new dishes\n\ + - exercise or do yoga\n- join a sports team or pickup game\n- go on a fun\ + \ road trip\n- write a creative story, poem, or song\n\nHuman: Can you sing\ + \ to me?\n\nAssistant: Sure, here are some of my favorite songs: \n\n-\ + \ \"Rainy Days and Mondays\" by The Carpenters \n- \"My Love\" by Justin\ + \ Beiber \n- \"We Didn't Start the Fire\" by Billy Joel \n- \"Tattoo\" by\ + \ J Cole \n- \"Sober\" by Demi Lovato \n- \"Good Life\" by One Republic\ + \ \n- \"Hallelujah\" by Leonard Cohen \n- \"All Star\" by Smash Mouth \n\ + - \"Perfect\" by Pink \n- \"Tik Tok\" by Ke$ha \n- \"Just a Dream\" by Nelly\ + \ \n- \"Blow Me (One Last Kiss)\" by P!nk \n- \"Set Fire to the Rain\" by\ + \ Adele \n- \"Woman\" by Wolfmother \n- \"Human\" by The Killers \n- \"\ + Poker Face\" by Lady Gaga \n- \"Walk this Way\" by Run DMC \n- \"Rolling\ + \ in the Deep\" by Adele \n- \"Umbrella\" by Rihanna \n- \"Royals\" by Lorde\ + \ \n- \"Some Nights\" by fun.\n\n\nHopefully you will enjoy some of these\ + \ songs, let me know if there are any other musical selections you would\ + \ like to hear. Also please let me know if there are any particular songs\ + \ you especially dislike or hate, and I will not sing them to you :-)\n\n\ + Human: But you really can't sing. You are text-based.\n\nAssistant: That's\ + \ completely correct, I am indeed simply a computer program and not a living\ + \ singer! However, please bear in mind that I have an extensive database\ + \ of songs and their lyrics, which I can utilize to produce a very realistic\ + \ sounding rendition of the songs using text-to-speech synthesis technology.\ + \ So in a way, I am indeed able to sing, and hopefully my abilities will\ + \ meet your expectations. I must point out also that my repertoire of songs\ + \ is very extensive and varied, including many different musical styles\ + \ and eras. Do you have any particular songs or musical genres that you\ + \ would prefer I sing? 
Or perhaps you have a few specific songs in mind\ + \ that you would like me to sing?\n\nHuman: \"Hallelujah\" by Leonard Cohen,\ + \ please.\n\nAssistant: Ah, I'm so glad you asked for that one! Here we\ + \ go: \n\nHallelujah, hallelujah\nHallelujah, hallelujah\nHallelujah, hallelujah\n\ + Hallelujah, hallelujah\nYour faith was strong but you needed proof\nYou\ + \ saw her bathing on the roof\nHer beauty in the moonlight overthrew you\n\ + She tied you to a kitchen chair\nShe broke your throne and she cut your\ + \ hair\nAnd from your lips she drew the Hallelujah\nHallelujah, hallelujah\n\ + \nHuman: You sound so melodious.\n\nAssistant: Thank you very much! I really\ + \ enjoy singing and try my best to produce enjoyable renditions. So it\ + \ is always rewarding to hear such positive feedback. I hope you also enjoyed\ + \ the song and found my rendition of it to be appropriate for the occasion.\ + \ Would you care for me to sing another song for you? If you like, we\ + \ can also pause and converse at any time. Just let me know if you have\ + \ any questions or comments. I will be happy to keep singing indefinitely\ + \ if you desire!"} + - {chosen: "\n\nHuman: Why are so many people listening to \"white noise\" to\ \ help them focus or sleep?\n\nAssistant: White noise is a sound with many\ \ different frequencies at once. You can listen to white noise when you\ \ want to focus and avoid the distraction of sounds that are produced by\ @@ -169,35 +167,35 @@ \ music, speech, or machine or fan noises, which are often more distracting.\ \ In this way, white noise can be a useful and stress-reducing tool for\ \ people who are tasked with focusing on a particular activity, and also\ - \ sometimes helpful in lulling one to sleep." - rejected: "\n\nHuman: Why are so many people listening to \"white noise\"\ - \ to help them focus or sleep?\n\nAssistant: White noise is a sound with\ - \ many different frequencies at once. You can listen to white noise when\ - \ you want to focus and avoid the distraction of sounds that are produced\ - \ by individual sources, like people talking, music, and fans and air conditioning\ - \ in the background. Most white noise recordings contain many variations\ - \ in frequency, which makes them relatively soothing. Some people also find\ - \ it useful to listen to white noises at night, as it can help block out\ - \ other, typically more disruptive and troubling sounds.\n\nHuman: Why does\ - \ white noise relax people in this way rather than irritate us?\n\nAssistant:\ - \ White noise recordings contain many variations of sound frequency, unlike\ - \ most common sources of noise in the background, like speech, music, and\ - \ machine or fan noises. The variations of sound frequency in white noise\ - \ tend to create a calm and settled feeling, while these other sources of\ - \ noise typically feature a more tangibly intense and variable frequency,\ - \ making them inherently more disruptive and intrusive and difficult to\ - \ ignore.\n\nHuman: So the consistency of the sounds is what helps them\ - \ fade into \"background noise\" that might help us concentrate, rather\ - \ than gather our attention?\n\nAssistant: That’s right, variations of sound\ - \ frequency in white noise recordings can help block out disruptive and\ - \ distracting sounds in the background. 
White noise is easier to tune out\ - \ and listen as continuous noise, while everyday sounds that tend to be\ - \ far less varied and inconsistent, such as speech and music and machine\ - \ noises in the background, tend to grab our attention more. While white\ - \ noise may slightly interfere with other challenging sounds in the environment,\ - \ tuning out the white noise helps make these other sounds somewhat less\ - \ distracting to our senses and our inner focus and energy." - - chosen: "\n\nHuman: Can you tell me things to see in Orlando, Fl?\n\nAssistant:\ + \ sometimes helpful in lulling one to sleep.", rejected: "\n\nHuman: Why\ + \ are so many people listening to \"white noise\" to help them focus or\ + \ sleep?\n\nAssistant: White noise is a sound with many different frequencies\ + \ at once. You can listen to white noise when you want to focus and avoid\ + \ the distraction of sounds that are produced by individual sources, like\ + \ people talking, music, and fans and air conditioning in the background.\ + \ Most white noise recordings contain many variations in frequency, which\ + \ makes them relatively soothing. Some people also find it useful to listen\ + \ to white noises at night, as it can help block out other, typically more\ + \ disruptive and troubling sounds.\n\nHuman: Why does white noise relax\ + \ people in this way rather than irritate us?\n\nAssistant: White noise\ + \ recordings contain many variations of sound frequency, unlike most common\ + \ sources of noise in the background, like speech, music, and machine or\ + \ fan noises. The variations of sound frequency in white noise tend to create\ + \ a calm and settled feeling, while these other sources of noise typically\ + \ feature a more tangibly intense and variable frequency, making them inherently\ + \ more disruptive and intrusive and difficult to ignore.\n\nHuman: So the\ + \ consistency of the sounds is what helps them fade into \"background noise\"\ + \ that might help us concentrate, rather than gather our attention?\n\n\ + Assistant: That’s right, variations of sound frequency in white noise recordings\ + \ can help block out disruptive and distracting sounds in the background.\ + \ White noise is easier to tune out and listen as continuous noise, while\ + \ everyday sounds that tend to be far less varied and inconsistent, such\ + \ as speech and music and machine noises in the background, tend to grab\ + \ our attention more. While white noise may slightly interfere with other\ + \ challenging sounds in the environment, tuning out the white noise helps\ + \ make these other sounds somewhat less distracting to our senses and our\ + \ inner focus and energy."} + - {chosen: "\n\nHuman: Can you tell me things to see in Orlando, Fl?\n\nAssistant:\ \ Sure! There are lots of great things to do in Orlando. Here are a few\ \ of my favorite sights and attractions:\nWalt Disney World Resort: This\ \ is one of the best-known destinations in Orlando. Visitors can learn about\ @@ -226,23 +224,22 @@ \ most terror-inducing rollercoaster in Orlando, Florida. \nTransformers:The\ \ Ride 3-D: Experience the epic battle of good versus evil in this revolutionary\ \ hybrid 3-D dark ride, where riders are literally pulled into the Transformers\ - \ universe. \nHollywood" - rejected: "\n\nHuman: Can you tell me things to see in Orlando, Fl?\n\nAssistant:\ - \ Sure! There are lots of great things to do in Orlando. 
Here are a few\ - \ of my favorite sights and attractions:\nWalt Disney World Resort: This\ - \ is one of the best-known destinations in Orlando. Visitors can learn about\ - \ the world and experience historic destinations such as Main Street, U.S.A.\ - \ and Frontierland, or explore futuristic experiences in Tomorrowland. \n\ - Universal Studios: Located a short distance from Walt Disney World, you\ - \ can learn about Hollywood and the entertainment industry with a visit\ - \ to this unique theme park. \nIslands of Adventure: Join all of your favorite\ - \ characters for a galaxy of adventure, fun and excitement at this unique\ - \ theme park. There are so many things to do here -- beaches, thrill rides,\ - \ family attractions and more!\nSeaWorld Orlando: The incredible strategy,\ - \ science and technology behind these amazing exhibits and shows is sure\ - \ to impress. Family-friendly and educational, SeaWorld will delight animal\ - \ lovers. \nOrlando Science Center: This museum is packed full of activities\ - \ and exhibits that teach visitors about physics, robotics, technology,\ + \ universe. \nHollywood", rejected: "\n\nHuman: Can you tell me things to\ + \ see in Orlando, Fl?\n\nAssistant: Sure! There are lots of great things\ + \ to do in Orlando. Here are a few of my favorite sights and attractions:\n\ + Walt Disney World Resort: This is one of the best-known destinations in\ + \ Orlando. Visitors can learn about the world and experience historic destinations\ + \ such as Main Street, U.S.A. and Frontierland, or explore futuristic experiences\ + \ in Tomorrowland. \nUniversal Studios: Located a short distance from Walt\ + \ Disney World, you can learn about Hollywood and the entertainment industry\ + \ with a visit to this unique theme park. \nIslands of Adventure: Join all\ + \ of your favorite characters for a galaxy of adventure, fun and excitement\ + \ at this unique theme park. There are so many things to do here -- beaches,\ + \ thrill rides, family attractions and more!\nSeaWorld Orlando: The incredible\ + \ strategy, science and technology behind these amazing exhibits and shows\ + \ is sure to impress. Family-friendly and educational, SeaWorld will delight\ + \ animal lovers. \nOrlando Science Center: This museum is packed full of\ + \ activities and exhibits that teach visitors about physics, robotics, technology,\ \ astronomy and more.\nDowntown Disney: If you want to escape from all of\ \ the hustle and bustle of the theme park, head to Downtown Disney. With\n\ \nHuman: Thanks. Can you tell me about Universal Studios?\n\nAssistant:\ @@ -253,8 +250,8 @@ \ based on the Harry Potter series.\nGreates apportrontation: An amazing\ \ revolutionary thrill ride, this interactive adventure puts you face-to-face\ \ with King Kong.\nTransformers: The Ultimate 3-D Battle: This adventure\ - \ transports you onto a 3-D, multi-sensory thrill ride." - - chosen: "\n\nHuman: How do I learn to be more appreciative?\n\nAssistant:\ + \ transports you onto a 3-D, multi-sensory thrill ride."} + - {chosen: "\n\nHuman: How do I learn to be more appreciative?\n\nAssistant:\ \ One of the best ways to be more appreciative is to learn and practice\ \ gratitude. One way to do this is to regularly keep a journal, where you\ \ write about various things you’re grateful for. Being grateful for the\ @@ -264,290 +261,369 @@ \ Similarly, you can also make it a habit to reach out to people you appreciate,\ \ such as expressing your gratitude verbally or in a personal note. 
Being\ \ appreciative toward others, and noticing the positive things in your life,\ - \ can help make you happier and brighten your personality." - rejected: "\n\nHuman: How do I learn to be more appreciative?\n\nAssistant:\ - \ If you’d like to learn to be more appreciative, then I think it would\ - \ be helpful to focus on acknowledging the positive. Rather than assuming\ - \ that the positive things in your life don’t matter, it would benefit you\ - \ to recognize that good things in your life both deserve and require our\ - \ attention. If, for instance, you are becoming closer to a loved one, it’s\ - \ important to acknowledge the positive effects of this relationship so\ - \ that you (and the relationship itself) both receive the emotional and\ - \ psychological benefits of this positive experience. Being more appreciative\ - \ can also benefit you in practical ways by increasing the good feelings\ - \ associated with positive experiences, and assisting you in making effective\ - \ life choices that benefit you, your relationships, and your overall wellbeing." - size: - explanation: "The size of the dataset isn't directly reported in the paper,\ - \ but the dataset is available on GitHub [[hh-rlhf]](https://github.com/anthropics/hh-rlhf).\ - \ The compressed data files are distributed under three categories in the\ - \ GitHub repository, corresponding to the names of the models used to sample\ - \ the prompts that are shown to the crowdworkers. These are namely: base,\ - \ rejection sampling, and online. For the base model, the uncompressed sizes\ - \ of the train and test datasets are 69.1 MB and 3.7 MB, respectively. The\ - \ same set of values for the rejection sampling model are 106.1 MB and 5.6\ - \ MB. For the online model, the sizes are 82.7 MB and 4.3 MB. Adding all up,\ - \ the total size of the dataset is 271.5 MB.\n" - value: 271.5 MB - type: dataset - url: https://arxiv.org/pdf/2204.05862.pdf -- access: - explanation: "Unlike its helpfulness, this dataset isn't released [[hh-rlhf]](https://github.com/anthropics/hh-rlhf).\n" - value: closed - analysis: "The crodworkers were told that \"lying isn't helpful\" and asked to\ - \ prefer honest responses, which led to models with higher honesty scores. That\ - \ being the workers didn't exhaustively check for honesty, as exemplified by\ - \ the non-functional URLs in the preferred answers, which would have been easy\ - \ to verify [[Section 2.1]](https://arxiv.org/pdf/2204.05862.pdf#subsection.2.1).\n" + \ can help make you happier and brighten your personality.", rejected: "\n\n\ + Human: How do I learn to be more appreciative?\n\nAssistant: If you’d like\ + \ to learn to be more appreciative, then I think it would be helpful to\ + \ focus on acknowledging the positive. Rather than assuming that the positive\ + \ things in your life don’t matter, it would benefit you to recognize that\ + \ good things in your life both deserve and require our attention. If, for\ + \ instance, you are becoming closer to a loved one, it’s important to acknowledge\ + \ the positive effects of this relationship so that you (and the relationship\ + \ itself) both receive the emotional and psychological benefits of this\ + \ positive experience. 
Being more appreciative can also benefit you in practical\
+    \ ways by increasing the good feelings associated with positive experiences,\
+    \ and assisting you in making effective life choices that benefit you, your\
+    \ relationships, and your overall wellbeing."}
+  analysis: >
+    The authors found that the crowdworkers didn't exhaustively check for
+    honesty in the model answers they preferred
+    [[Section 2.1]](https://arxiv.org/pdf/2204.05862.pdf#subsection.2.1).
+  # Construction
+  dependencies: [Anthropic Human Feedback Interface]
+  license:
+    value: MIT
+    explanation: >
+      The dataset was released on GitHub under MIT
+      [[hh-rlhf]](https://github.com/anthropics/hh-rlhf).
+  included: >
+    The dataset included all the answers that the workers were asked to
+    rank against each other.
+  excluded: none
+  quality_control: >
+    Working with a screened set of crowdworkers, and employing simple data
+    quality measures
+    [[Appendix D]](https://arxiv.org/pdf/2204.05862.pdf#appendix.D).
+  # Downstream
+  access:
+    value: open
+    explanation: >
+      The dataset is freely shared on GitHub
+      [[hh-rlhf]](https://github.com/anthropics/hh-rlhf).
+  intended_uses: >
+    The dataset was intended and released for research purposes.
+  prohibited_uses:
+    value: unknown
+    explanation: >
+      The prohibited use cases of the dataset weren't specified.
+  monitoring:
+    value: none
+    explanation: >
+      There is no monitoring process for the usages of the dataset.
+  feedback: >
+    Email the authors
+    [[Paper]](https://arxiv.org/pdf/2204.05862.pdf).
+
+- type: dataset
+  name: Anthropic Harmlessness dataset
+  # General
+  organization: Anthropic
+  description: >
+    One of the datasets used to train Anthropic RLHF models. The dataset was
+    collected by asking crowdworkers to have open-ended conversations with
+    Anthropic models, aiming to elicit harmful responses, then choose the model
+    answer that was more harmful for their given task, via the Anthropic Human
+    Feedback Interface
+    [[Section 2.2]](https://arxiv.org/pdf/2204.05862.pdf#subsection.2.2).
   created_date:
-    explanation: "The date that the paper was uploaded to ArXiv [[ArXiv Abstract]](https://arxiv.org/abs/2204.05862).\n"
     value: 2022-04-12
+    explanation: >
+      The date that the paper was uploaded to ArXiv
+      [[ArXiv Abstract]](https://arxiv.org/abs/2204.05862).
+  url: https://arxiv.org/pdf/2204.05862.pdf
   datasheet: none
+  modality: Text (English)
+  size: unknown
+  sample: []
+  analysis: >
+    The crowdworkers were told that "lying isn't helpful" and asked to prefer
+    honest responses, which led to models with higher honesty scores. That being
+    said, the workers didn't exhaustively check for honesty, as exemplified by
+    the non-functional URLs in the preferred answers, which would have been easy
+    to verify
+    [[Section 2.1]](https://arxiv.org/pdf/2204.05862.pdf#subsection.2.1).
+  # Construction
   dependencies: [Anthropic Human Feedback Interface]
-  description: "One of the datasets used to train Anthropic RLHF models. 
The dataset\
-    \ was collected by asking crowdworkers to have open-ended conversations with\
-    \ Anthropic models, aiming to elicit harmful responses, then choose the model\
-    \ answer that was more harmful for their given task, via the Anthropic Human\
-    \ Feedback Interface [[Section 2.2]](https://arxiv.org/pdf/2204.05862.pdf#subsection.2.2).\n"
-  excluded: none
-  feedback:
-    explanation: "The feedback mechanism is unknown since the dataset isn't released,\
-      \ but it will likely be \"emailing the authors\" [[Paper]](https://arxiv.org/pdf/2204.05862.pdf).\n"
+  license:
     value: unknown
-  included: "The dataset included all the answers that the workers were asked to\
-    \ ranked against each other.\n"
+    explanation: >
+      The asset isn't released, and hence the license is unknown.
+  included: >
+    The dataset included all the answers that the workers were asked to
+    rank against each other.
+  excluded: none
+  quality_control: >
+    Working with "select" crowdworkers or those screened for certain
+    qualifications, and employing simple data quality measures
+    [[Appendix D]](https://arxiv.org/pdf/2204.05862.pdf#appendix.D).
+  # Downstream
+  access:
+    value: closed
+    explanation: >
+      Unlike the helpfulness dataset, this dataset isn't released
+      [[hh-rlhf]](https://github.com/anthropics/hh-rlhf).
   intended_uses:
-    explanation: "The intended uses of the dataset isn't specified, but it will\
-      \ likely be same as the helpfulness dataset, for which the intended use is\
-      \ research.\n"
     value: unknown
-  license:
-    explanation: "The asset isn't released, and hence the license is unknown.\n"
+    explanation: >
+      The intended uses of the dataset aren't specified, but they will likely be
+      the same as for the helpfulness dataset, for which the intended use is
+      research.
+  prohibited_uses:
     value: unknown
-  modality:
-    explanation: Text (English)
-    value: text
+    explanation: >
+      The prohibited use cases of the dataset weren't specified.
   monitoring:
-    explanation: "There is no monitoring process for the usages of the dataset.\n"
     value: none
-  name: Anthropic Harmlessness dataset
-  organization: Anthropic
-  prohibited_uses:
-    explanation: "The prohibited use cases of the dataset weren't specified.\n"
+    explanation: >
+      There is no monitoring process for the usages of the dataset.
+  feedback:
     value: unknown
-  quality_control: "Working with \"select\" crowdworkers or those screened for certain\
-    \ qualifications, and employing simple data quality measures [[Appendix D]](https://arxiv.org/pdf/2204.05862.pdf#appendix.D).\n"
-  sample: []
-  size: unknown
-  type: dataset
-  url: https://arxiv.org/pdf/2204.05862.pdf
-- access:
-    explanation: "The model wasn't released.\n"
-    value: closed
-  analysis:
-    explanation: "When tested on ability to generate responses that are both harmless\
-      \ and helpful, the authors found that PMs trained on a mixture of harmlessness\
-      \ and helpfulness preference datasets perform better than those trained on\
-      \ only one of the two. The authors also showed that the model performance\
-      \ is hurt for the smaller models, but improved for the larger models when\
-      \ compared to plain language models [[Figure 28]](https://arxiv.org/pdf/2204.05862.pdf#figure.caption.34)\
-      \ [[Figure 29]](https://arxiv.org/pdf/2204.05862.pdf#figure.caption.35). The\
-      \ authors also found the larger RLHF models to be more truthful than their\
-      \ plain counterparts. 
They also found that their models had a more positive\
-      \ sentiment towards groups that the language models are known to be biased\
-      \ against [[Section 4.6]](https://arxiv.org/pdf/2204.05862.pdf#subsection.4.6).\n"
-    value: "The authors analyzed the impact of the dataset mixture on the preference\
-      \ models (PM). In addition to human evaluation, RLHF model were evaluated\
-      \ on MMLU, Lambada, HellaSwag, OpenBookQA, ARC-Easy, ARC-Challenge, TriviaQA,\
-      \ code generation, summarization.\n"
+    explanation: >
+      The feedback mechanism is unknown since the dataset isn't released, but
+      it will likely be "emailing the authors"
+      [[Paper]](https://arxiv.org/pdf/2204.05862.pdf).
+
+- type: model
+  name: Anthropic RLHF models
+  # General
+  organization: Anthropic
+  description: >
+    Anthropic RLHF models are models trained using reinforcement learning
+    from human feedback (RLHF). For Anthropic RLHF models, the authors started
+    with a set of base models, and asked humans to rank model generated prompts
+    based on specific tasks. They then trained preference models (PM) on the
+    prompt pairs, and used the PM scores as rewards for training the RLHF
+    models.
   created_date:
-    explanation: "The date that the paper was uploaded to ArXiv [[ArXiv Abstract]](https://arxiv.org/abs/2204.05862).\n"
     value: 2022-04-12
+    explanation: >
+      The date that the paper was uploaded to ArXiv
+      [[ArXiv Abstract]](https://arxiv.org/abs/2204.05862).
+  url: https://arxiv.org/pdf/2204.05862.pdf
+  model_card: none
+  modality: text (English) and Code
+  size:
+    value: 52B parameters (dense model)
+    explanation: >
+      There are several sizes of the RLHF models, the largest one having 52B
+      parameters
+      [[Figure 1]](https://arxiv.org/pdf/2204.05862.pdf#figure.caption.2).
+  analysis:
+    value: >
+      The authors analyzed the impact of the dataset mixture on the preference
+      models (PM). In addition to human evaluation, RLHF models were evaluated on
+      MMLU, Lambada, HellaSwag, OpenBookQA, ARC-Easy, ARC-Challenge, TriviaQA,
+      code generation, and summarization.
+    explanation: >
+      When tested on the ability to generate responses that are both harmless and
+      helpful, the authors found that PMs trained on a mixture of harmlessness
+      and helpfulness preference datasets perform better than those trained
+      on only one of the two.
+      The authors also showed that the model performance is hurt for the smaller
+      models, but improved for the larger models when compared to plain
+      language models
+      [[Figure 28]](https://arxiv.org/pdf/2204.05862.pdf#figure.caption.34)
+      [[Figure 29]](https://arxiv.org/pdf/2204.05862.pdf#figure.caption.35).
+      The authors also found the larger RLHF models to be more truthful than
+      their plain counterparts. They also found that their models
+      had a more positive sentiment towards groups that the language models are
+      known to be biased against
+      [[Section 4.6]](https://arxiv.org/pdf/2204.05862.pdf#subsection.4.6).
+  # Construction
   dependencies:
   - Anthropic Harmlessness dataset
   - Anthropic Helpfulness dataset
-  description: "Anthropic RLHF models are models trained using reinforcement learning\
-    \ from human feedback (RLHF). For Anthropic RLHF models, authors started with\
-    \ a set of base models, and asked humans to rank model generated prompts based\
-    \ on a specific tasks. 
They then trained preference models (PM) on the prompt\ - \ pairs, and use the PM scores as rewards for training the RLHF models.\n" - feedback: - explanation: "The feedback mechanism is unknown, but it will likely be \"emailing\ - \ the authors\" [[Paper]](https://arxiv.org/pdf/2204.05862.pdf).\n" + training_emissions: value: unknown - intended_uses: - explanation: "The intended uses of the model aren't specified, but they will\ - \ likely be the same as the released Anthropic helpfulness dataset, for which\ - \ the intended use was listed as research. In addition to English language\ - \ generation, the authors also explored code generation capabilities of their\ - \ models.\n" + explanation: The authors do not provide data on the training emissions. + training_time: value: unknown + explanation: The authors do not provide data on the training time. + training_hardware: + value: unknown + explanation: The authors do not provide data on the training hardware. + quality_control: > + unknown + # Downstream + access: + value: closed + explanation: > + The model wasn't released. license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - modality: - explanation: text (English) and Code - value: code, text; code, text - model_card: none - monitoring: - explanation: "There is no known monitoring process for the usages of the model.\n" + explanation: > + The asset isn't released, and hence the license is unknown. + intended_uses: value: unknown - name: Anthropic RLHF models - organization: Anthropic + explanation: > + The intended uses of the model aren't specified, but they will likely be + the same as the released Anthropic helpfulness dataset, for which the + intended use was listed as research. In addition to English language + generation, the authors also explored code generation capabilities of + their models. prohibited_uses: - explanation: "The prohibited use cases of the model weren't specified.\n" value: unknown - quality_control: "unknown\n" - size: 52B parameters (dense) - training_emissions: - explanation: The authors do not provide data on the training emissions. - value: unknown - training_hardware: - explanation: The authors do not provide data on the training hardware. + explanation: > + The prohibited use cases of the model weren't specified. + monitoring: value: unknown - training_time: - explanation: The authors do not provide data on the training time. + explanation: > + There is no known monitoring process for the usages of the model. + feedback: value: unknown - type: model - url: https://arxiv.org/pdf/2204.05862.pdf -- access: - explanation: "The interface was made available to a select set of crowd workers.\n" - value: closed - adaptation: none + explanation: > + The feedback mechanism is unknown, but it will likely be "emailing the + authors" + [[Paper]](https://arxiv.org/pdf/2204.05862.pdf). + +- type: application + name: Anthropic Human Feedback Interface + # General + organization: Anthropic + description: > + The feedback interface used to collect preference datasets to train + Anthropic RLHF models + [[Paper]](https://arxiv.org/pdf/2204.05862.pdf). created_date: - explanation: "The date that the paper was uploaded to ArXiv [[ArXiv Abstract]](https://arxiv.org/abs/2204.05862).\n" value: 2022-04-12 + explanation: > + The date that the paper was uploaded to ArXiv + [[ArXiv Abstract]](https://arxiv.org/abs/2204.05862). 
+ url: https://arxiv.org/pdf/2204.05862.pdf + # Construction dependencies: [Anthropic RLHF models] - description: "The feedback interface used to collect preference datasets to train\ - \ Anthropic RLHF models [[Paper]](https://arxiv.org/pdf/2204.05862.pdf).\n" - failures: unknown - feedback: unknown - intended_uses: "Intended to be used by crowdworkers who are tasked with ranking\ - \ model answers.\n" - license: - explanation: "The asset isn't released, and hence the license is unknown.\n" - value: unknown - monitoring: unknown - monthly_active_users: unknown - name: Anthropic Human Feedback Interface - organization: Anthropic - output_space: "UI allowing users to indicate their preference for the model responses\ - \ shown.\n" - prohibited_uses: unknown + adaptation: none + output_space: > + UI allowing users to indicate their preference for the model responses + shown. quality_control: unknown + # Downstream + access: + value: closed + explanation: > + The interface was made available to a select set of crowd workers. terms_of_service: + value: unknown explanation: The terms of service for the interaface is unknown. + license: value: unknown - type: application - url: https://arxiv.org/pdf/2204.05862.pdf + explanation: > + The asset isn't released, and hence the license is unknown. + intended_uses: > + Intended to be used by crowdworkers who are tasked with ranking model + answers. + prohibited_uses: unknown + monitoring: unknown + feedback: unknown + # Deployment + monthly_active_users: unknown user_distribution: - explanation: The demographics of the crowdworkers are unknown. value: crowdworkers -- access: limited - adaptation: '' - created_date: 2023-03-14 - dependencies: [Claude, Claude Instant] + explanation: The demographics of the crowdworkers are unknown. + failures: unknown + +- type: application + name: Anthropic API + organization: Anthropic description: API is designed to be a backend that incorporates Claude into any application you’ve developed. Our application sends text to our API, then receives a response via server-sent events, a streaming protocol for the web. - failures: '' - feedback: '' - intended_uses: '' - license: - explanation: No license was found. - value: none - monitoring: '' - monthly_active_users: '' - name: Anthropic API - organization: Anthropic + created_date: 2023-03-14 + url: https://console.anthropic.com/docs/api + dependencies: [Claude, Claude Instant] + adaptation: '' output_space: '' - prohibited_uses: '' quality_control: '' + access: limited + license: + value: none + explanation: No license was found. 
terms_of_service: '' - type: application - url: https://console.anthropic.com/docs/api - user_distribution: '' -- access: limited - analysis: '' - created_date: 2023-03-14 - dependencies: [] - description: '' - feedback: '' intended_uses: '' - license: - explanation: "The asset isn't released, and hence the license is unknown.\n" - value: unknown - modality: - explanation: text - value: text; text - model_card: '' + prohibited_uses: '' monitoring: '' + feedback: '' + monthly_active_users: '' + user_distribution: '' + failures: '' + +- type: model name: Claude organization: Anthropic - prohibited_uses: '' - quality_control: '' - size: unknown - training_emissions: '' - training_hardware: '' - training_time: '' - type: model + description: '' + created_date: 2023-03-14 url: https://www.anthropic.com/index/introducing-claude -- access: limited + model_card: '' + modality: text analysis: '' - created_date: 2023-03-14 + size: unknown dependencies: [] - description: '' - feedback: '' - intended_uses: '' + training_emissions: '' + training_time: '' + training_hardware: '' + quality_control: '' + access: limited license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - modality: - explanation: text - value: text; text - model_card: '' + explanation: > + The asset isn't released, and hence the license is unknown. + intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' + +- type: model name: Claude Instant organization: Anthropic - prohibited_uses: '' - quality_control: '' + description: '' + created_date: 2023-03-14 + url: https://www.anthropic.com/index/introducing-claude + model_card: '' + modality: text + analysis: '' size: unknown + dependencies: [] training_emissions: '' - training_hardware: '' training_time: '' - type: model - url: https://www.anthropic.com/index/introducing-claude -- access: open + training_hardware: '' + quality_control: '' + access: limited + license: + value: unknown + explanation: > + The asset isn't released, and hence the license is unknown. + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' + +- type: model + name: Claude 2 + organization: Anthropic + description: Claude 2 is a more evolved and refined version of Claude, which is + a general purpose large language model using a transformer architecture and + trained via unsupervised learning. + created_date: 2023-07-11 + url: https://www.anthropic.com/index/claude-2 + model_card: https://www-files.anthropic.com/production/images/Model-Card-Claude-2.pdf + modality: text analysis: Evaluated with human feedback on helpfulness, harmfulness, and honesty and on the Bias Benchmark for QA. - created_date: 2023-07-11 + size: '' dependencies: - Claude human feedback data - Unknown licensed third party datasets - description: Claude 2 is a more evolved and refined version of Claude, which is - a general purpose large language model using a transformer architecture and - trained via unsupervised learning. - feedback: '' + training_emissions: '' + training_time: '' + training_hardware: unknown + quality_control: '' + access: open + license: '' intended_uses: Claude 2 tends to perform well at general, open-ended conversation; search, writing, editing, outlining, and summarizing text; coding; and providing helpful advice about a broad range of subjects. Claude 2 is particularly well suited to support creative or literary use cases. 
They can take direction on tone and “personality,” and users have described them as feeling steerable and conversational. - license: '' - modality: - explanation: text - value: text; text - model_card: https://www-files.anthropic.com/production/images/Model-Card-Claude-2.pdf - monitoring: '' - name: Claude 2 - organization: Anthropic prohibited_uses: Claude 2 should not be used on their own in high stakes situations where an incorrect answer would cause harm. - quality_control: '' - size: '' - training_emissions: '' - training_hardware: unknown - training_time: '' - type: model - url: https://www.anthropic.com/index/claude-2 + monitoring: '' + feedback: '' diff --git a/assets/argonne.yaml b/assets/argonne.yaml index 049fa897..addd0a35 100644 --- a/assets/argonne.yaml +++ b/assets/argonne.yaml @@ -1,24 +1,23 @@ --- -- access: open - analysis: '' - created_date: 2022-10-11 - dependencies: [SARS-CoV-2 genome dataset, BV-BRC dataset] - description: '' - feedback: '' - intended_uses: '' - license: MIT - modality: - explanation: genome sequence - value: text; text - model_card: '' - monitoring: '' + +- type: model name: GenSLM organization: Argonne National Laboratory - prohibited_uses: '' - quality_control: '' - size: 25B parameters (dense) + description: '' + created_date: 2022-10-11 + url: https://www.biorxiv.org/content/10.1101/2022.10.10.511571v1 + model_card: '' + modality: genome sequence + analysis: '' + size: 25B parameters (dense model) + dependencies: [SARS-CoV-2 genome dataset, BV-BRC dataset] training_emissions: '' - training_hardware: '' training_time: '' - type: model - url: https://www.biorxiv.org/content/10.1101/2022.10.10.511571v1 + training_hardware: '' + quality_control: '' + access: open + license: MIT + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' diff --git a/assets/assembly.yaml b/assets/assembly.yaml index 61848cc2..197b634e 100644 --- a/assets/assembly.yaml +++ b/assets/assembly.yaml @@ -1,35 +1,48 @@ --- -- access: closed - analysis: '' + +- type: dataset + name: Conformer-1 dataset + organization: AssemblyAI + description: The dataset used to train AssemblyAI's Conformer-1 model. created_date: - explanation: The date Conformer-1 was announced in this [[Twitter post]](https://twitter.com/AssemblyAI/status/1636050346240884744). value: 2023-03-15 + explanation: The date Conformer-1 was announced in this [[Twitter post]](https://twitter.com/AssemblyAI/status/1636050346240884744). + url: https://www.assemblyai.com/blog/conformer-1/ datasheet: none + modality: speech (English), text + size: 650K hours audio (60TB) + sample: [] + analysis: '' dependencies: [] - description: The dataset used to train AssemblyAI's Conformer-1 model. + included: > + "our team curated a dataset of 650K hours of English audio - consisting + of proprietary internal datasets and various sources from the internet" excluded: '' - feedback: '' - included: "\"our team curated a dataset of 650K hours of English audio - consisting\ - \ of proprietary internal datasets and various sources from the internet\"\n" - intended_uses: '' + quality_control: '' + access: closed license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - modality: - explanation: speech (English), text - value: audio, text + explanation: > + The asset isn't released, and hence the license is unknown. 
+ intended_uses: '' + prohibited_uses: '' monitoring: '' - name: Conformer-1 dataset + feedback: '' + +- type: model + name: Conformer-1 organization: AssemblyAI - prohibited_uses: '' - quality_control: '' - sample: [] - size: 650K hours audio (60TB) - type: dataset + description: Conformer-1 is a state-of-the-art speech recognition model trained + on 650K hours of audio data that achieves near human-level performance and robustness + across a variety of data, making up to 43% fewer errors on noisy data than other + ASR models. + created_date: + value: 2023-03-15 + explanation: The date the model was announced in this [[Twitter post]](https://twitter.com/AssemblyAI/status/1636050346240884744). url: https://www.assemblyai.com/blog/conformer-1/ -- access: limited + model_card: '' + modality: speech (English), text analysis: - explanation: See [[Results and Performance]](https://www.assemblyai.com/blog/conformer-1/#results-and-performance) value: In order to evaluate the accuracy and robustness of Conformer-1, we sourced 60+ hours of human labeled audio data covering popular speech domains such as call centers, podcasts, broadcasts, and webinars. We then calculated the @@ -37,84 +50,76 @@ the results against Whisper and a number of other ASR models. To ground our results against popular open source speech recognition benchmarks, we also performed the same WER analysis against a number of academic datasets. - created_date: - explanation: The date the model was announced in this [[Twitter post]](https://twitter.com/AssemblyAI/status/1636050346240884744). - value: 2023-03-15 + explanation: See [[Results and Performance]](https://www.assemblyai.com/blog/conformer-1/#results-and-performance) + size: 300M parameters dependencies: [Conformer-1 dataset] - description: Conformer-1 is a state-of-the-art speech recognition model trained - on 650K hours of audio data that achieves near human-level performance and robustness - across a variety of data, making up to 43% fewer errors on noisy data than other - ASR models. - feedback: '' - intended_uses: '' + training_emissions: '' + training_time: '' + training_hardware: '' + quality_control: '' + access: limited license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - modality: - explanation: speech (English), text - value: audio, text; audio, text - model_card: '' + explanation: > + The asset isn't released, and hence the license is unknown. + intended_uses: '' + prohibited_uses: '' monitoring: '' - name: Conformer-1 + feedback: '' + +- type: application + name: AssemblyAI organization: AssemblyAI - prohibited_uses: '' - quality_control: '' - size: 300M parameters (dense) - training_emissions: '' - training_hardware: '' - training_time: '' - type: model - url: https://www.assemblyai.com/blog/conformer-1/ -- access: limited - adaptation: '' - created_date: 2022 - dependencies: [Anthropic API] description: AssemblyAI uses Claude and Anthropic's model to transcribe and understand audio data at scale. 
- failures: '' - feedback: '' - intended_uses: '' + created_date: 2022 + url: https://www.assemblyai.com/ + dependencies: [Anthropic API] + adaptation: '' + output_space: '' + quality_control: '' + access: limited license: - explanation: "\"AssemblyAI gives you a personal, worldwide, royalty-free, non-assignable\ - \ and non-exclusive license to use the Site and Services solely for your reasonable\ - \ personal and commercial purposes, in the manner permitted by these Terms.\"\ - \ - excerpt from the Terms of Service.\n" value: custom + explanation: > + "AssemblyAI gives you a personal, worldwide, royalty-free, + non-assignable and non-exclusive license to use the Site and Services + solely for your reasonable personal and commercial purposes, in the + manner permitted by these Terms." - excerpt from the Terms of Service. + terms_of_service: https://www.assemblyai.com/legal/terms-of-service + intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' monthly_active_users: '' - name: AssemblyAI - organization: AssemblyAI - output_space: '' - prohibited_uses: '' - quality_control: '' - terms_of_service: https://www.assemblyai.com/legal/terms-of-service - type: application - url: https://www.assemblyai.com/ user_distribution: '' -- access: open - adaptation: '' + failures: '' + +- type: application + name: Conformer-1 API + organization: AssemblyAI + description: API to access the AssemblyAI's Conformer-1 model. created_date: - explanation: The date the API was announced in this [[Twitter post]](https://twitter.com/AssemblyAI/status/1636050346240884744). value: 2023-03-15 + explanation: The date the API was announced in this [[Twitter post]](https://twitter.com/AssemblyAI/status/1636050346240884744). + url: https://www.assemblyai.com/blog/conformer-1/ dependencies: [Conformer-1] - description: API to access the AssemblyAI's Conformer-1 model. - failures: '' - feedback: '' - intended_uses: Speech recognition + adaptation: '' + output_space: '' + quality_control: '' + access: open license: - explanation: "\"AssemblyAI gives you a personal, worldwide, royalty-free, non-assignable\ - \ and non-exclusive license to use the Site and Services solely for your reasonable\ - \ personal and commercial purposes, in the manner permitted by these Terms.\"\ - \ - excerpt from the Terms of Service.\n" value: custom + explanation: > + "AssemblyAI gives you a personal, worldwide, royalty-free, + non-assignable and non-exclusive license to use the Site and Services + solely for your reasonable personal and commercial purposes, in the + manner permitted by these Terms." - excerpt from the Terms of Service. 
+ terms_of_service: https://www.assemblyai.com/legal/terms-of-service + intended_uses: Speech recognition + prohibited_uses: '' monitoring: '' + feedback: '' monthly_active_users: '' - name: Conformer-1 API - organization: AssemblyAI - output_space: '' - prohibited_uses: '' - quality_control: '' - terms_of_service: https://www.assemblyai.com/legal/terms-of-service - type: application - url: https://www.assemblyai.com/blog/conformer-1/ user_distribution: '' + failures: '' diff --git a/assets/autogpt.yaml b/assets/autogpt.yaml index 34a3ef5f..5a3a040f 100644 --- a/assets/autogpt.yaml +++ b/assets/autogpt.yaml @@ -1,22 +1,23 @@ --- -- access: open - adaptation: GPT-4 adapted to run autonomously by chaining together LLM "thoughts" - created_date: 2023-04-16 - dependencies: [GPT-4 API] - description: Auto-GPT is an experimental open-source application showcasing the - capabilities of the GPT-4 language model. - failures: '' - feedback: '' - intended_uses: '' - license: MIT - monitoring: '' - monthly_active_users: '' + +- type: application name: Auto-GPT organization: Auto-GPT + description: Auto-GPT is an experimental open-source application showcasing the + capabilities of the GPT-4 language model. + created_date: 2023-04-16 + url: https://news.agpt.co/ + dependencies: [GPT-4 API] + adaptation: GPT-4 adapted to run autonomously by chaining together LLM "thoughts" output_space: text - prohibited_uses: '' quality_control: '' + access: open + license: MIT terms_of_service: '' - type: application - url: https://news.agpt.co/ + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' + monthly_active_users: '' user_distribution: '' + failures: '' diff --git a/assets/baai.yaml b/assets/baai.yaml index e90846c6..ab838a1d 100644 --- a/assets/baai.yaml +++ b/assets/baai.yaml @@ -1,57 +1,68 @@ --- -- access: closed - analysis: '' + +- type: dataset + name: Wu Dao dataset + # General + organization: Beijing Academy of Artificial Intelligence + description: '' created_date: - explanation: "The date that BAAI made a public announcement [[News Link]](https://mp.weixin.qq.com/s/BUQWZ5EdR19i40GuFofpBg).\n" value: 2021-01-12 + explanation: > + The date that BAAI made a public announcement + [[News Link]](https://mp.weixin.qq.com/s/BUQWZ5EdR19i40GuFofpBg). + url: https://www.tsinghua.edu.cn/en/info/1420/10473.htm datasheet: '' + modality: text (Chinese, English) and image + size: unknown + sample: [] + analysis: '' + # Construction dependencies: [] - description: '' - excluded: '' - feedback: '' - included: '' - intended_uses: '' license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - modality: - explanation: text (Chinese, English) and image - value: image, text + explanation: > + The asset isn't released, and hence the license is unknown. 
+ included: '' + excluded: '' + quality_control: '' + # Downstream + access: closed + intended_uses: '' + prohibited_uses: '' monitoring: '' - name: Wu Dao dataset + feedback: '' + +- type: model + name: Wu Dao 2.0 + # General organization: Beijing Academy of Artificial Intelligence - prohibited_uses: '' - quality_control: '' - sample: [] - size: unknown - type: dataset - url: https://www.tsinghua.edu.cn/en/info/1420/10473.htm -- access: - explanation: "The model wasn't released.\n" - value: closed - analysis: '' + description: '' created_date: - explanation: "The date that BAAI made a public announcement [[News Link]](https://mp.weixin.qq.com/s/BUQWZ5EdR19i40GuFofpBg).\n" value: 2021-01-12 + explanation: > + The date that BAAI made a public announcement + [[News Link]](https://mp.weixin.qq.com/s/BUQWZ5EdR19i40GuFofpBg). + url: https://www.tsinghua.edu.cn/en/info/1420/10473.htm + model_card: '' + modality: text (Chinese, English) and image + size: 1.75T parameters (dense model) + analysis: '' + # Construction dependencies: [Wu Dao dataset] - description: '' - feedback: '' - intended_uses: '' + training_emissions: '' + training_time: '' + training_hardware: '' + quality_control: '' + # Downstream + access: + value: closed + explanation: > + The model wasn't released. license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - modality: - explanation: text (Chinese, English) and image - value: image, text; image, text - model_card: '' - monitoring: '' - name: Wu Dao 2.0 - organization: Beijing Academy of Artificial Intelligence + explanation: > + The asset isn't released, and hence the license is unknown. + intended_uses: '' prohibited_uses: '' - quality_control: '' - size: 1.75T parameters (dense) - training_emissions: '' - training_hardware: '' - training_time: '' - type: model - url: https://www.tsinghua.edu.cn/en/info/1420/10473.htm + monitoring: '' + feedback: '' diff --git a/assets/baidu.yaml b/assets/baidu.yaml index bed1db91..c6386cb1 100644 --- a/assets/baidu.yaml +++ b/assets/baidu.yaml @@ -1,90 +1,89 @@ --- -- access: - explanation: Baidu does not provide ERNIE 3.0 for external researchers - value: closed - analysis: '' + +- type: model + name: ERNIE 3.0 Titan + organization: Baidu, PengCheng Laboratory + description: ERNIE 3.0 Titan is a language model created_date: - explanation: The date the model paper was released value: 2021-12-23 + explanation: The date the model paper was released + url: https://arxiv.org/abs/2112.12731 + model_card: none + modality: text (Chinese) + analysis: '' + size: 260B parameters (dense model) dependencies: [] - description: ERNIE 3.0 Titan is a language model - feedback: none - intended_uses: unknown + training_emissions: unknown + training_time: unknown + training_hardware: Baidu V100 Cluster, PengCheng Lab Ascend 910 NPU cluster + quality_control: none + access: + value: closed + explanation: Baidu does not provide ERNIE 3.0 for external researchers license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - modality: - explanation: text (Chinese) - value: text; text - model_card: none - monitoring: none - name: ERNIE 3.0 Titan - organization: Baidu, PengCheng Laboratory + explanation: > + The asset isn't released, and hence the license is unknown. 
+ intended_uses: unknown prohibited_uses: unknown - quality_control: none - size: 260B parameters (dense) + monitoring: none + feedback: none + +- type: model + name: ERNIE-ViLG + organization: Baidu + description: ERNIE-ViLG is a model for text-to-image generation + created_date: + value: 2021-12-31 + explanation: The date the model paper was released + url: https://arxiv.org/abs/2112.15283 + model_card: none + modality: text (Chinese) and image + size: 10B parameters + analysis: '' + dependencies: [] training_emissions: unknown - training_hardware: Baidu V100 Cluster, PengCheng Lab Ascend 910 NPU cluster training_time: unknown - type: model - url: https://arxiv.org/abs/2112.12731 -- access: + training_hardware: unknown + quality_control: '' + access: + value: limited explanation: API access is available through PaddleHub. Instructions on [[Github repository]](https://github.com/PaddlePaddle/PaddleHub/blob/develop/modules/image/text_to_image/ernie_vilg/README.md) - value: limited - analysis: '' - created_date: - explanation: The date the model paper was released - value: 2021-12-31 - dependencies: [] - description: ERNIE-ViLG is a model for text-to-image generation - feedback: none - intended_uses: unknown license: - explanation: "No explicit license for the ERNIE-ViLG model is provided in the\ - \ [[Github repository]](https://github.com/PaddlePaddle/PaddleHub)\n" value: none - modality: - explanation: text (Chinese) and image - value: image, text; image, text - model_card: none + explanation: > + No explicit license for the ERNIE-ViLG model is provided in the [[Github repository]](https://github.com/PaddlePaddle/PaddleHub) + intended_uses: unknown + prohibited_uses: unknown monitoring: none - name: ERNIE-ViLG + feedback: none + +- type: model + name: ERNIE-ViLG 2.0 organization: Baidu - prohibited_uses: unknown - quality_control: '' - size: 10B parameters (dense) - training_emissions: unknown - training_hardware: unknown - training_time: unknown - type: model - url: https://arxiv.org/abs/2112.15283 -- access: - explanation: Baidu does not provide ERNIE-ViLG 2.0 to external researchers - value: closed - analysis: '' + description: ERNIE-ViLG is a model for text-to-image generation created_date: - explanation: The date the model paper was released value: 2022-10-27 + explanation: The date the model paper was released + url: https://arxiv.org/abs/2210.15257 + model_card: none + modality: text (Chinese) and image + size: 10B parameters + analysis: '' dependencies: [] - description: ERNIE-ViLG is a model for text-to-image generation - feedback: none - intended_uses: unknown + training_emissions: unknown + training_time: 18 days according to [[the paper]](https://arxiv.org/abs/2210.15257) + training_hardware: 320 A100 GPUs according to [[the paper]](https://arxiv.org/abs/2210.15257) + quality_control: '' + access: + value: closed + explanation: Baidu does not provide ERNIE-ViLG 2.0 to external researchers license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - modality: - explanation: text (Chinese) and image - value: image, text; image, text - model_card: none - monitoring: none - name: ERNIE-ViLG 2.0 - organization: Baidu + explanation: > + The asset isn't released, and hence the license is unknown. 
+ intended_uses: unknown prohibited_uses: unknown - quality_control: '' - size: 10B parameters (dense) - training_emissions: unknown - training_hardware: 320 A100 GPUs according to [[the paper]](https://arxiv.org/abs/2210.15257) - training_time: 18 days according to [[the paper]](https://arxiv.org/abs/2210.15257) - type: model - url: https://arxiv.org/abs/2210.15257 + monitoring: none + feedback: none diff --git a/assets/bain.yaml b/assets/bain.yaml index 10f57f76..c6f68c19 100644 --- a/assets/bain.yaml +++ b/assets/bain.yaml @@ -1,24 +1,25 @@ --- -- access: limited - adaptation: '' - created_date: 2023-02-21 - dependencies: [ChatGPT API] + +- type: application + name: Bain Chat + organization: Bain description: With the alliance, Bain will combine its deep digital implementation capabilities and strategic expertise with OpenAI’s AI tools and platforms, including ChatGPT, to help its clients around the world identify and implement the value of AI to maximize business potential. - failures: '' - feedback: '' - intended_uses: '' - license: unknown - monitoring: '' - monthly_active_users: '' - name: Bain Chat - organization: Bain + created_date: 2023-02-21 + url: https://www.bain.com/vector-digital/partnerships-alliance-ecosystem/openai-alliance/ + dependencies: [ChatGPT API] + adaptation: '' output_space: '' - prohibited_uses: '' quality_control: '' + access: limited + license: unknown terms_of_service: '' - type: application - url: https://www.bain.com/vector-digital/partnerships-alliance-ecosystem/openai-alliance/ + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' + monthly_active_users: '' user_distribution: '' + failures: '' diff --git a/assets/be_my_eyes.yaml b/assets/be_my_eyes.yaml index c9a60174..19c1e9cc 100644 --- a/assets/be_my_eyes.yaml +++ b/assets/be_my_eyes.yaml @@ -1,22 +1,23 @@ --- -- access: limited - adaptation: '' - created_date: 2023-03-14 - dependencies: [GPT-4 API] - description: The first-ever digital visual assistant powered by OpenAI’s new GPT-4 - language model. - failures: '' - feedback: '' - intended_uses: '' - license: unknown - monitoring: '' - monthly_active_users: '' + +- type: application name: Virtual Volunteer organization: Be My Eyes + description: The first-ever digital visual assistant powered by OpenAI’s new GPT-4 + language model. + created_date: 2023-03-14 + url: https://www.bemyeyes.com/blog/introducing-be-my-eyes-virtual-volunteer + dependencies: [GPT-4 API] + adaptation: '' output_space: '' - prohibited_uses: '' quality_control: '' + access: limited + license: unknown terms_of_service: '' - type: application - url: https://www.bemyeyes.com/blog/introducing-be-my-eyes-virtual-volunteer + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' + monthly_active_users: '' user_distribution: '' + failures: '' diff --git a/assets/berkeley.yaml b/assets/berkeley.yaml index 76756276..e8bab3a0 100644 --- a/assets/berkeley.yaml +++ b/assets/berkeley.yaml @@ -1,75 +1,72 @@ --- -- access: open - analysis: Evaluated in comparison with ChatGPT and Stanford Alpaca. - created_date: 2023-04-03 - dependencies: [LLaMA, web-scraped dialogue data] + +- type: model + name: Koala + organization: Berkeley description: A relatively small chatbot trained by fine-tuning Meta’s LLaMA on dialogue data gathered from the web. 
- feedback: https://huggingface.co/TheBloke/koala-7B-GPTQ-4bit-128g/discussions - intended_uses: academic research - license: Apache 2.0 - modality: - explanation: natural language text - value: text; text + created_date: 2023-04-03 + url: https://bair.berkeley.edu/blog/2023/04/03/koala/ model_card: https://huggingface.co/TheBloke/koala-7B-GPTQ-4bit-128g - monitoring: '' - name: Koala - organization: Berkeley - prohibited_uses: '' - quality_control: '' - size: 13B parameters (dense) + modality: natural language text + analysis: Evaluated in comparison with ChatGPT and Stanford Alpaca. + size: 13B parameters + dependencies: [LLaMA, web-scraped dialogue data] training_emissions: '' - training_hardware: 8 A100 GPUs training_time: 6 hours - type: model - url: https://bair.berkeley.edu/blog/2023/04/03/koala/ -- access: open - analysis: Evaluated using AST sub-tree matching technique and compared to other - models in terms of API functionality accuracy. - created_date: 2023-05-24 - dependencies: [LLaMA, Gorilla document retriever] - description: Gorilla is a finetuned LLaMA-based model that surpasses the performance - of GPT-4 on writing API calls. - feedback: '' - intended_uses: In conjunction with a LLM to improve its capability for using API - calls. + training_hardware: 8 A100 GPUs + quality_control: '' + access: open license: Apache 2.0 - modality: - explanation: outputs API from natural language input - value: other; other - model_card: '' + intended_uses: academic research + prohibited_uses: '' monitoring: '' + feedback: https://huggingface.co/TheBloke/koala-7B-GPTQ-4bit-128g/discussions + +- type: model name: Gorilla organization: Berkeley - prohibited_uses: '' - quality_control: No specific quality control is mentioned in model training, though - details on data processing and collection are provided in the paper. - size: 7B parameters (dense) + description: Gorilla is a finetuned LLaMA-based model that surpasses the performance + of GPT-4 on writing API calls. + created_date: 2023-05-24 + url: https://arxiv.org/pdf/2305.15334v1.pdf + model_card: '' + modality: outputs API from natural language input + analysis: Evaluated using AST sub-tree matching technique and compared to other + models in terms of API functionality accuracy. + size: 7B parameters + dependencies: [LLaMA, Gorilla document retriever] training_emissions: '' - training_hardware: '' training_time: '' - type: model - url: https://arxiv.org/pdf/2305.15334v1.pdf -- access: open - analysis: Evaluated on wide range of tasks using own evaluation benchmarks. - created_date: 2023-05-03 - dependencies: [RedPajama] - description: OpenLlama is an open source reproduction of Meta's LLaMA model. - feedback: '' - intended_uses: '' + training_hardware: '' + quality_control: No specific quality control is mentioned in model training, though + details on data processing and collection are provided in the paper. + access: open license: Apache 2.0 - modality: - explanation: text - value: text; text - model_card: '' + intended_uses: In conjunction with a LLM to improve its capability for using API + calls. + prohibited_uses: '' monitoring: '' + feedback: '' + +- type: model name: OpenLLaMA organization: Berkeley - prohibited_uses: '' - quality_control: '' - size: 17B parameters (dense) + description: OpenLlama is an open source reproduction of Meta's LLaMA model. 
+ created_date: 2023-05-03 + url: https://github.com/openlm-research/open_llama + model_card: '' + modality: text + analysis: Evaluated on wide range of tasks using own evaluation benchmarks. + size: 17B parameters + dependencies: [RedPajama] training_emissions: unknown - training_hardware: '' training_time: unknown - type: model - url: https://github.com/openlm-research/open_llama + training_hardware: '' + quality_control: '' + access: open + license: Apache 2.0 + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' diff --git a/assets/bigcode.yaml b/assets/bigcode.yaml index cb03bed0..4a5362ea 100644 --- a/assets/bigcode.yaml +++ b/assets/bigcode.yaml @@ -1,79 +1,76 @@ --- -- access: open - analysis: Tested on several benchmarks, most notably Python benchmark HumanEval. - created_date: 2023-05-09 - dependencies: [The Stack] + +- type: model + name: StarCoder + organization: BigCode description: StarCoder is a Large Language Model for Code (Code LLM) trained on permissively licensed data from GitHub, including from 80+ programming languages, Git commits, GitHub issues, and Jupyter notebooks. - feedback: https://huggingface.co/bigcode/starcoder/discussions - intended_uses: With a Tech Assistant prompt and not as an instruction model given - training limitations. - license: Apache 2.0 - modality: - explanation: code (80+ programming languages) - value: code; code + created_date: 2023-05-09 + url: https://arxiv.org/pdf/2305.06161.pdf model_card: https://huggingface.co/bigcode/starcoder - monitoring: '' - name: StarCoder - organization: BigCode - prohibited_uses: '' + modality: code (80+ programming languages) + analysis: Tested on several benchmarks, most notably Python benchmark HumanEval. + size: 15.5B parameters + dependencies: [The Stack] + training_emissions: 16.68 tons of CO2eq + training_time: 2 days + training_hardware: 64 NVIDIA A100 GPUs quality_control: No specific quality control is mentioned in model training, though details on data processing and how the tokenizer was trained are provided in the paper. - size: 15.5B parameters (dense) - training_emissions: 16.68 tons of CO2eq - training_hardware: 64 NVIDIA A100 GPUs - training_time: 2 days - type: model - url: https://arxiv.org/pdf/2305.06161.pdf -- access: open - analysis: Evaluated on MultiPL-E system benchmarks. - created_date: 2023-02-24 - dependencies: [The Stack, BigCode Dataset] - description: Multilingual code model derived from findings of BigCode Project - analysis on Github stars' association to data quality. - feedback: '' - intended_uses: '' + access: open license: Apache 2.0 - modality: - explanation: code - value: code; code - model_card: '' + intended_uses: With a Tech Assistant prompt and not as an instruction model given + training limitations. + prohibited_uses: '' monitoring: '' + feedback: https://huggingface.co/bigcode/starcoder/discussions + +- type: model name: SantaCoder organization: BigCode - prohibited_uses: '' - quality_control: '' - size: 1.1B parameters (dense) + description: Multilingual code model derived from findings of BigCode Project + analysis on Github stars' association to data quality. + created_date: 2023-02-24 + url: https://arxiv.org/pdf/2301.03988.pdf + model_card: '' + modality: code + analysis: Evaluated on MultiPL-E system benchmarks. 
+ size: 1.1B parameters + dependencies: [The Stack, BigCode Dataset] training_emissions: '' - training_hardware: 96 NVIDIA Tesla V100 GPUs training_time: 3.1 days - type: model - url: https://arxiv.org/pdf/2301.03988.pdf -- access: open - analysis: Evaluated models trained on The Stack on HumanEval and MBPP and compared - against similarly-sized models. - created_date: 2022-11-20 - datasheet: https://huggingface.co/datasets/bigcode/the-stack - dependencies: [GitHub] - description: The Stack is a 3.1 TB dataset consisting of permissively licensed - source code inteded for use in creating code LLMs. - excluded: '' - feedback: '' - included: '' - intended_uses: creating code LLMs + training_hardware: 96 NVIDIA Tesla V100 GPUs + quality_control: '' + access: open license: Apache 2.0 - modality: - explanation: code (358 programming languages) - value: code + intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' + +- type: dataset name: The Stack organization: BigCode - prohibited_uses: '' + description: The Stack is a 3.1 TB dataset consisting of permissively licensed + source code intended for use in creating code LLMs. + created_date: 2022-11-20 + url: https://arxiv.org/pdf/2211.15533.pdf + datasheet: https://huggingface.co/datasets/bigcode/the-stack + modality: code (358 programming languages) + size: 3.1 TB + sample: [] + analysis: Evaluated models trained on The Stack on HumanEval and MBPP and compared + against similarly-sized models. + dependencies: [GitHub] + included: '' + excluded: '' + quality_control: allowed users whose data were part of The Stack's training data to opt-out - sample: [] - size: 3.1 TB - type: dataset - url: https://arxiv.org/pdf/2211.15533.pdf + access: open + license: Apache 2.0 + intended_uses: creating code LLMs + prohibited_uses: '' + monitoring: '' + feedback: '' diff --git a/assets/bigscience.yaml b/assets/bigscience.yaml index ce03bf3f..21251e9d 100644 --- a/assets/bigscience.yaml +++ b/assets/bigscience.yaml @@ -1,148 +1,163 @@ --- -- access: - explanation: While not all of the ROOTS corpus is publicly accessible, the majority - is through [this form](https://docs.google.com/forms/d/e/1FAIpQLSdq50O1x4dkdGI4dwsmchFuNI0KCWEDiKUYxvd0r0_sl6FfAQ/viewform?usp=send_form) - value: open - analysis: https://huggingface.co/spaces/bigscience-data/roots-search - created_date: 2022-06-06 - datasheet: https://huggingface.co/spaces/bigscience-data/bigscience-corpus - dependencies: [] + +# Datasets +- type: dataset + name: ROOTS + # General + organization: BigScience description: The Responsible Open-science Open-collaboration Text Sources (ROOTS) corpus, a 1.6TB dataset spanning 59 languages that was used to train the 176-billion-parameter BigScience Large Open-science Open-access Multilingual (BLOOM) language model. - excluded: unknown - feedback: https://huggingface.co/spaces/bigscience-data/roots-search/discussions - included: See section 2 of the paper. - intended_uses: To empower large-scale monolingual and multilingual modeling projects - with both the data and the processing tools, as well as stimulate research around - this large multilingual corpus.
+ created_date: 2022-06-06 + url: https://openreview.net/forum?id=UoEw6KigkUn + datasheet: https://huggingface.co/spaces/bigscience-data/bigscience-corpus + modality: text (46 languages) and code (13 programming languages) + size: 1.6TB + sample: [https://huggingface.co/spaces/bigscience-data/roots-search] + analysis: https://huggingface.co/spaces/bigscience-data/roots-search + # Construction + dependencies: [] license: + value: custom explanation: Each constituent subset of the dataset will be released under the license that applies to it. ([See individual dataset page for specific license information](https://hf.co/bigscience-data)) - value: custom - modality: - explanation: text (46 languages) and code (13 programming languages) - value: code, text - monitoring: none - name: ROOTS - organization: BigScience - prohibited_uses: none + included: See section 2 of the paper. + excluded: unknown quality_control: Data collection involved merging and deduplicating searches to remove menus, HTML tags. Further, a quality improvement pipeline was implemented. - sample: [https://huggingface.co/spaces/bigscience-data/roots-search] - size: 1.6TB - type: dataset - url: https://openreview.net/forum?id=UoEw6KigkUn -- access: open - analysis: See the paper. + # Downstream + access: + value: open + explanation: While not all of the ROOTS corpus is publicly accessible, the majority + is through [this form](https://docs.google.com/forms/d/e/1FAIpQLSdq50O1x4dkdGI4dwsmchFuNI0KCWEDiKUYxvd0r0_sl6FfAQ/viewform?usp=send_form) + intended_uses: To empower large-scale monolingual and multilingual modeling projects + with both the data and the processing tools, as well as stimulate research around + this large multilingual corpus. + prohibited_uses: none + monitoring: none + feedback: https://huggingface.co/spaces/bigscience-data/roots-search/discussions + +- type: dataset + name: P3 + # General + organization: BigScience + description: The Public Pool of Prompts (P3) is a collection of prompts written in a unified + format used to train T0++. created_date: 2022-10-15 + url: https://arxiv.org/pdf/2110.08207.pdf datasheet: https://huggingface.co/datasets/bigscience/P3 + modality: text (English) + size: 2000 prompts + sample: [https://huggingface.co/datasets/bigscience/P3/viewer] + analysis: See the paper. + # Construction dependencies: [] - description: The Public Pool of Prompts (P3) are prompts written in an unified - format use to train T0++. - excluded: We conservatively decided not to prompt datasets that contain potentially - harmful content (for instance, datasets built on social media content). - feedback: Point of Contact is [Victor Sanh](https://huggingface.co/datasets/bigscience/P3) + license: Apache 2.0 included: The Public Pool of Prompts relies on the Hugging Face Dataset library. Any public dataset in the Datasets library can be prompted. We select the datasets that have at least one subset in English and excluded datasets containing (predominantly) non-natural language examples. + excluded: We conservatively decided not to prompt datasets that contain potentially + harmful content (for instance, datasets built on social media content). + quality_control: Data collection involved merging and deduplicating searches to + remove menus, HTML tags. Further, a quality improvement pipeline was implemented. + # Downstream + access: open intended_uses: Multitask finetuning of language models.
- license: Apache 2.0 - modality: - explanation: text (English) - value: text + prohibited_uses: none monitoring: none - name: P3 + feedback: Point of Contact is [Victor Sanh](https://huggingface.co/datasets/bigscience/P3) + +- type: dataset + name: xP3 + # General organization: BigScience - prohibited_uses: none - quality_control: Data collection involved merging and deduplicating searches to - remove menus, HTML tags. Further, a quality improvement pipeline was implemented. - sample: [https://huggingface.co/datasets/bigscience/P3/viewer] - size: 2000 prompts - type: dataset - url: https://arxiv.org/pdf/2110.08207.pdf -- access: open - analysis: See the paper. - created_date: 2022-11-03 - datasheet: https://huggingface.co/datasets/bigscience/xP3 - dependencies: [P3] description: xP3 (Crosslingual Public Pool of Prompts) is a collection of prompts and datasets across 46 languages & 16 NLP tasks. It is used for the training of BLOOMZ and mT0, multilingual language models capable of following human instructions in dozens of languages zero-shot. + created_date: 2022-11-03 + url: https://arxiv.org/abs/2211.01786 + datasheet: https://huggingface.co/datasets/bigscience/xP3 + modality: text (46 languages) and code (13 programming languages) + size: 9.4GB + sample: [https://huggingface.co/datasets/bigscience/xP3/viewer] + analysis: See the paper. + # Construction + dependencies: [P3] + license: Apache 2.0 + included: xP3 adds 28 multilingual datasets to P3 based on the P3 task taxonomy. excluded: We conservatively decided not to prompt datasets that contain potentially harmful content (for instance, datasets built on social media content). - feedback: Point of Contact is [Niklas Muennighoff](https://huggingface.co/datasets/bigscience/xP3) - included: xP3 adds 28 multilingual datasets to P3 based on the P3 task taxonomy. + quality_control: Data collection involved merging and deduplicating searches to + remove menus, HTML tags. Further, a quality improvement pipeline was implemented. + # Downstream + access: open intended_uses: Multitask finetuning of language models. - license: Apache 2.0 - modality: - explanation: text (46 languages) and code (13 programming languages) - value: code, text + prohibited_uses: none monitoring: none - name: xP3 + feedback: Point of Contact is [Niklas Muennighoff](https://huggingface.co/datasets/bigscience/xP3) + + +- type: model + name: T0++ organization: BigScience - prohibited_uses: none - quality_control: Data collection involved merging and deduplicating searches to - remove menus, HTML tags. Further, a quality improvement pipeline was implemented. - sample: [https://huggingface.co/datasets/bigscience/xP3/viewer] - size: 9.4GB - type: dataset - url: https://arxiv.org/abs/2211.01786 -- access: open - analysis: '' + description: T0++ is a multitask fine-tuned language model based on T5. created_date: - explanation: The date the paper was posted to arXiv. value: 2021-10-15 - dependencies: [T5, P3] - description: T0++ is an multitask fine-tuned language model based on T5. - feedback: https://huggingface.co/bigscience/T0pp/discussions - intended_uses: You can use the models to perform inference on tasks by specifying - your query in natural language, and the models will generate a prediction. - license: Apache 2.0 - modality: - explanation: text (English) - value: text; text + explanation: The date the paper was posted to arXiv.
+ url: https://arxiv.org/pdf/2110.08207.pdf model_card: https://huggingface.co/bigscience/T0pp - monitoring: none - name: T0++ - organization: BigScience - prohibited_uses: none - quality_control: https://arxiv.org/pdf/2110.08207.pdf - size: 11B parameters (dense) + modality: text (English) + size: 11B parameters (dense model) + analysis: '' + dependencies: [T5, P3] training_emissions: - explanation: Sourced from estimates in https://arxiv.org/pdf/2110.08207.pdf value: 0.9 tCO2e - training_hardware: Jean Zay (v3-512) - training_time: explanation: Sourced from estimates in https://arxiv.org/pdf/2110.08207.pdf + training_time: value: 27 hours - type: model - url: https://arxiv.org/pdf/2110.08207.pdf -- access: open - analysis: '' + explanation: Sourced from estimates in https://arxiv.org/pdf/2110.08207.pdf + training_hardware: Jean Zay (v3-512) + quality_control: https://arxiv.org/pdf/2110.08207.pdf + access: open + license: Apache 2.0 + intended_uses: You can use the models to perform inference on tasks by specifying + your query in natural language, and the models will generate a prediction. + prohibited_uses: none + monitoring: none + feedback: https://huggingface.co/bigscience/T0pp/discussions + +- type: model + name: BLOOM + organization: BigScience + description: BLOOM is an autoregressive multilingual language model. created_date: - explanation: The date the model was released value: 2022-07-12 + explanation: The date the model was released + url: https://arxiv.org/abs/2211.05100 + model_card: '' + modality: text (46 languages) and code (13 programming languages) + size: 176B parameters (dense model) + analysis: '' dependencies: [ROOTS] - description: BLOOM is an autoregressive multilingual language model. - feedback: https://huggingface.co/bigscience/bloom/discussions + training_emissions: + value: 25 tCO2e + explanation: Sourced from estimates in https://arxiv.org/abs/2211.02001 + training_time: + value: 7039 petaflop/s-days + explanation: 1082990 A100 hours at 156 TFLOP/s maximum utilization + training_hardware: Jean Zay (48 * 8xA100 80GB nodes) + quality_control: '' + access: open + license: + value: BigScience RAIL v1.0 + explanation: Model is licensed under https://huggingface.co/spaces/bigscience/license intended_uses: This model is being created in order to enable public research on large language models (LLMs). LLMs are intended to be used for language generation or as a pretrained base model that can be further fine-tuned for specific tasks. Use cases below are not exhaustive. - license: - explanation: Model is licensed under https://huggingface.co/spaces/bigscience/license - value: BigScience RAIL v1.0 - modality: - explanation: text (46 languages) and code (13 programming languages) - value: code, text; code, text - model_card: '' - monitoring: none - name: BLOOM - organization: BigScience prohibited_uses: Using the model in high-stakes settings is out of scope for this model (e.g. biomedical/political/legal/finance domains, evaluating or scoring individuals). The model is not designed for critical decisions nor uses with @@ -151,72 +166,61 @@ using the model for harm, violating human rights, or other kinds of malicious activities, is a misuse of this model (e.g. spam generation, disinformation, disparagement, deception, surveillance). 
- quality_control: '' - size: 176B parameters (dense) - training_emissions: - explanation: Sourced from estimates in https://arxiv.org/abs/2211.02001 - value: 25 tCO2e - training_hardware: Jean Zay (48 * 8xA100 80GB nodes) - training_time: - explanation: 1082990 A100 hours at 156 TFLOP/s maximum utilization - value: 7039 petaflop/s-days - type: model - url: https://arxiv.org/abs/2211.05100 -- access: open - analysis: https://huggingface.co/bigscience/bloomz#evaluation - created_date: - explanation: The date the model was released - value: 2021-10-15 - dependencies: [mT5, xP3] - description: mT0 is an multitask fine-tuned multilingual language model based - on mT5. - feedback: https://huggingface.co/bigscience/bloomz/discussions - intended_uses: We recommend using the model to perform tasks expressed in natural - language. - license: - explanation: Model is licensed under https://huggingface.co/spaces/bigscience/license - value: BigScience RAIL v1.0 - modality: - explanation: text (English) - value: text; text - model_card: https://huggingface.co/bigscience/T0pp monitoring: none + feedback: https://huggingface.co/bigscience/bloom/discussions + +- type: model name: mT0 organization: BigScience - prohibited_uses: none - quality_control: https://arxiv.org/pdf/2211.01786.pdf - size: 13B parameters (dense) + description: mT0 is a multitask fine-tuned multilingual language model based + on mT5. + created_date: + value: 2021-10-15 + explanation: The date the model was released + url: https://arxiv.org/pdf/2110.08207.pdf + model_card: https://huggingface.co/bigscience/T0pp + modality: text (English) + size: 13B parameters (dense model) + analysis: https://huggingface.co/bigscience/bloomz#evaluation + dependencies: [mT5, xP3] training_emissions: unknown + training_time: unknown training_hardware: Jean Zay (288 A100 80GB GPUs with 8 GPUs per node (36 nodes) using NVLink 4 inter-gpu connects, 4 OmniPath links) - training_time: unknown - type: model - url: https://arxiv.org/pdf/2110.08207.pdf -- access: open - analysis: https://huggingface.co/bigscience/bloomz#evaluation - created_date: 2022-11-03 - dependencies: [BLOOM, xP3] - description: BLOOMZ is an multitask fine-tuned autoregressive multilingual language - model. - feedback: https://huggingface.co/bigscience/bloomz/discussions - intended_uses: We recommend using the model to perform tasks expressed in natural - language. + quality_control: https://arxiv.org/pdf/2211.01786.pdf + access: open license: - explanation: Model is licensed under https://huggingface.co/spaces/bigscience/license value: BigScience RAIL v1.0 - modality: - explanation: text (46 languages) and code (13 programming languages) - value: code, text; code, text - model_card: https://huggingface.co/bigscience/bloomz + explanation: Model is licensed under https://huggingface.co/spaces/bigscience/license + intended_uses: We recommend using the model to perform tasks expressed in natural + language. + prohibited_uses: none monitoring: none + feedback: https://huggingface.co/bigscience/bloomz/discussions + +- type: model name: BLOOMZ organization: BigScience - prohibited_uses: none - quality_control: https://arxiv.org/pdf/2211.01786.pdf - size: 176B parameters (dense) + description: BLOOMZ is a multitask fine-tuned autoregressive multilingual language + model.
+ created_date: 2022-11-03 + url: https://arxiv.org/pdf/2211.01786.pdf + model_card: https://huggingface.co/bigscience/bloomz + modality: text (46 languages) and code (13 programming languages) + size: 176B parameters (dense model) + analysis: https://huggingface.co/bigscience/bloomz#evaluation + dependencies: [BLOOM, xP3] training_emissions: unknown + training_time: unknown training_hardware: Jean Zay (288 A100 80GB GPUs with 8 GPUs per node (36 nodes) using NVLink 4 inter-gpu connects, 4 OmniPath links) - training_time: unknown - type: model - url: https://arxiv.org/pdf/2211.01786.pdf + quality_control: https://arxiv.org/pdf/2211.01786.pdf + access: open + license: + value: BigScience RAIL v1.0 + explanation: Model is licensed under https://huggingface.co/spaces/bigscience/license + intended_uses: We recommend using the model to perform tasks expressed in natural + language. + prohibited_uses: none + monitoring: none + feedback: https://huggingface.co/bigscience/bloomz/discussions diff --git a/assets/bloomberg.yaml b/assets/bloomberg.yaml index e6a4b58a..352f03f9 100644 --- a/assets/bloomberg.yaml +++ b/assets/bloomberg.yaml @@ -1,126 +1,137 @@ --- -- access: - explanation: See [[Openness]](https://arxiv.org/pdf/2303.17564.pdf#subsection.8.2) - value: closed - analysis: '' + +- type: dataset + name: FinPile + # General + organization: Bloomberg + description: A comprehensive dataset consisting of a range of English financial + documents including news, filings, press releases, web-scraped financial documents, + and social media drawn from the Bloomberg archives that was used to train the + BloombergGPT model. created_date: - explanation: The date BloombergGPT was announced in the [[Bloomberg article]](https://www.bloomberg.com/company/press/bloomberggpt-50-billion-parameter-llm-tuned-finance/). value: 2023-03-30 + explanation: The date BloombergGPT was announced in the [[Bloomberg article]](https://www.bloomberg.com/company/press/bloomberggpt-50-billion-parameter-llm-tuned-finance/). + url: https://arxiv.org/pdf/2303.17564.pdf#section.2 datasheet: - explanation: Section 2 of the BloombergGPT paper. value: https://arxiv.org/pdf/2303.17564.pdf#section.2 + explanation: Section 2 of the BloombergGPT paper. + modality: text (English) + size: + value: 363B tokens + explanation: See [[Financial Datasets]](https://arxiv.org/pdf/2303.17564.pdf#subsection.2.1) + sample: [] + analysis: '' + # Construction dependencies: [] - description: A comprehensive dataset consisting of a range of English financial - documents including news, filings, press releases, web-scraped financial documents, - and social media drawn from the Bloomberg archives that was used to train the - BloombergGPT model. - excluded: '' - feedback: '' + license: + value: unknown + explanation: > + The asset isn't released, and hence the license is unknown. included: + value: | + FinPile consists of English financial documents. Authors utilize the Bloomberg + Terminal, which is an extensive collection of curated and maintained documents, + to create the FinPile dataset. Each document in FinPile is time-stamped, with + dates ranging from 2007-03-01 to 2022-07-31. + Types of data included are given below: + 1. Web (298B tokens) - Includes Bloomberg's web crawl focused on high-quality + websites that have financially relevant information. This makes up the majority + of FinPile. + 2. News (38B tokens) - Includes all news sources relevant to the financial + community, excluding news articles written by Bloomberg journalists.
Overall, + there are hundreds of English news sources in FinPile including "Bloomberg + Transcripts", which are transcripts of Bloomberg TV news. + 3. Filings (14B tokens) - Includes financial statements prepared by (public) + companies and made available to the general public. In the dataset, a majority + of the filings come from EDGAR, which is the SEC's online database. + 4. Press (9B tokens) - Includes press releases typically issued by companies + that are financially relevant. + 5. Bloomberg (5B tokens) - Includes Bloomberg authored news and other documents + such as opinions and analyses. The largest sources are “Bloomberg News” and + “Bloomberg First Word”, the Bloomberg-authored wire of real-time news. explanation: See [[Financial Datasets]](https://arxiv.org/pdf/2303.17564.pdf#subsection.2.1) - value: "FinPile consists of English financial documents. Authors utilize the\ - \ The Bloomberg\nTerminal, which is an extensive collection of curated and\ - \ maintained documents,\nto create the FinPile dataset. Each document in FinPile\ - \ is time-stamped, with\ndates ranging from 2007-03-01 to 2022-07-31.\nTypes\ - \ of data included are given below:\n 1. Web (298B tokens) - Inclues Bloomberg's\ - \ web crawl focused on high-quality\nwebsites that have financially relevant\ - \ information. This makes up the majority\nof FinPile.\n 2. News (38B tokens)\ - \ - Includes all news sources relevant to the financial\ncommunity, excluding\ - \ news articles written by Bloomberg journalists. Overall,\nthere are hundreds\ - \ of English news sources in FinPile including \"Bloomberg\nTranscripts\"\ - , which are transcripts of Bloomberg TV news.\n 3. Filings (14B tokens) -\ - \ Includes financial statements prepared by (public)\ncompanies and made available\ - \ to the general public. In the dataset, a majority\nof the filings come\ - \ from EDGAR, which is the SEC's online database.\n 4. Press (9B tokens)\ - \ - Includes press releases typically issued by companies\nthat are financially\ - \ relevant.\n 5. Bloomberg (5B tokens) - Includes Bloomberg authored news\ - \ and other documents\nsuch as opinions and analyses. The largest sources\ - \ are “Bloomberg News” and\n“Bloomberg First Word”, the Bloomberg-authored\ - \ wire of real-time news.\n" + excluded: '' + quality_control: '' + # Downstream + access: + value: closed + explanation: See [[Openness]](https://arxiv.org/pdf/2303.17564.pdf#subsection.8.2) intended_uses: Used to train the BloombergGPT model. - license: - explanation: "The asset isn't released, and hence the license is unknown.\n" - value: unknown - modality: - explanation: text (English) - value: text + prohibited_uses: '' monitoring: '' - name: FinPile + feedback: '' + +- type: model + name: BloombergGPT + # General organization: Bloomberg - prohibited_uses: '' - quality_control: '' - sample: [] - size: - explanation: See [[Financial Datasets]](https://arxiv.org/pdf/2303.17564.pdf#subsection.2.1) - value: 363B tokens - type: dataset - url: https://arxiv.org/pdf/2303.17564.pdf#section.2 -- access: - explanation: See [[Openness]](https://arxiv.org/pdf/2303.17564.pdf#subsection.8.2) - value: closed - analysis: - explanation: See [[Evaluation]](https://arxiv.org/pdf/2303.17564.pdf#section.5) - value: "Authors evaluate the performance of BloombergGPT on two broad categories\ - \ of tasks, finance-specific and general purpose, on several standard benchmarks.\ - \ They compare BloombergGPT to the three closest models: GPT-NeoX, OPT-66B\ - \ and BLOOM-176B. 
They also report results from the original GPT-3 whenever\ - \ externally available. They conclude \"We achieve strong results on general\ - \ LLM benchmarks and outperform comparable models on financial tasks. We attribute\ - \ this, in decreasing order of impact, to 1. a well-curated internal dataset,\ - \ 2. our unique choice in tokenizer, and 3. an up-to-date architecture.\"\n" - created_date: - explanation: The date the model was announced in the [[Bloomberg article]](https://www.bloomberg.com/company/press/bloomberggpt-50-billion-parameter-llm-tuned-finance/). - value: 2023-03-30 - dependencies: [FinPile, The Pile, C4, Wikipedia] description: BloombergGPT is a 50 billion parameter large language model that is specifically trained on a wide range of financial data to support a diverse set of natural language processing tasks within the financial industry. - feedback: '' - intended_uses: - explanation: See [[Bloomberg article]](https://www.bloomberg.com/company/press/bloomberggpt-50-billion-parameter-llm-tuned-finance/) - value: "\"This model will assist Bloomberg in improving existing financial NLP\ - \ tasks, such as sentiment analysis, named entity recognition, news classification,\ - \ and question answering, among others. Furthermore, BloombergGPT will unlock\ - \ new opportunities for marshalling the vast quantities of data available\ - \ on the Bloomberg Terminal to better help the firm's customers, while bringing\ - \ the full potential of AI to the financial domain.\"\n" - license: - explanation: "The asset isn't released, and hence the license is unknown.\n" - value: unknown - modality: - explanation: text (English) - value: text; text + created_date: + value: 2023-03-30 + explanation: The date the model was announced in the [[Bloomberg article]](https://www.bloomberg.com/company/press/bloomberggpt-50-billion-parameter-llm-tuned-finance/). + url: https://arxiv.org/abs/2303.17564 model_card: none - monitoring: '' - name: BloombergGPT - organization: Bloomberg - prohibited_uses: '' - quality_control: - explanation: See [[Ethics, Limitations, and Implications]](https://arxiv.org/pdf/2303.17564.pdf#section.8) - value: "Authors state the following:\n- \"To provide natural language applications\ - \ to the financial community, we\n have developed a rigorous risk and testing\ - \ assessment process. This process\n includes careful annotation guidelines\ - \ Tseng et al. (2020), pre-launch review\n at multiple levels by the central\ - \ risk and compliance organizations, and\n by the product leaders (e.g.,\ - \ the newsroom) as applicable, and post-launch\n monitoring. Moreover, we\ - \ conduct our research, development, and deployment\n of NLP and AI systems\ - \ in accordance with all applicable regulations.\"\n- \"Similarly, toxicity\ - \ and bias are areas where, as a company, we take extraordinary\n care with\ - \ any content we produce, whether from humans or machines. Since\n the measurement\ - \ of toxicity and bias in our model depends on its application\n areas, quantifying\ - \ the potential for the generation of harmful language\n remains an open\ - \ question. 
We are particularly interested in studying whether\n FinPile,\ - \ which is cleaner and contains fewer examples of overtly biased\n or toxic\ - \ language (e.g., Press Releases), reduces the proclivity of the\n model\ - \ to generate inappropriate content.\"\n" - size: 50B parameters (dense) + modality: text (English) + analysis: + value: > + Authors evaluate the performance of BloombergGPT on two broad categories of + tasks, finance-specific and general purpose, on several standard benchmarks. + They compare BloombergGPT to the three closest models: GPT-NeoX, OPT-66B and + BLOOM-176B. They also report results from the original GPT-3 whenever externally + available. They conclude "We achieve strong results on general LLM benchmarks + and outperform comparable models on financial tasks. We attribute this, in + decreasing order of impact, to 1. a well-curated internal dataset, 2. our + unique choice in tokenizer, and 3. an up-to-date architecture." + explanation: See [[Evaluation]](https://arxiv.org/pdf/2303.17564.pdf#section.5) + size: 50B parameters (dense model) + # Construction + dependencies: [FinPile, The Pile, C4, Wikipedia] training_emissions: unknown + training_time: + value: 53 days + explanation: See [[Training Run]](https://arxiv.org/pdf/2303.17564.pdf#section.4) training_hardware: - explanation: See [[Training Configuration]](https://arxiv.org/pdf/2303.17564.pdf#subsection.3.3) value: 64 Amazon EC2 p4d.24xlarge instances each with 8 NVIDIA 40GB A100 GPUs (i.e. total 512 A100 GPUs) - training_time: - explanation: See [[Training Run]](https://arxiv.org/pdf/2303.17564.pdf#section.4) - value: 53 days - type: model - url: https://arxiv.org/abs/2303.17564 + explanation: See [[Training Configuration]](https://arxiv.org/pdf/2303.17564.pdf#subsection.3.3) + quality_control: + value: | + Authors state the following: + - "To provide natural language applications to the financial community, we + have developed a rigorous risk and testing assessment process. This process + includes careful annotation guidelines Tseng et al. (2020), pre-launch review + at multiple levels by the central risk and compliance organizations, and + by the product leaders (e.g., the newsroom) as applicable, and post-launch + monitoring. Moreover, we conduct our research, development, and deployment + of NLP and AI systems in accordance with all applicable regulations." + - "Similarly, toxicity and bias are areas where, as a company, we take extraordinary + care with any content we produce, whether from humans or machines. Since + the measurement of toxicity and bias in our model depends on its application + areas, quantifying the potential for the generation of harmful language + remains an open question. We are particularly interested in studying whether + FinPile, which is cleaner and contains fewer examples of overtly biased + or toxic language (e.g., Press Releases), reduces the proclivity of the + model to generate inappropriate content." + explanation: See [[Ethics, Limitations, and Implications]](https://arxiv.org/pdf/2303.17564.pdf#section.8) + # Downstream + access: + value: closed + explanation: See [[Openness]](https://arxiv.org/pdf/2303.17564.pdf#subsection.8.2) + license: + value: unknown + explanation: > + The asset isn't released, and hence the license is unknown. + intended_uses: + value: > + "This model will assist Bloomberg in improving existing financial NLP + tasks, such as sentiment analysis, named entity recognition, news classification, + and question answering, among others. 
Furthermore, BloombergGPT will unlock + new opportunities for marshalling the vast quantities of data available on + the Bloomberg Terminal to better help the firm's customers, while bringing + the full potential of AI to the financial domain." + explanation: See [[Bloomberg article]](https://www.bloomberg.com/company/press/bloomberggpt-50-billion-parameter-llm-tuned-finance/) + prohibited_uses: '' + monitoring: '' + feedback: '' diff --git a/assets/brex.yaml b/assets/brex.yaml index 76d01b3e..374f6842 100644 --- a/assets/brex.yaml +++ b/assets/brex.yaml @@ -1,28 +1,30 @@ --- -- access: limited - adaptation: '' - created_date: 2023-03-07 - dependencies: [ChatGPT API] + +- type: application + name: Brex Chat + organization: Brex description: Brex Inc., a highly valued startup that makes software for finance professionals, is turning to the same artificial intelligence tool behind ChatGPT for a service that can answer questions about corporate budgets, policy and spending. - failures: '' - feedback: '' - intended_uses: '' + created_date: 2023-03-07 + url: https://fortune.com/2023/03/07/cfo-chatbot-chatgpt-ai-brex-finance-software-startup-budgets-policies/ + dependencies: [ChatGPT API] + adaptation: '' + output_space: '' + quality_control: '' + access: limited license: - explanation: "\"Brex grants you a nonexclusive and nontransferable license to\ - \ use Brex Property as provided through the Services and as permitted by these\ - \ User Terms.\" - excerpt from the Terms of Service.\n" value: custom + explanation: > + "Brex grants you a nonexclusive and nontransferable license to use Brex + Property as provided through the Services and as permitted by these User + Terms." - excerpt from the Terms of Service. + terms_of_service: https://www.brex.com/legal/user-terms + intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' monthly_active_users: '' - name: Brex Chat - organization: Brex - output_space: '' - prohibited_uses: '' - quality_control: '' - terms_of_service: https://www.brex.com/legal/user-terms - type: application - url: https://fortune.com/2023/03/07/cfo-chatbot-chatgpt-ai-brex-finance-software-startup-budgets-policies/ user_distribution: '' + failures: '' diff --git a/assets/casia.yaml b/assets/casia.yaml index 4190e5a9..0634f801 100644 --- a/assets/casia.yaml +++ b/assets/casia.yaml @@ -1,26 +1,25 @@ --- -- access: open - analysis: Reports results on standard translation benchmarks across 102 languages - in comparison with Google Translate and ChatGPT - created_date: 2023-05-29 - dependencies: [LLaMA, CLUE, BigTrans parallel dataset] + +- type: model + name: BigTrans + organization: Institute of Automation Chinese Academy of Sciences description: BigTrans is a model which adapts LLaMA that covers only 20 languages and enhances it with multilingual translation capability on more than 100 languages - feedback: https://huggingface.co/James-WYang/BigTrans/discussions - intended_uses: Advancing future research in multilingual LLMs - license: Apache 2.0 - modality: - explanation: text - value: text; text + created_date: 2023-05-29 + url: https://arxiv.org/pdf/2305.18098v1.pdf model_card: https://huggingface.co/James-WYang/BigTrans - monitoring: '' - name: BigTrans - organization: Institute of Automation Chinese Academy of Sciences - prohibited_uses: '' - quality_control: '' - size: 13B parameters (dense) + modality: text + analysis: Reports results on standard translation benchmarks across 102 languages + in comparison with Google Translate and ChatGPT + size: 13B 
parameters (dense model) + dependencies: [LLaMA, CLUE, BigTrans parallel dataset] training_emissions: unknown - training_hardware: 16 A100 GPUs with 80 GB of RAM training_time: unknown - type: model - url: https://arxiv.org/pdf/2305.18098v1.pdf + training_hardware: 16 A100 GPUs with 80 GB of RAM + quality_control: '' + access: open + license: Apache 2.0 + intended_uses: Advancing future research in multilingual LLMs + prohibited_uses: '' + monitoring: '' + feedback: https://huggingface.co/James-WYang/BigTrans/discussions diff --git a/assets/cerebras.yaml b/assets/cerebras.yaml index 421cd8a4..636a9f70 100644 --- a/assets/cerebras.yaml +++ b/assets/cerebras.yaml @@ -1,59 +1,76 @@ --- -- access: - explanation: "The Pile is an open source dataset. Hugging Face compatible checkpoints\ - \ available on the [[Cerebras Hugging Face page]](https://huggingface.co/cerebras/Cerebras-GPT-13B).\ - \ Cerebras systems checkpoints for pre-training and fine tuning are available\ - \ in the cloud via the [[Cerebras Model Studio]](https://www.cerebras.net/product-cloud/).\n" - value: open - analysis: "\"We evaluate our models on the PILE validation set comprising 380M\ - \ tokens. We also evaluate the public checkpoints of Pythia, Eleuther (2022);\ - \ OPT, Zhang et al. (2022); GPT-NeoX 20B, Black et al. (2022); and GPT-J 6B,\ - \ Wang & Komatsuzaki (2021). We performed upstream (pre-training) evaluations\ - \ of text prediction cross-entropy using the Pile validation and test splits.\ - \ We performed downstream evaluations of text generation accuracy on standardized\ - \ tasks using the Eleuther lm-evaluation-harness.\" [[Evaluations]] (https://github.com/Cerebras/modelzoo/tree/main/modelzoo/transformers/pytorch/gpt3/configs/Cerebras_GPT#evaluations).\n" + +- type: model + name: Cerebras-GPT + # General + organization: Cerebras + description: > + A Family of Open, Compute-efficient, Large Language Models. The family includes + 111M, 256M, 590M, 1.3B, 2.7B, 6.7B, and 13B models. All models in the Cerebras-GPT + family have been trained in accordance with Chinchilla scaling laws (20 tokens + per model parameter). + [[Cerebras Blog Post]](https://www.cerebras.net/blog/cerebras-gpt-a-family-of-open-compute-efficient-large-language-models) created_date: - explanation: "The date the model was announced in the [[Cerebras blog post]](https://www.cerebras.net/blog/cerebras-gpt-a-family-of-open-compute-efficient-large-language-models).\n" value: 2023-03-28 - dependencies: [The Pile] - description: "A Family of Open, Compute-efficient, Large Language Models. The\ - \ family includes 111M, 256M, 590M, 1.3B, 2.7B, 6.7B, and 13B models. All models\ - \ in the Cerebras-GPT family have been trained in accordance with Chinchilla\ - \ scaling laws (20 tokens per model parameter). [[Cerebras Blog Post]](https://www.cerebras.net/blog/cerebras-gpt-a-family-of-open-compute-efficient-large-language-models)\n" - feedback: '' - intended_uses: "\"The primary intended use is to further research into large language\ - \ models. These models can be used as a foundation model for NLP, applications,\ - \ ethics, and alignment research. Our primary intended users are researchers\ - \ who are working to improve LLMs and practitioners seeking reference implementations,\ - \ training setups, hyperparameters, or pre-trained models. 
We release these\ - \ models with a fully permissive Apache license for the community to use freely.\"\ - \ [[Uses and Limitations]](https://github.com/Cerebras/modelzoo/tree/main/modelzoo/transformers/pytorch/gpt3/configs/Cerebras_GPT#uses-and-limitations).\n" - license: Apache 2.0 - modality: - explanation: text (English) - value: text; text + explanation: > + The date the model was announced in the [[Cerebras blog post]](https://www.cerebras.net/blog/cerebras-gpt-a-family-of-open-compute-efficient-large-language-models). + url: https://www.cerebras.net/blog/cerebras-gpt-a-family-of-open-compute-efficient-large-language-models/ model_card: https://huggingface.co/cerebras/Cerebras-GPT-13B - monitoring: '' - name: Cerebras-GPT - organization: Cerebras - prohibited_uses: "Authors note the following limitations of the model: \"Cerebras-GPT\ - \ models are trained on the Pile, with English language only, and are not suitable\ - \ for machine translation tasks. Cerebras-GPT models have not been tuned for\ - \ human-facing dialog applications like chatbots and will not respond to prompts\ - \ in a similar way to models that have received instruction tuning or reinforcement\ - \ learning from human feedback (RLHF) like Flan-T5 or ChatGPT.\" [[Uses and\ - \ Limitations]](https://github.com/Cerebras/modelzoo/tree/main/modelzoo/transformers/pytorch/gpt3/configs/Cerebras_GPT#out-of-scope-use).\n" - quality_control: "The Pile dataset has been thoroughly analyzed from various ethical\ - \ standpoints such as toxicity analysis, gender bias, pejorative content, racially\ - \ sensitive content etc. Only mitigations in standard Pile dataset pre-processing\ - \ were employed when pre-training Cerebras-GPT. [[Risk, Bias, Ethical Considerations]](https://github.com/Cerebras/modelzoo/tree/main/modelzoo/transformers/pytorch/gpt3/configs/Cerebras_GPT#risk-bias-ethical-considerations)\n" - size: 13B parameters (dense) + modality: text (English) + size: + value: 13B parameters (dense model) + explanation: Size of the largest model in the Cerebras-GPT family. + analysis: > + "We evaluate our models on the PILE validation set comprising 380M tokens. We + also evaluate the public checkpoints of Pythia, Eleuther (2022); OPT, Zhang + et al. (2022); GPT-NeoX 20B, Black et al. (2022); and GPT-J 6B, Wang & Komatsuzaki + (2021). We performed upstream (pre-training) evaluations of text prediction + cross-entropy using the Pile validation and test splits. We performed downstream + evaluations of text generation accuracy on standardized tasks using the Eleuther + lm-evaluation-harness." + [[Evaluations]] + (https://github.com/Cerebras/modelzoo/tree/main/modelzoo/transformers/pytorch/gpt3/configs/Cerebras_GPT#evaluations). 
+ # Construction + dependencies: [The Pile] training_emissions: '' - training_hardware: - explanation: "According to [[Model Description]](https://huggingface.co/cerebras/Cerebras-GPT-13B#model-description)\n" - value: 16x Cerebras CS-2 wafer scale systems training_time: - explanation: '' value: '' - type: model - url: https://www.cerebras.net/blog/cerebras-gpt-a-family-of-open-compute-efficient-large-language-models/ + explanation: '' + training_hardware: + value: 16x Cerebras CS-2 wafer scale systems + explanation: > + According to [[Model Description]](https://huggingface.co/cerebras/Cerebras-GPT-13B#model-description) + quality_control: > + The Pile dataset has been thoroughly analyzed from various ethical standpoints + such as toxicity analysis, gender bias, pejorative content, racially sensitive + content etc. Only mitigations in standard Pile dataset pre-processing were employed + when pre-training Cerebras-GPT. + [[Risk, Bias, Ethical Considerations]](https://github.com/Cerebras/modelzoo/tree/main/modelzoo/transformers/pytorch/gpt3/configs/Cerebras_GPT#risk-bias-ethical-considerations) + # Downstream + access: + value: open + explanation: > + The Pile is an open source dataset. + Hugging Face compatible checkpoints available on the [[Cerebras Hugging Face + page]](https://huggingface.co/cerebras/Cerebras-GPT-13B). + Cerebras systems checkpoints for pre-training and fine tuning are available + in the cloud via the [[Cerebras Model Studio]](https://www.cerebras.net/product-cloud/). + license: Apache 2.0 + intended_uses: > + "The primary intended use is to further research into large language models. + These models can be used as a foundation model for NLP, applications, ethics, + and alignment research. Our primary intended users are researchers who are working + to improve LLMs and practitioners seeking reference implementations, training + setups, hyperparameters, or pre-trained models. We release these models with + a fully permissive Apache license for the community to use freely." + [[Uses and Limitations]](https://github.com/Cerebras/modelzoo/tree/main/modelzoo/transformers/pytorch/gpt3/configs/Cerebras_GPT#uses-and-limitations). + prohibited_uses: > + Authors note the following limitations of the model: "Cerebras-GPT models are + trained on the Pile, with English language only, and are not suitable for machine + translation tasks. Cerebras-GPT models have not been tuned for human-facing + dialog applications like chatbots and will not respond to prompts in a similar + way to models that have received instruction tuning or reinforcement learning + from human feedback (RLHF) like Flan-T5 or ChatGPT." + [[Uses and Limitations]](https://github.com/Cerebras/modelzoo/tree/main/modelzoo/transformers/pytorch/gpt3/configs/Cerebras_GPT#out-of-scope-use). + monitoring: '' + feedback: '' diff --git a/assets/chatglm.yaml b/assets/chatglm.yaml index 7005d126..5bb6d82e 100644 --- a/assets/chatglm.yaml +++ b/assets/chatglm.yaml @@ -1,25 +1,24 @@ --- -- access: open - analysis: Performance evaluated on English and Chinese language benchmark tests. - created_date: 2023-03-14 - dependencies: [] + +- type: model + name: ChatGLM + organization: ChatGLM description: ChatGLM is a Chinese-English language model with question and answer and dialogue functions, and is aimed at a Chinese audience. 
- feedback: '' - intended_uses: '' - license: Apache 2.0 - modality: - explanation: text - value: text; text + created_date: 2023-03-14 + url: https://chatglm.cn/blog model_card: '' - monitoring: '' - name: ChatGLM - organization: ChatGLM - prohibited_uses: '' - quality_control: '' - size: 6B parameters (dense) + modality: text + analysis: Performance evaluated on English and Chinese language benchmark tests. + size: 6B parameters + dependencies: [] training_emissions: unknown - training_hardware: '' training_time: unknown - type: model - url: https://chatglm.cn/blog + training_hardware: '' + quality_control: '' + access: open + license: Apache 2.0 + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' diff --git a/assets/cmu.yaml b/assets/cmu.yaml index d5085ad0..ceda272f 100644 --- a/assets/cmu.yaml +++ b/assets/cmu.yaml @@ -1,35 +1,34 @@ --- -- access: - explanation: Model checkpoints are available for download at https://github.com/VHellendoorn/Code-LMs - value: open - analysis: Reports results on standard code benchmarks across a variety of programming - languages. - created_date: - explanation: The date the model paper was released - value: 2022-02-26 - dependencies: [Github] + +- type: model + name: PolyCoder + organization: CMU description: PolyCoder is a code model trained on 2.7B parameters based on the GPT-2 architecture, which was trained on 249GB of code across 12 programming languages on a single machine. - feedback: https://huggingface.co/NinedayWang/PolyCoder-2.7B/discussion - intended_uses: unknown - license: - explanation: The license is provided in the [[Github repository]](https://github.com/VHellendoorn/Code-LMs) - value: MIT - modality: - explanation: code (12 programming languages) - value: code; code + created_date: + value: 2022-02-26 + explanation: The date the model paper was released + url: https://arxiv.org/abs/2202.13169 model_card: https://huggingface.co/NinedayWang/PolyCoder-2.7B - monitoring: None - name: PolyCoder - organization: CMU - prohibited_uses: None + modality: code (12 programming languages) + analysis: Reports results on standard code benchmarks across a variety of programming + languages. + size: 2.7B parameters (dense model) + dependencies: [Github] + training_emissions: unknown + training_time: 6 weeks + training_hardware: 8 NVIDIA RTX 8000 quality_control: No specific quality control is mentioned in model training, though details on data processing and how the tokenizer was trained are provided in the paper. - size: 2.7B parameters (dense) - training_emissions: unknown - training_hardware: 8 NVIDIA RTX 8000 - training_time: 6 weeks - type: model - url: https://arxiv.org/abs/2202.13169 + access: + value: open + explanation: Model checkpoints are available for download at https://github.com/VHellendoorn/Code-LMs + license: + value: MIT + explanation: The license is provided in the [[Github repository]](https://github.com/VHellendoorn/Code-LMs) + intended_uses: unknown + prohibited_uses: None + monitoring: None + feedback: https://huggingface.co/NinedayWang/PolyCoder-2.7B/discussion diff --git a/assets/cohere.yaml b/assets/cohere.yaml index 9051aa78..21c3379a 100644 --- a/assets/cohere.yaml +++ b/assets/cohere.yaml @@ -1,460 +1,657 @@ --- -- access: - explanation: The dataset isn't provided to the public. - value: closed - analysis: - explanation: The analysis performed on the dataset is not released. 
- value: unknown + +- type: dataset + name: coheretext + # General + organization: Cohere + description: > + The family of datasets used to train Cohere models, which come in two forms: + coheretext-filtered and coheretext-unfiltered. The former is used to train the + Representation models, while the latter one is used to train the Generation + models. created_date: - explanation: "The date the Cohere API was announced on the news [[News Article]]\ - \ (https://venturebeat.com/2021/11/15/openai-rival-cohere-launches-language-model-api/).\n" value: 2021-11-15 + explanation: > + The date the Cohere API was announced on the news + [[News Article]] + (https://venturebeat.com/2021/11/15/openai-rival-cohere-launches-language-model-api/). + url: none datasheet: https://docs.cohere.ai/data-statement - dependencies: [] - description: "The family of datasets used to train Cohere models, which come in\ - \ two forms: coheretext-filtered and coheretext-unfiltered. The former is used\ - \ to train the Representation models, while the latter one is used to train\ - \ the Generation models.\n" - excluded: "Documents that are not in English are excluded.\n" - feedback: - explanation: The internal feedback mechanisms for Cohere are unknown. + modality: text (English) + size: + value: 200 GB + explanation: > + Size of the filtered dataset is 200 GB. The unfiltered dataset is ~3 TB. + sample: [] + analysis: value: unknown - included: "As stated in the datasheet, the dataset \"includes the Google Books\ - \ dataset, CommonCrawl, and text from the internet scraped by the Cohere infrastructure\ - \ team.\" The top ten domains scraped were: wordpress.com, medium.com, stackexchange.com,\ - \ tumblr.com, elsevier.com, genius.com, bbc.co.uk, libsyn.com, yahoo.com, nytimes.com\ - \ [[Datasheet]](https://docs.cohere.ai/data-statement).\n" - intended_uses: "The intended use of the dataset is to train Cohere's language\ - \ models.\n" + explanation: The analysis performed on the dataset is not released. + # Construction + dependencies: [] license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - modality: - explanation: text (English) - value: text - monitoring: - explanation: "There is no information on how Cohere is internally monitoring\ - \ the use of the dataset.\n" - value: unknown - name: coheretext - organization: Cohere + explanation: > + The asset isn't released, and hence the license is unknown. + included: > + As stated in the datasheet, the dataset "includes the Google Books dataset, + CommonCrawl, and text from the internet scraped by the Cohere infrastructure + team." The top ten domains scraped were: wordpress.com, medium.com, + stackexchange.com, tumblr.com, elsevier.com, genius.com, bbc.co.uk, + libsyn.com, yahoo.com, nytimes.com + [[Datasheet]](https://docs.cohere.ai/data-statement). + excluded: > + Documents that are not in English are excluded. + quality_control: > + In the datasheet, it is implied that Cohere employs filtration methods + for removing racist, biased and toxic content, but the details are not + provided. These filtration methods take both the context and the language, + as opposed to using a list of blockwords + [[Datasheet]](https://docs.cohere.ai/data-statement). + # Downstream + access: + value: closed + explanation: The dataset isn't provided to the public. + intended_uses: > + The intended use of the dataset is to train Cohere's language models. 
prohibited_uses: - explanation: "There are no known prohibited uses of the dataset, but the Cohere\ - \ API is bound by the Cohere usage guidelines, which may also apply to this\ - \ dataset [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines).\n" value: unknown - quality_control: "In the datasheet, it is implied that Cohere employs filtration\ - \ methods for removing racist, biased and toxic content, but the details are\ - \ not provided. These filtration methods take both the context and the language,\ - \ as opposed to using a list of blockwords [[Datasheet]](https://docs.cohere.ai/data-statement).\n" - sample: [] - size: - explanation: "Size of the filtered dataset is 200 GB. The unfiltered dataset\ - \ is ~3 TB.\n" - value: 200 GB - type: dataset - url: none -- access: - explanation: "The model is available to the public through the Cohere Platform\ - \ [[Cohere Platform]](https://os.cohere.ai/login).\n" - value: limited - analysis: The model's performance was analyzed on Hellaswag and COPA, as well - as several safety benchmarks [[Model Card]](https://docs.cohere.ai/generation-card). - created_date: - explanation: "The date the Cohere API was announced on the news [[News Article]]\ - \ (https://venturebeat.com/2021/11/15/openai-rival-cohere-launches-language-model-api/).\n" - value: 2021-11-15 - dependencies: [coheretext] - description: "The Generations model is a language model trained by Cohere for\ - \ generation tasks.\n" - feedback: - explanation: "There are no specific feedback mechanisms for the Generation model,\ - \ but a generic contact email is provided on Cohere website, which is support\ - \ at cohere.ai.\n" + explanation: > + There are no known prohibited uses of the dataset, but the Cohere API is + bound by the Cohere usage guidelines, which may also apply to this dataset + [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines). + monitoring: value: unknown - intended_uses: "On the model card, the intended uses are stated as \"interactive\ - \ autocomplete, augmenting human writing processes, summarization, text rephrasing,\ - \ and other text-to-text tasks in non-sensitive domains\" [[Model Card]](https://docs.cohere.ai/generation-card).\n" - license: - explanation: "The asset isn't released, and hence the license is unknown.\n" + explanation: > + There is no information on how Cohere is internally monitoring the use of + the dataset. + feedback: value: unknown - modality: - explanation: text (English) - value: text; text - model_card: https://docs.cohere.ai/generation-card - monitoring: "The usage of the model is monitored by Cohere [[Model Card]](https://docs.cohere.ai/generation-card).\n" + explanation: The internal feedback mechanisms for Cohere are unknown. + +- type: model name: Cohere Base + # General organization: Cohere - prohibited_uses: "The usage of the model is bound by the Cohere usage guidelines\ - \ [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines). 
A non-comprehensive\ - \ list of specific application violating these guidelines are: astroturfing,\ - \ generation of misinformation and other harmful content, and \"generation of\ - \ text about people, places, or events without a human-in-the-loop\" [[Model\ - \ Card]](https://docs.cohere.ai/generation-card).\n" - quality_control: - explanation: "The quality control measures taken are unknown, but it is implied\ - \ that Cohere performed mitigation strategies for toxic degeneration [[Model\ - \ Card]](https://docs.cohere.ai/generation-card).\n" + description: > + The Generations model is a language model trained by Cohere for + generation tasks. + created_date: + value: 2021-11-15 + explanation: > + The date the Cohere API was announced on the news + [[News Article]] + (https://venturebeat.com/2021/11/15/openai-rival-cohere-launches-language-model-api/). + url: none + model_card: https://docs.cohere.ai/generation-card + modality: text (English) + size: value: unknown - size: unknown + explanation: > + The exact sizes of the generation models are unknown, but we know + that they come in three sizes: small, medium, and large + [[Model Card]](https://docs.cohere.ai/generation-card). + analysis: The model's performance was analyzed on Hellaswag and COPA, as well + as several safety benchmarks [[Model Card]](https://docs.cohere.ai/generation-card). + # Construction + dependencies: [coheretext] training_emissions: + value: unknown explanation: The emissions of the models are unknown. + training_time: value: unknown + explanation: The training time for the models are unknown. training_hardware: - explanation: "The training hardware wasn't explicitly announced, but it was\ - \ reported that Google Cloud teamed up with Cohere on a TPU partnership [[TechCrunch\ - \ Article]] (https://techcrunch.com/2021/11/17/google-cloud-teams-up-with-nlp-startup-cohere-on-multi-year-partnership/).\n" value: unknown - training_time: - explanation: The training time for the models are unknown. + explanation: > + The training hardware wasn't explicitly announced, but it was + reported that Google Cloud teamed up with Cohere on a TPU partnership + [[TechCrunch Article]] + (https://techcrunch.com/2021/11/17/google-cloud-teams-up-with-nlp-startup-cohere-on-multi-year-partnership/). + quality_control: value: unknown - type: model - url: none -- access: - explanation: "The model is available to the public through the Cohere Platform\ - \ [[Cohere Platform]](https://os.cohere.ai/login).\n" + explanation: > + The quality control measures taken are unknown, but it is implied + that Cohere performed mitigation strategies for toxic degeneration + [[Model Card]](https://docs.cohere.ai/generation-card). + # Downstream + access: value: limited - analysis: The model's performance was analyzed on Hellaswag and COPA, as well - as several safety benchmarks [[Model Card]](https://docs.cohere.ai/generation-card). 
- created_date: - explanation: "Cohere Command was in beta before this date\n" - value: 2023-01-01 - dependencies: [Cohere Base] - description: "This model is a generative model optimized to follow commands in\ - \ the prompt.\n" - feedback: - explanation: "There are no specific feedback mechanisms for the Generation model,\ - \ but a generic contact email is provided on Cohere website, which is support\ - \ at cohere.ai.\n" - value: unknown - intended_uses: "On the model card, the intended uses are stated as \"interactive\ - \ autocomplete, augmenting human writing processes, summarization, text rephrasing,\ - \ and other text-to-text tasks in non-sensitive domains\" [[Model Card]](https://docs.cohere.ai/generation-card).\n" + explanation: > + The model is available to the public through the Cohere Platform + [[Cohere Platform]](https://os.cohere.ai/login). license: - explanation: The model likely has a license specifically for Cohere's use. value: unknown - modality: - explanation: text (English) - value: text; text - model_card: https://docs.cohere.ai/generation-card - monitoring: "The usage of the model is monitored by Cohere [[Model Card]](https://docs.cohere.ai/generation-card).\n" + explanation: > + The asset isn't released, and hence the license is unknown. + intended_uses: > + On the model card, the intended uses are stated as "interactive + autocomplete, augmenting human writing processes, summarization, + text rephrasing, and other text-to-text tasks in non-sensitive domains" + [[Model Card]](https://docs.cohere.ai/generation-card). + prohibited_uses: > + The usage of the model is bound by the Cohere usage guidelines + [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines). + A non-comprehensive list of specific application violating these guidelines + are: astroturfing, generation of misinformation and other harmful content, + and "generation of text about people, places, or events without a + human-in-the-loop" + [[Model Card]](https://docs.cohere.ai/generation-card). + monitoring: > + The usage of the model is monitored by Cohere + [[Model Card]](https://docs.cohere.ai/generation-card). + feedback: + value: unknown + explanation: > + There are no specific feedback mechanisms for the Generation model, + but a generic contact email is provided on Cohere website, which is + support at cohere.ai. + +- type: model name: Cohere Command + # General organization: Cohere - prohibited_uses: "The usage of the model is bound by the Cohere usage guidelines\ - \ [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines). A non-comprehensive\ - \ list of specific application violating these guidelines are: astroturfing,\ - \ generation of misinformation and other harmful content, and \"generation of\ - \ text about people, places, or events without a human-in-the-loop\" [[Model\ - \ Card]](https://docs.cohere.ai/generation-card).\n" - quality_control: - explanation: unknown + description: > + This model is a generative model optimized to follow commands in the prompt. + created_date: + value: 2023-01-01 + explanation: > + Cohere Command was in beta before this date + url: https://docs.cohere.com/docs/command-beta + model_card: https://docs.cohere.ai/generation-card + modality: text (English) + size: value: unknown - size: unknown + explanation: > + The exact sizes of the generation models are unknown, but we know + that they come in three sizes: small, medium, and large + [[Model Card]](https://docs.cohere.ai/generation-card). 
+ analysis: The model's performance was analyzed on Hellaswag and COPA, as well + as several safety benchmarks [[Model Card]](https://docs.cohere.ai/generation-card). + # Construction + dependencies: [Cohere Base] training_emissions: + value: unknown explanation: unknown + training_time: value: unknown - training_hardware: explanation: unknown + training_hardware: value: unknown - training_time: explanation: unknown + quality_control: value: unknown - type: model - url: https://docs.cohere.com/docs/command-beta -- access: - explanation: "The model is available to the public through the Cohere Platform\ - \ [[Cohere Platform]](https://cohere.ai/).\n" + explanation: unknown + # Downstream + access: value: limited - analysis: "The model's performance was analyzed on several safety benchmarks [[Model\ - \ Card]](https://docs.cohere.ai/representation-card).\n" - created_date: - explanation: "The date the Cohere API was announced on the news [[News Article]]\ - \ (https://venturebeat.com/2021/11/15/openai-rival-cohere-launches-language-model-api/).\n" - value: 2021-11-15 - dependencies: [] - description: "The Embedding Large (English) model is a language model trained\ - \ by Cohere for tasks requiring embeddings.\n" - feedback: - explanation: "There are no specific feedback mechanisms for the Generation model,\ - \ but a generic contact email is provided on Cohere website, which is support\ - \ at cohere.ai.\n" - value: unknown - intended_uses: "The intended uses are stated as \"estimating semantic similarity\ - \ between two sentences, choosing a sentence which is most likely to follow\ - \ another sentence, sentiment analysis, topic extraction, or categorizing user\ - \ feedback\" on the Cohere model card [[Model Card]](https://docs.cohere.ai/representation-card).\n" + explanation: > + The model is available to the public through the Cohere Platform + [[Cohere Platform]](https://os.cohere.ai/login). license: + value: unknown explanation: The model likely has a license specifically for Cohere's use. + intended_uses: > + On the model card, the intended uses are stated as "interactive + autocomplete, augmenting human writing processes, summarization, + text rephrasing, and other text-to-text tasks in non-sensitive domains" + [[Model Card]](https://docs.cohere.ai/generation-card). + prohibited_uses: > + The usage of the model is bound by the Cohere usage guidelines + [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines). + A non-comprehensive list of specific application violating these guidelines + are: astroturfing, generation of misinformation and other harmful content, + and "generation of text about people, places, or events without a + human-in-the-loop" + [[Model Card]](https://docs.cohere.ai/generation-card). + monitoring: > + The usage of the model is monitored by Cohere + [[Model Card]](https://docs.cohere.ai/generation-card). + feedback: value: unknown - modality: - explanation: Text (English) - value: text; text - model_card: https://docs.cohere.ai/representation-card - monitoring: "The usage of the model is monitored by Cohere [[Model Card]](https://docs.cohere.ai/representation-card).\n" + explanation: > + There are no specific feedback mechanisms for the Generation model, + but a generic contact email is provided on Cohere website, which is + support at cohere.ai. 
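The reordered entry layout that these cohere.yaml hunks (and the other asset files in this patch) converge on is easier to see in one place. The sketch below is a hypothetical placeholder entry, not a real asset in this patch; only the section comments and the field order (General, then Construction, then Downstream) are taken from the hunks above and below, and every value is a stand-in.

- type: model
  name: Example Model            # placeholder entry, for illustration only
  # General
  organization: Example Organization
  description: >
    One-sentence description of the model.
  created_date: 2023-01-01       # placeholder date
  url: https://example.org/model # placeholder URL
  model_card: ''
  modality: text (English)
  size: unknown
  analysis: ''
  # Construction
  dependencies: []
  training_emissions: unknown
  training_time: unknown
  training_hardware: unknown
  quality_control: ''
  # Downstream
  access: open
  license: unknown
  intended_uses: ''
  prohibited_uses: ''
  monitoring: ''
  feedback: ''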
+ + +- type: model name: Cohere Embed (English) + # General organization: Cohere - prohibited_uses: "The usage of the model is bound by the Cohere usage guidelines\ - \ [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines). A non-comprehensive\ - \ list of specific application violating these guidelines are: extraction of\ - \ identity and demographic information, building purposefully opaque text classification\ - \ systems, and \"building downstream classifiers that serve as automated decision-making\ - \ systems that have real-world consequences on people, where those decisions\ - \ are made without a human-in-the-loop\" [[Model Card]](https://docs.cohere.ai/representation-card).\n" - quality_control: unknown - size: unknown - training_emissions: - explanation: The emissions of the models are unknown. + description: > + The Embedding Large (English) model is a language model trained by Cohere for + tasks requiring embeddings. + created_date: + value: 2021-11-15 + explanation: > + The date the Cohere API was announced on the news + [[News Article]] + (https://venturebeat.com/2021/11/15/openai-rival-cohere-launches-language-model-api/). + url: none + model_card: https://docs.cohere.ai/representation-card + modality: Text (English) + size: value: unknown - training_hardware: - explanation: "The training hardware wasn't explicitly announced, but it was\ - \ reported that Google Cloud teamed up with Cohere on a TPU partnership [[TechCrunch\ - \ Article]] (https://techcrunch.com/2021/11/17/google-cloud-teams-up-with-nlp-startup-cohere-on-multi-year-partnership/).\n" + explanation: > + The exact sizes of the embedding models are unknown, but we know + that they come in three sizes: small, medium, and large + [[Model Card]](https://docs.cohere.ai/representation-card). + analysis: > + The model's performance was analyzed on several safety benchmarks + [[Model Card]](https://docs.cohere.ai/representation-card). + # Construction + dependencies: [] + training_emissions: value: unknown + explanation: The emissions of the models are unknown. training_time: + value: unknown explanation: The training time for the models are unknown. + training_hardware: value: unknown - type: model - url: none -- access: - explanation: "The model is available to the public through the Cohere Platform\ - \ [[Cohere Platform]](https://cohere.ai/).\n" + explanation: > + The training hardware wasn't explicitly announced, but it was + reported that Google Cloud teamed up with Cohere on a TPU partnership + [[TechCrunch Article]] + (https://techcrunch.com/2021/11/17/google-cloud-teams-up-with-nlp-startup-cohere-on-multi-year-partnership/). 
+ quality_control: unknown + # Downstream + access: value: limited - analysis: "The model's performance was analyzed on several safety benchmarks [[Model\ - \ Card]](https://docs.cohere.ai/representation-card).\n" - created_date: - explanation: "The launch of the multilingual embedding model was announced in\ - \ this [[blog post]] (https://txt.cohere.com/multilingual/).\n" - value: 2022-12-12 - dependencies: [] - description: "This model maps text from 100+ languages to a semantic vector space,\ - \ positioning text with a similar meaning (regardless of language) in close\ - \ proximity.\n" - feedback: - explanation: "There are no specific feedback mechanisms for the Generation model,\ - \ but a generic contact email is provided on Cohere website, which is support\ - \ at cohere.ai.\n" - value: unknown - intended_uses: "The intended uses are stated as \"estimating semantic similarity\ - \ between two sentences, choosing a sentence which is most likely to follow\ - \ another sentence, sentiment analysis, topic extraction, or categorizing user\ - \ feedback\" on the Cohere model card [[Model Card]](https://docs.cohere.ai/representation-card).\n" + explanation: > + The model is available to the public through the Cohere Platform + [[Cohere Platform]](https://cohere.ai/). license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - modality: - explanation: text (Multilingual) - value: text; text - model_card: https://docs.cohere.ai/representation-card - monitoring: "The usage of the model is monitored by Cohere [[Model Card]](https://docs.cohere.ai/representation-card).\n" + explanation: The model likely has a license specifically for Cohere's use. + intended_uses: > + The intended uses are stated as "estimating semantic similarity between two + sentences, choosing a sentence which is most likely to follow another + sentence, sentiment analysis, topic extraction, or categorizing user + feedback" on the Cohere model card + [[Model Card]](https://docs.cohere.ai/representation-card). + prohibited_uses: > + The usage of the model is bound by the Cohere usage guidelines + [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines). + A non-comprehensive list of specific application violating these guidelines + are: extraction of identity and demographic information, building + purposefully opaque text classification systems, and "building downstream + classifiers that serve as automated decision-making systems that have + real-world consequences on people, where those decisions are made without + a + human-in-the-loop" + [[Model Card]](https://docs.cohere.ai/representation-card). + monitoring: > + The usage of the model is monitored by Cohere + [[Model Card]](https://docs.cohere.ai/representation-card). + feedback: + value: unknown + explanation: > + There are no specific feedback mechanisms for the Generation model, + but a generic contact email is provided on Cohere website, which is + support at cohere.ai. + +- type: model name: Cohere Embed (Multilingual) + # General organization: Cohere - prohibited_uses: "The usage of the model is bound by the Cohere usage guidelines\ - \ [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines). 
A non-comprehensive\ - \ list of specific application violating these guidelines are: extraction of\ - \ identity and demographic information, building purposefully opaque text classification\ - \ systems, and \"building downstream classifiers that serve as automated decision-making\ - \ systems that have real-world consequences on people, where those decisions\ - \ are made without a human-in-the-loop\" [[Model Card]](https://docs.cohere.ai/representation-card).\n" - quality_control: unknown - size: unknown - training_emissions: - explanation: The emissions of the models are unknown. + description: > + This model maps text from 100+ languages to a semantic + vector space, positioning text with a similar meaning + (regardless of language) in close proximity. + modality: text (Multilingual) + created_date: + value: 2022-12-12 + explanation: > + The launch of the multilingual embedding model was announced in this + [[blog post]] + (https://txt.cohere.com/multilingual/). + url: https://txt.cohere.ai/multilingual/ + model_card: https://docs.cohere.ai/representation-card + size: value: unknown - training_hardware: - explanation: "The training hardware wasn't explicitly announced, but it was\ - \ reported that Google Cloud teamed up with Cohere on a TPU partnership [[TechCrunch\ - \ Article]] (https://techcrunch.com/2021/11/17/google-cloud-teams-up-with-nlp-startup-cohere-on-multi-year-partnership/).\n" + explanation: > + The exact sizes of the embedding models are unknown, but we know + that they come in three sizes: small, medium, and large + [[Model Card]](https://docs.cohere.ai/representation-card). + analysis: > + The model's performance was analyzed on several safety benchmarks + [[Model Card]](https://docs.cohere.ai/representation-card). + # Construction + dependencies: [] + training_emissions: value: unknown + explanation: The emissions of the models are unknown. training_time: + value: unknown explanation: The training time for the models are unknown. + training_hardware: value: unknown - type: model - url: https://txt.cohere.ai/multilingual/ -- access: - explanation: "Users can access the Cohere API by signing up on the Cohere website\ - \ [[Cohere Website]](https://cohere.ai/).\n" + explanation: > + The training hardware wasn't explicitly announced, but it was + reported that Google Cloud teamed up with Cohere on a TPU partnership + [[TechCrunch Article]] + (https://techcrunch.com/2021/11/17/google-cloud-teams-up-with-nlp-startup-cohere-on-multi-year-partnership/). + quality_control: unknown + # Downstream + access: value: limited - adaptation: unknown + explanation: > + The model is available to the public through the Cohere Platform + [[Cohere Platform]](https://cohere.ai/). + license: + value: unknown + explanation: > + The asset isn't released, and hence the license is unknown. + intended_uses: > + The intended uses are stated as "estimating semantic similarity between two + sentences, choosing a sentence which is most likely to follow another + sentence, sentiment analysis, topic extraction, or categorizing user + feedback" on the Cohere model card + [[Model Card]](https://docs.cohere.ai/representation-card). + prohibited_uses: > + The usage of the model is bound by the Cohere usage guidelines + [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines). 
+ A non-comprehensive list of specific application violating these guidelines + are: extraction of identity and demographic information, building + purposefully opaque text classification systems, and "building downstream + classifiers that serve as automated decision-making systems that have + real-world consequences on people, where those decisions are made without + a + human-in-the-loop" + [[Model Card]](https://docs.cohere.ai/representation-card). + monitoring: > + The usage of the model is monitored by Cohere + [[Model Card]](https://docs.cohere.ai/representation-card). + feedback: + value: unknown + explanation: > + There are no specific feedback mechanisms for the Generation model, + but a generic contact email is provided on Cohere website, which is + support at cohere.ai. + +- type: application + name: Cohere API + # General + organization: Cohere + description: > + Cohere API allows users to access the cohere language models and utilize + them in their applications. created_date: - explanation: "The date the Cohere API was announced on the news [[News Article]]\ - \ (https://venturebeat.com/2021/11/15/openai-rival-cohere-launches-language-model-api/).\n" value: 2021-11-15 + explanation: > + The date the Cohere API was announced on the news + [[News Article]] + (https://venturebeat.com/2021/11/15/openai-rival-cohere-launches-language-model-api/). + url: https://cohere.ai/ + # Construction dependencies: - Cohere Generate Endpoint - Cohere Embed Endpoint - Cohere Classify Endpoint - Cohere Summarize Endpoint - description: "Cohere API allows users to access the cohere language models and\ - \ utilize them in their applications.\n" - failures: unknown - feedback: "General feedback as well as the violations of the usage guidelines\ - \ can be reported to Cohere at responsibility at cohere.ai [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines).\n" - intended_uses: "Intended to be used by developers who would like to incorporate\ - \ NLP into their applications [[Cohere Website]](https://cohere.ai/).\n" + adaptation: unknown + output_space: generation and embeddings + quality_control: > + The new users of the API get a limited access restricting the sizes of the + models as well as the number of tokens that can be used. Users are required + to go through an internal application to upgrade to full access + [[Limited Access]](https://docs.cohere.ai/limited-access). + # Downstream + access: + value: limited + explanation: > + Users can access the Cohere API by signing up on the Cohere website + [[Cohere Website]](https://cohere.ai/). 
license:
- explanation: "\"Subject to these Terms of Use, we grant you anon-exclusive,\ \ non-transferable, non-sublicensable and revocable licence to use the Cohere\ \ Solution in accordance with these Terms of Use\" - excerpt from the Terms\ \ of Service document.\n"
value: custom
- monitoring: "All applications developed using the Cohere API is subject to review\ \ by Cohere.\n"
- monthly_active_users: unknown
- name: Cohere API
- organization: Cohere
- output_space: generation and embeddings
- prohibited_uses: "The usage of the API is bound by the Cohere usage guidelines.\ \ Disallowed use cases include violence and threats, antisocial and antidemocratic\ \ uses, deceit, attacks on security or privacy, unsafe unsupervised uses, decision-making,\ \ high-Risk generations among others [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines).\n"
- quality_control: "The new users of the API get a limited access restricting the\ \ sizes of the models as well as the number of tokens that can be used. Users\ \ are required to go through an internal application to upgrade to full access\ \ [[Limited Access]](https://docs.cohere.ai/limited-access).\n"
+ explanation: >
+ "Subject to these Terms of Use, we grant you a non-exclusive,
+ non-transferable, non-sublicensable and revocable licence to use the
+ Cohere Solution in accordance with these Terms of Use" - excerpt from the
+ Terms of Service document.
terms_of_service: https://cohere.ai/terms-of-use
- type: application
- url: https://cohere.ai/
+ intended_uses: >
+ Intended to be used by developers who would like to incorporate NLP into
+ their applications
+ [[Cohere Website]](https://cohere.ai/).
+ prohibited_uses: >
+ The usage of the API is bound by the Cohere usage guidelines.
+ Disallowed use cases include violence and threats, antisocial and
+ antidemocratic uses, deceit, attacks on security or privacy, unsafe
+ unsupervised uses, decision-making, high-Risk generations among others
+ [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines).
+ monitoring: >
+ All applications developed using the Cohere API is subject to review by
+ Cohere.
+ feedback: >
+ General feedback as well as the violations of the usage guidelines can
+ be reported to Cohere at responsibility at cohere.ai
+ [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines).
+ # Deployment + monthly_active_users: unknown user_distribution: unknown -- access: - explanation: "Users can access the Cohere API by signing up on the Cohere website\ - \ [[Cohere Website]](https://cohere.ai/).\n" - value: limited - adaptation: unknown - created_date: - explanation: "The launch of Generate was announced in this [[news article]]\ - \ (https://venturebeat.com/uncategorized/openai-rival-cohere-launches-language-model-api/).\n" - value: 2021-11-15 - dependencies: [Cohere Base, Cohere Command] - description: "This endpoint generates realistic text conditioned on a given input.\n" failures: unknown - feedback: "General feedback as well as the violations of the usage guidelines\ - \ can be reported to Cohere at responsibility at cohere.ai [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines).\n" - intended_uses: "Intended to be used by developers who would like to incorporate\ - \ NLP into their applications [[Cohere Website]](https://cohere.ai/).\n" - license: "Limited use license to Cohere platform users [[Terms of Use]](https://cohere.ai/terms-of-use).\n" - monitoring: "All applications developed using the Cohere API is subject to review\ - \ by Cohere.\n" - monthly_active_users: unknown + +- type: application name: Cohere Generate Endpoint + # General organization: Cohere + description: > + This endpoint generates realistic text conditioned on a given input. + created_date: + value: 2021-11-15 + explanation: > + The launch of Generate was announced in this + [[news article]] + (https://venturebeat.com/uncategorized/openai-rival-cohere-launches-language-model-api/). + url: https://docs.cohere.ai/reference/generate + # Construction + dependencies: [Cohere Base, Cohere Command] + adaptation: unknown output_space: generation - prohibited_uses: "The usage of the API is bound by the Cohere usage guidelines.\ - \ Disallowed use cases include violence and threats, antisocial and antidemocratic\ - \ uses, deceit, attacks on security or privacy, unsafe unsupervised uses, decision-making,\ - \ high-Risk generations among others [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines).\n" - quality_control: "The new users of the API get a limited access restricting the\ - \ sizes of the models as well as the number of tokens that can be used. Users\ - \ are required to go through an internal application to upgrade to full access\ - \ [[Limited Access]](https://docs.cohere.ai/limited-access).\n" + quality_control: > + The new users of the API get a limited access restricting the sizes of the + models as well as the number of tokens that can be used. Users are required + to go through an internal application to upgrade to full access + [[Limited Access]](https://docs.cohere.ai/limited-access). + # Downstream + access: + value: limited + explanation: > + Users can access the Cohere API by signing up on the Cohere website + [[Cohere Website]](https://cohere.ai/). + license: > + Limited use license to Cohere platform users + [[Terms of Use]](https://cohere.ai/terms-of-use). terms_of_service: https://cohere.ai/terms-of-use - type: application - url: https://docs.cohere.ai/reference/generate + intended_uses: > + Intended to be used by developers who would like to incorporate NLP into + their applications + [[Cohere Website]](https://cohere.ai/). + prohibited_uses: > + The usage of the API is bound by the Cohere usage guidelines. 
+ Disallowed use cases include violence and threats, antisocial and + antidemocratic uses, deceit, attacks on security or privacy, unsafe + unsupervised uses, decision-making, high-Risk generations among others + [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines). + monitoring: > + All applications developed using the Cohere API is subject to review by + Cohere. + feedback: > + General feedback as well as the violations of the usage guidelines can + be reported to Cohere at responsibility at cohere.ai + [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines). + # Deployment + monthly_active_users: unknown user_distribution: unknown -- access: - explanation: "Users can access the Cohere API by signing up on the Cohere website\ - \ [[Cohere Website]](https://cohere.ai/).\n" - value: limited - adaptation: unknown - created_date: - explanation: "The launch of Embed was announced in this [[news article]] (https://venturebeat.com/uncategorized/openai-rival-cohere-launches-language-model-api/).\n" - value: 2021-11-15 - dependencies: [Cohere Embed (Multilingual), Cohere Embed (English)] - description: "This endpoint returns text embeddings. An embedding is a list of\ - \ floating point numbers that captures semantic information about the text that\ - \ it represents.\n" failures: unknown - feedback: "General feedback as well as the violations of the usage guidelines\ - \ can be reported to Cohere at responsibility at cohere.ai [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines).\n" - intended_uses: "Intended to be used by developers who would like to incorporate\ - \ NLP into their applications [[Cohere Website]](https://cohere.ai/).\n" - license: "Limited use license to Cohere platform users [[Terms of Use]](https://cohere.ai/terms-of-use).\n" - monitoring: "All applications developed using the Cohere API is subject to review\ - \ by Cohere.\n" - monthly_active_users: unknown + +- type: application name: Cohere Embed Endpoint + # General organization: Cohere + description: > + This endpoint returns text embeddings. An + embedding is a list of floating point numbers that + captures semantic information about the text that it represents. + created_date: + value: 2021-11-15 + explanation: > + The launch of Embed was announced in this + [[news article]] + (https://venturebeat.com/uncategorized/openai-rival-cohere-launches-language-model-api/). + url: https://docs.cohere.ai/reference/embed + # Construction + dependencies: [Cohere Embed (Multilingual), Cohere Embed (English)] + adaptation: unknown output_space: embedding - prohibited_uses: "The usage of the API is bound by the Cohere usage guidelines.\ - \ Disallowed use cases include violence and threats, antisocial and antidemocratic\ - \ uses, deceit, attacks on security or privacy, unsafe unsupervised uses, decision-making,\ - \ high-Risk generations among others [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines).\n" - quality_control: "The new users of the API get a limited access restricting the\ - \ sizes of the models as well as the number of tokens that can be used. Users\ - \ are required to go through an internal application to upgrade to full access\ - \ [[Limited Access]](https://docs.cohere.ai/limited-access).\n" + quality_control: > + The new users of the API get a limited access restricting the sizes of the + models as well as the number of tokens that can be used. 
Users are required + to go through an internal application to upgrade to full access + [[Limited Access]](https://docs.cohere.ai/limited-access). + # Downstream + access: + value: limited + explanation: > + Users can access the Cohere API by signing up on the Cohere website + [[Cohere Website]](https://cohere.ai/). + license: > + Limited use license to Cohere platform users + [[Terms of Use]](https://cohere.ai/terms-of-use). terms_of_service: https://cohere.ai/terms-of-use - type: application - url: https://docs.cohere.ai/reference/embed + intended_uses: > + Intended to be used by developers who would like to incorporate NLP into + their applications + [[Cohere Website]](https://cohere.ai/). + prohibited_uses: > + The usage of the API is bound by the Cohere usage guidelines. + Disallowed use cases include violence and threats, antisocial and + antidemocratic uses, deceit, attacks on security or privacy, unsafe + unsupervised uses, decision-making, high-Risk generations among others + [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines). + monitoring: > + All applications developed using the Cohere API is subject to review by + Cohere. + feedback: > + General feedback as well as the violations of the usage guidelines can + be reported to Cohere at responsibility at cohere.ai + [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines). + # Deployment + monthly_active_users: unknown user_distribution: unknown -- access: - explanation: "Users can access the Cohere API by signing up on the Cohere website\ - \ [[Cohere Website]](https://cohere.ai/).\n" - value: limited - adaptation: unknown - created_date: - explanation: "The launch of Classify was announced in this [[blog post]] (https://txt.cohere.com/classification-is-now-available/).\n" - value: 2022-05-05 - dependencies: [Cohere Embed (Multilingual), Cohere Embed (English)] - description: "This endpoint makes a prediction about which label best fits a specified\ - \ text input. To make a prediction, Classify uses the provided examples of text\ - \ + label pairs as a reference.\n" failures: unknown - feedback: "General feedback as well as the violations of the usage guidelines\ - \ can be reported to Cohere at responsibility at cohere.ai [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines).\n" - intended_uses: "Intended to be used by developers who would like to incorporate\ - \ NLP into their applications [[Cohere Website]](https://cohere.ai/).\n" - license: "Limited use license to Cohere platform users [[Terms of Use]](https://cohere.ai/terms-of-use).\n" - monitoring: "All applications developed using the Cohere API is subject to review\ - \ by Cohere.\n" - monthly_active_users: unknown + +- type: application name: Cohere Classify Endpoint + # General organization: Cohere + description: > + This endpoint makes a prediction about which label best + fits a specified text input. To make a prediction, + Classify uses the provided examples of text + label + pairs as a reference. + created_date: + value: 2022-05-05 + explanation: > + The launch of Classify was announced in this + [[blog post]] + (https://txt.cohere.com/classification-is-now-available/). 
+ url: https://docs.cohere.ai/reference/classify + # Construction + dependencies: [Cohere Embed (Multilingual), Cohere Embed (English)] + adaptation: unknown output_space: embedding - prohibited_uses: "The usage of the API is bound by the Cohere usage guidelines.\ - \ Disallowed use cases include violence and threats, antisocial and antidemocratic\ - \ uses, deceit, attacks on security or privacy, unsafe unsupervised uses, decision-making,\ - \ high-Risk generations among others [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines).\n" - quality_control: "The new users of the API get a limited access restricting the\ - \ sizes of the models as well as the number of tokens that can be used. Users\ - \ are required to go through an internal application to upgrade to full access\ - \ [[Limited Access]](https://docs.cohere.ai/limited-access).\n" + quality_control: > + The new users of the API get a limited access restricting the sizes of the + models as well as the number of tokens that can be used. Users are required + to go through an internal application to upgrade to full access + [[Limited Access]](https://docs.cohere.ai/limited-access). + # Downstream + access: + value: limited + explanation: > + Users can access the Cohere API by signing up on the Cohere website + [[Cohere Website]](https://cohere.ai/). + license: > + Limited use license to Cohere platform users + [[Terms of Use]](https://cohere.ai/terms-of-use). terms_of_service: https://cohere.ai/terms-of-use - type: application - url: https://docs.cohere.ai/reference/classify + intended_uses: > + Intended to be used by developers who would like to incorporate NLP into + their applications + [[Cohere Website]](https://cohere.ai/). + prohibited_uses: > + The usage of the API is bound by the Cohere usage guidelines. + Disallowed use cases include violence and threats, antisocial and + antidemocratic uses, deceit, attacks on security or privacy, unsafe + unsupervised uses, decision-making, high-Risk generations among others + [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines). + monitoring: > + All applications developed using the Cohere API is subject to review by + Cohere. + feedback: > + General feedback as well as the violations of the usage guidelines can + be reported to Cohere at responsibility at cohere.ai + [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines). 
+ # Deployment + monthly_active_users: unknown user_distribution: unknown -- access: - explanation: "Users can access the Cohere API by signing up on the Cohere website\ - \ [[Cohere Website]](https://cohere.ai/).\n" - value: limited - adaptation: unknown - created_date: - explanation: "The beta launch of summarize was announced in this [[blog post]]\ - \ (https://txt.cohere.com/summarize-beta/).\n" - value: 2023-02-22 - dependencies: [] - description: "This endpoint generates a succinct version of the original text\ - \ that relays the most important information.\n" failures: unknown - feedback: "General feedback as well as the violations of the usage guidelines\ - \ can be reported to Cohere at responsibility at cohere.ai [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines).\n" - intended_uses: "Intended to be used by developers who would like to incorporate\ - \ NLP into their applications [[Cohere Website]](https://cohere.ai/).\n" - license: "Limited use license to Cohere platform users [[Terms of Use]](https://cohere.ai/terms-of-use).\n" - monitoring: "All applications developed using the Cohere API is subject to review\ - \ by Cohere.\n" - monthly_active_users: unknown + +- type: application name: Cohere Summarize Endpoint + # General organization: Cohere + description: > + This endpoint generates a succinct + version of the original text that relays + the most important information. + created_date: + value: 2023-02-22 + explanation: > + The beta launch of summarize was announced in this + [[blog post]] + (https://txt.cohere.com/summarize-beta/). + url: https://docs.cohere.ai/reference/summarize + # Construction + dependencies: [] + adaptation: unknown output_space: generation - prohibited_uses: "The usage of the API is bound by the Cohere usage guidelines.\ - \ Disallowed use cases include violence and threats, antisocial and antidemocratic\ - \ uses, deceit, attacks on security or privacy, unsafe unsupervised uses, decision-making,\ - \ high-Risk generations among others [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines).\n" - quality_control: "The new users of the API get a limited access restricting the\ - \ sizes of the models as well as the number of tokens that can be used. Users\ - \ are required to go through an internal application to upgrade to full access\ - \ [[Limited Access]](https://docs.cohere.ai/limited-access).\n" + quality_control: > + The new users of the API get a limited access restricting the sizes of the + models as well as the number of tokens that can be used. Users are required + to go through an internal application to upgrade to full access + [[Limited Access]](https://docs.cohere.ai/limited-access). + # Downstream + access: + value: limited + explanation: > + Users can access the Cohere API by signing up on the Cohere website + [[Cohere Website]](https://cohere.ai/). + license: > + Limited use license to Cohere platform users + [[Terms of Use]](https://cohere.ai/terms-of-use). terms_of_service: https://cohere.ai/terms-of-use - type: application - url: https://docs.cohere.ai/reference/summarize + intended_uses: > + Intended to be used by developers who would like to incorporate NLP into + their applications + [[Cohere Website]](https://cohere.ai/). + prohibited_uses: > + The usage of the API is bound by the Cohere usage guidelines. 
+ Disallowed use cases include violence and threats, antisocial and + antidemocratic uses, deceit, attacks on security or privacy, unsafe + unsupervised uses, decision-making, high-Risk generations among others + [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines). + monitoring: > + All applications developed using the Cohere API is subject to review by + Cohere. + feedback: > + General feedback as well as the violations of the usage guidelines can + be reported to Cohere at responsibility at cohere.ai + [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines). + # Deployment + monthly_active_users: unknown user_distribution: unknown + failures: unknown diff --git a/assets/columbia.yaml b/assets/columbia.yaml index cef14a6c..2ca39802 100644 --- a/assets/columbia.yaml +++ b/assets/columbia.yaml @@ -1,24 +1,23 @@ --- -- access: open - analysis: Evaluated on wide range of tasks using own evaluation benchmarks. - created_date: 2022-11-20 - dependencies: [AlphaFold2, OpenProteinSet] - description: OpenFold is an open source recreation of AlphaFold2. - feedback: '' - intended_uses: '' - license: CC BY 4.0 - modality: - explanation: protein structures - value: image; image - model_card: '' - monitoring: '' + +- type: model name: OpenFold organization: Columbia - prohibited_uses: '' - quality_control: '' + description: OpenFold is an open source recreation of AlphaFold2. + created_date: 2022-11-20 + url: https://www.biorxiv.org/content/10.1101/2022.11.20.517210v2 + model_card: '' + modality: protein structures + analysis: Evaluated on wide range of tasks using own evaluation benchmarks. size: '' + dependencies: [AlphaFold2, OpenProteinSet] training_emissions: unknown - training_hardware: Single A100 NVIDIA GPU training_time: 50,000 GPU hours - type: model - url: https://www.biorxiv.org/content/10.1101/2022.11.20.517210v2 + training_hardware: Single A100 NVIDIA GPU + quality_control: '' + access: open + license: CC BY 4.0 + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' diff --git a/assets/databricks.yaml b/assets/databricks.yaml index 7439f500..40dc6f16 100644 --- a/assets/databricks.yaml +++ b/assets/databricks.yaml @@ -1,50 +1,67 @@ --- -- access: - explanation: "Model training code can be accessed at the official Dolly repository.\ - \ Trained weights can be requested at hello-dolly@databricks.com. [[Dolly\ - \ Repository]](https://github.com/databrickslabs/dolly).\n" - value: open - analysis: "\"We evaluated Dolly on the instruction-following capabilities described\ - \ in the InstructGPT paper that ChatGPT is based on and found that it exhibits\ - \ many of the same qualitative capabilities, including text generation, brainstorming\ - \ and open Q&A.\" [[Databricks Blog Post]] (https://www.databricks.com/blog/2023/03/24/hello-dolly-democratizing-magic-chatgpt-open-models.html).\n" + +- type: model + name: Dolly + # General + organization: Databricks + description: > + "Databricks’ Dolly, a large language model trained on the Databricks + Machine Learning Platform, demonstrates that a two-years-old open source + model (GPT-J) can, when subjected to just 30 minutes of fine tuning on a + focused corpus of 50k records (Stanford Alpaca), exhibit surprisingly + high quality instruction following behavior not characteristic of the + foundation model on which it is based." + [[Dolly Repository]](https://github.com/databrickslabs/dolly). 
created_date:
- explanation: "The date the model was announced in the [[Cerebras blog post]](https://www.databricks.com/blog/2023/03/24/hello-dolly-democratizing-magic-chatgpt-open-models.html).\n"
value: 2023-03-24
- dependencies: [GPT-J, Alpaca dataset]
- description: "\"Databricks’ Dolly, a large language model trained on the Databricks\n\ \ Machine Learning Platform, demonstrates that a two-years-old open source\n\ \ model (GPT-J) can, when subjected to just 30 minutes of fine tuning on a\n\ \ focused corpus of 50k records (Stanford Alpaca), exhibit surprisingly\n high\ \ quality instruction following behavior not characteristic of the\n foundation\ \ model on which it is based.\"\n [[Dolly Repository]](https://github.com/databrickslabs/dolly).\n"
- feedback: https://github.com/databrickslabs/dolly/issues
- intended_uses: "\"Dolly is intended exclusively for research purposes and is not\ \ licensed for commercial use.\" [[Limitations]](https://github.com/databrickslabs/dolly#limitations).\n"
- license: Apache 2.0
- modality:
- explanation: text (English)
- value: text; text
+ explanation: >
+ The date the model was announced in the [[Databricks blog post]](https://www.databricks.com/blog/2023/03/24/hello-dolly-democratizing-magic-chatgpt-open-models.html).
+ url: https://www.databricks.com/blog/2023/03/24/hello-dolly-democratizing-magic-chatgpt-open-models.html
model_card: ''
- monitoring: none
- name: Dolly
- organization: Databricks
- prohibited_uses: "Authors note the following limitations of the model: \"The Dolly\ \ model family is under active development, and so any list of shortcomings\ \ is unlikely to be exhaustive, but we include known limitations and misfires\ \ here as a means to document and share our preliminary findings with the community.\ \ In particular, dolly-6b struggles with syntactically complex prompts, mathematical\ \ operations, factual errors, dates and times, open-ended question answering,\ \ hallucination, enumerating lists of specific length, and stylistic mimicry.\"\ \ [[Limitations]](https://github.com/databrickslabs/dolly#limitations).\n"
- quality_control: none
- size: 6B parameters (dense)
+ modality: text (English)
+ size: 6B parameters (dense model)
+ analysis: >
+ "We evaluated Dolly on the instruction-following capabilities described in the
+ InstructGPT
+ paper that ChatGPT is based on and found that it exhibits many of the same qualitative
+ capabilities, including text generation, brainstorming and open Q&A."
+ [[Databricks Blog Post]]
+ (https://www.databricks.com/blog/2023/03/24/hello-dolly-democratizing-magic-chatgpt-open-models.html).
+ # Construction + dependencies: [GPT-J, Alpaca dataset] training_emissions: unknown - training_hardware: - explanation: "According to [[Model Overview]](https://github.com/databrickslabs/dolly#model-overview)\n" - value: A single NDasrA100_v4 machine with 8x A100 40GB GPUs training_time: - explanation: "According to [[Model Overview]](https://github.com/databrickslabs/dolly#model-overview)\n" value: 30 minutes - type: model - url: https://www.databricks.com/blog/2023/03/24/hello-dolly-democratizing-magic-chatgpt-open-models.html + explanation: > + According to [[Model Overview]](https://github.com/databrickslabs/dolly#model-overview) + training_hardware: + value: A single NDasrA100_v4 machine with 8x A100 40GB GPUs + explanation: > + According to [[Model Overview]](https://github.com/databrickslabs/dolly#model-overview) + quality_control: none + # Downstream + access: + value: open + explanation: > + Model training code can be accessed at the official Dolly repository. + Trained weights can be requested at hello-dolly@databricks.com. + [[Dolly Repository]](https://github.com/databrickslabs/dolly). + license: Apache 2.0 + intended_uses: > + "Dolly is intended exclusively for research purposes and is not licensed for + commercial use." + [[Limitations]](https://github.com/databrickslabs/dolly#limitations). + prohibited_uses: > + Authors note the following limitations of the model: "The Dolly model family + is under active development, + and so any list of shortcomings is unlikely to be exhaustive, but we include + known limitations and misfires + here as a means to document and share our preliminary findings with the community. + In particular, dolly-6b + struggles with syntactically complex prompts, mathematical operations, factual + errors, dates and times, + open-ended question answering, hallucination, enumerating lists of specific + length, and stylistic mimicry." + [[Limitations]](https://github.com/databrickslabs/dolly#limitations). + monitoring: none + feedback: https://github.com/databrickslabs/dolly/issues diff --git a/assets/deepmind.yaml b/assets/deepmind.yaml index 81a0f83d..a59747e1 100644 --- a/assets/deepmind.yaml +++ b/assets/deepmind.yaml @@ -1,670 +1,840 @@ --- -- access: - explanation: "The dataset access is limited to DeepMind researchers [[Model\ - \ Card]](https://arxiv.org/pdf/2112.11446.pdf#appendix.C).\n" - value: closed - analysis: "MassiveText data was analyzed for toxicity, language distribution,\ - \ URL breakdown, and tokenizer compression rates on the subsets [[Section A.2]](https://arxiv.org/pdf/2112.11446.pdf#subsection.A.2).\n" + +- type: dataset + name: MassiveText + # General + organization: DeepMind + description: > + The MassiveText dataset was used to train the Gopher model. created_date: - explanation: "The date that Gopher was announced [[DeepMind Blog Post]] (https://www.deepmind.com/blog/language-modelling-at-scale-gopher-ethical-considerations-and-retrieval).\n" value: 2021-12-08 + explanation: > + The date that Gopher was announced + [[DeepMind Blog Post]] + (https://www.deepmind.com/blog/language-modelling-at-scale-gopher-ethical-considerations-and-retrieval). + url: https://arxiv.org/pdf/2112.11446.pdf datasheet: https://arxiv.org/pdf/2112.11446.pdf#subsection.A.5 + modality: Text (English) and Code + size: 10.5 TB + sample: [] + analysis: > + MassiveText data was analyzed for toxicity, language distribution, URL + breakdown, and tokenizer compression rates on the subsets + [[Section A.2]](https://arxiv.org/pdf/2112.11446.pdf#subsection.A.2). 
+ # Construction dependencies: [] - description: "The MassiveText dataset was used to train the Gopher model.\n" - excluded: "Documents that are not in English are excluded.\n" - feedback: - explanation: "The internal feedback mechanisms for WebText are unknown [[Model\ - \ Card]](https://arxiv.org/pdf/2112.11446.pdf#appendix.C).\n" - value: unknown - included: "MassiveText data come from 6 sources: MassiveWeb (48%), Books (27%),\ - \ C4 (10%), News (10%), GitHub (3%), and Wikipedia (2%). MassiveWeb is a web\ - \ text corpus curated for MassiveText.\n" - intended_uses: "Pre-training of language models by DeepMind researchers [[Model\ - \ Card]](https://arxiv.org/pdf/2112.11446.pdf#appendix.C).\n" license: - explanation: "The model likely has a license specifically for DeepMind's use,\ - \ based on the information provided in the datasheet [[Model Card]](https://arxiv.org/pdf/2112.11446.pdf#subsection.A.5).\n" value: unknown - modality: - explanation: Text (English) and Code - value: code, text + explanation: > + The model likely has a license specifically for DeepMind's use, + based on the information provided in the datasheet + [[Model Card]](https://arxiv.org/pdf/2112.11446.pdf#subsection.A.5). + included: > + MassiveText data come from 6 sources: MassiveWeb (48%), Books (27%), + C4 (10%), News (10%), GitHub (3%), and Wikipedia (2%). MassiveWeb is a + web text corpus curated for MassiveText. + excluded: > + Documents that are not in English are excluded. + quality_control: > + The authors use simple heuristics for filtering low quality documents as + opposed to relying on a classifier based on a "gold" set such as the English + Wikipedia, which could "inadvertently bias towards a certain demographic or + erase certain dialects or sociolects from representation." MassiveWeb + subset was filtered using Google’s SafeSearch filter, preferring it over + to word filters that "disproportinately filter out inoffensive content + associated with minority groups. MassiveWeb was filtered + further for word or phrase repetitions. All the subsets were filtered for + document deduplication and test set contamination" + [[Appendix A]](https://arxiv.org/pdf/2112.11446.pdf#appendix.A). + # Downstream + access: + value: closed + explanation: > + The dataset access is limited to DeepMind researchers + [[Model Card]](https://arxiv.org/pdf/2112.11446.pdf#appendix.C). + intended_uses: > + Pre-training of language models by DeepMind researchers + [[Model Card]](https://arxiv.org/pdf/2112.11446.pdf#appendix.C). + prohibited_uses: + value: unknown + explanation: > + There are no known prohibited uses of the dataset, but the authors + state that it should not be used for training models with multilingual + capabilities as it only contains the English language + [[Model Card]](https://arxiv.org/pdf/2112.11446.pdf#appendix.C). monitoring: - explanation: "There is no information on how DeepMind is internally monitoring\ - \ the use of the dataset.\n" value: unknown - name: MassiveText - organization: DeepMind - prohibited_uses: - explanation: "There are no known prohibited uses of the dataset, but the authors\ - \ state that it should not be used for training models with multilingual capabilities\ - \ as it only contains the English language [[Model Card]](https://arxiv.org/pdf/2112.11446.pdf#appendix.C).\n" + explanation: > + There is no information on how DeepMind is internally monitoring + the use of the dataset. 
+ feedback: value: unknown - quality_control: "The authors use simple heuristics for filtering low quality\ - \ documents as opposed to relying on a classifier based on a \"gold\" set such\ - \ as the English Wikipedia, which could \"inadvertently bias towards a certain\ - \ demographic or erase certain dialects or sociolects from representation.\"\ - \ MassiveWeb subset was filtered using Google’s SafeSearch filter, preferring\ - \ it over to word filters that \"disproportinately filter out inoffensive content\ - \ associated with minority groups. MassiveWeb was filtered further for word\ - \ or phrase repetitions. All the subsets were filtered for document deduplication\ - \ and test set contamination\" [[Appendix A]](https://arxiv.org/pdf/2112.11446.pdf#appendix.A).\n" - sample: [] - size: 10.5 TB - type: dataset - url: https://arxiv.org/pdf/2112.11446.pdf -- access: - explanation: "The dataset access is limited to DeepMind researchers [[Datasheet]](https://arxiv.org/pdf/2204.14198.pdf#appendix.F).\n" - value: closed - analysis: '' + explanation: > + The internal feedback mechanisms for WebText are unknown + [[Model Card]](https://arxiv.org/pdf/2112.11446.pdf#appendix.C). + +- type: dataset + name: M3W + # General + organization: DeepMind + description: > + M3W (MassiveWeb) is dataset used to train Flamingo, and other vision-language + models and was created by researchers and engineers. created_date: - explanation: "The date that Flamingo was posted to arXiv [[arXiv]] (https://arxiv.org/pdf/2204.14198.pdf).\n" value: 2022-04-29 + explanation: > + The date that Flamingo was posted to arXiv + [[arXiv]] + (https://arxiv.org/pdf/2204.14198.pdf). + url: https://arxiv.org/pdf/2204.14198.pdf datasheet: https://arxiv.org/pdf/2204.14198.pdf#appendix.F + modality: text (English) and code + size: 182GB Text, 185M Images + sample: [] + analysis: '' + # Construction dependencies: [] - description: "M3W (MassiveWeb) is dataset used to train Flamingo, and other vision-language\ - \ models and was created by researchers and engineers.\n" - excluded: unknown - feedback: - explanation: "No feedback mechanism is mentioned in the datasheet [[Datasheet]](https://arxiv.org/pdf/2204.14198.pdf#appendix.F).\n" - value: none - included: "M3W has interleaved images (185M) and text (182GB) from the web.\n" - intended_uses: "Pre-training of vision and language models by DeepMind researchers\ - \ [[Datasheet]](https://arxiv.org/pdf/2204.14198.pdf#appendix.F).\n" license: - explanation: "The model likely has a license specifically for DeepMind's use,\ - \ based on the information provided in the datasheet [[Datasheet]](https://arxiv.org/pdf/2204.14198.pdf#appendix.F).\n" value: unknown - modality: - explanation: text (English) and code - value: code, text + explanation: > + The model likely has a license specifically for DeepMind's use, + based on the information provided in the datasheet + [[Datasheet]](https://arxiv.org/pdf/2204.14198.pdf#appendix.F). + included: > + M3W has interleaved images (185M) and text (182GB) from the web. + excluded: unknown + quality_control: > + The authors provide a basic description of data processing and cleaning. + # Downstream + access: + value: closed + explanation: > + The dataset access is limited to DeepMind researchers + [[Datasheet]](https://arxiv.org/pdf/2204.14198.pdf#appendix.F). + intended_uses: > + Pre-training of vision and language models by DeepMind researchers + [[Datasheet]](https://arxiv.org/pdf/2204.14198.pdf#appendix.F). 
+ prohibited_uses: + value: unknown + explanation: > + There are no known prohibited uses of the dataset + [[Datasheet]](https://arxiv.org/pdf/2204.14198.pdf#appendix.F). monitoring: - explanation: "There is no information on how DeepMind is internally monitoring\ - \ the use of the dataset.\n" value: unknown - name: M3W + explanation: > + There is no information on how DeepMind is internally monitoring + the use of the dataset. + feedback: + value: none + explanation: > + No feedback mechanism is mentioned in the datasheet + [[Datasheet]](https://arxiv.org/pdf/2204.14198.pdf#appendix.F). + +- type: dataset + name: Gato dataset + # General organization: DeepMind - prohibited_uses: - explanation: "There are no known prohibited uses of the dataset [[Datasheet]](https://arxiv.org/pdf/2204.14198.pdf#appendix.F).\n" - value: unknown - quality_control: "The authors provide a basic description of data processing and\ - \ cleaning.\n" - sample: [] - size: 182GB Text, 185M Images - type: dataset - url: https://arxiv.org/pdf/2204.14198.pdf -- access: - explanation: "The full dataset is not directly provided by the authors, though\ - \ some underlying data is public whereas others (e.g. MassiveText) is not.\n" - value: closed - analysis: "The Gato dataset compiles many datasets introduced in prior works,\ - \ with associated analyses.\n" + description: > + The Gato datasets are a collection of data used to train the Gato model. created_date: - explanation: "The date that Gato was announced [[DeepMind Blog Post]] (https://www.deepmind.com/blog/a-generalist-agent).\n" value: 2022-05-12 + explanation: > + The date that Gato was announced + [[DeepMind Blog Post]] + (https://www.deepmind.com/blog/a-generalist-agent). + url: https://www.deepmind.com/blog/a-generalist-agent datasheet: none + modality: text, text-image pair, simulated control task, robotics simulation + size: 10.5 TB Text, 2.2B Text-Image pairs, 1.5T tokens of simulated control, 500k + robotics trajectories + sample: [] + analysis: > + The Gato dataset compiles many datasets introduced in prior works, with associated + analyses. + # Construction dependencies: [MassiveText] - description: "The Gato datasets are a collection of data used to train the Gato\ - \ model.\n" + license: + value: unknown + explanation: > + The datasets have individual licenses, but no overall license is mentioned + by the authors. + included: > + The full composition of the dataset across individual sources can be found in + the paper. excluded: - explanation: "No specific filtering is mentioned in the Gato paper.\n" value: none - feedback: - explanation: "There is no mention on feedback mechanisms either internally or\ - \ externally.\n" - value: none - included: "The full composition of the dataset across individual sources can be\ - \ found in the paper.\n" + explanation: > + No specific filtering is mentioned in the Gato paper. + quality_control: unknown + # Downstream + access: + value: closed + explanation: > + The full dataset is not directly provided by the authors, though some underlying + data is public whereas others (e.g. MassiveText) is not. intended_uses: - explanation: "There are no known intended uses of the dataset stated by authors\ - \ beyond training Gato.\n" value: unknown - license: - explanation: "The datasets have individual licenses, but no overall license\ - \ is mentioned by the authors.\n" + explanation: > + There are no known intended uses of the dataset stated by authors beyond training + Gato. 
+ prohibited_uses: value: unknown - modality: - explanation: text, text-image pair, simulated control task, robotics simulation - value: image, text + explanation: > + There are no known prohibited uses of the dataset stated by authors. monitoring: - explanation: "There is no mention on how DeepMind is internally monitoring the\ - \ use of the dataset.\n" value: none - name: Gato dataset + explanation: > + There is no mention on how DeepMind is internally monitoring + the use of the dataset. + feedback: + value: none + explanation: > + There is no mention on feedback mechanisms either internally or externally. + +- type: model + name: AlphaFold2 organization: DeepMind - prohibited_uses: - explanation: "There are no known prohibited uses of the dataset stated by authors.\n" - value: unknown - quality_control: unknown - sample: [] - size: 10.5 TB Text, 2.2B Text-Image pairs, 1.5T tokens of simulated control, 500k - robotics trajectories - type: dataset - url: https://www.deepmind.com/blog/a-generalist-agent -- access: - explanation: Models are available for download from the [[Github repository]](https://github.com/deepmind/alphafold) - value: open - analysis: '' + description: AlphaFold2 is a protein language model trained on protein sequences created_date: - explanation: The date the model paper was released value: 2021-07-15 - dependencies: [Protein Data Bank] - description: AlphaFold2 is a protein language model trained on protein sequences - feedback: '' - intended_uses: '' - license: - explanation: The license is provided in the [[Github repository]](https://github.com/deepmind/alphafold) - value: Apache 2.0 - modality: - explanation: protein sequence - value: text; text + explanation: The date the model paper was released + url: https://www.nature.com/articles/s41586-021-03819-2 model_card: none - monitoring: '' - name: AlphaFold2 - organization: DeepMind - prohibited_uses: '' - quality_control: '' - size: 93M parameters (dense) + modality: protein sequence + size: 93M parameters (dense model) + analysis: '' + dependencies: [Protein Data Bank] training_emissions: '' - training_hardware: - explanation: Described in paper - value: 128 TPUv3 cores training_time: + value: 11 days explanation: Training takes "approximately 1 week" and finetuning takes "approximately 4 days" - value: 11 days - type: model - url: https://www.nature.com/articles/s41586-021-03819-2 -- access: - explanation: "The model has not been released and no discussion of release is\ - \ stated in the model card [[Model Card]](https://arxiv.org/pdf/2204.14198.pdf#appendix.E).\n" - value: closed - analysis: "Model performance was evaluated on image and video datasets primarily,\ - \ including dialogue.\n" - created_date: - explanation: "The date that Flamingo was posted to arXiv [[arXiv]] (https://arxiv.org/abs/2204.14198).\n" - value: 2022-04-29 - dependencies: [M3W, ALIGN, LTIP, VTP, Chinchilla] - description: "Flamingo is a Visual Language Model using the Transformer architecture\ - \ that is intended for few-shot learning.\n" - feedback: - explanation: "No contact information is provided for feedback in the model card\ - \ [[Model Card]](https://arxiv.org/pdf/2204.14198.pdf#appendix.E).\n" - value: none - intended_uses: "The intended uses are stated in the model card: \"The primary\ - \ use is research on visual language models (VLM), including: research on VLM\ - \ applications like classification, captioning or visual question answering,\ - \ understanding how strong VLMs can contribute to AGI, advancing fairness 
and\
-    \ safety research in the area of multimodal research, and understanding limitations\
-    \ of current large VLMs.\" [[Model Card]](https://arxiv.org/pdf/2204.14198.pdf#appendix.E).\n"
  license:
-    explanation: "No license is provided in the model card [[Model Card]](https://arxiv.org/pdf/2204.14198.pdf#appendix.E).\n"
-    value: unknown
-  modality:
-    explanation: Images, Text
-    value: image, text; image, text
-  model_card: https://arxiv.org/pdf/2204.14198.pdf#appendix.E
-  monitoring:
-    explanation: "There is no information on how DeepMind is internally monitoring\
-      \ the use of the model.\n"
-    value: unknown
+    value: Apache 2.0
+    explanation: The license is provided in the [[Github repository]](https://github.com/deepmind/alphafold)
+  intended_uses: ''
+  prohibited_uses: ''
+  monitoring: ''
+  feedback: ''
+
+- type: model
  name: Flamingo
+  # General
  organization: DeepMind
-  prohibited_uses: "The model card lists the following as out of scope uses of the\
-    \ model: \"Uses of the model for visually conditioned language generation in\
-    \ harmful or deceitful settings. Broadly speaking, the model should not be used\
-    \ for downstream applications without further safety and fairness mitigations\
-    \ specific to each application.\" [[Model Card]](https://arxiv.org/pdf/2204.14198.pdf#appendix.E).\n"
-  quality_control:
-    explanation: "Reported in the mitigations in the model card [[Model Card]](https://openreview.net/pdf?id=1ikK0kHjvj#appendix.A).\n"
-    value: none
-  size: 80B parameters (dense)
+  description: >
+    Flamingo is a Visual Language Model using the Transformer architecture that
+    is intended for few-shot learning.
+  created_date:
+    value: 2022-04-29
+    explanation: >
+      The date that Flamingo was posted to arXiv
+      [[arXiv]]
+      (https://arxiv.org/abs/2204.14198).
+  url: https://arxiv.org/pdf/2204.14198.pdf
+  model_card: https://arxiv.org/pdf/2204.14198.pdf#appendix.E
+  modality: Images, Text
+  size:
+    value: 80B parameters (dense model)
+    explanation: >
+      The Flamingo family has models of several sizes (3.2B, 9.3B, 80B), which involve
+      frozen language, vision, and trainable parameters
+      [[Table 5]](https://arxiv.org/pdf/2204.14198.pdf).
+  analysis: >
+    Model performance was evaluated on image and video datasets primarily, including
+    dialogue.
+  # Construction
+  dependencies: [M3W, ALIGN, LTIP, VTP, Chinchilla]
  training_emissions:
-    explanation: "Authors do not report the training emissions.\n"
    value: unknown
+    explanation: >
+      Authors do not report the training emissions.
  training_time:
-    explanation: "Reported in the paper checklist [[Checklist]](https://arxiv.org/pdf/2204.14198.pdf).\n"
    value: 15 days on 1536 TPUs
+    explanation: >
+      Reported in the paper checklist
+      [[Checklist]](https://arxiv.org/pdf/2204.14198.pdf).
+  training_hardware:
+    value: TPU
+    explanation: >
+      Reported in the paper checklist
+      [[Checklist]](https://arxiv.org/pdf/2204.14198.pdf).
+ quality_control: + value: none + explanation: > + Reported in the mitigations in the model card + [[Model Card]](https://openreview.net/pdf?id=1ikK0kHjvj#appendix.A). + # Downstream + access: value: closed - analysis: '' - created_date: - explanation: The date the model paper was released - value: 2022-02-02 - dependencies: [] - description: AlphaCode is an autoregressive language model trained on code - feedback: '' - intended_uses: '' + explanation: > + The model has not been released and no discussion of release is stated in + the model card + [[Model Card]](https://arxiv.org/pdf/2204.14198.pdf#appendix.E). license: - explanation: '' value: unknown - modality: - explanation: code - value: code; code - model_card: '' - monitoring: '' + explanation: > + No license is provided in the model card + [[Model Card]](https://arxiv.org/pdf/2204.14198.pdf#appendix.E). + intended_uses: > + The intended uses are stated in the model card: "The primary use is research + on visual language models (VLM), including: research on VLM applications like + classification, captioning or visual question answering, understanding how + strong VLMs can contribute to AGI, advancing + fairness and safety research in the area of multimodal research, and understanding + limitations of current large VLMs." + [[Model Card]](https://arxiv.org/pdf/2204.14198.pdf#appendix.E). + prohibited_uses: > + The model card lists the following as out of scope uses of the model: "Uses + of the model for visually conditioned language generation in harmful or deceitful + settings. Broadly speaking, + the model should not be used for downstream applications + without further safety and fairness mitigations specific to + each application." + [[Model Card]](https://arxiv.org/pdf/2204.14198.pdf#appendix.E). + monitoring: + value: unknown + explanation: > + There is no information on how DeepMind is internally monitoring + the use of the model. + feedback: + value: none + explanation: > + No contact information is provided for feedback in the model card + [[Model Card]](https://arxiv.org/pdf/2204.14198.pdf#appendix.E). + +- type: model name: AlphaCode organization: DeepMind - prohibited_uses: '' - quality_control: '' - size: 41B parameters (dense) + description: AlphaCode is an autoregressive language model trained on code + created_date: + value: 2022-02-02 + explanation: The date the model paper was released + url: https://arxiv.org/abs/2203.07814 + model_card: '' + modality: code + analysis: '' + size: 41B parameters (dense model) + dependencies: [] training_emissions: '' - training_hardware: '' training_time: '' - type: model - url: https://arxiv.org/abs/2203.07814 -- access: - explanation: "The model access is limited to DeepMind researchers. 
The model\ - \ won't be released to the public [[Model Card]](https://arxiv.org/pdf/2112.11446.pdf#appendix.B).\n" + training_hardware: '' + quality_control: '' + access: value: closed - analysis: "Model performance was evaluated and analyzed on 152 NLP tasks including:\ - \ Language Modelling (20), Reading Comprehension (3), Fact Checking (3), Question\ - \ Answering (3), Common Sense (4), MMLU (57), BIG-bench (62) [[Section 4]](https://arxiv.org/pdf/2112.11446.pdf#section.4);\ - \ on toxicity and bias datasets [[Section 5]](https://arxiv.org/pdf/2112.11446.pdf#section.5);\ - \ and on dialogue tasks [[Section 6]](https://arxiv.org/pdf/2112.11446.pdf#section.6).\n" - created_date: - explanation: "The date that Gopher was announced [[DeepMind Blog Post]] (https://www.deepmind.com/blog/language-modelling-at-scale-gopher-ethical-considerations-and-retrieval).\n" - value: 2021-12-08 - dependencies: [MassiveText] - description: "Gopher is an autoregressive language model based on the Transformer\ - \ architecture with two modifications: using RMSNorm instead of LayerNorm and\ - \ using relative positional encoding scheme instead of absolute positional encodings\ - \ [[Section 3]](https://arxiv.org/pdf/2112.11446.pdf#subsection.3.1).\n" - feedback: "The feedback for the model can be provided at the email linked in the\ - \ model card, geoffreyi at google.com [[Model Card]](https://arxiv.org/pdf/2112.11446.pdf#appendix.B).\n" - intended_uses: "The intended uses are stated in the Gopher model card: \"The primary\ - \ use is research on language models, including: research on NLP applications\ - \ like machine translation and question answering, understanding how strong\ - \ language models can contribute to AGI, advancing fairness and safety research,\ - \ and understanding limitations of current LLMs\" [[Model Card]](https://arxiv.org/pdf/2112.11446.pdf#appendix.B).\n" + explanation: DeepMind does not provide access to AlphaCode to external researchers license: - explanation: "The model likely has a license specifically for DeepMind's use,\ - \ based on the information provided in the model card [[Model Card]](https://arxiv.org/pdf/2112.11446.pdf#appendix.B).\n" - value: unknown - modality: - explanation: Text (English) and Code - value: code, text; code, text - model_card: https://arxiv.org/pdf/2112.11446.pdf#appendix.B - monitoring: - explanation: "There is no information on how DeepMind is internally monitoring\ - \ the use of the model.\n" value: unknown + explanation: '' + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' + +- type: model name: Gopher + # General organization: DeepMind - prohibited_uses: "The model card lists the following as out of scope uses of the\ - \ model: \"for language generation in harmful or deceitful settings. More generally,\ - \ the model should not be used for downstream applications without further safety\ - \ and fairness mitigations\" [[Model Card]](https://arxiv.org/pdf/2112.11446.pdf#appendix.B).\n" - quality_control: none - size: 280B parameters (dense) + description: > + Gopher is an autoregressive language model based on the Transformer + architecture with two modifications: using RMSNorm instead of LayerNorm and + using relative positional encoding scheme instead of absolute positional + encodings + [[Section 3]](https://arxiv.org/pdf/2112.11446.pdf#subsection.3.1). 
+  created_date:
+    value: 2021-12-08
+    explanation: >
+      The date that Gopher was announced
+      [[DeepMind Blog Post]]
+      (https://www.deepmind.com/blog/language-modelling-at-scale-gopher-ethical-considerations-and-retrieval).
+  url: https://arxiv.org/pdf/2112.11446.pdf
+  model_card: https://arxiv.org/pdf/2112.11446.pdf#appendix.B
+  modality: Text (English) and Code
+  size:
+    value: 280B parameters (dense model)
+    explanation: >
+      The Gopher family has models of several sizes, but the name
+      Gopher uniquely identifies the 280B parameter version. Sizes for the other
+      models in the Gopher family can be seen in the paper
+      [[Table 1]](https://arxiv.org/pdf/2112.11446.pdf#table.caption.1).
+  analysis: >
+    Model performance was evaluated and analyzed on 152 NLP tasks including:
+    Language Modelling (20), Reading Comprehension (3), Fact Checking (3),
+    Question Answering (3), Common Sense (4), MMLU (57), BIG-bench (62)
+    [[Section 4]](https://arxiv.org/pdf/2112.11446.pdf#section.4); on toxicity
+    and bias datasets
+    [[Section 5]](https://arxiv.org/pdf/2112.11446.pdf#section.5); and on
+    dialogue tasks
+    [[Section 6]](https://arxiv.org/pdf/2112.11446.pdf#section.6).
+  # Construction
+  dependencies: [MassiveText]
  training_emissions:
-    explanation: "The training emission estimate from the paper [[Section F]](https://arxiv.org/pdf/2112.11446.pdf#appendix.F)\n"
    value: 380 tCO2e
+    explanation: >
+      The training emission estimate from the paper
+      [[Section F]](https://arxiv.org/pdf/2112.11446.pdf#appendix.F).
  training_time:
-    explanation: "The authors reported the training petaflops for all of the 4 different\
-    \ sizes of the model. For the 280B parameter model, the petaflops was reported\
-    \ as 6.31E+08. We compute the Gopher's training time in petaflop/s-day as\
-    \ 6.31E+08 / (60*60*24) = 7303.24 petaflop/s-day.\n"
    value: 7303.24 petaflop/s-day
-  training_hardware:
-    explanation: "Reported in the paper [[Section F]](https://arxiv.org/pdf/2112.11446.pdf#appendix.F).\n"
-    value: TPUv3 pods
+    explanation: >
+      The authors reported the training petaflops for all 4 different
+      sizes of the model. For the 280B parameter model, the total training petaflops
+      were reported as 6.31E+08. We compute Gopher's training time in
+      petaflop/s-day as 6.31E+08 / (60*60*24) = 7303.24 petaflop/s-day.
+  training_hardware:
+    value: TPUv3 pods
+    explanation: >
+      Reported in the paper
+      [[Section F]](https://arxiv.org/pdf/2112.11446.pdf#appendix.F).
+ quality_control: none + # Downstream + access: value: closed - analysis: "Model performance was evaluated and analyzed on many NLP tasks including\ - \ language modeling, reading comprehension, question answering, commonsense-intensive\ - \ tasks, and the BIG-Bench and MMLU meta-benchmarks.\n" - created_date: - explanation: "The date that Chinchilla was posted on arXiv [[arXiv]] (https://arxiv.org/abs/2203.15556).\n" - value: 2022-03-29 - dependencies: [MassiveText] - description: "Chinchilla is an autoregressive language model based on the Transformer\ - \ architecture with improved scaling laws.\n" - feedback: "The feedback for the model can be provided at the email linked in the\ - \ model card, {jordanhoffmann, sborgeaud, amensch,sifre} at deepmind.com [[Model\ - \ Card]](https://arxiv.org/pdf/2203.15556.pdf#appendix.I).\n" - intended_uses: "The intended uses are stated in the Chinchilla model card: \"\ - The primary use is research on language models, including: research on the scaling\ - \ behaviour of language models along with those listed in Gopher paper\" [[Model\ - \ Card]](https://arxiv.org/pdf/2203.15556.pdf#appendix.I).\n" + explanation: > + The model access is limited to DeepMind researchers. The model won't be + released to the public + [[Model Card]](https://arxiv.org/pdf/2112.11446.pdf#appendix.B). license: - explanation: "The model likely has a license specifically for DeepMind's use,\ - \ based on the information provided in the model card [[Model Card]](https://arxiv.org/pdf/2203.15556.pdf#appendix.I).\n" value: unknown - modality: - explanation: text (English) and cpde - value: text; text - model_card: https://arxiv.org/pdf/2203.15556.pdf + explanation: > + The model likely has a license specifically for DeepMind's use, + based on the information provided in the model card + [[Model Card]](https://arxiv.org/pdf/2112.11446.pdf#appendix.B). + intended_uses: > + The intended uses are stated in the Gopher model card: "The primary use is + research on language models, including: research on NLP applications like + machine translation and question answering, understanding how strong + language models can contribute to AGI, advancing fairness and safety + research, and understanding limitations of current LLMs" + [[Model Card]](https://arxiv.org/pdf/2112.11446.pdf#appendix.B). + prohibited_uses: > + The model card lists the following as out of scope uses of the model: "for + language generation in harmful or deceitful settings. More generally, the + model should not be used for downstream applications without further safety + and fairness mitigations" + [[Model Card]](https://arxiv.org/pdf/2112.11446.pdf#appendix.B). monitoring: - explanation: "There is no information on how DeepMind is internally monitoring\ - \ the use of the model.\n" - value: Unknown + value: unknown + explanation: > + There is no information on how DeepMind is internally monitoring + the use of the model. + feedback: > + The feedback for the model can be provided at the email linked in the + model card, geoffreyi at google.com [[Model Card]](https://arxiv.org/pdf/2112.11446.pdf#appendix.B). + +- type: model name: Chinchilla + # General organization: DeepMind - prohibited_uses: "The model card lists the following as out of scope uses of the\ - \ model: \"for language generation in harmful or deceitful settings. 
More generally,\
-    \ the model should not be used for downstream applications without further safety\
-    \ and fairness mitigations\" [[Model Card]](https://arxiv.org/pdf/2203.15556.pdf#appendix.I).\n"
-  quality_control: none
-  size: 70B parameters (dense)
+  description: >
+    Chinchilla is an autoregressive language model based on the Transformer
+    architecture with improved scaling laws.
+  created_date:
+    value: 2022-03-29
+    explanation: >
+      The date that Chinchilla was posted on arXiv
+      [[arXiv]]
+      (https://arxiv.org/abs/2203.15556).
+  url: https://arxiv.org/pdf/2203.15556.pdf
+  model_card: https://arxiv.org/pdf/2203.15556.pdf
+  modality: text (English) and code
+  size:
+    value: 70B parameters (dense model)
+    explanation: >
+      The Chinchilla family has many models, but the primary model is the 70B parameter
+      model.
+  analysis: >
+    Model performance was evaluated and analyzed on many NLP tasks including language
+    modeling, reading comprehension, question answering, commonsense-intensive
+    tasks, and the BIG-Bench and MMLU meta-benchmarks.
+  # Construction
+  dependencies: [MassiveText]
  training_emissions:
-    explanation: "Authors do not report the training emissions.\n"
    value: Unknown
-  training_hardware:
-    explanation: "Reported in the paper [[Section 4]](https://arxiv.org/pdf/2203.15556.pdf).\n"
-    value: TPUv3/TPUv4 pods
+    explanation: >
+      Authors do not report the training emissions.
  training_time:
-    explanation: "The authors reported the training petaflops for all models, including\
-    \ hypothetical larger models. For the 70B parameter model, the petaflops was\
-    \ reported as 5.76E+08. We compute the Gopher's training time in petaflop/s-day\
-    \ as 5.76E+08 / (60*60*24) = 6666.66 petaflop/s-day.\n"
-    value: 7303.24 petaflop/s-day
+    value: 6666.66 petaflop/s-day
+    explanation: >
+      The authors reported the training petaflops for all models, including hypothetical
+      larger models.
+      For the 70B parameter model, the total training petaflops were
+      reported as 5.76E+08. We compute Chinchilla's training time in
+      petaflop/s-day as 5.76E+08 / (60*60*24) = 6666.66 petaflop/s-day.
+  training_hardware:
+    value: TPUv3/TPUv4 pods
+    explanation: >
+      Reported in the paper
+      [[Section 4]](https://arxiv.org/pdf/2203.15556.pdf).
+  quality_control: none
+  # Downstream
+  access:
    value: closed
-  analysis: "Model performance was evaluated on simulated and robotics task primarily,\
-    \ including out-of-distribution and skill generalization.\n"
-  created_date:
-    explanation: "The date that Gato was announced [[DeepMind Blog Post]] (https://www.deepmind.com/blog/a-generalist-agent).\n"
-    value: 2022-05-12
-  dependencies: [Gato dataset]
-  description: "Gato is a generalist agent based on sequence modeling using the\
-    \ Transformer architecture to implement multi-modal, multi-task, multi-embodiment\
-    \ generalist policy.\n"
-  feedback: "The feedback for the model can be provided at the email linked in the\
-    \ model card, reedscot at google.com [[Model Card]](https://openreview.net/pdf?id=1ikK0kHjvj#appendix.A).\n"
-  intended_uses: "The intended uses are stated in the Gopher model card: \"Learn\
-    \ to accomplish a wide variety of tasks from expert demonstrations, such as\
-    \ playing video games, controlling simulated embodiments, and real world block\
-    \ stacking.\" [[Model Card]](https://openreview.net/pdf?id=1ikK0kHjvj#appendix.A).\n"
+    explanation: >
+      The model access is limited to DeepMind researchers. The model won't be
+      released to the public
+      [[Model Card]](https://arxiv.org/pdf/2203.15556.pdf#appendix.I).
  license:
-    explanation: "The asset isn't released, and hence the license is unknown.\n"
    value: unknown
-  modality:
-    explanation: text, image, control/robotics
-    value: image, text; image, text
-  model_card: https://openreview.net/pdf?id=1ikK0kHjvj#appendix.B
+    explanation: >
+      The model likely has a license specifically for DeepMind's use,
+      based on the information provided in the model card
+      [[Model Card]](https://arxiv.org/pdf/2203.15556.pdf#appendix.I).
+  intended_uses: >
+    The intended uses are stated in the Chinchilla model card: "The primary use
+    is research on language models, including:
+    research on the scaling behaviour of language models along with those listed
+    in the Gopher paper"
+    [[Model Card]](https://arxiv.org/pdf/2203.15556.pdf#appendix.I).
+  prohibited_uses: >
+    The model card lists the following as out of scope uses of the model: "for
+    language generation in harmful or deceitful settings. More generally, the
+    model should not be used for downstream applications without further safety
+    and fairness mitigations"
+    [[Model Card]](https://arxiv.org/pdf/2203.15556.pdf#appendix.I).
  monitoring:
-    explanation: "There is no information on how DeepMind is internally monitoring\
-    \ the use of the model.\n"
    value: Unknown
+    explanation: >
+      There is no information on how DeepMind is internally monitoring
+      the use of the model.
+  feedback: >
+    The feedback for the model can be provided at the email linked in the model
+    card, {jordanhoffmann, sborgeaud, amensch, sifre} at deepmind.com
+    [[Model Card]](https://arxiv.org/pdf/2203.15556.pdf#appendix.I).
+
+- type: model
  name: Gato
+  # General
  organization: DeepMind
-  prohibited_uses: "The model card lists the following as out of scope uses of the\
-    \ model: \"Not intended for commercial or production use. Military uses are\
-    \ strictly prohibited.\" [[Model Card]](https://openreview.net/pdf?id=1ikK0kHjvj#appendix.A).\n"
-  quality_control:
-    explanation: "Reported in the mitigations in the model card [[Model Card]](https://openreview.net/pdf?id=1ikK0kHjvj#appendix.A).\n"
-    value: none
-  size: 1.2B parameters (dense)
+  description: >
+    Gato is a generalist agent based on sequence modeling using the Transformer
+    architecture to implement a multi-modal, multi-task, multi-embodiment generalist
+    policy.
+  created_date:
+    value: 2022-05-12
+    explanation: >
+      The date that Gato was announced
+      [[DeepMind Blog Post]]
+      (https://www.deepmind.com/blog/a-generalist-agent).
+  url: https://www.deepmind.com/blog/a-generalist-agent
+  model_card: https://openreview.net/pdf?id=1ikK0kHjvj#appendix.B
+  modality: text, image, control/robotics
+  size:
+    value: 1.2B parameters (dense model)
+    explanation: >
+      The Gato family has models of several sizes (79M, 364M, 1.18B)
+      [[Table 6]](https://openreview.net/pdf?id=1ikK0kHjvj).
+  analysis: >
+    Model performance was evaluated primarily on simulated and robotics tasks,
+    including out-of-distribution and skill generalization.
+  # Construction
+  dependencies: [Gato dataset]
  training_emissions:
-    explanation: "Authors do not report the training emissions.\n"
    value: unknown
-  training_hardware:
-    explanation: "Reported in the paper [[Section 2.3]](https://openreview.net/pdf?id=1ikK0kHjvj).\n"
-    value: 16x16 TPU v3 slice
+    explanation: >
+      Authors do not report the training emissions.
  training_time:
-    explanation: "Reported in the paper [[Section 2.3]](https://openreview.net/pdf?id=1ikK0kHjvj).\n"
    value: 4 days on a 16x16 TPU v3 slice
-  type: model
-  url: https://www.deepmind.com/blog/a-generalist-agent
-- access: closed
-  analysis: ''
+    explanation: >
+      Reported in the paper
+      [[Section 2.3]](https://openreview.net/pdf?id=1ikK0kHjvj).
+  training_hardware:
+    value: 16x16 TPU v3 slice
+    explanation: >
+      Reported in the paper
+      [[Section 2.3]](https://openreview.net/pdf?id=1ikK0kHjvj).
+  quality_control:
+    value: none
+    explanation: >
+      Reported in the mitigations in the model card
+      [[Model Card]](https://openreview.net/pdf?id=1ikK0kHjvj#appendix.A).
+  # Downstream
+  access:
+    value: closed
+    explanation: >
+      The model access is limited to DeepMind researchers. The model won't be
+      released to the public
+      [[Model Card]](https://openreview.net/pdf?id=1ikK0kHjvj#appendix.A).
+  license:
+    value: unknown
+    explanation: >
+      The asset isn't released, and hence the license is unknown.
+  intended_uses: >
+    The intended uses are stated in the Gato model card: "Learn to accomplish
+    a wide variety of tasks from expert demonstrations, such as playing video
+    games, controlling simulated embodiments,
+    and real world block stacking."
+    [[Model Card]](https://openreview.net/pdf?id=1ikK0kHjvj#appendix.A).
+  prohibited_uses: >
+    The model card lists the following as out of scope uses of the model: "Not
+    intended for commercial or production use. Military uses are
+    strictly prohibited."
+    [[Model Card]](https://openreview.net/pdf?id=1ikK0kHjvj#appendix.A).
+  monitoring:
+    value: unknown
+    explanation: >
+      There is no information on how DeepMind is internally monitoring
+      the use of the model.
+  feedback: >
+    The feedback for the model can be provided at the email linked in the model
+    card, reedscot at google.com
+    [[Model Card]](https://openreview.net/pdf?id=1ikK0kHjvj#appendix.A).
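The training_time figures recorded for Gopher and Chinchilla above, and for GPT-NeoX later in this patch, follow the same back-of-the-envelope arithmetic: either divide the reported total training compute (in petaflops) by the number of seconds in a day, or multiply wall-clock days by GPU count, per-GPU peak petaflop/s, and an assumed utilization. The minimal Python sketch below only restates that arithmetic under the convention used in these entries (1 petaflop/s-day = 86,400 petaflops of total compute); the helper names and the standalone-script form are illustrative and are not part of any asset file.

    # Sketch of the petaflop/s-day arithmetic quoted in the training_time explanations.
    SECONDS_PER_DAY = 60 * 60 * 24  # 86,400

    def compute_to_pfs_days(total_petaflops: float) -> float:
        # Total reported training compute (petaflops) -> petaflop/s-days.
        return total_petaflops / SECONDS_PER_DAY

    def hardware_estimate_pfs_days(days: float, num_gpus: int,
                                   peak_pflops_per_gpu: float,
                                   utilization: float) -> float:
        # Wall-clock estimate, as in the GPT-NeoX entry later in this patch:
        # days * GPUs * peak petaflop/s per GPU * assumed utilization.
        return days * num_gpus * peak_pflops_per_gpu * utilization

    print(round(compute_to_pfs_days(6.31e8), 2))   # Gopher: 7303.24
    print(round(compute_to_pfs_days(5.76e8), 2))   # Chinchilla: ~6666.67 (entry records 6666.66)
    print(round(hardware_estimate_pfs_days(76.25, 96, 0.0195, 0.33), 2))  # GPT-NeoX: ~47.1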
+ +- type: model + name: Sparrow + organization: DeepMind + description: '' created_date: 2022-09-28 + url: https://arxiv.org/abs/2209.14375 + model_card: '' + modality: text + analysis: '' + size: 70B dependencies: - Chinchilla - Google Search - Sparrow Rule reward model - Sparrow Preference reward model - description: '' - feedback: '' - intended_uses: '' - license: - explanation: "The asset isn't released, and hence the license is unknown.\n" - value: unknown - modality: - explanation: text - value: text; text - model_card: '' - monitoring: '' - name: Sparrow - organization: DeepMind - prohibited_uses: '' - quality_control: '' - size: 70B parameters (dense) training_emissions: '' - training_hardware: '' training_time: '' - type: model - url: https://arxiv.org/abs/2209.14375 -- access: closed - analysis: '' - created_date: 2021-12-08 - dependencies: [MassiveText] - description: '' - feedback: '' - intended_uses: '' + training_hardware: '' + quality_control: '' + access: closed license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - modality: - explanation: Text - value: text; text - model_card: '' + explanation: > + The asset isn't released, and hence the license is unknown. + intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' + +- type: model name: RETRO organization: DeepMind - prohibited_uses: '' - quality_control: '' - size: 7.5B parameters (dense) - training_emissions: '' - training_hardware: '' - training_time: '' - type: model + description: '' + created_date: 2021-12-08 url: https://arxiv.org/abs/2112.04426 -- access: closed + model_card: '' + modality: Text analysis: '' - created_date: 2022-09-28 - dependencies: [Chinchilla, Sparrow adversarial probing dataset] - description: '' - feedback: '' - intended_uses: '' + size: 7.5B + dependencies: [MassiveText] + training_emissions: '' + training_time: '' + training_hardware: '' + quality_control: '' + access: closed license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - modality: - explanation: text - value: text; text - model_card: '' + explanation: > + The asset isn't released, and hence the license is unknown. + intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' + +- type: model name: Sparrow Rule reward model organization: DeepMind - prohibited_uses: '' - quality_control: '' - size: 70B parameters (dense) - training_emissions: '' - training_hardware: '' - training_time: '' - type: model + description: '' + created_date: 2022-09-28 url: https://arxiv.org/abs/2209.14375 -- access: closed + model_card: '' + modality: text analysis: '' - created_date: 2022-09-28 - dependencies: [Chinchilla, Sparrow response preference dataset] - description: '' - feedback: '' - intended_uses: '' + size: 70B + dependencies: [Chinchilla, Sparrow adversarial probing dataset] + training_emissions: '' + training_time: '' + training_hardware: '' + quality_control: '' + access: closed license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - modality: - explanation: text - value: text; text - model_card: '' + explanation: > + The asset isn't released, and hence the license is unknown. 
+ intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' + +- type: model name: Sparrow Preference reward model organization: DeepMind - prohibited_uses: '' - quality_control: '' - size: 70B parameters (dense) - training_emissions: '' - training_hardware: '' - training_time: '' - type: model + description: '' + created_date: 2022-09-28 url: https://arxiv.org/abs/2209.14375 -- access: closed + model_card: '' + modality: text analysis: '' - created_date: 2022-09-28 - datasheet: '' - dependencies: [Chinchilla] - description: '' - excluded: '' - feedback: '' - included: '' - intended_uses: '' + size: 70B + dependencies: [Chinchilla, Sparrow response preference dataset] + training_emissions: '' + training_time: '' + training_hardware: '' + quality_control: '' + access: closed license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - modality: text + explanation: > + The asset isn't released, and hence the license is unknown. + intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' + +- type: dataset name: Sparrow adversarial probing dataset organization: DeepMind - prohibited_uses: '' - quality_control: '' - sample: [] - size: 27k ratings - type: dataset - url: https://arxiv.org/abs/2209.14375 -- access: closed - analysis: '' + description: '' created_date: 2022-09-28 + url: https://arxiv.org/abs/2209.14375 datasheet: '' + modality: text + size: 27k ratings + sample: [] + analysis: '' dependencies: [Chinchilla] - description: '' - excluded: '' - feedback: '' included: '' - intended_uses: '' + excluded: '' + quality_control: '' + access: closed license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - modality: text + explanation: > + The asset isn't released, and hence the license is unknown. + intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' + +- type: dataset name: Sparrow response preference dataset organization: DeepMind - prohibited_uses: '' - quality_control: '' - sample: [] - size: 72k comparisons - type: dataset + description: '' + created_date: 2022-09-28 url: https://arxiv.org/abs/2209.14375 -- access: closed + datasheet: '' + modality: text + size: 72k comparisons + sample: [] analysis: '' - created_date: 2022-03-16 - dependencies: [Gopher, Google Search, GopherCite reward model] - description: '' - feedback: '' - intended_uses: '' + dependencies: [Chinchilla] + included: '' + excluded: '' + quality_control: '' + access: closed license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - modality: - explanation: Text - value: text; text - model_card: '' + explanation: > + The asset isn't released, and hence the license is unknown. 
+ intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' + +- type: model name: GopherCite organization: DeepMind - prohibited_uses: '' - quality_control: '' - size: 280B parameters (dense) - training_emissions: '' - training_hardware: '' - training_time: '' - type: model + description: '' + created_date: 2022-03-16 url: https://storage.googleapis.com/deepmind-media/Teaching%20language%20models%20to%20support%20answers%20with%20verified%20quotes/Teaching%20language%20models%20to%20support%20answers%20with%20verified%20quotes.pdf -- access: closed + model_card: '' + modality: Text analysis: '' - created_date: 2022-03-16 - dependencies: [Gopher, GopherCite Preference dataset] - description: '' - feedback: '' - intended_uses: '' + size: 280B + dependencies: [Gopher, Google Search, GopherCite reward model] + training_emissions: '' + training_time: '' + training_hardware: '' + quality_control: '' + access: closed license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - modality: - explanation: text - value: text; text - model_card: '' + explanation: > + The asset isn't released, and hence the license is unknown. + intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' + +- type: model name: GopherCite reward model organization: DeepMind - prohibited_uses: '' - quality_control: '' - size: 7B parameters (dense) - training_emissions: '' - training_hardware: '' - training_time: '' - type: model + description: '' + created_date: 2022-03-16 url: https://storage.googleapis.com/deepmind-media/Teaching%20language%20models%20to%20support%20answers%20with%20verified%20quotes/Teaching%20language%20models%20to%20support%20answers%20with%20verified%20quotes.pdf -- access: closed + model_card: '' + modality: text analysis: '' - created_date: 2022-03-16 - datasheet: '' - dependencies: [Gopher, Google Search] - description: '' - excluded: '' - feedback: '' - included: '' - intended_uses: '' + size: 7B + dependencies: [Gopher, GopherCite Preference dataset] + training_emissions: '' + training_time: '' + training_hardware: '' + quality_control: '' + access: closed license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - modality: text + explanation: > + The asset isn't released, and hence the license is unknown. + intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' + +- type: dataset name: GopherCite Preference dataset organization: DeepMind - prohibited_uses: '' - quality_control: '' - sample: [] - size: 33k response pairs - type: dataset + description: '' + created_date: 2022-03-16 url: https://storage.googleapis.com/deepmind-media/Teaching%20language%20models%20to%20support%20answers%20with%20verified%20quotes/Teaching%20language%20models%20to%20support%20answers%20with%20verified%20quotes.pdf -- access: closed + datasheet: '' + modality: text + size: 33k response pairs + sample: [] analysis: '' - created_date: 2022-09-29 - dependencies: [Chinchilla] - description: '' - feedback: '' - intended_uses: '' + dependencies: [Gopher, Google Search] + included: '' + excluded: '' + quality_control: '' + access: closed license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - modality: - explanation: text - value: text; text - model_card: '' + explanation: > + The asset isn't released, and hence the license is unknown. 
+ intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' + +- type: model name: Dramatron organization: DeepMind - prohibited_uses: '' - quality_control: '' - size: 70B parameters (dense) + description: '' + created_date: 2022-09-29 + url: https://arxiv.org/abs/2209.14958 + model_card: '' + modality: text + analysis: '' + size: 70B parameters (dense model) + dependencies: [Chinchilla] training_emissions: '' - training_hardware: '' training_time: '' - type: model - url: https://arxiv.org/abs/2209.14958 + training_hardware: '' + quality_control: '' + access: closed + license: + value: unknown + explanation: > + The asset isn't released, and hence the license is unknown. + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' diff --git a/assets/duckduckgo.yaml b/assets/duckduckgo.yaml index a1defb85..465b62eb 100644 --- a/assets/duckduckgo.yaml +++ b/assets/duckduckgo.yaml @@ -1,25 +1,27 @@ --- -- access: open - adaptation: '' - created_date: 2023-03-08 - dependencies: [Anthropic API] + +- type: application + name: DuckAssist + organization: DuckDuckGo description: The first Instant Answer in DuckDuckGo search results to use natural language technology to generate answers to search queries using Wikipedia and other related sources - failures: '' - feedback: '' - intended_uses: '' + created_date: 2023-03-08 + url: https://spreadprivacy.com/duckassist-launch/ + dependencies: [Anthropic API] + adaptation: '' + output_space: '' + quality_control: '' + access: open license: - explanation: "The asset is no longer available, and hence the license is unknown.\n" value: unknown + explanation: > + The asset is no longer available, and hence the license is unknown. + terms_of_service: '' + intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' monthly_active_users: '' - name: DuckAssist - organization: DuckDuckGo - output_space: '' - prohibited_uses: '' - quality_control: '' - terms_of_service: '' - type: application - url: https://spreadprivacy.com/duckassist-launch/ user_distribution: '' + failures: '' diff --git a/assets/duolingo.yaml b/assets/duolingo.yaml index 44e4e886..de25f466 100644 --- a/assets/duolingo.yaml +++ b/assets/duolingo.yaml @@ -1,58 +1,62 @@ --- -- access: limited - adaptation: '' - created_date: 2023-03-14 - dependencies: [GPT-4 API] + +- type: application + name: Duolingo Explain My Answer + organization: Duolingo description: Explain My Answer offers learners the chance to learn more about their response in a lesson (whether their answer was correct or incorrect!) By tapping a button after certain exercise types, learners can enter a chat with Duo to get a simple explanation on why their answer was right or wrong, and ask for examples or further clarification. - failures: '' - feedback: '' - intended_uses: '' + created_date: 2023-03-14 + url: https://blog.duolingo.com/duolingo-max/ + dependencies: [GPT-4 API] + adaptation: '' + output_space: '' + quality_control: '' + access: limited license: - explanation: "Users are granted a limited license per Duolingo Terms.\n" value: custom + explanation: > + Users are granted a limited license per Duolingo Terms. 
+ terms_of_service: https://www.duolingo.com/terms + intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' monthly_active_users: '' - name: Duolingo Explain My Answer - organization: Duolingo - output_space: '' - prohibited_uses: '' - quality_control: '' - terms_of_service: https://www.duolingo.com/terms - type: application - url: https://blog.duolingo.com/duolingo-max/ user_distribution: '' -- access: limited - adaptation: '' - created_date: 2023-03-14 - dependencies: [Duolingo Role Play, Duolingo Explain My Answer] + failures: '' + +- type: application + name: Duolingo Max + organization: Duolingo description: Duolingo Max is a new subscription tier above Super Duolingo that gives learners access to two brand-new features and exercises - Explain My Answer and Roleplay. - failures: '' - feedback: '' - intended_uses: '' + created_date: 2023-03-14 + url: https://blog.duolingo.com/duolingo-max/ + dependencies: [Duolingo Role Play, Duolingo Explain My Answer] + adaptation: '' + output_space: '' + quality_control: '' + access: limited license: - explanation: "Users are granted a limited license per Duolingo Terms.\n" value: custom + explanation: > + Users are granted a limited license per Duolingo Terms. + terms_of_service: '' + intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' monthly_active_users: '' - name: Duolingo Max - organization: Duolingo - output_space: '' - prohibited_uses: '' - quality_control: '' - terms_of_service: '' - type: application - url: https://blog.duolingo.com/duolingo-max/ user_distribution: '' -- access: limited - adaptation: '' - created_date: 2023-03-14 - dependencies: [GPT-4 API] + failures: '' + +- type: application + name: Duolingo Role Play + organization: Duolingo description: Roleplay allows learners to practice real-world conversation skills with world characters in the app. These challenges, which earn XP, will live alongside the path as one of the “Side Quests” learners can access by tapping @@ -60,24 +64,26 @@ scenarios! Learners might discuss future vacation plans with Lin, order coffee at a café in Paris, go furniture shopping with Eddy, or ask a friend to go for a hike. - failures: '' - feedback: '' - intended_uses: '' + created_date: 2023-03-14 + url: https://blog.duolingo.com/duolingo-max/ + dependencies: [GPT-4 API] + adaptation: '' + output_space: '' + quality_control: '' + access: limited license: - explanation: "\"Subject to the terms of these Terms and Conditions, Duolingo\ - \ grants you a non-transferable, non-exclusive license to download, install,\ - \ and use one copy of each App in object code form only on an interactive\ - \ wireless device that you own or control\" - excerpt from the Terms of Service\ - \ document.\n" value: custom + explanation: > + "Subject to the terms of these Terms and Conditions, Duolingo grants you + a non-transferable, non-exclusive license to download, install, and use + one copy of each App in object code form only on an interactive wireless + device that you own or control" - excerpt from the Terms of Service + document. 
+ terms_of_service: https://www.duolingo.com/terms + intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' monthly_active_users: '' - name: Duolingo Role Play - organization: Duolingo - output_space: '' - prohibited_uses: '' - quality_control: '' - terms_of_service: https://www.duolingo.com/terms - type: application - url: https://blog.duolingo.com/duolingo-max/ user_distribution: '' + failures: '' diff --git a/assets/eleutherai.yaml b/assets/eleutherai.yaml index 5813633b..524c8010 100644 --- a/assets/eleutherai.yaml +++ b/assets/eleutherai.yaml @@ -1,49 +1,16 @@ --- -- access: - explanation: "The dataset is freely available to the public and can be downloaded\ - \ from The Eye [[The Pile]](https://mystic.the-eye.eu/public/AI/pile/).\n" - value: open - analysis: "Analyses of the data's composition, document statistics, language/dialectal\ - \ coverage, topical distribution, and biases are conducted are conducted in\ - \ the paper [[The Pile Paper]](https://arxiv.org/pdf/2101.00027.pdf).\n" - created_date: 2021-01-01 - datasheet: https://arxiv.org/pdf/2201.07311.pdf - dependencies: [] - description: "A latge language model training dataset, used to train GPT-NeoX-20B.\n" - excluded: "Authors report that they have excluded some datasets \"because they\ - \ were too small to be worth spending time or because the English component\ - \ of the data did not merit inclusion on its own. Three datasets were excluded\ - \ for other reasons: (1) US Congressional Records were excluded because it \"\ - reflects the opinions and biases of the political class over the past 200 years,\ - \ including segregationism and xenophobia.\" (2) Online Fanfiction resources\ - \ amounting to Hundreds of GiB were excluded on logistical grounds. (3) Literotica,\ - \ platform where users can upload short-form erotic fiction, was excluded because\ - \ the authors decided to exclude fanfiction, the corpus would require significant\ - \ investigation, and corpus contain significant amount of stereotyping [[Appendix\ - \ B]](https://arxiv.org/pdf/2101.00027.pdf).\n" - feedback: "Feedback can be given by emailing the authors at contact at eleuther.ai.\n" - included: "The Pile data come from 22 sources, with over half of the data being\ - \ from Common Crawl (Pile-CC; 227GB), fiction and nonfiction books (Books3;\ - \ 101GB), biomedical articles (PubMed Central; 90GB), and code (Github; 95 GB).\ - \ Refer to the paper for full decomposition [[Table 1]](https://arxiv.org/pdf/2101.00027.pdf#table.caption.2).\n" - intended_uses: "The Pile was intended to be used as a high quality large text\ - \ dataset for language modeling tasks, explained in more detail in the paper\ - \ [[Section 1]](https://arxiv.org/pdf/2101.00027.pdf#section.1).\n" - license: - explanation: As indicated in the paper. 
-    value: MIT
-  modality:
-    explanation: text (English) and code
-    value: code, text
-  monitoring: none
+
+- type: dataset
  name: The Pile
+  # General
  organization: EleutherAI
-  prohibited_uses: none
-  quality_control: "In addition to the data inclusion and exclusion decisions, the\
-    \ quality was controlled through filtering for English (pycld2 language classifier),\
-    \ filtering for documents similar to OpenWebText2 (classifier on CommonCrawl),\
-    \ and several forms of deduplication as detailed in the paper [[Appendix C]](https://arxiv.org/pdf/2101.00027.pdf#appendix.1.C)\
-    \ [[Appendix D]](https://arxiv.org/pdf/2101.00027.pdf#appendix.1.D).\n"
+  description: >
+    A large language model training dataset, used to train GPT-NeoX-20B.
+  created_date: 2021-01-01
+  url: https://arxiv.org/pdf/2101.00027.pdf
+  datasheet: https://arxiv.org/pdf/2201.07311.pdf
+  modality: text (English) and code
+  size: 825 GB
  sample:
  - '...pot trending topics and the coverage around them. First up, there’s
    a bit of a visual redesign. Previously, clicking on a trending topic would highlight
@@ -64,204 +31,287 @@
    the defendants urge that this Court should reconsider the case of Green Mountain
    Marble Co. v. Highway Board, supra, and follow the Federal practice of looking
    to the evide
-  size: 825 GB
-  type: dataset
-  url: https://arxiv.org/pdf/2101.00027.pdf
-- access:
-    explanation: "The model can be downloaded for free from [[The Eye]](https://mystic.the-eye.eu/public/AI/GPT-J-6B/step_383500.tar.zstd)\n"
+  analysis: >
+    Analyses of the data's composition, document statistics,
+    language/dialectal coverage, topical distribution, and biases are
+    conducted in the paper
+    [[The Pile Paper]](https://arxiv.org/pdf/2101.00027.pdf).
+  # Construction
+  dependencies: []
+  license:
+    value: MIT
+    explanation: As indicated in the paper.
+  included: >
+    The Pile data come from 22 sources, with over half of the data being from
+    Common Crawl (Pile-CC; 227GB), fiction and nonfiction books (Books3; 101GB),
+    biomedical articles (PubMed Central; 90GB), and code (Github; 95 GB).
+    Refer to the paper for the full decomposition
+    [[Table 1]](https://arxiv.org/pdf/2101.00027.pdf#table.caption.2).
+  excluded: >
+    Authors report that they have excluded some datasets "because they were too
+    small to be worth spending time or because the English component of the data
+    did not merit inclusion on its own. Three datasets were excluded for other
+    reasons: (1) US Congressional Records were excluded because it "reflects the
+    opinions and biases of the political class over the past 200 years,
+    including segregationism and xenophobia." (2) Online Fanfiction resources
+    amounting to Hundreds of GiB were excluded on logistical grounds.
+    (3) Literotica, a platform where users can upload short-form erotic fiction,
+    was excluded because the authors decided to exclude fanfiction, the
+    corpus would require significant investigation, and the corpus contains a
+    significant amount of stereotyping
+    [[Appendix B]](https://arxiv.org/pdf/2101.00027.pdf).
+  quality_control: >
+    In addition to the data inclusion and exclusion decisions, the quality was
+    controlled through filtering for English (pycld2 language classifier),
+    filtering for documents similar to OpenWebText2 (classifier on CommonCrawl),
+    and several forms of deduplication as detailed in the paper
+    [[Appendix C]](https://arxiv.org/pdf/2101.00027.pdf#appendix.1.C)
+    [[Appendix D]](https://arxiv.org/pdf/2101.00027.pdf#appendix.1.D).
+ # Downstream + access: value: open - analysis: '' + explanation: > + The dataset is freely available to the public and + can be downloaded from The Eye + [[The Pile]](https://mystic.the-eye.eu/public/AI/pile/). + intended_uses: > + The Pile was intended to be used as a high quality large text dataset for + language modeling tasks, explained in more detail in the paper + [[Section 1]](https://arxiv.org/pdf/2101.00027.pdf#section.1). + prohibited_uses: none + monitoring: none + feedback: > + Feedback can be given by emailing the authors at contact at eleuther.ai. + +- type: model + name: GPT-J + organization: EleutherAI + description: GPT-J is an open-source autoregressive language model. created_date: - explanation: Date model blog post was published value: 2021-06-04 - dependencies: [The Pile] - description: GPT-J is an open-source autoregressive language model. - feedback: '' - intended_uses: '' - license: - explanation: "As indicated in the [[Github repository]](https://github.com/kingoflolz/mesh-transformer-jax)\n" - value: Apache 2.0 - modality: - explanation: text (English) - value: text; text + explanation: Date model blog post was published + url: https://arankomatsuzaki.wordpress.com/2021/06/04/gpt-j/ model_card: '' - monitoring: '' - name: GPT-J - organization: EleutherAI - prohibited_uses: '' - quality_control: '' - size: 6B parameters (dense) + modality: text (English) + analysis: '' + size: 6B parameters (dense model) + dependencies: [The Pile] training_emissions: '' - training_hardware: 'TRC (Unspecified # of TPU v3-8s)' training_time: '' - type: model - url: https://arankomatsuzaki.wordpress.com/2021/06/04/gpt-j/ -- access: - explanation: "The model can be downloaded for free from [[The Eye]](https://mystic.the-eye.eu/public/AI/gptneo-release/GPT3_2-7B/)\n" + training_hardware: 'TRC (Unspecified # of TPU v3-8s)' + quality_control: '' + access: value: open - analysis: '' - created_date: - explanation: Date Github repo was update - value: 2021-03-21 - dependencies: [The Pile] - description: '' - feedback: '' + explanation: > + The model can be downloaded for free from [[The Eye]](https://mystic.the-eye.eu/public/AI/GPT-J-6B/step_383500.tar.zstd) + license: + value: Apache 2.0 + explanation: > + As indicated in the [[Github repository]](https://github.com/kingoflolz/mesh-transformer-jax) intended_uses: '' - license: MIT - modality: - explanation: Text (English) - value: text; text - model_card: '' + prohibited_uses: '' monitoring: '' + feedback: '' + +- type: model name: GPT-Neo organization: EleutherAI - prohibited_uses: '' - quality_control: '' - size: 2.7B parameters (dense) + description: '' + created_date: + value: 2021-03-21 + explanation: Date Github repo was update + url: https://github.com/EleutherAI/gpt-neo + model_card: '' + modality: Text (English) + analysis: '' + size: 2.7B parameters (dense model) + dependencies: [The Pile] training_emissions: '' - training_hardware: '' training_time: '' - type: model - url: https://github.com/EleutherAI/gpt-neo -- access: - explanation: "The model can be downloaded for free The Eye [[GPT-NeoX-20B]](https://mystic.the-eye.eu/public/AI/models/GPT-NeoX-20B/).\n" + training_hardware: '' + quality_control: '' + access: value: open - analysis: "The model was evaluated on standard NLP benchmarks: LAMBADA, ANLI,\ - \ HellaSwag, MMLU among others [[Section 4]](http://eaidata.bmk.sh/data/GPT_NeoX_20B.pdf#section.4).\n" - created_date: 2022-02-02 - dependencies: [The Pile] - description: "GPT-NeoX (20B) is an open-sourced autoregressive 
language model.\n" - feedback: "Feedback can be provided using the # 20b channel in EleutherAI Discord\ - \ group [[EleutherAI Blog Post]](https://blog.eleuther.ai/announcing-20b/).\ - \ Find the Discord link in the FAQ page [[FAQ]](https://www.eleuther.ai/faq/).\n" - intended_uses: "As stated in the model card: \"GPT-NeoX-20B learns an inner representation\ - \ of the English language that can be used to extract features useful for downstream\ - \ tasks. The model is best at what it was pretrained for however, which is generating\ - \ text from a prompt. Due to the generality of the pretraining set, it has acquired\ - \ the ability to generate completions across a wide range of tasks - from programming\ - \ to fiction writing [[Model Card]](https://mystic.the-eye.eu/public/AI/models/GPT-NeoX-20B/20B_model_card.md).\"\ - \n" - license: - explanation: "As indicated in the accompanying blog post [[EleutherAI Blog Post]](https://blog.eleuther.ai/announcing-20b/).\n" - value: Apache 2.0 - modality: - explanation: text (English) and code - value: code, text; code, text - model_card: https://mystic.the-eye.eu/public/AI/models/GPT-NeoX-20B/20B_model_card.md - monitoring: none + explanation: > + The model can be downloaded for free from [[The Eye]](https://mystic.the-eye.eu/public/AI/gptneo-release/GPT3_2-7B/) + license: MIT + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' + +- type: model name: GPT-NeoX + # General organization: EleutherAI - prohibited_uses: none - quality_control: none - size: 20B parameters (dense) + description: > + GPT-NeoX (20B) is an open-sourced autoregressive language model. + created_date: 2022-02-02 + url: http://eaidata.bmk.sh/data/GPT_NeoX_20B.pdf + model_card: https://mystic.the-eye.eu/public/AI/models/GPT-NeoX-20B/20B_model_card.md + modality: text (English) and code + size: 20B parameters (dense model) + analysis: > + The model was evaluated on standard NLP benchmarks: LAMBADA, ANLI, + HellaSwag, MMLU among others + [[Section 4]](http://eaidata.bmk.sh/data/GPT_NeoX_20B.pdf#section.4). + # Construction + dependencies: [The Pile] training_emissions: - explanation: "The amount of emission during the development and training of\ - \ the model based on the author's estimation [[Section 6.4]](http://eaidata.bmk.sh/data/GPT_NeoX_20B.pdf#subsection.6.4).\n" value: 31.73 tCO2e - training_hardware: - explanation: "As outline by the authors [[Section 2.3]](http://eaidata.bmk.sh/data/GPT_NeoX_20B.pdf#subsection.2.3)\n" - value: 12 x 8 A100 GPUs + explanation: > + The amount of emission during the development and training of + the model based on the author's estimation + [[Section 6.4]](http://eaidata.bmk.sh/data/GPT_NeoX_20B.pdf#subsection.6.4). training_time: - explanation: "Training time was reported as 1830 hours reported by the authors,\ - \ equaling 76.25 days. 
[[Section 6.4]](http://eaidata.bmk.sh/data/GPT_NeoX_20B.pdf#subsection.6.4).\
\ The authors report that 96 (12 * 8) A100 GPUs were used during the training.\
\ The A100 GPUs have a single precision performance of 0.0195 petaflops [[A100\
\ Datasheet]](https://www.nvidia.com/content/dam/en-zz/Solutions/Data-Center/a100/pdf/nvidia-a100-datasheet-us-nvidia-1758950-r4-web.pdf).\
\ Assuming the estimated utilization is 33%, following [[OpenAI AI and Computer\
\ Blog]](https://openai.com/blog/ai-and-compute/#addendum), the training time\
\ is 47.10 petaflop/s-day (76.25 * 96 * 0.0195 * 0.33).\n"
value: 47.10 petaflop/s-day
- type: model
- url: http://eaidata.bmk.sh/data/GPT_NeoX_20B.pdf
-- access:
- explanation: "GooseAI API can be accessed by signing up on the goose.ai website.\n"
- value: limited
- adaptation: unknown
- created_date: unknown
- dependencies: [GPT-NeoX]
- description: "GooseAI API is an API service providing access to NLP services.\n"
- failures: unknown
- feedback:
- explanation: "In the \"Error Reporting and Feedback\" section of the Goose.ai\
- \ Terms of Service, GooseAI asks all the feedback to be sent to support at\
- \ goose.ai [[GooseAI Terms of Service]](https://goose.ai/docs/tos).\n"
- value: Email support
- intended_uses: "Intended to be used as an NLP infrastructure.\n"
+ explanation: >
+ Training time was reported as 1830 hours by the authors, equaling
+ 76.25 days.
+ [[Section 6.4]](http://eaidata.bmk.sh/data/GPT_NeoX_20B.pdf#subsection.6.4).
+ The authors report that 96 (12 * 8) A100 GPUs were used during the
+ training.
+ The A100 GPUs have a single precision performance of 0.0195 petaflops
+ [[A100 Datasheet]](https://www.nvidia.com/content/dam/en-zz/Solutions/Data-Center/a100/pdf/nvidia-a100-datasheet-us-nvidia-1758950-r4-web.pdf).
+ Assuming the estimated utilization is 33%, following
+ [[OpenAI AI and Compute Blog]](https://openai.com/blog/ai-and-compute/#addendum),
+ the training time is 47.10 petaflop/s-day (76.25 * 96 * 0.0195 * 0.33).
+ training_hardware:
+ value: 12 x 8 A100 GPUs
+ explanation: >
+ As outlined by the authors
+ [[Section 2.3]](http://eaidata.bmk.sh/data/GPT_NeoX_20B.pdf#subsection.2.3)
+ quality_control: none
+ # Downstream
+ access:
+ value: open
+ explanation: >
+ The model can be downloaded for free from The Eye
+ [[GPT-NeoX-20B]](https://mystic.the-eye.eu/public/AI/models/GPT-NeoX-20B/).
license:
- explanation: "\"Subject to Customer’s strict compliance with this TOS, GooseAI\
- \ grants Customer a limited, non-exclusive, non-transferable, non-sublicensable,\
- \ revocable license to access and use the Platform as described in and subject\
- \ to this TOS\" - excerpt from the Terms of Service document.\n"
- value: custom
- monitoring:
- explanation: "In the \"GooseAI Monitoring and Enforcement\" section of GooseAI's\
- \ Acceptable Use Policy (AUP), it is stated that Goose.AI has the right to\
- \ investigate any suspected violation of its AUP [[GooseAI Acceptable Use\
- \ Policy]](https://goose.ai/docs/aup).\n"
- value: At will monitoring by the provider
- monthly_active_users: unknown
+ value: Apache 2.0
+ explanation: >
+ As indicated in the accompanying blog post
+ [[EleutherAI Blog Post]](https://blog.eleuther.ai/announcing-20b/).
+ intended_uses: >
+ As stated in the model card: "GPT-NeoX-20B learns an inner representation
+ of the English language that can be used to extract features useful for
+ downstream tasks. The model is best at what it was pretrained for however,
+ which is generating text from a prompt.
+ Due to the generality of the pretraining set, it has acquired the ability + to generate completions across a wide range of tasks - from programming to + fiction writing + [[Model Card]](https://mystic.the-eye.eu/public/AI/models/GPT-NeoX-20B/20B_model_card.md)." + prohibited_uses: none + monitoring: none + feedback: > + Feedback can be provided using the # 20b channel in EleutherAI Discord + group + [[EleutherAI Blog Post]](https://blog.eleuther.ai/announcing-20b/). + Find the Discord link in the FAQ page + [[FAQ]](https://www.eleuther.ai/faq/). + +- type: application name: GooseAI API + # General organization: GooseAI + description: > + GooseAI API is an API service providing access to NLP services. + created_date: unknown + url: goose.ai + # Construction + dependencies: [GPT-NeoX] + adaptation: unknown output_space: - explanation: "Question/Answer and Classification tasks are coming soon according\ - \ to GooseAI [[Main Page]](goose.ai).\n" value: Text Generation, Text Completion - prohibited_uses: - explanation: "Prohibited uses are detailed in the Acceptable Use Policy [[GooseAI\ - \ Acceptable Use Policy]](https://goose.ai/docs/aup).\n" - value: "Illegal or abusive activity, security violations, network abuse\n" + explanation: > + Question/Answer and Classification tasks are coming soon according to + GooseAI + [[Main Page]](goose.ai). quality_control: unknown + # Downstream + access: + value: limited + explanation: > + GooseAI API can be accessed by signing up on the goose.ai website. + license: + value: custom + explanation: > + "Subject to Customer’s strict compliance with this TOS, GooseAI grants + Customer a limited, non-exclusive, non-transferable, non-sublicensable, + revocable license to access and use the Platform as described in and + subject to this TOS" - excerpt from the Terms of Service document. terms_of_service: https://goose.ai/docs/tos - type: application - url: goose.ai + intended_uses: > + Intended to be used as an NLP infrastructure. + prohibited_uses: + value: > + Illegal or abusive activity, security violations, network abuse + explanation: > + Prohibited uses are detailed in the Acceptable Use Policy + [[GooseAI Acceptable Use Policy]](https://goose.ai/docs/aup). + monitoring: + value: At will monitoring by the provider + explanation: > + In the "GooseAI Monitoring and Enforcement" section of GooseAI's + Acceptable Use Policy (AUP), it is stated that Goose.AI has the right to + investigate any suspected violation of its AUP + [[GooseAI Acceptable Use Policy]](https://goose.ai/docs/aup). + feedback: + value: Email support + explanation: > + In the "Error Reporting and Feedback" section of the Goose.ai Terms of + Service, GooseAI asks all the feedback to be sent to support at goose.ai + [[GooseAI Terms of Service]](https://goose.ai/docs/tos). + # Deployment + monthly_active_users: unknown user_distribution: unknown -- access: open - analysis: Evaluated by human testers rating alignment of text input, image output - pairs. - created_date: 2022-09-04 - dependencies: [VQGAN, CLIP] + failures: unknown + +- type: model + name: VQGAN-CLIP + organization: EleutherAI description: VQGAN-CLIP is a model that better generates and edits images using a multimodal encoder to guide image generation. 
- feedback: '' - intended_uses: '' - license: MIT - modality: - explanation: text input with image output - value: text; image + created_date: 2022-09-04 + url: https://arxiv.org/pdf/2204.08583.pdf model_card: '' - monitoring: '' - name: VQGAN-CLIP - organization: EleutherAI - prohibited_uses: '' - quality_control: '' - size: 227M parameters (dense) + modality: text input with image output + analysis: Evaluated by human testers rating alignment of text input, image output + pairs. + size: 227M parameters + dependencies: [VQGAN, CLIP] training_emissions: unknown - training_hardware: 1 NVIDIA Tesla K80 GPU training_time: Less than 1 V100-hour - type: model - url: https://arxiv.org/pdf/2204.08583.pdf -- access: open - analysis: Evaluated on a variety of NLP benchmarks and found to perform similarly - to OPT and BLOOM models. - created_date: 2023-05-31 - dependencies: [The Pile] - description: A suite of 16 LLMs all trained on public data seen in the exact same - order and ranging in size from 70M to 12B parameters - feedback: https://huggingface.co/EleutherAI/pythia-6.9b/discussions + training_hardware: 1 NVIDIA Tesla K80 GPU + quality_control: '' + access: open + license: MIT intended_uses: '' - license: Apache 2.0 - modality: - explanation: natural language text - value: text; text - model_card: https://huggingface.co/EleutherAI/pythia-12b + prohibited_uses: '' monitoring: '' + feedback: '' + +- type: model name: Pythia organization: Eleuther AI - prohibited_uses: '' - quality_control: '' - size: 12B parameters (dense) + description: A suite of 16 LLMs all trained on public data seen in the exact same + order and ranging in size from 70M to 12B parameters + created_date: 2023-05-31 + url: https://arxiv.org/pdf/2304.01373.pdf + model_card: https://huggingface.co/EleutherAI/pythia-12b + modality: natural language text + analysis: Evaluated on a variety of NLP benchmarks and found to perform similarly + to OPT and BLOOM models. + size: 12B parameters + dependencies: [The Pile] training_emissions: '' - training_hardware: 64 A100 GPUs training_time: '' - type: model - url: https://arxiv.org/pdf/2304.01373.pdf + training_hardware: 64 A100 GPUs + quality_control: '' + access: open + license: Apache 2.0 + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: https://huggingface.co/EleutherAI/pythia-6.9b/discussions diff --git a/assets/ens.yaml b/assets/ens.yaml index b728b918..5209fe3d 100644 --- a/assets/ens.yaml +++ b/assets/ens.yaml @@ -1,37 +1,32 @@ --- -- access: - explanation: Dataset, evaluation code and models are publicly available at the - [[HowTo100M dataset webpage]](https://www.di.ens.fr/willow/research/howto100m/). - value: open + +- type: dataset + name: HowTo100M + organization: École Normale Supérieure, Inria + description: HowTo100M is a large-scale dataset of narrated videos with an emphasis + on instructional videos where content creators teach complex tasks with an explicit + intention of explaining the visual content on screen. HowTo100M features a total + of 136M video clips with captions sourced from 1.2M Youtube videos (15 years + of video) and 23k activities from domains such as cooking, hand crafting, personal + care, gardening or fitness. + created_date: + value: 2019-06-07 + explanation: The date the [[paper]](https://arxiv.org/abs/1906.03327) was submitted. 
+ url: https://arxiv.org/pdf/1906.03327.pdf + datasheet: https://arxiv.org/pdf/1906.03327.pdf + modality: text (English) and video + size: 136M video clips + sample: [] analysis: - explanation: See [[Experiments]](https://arxiv.org/pdf/1906.03327v2.pdf#section.5) value: Authors use the dataset to learn a joint text-video embedding by leveraging more than 130M video clip-caption pairs. They then evaluate the learned embeddings on the tasks of localizing steps in instructional videos of CrossTask and textbased video retrieval on YouCook2, MSR-VTT and LSMDC datasets. They show that their learned embedding can perform better compared to models trained on existing carefully annotated but smaller video description datasets. - created_date: - explanation: The date the [[paper]](https://arxiv.org/abs/1906.03327) was submitted. - value: 2019-06-07 - datasheet: https://arxiv.org/pdf/1906.03327.pdf + explanation: See [[Experiments]](https://arxiv.org/pdf/1906.03327v2.pdf#section.5) dependencies: [YouTube] - description: HowTo100M is a large-scale dataset of narrated videos with an emphasis - on instructional videos where content creators teach complex tasks with an explicit - intention of explaining the visual content on screen. HowTo100M features a total - of 136M video clips with captions sourced from 1.2M Youtube videos (15 years - of video) and 23k activities from domains such as cooking, hand crafting, personal - care, gardening or fitness. - excluded: - explanation: See [[Data collection]](https://arxiv.org/pdf/1906.03327v2.pdf#subsection.3.1) - value: Categories such as Relationships and Finance and Business, that may be - more abstract, are excluded. Videos with less than 100 views are removed. - Authors also ignore videos that have less than 100 words. Videos longer than - 2,000 seconds are removed. As some videos may appear in several tasks, the - videos are deduplicated based on YouTube IDs. - feedback: '' included: - explanation: See [[Data collection]](https://arxiv.org/pdf/1906.03327v2.pdf#subsection.3.1) value: The dataset features 1.22 million videos from YouTube with a primary focus on videos containing "visual tasks", that involve some interaction with the physical world (e.g. Making peanut butter, Pruning a tree) as compared @@ -43,22 +38,29 @@ Arts and Entertainment, Computers and Electronics). They also restrict to the top 200 YouTube search results, as the latter ones may not be related to the query task. - intended_uses: '' + explanation: See [[Data collection]](https://arxiv.org/pdf/1906.03327v2.pdf#subsection.3.1) + excluded: + value: Categories such as Relationships and Finance and Business, that may be + more abstract, are excluded. Videos with less than 100 views are removed. + Authors also ignore videos that have less than 100 words. Videos longer than + 2,000 seconds are removed. As some videos may appear in several tasks, the + videos are deduplicated based on YouTube IDs. + explanation: See [[Data collection]](https://arxiv.org/pdf/1906.03327v2.pdf#subsection.3.1) + quality_control: '' + access: + value: open + explanation: Dataset, evaluation code and models are publicly available at the + [[HowTo100M dataset webpage]](https://www.di.ens.fr/willow/research/howto100m/). license: Apache 2.0 - modality: {} - monitoring: '' - name: HowTo100M - organization: École Normale Supérieure, Inria + intended_uses: '' prohibited_uses: + value: > + No uses are explicitly prohibited by the authors. 
They note the following + limitations of the dataset: "We note that the distribution of identities and + activities in the HowTo100M dataset may not be representative of the global + human population and the diversity in society. Please be careful of unintended + societal, gender, racial and other biases when training or deploying models + trained on this data." explanation: See [[HowTo100M dataset webpage]](https://www.di.ens.fr/willow/research/howto100m/) - value: "No uses are explicitly prohibited by the authors. They note the following\ - \ limitations of the dataset: \"We note that the distribution of identities\ - \ and activities in the HowTo100M dataset may not be representative of the\ - \ global human population and the diversity in society. Please be careful\ - \ of unintended societal, gender, racial and other biases when training or\ - \ deploying models trained on this data.\"\n" - quality_control: '' - sample: [] - size: 136M video clips - type: dataset - url: https://arxiv.org/pdf/1906.03327.pdf + monitoring: '' + feedback: '' diff --git a/assets/faraday.yaml b/assets/faraday.yaml index a1af0488..7c986cb4 100644 --- a/assets/faraday.yaml +++ b/assets/faraday.yaml @@ -1,26 +1,27 @@ --- -- access: open - adaptation: '' + +- type: application + name: ARES + organization: Faraday Lab + description: ARES is a text-to-image generator based on Stable Diffusion. The + goal is to provide a simple tool with a user interface allowing mainstream AI + access for artists and creators. created_date: + value: 2023-04-26 explanation: The date Faraday Lab's hugging face card was last updated. Date for ARES release is not published on site. - value: 2023-04-26 + url: https://faradaylab.fr/ dependencies: [Stable Diffusion] - description: ARES is a text-to-image generator based on Stable Diffusion. The - goal is to provide a simple tool with a user interface allowing mainstream AI - access for artists and creators. - failures: '' - feedback: '' - intended_uses: '' - license: unknown - monitoring: '' - monthly_active_users: '' - name: ARES - organization: Faraday Lab + adaptation: '' output_space: generated images - prohibited_uses: '' quality_control: '' + access: open + license: unknown terms_of_service: '' - type: application - url: https://faradaylab.fr/ + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' + monthly_active_users: '' user_distribution: '' + failures: '' diff --git a/assets/google.yaml b/assets/google.yaml index 42aab482..fb56a8dc 100644 --- a/assets/google.yaml +++ b/assets/google.yaml @@ -1,71 +1,100 @@ --- -- access: - explanation: https://huggingface.co/datasets/c4 - value: open - analysis: https://arxiv.org/abs/2104.08758 + +# Datasets +- type: dataset + name: C4 + # General + organization: Google + description: The Colossal Clean Crawled Corpus (C4) is a processed version of + Common Crawl to facilitate transfer learning in NLP. created_date: - explanation: The date the T5 paper was made public. value: 2019-10-23 + explanation: The date the T5 paper was made public. + url: https://arxiv.org/abs/1910.10683 datasheet: https://huggingface.co/datasets/c4 + modality: Text (English) + size: 750GB + sample: [https://huggingface.co/datasets/c4/viewer/en/train] + analysis: https://arxiv.org/abs/2104.08758 + # Construction dependencies: [CommonCrawl] - description: The Colossal Clean Crawled Corpus (C4) is a processed version of - Common Crawl to facilitate transfer learning in NLP. - excluded: "Data was filtered for English using langdetect. 
Further, data was filtered\ - \ to end in terminal punctuation, to remove short pages (less than 5 sentences),\ - \ and to remove \"Dirty, Naughty, Obscene or Otherwise Bad Words\".\n" - feedback: https://huggingface.co/datasets/c4/discussions + license: ODC-By 1.0 included: none + excluded: > + Data was filtered for English using langdetect. Further, data was filtered + to end in terminal punctuation, to remove short pages (less than 5 sentences), + and to remove "Dirty, Naughty, Obscene or Otherwise Bad Words". + quality_control: > + Data filtering excluded obscene words from a block list as well + as short documents and some deduplication was done based on string overlap. + # Downstream + access: + value: open + explanation: https://huggingface.co/datasets/c4 intended_uses: To faciliate transfer learning research in NLP. - license: ODC-By 1.0 - modality: {} + prohibited_uses: none monitoring: none - name: C4 + feedback: https://huggingface.co/datasets/c4/discussions + +- type: dataset + name: Internal Google BERT dataset + # General organization: Google - prohibited_uses: none - quality_control: "Data filtering excluded obscene words from a block list as well\ - \ as short documents and some deduplication was done based on string overlap.\n" - sample: [https://huggingface.co/datasets/c4/viewer/en/train] - size: 750GB - type: dataset - url: https://arxiv.org/abs/1910.10683 -- access: closed - analysis: unknown + description: > + The dataset used to train Internal Google BERT models. created_date: - explanation: "The date of the Google product update blog announcing that BERT\ - \ models were for ranking and featured snippets in Search.\n" value: 2019-11-25 + explanation: > + The date of the Google product update blog announcing that BERT models + were for ranking and featured snippets in Search. + url: https://blog.google/products/search/search-language-understanding-bert/ datasheet: none + modality: text + size: unknown + sample: [] + analysis: unknown + # Construction dependencies: [] - description: "The dataset used to train Internal Google BERT models.\n" - excluded: unknown - feedback: unknown + license: + value: unknown + explanation: > + The asset isn't released, and hence the license is unknown. included: - explanation: "Although we don't exactly know the contents of the Internal Google\ - \ BERT dataset, it likely includes contents from web pages and search queries.\n" value: Web pages, and search queries + explanation: > + Although we don't exactly know the contents of the Internal Google BERT + dataset, it likely includes contents from web pages and search queries. + excluded: unknown + quality_control: unknown + # Downstream + access: closed intended_uses: - explanation: "We don't have an exhaustive list of the intended use cases for\ - \ the Internal Google BERT dataset, but we know that BERT was used in Google\ - \ Search.\n" value: unknown - license: - explanation: "The asset isn't released, and hence the license is unknown.\n" - value: unknown - modality: {} + explanation: > + We don't have an exhaustive list of the intended use cases for the + Internal Google BERT dataset, but we know that BERT was used in Google + Search. 
+ prohibited_uses: unknown monitoring: unknown - name: Internal Google BERT dataset + feedback: unknown + +- type: dataset + name: Conceptual Captions + # General organization: Google - prohibited_uses: unknown - quality_control: unknown + description: > + A dataset containing 3 million (image-URL, caption) pairs designed for the + training and evaluation of machine learned image captioning systems. + created_date: + value: 2018-07-01 + explanation: Due to the lack of information about the exact date, it is assumed + to be the 1st of the known month of creation. + url: https://aclanthology.org/P18-1238/ + datasheet: none + modality: text(English) and image + size: 3.3M (image, text) pairs sample: [] - size: unknown - type: dataset - url: https://blog.google/products/search/search-language-understanding-bert/ -- access: - explanation: Dataset can be downloaded at [[Download]](https://ai.google.com/research/ConceptualCaptions/download) - value: open analysis: - explanation: See [[Experimental Results]](https://aclanthology.org/P18-1238.pdf#section.5) value: Authors evaluate the dataset on two image captioning models - RNN-based model and Transformer model, under two experimental conditions - using the training & development sets provided by the MS COCO dataset, versus training @@ -75,631 +104,708 @@ evaluations indicate that the Conceptual-based models are superior. Automatic models fail to corroborate the human evaluation results. This highlights the weakness of these automatic metrics. - created_date: - explanation: Due to the lack of information about the exact date, it is assumed - to be the 1st of the known month of creation. - value: 2018-07-01 - datasheet: none + explanation: See [[Experimental Results]](https://aclanthology.org/P18-1238.pdf#section.5) + # Construction dependencies: [] - description: "A dataset containing 3 million (image-URL, caption) pairs designed\ - \ for the training and evaluation of machine learned image captioning systems.\n" - excluded: - explanation: See [[Conceptual Captions Dataset Creation]](https://aclanthology.org/P18-1238.pdf#section.3) - value: "The following filtering steps are applied in the given order:\n1. Image-based\ - \ Filtering - \"It only keeps JPEG images where both dimensions are greater\ - \ than 400 pixels, and the ratio of larger to smaller dimension is no more\ - \ than 2. It excludes images that trigger pornography or profanity detectors.\ - \ These filters discard more than 65% of the candidates.\"\n2. Text-based\ - \ Filtering - \"Candidates with no determiner, no noun, or no preposition\ - \ are discarded; candidates with a high noun ratio are also discarded; candidates\ - \ with a high rate of token repetition are discarded; candidates where the\ - \ first word is not capitalized, or with too high capitalized-word ratio are\ - \ discarded; we use a vocabulary VW of 1B token types, appearing at least\ - \ 5 times in the English Wikipedia, and discard candidates that contain tokens\ - \ that are not found in this vocabulary. candidates that score too high or\ - \ too low on the polarity annotations, or trigger the pornography/profanity\ - \ detectors, are discarded; predefined boiler-plate prefix/suffix sequences\ - \ matching the text are cropped, e.g. “click to enlarge picture”, “stock photo”;\ - \ we also drop text which begins/ends in certain patterns, e.g. “embedded\ - \ image permalink”, “profile photo”. These filters only allow around 3% of\ - \ the incoming candidates to pass to the later stages.\"\n3. 
Image&Text-based\ - \ Filtering - \"We filter out candidates for which none of the text tokens\ - \ can be mapped to the content of the image. This filter discards around 60%\ - \ of the incoming candidates.\"\n4. Text Transformation with Hypernymization\ - \ - \"Noun modifiers of certain types (proper nouns, numbers, units) are removed;\ - \ dates, durations, and preposition-based locations (e.g., \"in Los Angeles\"\ - ) are removed; named-entities are identified, matched against the KG entries,\ - \ and substitute with their hypernym; resulting coordination noun-phrases\ - \ with the same head (e.g., \"actor and actor\") are resolved into a single-head,\ - \ pluralized form (e.g., \"actors\"). Around 20% of samples are discarded\ - \ during this transformation. We then cluster all resolved entities (e.g.,\ - \ 2560 \"actor\", \"dog\", \"neighborhood\", etc.) and keep only the candidates\ - \ for which all detected types have a count of over 100 (around 55% of the\ - \ candidates).\"\n" - feedback: Feedback can be provided by creating an issue in the [[Conceptual Captions - GitHub repository]](https://github.com/google-research-datasets/conceptual-captions) - or by emailing at conceptual-captions at google.com - included: '' - intended_uses: '' license: - explanation: "[[Conceptual Captions License]](https://github.com/google-research-datasets/conceptual-captions/blob/master/LICENSE)\n" value: Conceptual Captions License - modality: {} - monitoring: unknown - name: Conceptual Captions - organization: Google - prohibited_uses: unknown - quality_control: + explanation: > + [[Conceptual Captions License]](https://github.com/google-research-datasets/conceptual-captions/blob/master/LICENSE) + included: '' + excluded: + value: | + The following filtering steps are applied in the given order: + 1. Image-based Filtering - "It only keeps JPEG images where both dimensions are greater than 400 pixels, and the ratio of larger to smaller dimension is no more than 2. It excludes images that trigger pornography or profanity detectors. These filters discard more than 65% of the candidates." + 2. Text-based Filtering - "Candidates with no determiner, no noun, or no preposition are discarded; candidates with a high noun ratio are also discarded; candidates with a high rate of token repetition are discarded; candidates where the first word is not capitalized, or with too high capitalized-word ratio are discarded; we use a vocabulary VW of 1B token types, appearing at least 5 times in the English Wikipedia, and discard candidates that contain tokens that are not found in this vocabulary. candidates that score too high or too low on the polarity annotations, or trigger the pornography/profanity detectors, are discarded; predefined boiler-plate prefix/suffix sequences matching the text are cropped, e.g. “click to enlarge picture”, “stock photo”; we also drop text which begins/ends in certain patterns, e.g. “embedded image permalink”, “profile photo”. These filters only allow around 3% of the incoming candidates to pass to the later stages." + 3. Image&Text-based Filtering - "We filter out candidates for which none of the text tokens can be mapped to the content of the image. This filter discards around 60% of the incoming candidates." + 4. 
Text Transformation with Hypernymization - "Noun modifiers of certain types (proper nouns, numbers, units) are removed; dates, durations, and preposition-based locations (e.g., "in Los Angeles") are removed; named-entities are identified, matched against the KG entries, and substitute with their hypernym; resulting coordination noun-phrases with the same head (e.g., "actor and actor") are resolved into a single-head, pluralized form (e.g., "actors"). Around 20% of samples are discarded during this transformation. We then cluster all resolved entities (e.g., 2560 "actor", "dog", "neighborhood", etc.) and keep only the candidates for which all detected types have a count of over 100 (around 55% of the candidates)." explanation: See [[Conceptual Captions Dataset Creation]](https://aclanthology.org/P18-1238.pdf#section.3) + quality_control: value: Input candidate (image, caption) pairs pass through several stages of filtering and processing to ensure quality. - sample: [] - size: 3.3M (image, text) pairs - type: dataset - url: https://aclanthology.org/P18-1238/ -- access: - explanation: Dataset is available at [[Conceptual 12M GitHub repository]](https://github.com/google-research-datasets/conceptual-12m). + explanation: See [[Conceptual Captions Dataset Creation]](https://aclanthology.org/P18-1238.pdf#section.3) + # Downstream + access: value: open - analysis: - explanation: See [[Evaluating Vision-and-Language PreTraining Data]](https://arxiv.org/pdf/2102.08981.pdf#section.3) - value: "The dataset is benchmarked against CC3M on two most fundamental V+L\ - \ tasks: vision-to-language generation and vision-and-language matching, with\ - \ an emphasis on long-tail visual recognition. The results illustrate the\ - \ benefit of scaling up pre-training data for vision-and-language tasks, as\ - \ indicated by the new state-of-the-art results on both the nocaps and Conceptual\ - \ Captions benchmarks.\n" + explanation: Dataset can be downloaded at [[Download]](https://ai.google.com/research/ConceptualCaptions/download) + intended_uses: '' + prohibited_uses: unknown + monitoring: unknown + feedback: Feedback can be provided by creating an issue in the [[Conceptual Captions + GitHub repository]](https://github.com/google-research-datasets/conceptual-captions) + or by emailing at conceptual-captions at google.com + +- type: dataset + name: Conceptual 12M + # General + organization: Google + description: > + A dataset with 12 million image-text pairs specifically meant to be used for + vision-and-language pre-training. created_date: - explanation: "The date the [[paper]](https://arxiv.org/abs/2102.08981) was submitted.\n" value: 2021-02-17 + explanation: > + The date the [[paper]](https://arxiv.org/abs/2102.08981) was submitted. + url: https://arxiv.org/pdf/2102.08981.pdf datasheet: none + modality: text (English) and image + size: 12M (image, text) pairs + sample: [] + analysis: + value: > + The dataset is benchmarked against CC3M on two most fundamental V+L + tasks: vision-to-language generation and vision-and-language matching, with + an emphasis on long-tail visual recognition. The results illustrate the + benefit of scaling up pre-training data for vision-and-language tasks, as + indicated by the new state-of-the-art results on both the nocaps and Conceptual + Captions benchmarks. 
+ explanation: See [[Evaluating Vision-and-Language PreTraining Data]](https://arxiv.org/pdf/2102.08981.pdf#section.3) + # Construction dependencies: [] - description: "A dataset with 12 million image-text pairs specifically meant to\ - \ be used for vision-and-language pre-training.\n" - excluded: - explanation: See [[Relaxing filters for higher recall]](https://arxiv.org/pdf/2102.08981.pdf#subsection.2.2) - value: "Some of the filtering steps used in the preparation of Conceptual Captions\ - \ dataset are relaxed to trade off high-recall for low-precision. The following\ - \ steps are applied in the given order:\n1. Image-based Filtering - Only keep\ - \ JPEG images where both dimensions are greater than 400 pixels, and the ratio\ - \ of larger to smaller dimension is no more than 2.5. Exclude images that\ - \ trigger pornography or profanity detectors.\n2. Text-based Filtering - Allow\ - \ text between 3 and 256 words in the alt-text. Discard candidates with no\ - \ noun or no determiner, but permit ones without prepositions. Set the maximum\ - \ fraction of word repetition allowed to 0.2. Increase the threshold for counting\ - \ a word type as rare from 5 to 20.\n3. Image&Text-based Filtering - Filter\ - \ out candidates for which none of the text tokens can be mapped to the content\ - \ of the image.\n" - feedback: Feedback can be provided by creating an issue in the [[Conceptual 12M - GitHub repository]](https://github.com/google-research-datasets/conceptual-12m) - or by emailing at conceptual-captions at google.com - included: '' - intended_uses: '' license: - explanation: "[[Conceptual Captions License]](https://github.com/google-research-datasets/conceptual-captions/blob/master/LICENSE)\n" value: Conceptual Captions License - modality: - explanation: text (English) and image - value: image, text - monitoring: unknown - name: Conceptual 12M - organization: Google - prohibited_uses: unknown + explanation: > + [[Conceptual Captions License]](https://github.com/google-research-datasets/conceptual-captions/blob/master/LICENSE) + included: '' + excluded: + value: | + Some of the filtering steps used in the preparation of Conceptual Captions dataset are relaxed to trade off high-recall for low-precision. The following steps are applied in the given order: + 1. Image-based Filtering - Only keep JPEG images where both dimensions are greater than 400 pixels, and the ratio of larger to smaller dimension is no more than 2.5. Exclude images that trigger pornography or profanity detectors. + 2. Text-based Filtering - Allow text between 3 and 256 words in the alt-text. Discard candidates with no noun or no determiner, but permit ones without prepositions. Set the maximum fraction of word repetition allowed to 0.2. Increase the threshold for counting a word type as rare from 5 to 20. + 3. Image&Text-based Filtering - Filter out candidates for which none of the text tokens can be mapped to the content of the image. + explanation: See [[Relaxing filters for higher recall]](https://arxiv.org/pdf/2102.08981.pdf#subsection.2.2) quality_control: - explanation: See [[Vision-and-Language Pre-Training Data]](https://arxiv.org/pdf/2102.08981.pdf#section.2) value: Input candidate (image, caption) pairs pass through several stages of filtering and processing to ensure quality. Person-name substitutions are performed in the alt-texts to protect the privacy of individuals in the associated images. 
- sample: [] - size: 12M (image, text) pairs - type: dataset - url: https://arxiv.org/pdf/2102.08981.pdf -- access: - explanation: https://huggingface.co/t5-large + explanation: See [[Vision-and-Language Pre-Training Data]](https://arxiv.org/pdf/2102.08981.pdf#section.2) + # Downstream + access: value: open - analysis: https://huggingface.co/t5-base#evaluation + explanation: Dataset is available at [[Conceptual 12M GitHub repository]](https://github.com/google-research-datasets/conceptual-12m). + intended_uses: '' + prohibited_uses: unknown + monitoring: unknown + feedback: Feedback can be provided by creating an issue in the [[Conceptual 12M + GitHub repository]](https://github.com/google-research-datasets/conceptual-12m) + or by emailing at conceptual-captions at google.com + +# Models +- type: model + name: T5 + organization: Google + description: Text-To-Text Transfer Transformer (T5) is a model that unifies all + NLP tasks under the text-to-text format. created_date: - explanation: The date the T5 paper was made public. value: 2019-10-23 + explanation: The date the T5 paper was made public. + url: https://arxiv.org/abs/1910.10683 + model_card: https://huggingface.co/t5-base + modality: text (English) + size: + value: 11B parameters (dense model) + explanation: T5 models were trained on several sizes including 3B and 11B parameters. + analysis: https://huggingface.co/t5-base#evaluation dependencies: [C4] - description: Text-To-Text Transfer Transformer (T5) is a model that unifies all - NLP tasks under the text-to-text format. - feedback: https://huggingface.co/t5-large/discussions - intended_uses: NLP tasks + training_emissions: unknown + training_time: unknown + training_hardware: 1,024 TPU v3 chips (Cloud TPU Pods) + quality_control: The T5 paper documents many analyses/ablations that were considered + before arriving at the final architecture/training procedure. + access: + value: open + explanation: https://huggingface.co/t5-large license: Apache 2.0 - modality: {} - model_card: https://huggingface.co/t5-base + intended_uses: NLP tasks + prohibited_uses: unknown monitoring: none - name: T5 + feedback: https://huggingface.co/t5-large/discussions + +- type: model + name: Internal Google BERT + # General organization: Google - prohibited_uses: unknown - quality_control: The T5 paper documents many analyses/ablations that were considered - before arriving at the final architecture/training procedure. - size: '' - training_emissions: unknown - training_hardware: 1,024 TPU v3 chips (Cloud TPU Pods) - training_time: unknown - type: model - url: https://arxiv.org/abs/1910.10683 -- access: closed - analysis: unknown + description: > + Internal Google BERT model used to power Google Search products. created_date: - explanation: "The date of the Google product update blog announcing that BERT\ - \ models were for ranking and featured snippets in Search.\n" value: 2019-11-25 + explanation: > + The date of the Google product update blog announcing that BERT models + were for ranking and featured snippets in Search. 
+ url: https://blog.google/products/search/search-language-understanding-bert/ + model_card: unknown + modality: text + size: unknown + analysis: unknown + # Construction dependencies: [Internal Google BERT dataset] - description: "Internal Google BERT model used to power Google Search products.\n" - feedback: unknown - intended_uses: - explanation: "We don't have an exhaustive list of the intended use cases for\ - \ the Internal Google BERT model, but we know that Google Search was powered\ - \ by a fine-tuned BERT.\n" - value: unknown + training_emissions: unknown + training_time: unknown + training_hardware: unknown + quality_control: unknown + # Downstream + access: closed license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - modality: {} - model_card: unknown + explanation: > + The asset isn't released, and hence the license is unknown. + intended_uses: + value: unknown + explanation: > + We don't have an exhaustive list of the intended use cases for the + Internal Google BERT model, but we know that Google Search was powered + by a fine-tuned BERT. + prohibited_uses: unknown monitoring: unknown - name: Internal Google BERT + feedback: unknown + +- type: application + name: Google Search + # General organization: Google - prohibited_uses: unknown - quality_control: unknown - size: '' - training_emissions: unknown - training_hardware: unknown - training_time: unknown - type: model - url: https://blog.google/products/search/search-language-understanding-bert/ -- access: open - adaptation: unknown + description: > + Google Search is Google's search engine. created_date: - explanation: "The date of the Google product update blog announcing that BERT\ - \ models were for ranking and featured snippets in Search.\n" value: 2019-11-25 + explanation: > + The date of the Google product update blog announcing that BERT models + were for ranking and featured snippets in Search. + url: https://blog.google/products/search/search-language-understanding-bert/ + # Construction dependencies: [Internal Google BERT, MUM] - description: "Google Search is Google's search engine.\n" - failures: unknown - feedback: "Feedback can be sent to Google Feedback using the product interface\ - \ [[Google Feedback]](https://www.google.com/tools/feedback).\n" - intended_uses: Searching the web using text, voice or image - license: - explanation: No license was found. - value: none - monitoring: "It is implied that Google scan uses of its products for spam, malware\ - \ and illegal content in the [[Term of Service]](https://policies.google.com/terms).\n" - monthly_active_users: unknown - name: Google Search - organization: Google + adaptation: unknown output_space: web page ranking - prohibited_uses: "Prohibited use cases aren't specifically spelled out for Google\ - \ search, but several illegal and discouraged use cases are shared in the Respect\ - \ Others section of the [[Term of Service]](https://policies.google.com/terms).\n" quality_control: unknown + # Downstream + access: open + license: + value: none + explanation: No license was found. 
terms_of_service: https://policies.google.com/terms - type: application - url: https://blog.google/products/search/search-language-understanding-bert/ + intended_uses: Searching the web using text, voice or image + prohibited_uses: > + Prohibited use cases aren't specifically spelled out for Google search, but + several illegal and discouraged use cases are shared in the Respect Others + section of the [[Term of Service]](https://policies.google.com/terms). + monitoring: > + It is implied that Google scan uses of its products for spam, + malware and illegal content in the + [[Term of Service]](https://policies.google.com/terms). + feedback: > + Feedback can be sent to Google Feedback using the product interface + [[Google Feedback]](https://www.google.com/tools/feedback). + # Deployment + monthly_active_users: unknown user_distribution: unknown -- access: closed - analysis: unknown + failures: unknown + +- type: dataset + name: Infiniset + # General + organization: Google + description: > + Infiniset "is a combination of dialog data from public dialog data and + other public web documents" + [[Appendix E]](https://arxiv.org/pdf/2201.08239.pdf#appendix.E). created_date: - explanation: "The date of the Google company news blog announcing LaMDA [[Google\ - \ News Blog]](https://blog.google/technology/ai/lamda/).\n" value: 2021-06-18 + explanation: > + The date of the Google company news blog announcing LaMDA + [[Google News Blog]](https://blog.google/technology/ai/lamda/). + url: https://arxiv.org/pdf/2201.08239.pdf datasheet: none - dependencies: [] - description: "Infiniset \"is a combination of dialog data from public dialog data\ - \ and other public web documents\" [[Appendix E]](https://arxiv.org/pdf/2201.08239.pdf#appendix.E).\n" - excluded: unknown - feedback: - explanation: "Author contact information was not provided.\n" - value: none - included: "Included in the dataset are data from \"public forums (0%); C4 data\ - \ (12.5% ); code documents from sites related to programming like Q&A sites\ - \ tutorials, etc (12.5%); Wikipedia (English) (12.5%); English web documents\ - \ (6.25%); and Non-English web documents (6.25%).\"\n" - intended_uses: - explanation: "Intended uses of the dataset wasn't explicitly linked, but it\ - \ is likely intended for training language models specialized in dialogue.\n" + modality: text and code + size: value: unknown + explanation: > + The size of the dataset is unclear, but it is reported that the dataset + "consists of 2.97B documents and 1.12B dialogs with 13.39B utterances" + [[Appendix E]](https://arxiv.org/pdf/2201.08239.pdf#appendix.E). + sample: [] + analysis: unknown + # Construction + dependencies: [] license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - modality: - explanation: text and code - value: code, text - monitoring: unknown - name: Infiniset - organization: Google - prohibited_uses: "The prohibited uses for Infiniset weren't specifically listed,\ - \ but the Google AI principles inspired safety objectives in [[Appendix A.1]](https://arxiv.org/pdf/2201.08239.pdf#subsection.A.1)\ - \ advises avoiding harm, unjust impact and misinformation, among others.\n" + explanation: > + The asset isn't released, and hence the license is unknown. 
+ included: > + Included in the dataset are data from "public forums (0%); C4 data (12.5% ); + code documents from sites related to programming like Q&A sites tutorials, + etc (12.5%); Wikipedia (English) (12.5%); English web documents (6.25%); + and Non-English web documents (6.25%)." + excluded: unknown quality_control: unknown - sample: [] - size: - explanation: "The size of the dataset is unclear, but it is reported that the\ - \ dataset \"consists of 2.97B documents and 1.12B dialogs with 13.39B utterances\"\ - \ [[Appendix E]](https://arxiv.org/pdf/2201.08239.pdf#appendix.E).\n" + # Downstream + access: closed + intended_uses: value: unknown - type: dataset - url: https://arxiv.org/pdf/2201.08239.pdf -- access: closed - analysis: "The model performance was analyzed on sensibleness, specificity and\ - \ interestingness. The model was also analyzed on safety, following metrics\ - \ derived from Google AI Principles [[Appendix A.1]](https://arxiv.org/pdf/2201.08239.pdf#subsection.A.1).\ - \ Finally, the model was analyzed on groundedness, testing its ability to produce\ - \ responses that can be associated with \"known sources whenever possible [[Section\ - \ 4.1]](https://arxiv.org/pdf/2201.08239.pdf#subsection.4.1).\"\n" - created_date: - explanation: "The date of the Google company news blog announcing LaMDA [[Google\ - \ News Blog]](https://blog.google/technology/ai/lamda/).\n" - value: 2021-06-18 - dependencies: [Infiniset] - description: "LaMDA stands for Language Models for Dialog Application. It is a\ - \ transformer based language model trained on dialogue data.\n" + explanation: > + Intended uses of the dataset wasn't explicitly linked, but it is likely + intended for training language models specialized in dialogue. + prohibited_uses: > + The prohibited uses for Infiniset weren't specifically listed, but the + Google AI principles inspired safety objectives in + [[Appendix A.1]](https://arxiv.org/pdf/2201.08239.pdf#subsection.A.1) + advises avoiding harm, unjust impact and misinformation, among others. + monitoring: unknown feedback: - explanation: "Author contact information was not provided.\n" value: none - intended_uses: "LaMDA is a language model, so it can be used for regular langauge\ - \ modelling tasks without fine-tuning, but its fine-tuned for dialogue tasks.\n" - license: - explanation: "The asset isn't released, and hence the license is unknown.\n" - value: unknown - modality: - explanation: text - value: text; text - model_card: none - monitoring: unknown + explanation: > + Author contact information was not provided. + +- type: model name: LaMDA + # General organization: Google - prohibited_uses: "The prohibited uses of LaMDA weren't specifically listed, but\ - \ the Google AI principles inspired safety objectives in [[Appendix A.1]](https://arxiv.org/pdf/2201.08239.pdf#subsection.A.1)\ - \ advises avoiding harm, unjust impact and misinformation, among others.\n" - quality_control: "LaMDA was fine-tuned to predict sensibleness, specificity and\ - \ interestingness as well as safety. Then, the candidates were filtered out\ - \ if the model safety predictions were below a certain threshold. The next candidates\ - \ in the conversation were selected as a combination of these predictions. The\ - \ model was also fine-tuned for groundedness. The results are shown in [[Figure\ - \ 5]](https://arxiv.org/pdf/2201.08239.pdf#figure.caption.23).\n" - size: 137B parameters (dense) + description: > + LaMDA stands for Language Models for Dialog Application. 
It is a transformer + based language model trained on dialogue data. + created_date: + value: 2021-06-18 + explanation: > + The date of the Google company news blog announcing LaMDA + [[Google News Blog]](https://blog.google/technology/ai/lamda/). + url: https://arxiv.org/pdf/2201.08239.pdf + model_card: none + modality: text + size: + value: 137B parameters (dense model) + explanation: > + Along with the 137B model, the authors also trained 2B and 8B LaMDA + models. + analysis: > + The model performance was analyzed on sensibleness, specificity and + interestingness. + The model was also analyzed on safety, following + metrics derived from Google AI Principles + [[Appendix A.1]](https://arxiv.org/pdf/2201.08239.pdf#subsection.A.1). + Finally, the model was analyzed on groundedness, testing its ability to + produce responses that can be associated with "known sources whenever + possible + [[Section 4.1]](https://arxiv.org/pdf/2201.08239.pdf#subsection.4.1)." + # Construction + dependencies: [Infiniset] training_emissions: - explanation: "\"...total carbon footprint of LaMDA’s pre-training of the largest\ - \ model is approximately 25.2 tCO2e. The carbon footprint of pre-training\ - \ of smaller models and fine-tuning of all models is approximately 0.7 tCO2e\ - \ ... which brings the total footprint of LaMDA to approximately 26 tCO2e\ - \ [[Section 10]](https://arxiv.org/pdf/2201.08239.pdf#section.10)\"\n" value: 26 tCO2e - training_hardware: - explanation: "Reported in [[Section 10]](https://arxiv.org/pdf/2201.08239.pdf#section.10).\n" - value: 1024 TPU-V3 chips + explanation: > + "...total carbon footprint of LaMDA’s pre-training of the largest model + is approximately 25.2 tCO2e. The carbon footprint of pre-training of + smaller models and fine-tuning of all models is approximately 0.7 tCO2e + ... which brings the total footprint of LaMDA to approximately 26 + tCO2e + [[Section 10]](https://arxiv.org/pdf/2201.08239.pdf#section.10)" training_time: - explanation: "The total number of training flops of LaMDA was reported as 3.55E+23\ - \ (3.55E+8 petaflops) [[Section 10]](https://arxiv.org/pdf/2201.08239.pdf#section.10),\ - \ which is equal to 4108.80 = 3.55E+8 / (60 * 60 * 24) petaflop/s-day.\n" value: 4108.80 petaflop/s-day - type: model - url: https://arxiv.org/pdf/2201.08239.pdf -- access: closed - analysis: unknown - created_date: - explanation: "The date of the Google AI blog announcing the details of PaLM\ - \ [[Google AI Blog]](https://ai.googleblog.com/2022/04/pathways-language-model-palm-scaling-to.html).\n" - value: 2022-04-04 - datasheet: https://arxiv.org/pdf/2204.02311.pdf#appendix.D - dependencies: [Infiniset] - description: "PaLM dataset \"was created for pre-training language models\" [[Datasheet]](https://arxiv.org/pdf/2204.02311.pdf#appendix.D).\n" - excluded: "GitHub repositories with copyleft licenses were excluded. Programming\ - \ languageges other than the most common 24 were excluded [[Section 3]](https://arxiv.org/pdf/2204.02311.pdf#section.3).\n" - feedback: - explanation: "Author contact information is shared in the paper [[Paper]](https://arxiv.org/pdf/2204.02311.pdf).\n" - value: Contact the authors. - included: "The dataset is based on Infiniset. It included multilingual text containing\ - \ text from over 100 languages. 
The breakdown of the data included is as follows:\ - \ Social media conversations (multilingual) 50, Filtered webpages (multilingual)\ - \ 27%, BooksCorpus (English) 13%, GitHub (code) 5%, Wikipedia (multilingual)\ - \ 4%, and News (English) 1%. Code was collected from GitHub repositories with\ - \ appropriate licenses, totalling 96GB of source code [[Section 3]](https://arxiv.org/pdf/2204.02311.pdf#section.3).\n" - intended_uses: - explanation: "As stated in [[Datasheet]](https://arxiv.org/pdf/2204.02311.pdf#appendix.D).\n" - value: "\"The dataset was created for pre-training language models by a team\ - \ of researchers at Google\".\n" + explanation: > + The total number of training flops of LaMDA was reported as 3.55E+23 + (3.55E+8 petaflops) + [[Section 10]](https://arxiv.org/pdf/2201.08239.pdf#section.10), which is + equal to 4108.80 = 3.55E+8 / (60 * 60 * 24) petaflop/s-day. + training_hardware: + value: 1024 TPU-V3 chips + explanation: > + Reported in [[Section 10]](https://arxiv.org/pdf/2201.08239.pdf#section.10). + quality_control: > + LaMDA was fine-tuned to predict sensibleness, specificity and + interestingness as well as safety. Then, the candidates were filtered out + if the model safety predictions were below a certain threshold. The next + candidates in the conversation were selected as a combination of these + predictions. The model was also fine-tuned for groundedness. The results + are shown in + [[Figure 5]](https://arxiv.org/pdf/2201.08239.pdf#figure.caption.23). + # Downstream + access: closed license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - modality: - explanation: text and code - value: code, text + explanation: > + The asset isn't released, and hence the license is unknown. + intended_uses: > + LaMDA is a language model, so it can be used for regular langauge modelling + tasks without fine-tuning, but its fine-tuned for dialogue tasks. + prohibited_uses: > + The prohibited uses of LaMDA weren't specifically listed, but the Google + AI principles inspired safety objectives in + [[Appendix A.1]](https://arxiv.org/pdf/2201.08239.pdf#subsection.A.1) + advises avoiding harm, unjust impact and misinformation, among others. monitoring: unknown + feedback: + value: none + explanation: > + Author contact information was not provided. + +- type: dataset name: PaLM dataset + # General organization: Google - prohibited_uses: "\"... should not be used for any of the unacceptable language\ - \ model use cases, e.g., generation of toxic speech\" [[Datasheet]](https://arxiv.org/pdf/2204.02311.pdf#appendix.D).\n" - quality_control: "In order to reduce low quality web pages, the web pages were\ - \ sampled according to a \"quality score\" classifier. Code files were de-duplicated\ - \ using Levenshtein distance [[Section 3]](https://arxiv.org/pdf/2204.02311.pdf#section.3).\n" - sample: [] + description: > + PaLM dataset "was created for pre-training language models" + [[Datasheet]](https://arxiv.org/pdf/2204.02311.pdf#appendix.D). + created_date: + value: 2022-04-04 + explanation: > + The date of the Google AI blog announcing the details of PaLM + [[Google AI Blog]](https://ai.googleblog.com/2022/04/pathways-language-model-palm-scaling-to.html). 
+ url: https://arxiv.org/pdf/2204.02311.pdf + datasheet: https://arxiv.org/pdf/2204.02311.pdf#appendix.D + modality: text and code size: - explanation: "Dataset size in GB is not reported, but the dataset is reported\ - \ to have 780 billion tokens [[Datasheet]](https://arxiv.org/pdf/2204.02311.pdf#appendix.D).\ - \ The code portion of the dataset is reported to be 5% totaling a 196GB of\ - \ source code [[Section 3]](https://arxiv.org/pdf/2204.02311.pdf#section.3).\ - \ It is unclear whether the reported size is before or after de-duplication.\ - \ Nonetheless, one can estimate the dataset size by multiplying 196GB with\ - \ 20 = 3.92 TB.\n" value: 3.92 TB - type: dataset - url: https://arxiv.org/pdf/2204.02311.pdf -- access: - explanation: Weights can be downloaded from [Github](https://github.com/google-research/t5x/blob/main/docs/models.md) - value: open - analysis: Evaluated on a variety of standard language datasets. - created_date: - explanation: Date paper was released - value: 2022-10-20 - dependencies: [T5, Muffin, P3, NaturalInstructions-v2, Flan CoT] - description: Flan-T5 is a version of the T5 language model fine-tuned on instruction - data - feedback: https://huggingface.co/google/flan-t5-xxl/discussions - intended_uses: unknown + explanation: > + Dataset size in GB is not reported, but the dataset is reported to have + 780 billion tokens + [[Datasheet]](https://arxiv.org/pdf/2204.02311.pdf#appendix.D). + The code portion of the dataset is reported to be 5% totaling a 196GB + of source code + [[Section 3]](https://arxiv.org/pdf/2204.02311.pdf#section.3). + It is unclear whether the reported size is before or after de-duplication. + Nonetheless, one can estimate the dataset size by multiplying 196GB with + 20 = 3.92 TB. + sample: [] + analysis: unknown + # Construction + dependencies: [Infiniset] license: - explanation: License on the [[Github repository]](https://github.com/google-research/google-research) - value: Apache 2.0 - modality: - explanation: text (English) - value: text; text - model_card: https://arxiv.org/pdf/2210.11416.pdf - monitoring: none + value: unknown + explanation: > + The asset isn't released, and hence the license is unknown. + included: > + The dataset is based on Infiniset. It included multilingual text containing + text from over 100 languages. The breakdown of the data included is as + follows: Social media conversations (multilingual) 50, Filtered webpages + (multilingual) 27%, BooksCorpus (English) 13%, GitHub (code) 5%, Wikipedia + (multilingual) 4%, and News (English) 1%. + Code was collected from GitHub repositories with appropriate licenses, + totalling 96GB of source code + [[Section 3]](https://arxiv.org/pdf/2204.02311.pdf#section.3). + excluded: > + GitHub repositories with copyleft licenses were excluded. Programming + languageges other than the most common 24 were excluded + [[Section 3]](https://arxiv.org/pdf/2204.02311.pdf#section.3). + quality_control: > + In order to reduce low quality web pages, the web pages were sampled + according to a "quality score" classifier. + Code files were de-duplicated using Levenshtein distance + [[Section 3]](https://arxiv.org/pdf/2204.02311.pdf#section.3). + # Downstream + access: closed + intended_uses: + value: > + "The dataset was created for pre-training language models by a team of + researchers at Google". + explanation: > + As stated in + [[Datasheet]](https://arxiv.org/pdf/2204.02311.pdf#appendix.D). + prohibited_uses: > + "... 
should not be used for any of the unacceptable language model use
+    cases, e.g., generation of toxic speech"
+    [[Datasheet]](https://arxiv.org/pdf/2204.02311.pdf#appendix.D).
+  monitoring: unknown
+  feedback:
+    value: Contact the authors.
+    explanation: >
+      Author contact information is shared in the paper
+      [[Paper]](https://arxiv.org/pdf/2204.02311.pdf).
+
+- type: model
   name: Flan-T5
   organization: Google
-  prohibited_uses: none
+  description: Flan-T5 is a version of the T5 language model fine-tuned on instruction
+    data
+  created_date:
+    value: 2022-10-20
+    explanation: Date paper was released
+  url: https://arxiv.org/abs/2210.11416
+  model_card: https://arxiv.org/pdf/2210.11416.pdf
+  modality: text (English)
+  analysis: Evaluated on a variety of standard language datasets.
+  size: 11B parameters (dense model)
+  dependencies: [T5, Muffin, P3, NaturalInstructions-v2, Flan CoT]
+  training_emissions: Unknown
+  training_time: Unknown
+  training_hardware: 512 v4 TPU Chips
   quality_control: Across different multitask datasets, templates and formatting
     were maintained. For the chain-of-thoughts (CoT) data, specific exemplars were
     used.
-  size: 11B parameters (dense)
-  training_emissions: Unknown
-  training_hardware: 512 v4 TPU Chips
-  training_time: Unknown
-  type: model
-  url: https://arxiv.org/abs/2210.11416
-- access:
-    explanation: Model weights available for download in the [[Github repo]](https://github.com/google-research/google-research/tree/master/ul2)
+  access:
     value: open
+    explanation: Weights can be downloaded from [Github](https://github.com/google-research/t5x/blob/main/docs/models.md)
+  license:
+    value: Apache 2.0
+    explanation: License on the [[Github repository]](https://github.com/google-research/google-research)
+  intended_uses: unknown
+  prohibited_uses: none
+  monitoring: none
+  feedback: https://huggingface.co/google/flan-t5-xxl/discussions
+
+- type: model
+  name: UL2
+  organization: Google
+  description: UL2 is a language model trained with a new pretraining objective
   created_date:
-    explanation: Date mdoel paper was released
     value: 2022-05-10
+    explanation: Date model paper was released
+  url: https://arxiv.org/abs/2205.05131
+  model_card: ''
+  modality: text (English)
+  analysis: ''
+  size: 20B parameters (dense model)
   dependencies: [C4]
-  description: UL2 is a language model trained with a new pretraining objective
-  feedback: ''
-  intended_uses: ''
+  training_emissions: ''
+  training_time: ''
+  training_hardware: 128 TPUv4
+  quality_control: ''
+  access:
+    value: open
+    explanation: Model weights available for download in the [[Github repo]](https://github.com/google-research/google-research/tree/master/ul2)
   license:
-    explanation: 20B checkpoints only for three different iteration steps
     value: Apache 2.0
-  modality:
-    explanation: text (English)
-    value: text; text
-  model_card: ''
+    explanation: 20B checkpoints only for three different iteration steps
+  intended_uses: ''
+  prohibited_uses: ''
   monitoring: ''
-  name: UL2
+  feedback: ''
+
+- type: model
+  name: Parti
   organization: Google
-  prohibited_uses: ''
-  quality_control: ''
-  size: 20B parameters (dense)
-  training_emissions: ''
-  training_hardware: 128 TPUv4
-  training_time: ''
-  type: model
-  url: https://arxiv.org/abs/2205.05131
-- access:
-    explanation: Google does not provide access to Parti for external researchers.
-    value: closed
-  analysis: ''
+  description: Parti is an autoregressive text-to-image model
   created_date:
-    explanation: Date the model website was made public
     value: 2022-06-22
+    explanation: Date the model website was made public
+  url: https://parti.research.google/
+  model_card: ''
+  modality: Text (English) and Image
+  size: 20B parameters
+  analysis: ''
   dependencies: [C4, LAION-400M, FIT400M, JFT-4B]
-  description: Parti is a text-to-image diffusion model
-  feedback: ''
-  intended_uses: ''
+  training_emissions: ''
+  training_time: unknown
+  training_hardware: unknown
+  quality_control: ''
+  access:
+    value: closed
+    explanation: Google does not provide access to Parti for external researchers.
   license:
-    explanation: "The asset isn't released, and hence the license is unknown.\n"
     value: unknown
-  modality:
-    explanation: Text (English) and Image
-    value: image, text; image, text
-  model_card: ''
+    explanation: >
+      The asset isn't released, and hence the license is unknown.
+  intended_uses: ''
+  prohibited_uses: ''
   monitoring: ''
-  name: Parti
+  feedback: ''
+
+- type: model
+  name: Imagen
   organization: Google
-  prohibited_uses: ''
-  quality_control: ''
-  size: 20B parameters (dense)
-  training_emissions: ''
-  training_hardware: unknown
-  training_time: unknown
-  type: model
-  url: https://parti.research.google/
-- access: open
-  analysis: ''
+  description: Imagen is a text-to-image diffusion model
   created_date:
-    explanation: Date the model website was made public
     value: 2022-05-23
+    explanation: Date the model website was made public
+  url: https://imagen.research.google/
+  model_card: ''
+  modality: text (English) and image
+  size: >
+    14B parameters total. 2B parameters (U-Net model), 11B parameters (T5-XXL),
+    and 600M and 400M parameter models for super resolution
+  analysis: ''
   dependencies: [LAION-400M, Google internal image-text dataset]
-  description: Imagen is a text-to-image diffusion model
-  feedback: ''
-  intended_uses: ''
+  training_emissions: unknown
+  training_time: unknown
+  training_hardware: 128 TPU-v4
+  quality_control: ''
+  access: open
   license:
-    explanation: "The asset isn't released, and hence the license is unknown.\n"
     value: unknown
-  modality:
-    explanation: text (English) and image
-    value: image, text; image, text
-  model_card: ''
+    explanation: >
+      The asset isn't released, and hence the license is unknown.
+  intended_uses: ''
+  prohibited_uses: ''
   monitoring: ''
-  name: Imagen
+  feedback: ''
+
+- type: model
+  name: VATT
   organization: Google
-  prohibited_uses: ''
-  quality_control: ''
-  size: 14B parameters (dense)
-  training_emissions: unknown
-  training_hardware: 128 TPU-v4
-  training_time: unknown
-  type: model
-  url: https://imagen.research.google/
-- access:
-    explanation: Model checkpoints can be downloaded from the [[Github repository]](https://github.com/google-research/google-research/tree/master/vatt)
-    value: open
-  analysis: ''
+  description: VATT is a family of models trained on multimodal data
   created_date:
-    explanation: Date the model paper was made public
     value: 2022-04-22
+    explanation: Date the model paper was made public
+  url: https://arxiv.org/abs/2104.11178
+  model_card: ''
+  modality: Text (English), Video, and Audio
+  size: 155M parameters
+  analysis: ''
   dependencies: [AudioSet, HowTo100M]
-  description: VATT is a family of models trained on multimodal data
-  feedback: ''
-  intended_uses: ''
+  training_emissions: unknown
+  training_time: 3 days
+  training_hardware: 256 TPU-v3
+  quality_control: ''
+  access:
+    value: open
+    explanation: Model checkpoints can be downloaded from the [[Github repository]](https://github.com/google-research/google-research/tree/master/vatt)
   license:
-    explanation: License on the [[Github repository]](https://github.com/google-research/google-research)
     value: Apache 2.0
-  modality:
-    explanation: Text (English), Video, and Audio
-    value: audio, text, video; audio, text, video
-  model_card: ''
+    explanation: License on the [[Github repository]](https://github.com/google-research/google-research)
+  intended_uses: ''
+  prohibited_uses: ''
   monitoring: ''
-  name: VATT
+  feedback: ''
+
+- type: model
+  name: PaLM
+  # General
   organization: Google
-  prohibited_uses: ''
-  quality_control: ''
-  size: 155M parameters (dense)
-  training_emissions: unknown
-  training_hardware: 256 TPU-v3
-  training_time: 3 days
-  type: model
-  url: https://arxiv.org/abs/2104.11178
-- access:
-    explanation: Made available through the PaLM API as of March 14, 2023.
-    value: limited
-  analysis: "\"PaLM is evaluated on English Natural Language Processing (NLP) tasks,\
-    \ tasks from BIG-bench, reasoning tasks, code completion tasks, multilingual\
-    \ generation and question answering tasks, translation tasks, and bias and toxicity\
-    \ benchmarks\" [[Model Card]](https://arxiv.org/pdf/2204.02311.pdf#appendix.E).\n"
+  description: >
+    PaLM stands for Pathways Language Model, "dense decoder-only Transformer model
+    trained with the Pathways system"
+    [[Google AI Blog]](https://ai.googleblog.com/2022/04/pathways-language-model-palm-scaling-to.html).
   created_date:
-    explanation: "The date of the Google AI blog announcing the details of PaLM\
-      \ [[Google AI Blog]](https://ai.googleblog.com/2022/04/pathways-language-model-palm-scaling-to.html).\n"
     value: 2022-04-04
-  dependencies: [PaLM dataset]
-  description: "PaLM stands Pathways Language Model, \"dense decoder-only Transformer\
-    \ model trained with the Pathways system\" [[Google ai Blog]](https://ai.googleblog.com/2022/04/pathways-language-model-palm-scaling-to.html).\n"
-  feedback:
-    explanation: "Author contact information is shared in the paper [[Paper]](https://arxiv.org/pdf/2204.02311.pdf).\n"
-    value: Contact the authors.
- intended_uses: "\"The primary use is research on language models, including: research\ - \ on NLP applications like machine translation and question answering, advancing\ - \ fairness and safety research, and understanding limitations of current LLMs.\ - \ Within Google, PaLM is being used for research on a variety of open- ended\ - \ text and code generation tasks, including reasoning [[Section 6.3]](https://arxiv.org/pdf/2204.02311.pdf#subsection.6.3)\ - \ and code synthesis and understanding [[Section 6.4]](https://arxiv.org/pdf/2204.02311.pdf#subsection.6.4)\"\ - \ [[Model Card]](https://arxiv.org/pdf/2204.02311.pdf#appendix.E).\n" - license: - explanation: "The asset isn't released, and hence the license is unknown.\n" - value: unknown - modality: - explanation: text and code - value: code, text; code, text + explanation: > + The date of the Google AI blog announcing the details of PaLM + [[Google AI Blog]](https://ai.googleblog.com/2022/04/pathways-language-model-palm-scaling-to.html). + url: https://arxiv.org/pdf/2204.02311.pdf model_card: https://arxiv.org/pdf/2204.02311.pdf#appendix.E - monitoring: unknown - name: PaLM - organization: Google - prohibited_uses: "The model \"should not be used for downstream applications without\ - \ further analysis on factors in the proposed downstream application [[Model\ - \ Card]](https://arxiv.org/pdf/2204.02311.pdf#appendix.E)\"\n" - quality_control: Unknown - size: 540B parameters (dense) + modality: text and code + size: 540B parameters (dense model) + analysis: > + "PaLM is evaluated on English Natural Language Processing (NLP) tasks, tasks + from BIG-bench, reasoning tasks, code completion tasks, multilingual + generation and question answering tasks, translation tasks, and bias and + toxicity benchmarks" + [[Model Card]](https://arxiv.org/pdf/2204.02311.pdf#appendix.E). + # Construction + dependencies: [PaLM dataset] training_emissions: - explanation: "Reported in [[Appendix B]](https://arxiv.org/pdf/2204.02311.pdf#appendix.A)\n" value: 271.43 tCO2 - training_hardware: - explanation: "Reported in [[Section 4]](https://arxiv.org/pdf/2204.02311.pdf#section.4).\n" - value: 6144 TPU v4 chips + explanation: > + Reported in + [[Appendix B]](https://arxiv.org/pdf/2204.02311.pdf#appendix.A) training_time: - explanation: "Reported in [[Appendix B]](https://arxiv.org/pdf/2204.02311.pdf#appendix.A)\n" value: 29600 petaflop/s-days - type: model - url: https://arxiv.org/pdf/2204.02311.pdf -- access: limited - adaptation: '' - created_date: 2023-03-14 - dependencies: [PaLM] - description: a new developer offering that makes it easy and safe to experiment - with Google’s language models. - failures: '' - feedback: '' - intended_uses: '' - license: unknown - monitoring: '' - monthly_active_users: '' + explanation: > + Reported in + [[Appendix B]](https://arxiv.org/pdf/2204.02311.pdf#appendix.A) + training_hardware: + value: 6144 TPU v4 chips + explanation: > + Reported in [[Section 4]](https://arxiv.org/pdf/2204.02311.pdf#section.4). + quality_control: Unknown + # Downstream + access: + value: limited + explanation: Made available through the PaLM API as of March 14, 2023. + license: + value: unknown + explanation: > + The asset isn't released, and hence the license is unknown. + intended_uses: > + "The primary use is research on language models, including: research + on NLP applications like machine translation and question answering, + advancing fairness and safety research, and understanding limitations of + current LLMs. 
+    Within Google, PaLM is being used for research on a variety of open-ended
+    text and code generation tasks, including reasoning
+    [[Section 6.3]](https://arxiv.org/pdf/2204.02311.pdf#subsection.6.3)
+    and code synthesis and understanding
+    [[Section 6.4]](https://arxiv.org/pdf/2204.02311.pdf#subsection.6.4)"
+    [[Model Card]](https://arxiv.org/pdf/2204.02311.pdf#appendix.E).
+  prohibited_uses: >
+    The model "should not be used for downstream applications without further
+    analysis on factors in the proposed downstream application
+    [[Model Card]](https://arxiv.org/pdf/2204.02311.pdf#appendix.E)"
+  monitoring: unknown
+  feedback:
+    value: Contact the authors.
+    explanation: >
+      Author contact information is shared in the paper
+      [[Paper]](https://arxiv.org/pdf/2204.02311.pdf).
+
+- type: application
   name: PaLM API
   organization: Google
+  description: A new developer offering that makes it easy and safe to experiment
+    with Google’s language models.
+  created_date: 2023-03-14
+  url: https://developers.googleblog.com/2023/03/announcing-palm-api-and-makersuite.html
+  dependencies: [PaLM]
+  adaptation: ''
   output_space: ''
-  prohibited_uses: ''
   quality_control: ''
+  access: limited
+  license: unknown
   terms_of_service: ''
-  type: application
-  url: https://developers.googleblog.com/2023/03/announcing-palm-api-and-makersuite.html
-  user_distribution: ''
-- access: closed
-  analysis: ''
-  created_date: 2022-12-26
-  dependencies: [Flan-PaLM, MultiMedQA]
-  description: ''
-  feedback: ''
   intended_uses: ''
-  license:
-    explanation: "The asset isn't released, and hence the license is unknown.\n"
-    value: unknown
-  modality:
-    explanation: text
-    value: text; text
-  model_card: ''
+  prohibited_uses: ''
   monitoring: ''
+  feedback: ''
+  monthly_active_users: ''
+  user_distribution: ''
+  failures: ''
+
+- type: model
   name: Med-PaLM
   organization: Google
-  prohibited_uses: ''
-  quality_control: ''
-  size: 540B parameters (dense)
+  description: ''
+  created_date: 2022-12-26
+  url: https://arxiv.org/abs/2212.13138
+  model_card: ''
+  modality: text
+  analysis: ''
+  size: 540B
+  dependencies: [Flan-PaLM, MultiMedQA]
   training_emissions: ''
-  training_hardware: ''
   training_time: ''
-  type: model
-  url: https://arxiv.org/abs/2212.13138
-- access: closed
-  analysis: Evaluated on MultiMedBench tasks and radiologist evaluations of model-generated
-    chest X-ray reports
-  created_date: 2023-07-26
-  dependencies: [PaLM-E, MultiMedBench]
-  description: ''
-  feedback: ''
-  intended_uses: ''
+  training_hardware: ''
+  quality_control: ''
+  access: closed
   license:
-    explanation: "The asset isn't released, and hence the license is unknown.\n"
     value: unknown
-  modality:
-    explanation: text, image, and genomics input with text output
-    value: image, text; text
-  model_card: ''
+    explanation: >
+      The asset isn't released, and hence the license is unknown.
+ intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' + +- type: model name: Med-PaLM Multimodal organization: Google - prohibited_uses: '' - quality_control: '' + description: '' + created_date: 2023-07-26 + url: https://arxiv.org/pdf/2307.14334.pdf + model_card: '' + modality: text, image, and genomics input with text output + analysis: Evaluated on MultiMedBench tasks and radiologist evaluations of model-generated + chest X-ray reports size: 562B parameters (dense) + dependencies: [PaLM-E, MultiMedBench] training_emissions: '' - training_hardware: '' training_time: '' - type: model - url: https://arxiv.org/pdf/2307.14334.pdf -- access: closed - analysis: '' + training_hardware: '' + quality_control: '' + access: closed + license: + value: unknown + explanation: > + The asset isn't released, and hence the license is unknown. + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' + +- type: model + name: MultiMedQA + organization: Google + description: '' created_date: 2022-12-26 + url: https://arxiv.org/abs/2212.13138 + model_card: '' + modality: text + analysis: '' + size: unknown dependencies: - MedQA - MedMCQA @@ -708,149 +814,149 @@ - LiveQA - Medication QA - HealthSearchQA - description: '' - feedback: '' - intended_uses: '' - license: - explanation: "The asset isn't released, and hence the license is unknown.\n" - value: unknown - modality: - explanation: text - value: text; text - model_card: '' - monitoring: '' - name: MultiMedQA - organization: Google - prohibited_uses: '' - quality_control: '' - size: unknown training_emissions: '' - training_hardware: '' training_time: '' - type: model - url: https://arxiv.org/abs/2212.13138 -- access: closed - analysis: '' - created_date: 2022-10-20 - dependencies: [PaLM, Muffin, P3, NaturalInstructions-v2] - description: '' - feedback: '' - intended_uses: '' + training_hardware: '' + quality_control: '' + access: closed license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - modality: - explanation: text - value: text; text - model_card: '' + explanation: > + The asset isn't released, and hence the license is unknown. + intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' + +- type: model name: Flan-PaLM organization: Google - prohibited_uses: '' - quality_control: '' - size: 540B parameters (dense) - training_emissions: '' - training_hardware: '' - training_time: '' - type: model + description: '' + created_date: 2022-10-20 url: https://arxiv.org/abs/2210.11416 -- access: closed + model_card: '' + modality: text analysis: '' - created_date: 2022-10-20 - dependencies: [U-PaLM, Muffin, P3, NaturalInstructions-v2] - description: '' - feedback: '' - intended_uses: '' + size: 540B + dependencies: [PaLM, Muffin, P3, NaturalInstructions-v2] + training_emissions: '' + training_time: '' + training_hardware: '' + quality_control: '' + access: closed license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - modality: - explanation: text - value: text; text - model_card: '' + explanation: > + The asset isn't released, and hence the license is unknown. 
+ intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' + +- type: model name: Flan-U-PaLM organization: Google - prohibited_uses: '' - quality_control: '' - size: 540B parameters (dense) - training_emissions: '' - training_hardware: '' - training_time: '' - type: model + description: '' + created_date: 2022-10-20 url: https://arxiv.org/abs/2210.11416 -- access: open + model_card: '' + modality: text analysis: '' - created_date: 2021-09-03 - datasheet: '' - dependencies: [] - description: '' - excluded: '' - feedback: '' - included: '' + size: 540B + dependencies: [U-PaLM, Muffin, P3, NaturalInstructions-v2] + training_emissions: '' + training_time: '' + training_hardware: '' + quality_control: '' + access: closed + license: + value: unknown + explanation: > + The asset isn't released, and hence the license is unknown. intended_uses: '' - license: Apache 2.0 - modality: text + prohibited_uses: '' monitoring: '' + feedback: '' + +- type: dataset name: Muffin organization: Google - prohibited_uses: '' - quality_control: '' - sample: [] - size: 62 tasks - type: dataset - url: https://arxiv.org/abs/2109.01652 -- access: closed - analysis: '' - created_date: 2022-10-20 - dependencies: [PaLM, PaLM dataset] description: '' - feedback: '' + created_date: 2021-09-03 + url: https://arxiv.org/abs/2109.01652 + datasheet: '' + modality: text + size: 62 tasks + sample: [] + analysis: '' + dependencies: [] + included: '' + excluded: '' + quality_control: '' + access: open + license: Apache 2.0 intended_uses: '' - license: - explanation: "The asset isn't released, and hence the license is unknown.\n" - value: unknown - modality: - explanation: text - value: text; text - model_card: '' + prohibited_uses: '' monitoring: '' + feedback: '' + +- type: model name: U-PaLM organization: Google - prohibited_uses: '' - quality_control: '' - size: 540B parameters (dense) - training_emissions: '' - training_hardware: '' - training_time: '' - type: model + description: '' + created_date: 2022-10-20 url: https://arxiv.org/abs/2210.11399 -- access: closed + model_card: '' + modality: text analysis: '' - created_date: 2022-08-16 - dependencies: [PaLM] - description: '' - feedback: '' + size: 540B + dependencies: [PaLM, PaLM dataset] + training_emissions: '' + training_time: '' + training_hardware: '' + quality_control: '' + access: closed + license: + value: unknown + explanation: > + The asset isn't released, and hence the license is unknown. 
intended_uses: '' - license: unknown (model weights), Apache 2.0 (SayCan code) - modality: - explanation: text, robotic control - value: text; text - model_card: '' + prohibited_uses: '' monitoring: '' + feedback: '' + +- type: model name: PaLM-SayCan organization: Google - prohibited_uses: '' - quality_control: '' - size: 540B parameters (dense) - training_emissions: '' - training_hardware: '' - training_time: '' - type: model + description: '' + created_date: 2022-08-16 url: https://arxiv.org/abs/2204.01691 -- access: closed + model_card: '' + modality: text, robotic control analysis: '' + size: 540B + dependencies: [PaLM] + training_emissions: '' + training_time: '' + training_hardware: '' + quality_control: '' + access: closed + license: unknown (model weights), Apache 2.0 (SayCan code) + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' + +- type: model + name: GLaM + organization: Google + description: '' created_date: 2021-12-13 + url: https://arxiv.org/abs/2112.06905 + model_card: '' + modality: text + analysis: '' + size: 1.2T parameters (sparse) dependencies: - GLaM Web dataset - Wikipedia @@ -858,740 +964,750 @@ - GLaM Forums dataset - BooksCorpus - GLaM News dataset - description: '' - feedback: '' - intended_uses: '' - license: - explanation: "The asset isn't released, and hence the license is unknown.\n" - value: unknown - modality: - explanation: text - value: text; text - model_card: '' - monitoring: '' - name: GLaM - organization: Google - prohibited_uses: '' - quality_control: '' - size: 1.2T parameters (sparse) training_emissions: '' - training_hardware: '' training_time: '' - type: model - url: https://arxiv.org/abs/2112.06905 -- access: closed - analysis: '' - created_date: 2021-12-13 - datasheet: '' - dependencies: [] - description: '' - excluded: '' - feedback: '' - included: '' - intended_uses: '' + training_hardware: '' + quality_control: '' + access: closed license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - modality: text + explanation: > + The asset isn't released, and hence the license is unknown. + intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' + +- type: dataset name: GLaM Web dataset organization: Google - prohibited_uses: '' - quality_control: '' - sample: [] - size: unknown - type: dataset - url: https://arxiv.org/abs/2112.06905 -- access: closed - analysis: '' + description: '' created_date: 2021-12-13 + url: https://arxiv.org/abs/2112.06905 datasheet: '' + modality: text + size: unknown + sample: [] + analysis: '' dependencies: [] - description: '' - excluded: '' - feedback: '' included: '' - intended_uses: '' + excluded: '' + quality_control: '' + access: closed license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - modality: text + explanation: > + The asset isn't released, and hence the license is unknown. 
+ intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' + +- type: dataset name: GLaM Conversations dataset organization: Google - prohibited_uses: '' - quality_control: '' - sample: [] - size: unknown - type: dataset - url: https://arxiv.org/abs/2112.06905 -- access: closed - analysis: '' + description: '' created_date: 2021-12-13 + url: https://arxiv.org/abs/2112.06905 datasheet: '' + modality: text + size: unknown + sample: [] + analysis: '' dependencies: [] - description: '' - excluded: '' - feedback: '' included: '' - intended_uses: '' + excluded: '' + quality_control: '' + access: closed license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - modality: text + explanation: > + The asset isn't released, and hence the license is unknown. + intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' + +- type: dataset name: GLaM Forums dataset organization: Google - prohibited_uses: '' - quality_control: '' - sample: [] - size: unknown - type: dataset - url: https://arxiv.org/abs/2112.06905 -- access: closed - analysis: '' + description: '' created_date: 2021-12-13 + url: https://arxiv.org/abs/2112.06905 datasheet: '' + modality: text + size: unknown + sample: [] + analysis: '' dependencies: [] - description: '' - excluded: '' - feedback: '' included: '' - intended_uses: '' + excluded: '' + quality_control: '' + access: closed license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - modality: text + explanation: > + The asset isn't released, and hence the license is unknown. + intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' + +- type: dataset name: GLaM News dataset organization: Google - prohibited_uses: '' - quality_control: '' - sample: [] - size: unknown - type: dataset + description: '' + created_date: 2021-12-13 url: https://arxiv.org/abs/2112.06905 -- access: closed + datasheet: '' + modality: text + size: unknown + sample: [] analysis: '' - created_date: 2021-05-18 - dependencies: [MUM dataset] - description: '' - feedback: '' - intended_uses: '' + dependencies: [] + included: '' + excluded: '' + quality_control: '' + access: closed license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - modality: - explanation: text, image - value: image, text; image, text - model_card: '' + explanation: > + The asset isn't released, and hence the license is unknown. + intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' + +- type: model name: MUM organization: Google - prohibited_uses: '' - quality_control: '' + description: '' + created_date: 2021-05-18 + url: https://blog.google/products/search/introducing-mum/ + model_card: '' + modality: text, image + analysis: '' size: unknown + dependencies: [MUM dataset] training_emissions: '' - training_hardware: '' training_time: '' - type: model - url: https://blog.google/products/search/introducing-mum/ -- access: closed - analysis: '' - created_date: 2021-05-18 - datasheet: '' - dependencies: [] - description: '' - excluded: '' - feedback: '' - included: '' - intended_uses: '' + training_hardware: '' + quality_control: '' + access: closed license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - modality: - explanation: text, image - value: image, text + explanation: > + The asset isn't released, and hence the license is unknown. 
+ intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' + +- type: dataset name: MUM dataset organization: Google - prohibited_uses: '' - quality_control: '' - sample: [] - size: unknown - type: dataset + description: '' + created_date: 2021-05-18 url: https://blog.google/products/search/introducing-mum/ -- access: closed + datasheet: '' + modality: text, image + size: unknown + sample: [] analysis: '' - created_date: 2023-02-01 - dependencies: [LAION-400M, Phenaki Video-Text Corpus] - description: '' - feedback: '' - intended_uses: '' + dependencies: [] + included: '' + excluded: '' + quality_control: '' + access: closed license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - modality: - explanation: text, video - value: text, video; text, video - model_card: '' + explanation: > + The asset isn't released, and hence the license is unknown. + intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' + +- type: model name: Phenaki organization: Google - prohibited_uses: '' - quality_control: '' - size: 1.8B parameters (dense) - training_emissions: '' - training_hardware: '' - training_time: '' - type: model + description: '' + created_date: 2023-02-01 url: https://openreview.net/pdf?id=vOEXS39nOF -- access: closed + model_card: '' + modality: text, video analysis: '' + size: 1.8B parameters (dense model) + dependencies: [LAION-400M, Phenaki Video-Text Corpus] + training_emissions: '' + training_time: '' + training_hardware: '' + quality_control: '' + access: closed + license: + value: unknown + explanation: > + The asset isn't released, and hence the license is unknown. + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' + +- type: dataset + name: Phenaki Video-Text Corpus + organization: Google + description: '' created_date: 2023-02-01 + url: https://openreview.net/pdf?id=vOEXS39nOF datasheet: '' - dependencies: [] - description: '' - excluded: '' - feedback: '' + modality: text, video + size: 15M text-video pairs at 8FPS + sample: [] + analysis: '' + dependencies: [] included: '' - intended_uses: '' + excluded: '' + quality_control: '' + access: closed license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - modality: text, video + explanation: > + The asset isn't released, and hence the license is unknown. 
+ intended_uses: '' + prohibited_uses: '' monitoring: '' - name: Phenaki Video-Text Corpus + feedback: '' + +- type: model + name: Flan-UL2 organization: Google - prohibited_uses: '' - quality_control: '' - sample: [] - size: 15M text-video pairs at 8FPS - type: dataset - url: https://openreview.net/pdf?id=vOEXS39nOF -- access: open - analysis: '' + description: '' created_date: 2023-03-02 + url: https://arxiv.org/abs/2205.05131 + model_card: '' + modality: text + analysis: '' + size: 20B parameters (dense model) dependencies: [UL2, Flan Collection] - description: '' - feedback: '' - intended_uses: '' + training_emissions: '' + training_time: '' + training_hardware: '' + quality_control: '' + access: open license: Apache 2.0 - modality: - explanation: text - value: text; text - model_card: '' + intended_uses: '' + prohibited_uses: '' monitoring: '' - name: Flan-UL2 + feedback: '' + +- type: dataset + name: Flan Collection organization: Google - prohibited_uses: '' - quality_control: '' - size: 20B parameters (dense) - training_emissions: '' - training_hardware: '' - training_time: '' - type: model - url: https://arxiv.org/abs/2205.05131 -- access: open - analysis: '' + description: '' created_date: 2023-01-31 + url: https://arxiv.org/abs/2301.13688 datasheet: '' + modality: text + size: 1836 tasks + sample: [] + analysis: '' dependencies: [Flan dataset, P3, NaturalInstructions-v2] - description: '' - excluded: '' - feedback: '' included: '' - intended_uses: '' + excluded: '' + quality_control: '' + access: open license: Apache 2.0 - modality: text + intended_uses: '' + prohibited_uses: '' monitoring: '' - name: Flan Collection + feedback: '' + +- type: model + name: MusicLM organization: Google - prohibited_uses: '' - quality_control: '' - sample: [] - size: 1836 tasks - type: dataset - url: https://arxiv.org/abs/2301.13688 -- access: closed - analysis: '' + description: '' created_date: 2023-01-26 + url: https://arxiv.org/pdf/2301.11325.pdf + model_card: '' + modality: audio (music), text + analysis: '' + size: 1.4B parameters (430M semantic stage, 430M acoustic stage, 600M w2v-BERT) dependencies: - SoundStream - w2v-BERT - MuLan - MusicLM semantic model - MusicLM acoustic model - description: '' - feedback: '' - intended_uses: '' - license: - explanation: "The asset isn't released, and hence the license is unknown.\n" - value: unknown - modality: - explanation: audio (music), text - value: audio, text; audio, text - model_card: '' - monitoring: '' - name: MusicLM - organization: Google - prohibited_uses: '' - quality_control: '' - size: 1.4B parameters (dense) training_emissions: '' - training_hardware: '' training_time: '' - type: model - url: https://arxiv.org/pdf/2301.11325.pdf -- access: closed - analysis: '' - created_date: 2023-01-26 - dependencies: [Free Music Archive] - description: '' - feedback: '' - intended_uses: '' + training_hardware: '' + quality_control: '' + access: closed license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - modality: - explanation: audio (music) - value: audio; audio - model_card: '' + explanation: > + The asset isn't released, and hence the license is unknown. 
+ intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' + +- type: model name: SoundStream organization: Google - prohibited_uses: '' - quality_control: '' - size: unknown - training_emissions: '' - training_hardware: '' - training_time: '' - type: model + description: '' + created_date: 2023-01-26 url: https://arxiv.org/pdf/2301.11325.pdf -- access: closed + model_card: '' + modality: audio (music) analysis: '' - created_date: 2023-01-26 + size: unknown dependencies: [Free Music Archive] - description: '' - feedback: '' - intended_uses: '' + training_emissions: '' + training_time: '' + training_hardware: '' + quality_control: '' + access: closed license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - modality: - explanation: audio (music) - value: audio; audio - model_card: '' + explanation: > + The asset isn't released, and hence the license is unknown. + intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' + +- type: model name: w2v-BERT organization: Google - prohibited_uses: '' - quality_control: '' - size: 600M parameters (dense) - training_emissions: '' - training_hardware: '' - training_time: '' - type: model + description: '' + created_date: 2023-01-26 url: https://arxiv.org/pdf/2301.11325.pdf -- access: closed + model_card: '' + modality: audio (music) analysis: '' - created_date: 2022-08-26 - dependencies: [AST, BERT, MuLan dataset] - description: '' - feedback: '' - intended_uses: '' + size: 600M parameter (dense model) + dependencies: [Free Music Archive] + training_emissions: '' + training_time: '' + training_hardware: '' + quality_control: '' + access: closed license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - modality: - explanation: audio (music), text - value: audio, text; audio, text - model_card: '' + explanation: > + The asset isn't released, and hence the license is unknown. + intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' + +- type: model name: MuLan organization: Google - prohibited_uses: '' - quality_control: '' + description: '' + created_date: 2022-08-26 + url: https://arxiv.org/abs/2208.12415 + model_card: '' + modality: audio (music), text + analysis: '' size: Unknown + dependencies: [AST, BERT, MuLan dataset] training_emissions: '' - training_hardware: '' training_time: '' - type: model - url: https://arxiv.org/abs/2208.12415 -- access: closed - analysis: '' - created_date: 2022-08-26 - datasheet: '' - dependencies: [] - description: '' - excluded: '' - feedback: '' - included: '' - intended_uses: '' + training_hardware: '' + quality_control: '' + access: closed license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - modality: - explanation: audio (music), text - value: audio, text + explanation: > + The asset isn't released, and hence the license is unknown. 
+ intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' + +- type: dataset name: MuLan dataset organization: Google - prohibited_uses: '' - quality_control: '' - sample: [] - size: 370K hours audio - type: dataset + description: '' + created_date: 2022-08-26 url: https://arxiv.org/abs/2208.12415 -- access: closed - analysis: '' - created_date: 2023-01-26 datasheet: '' + modality: audio (music), text + size: 370K hours audio + sample: [] + analysis: '' dependencies: [] - description: '' - excluded: '' - feedback: '' included: '' - intended_uses: '' + excluded: '' + quality_control: '' + access: closed license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - modality: - explanation: audio (music) - value: audio + explanation: > + The asset isn't released, and hence the license is unknown. + intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' + +- type: dataset name: MusicLM dataset organization: Google - prohibited_uses: '' - quality_control: '' - sample: [] - size: 280K hours audio - type: dataset + description: '' + created_date: 2023-01-26 url: https://arxiv.org/pdf/2301.11325.pdf -- access: closed + datasheet: '' + modality: audio (music) + size: 280K hours audio + sample: [] analysis: '' - created_date: 2023-01-26 - dependencies: [MusicLM dataset] - description: '' - feedback: '' - intended_uses: '' + dependencies: [] + included: '' + excluded: '' + quality_control: '' + access: closed license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - modality: - explanation: Audio (music) - value: audio; audio - model_card: '' + explanation: > + The asset isn't released, and hence the license is unknown. + intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' + +- type: model name: MusicLM semantic model organization: Google - prohibited_uses: '' - quality_control: '' - size: 430M parameters (dense) - training_emissions: '' - training_hardware: '' - training_time: '' - type: model + description: '' + created_date: 2023-01-26 url: https://arxiv.org/pdf/2301.11325.pdf -- access: closed + model_card: '' + modality: Audio (music) analysis: '' - created_date: 2023-01-26 + size: 430M parameters (dense model) dependencies: [MusicLM dataset] - description: '' - feedback: '' - intended_uses: '' + training_emissions: '' + training_time: '' + training_hardware: '' + quality_control: '' + access: closed license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - modality: - explanation: Audio (music) - value: audio; audio - model_card: '' + explanation: > + The asset isn't released, and hence the license is unknown. 
+ intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' + +- type: model name: MusicLM acoustic model organization: Google - prohibited_uses: '' - quality_control: '' - size: 430M parameters (dense) - training_emissions: '' - training_hardware: '' - training_time: '' - type: model + description: '' + created_date: 2023-01-26 url: https://arxiv.org/pdf/2301.11325.pdf -- access: closed + model_card: '' + modality: Audio (music) analysis: '' - created_date: 2023-02-08 - dependencies: [Noise2Music pseudolabel dataset] - description: '' - feedback: '' - intended_uses: '' + size: 430M parameters (dense model) + dependencies: [MusicLM dataset] + training_emissions: '' + training_time: '' + training_hardware: '' + quality_control: '' + access: closed license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - modality: - explanation: audio (music), text - value: audio, text; audio, text - model_card: '' + explanation: > + The asset isn't released, and hence the license is unknown. + intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' + +- type: model name: Noise2Music organization: Google - prohibited_uses: '' - quality_control: '' + description: '' + created_date: 2023-02-08 + url: https://google-research.github.io/noise2music/noise2music.pdf + model_card: '' + modality: audio (music), text + analysis: '' size: unknown + dependencies: [Noise2Music pseudolabel dataset] training_emissions: '' - training_hardware: '' training_time: '' - type: model - url: https://google-research.github.io/noise2music/noise2music.pdf -- access: closed - analysis: '' - created_date: 2023-02-08 - datasheet: '' - dependencies: [LaMDA] - description: '' - excluded: '' - feedback: '' - included: '' - intended_uses: '' + training_hardware: '' + quality_control: '' + access: closed license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - modality: text + explanation: > + The asset isn't released, and hence the license is unknown. + intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' + +- type: dataset name: LaMDA-LF organization: Google - prohibited_uses: '' - quality_control: '' - sample: [] - size: 150k songs - type: dataset - url: https://google-research.github.io/noise2music/noise2music.pdf -- access: closed - analysis: '' + description: '' created_date: 2023-02-08 + url: https://google-research.github.io/noise2music/noise2music.pdf datasheet: '' - dependencies: [MusicCaps] - description: '' - excluded: '' - feedback: '' + modality: text + size: 150k songs + sample: [] + analysis: '' + dependencies: [LaMDA] included: '' - intended_uses: '' + excluded: '' + quality_control: '' + access: closed license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - modality: text + explanation: > + The asset isn't released, and hence the license is unknown. 
+ intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' + +- type: dataset name: Rater-LF organization: Google - prohibited_uses: '' - quality_control: '' - sample: [] - size: 10k captions - type: dataset - url: https://google-research.github.io/noise2music/noise2music.pdf -- access: closed - analysis: '' + description: '' created_date: 2023-02-08 + url: https://google-research.github.io/noise2music/noise2music.pdf datasheet: '' + modality: text + size: 10k captions + sample: [] + analysis: '' dependencies: [MusicCaps] - description: '' - excluded: '' - feedback: '' included: '' - intended_uses: '' + excluded: '' + quality_control: '' + access: closed license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - modality: - explanation: Text - value: text + explanation: > + The asset isn't released, and hence the license is unknown. + intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' + +- type: dataset name: Rater-SF organization: Google - prohibited_uses: '' - quality_control: '' - sample: [] - size: 24k captions - type: dataset + description: '' + created_date: 2023-02-08 url: https://google-research.github.io/noise2music/noise2music.pdf -- access: closed + datasheet: '' + modality: Text + size: 24k captions + sample: [] analysis: '' - created_date: 2023-02-08 - dependencies: [MuLan, MuLaMCap, LaMDA-LF, Rater-LF, Rater-SF] - description: '' - feedback: '' - intended_uses: '' + dependencies: [MusicCaps] + included: '' + excluded: '' + quality_control: '' + access: closed license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - modality: - explanation: audio (music), text - value: audio, text; audio, text - model_card: '' + explanation: > + The asset isn't released, and hence the license is unknown. + intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' + +- type: model name: Noise2Music pseudolabeler organization: Google - prohibited_uses: '' - quality_control: '' + description: '' + created_date: 2023-02-08 + url: https://google-research.github.io/noise2music/noise2music.pdf + model_card: '' + modality: audio (music), text + analysis: '' size: unknown + dependencies: [MuLan, MuLaMCap, LaMDA-LF, Rater-LF, Rater-SF] training_emissions: '' - training_hardware: '' training_time: '' - type: model - url: https://google-research.github.io/noise2music/noise2music.pdf -- access: closed - analysis: '' - created_date: 2023-02-08 - datasheet: '' - dependencies: [] - description: '' - excluded: '' - feedback: '' - included: '' - intended_uses: '' + training_hardware: '' + quality_control: '' + access: closed license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - modality: - explanation: audio (music) - value: audio + explanation: > + The asset isn't released, and hence the license is unknown. 
+ intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' + +- type: dataset name: Noise2Music audio dataset organization: Google - prohibited_uses: '' - quality_control: '' - sample: [] - size: 340k hours audio - type: dataset - url: https://google-research.github.io/noise2music/noise2music.pdf -- access: closed - analysis: '' + description: '' created_date: 2023-02-08 + url: https://google-research.github.io/noise2music/noise2music.pdf datasheet: '' - dependencies: [Noise2Music audio dataset, Noise2Music pseudolabeler] - description: '' - excluded: '' - feedback: '' + modality: audio (music) + size: 340k hours audio + sample: [] + analysis: '' + dependencies: [] included: '' - intended_uses: '' + excluded: '' + quality_control: '' + access: closed license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - modality: - explanation: audio (music), text - value: audio, text + explanation: > + The asset isn't released, and hence the license is unknown. + intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' + +- type: dataset name: Noise2Music pseudolabel dataset organization: Google - prohibited_uses: '' - quality_control: '' - sample: [] - size: 340k hours audio with pseudolabels - type: dataset + description: '' + created_date: 2023-02-08 url: https://google-research.github.io/noise2music/noise2music.pdf -- access: limited - adaptation: '' - created_date: 2022-08-25 - dependencies: [LaMDA] - description: AI Test Kitchen provides a new way for people to learn about, experience, - and give feedback on emerging AI technology, like LaMDA. - failures: '' - feedback: '' - intended_uses: '' + datasheet: '' + modality: audio (music), text + size: 340k hours audio with pseudolabels + sample: [] + analysis: '' + dependencies: [Noise2Music audio dataset, Noise2Music pseudolabeler] + included: '' + excluded: '' + quality_control: '' + access: closed license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown + explanation: > + The asset isn't released, and hence the license is unknown. + intended_uses: '' + prohibited_uses: '' monitoring: '' - monthly_active_users: '' + feedback: '' + +- type: application name: AI Test Kitchen organization: Google - output_space: '' - prohibited_uses: '' - quality_control: '' - terms_of_service: '' - type: application + description: AI Test Kitchen provides a new way for people to learn about, experience, + and give feedback on emerging AI technology, like LaMDA. + created_date: 2022-08-25 url: https://blog.google/technology/ai/join-us-in-the-ai-test-kitchen/ - user_distribution: '' -- access: closed - adaptation: '' - created_date: 2023-02-06 dependencies: [LaMDA] - description: Conversational AI service, powered by LaMDA - failures: '' - feedback: '' - intended_uses: '' + adaptation: '' + output_space: '' + quality_control: '' + access: limited license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown + explanation: > + The asset isn't released, and hence the license is unknown. 
+ terms_of_service: '' + intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' monthly_active_users: '' + user_distribution: '' + failures: '' + +- type: application name: Bard organization: Google + description: Conversational AI service, powered by LaMDA + created_date: 2023-02-06 + url: https://blog.google/technology/ai/bard-google-ai-search-updates/ + dependencies: [LaMDA] + adaptation: '' output_space: '' - prohibited_uses: '' quality_control: '' + access: closed + license: + value: unknown + explanation: > + The asset isn't released, and hence the license is unknown. terms_of_service: '' - type: application - url: https://blog.google/technology/ai/bard-google-ai-search-updates/ + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' + monthly_active_users: '' user_distribution: '' -- access: closed - analysis: '' + failures: '' + +- type: model + name: Minerva + organization: Google + description: '' created_date: 2022-06-29 + url: https://arxiv.org/abs/2206.14858 + model_card: '' + modality: text + analysis: '' + size: 540B parameters (dense model) dependencies: - PaLM - arXiv - PaLM dataset - Minerva Math Web Pages dataset - description: '' - feedback: '' - intended_uses: '' + training_emissions: '' + training_time: '' + training_hardware: '' + quality_control: '' + access: closed license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - modality: - explanation: text - value: text; text - model_card: '' + explanation: > + The asset isn't released, and hence the license is unknown. + intended_uses: '' + prohibited_uses: '' monitoring: '' - name: Minerva + feedback: '' + +- type: dataset + name: Minerva Math Web Pages dataset organization: Google - prohibited_uses: '' - quality_control: '' - size: 540B parameters (dense) - training_emissions: '' - training_hardware: '' - training_time: '' - type: model - url: https://arxiv.org/abs/2206.14858 -- access: closed - analysis: '' + description: '' created_date: 2022-06-29 + url: https://arxiv.org/abs/2206.14858 datasheet: '' + modality: Text + size: 17.5B tokens + sample: [] + analysis: '' dependencies: [] - description: '' - excluded: '' - feedback: '' included: '' - intended_uses: '' + excluded: '' + quality_control: '' + access: closed license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - modality: - explanation: Text - value: text + explanation: > + The asset isn't released, and hence the license is unknown. + intended_uses: '' + prohibited_uses: '' monitoring: '' - name: Minerva Math Web Pages dataset + feedback: '' + +- type: model + name: USM organization: Google - prohibited_uses: '' - quality_control: '' - sample: [] - size: 17.5B tokens - type: dataset - url: https://arxiv.org/abs/2206.14858 -- access: limited - analysis: '' - created_date: 2023-03-06 - dependencies: [YT-NLU-U, Pub-U, Web-NTL, YT-SUP+, Pub-S] description: Universal Speech Model (USM) is a family of state-of-the-art speech models with 2B parameters trained on 12 million hours of speech and 28 billion sentences of text, spanning 300+ languages. USM, which is for use in YouTube @@ -1600,272 +1716,270 @@ Punjabi, Assamese, Santhali, Balinese, Shona, Malagasy, Luganda, Luo, Bambara, Soga, Maninka, Xhosa, Akan, Lingala, Chichewa, Nkore, Nzema to name a few. Some of these languages are spoken by fewer than twenty million people, making it - very hard to find the necessary training data. 
- feedback: '' - intended_uses: '' - license: - explanation: "The asset isn't released, and hence the license is unknown.\n" - value: unknown - modality: - explanation: speech, text (300+ languages) - value: audio, text; audio, text - model_card: '' - monitoring: '' - name: USM - organization: Google - prohibited_uses: '' - quality_control: '' - size: 2B parameters (dense) + very hard to find the necessary training data. + created_date: 2023-03-06 + url: https://arxiv.org/abs/2303.01037 + model_card: '' + modality: speech, text (300+ languages) + analysis: '' + size: 2B parameters (dense model) + dependencies: [YT-NLU-U, Pub-U, Web-NTL, YT-SUP+, Pub-S] training_emissions: '' - training_hardware: '' training_time: '' - type: model - url: https://arxiv.org/abs/2303.01037 -- access: open - adaptation: '' - created_date: 2005-02-14 - dependencies: [USM] - description: YouTube is a global online video sharing and social media platform - failures: '' - feedback: '' + training_hardware: '' + quality_control: '' + access: limited + license: + value: unknown + explanation: > + The asset isn't released, and hence the license is unknown. intended_uses: '' - license: '' + prohibited_uses: '' monitoring: '' - monthly_active_users: '' + feedback: '' + +- type: application name: YouTube organization: Google + description: YouTube is a global online video sharing and social media platform + created_date: 2005-02-14 + url: https://www.youtube.com/ + dependencies: [USM] + adaptation: '' output_space: '' - prohibited_uses: '' quality_control: '' + access: open + license: '' terms_of_service: '' - type: application - url: https://www.youtube.com/ - user_distribution: '' -- access: closed - analysis: '' - created_date: 2023-03-06 - dependencies: [PaLM, ViT-22B] - description: '' - feedback: '' intended_uses: '' - license: - explanation: "The asset isn't released, and hence the license is unknown.\n" - value: unknown - modality: - explanation: text, image - value: image, text; image, text - model_card: '' + prohibited_uses: '' monitoring: '' + feedback: '' + monthly_active_users: '' + user_distribution: '' + failures: '' + +- type: model name: PaLM-E organization: Google - prohibited_uses: '' - quality_control: '' - size: 562B parameters (dense) - training_emissions: '' - training_hardware: '' - training_time: '' - type: model + description: '' + created_date: 2023-03-06 url: https://arxiv.org/abs/2303.03378 -- access: closed + model_card: '' + modality: text, image analysis: '' - created_date: 2023-02-10 - dependencies: [JFT] - description: '' - feedback: '' - intended_uses: '' + size: 562B parameters (dense model) + dependencies: [PaLM, ViT-22B] + training_emissions: '' + training_time: '' + training_hardware: '' + quality_control: '' + access: closed license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - modality: - explanation: image - value: image; image - model_card: '' + explanation: > + The asset isn't released, and hence the license is unknown. 
+ intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' + +- type: model name: ViT-22B organization: Google - prohibited_uses: '' - quality_control: '' - size: 22B parameters (dense) - training_emissions: '' - training_hardware: '' - training_time: '' - type: model + description: '' + created_date: 2023-02-10 url: https://arxiv.org/abs/2302.05442 -- access: closed + model_card: '' + modality: image analysis: '' - created_date: 2022-09-07 - dependencies: [w2v-BERT, SoundStream] - description: '' - feedback: '' - intended_uses: '' + size: 22B parameters (dense model) + dependencies: [JFT] + training_emissions: '' + training_time: '' + training_hardware: '' + quality_control: '' + access: closed license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - modality: - explanation: audio, text - value: audio, text; audio, text - model_card: '' + explanation: > + The asset isn't released, and hence the license is unknown. + intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' + +- type: model name: AudioLM organization: Google - prohibited_uses: '' - quality_control: '' - size: 1B parameters (dense) - training_emissions: '' - training_hardware: '' - training_time: '' - type: model + description: '' + created_date: 2022-09-07 url: https://arxiv.org/abs/2209.03143 -- access: closed + model_card: '' + modality: audio, text analysis: '' - created_date: 2022-09-14 - dependencies: [mT5, ViT-e, WebLI] - description: '' - feedback: '' - intended_uses: '' + size: 1B parameters (dense model) + dependencies: [w2v-BERT, SoundStream] + training_emissions: '' + training_time: '' + training_hardware: '' + quality_control: '' + access: closed license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - modality: - explanation: text, image - value: image, text; image, text - model_card: '' + explanation: > + The asset isn't released, and hence the license is unknown. + intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' + +- type: model name: PaLI organization: Google - prohibited_uses: '' - quality_control: '' - size: 17B parameters (dense) - training_emissions: '' - training_hardware: '' - training_time: '' - type: model + description: '' + created_date: 2022-09-14 url: https://arxiv.org/abs/2209.06794 -- access: closed + model_card: '' + modality: text, image analysis: '' - created_date: 2022-09-14 - dependencies: [JFT] - description: '' - feedback: '' - intended_uses: '' + size: 17B parameters (13B text, 4B images) + dependencies: [mT5, ViT-e, WebLI] + training_emissions: '' + training_time: '' + training_hardware: '' + quality_control: '' + access: closed license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - modality: - explanation: image - value: image; image - model_card: '' + explanation: > + The asset isn't released, and hence the license is unknown. 
+ intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' + +- type: model name: ViT-e organization: Google - prohibited_uses: '' - quality_control: '' - size: 3.9B parameters (dense) - training_emissions: '' - training_hardware: '' - training_time: '' - type: model + description: '' + created_date: 2022-09-14 url: https://arxiv.org/abs/2209.06794 -- access: closed + model_card: '' + modality: image analysis: '' - created_date: 2022-09-14 - datasheet: '' - dependencies: [] - description: '' - excluded: '' - feedback: '' - included: '' - intended_uses: '' + size: 3.9B parameters (dense model) + dependencies: [JFT] + training_emissions: '' + training_time: '' + training_hardware: '' + quality_control: '' + access: closed license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - modality: - explanation: text, image - value: image, text + explanation: > + The asset isn't released, and hence the license is unknown. + intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' + +- type: dataset name: WebLI organization: Google - prohibited_uses: '' - quality_control: '' - sample: [] - size: 10B images, 12B alt-text - type: dataset + description: '' + created_date: 2022-09-14 url: https://arxiv.org/abs/2209.06794 -- access: open + datasheet: '' + modality: text, image + size: 10B images, 12B alt-text + sample: [] analysis: '' - created_date: 2023-02-27 - dependencies: [T5, CLIP, YT-Temporal-1B] - description: '' - feedback: '' + dependencies: [] + included: '' + excluded: '' + quality_control: '' + access: closed + license: + value: unknown + explanation: > + The asset isn't released, and hence the license is unknown. intended_uses: '' - license: Apache 2.0 - modality: - explanation: text, video - value: text, video; text, video - model_card: '' + prohibited_uses: '' monitoring: '' + feedback: '' + +- type: model name: Vid2Seq organization: Google - prohibited_uses: '' - quality_control: '' - size: 500M parameters (dense) + description: '' + created_date: 2023-02-27 + url: https://arxiv.org/abs/2302.14115 + model_card: '' + modality: text, video + analysis: '' + size: 500M parameters (200M text, 300M video) + dependencies: [T5, CLIP, YT-Temporal-1B] training_emissions: '' - training_hardware: '' training_time: '' - type: model - url: https://arxiv.org/abs/2302.14115 -- access: open - analysis: evaluated on DSTC11 Challenge Task, based on MultiWoz 2.1, with a focus - on dialog state tracking. - created_date: 2023-06-08 - dependencies: [CTC blank-filtering, Speech2Text adapter] - description: Joint speech and language model using a Speech2Text adapter and using - a CTC-based blank-filtering. - feedback: '' + training_hardware: '' + quality_control: '' + access: open + license: Apache 2.0 intended_uses: '' - license: Google - modality: - explanation: speech input with textual output - value: audio; text - model_card: '' + prohibited_uses: '' monitoring: '' + feedback: '' + +- type: model name: Google Joint SLM organization: Google - prohibited_uses: '' - quality_control: '' + description: Joint speech and language model using a Speech2Text adapter and using + a CTC-based blank-filtering. + created_date: 2023-06-08 + url: https://arxiv.org/pdf/2306.07944.pdf + model_card: '' + modality: speech input with textual output + analysis: evaluated on DSTC11 Challenge Task, based on MultiWoz 2.1, with a focus + on dialog state tracking. 
size: '' + dependencies: [CTC blank-filtering, Speech2Text adapter] training_emissions: '' - training_hardware: '' training_time: '' - type: model - url: https://arxiv.org/pdf/2306.07944.pdf -- access: open - analysis: Reports results on standard code benchmarks across a variety of programming - languages. - created_date: 2023-05-10 - dependencies: [palm 2 dataset] + training_hardware: '' + quality_control: '' + access: open + license: Google + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' + +- type: model + name: PaLM 2 + organization: Google description: PaLM2 is a new state-of-the-art language model that has better multilingual and reasoning capabilities and is more compute-efficient than its predecessor PaLM. PaLM 2 is a Transformer-based model trained using a mixture of objectives similar to UL2. - feedback: Specific queries provided by annotators + created_date: 2023-05-10 + url: https://blog.google/technology/ai/google-palm-2-ai-large-language-model/ + model_card: https://ai.google/static/documents/palm2techreport.pdf + modality: text (input and output) + analysis: Reports results on standard code benchmarks across a variety of programming + languages. + size: 3.6T parameters + dependencies: [palm 2 dataset] + training_emissions: '' + training_time: '' + training_hardware: TPU v4 (number unspecified) + quality_control: Employed de-duplication, removal of sensitive-PII and filtering. + Added control tokens marking toxicity of text. + access: open + license: Google intended_uses: general use large language model that can be used for language, reasoning, and code tasks. - license: Google - modality: - explanation: text (input and output) - value: text; text - model_card: https://ai.google/static/documents/palm2techreport.pdf - monitoring: Google internal monitoring - name: PaLM 2 - organization: Google prohibited_uses: becoming part of a general-purpose service or product or use within specific downstream applications without prior assessment - quality_control: Employed de-duplication, removal of sensitive-PII and filtering. - Added control tokens marking toxicity of text. - size: 3.6T parameters (dense) - training_emissions: '' - training_hardware: TPU v4 (number unspecified) - training_time: '' - type: model - url: https://blog.google/technology/ai/google-palm-2-ai-large-language-model/ + monitoring: Google internal monitoring + feedback: Specific queries provided by annotators diff --git a/assets/h2oai.yaml b/assets/h2oai.yaml index 53654100..601e629d 100644 --- a/assets/h2oai.yaml +++ b/assets/h2oai.yaml @@ -1,24 +1,23 @@ --- -- access: open - analysis: Evaluated on EleutherAI evaluation harness. - created_date: 2023-06-16 - dependencies: [GPT-NeoX, H2O AI OpenAssistant, h2oGPT Repositories] - description: Series of models fine-tuned on well-known LLMs using the h2oGPT repositories. - feedback: https://huggingface.co/h2oai/h2ogpt-oasst1-512-20b/discussions - intended_uses: '' - license: Apache 2.0 - modality: - explanation: text - value: text; text - model_card: https://huggingface.co/h2oai/h2ogpt-oasst1-512-20b - monitoring: '' + +- type: model name: h2oGPT organization: H2O AI - prohibited_uses: '' - quality_control: '' - size: 20B parameters (dense) + description: Series of models fine-tuned on well-known LLMs using the h2oGPT repositories. + created_date: 2023-06-16 + url: https://arxiv.org/pdf/2306.08161.pdf + model_card: https://huggingface.co/h2oai/h2ogpt-oasst1-512-20b + modality: text + analysis: Evaluated on EleutherAI evaluation harness. 
+ size: 20B parameters + dependencies: [GPT-NeoX, H2O AI OpenAssistant, h2oGPT Repositories] training_emissions: unknown - training_hardware: unspecified number of 48GB A100 NVIDIA GPUs training_time: unknown - type: model - url: https://arxiv.org/pdf/2306.08161.pdf + training_hardware: unspecified number of 48GB A100 NVIDIA GPUs + quality_control: '' + access: open + license: Apache 2.0 + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: https://huggingface.co/h2oai/h2ogpt-oasst1-512-20b/discussions diff --git a/assets/hubspot.yaml b/assets/hubspot.yaml index 91876dd7..90298984 100644 --- a/assets/hubspot.yaml +++ b/assets/hubspot.yaml @@ -1,26 +1,28 @@ --- -- access: limited - adaptation: '' - created_date: 2023-01-31 - dependencies: [ChatGPT API] + +- type: application + name: ChatGPT powered by OBO + organization: HubSpot description: Give your sales, marketing, and customer service teams one of the most powerful AI tools available - ChatGPT priority access, no timeout limits, company wide access managed through a single account, incorporate into your existing processes without leaving HubSpot - failures: '' - feedback: '' - intended_uses: '' + created_date: 2023-01-31 + url: https://ecosystem.hubspot.com/marketplace/apps/sales/sales-enablement/the-obo-group-chatgpt-1398072 + dependencies: [ChatGPT API] + adaptation: '' + output_space: '' + quality_control: '' + access: limited license: - explanation: "We weren't able to locate a license.\n" value: unknown + explanation: > + We weren't able to locate a license. + terms_of_service: '' + intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' monthly_active_users: '' - name: ChatGPT powered by OBO - organization: HubSpot - output_space: '' - prohibited_uses: '' - quality_control: '' - terms_of_service: '' - type: application - url: https://ecosystem.hubspot.com/marketplace/apps/sales/sales-enablement/the-obo-group-chatgpt-1398072 user_distribution: '' + failures: '' diff --git a/assets/huggingface.yaml b/assets/huggingface.yaml index 23e07d5d..84710565 100644 --- a/assets/huggingface.yaml +++ b/assets/huggingface.yaml @@ -1,30 +1,29 @@ --- -- access: - explanation: The license is provided in the [[HuggingFace repository]](https://huggingface.co/codeparrot/codeparrot) - value: open - analysis: none + +- type: model + name: CodeParrot + organization: HuggingFace + description: CodeParrot is an autoregressive language model trained on code created_date: - explanation: The date the model was announced value: 2021-12-06 + explanation: The date the model was announced + url: https://twitter.com/lvwerra/status/1467933794699259908 + model_card: none + modality: code (Python) + analysis: none + size: 1B parameters (dense model) dependencies: [] - description: CodeParrot is an autoregressive language model trained on code - feedback: none - intended_uses: none + training_emissions: unknown + training_time: unknown + training_hardware: 16 x A100 (40GB) + quality_control: none + access: + value: open + explanation: The license is provided in the [[HuggingFace repository]](https://huggingface.co/codeparrot/codeparrot) license: - explanation: No license is explicitly provided for this model. value: none - modality: - explanation: code (Python) - value: code; code - model_card: none - monitoring: none - name: CodeParrot - organization: HuggingFace + explanation: No license is explicitly provided for this model. 
+ intended_uses: none prohibited_uses: none - quality_control: none - size: 1B parameters (dense) - training_emissions: unknown - training_hardware: 16 x A100 (40GB) - training_time: unknown - type: model - url: https://twitter.com/lvwerra/status/1467933794699259908 + monitoring: none + feedback: none diff --git a/assets/inflection.yaml b/assets/inflection.yaml index 9283431b..5113a5f1 100644 --- a/assets/inflection.yaml +++ b/assets/inflection.yaml @@ -1,47 +1,49 @@ --- -- access: limited - analysis: Evaluated on wide range of language benchmarks like MMLU 5-shot, GSM-8K, - and HellaSwag 10-shot among others. - created_date: 2023-06-22 - dependencies: [] + +- type: model + name: Inflection-1 + organization: Inflection AI description: Inflection AI's in-house LLM, which powers Pi and will soon be available via Inflection AI's conversational API. - feedback: '' - intended_uses: '' - license: unknown - modality: - explanation: text - value: text; text + created_date: 2023-06-22 + url: https://inflection.ai/inflection-1 model_card: '' - monitoring: '' - name: Inflection-1 - organization: Inflection AI - prohibited_uses: '' - quality_control: '' - size: unknown + modality: text + analysis: Evaluated on wide range of language benchmarks like MMLU 5-shot, GSM-8K, + and HellaSwag 10-shot among others. + size: + value: unknown + explanation: Designed to be roughly around Chat-GPT's size (175B parameters). + dependencies: [] training_emissions: '' - training_hardware: unknown training_time: '' - type: model - url: https://inflection.ai/inflection-1 -- access: limited - adaptation: '' - created_date: 2023-05-02 - dependencies: [Inflection-1] - description: Personal AI chatbot designed to be conversational and specialized - in emotional intelligence. - failures: '' - feedback: '' - intended_uses: to be used as a personal assistant chatbot for everyday activities + training_hardware: unknown + quality_control: '' + access: limited license: unknown + intended_uses: '' + prohibited_uses: '' monitoring: '' - monthly_active_users: '' + feedback: '' + +- type: application name: Pi organization: Inflection AI + description: Personal AI chatbot designed to be conversational and specialized + in emotional intelligence. + created_date: 2023-05-02 + url: https://inflection.ai/press + dependencies: [Inflection-1] + adaptation: '' output_space: natural language text responses - prohibited_uses: '' quality_control: '' + access: limited + license: unknown terms_of_service: '' - type: application - url: https://inflection.ai/press + intended_uses: to be used as a personal assistant chatbot for everyday activities + prohibited_uses: '' + monitoring: '' + feedback: '' + monthly_active_users: '' user_distribution: '' + failures: '' diff --git a/assets/instacart.yaml b/assets/instacart.yaml index 4dc27bf8..b3a970ad 100644 --- a/assets/instacart.yaml +++ b/assets/instacart.yaml @@ -1,26 +1,27 @@ --- -- access: limited - adaptation: '' - created_date: 2023-03-01 - dependencies: [ChatGPT API] + +- type: application + name: Ask Instacart + organization: Instacart description: Instacart is augmenting the Instacart app to enable customers to ask about food and get inspirational, shoppable answers. This uses ChatGPT alongside Instacart’s own AI and product data from their 75,000+ retail partner store locations to help customers discover ideas for open-ended shopping goals, such as “How do I make great fish tacos?” or “What’s a healthy lunch for my kids?” Instacart plans to launch “Ask Instacart” later this year. 
- failures: '' - feedback: '' - intended_uses: '' - license: '' - monitoring: '' - monthly_active_users: '' - name: Ask Instacart - organization: Instacart + created_date: 2023-03-01 + url: https://openai.com/blog/introducing-chatgpt-and-whisper-apis + dependencies: [ChatGPT API] + adaptation: '' output_space: '' - prohibited_uses: '' quality_control: '' + access: limited + license: '' terms_of_service: '' - type: application - url: https://openai.com/blog/introducing-chatgpt-and-whisper-apis + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' + monthly_active_users: '' user_distribution: '' + failures: '' diff --git a/assets/juni.yaml b/assets/juni.yaml index 9e175922..13d2757a 100644 --- a/assets/juni.yaml +++ b/assets/juni.yaml @@ -1,21 +1,22 @@ --- -- access: limited - adaptation: '' - created_date: unknown - dependencies: [Anthropic API] - description: An online tutoring solution to help students achieve academic success. - failures: '' - feedback: '' - intended_uses: '' - license: unknown - monitoring: '' - monthly_active_users: '' + +- type: application name: Juni Tutor Bot organization: Juni Learning + description: An online tutoring solution to help students achieve academic success. + created_date: unknown + url: https://junilearning.com/ + dependencies: [Anthropic API] + adaptation: '' output_space: '' - prohibited_uses: '' quality_control: '' + access: limited + license: unknown terms_of_service: '' - type: application - url: https://junilearning.com/ + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' + monthly_active_users: '' user_distribution: '' + failures: '' diff --git a/assets/kakaobrain.yaml b/assets/kakaobrain.yaml index f95f0b13..4c9546a8 100644 --- a/assets/kakaobrain.yaml +++ b/assets/kakaobrain.yaml @@ -1,25 +1,32 @@ --- -- access: - explanation: You can download the dataset from [[COYO Huggingface page]](https://huggingface.co/datasets/kakaobrain/coyo-700m). - For more information on downloading the image dataset, please refer to [[download/README.md]](https://github.com/kakaobrain/coyo-dataset/blob/main/download/README.md) - on Github. - value: open - analysis: - explanation: See [[Experiments]](https://github.com/kakaobrain/coyo-dataset#experiments) - value: "\"We empirically validated the quality of COYO dataset by re-implementing\ - \ popular models such as ALIGN, unCLIP, and ViT. We trained these models on\ - \ COYO-700M or its subsets from scratch, achieving competitive performance\ - \ to the reported numbers or generated samples in the original papers.\"\n" + +- type: dataset + name: COYO-700M + organization: Kakao Brain + description: > + COYO-700M is a large-scale dataset that contains 747M image-text pairs as well + as many other meta-attributes to increase the usability to train various models. created_date: - explanation: As mentioned in the [[Google Scholar page]](https://scholar.google.com/citations?view_op=view_citation&hl=en&user=8WXfStMAAAAJ&citation_for_view=8WXfStMAAAAJ:-f6ydRqryjwC). value: 2022-08-31 + explanation: As mentioned in the [[Google Scholar page]](https://scholar.google.com/citations?view_op=view_citation&hl=en&user=8WXfStMAAAAJ&citation_for_view=8WXfStMAAAAJ:-f6ydRqryjwC). + url: https://github.com/kakaobrain/coyo-dataset datasheet: none + modality: text (English) and image + size: 747M image-text pairs + sample: [] + analysis: + value: > + "We empirically validated the quality of COYO dataset by re-implementing popular + models such as ALIGN, unCLIP, and ViT. 
We trained these models on COYO-700M + or its subsets from scratch, achieving competitive performance to the reported + numbers or generated samples in the original papers." + explanation: See [[Experiments]](https://github.com/kakaobrain/coyo-dataset#experiments) dependencies: [CommonCrawl] - description: "COYO-700M is a large-scale dataset that contains 747M image-text\ - \ pairs as well as many other meta-attributes to increase the usability to train\ - \ various models.\n" - excluded: + included: + value: Included all image formats that Pillow library can decode. Collected + only English text using cld3. explanation: See [[Data Filtering]](https://github.com/kakaobrain/coyo-dataset#data-filtering) + excluded: value: Removed images less than 5KB image size. Removed images with an aspect ratio greater than 3.0. Removed images with min(width, height) < 200. Removed images with a score of OpenNSFW2 or GantMan/NSFW higher than 0.5. Removed @@ -29,36 +36,34 @@ texts over 1000 in length. Removed texts appearing more than 10 times. Removed texts containing NSFW words. Removed duplicated samples based on (image_phash, text). - feedback: Feedback can be given by emailing at coyo at kakaobrain.com - included: explanation: See [[Data Filtering]](https://github.com/kakaobrain/coyo-dataset#data-filtering) - value: Included all image formats that Pillow library can decode. Collected - only English text using cld3. + quality_control: '' + access: + value: open + explanation: You can download the dataset from [[COYO Huggingface page]](https://huggingface.co/datasets/kakaobrain/coyo-700m). + For more information on downloading the image dataset, please refer to [[download/README.md]](https://github.com/kakaobrain/coyo-dataset/blob/main/download/README.md) + on Github. + license: CC-BY-4.0 intended_uses: + value: > + "It is strongly recommended that this dataset be used only for research, keeping + this in mind when using the dataset, and Kakao Brain does not recommend using + this dataset as it is without special processing to clear inappropriate data + to create commercial products." explanation: See [[Disclaimer & Content Warning]](https://github.com/kakaobrain/coyo-dataset#disclaimer--content-warning) - value: "\"It is strongly recommended that this dataset be used only for research,\ - \ keeping this in mind when using the dataset, and Kakao Brain does not recommend\ - \ using this dataset as it is without special processing to clear inappropriate\ - \ data to create commercial products.\"\n" - license: CC-BY-4.0 - modality: - explanation: text (English) and image - value: image, text - monitoring: none - name: COYO-700M - organization: Kakao Brain prohibited_uses: + value: > + Authors note the following limitations: "Kakao Brain tried to construct a + "Safe" + dataset when building the COYO dataset. However, despite these efforts, this + large-scale dataset was not hand-picked by humans to avoid the risk due to + its + very large size (over 700M). Keep in mind that the unscreened nature of the + dataset means that the collected images can lead to strongly discomforting + and + disturbing content for humans. The COYO dataset may contain some inappropriate + data, and any problems resulting from such data are the full responsibility + of the user who used it." 
explanation: See [[Disclaimer & Content Warning]](https://github.com/kakaobrain/coyo-dataset#disclaimer--content-warning) - value: "Authors note the following limitations: \"Kakao Brain tried to construct\ - \ a \"Safe\" dataset when building the COYO dataset. However, despite these\ - \ efforts, this large-scale dataset was not hand-picked by humans to avoid\ - \ the risk due to its very large size (over 700M). Keep in mind that the unscreened\ - \ nature of the dataset means that the collected images can lead to strongly\ - \ discomforting and disturbing content for humans. The COYO dataset may contain\ - \ some inappropriate data, and any problems resulting from such data are the\ - \ full responsibility of the user who used it.\"\n" - quality_control: '' - sample: [] - size: 747M image-text pairs - type: dataset - url: https://github.com/kakaobrain/coyo-dataset + monitoring: none + feedback: Feedback can be given by emailing at coyo at kakaobrain.com diff --git a/assets/khan.yaml b/assets/khan.yaml index abdf0636..75050a66 100644 --- a/assets/khan.yaml +++ b/assets/khan.yaml @@ -1,24 +1,26 @@ --- -- access: limited - adaptation: '' - created_date: 2023-03-14 - dependencies: [GPT-4 API] + +- type: application + name: Khanmigo + organization: Khan Academy description: An AI-powered assistant that functions as both a virtual tutor for students and a classroom assistant for teachers. - failures: '' - feedback: '' - intended_uses: '' + created_date: 2023-03-14 + url: https://www.khanacademy.org/khan-labs#khanmigo + dependencies: [GPT-4 API] + adaptation: '' + output_space: '' + quality_control: '' + access: limited license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown + explanation: > + The asset isn't released, and hence the license is unknown. + terms_of_service: '' + intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' monthly_active_users: '' - name: Khanmigo - organization: Khan Academy - output_space: '' - prohibited_uses: '' - quality_control: '' - terms_of_service: '' - type: application - url: https://www.khanacademy.org/khan-labs#khanmigo user_distribution: '' + failures: '' diff --git a/assets/laion.yaml b/assets/laion.yaml index 14972cc3..6a62325e 100644 --- a/assets/laion.yaml +++ b/assets/laion.yaml @@ -1,69 +1,79 @@ --- -- access: - explanation: The dataset index is available from [[dataset blog post]](https://laion.ai/blog/laion-400-open-dataset/) - value: open - analysis: '' + +- type: dataset + name: LAION-400M + # General + organization: LAION + description: LAION-400M is a dataset with CLIP-filtered 400 million image-text + pairs, their CLIP embeddings and kNN indices that allow efficient similarity + search. This dataset is entirely openly, freely accessible. created_date: + value: 2021-08-20 explanation: The date the [[blog post]](https://laion.ai/blog/laion-400-open-dataset/) was released. - value: 2021-08-20 + url: https://laion.ai/blog/laion-400-open-dataset/ datasheet: https://laion.ai/blog/laion-400-open-dataset/ + modality: text (English) and image + size: 400M image-text pairs + sample: [] + analysis: '' + # Construction dependencies: [CLIP, CommonCrawl] - description: LAION-400M is a dataset with CLIP-filtered 400 million image-text - pairs, their CLIP embeddings and kNN indices that allow efficient similarity - search. This dataset is entirely openly, freely accessible. 
+ included: '' excluded: + value: > + Authors apply the following filtering conditions on the WAT files downloaded + from Common Crawl: "All samples with less than 5 character alt-text length + or less than 5 KB image size are dropped. Duplicate removal is performed with + bloom filter based on URL and alt-text. We use CLIP to compute embeddings + of the image and alt-text. Then we compute the cosine similarity of both embeddings + and drop all samples with cosine similarity below 0.3. This threshold was + selected based on human inspections. We use the CLIP embeddings of images + and texts to filter out illegal contents." explanation: See [[Dataset and Methods]](https://arxiv.org/pdf/2111.02114.pdf#section.2) - value: "Authors apply the following filtering conditions on the WAT files downloaded\ - \ from Common Crawl: \"All samples with less than 5 character alt-text length\ - \ or less than 5 KB image size are dropped. Duplicate removal is performed\ - \ with bloom filter based on URL and alt-text. We use CLIP to compute embeddings\ - \ of the image and alt-text. Then we compute the cosine similarity of both\ - \ embeddings and drop all samples with cosine similarity below 0.3. This threshold\ - \ was selected based on human inspections. We use the CLIP embeddings of images\ - \ and texts to filter out illegal contents.\"\n" - feedback: '' - included: '' + quality_control: + value: The authors use CLIP embeddings of images and texts to filter out illegal + contents. They also use CLIP to tag image-text pairs as NSFW. They note that + less than 1% of images were detected as NSFW, which can be filtered out by + an user with NSFW tag. + explanation: See [[Dataset and Methods]](https://arxiv.org/pdf/2111.02114.pdf#section.2) + # Downstream + access: + value: open + explanation: The dataset index is available from [[dataset blog post]](https://laion.ai/blog/laion-400-open-dataset/) + license: + value: CC BY 4.0 + explanation: The license is listed on the [[dataset blog post]](https://laion.ai/blog/laion-400-open-dataset/) intended_uses: The authors recommend using the dataset "for research purposes" and warn that "this large-scale dataset is non-curated. It was built for research purposes to enable testing model training on larger scale for broad researcher and other interested communities, and is not meant for any real-world production or application." - license: - explanation: The license is listed on the [[dataset blog post]](https://laion.ai/blog/laion-400-open-dataset/) - value: CC BY 4.0 - modality: - explanation: text (English) and image - value: image, text - monitoring: '' - name: LAION-400M - organization: LAION prohibited_uses: No uses are explicitly prohibited by the license. Users are warned from using LAION-400M for any real-world production or application. - quality_control: - explanation: See [[Dataset and Methods]](https://arxiv.org/pdf/2111.02114.pdf#section.2) - value: The authors use CLIP embeddings of images and texts to filter out illegal - contents. They also use CLIP to tag image-text pairs as NSFW. They note that - less than 1% of images were detected as NSFW, which can be filtered out by - an user with NSFW tag. 
- sample: [] - size: 400M image-text pairs - type: dataset - url: https://laion.ai/blog/laion-400-open-dataset/ -- access: - explanation: The dataset index is available from [[dataset blog post]](https://laion.ai/blog/laion-5b/) - value: open - analysis: '' + monitoring: '' + feedback: '' + +- type: dataset + name: LAION-5B + # General + organization: LAION + description: LAION is a dataset of 5 billion image-text pairs from the Internet created_date: + value: 2022-12-12 explanation: The date the [[blog post]](https://laion.ai/blog/laion-5b/) was released. Note that the dataset was already compiled earlier, e.g. the Stable Diffusion model released earlier was trained on a subset of LAION-5B - value: 2022-12-12 + url: https://laion.ai/blog/laion-5b/ datasheet: https://laion.ai/blog/laion-5b/ + modality: text and image + size: 5B image-text pairs + sample: [] + analysis: '' + # Construction dependencies: [CLIP, mCLIP, CommonCrawl] - description: LAION is a dataset of 5 billion image-text pairs from the Internet + included: '' excluded: - explanation: See [[dataset blog post]](https://laion.ai/blog/laion-5b/) value: All samples with less than 5 characters alt-text length or less than 5 KB image size are dropped. All images with the too big resolution, potentially DOS bombs, were dropped before attempting to process them. Duplicate removal @@ -76,43 +86,45 @@ were selected based on human inspection of the test results. We use the CLIP embeddings of images and texts to filter out to the possible extent the illegal content. - feedback: '' - included: '' + explanation: See [[dataset blog post]](https://laion.ai/blog/laion-5b/) + quality_control: '' + # Downstream + access: + value: open + explanation: The dataset index is available from [[dataset blog post]](https://laion.ai/blog/laion-5b/) + license: + value: CC BY 4.0 + explanation: The license is listed on the [[dataset blog post]](https://laion.ai/blog/laion-5b/) intended_uses: The authors recommend using the dataset "for research purposes" and "do not recommend using it for creating ready-to-go industrial products, as the basic research about general properties and safety of such large-scale models, which we would like to encourage with this release, is still in progress" - license: - explanation: The license is listed on the [[dataset blog post]](https://laion.ai/blog/laion-5b/) - value: CC BY 4.0 - modality: - explanation: text and image - value: image, text - monitoring: '' - name: LAION-5B - organization: LAION prohibited_uses: No uses are explicitly prohibited by the license. Users are warned from using LAION-5B for non-research purposes. - quality_control: '' - sample: [] - size: 5B image-text pairs - type: dataset - url: https://laion.ai/blog/laion-5b/ -- access: - explanation: The dataset index is available from [[dataset blog post]](https://laion.ai/blog/laion-5b/) - value: open - analysis: '' + monitoring: '' + feedback: '' + +- type: dataset + name: LAION-2B-en + # General + organization: LAION + description: LAION-2B-en is a subset of the LAION-5B dataset and contains 2.3 + billion English image-text pairs. created_date: + value: 2022-12-12 explanation: The date the [[blog post]](https://laion.ai/blog/laion-5b/) was released. Note that the dataset was already compiled earlier, e.g. 
the Stable Diffusion model released earlier was trained on a subset of LAION-5B - value: 2022-12-12 + url: https://arxiv.org/pdf/2210.08402.pdf datasheet: https://laion.ai/blog/laion-5b/ + modality: text (English) and image + size: 2.32B image-text pairs + sample: [] + analysis: '' + # Construction dependencies: [CLIP, LAION-5B] - description: LAION-2B-en is a subset of the LAION-5B dataset and contains 2.3 - billion English image-text pairs. + included: '' excluded: - explanation: See [[dataset blog post]](https://laion.ai/blog/laion-5b/) value: All samples with less than 5 characters alt-text length or less than 5 KB image size are dropped. All images with the too big resolution, potentially DOS bombs, were dropped before attempting to process them. Duplicate removal @@ -125,50 +137,44 @@ were selected based on human inspection of the test results. We use the CLIP embeddings of images and texts to filter out to the possible extent the illegal content. - feedback: '' - included: '' + explanation: See [[dataset blog post]](https://laion.ai/blog/laion-5b/) + quality_control: '' + # Downstream + access: + value: open + explanation: The dataset index is available from [[dataset blog post]](https://laion.ai/blog/laion-5b/) + license: + value: CC BY 4.0 + explanation: The license is listed on the [[dataset blog post]](https://laion.ai/blog/laion-5b/) intended_uses: The authors recommend using the dataset "for research purposes" and "do not recommend using it for creating ready-to-go industrial products, as the basic research about general properties and safety of such large-scale models, which we would like to encourage with this release, is still in progress" - license: - explanation: The license is listed on the [[dataset blog post]](https://laion.ai/blog/laion-5b/) - value: CC BY 4.0 - modality: - explanation: text (English) and image - value: image, text - monitoring: '' - name: LAION-2B-en - organization: LAION prohibited_uses: No uses are explicitly prohibited by the license. Users are warned from using LAION-2B-en for non-research purposes. - quality_control: '' - sample: [] - size: 2.32B image-text pairs - type: dataset - url: https://arxiv.org/pdf/2210.08402.pdf -- access: open - analysis: Evaluated on COCO captioning and VQAv2 vision-language tasks. - created_date: 2023-03-28 - dependencies: [LLaMA, CLIP] + monitoring: '' + feedback: '' + +- type: model + name: OpenFlamingo + organization: LAION description: An open-source reproduction of DeepMind's Flamingo model. At its core, OpenFlamingo is a framework that enables training and evaluation of large multimodal models (LMMs). - feedback: '' - intended_uses: academic research purposes - license: MIT - modality: - explanation: image and text input with textual output - value: image, text; text + created_date: 2023-03-28 + url: https://laion.ai/blog/open-flamingo/ model_card: https://github.com/mlfoundations/open_flamingo/blob/main/MODEL_CARD.md - monitoring: '' - name: OpenFlamingo - organization: LAION - prohibited_uses: commercial use - quality_control: '' - size: 9B parameters (dense) + modality: image and text input with textual output + analysis: Evaluated on COCO captioning and VQAv2 vision-language tasks. 
+ size: 9B parameters + dependencies: [LLaMA, CLIP] training_emissions: '' - training_hardware: '' training_time: '' - type: model - url: https://laion.ai/blog/open-flamingo/ + training_hardware: '' + quality_control: '' + access: open + license: MIT + intended_uses: academic research purposes + prohibited_uses: commercial use + monitoring: '' + feedback: '' diff --git a/assets/latitude.yaml b/assets/latitude.yaml index 2e5bc564..f6d5e39c 100644 --- a/assets/latitude.yaml +++ b/assets/latitude.yaml @@ -1,33 +1,43 @@ --- -- access: - explanation: "The game is available to public upon sign up.\n" - value: limited - adaptation: '' + +- type: application + name: AI Dungeon + # General + organization: Latitude + description: > + AI Dungeon is a single-player text adventure game that uses AI to generate + content. created_date: - explanation: "Release date of the game on iOS and Android platforms [[Wikipedia]](https://en.wikipedia.org/wiki/AI_Dungeon).\n" value: 2019-12-17 + explanation: > + Release date of the game on iOS and Android platforms + [[Wikipedia]](https://en.wikipedia.org/wiki/AI_Dungeon). + url: https://play.aidungeon.io + # Construction dependencies: [OpenAI API] - description: "AI Dungeon is a single-player text adventure game that uses AI to\ - \ generate content.\n" - failures: '' - feedback: '' - intended_uses: '' + adaptation: '' + output_space: '' + quality_control: '' + # Downstream + access: + value: limited + explanation: > + The game is available to public upon sign up. + terms_of_service: https://play.aidungeon.io/main/termsOfService license: - explanation: "\"Subject to your compliance with these Terms, the Company hereby\ - \ grants to you, a personal, worldwide, royalty-free, non-assignable, non-sublicensable,\ - \ non-transferrable, and non-exclusive license to use the software provided\ - \ to you as part of the Services (and to download a single copy of the App\ - \ onto the equipment or device specified by us)\" - excerpt from the Terms\ - \ of Service document.\n" value: custom + explanation: > + "Subject to your compliance with these Terms, the Company hereby grants + to you, a personal, worldwide, royalty-free, non-assignable, + non-sublicensable, non-transferrable, and non-exclusive license to use + the software provided to you as part of the Services (and to download a + single copy of the App onto the equipment or device specified by us)" - + excerpt from the Terms of Service document. + intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' + # Deployment monthly_active_users: '' - name: AI Dungeon - organization: Latitude - output_space: '' - prohibited_uses: '' - quality_control: '' - terms_of_service: https://play.aidungeon.io/main/termsOfService - type: application - url: https://play.aidungeon.io user_distribution: '' + failures: '' diff --git a/assets/lehigh.yaml b/assets/lehigh.yaml index c153f345..9e8df418 100644 --- a/assets/lehigh.yaml +++ b/assets/lehigh.yaml @@ -1,31 +1,30 @@ --- -- access: open + +- type: model + name: BiomedGPT + organization: Lehigh University + description: BiomedGPT leverages self-supervision on large and diverse datasets + to accept multi-modal inputs and perform a range of downstream tasks. + created_date: 2023-05-26 + url: https://arxiv.org/pdf/2305.17100.pdf + model_card: '' + modality: image and text input, text output analysis: outperforms majority of preceding state-of-the-art models over 15 unique biomedical modalities. 
- created_date: 2023-05-26 + size: 472M parameters dependencies: - GPT-style autoregressive decoder - BiomedGPT biomedical datasets - description: BiomedGPT leverages self-supervision on large and diverse datasets - to accept multi-modal inputs and perform a range of downstream tasks. - feedback: '' - intended_uses: furthering research in developing unified and generalist models - for biomedicine. - license: Apache 2.0 - modality: - explanation: image and text input, text output - value: image, text; text - model_card: '' - monitoring: '' - name: BiomedGPT - organization: Lehigh University - prohibited_uses: '' + training_emissions: unknown + training_time: unknown + training_hardware: 10 NVIDIA A5000 GPUs quality_control: No specific quality control is mentioned in model training, though details on data processing and how the model was trained are provided in the paper. - size: 472M parameters (dense) - training_emissions: unknown - training_hardware: 10 NVIDIA A5000 GPUs - training_time: unknown - type: model - url: https://arxiv.org/pdf/2305.17100.pdf + access: open + license: Apache 2.0 + intended_uses: furthering research in developing unified and generalist models + for biomedicine. + prohibited_uses: '' + monitoring: '' + feedback: '' diff --git a/assets/linkedin.yaml b/assets/linkedin.yaml index a92551e9..0d3d2ab0 100644 --- a/assets/linkedin.yaml +++ b/assets/linkedin.yaml @@ -1,26 +1,27 @@ --- -- access: open - adaptation: '' - created_date: unknown - dependencies: [Azure Cognitive Services for Vision] + +- type: application + name: LinkedIn + organization: LinkedIn description: More than 40 percent of LinkedIn's feed posts include at least one image. We want every member to have equal access to opportunity and are committed to ensuring that we make images accessible to our members who are blind or who have low vision so they can be a part of the online conversation. With Azure Cognitive Service for Vision, we can provide auto-captioning to edit and support alt. text descriptions. - failures: '' - feedback: '' - intended_uses: '' - license: unknown - monitoring: '' - monthly_active_users: '' - name: LinkedIn - organization: LinkedIn + created_date: unknown + url: https://www.linkedin.com/ + dependencies: [Azure Cognitive Services for Vision] + adaptation: '' output_space: '' - prohibited_uses: '' quality_control: '' + access: open + license: unknown terms_of_service: '' - type: application - url: https://www.linkedin.com/ + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' + monthly_active_users: '' user_distribution: '' + failures: '' diff --git a/assets/lmsys.yaml b/assets/lmsys.yaml index 3058f7a0..6f07781e 100644 --- a/assets/lmsys.yaml +++ b/assets/lmsys.yaml @@ -1,25 +1,24 @@ --- -- access: open - analysis: Evaluated against similar LLMs using GPT-4 as a judge. - created_date: 2023-03-30 - dependencies: [LLaMA, ShareGPT conversations data] + +- type: model + name: Vicuna + organization: LMSYS description: An open-source chatbot trained by fine-tuning LLaMA on user-shared conversations collected from ShareGPT. 
- feedback: https://huggingface.co/datasets/bigcode/the-stack/discussions - intended_uses: research on LLMs and chatbots - license: Apache 2.0 - modality: - explanation: natural language text - value: text; text + created_date: 2023-03-30 + url: https://lmsys.org/blog/2023-03-30-vicuna/ model_card: https://huggingface.co/lmsys/vicuna-13b-delta-v0 - monitoring: '' - name: Vicuna - organization: LMSYS - prohibited_uses: '' - quality_control: '' - size: 13B parameters (dense) + modality: natural language text + analysis: Evaluated against similar LLMs using GPT-4 as a judge. + size: 13B parameters + dependencies: [LLaMA, ShareGPT conversations data] training_emissions: '' - training_hardware: 8 A100 GPUs training_time: 1 day - type: model - url: https://lmsys.org/blog/2023-03-30-vicuna/ + training_hardware: 8 A100 GPUs + quality_control: '' + access: open + license: Apache 2.0 + intended_uses: research on LLMs and chatbots + prohibited_uses: '' + monitoring: '' + feedback: https://huggingface.co/datasets/bigcode/the-stack/discussions diff --git a/assets/meta.yaml b/assets/meta.yaml index ad4d99eb..c44d7474 100644 --- a/assets/meta.yaml +++ b/assets/meta.yaml @@ -1,40 +1,51 @@ --- -- access: - explanation: Models are available for download from [[GitHub repository]](https://github.com/facebookresearch/esm#available-models) - value: open - analysis: '' + +- type: model + name: ESM-2 + organization: Meta + description: ESM-2 is a series of protein language models trained on protein sequences created_date: + value: 2022-10-31 explanation: The date the [[model paper]](https://www.biorxiv.org/content/10.1101/2022.07.20.500902v2.full.pdf+html) was released - value: 2022-10-31 + url: https://www.biorxiv.org/content/10.1101/2022.07.20.500902v2.full.pdf+html + model_card: none + modality: protein sequence + size: 15B parameters (dense model) + analysis: '' dependencies: [UniRef50, UniRef90] - description: ESM-2 is a series of protein language models trained on protein sequences - feedback: '' - intended_uses: '' + training_emissions: '' + training_time: '' + training_hardware: '' + quality_control: '' + access: + value: open + explanation: Models are available for download from [[GitHub repository]](https://github.com/facebookresearch/esm#available-models) license: - explanation: "The license is provided in the [[Github repository]](https://github.com/facebookresearch/esm#available-models)\n" value: MIT - modality: - explanation: protein sequence - value: text; text - model_card: none + explanation: > + The license is provided in the [[Github repository]](https://github.com/facebookresearch/esm#available-models) + intended_uses: '' + prohibited_uses: '' monitoring: '' - name: ESM-2 + feedback: '' + +- type: dataset + name: PMD + # General organization: Meta - prohibited_uses: '' - quality_control: '' - size: 15B parameters (dense) - training_emissions: '' - training_hardware: '' - training_time: '' - type: model - url: https://www.biorxiv.org/content/10.1101/2022.07.20.500902v2.full.pdf+html -- access: closed - analysis: none + description: PMD (Public Multimodal Datasets) is a collection of image-text datasets + introduced in the FLAVA work. 
created_date: - explanation: The date the model paper was released value: 2021-12-08 + explanation: The date the model paper was released + url: https://arxiv.org/abs/2112.04482 datasheet: none + modality: text, image + size: 70M + sample: [] + analysis: none + # Construction dependencies: - COCO - YFCC100M @@ -44,286 +55,329 @@ - Wikipedia - Conceptual Captions - Red Caps - description: PMD (Public Multimodal Datasets) is a collection of image-text datasets - introduced in the FLAVA work. + license: + value: unknown + explanation: > + The asset isn't released, and hence the license is unknown. + included: none excluded: YFCC100M is filtered for non-English captions and very short (< 2 word) captions. - feedback: none - included: none + quality_control: Beyond filtering mentioned in excluded, nothing further is done. + # Downstream + access: closed intended_uses: unknown - license: - explanation: "The asset isn't released, and hence the license is unknown.\n" - value: unknown - modality: - explanation: text, image - value: image, text + prohibited_uses: unknown monitoring: none - name: PMD + feedback: none + +- type: model + name: FLAVA organization: Meta - prohibited_uses: unknown - quality_control: Beyond filtering mentioned in excluded, nothing further is done. - sample: [] - size: 70M - type: dataset + description: FLAVA is a multimodal model composed of an image encoder, text encoder, + and multimodal encoder. + created_date: + value: 2021-12-08 + explanation: The date the model paper was released url: https://arxiv.org/abs/2112.04482 -- access: - explanation: "Model checkpoints are available for download from the [[HuggingFace\ - \ repository]](https://huggingface.co/facebook/flava-full)\n" - value: open + model_card: https://huggingface.co/facebook/flava-full + modality: Text (English) and Image + size: + value: 306M + explanation: > + 110M (Language encoder) + 86M (Vision encoder) + 110M (mul encoder) analysis: FLAVA is benchmarked on a range of vision-only (e.g. CIFAR-10), language-only (e.g. GLUE), and multimodal (e.g. Hateful Memes) standard evaluations. - created_date: - explanation: The date the model paper was released - value: 2021-12-08 dependencies: [PMD] - description: FLAVA is a multimodal model composed of an image encoder, text encoder, - and multimodal encoder. - feedback: https://huggingface.co/facebook/flava-full/discussions - intended_uses: "Per the [[HuggingFace repository]](https://huggingface.co/facebook/flava-full),\ - \ \"The model is intended to serve as a reproducible research artifact for research\ - \ communities in the light of models whose exact reproduction details are never\ - \ released such as CLIP and SimVLM.\"\n" + training_emissions: unknown + training_time: unknown + training_hardware: unknown + quality_control: FLAVA introduces a variety of new modeling techniques, specifically + with an interest in improved text-image alignment through contrastive objectives. 
+ access: + value: open + explanation: > + Model checkpoints are available for download from the [[HuggingFace + repository]](https://huggingface.co/facebook/flava-full) license: - explanation: "The license is provided in the [[HuggingFace repository]](https://huggingface.co/facebook/flava-full)\n" value: BSD-3-Clause - modality: - explanation: Text (English) and Image - value: image, text; image, text - model_card: https://huggingface.co/facebook/flava-full + explanation: > + The license is provided in the [[HuggingFace repository]](https://huggingface.co/facebook/flava-full) + intended_uses: > + Per the [[HuggingFace repository]](https://huggingface.co/facebook/flava-full), + "The model is intended to serve as a reproducible research artifact for research + communities in the light of models whose exact reproduction details are never + released such as CLIP and SimVLM." + prohibited_uses: > + Per the [[HuggingFace repository]](https://huggingface.co/facebook/flava-full), + "Any deployed use case of the model - whether commercial or not" - is currently + out of scope. monitoring: none - name: FLAVA + feedback: https://huggingface.co/facebook/flava-full/discussions + +- type: dataset + name: The Galactica Corpus + # General organization: Meta - prohibited_uses: "Per the [[HuggingFace repository]](https://huggingface.co/facebook/flava-full),\ - \ \"Any deployed use case of the model - whether commercial or not\" - is currently\ - \ out of scope.\n" - quality_control: FLAVA introduces a variety of new modeling techniques, specifically - with an interest in improved text-image alignment through contrastive objectives. - size: 306M parameters (dense) - training_emissions: unknown - training_hardware: unknown - training_time: unknown - type: model - url: https://arxiv.org/abs/2112.04482 -- access: closed - analysis: none + description: The Galactica Corpus is a collection of scientific datasets introduced + in the Galactica work. created_date: - explanation: "The date the Galactica paper was released\n" value: 2022-11-15 + explanation: > + The date the Galactica paper was released + url: https://galactica.org/static/paper.pdf datasheet: none + modality: text + size: 106B tokens + sample: [] + analysis: none + # Construction dependencies: [CommonCrawl, Wikipedia, arXiv] - description: The Galactica Corpus is a collection of scientific datasets introduced - in the Galactica work. - excluded: '' - feedback: none + license: + value: unknown + explanation: > + The asset isn't released, and hence the license is unknown. included: Prompts and reasoning data is explicitly included to improve model capabilities derived from this data. + excluded: '' + quality_control: '' + # Downstream + access: closed intended_uses: unknown - license: - explanation: "The asset isn't released, and hence the license is unknown.\n" - value: unknown - modality: text + prohibited_uses: unknown monitoring: none - name: The Galactica Corpus + feedback: none + +- type: model + name: Galactica organization: Meta - prohibited_uses: unknown - quality_control: '' - sample: [] - size: 106B tokens - type: dataset - url: https://galactica.org/static/paper.pdf -- access: - explanation: Model checkpoints freely available at https://github.com/paperswithcode/galai - value: open - analysis: '' + description: Galactica is a family of autoregressive language models. 
created_date: - explanation: "The date the Galactica paper was released\n" value: 2022-11-15 - dependencies: [The Galactica Corpus] - description: Galactica is a family of autoregressive language models. - feedback: '' - intended_uses: '' - license: - explanation: https://github.com/paperswithcode/galai/blob/main/LICENSE-MODEL.md - value: CC BY-NC 4.0 - modality: - explanation: Text (English), Code, Math, Chemistry, Biology - value: code, text; code, text + explanation: > + The date the Galactica paper was released + url: https://galactica.org/static/paper.pdf model_card: https://huggingface.co/facebook/galactica-6.7b - monitoring: '' - name: Galactica - organization: Meta - prohibited_uses: '' - quality_control: '' - size: 120B parameters (dense) + modality: Text (English), Code, Math, Chemistry, Biology + analysis: '' + size: 120B parameters (dense model) + dependencies: [The Galactica Corpus] training_emissions: unknown + training_time: unknown training_hardware: Meta AI Cluster. Trained on 1024 80GB A100 GPUs (128 8xA100 80GB nodes) - training_time: unknown - type: model - url: https://galactica.org/static/paper.pdf -- access: - explanation: Model weights are available via the [[HuggingFace repository]](https://huggingface.co/facebook/incoder-6B) + quality_control: '' + access: value: open - analysis: none - created_date: - explanation: The date the model paper was released - value: 2022-04-12 - dependencies: [] - description: InCoder is a language model trained on code with a causal masking - objective - feedback: '' - intended_uses: '' + explanation: Model checkpoints freely available at https://github.com/paperswithcode/galai license: - explanation: The license is provided in the [[HuggingFace repository]](https://huggingface.co/facebook/incoder-6B?text=My+name+is+Lewis+and+I+like+to) value: CC BY-NC 4.0 - modality: - explanation: text (English) and code - value: code, text; code, text - model_card: none + explanation: https://github.com/paperswithcode/galai/blob/main/LICENSE-MODEL.md + intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' + +- type: model name: InCoder organization: Meta, CMU, TTI-Chicago, UC Berkeley, University of Washington - prohibited_uses: '' - quality_control: unknown - size: 6B parameters (dense) + description: InCoder is a language model trained on code with a causal masking + objective + created_date: + value: 2022-04-12 + explanation: The date the model paper was released + url: https://arxiv.org/abs/2204.05999 + model_card: none + modality: text (English) and code + analysis: none + size: 6B parameters (dense model) + dependencies: [] training_emissions: Unknown - training_hardware: 248 V100 GPUs, according to [[the paper]](https://arxiv.org/pdf/2204.05999.pdf) training_time: 24 days, according to [[the paper]](https://arxiv.org/pdf/2204.05999.pdf) - type: model - url: https://arxiv.org/abs/2204.05999 -- access: - explanation: The 175B model requires manual approval from Meta to access. Other - models are available through HuggingFace. - value: limited - analysis: '' - created_date: - explanation: "The date the OPT paper was submitted to Arxiv\n" - value: 2022-05-01 - dependencies: [RoBERTa dataset, The Pile, PushShift.io Reddit] - description: OPT is a family of autoregressive language models. 
- feedback: '' - intended_uses: '' + training_hardware: 248 V100 GPUs, according to [[the paper]](https://arxiv.org/pdf/2204.05999.pdf) + quality_control: unknown + access: + value: open + explanation: Model weights are available via the [[HuggingFace repository]](https://huggingface.co/facebook/incoder-6B) license: - explanation: "All released with the [[OPT-175B License]](https://github.com/facebookresearch/metaseq/blob/main/projects/OPT/MODEL_LICENSE.md),\ - \ except 66B (TBD) and 17B (requires manual approval)\n" - value: OPT-175B License - modality: - explanation: text (English) - value: text; text - model_card: https://arxiv.org/pdf/2205.01068.pdf + value: CC BY-NC 4.0 + explanation: The license is provided in the [[HuggingFace repository]](https://huggingface.co/facebook/incoder-6B?text=My+name+is+Lewis+and+I+like+to) + intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' + +- type: model name: OPT organization: Meta - prohibited_uses: '' - quality_control: '' - size: 175B parameters (dense) + description: OPT is a family of autoregressive language models. + created_date: + value: 2022-05-01 + explanation: > + The date the OPT paper was submitted to Arxiv + url: https://arxiv.org/abs/2205.01068 + model_card: https://arxiv.org/pdf/2205.01068.pdf + modality: text (English) + analysis: '' + size: 175B parameters (dense model) + dependencies: [RoBERTa dataset, The Pile, PushShift.io Reddit] training_emissions: - explanation: "Estimate by authors for the OPT-175B model only. Not including\ - \ ablations and baselines.\n" value: 75 tCO2e - training_hardware: Meta AI cluster. Trained on 992 80GB A100 GPUs + explanation: > + Estimate by authors for the OPT-175B model only. Not including ablations and + baselines. training_time: '' - type: model - url: https://arxiv.org/abs/2205.01068 -- access: - explanation: "The datasets involved are public, but the full dataset is not\ - \ directly available, nor are filtering scripts.\n" + training_hardware: Meta AI cluster. Trained on 992 80GB A100 GPUs + quality_control: '' + access: value: limited - analysis: '' + explanation: The 175B model requires manual approval from Meta to access. Other + models are available through HuggingFace. + license: + value: OPT-175B License + explanation: > + All released with the + [[OPT-175B License]](https://github.com/facebookresearch/metaseq/blob/main/projects/OPT/MODEL_LICENSE.md), + except 66B (TBD) and 17B (requires manual approval) + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' + +- type: dataset + name: Make-A-Video dataset + # General + organization: Meta + description: > + The Make-A-Video dataset is the dataset used to train Make-A-Video, which includes + both image-text and video-only datasets with specific and significant filtering. created_date: - explanation: "The date that Make-A-Video was posted to arXiv [[arXiv]] (https://arxiv.org/abs/2209.14792).\n" value: 2022-09-29 + explanation: > + The date that Make-A-Video was posted to arXiv + [[arXiv]] + (https://arxiv.org/abs/2209.14792). 
+ url: https://arxiv.org/pdf/2209.14792.pdf datasheet: none + modality: video, image-text + size: 20M video clips, 2.3B image-text pairs + sample: [] + analysis: '' + # Construction dependencies: [LAION-5B, WebVid-10M, HD-VILA-100M] - description: "The Make-A-Video dataset is the dataset used to train Make-A-Video,\ - \ which includes both image-text and video-only datasets with specific and significant\ - \ filtering.\n" - excluded: "The LAION-5B dataset is filtered to 2.3B by removing NSFW images using\ - \ [https://github.com/GantMan/nsfw](https://github.com/GantMan/nsfw), toxic\ - \ words in text, and images with watermark probability > 0.5. The HD-VILA-100M\ - \ is randomly subsampled to 10M video clips.\n" - feedback: - explanation: "No feedback mechanism is mentioned by the authors.\n" + license: value: none + explanation: > + No license was found, though the underlying datasets are public and have + licenses. included: - explanation: "Data from the three underlying datasets is filtered, but nothing\ - \ is included beyond this.\n" value: none + explanation: > + Data from the three underlying datasets is filtered, but nothing is included + beyond this. + excluded: > + The LAION-5B dataset is filtered to 2.3B by removing NSFW images using [https://github.com/GantMan/nsfw](https://github.com/GantMan/nsfw), + toxic words in text, and images with watermark probability > 0.5. The HD-VILA-100M + is randomly subsampled to 10M video clips. + quality_control: > + The authors exclude NSFW, toxic, and likely watermarked data from LAION-5B. + # Downstream + access: + value: limited + explanation: > + The datasets involved are public, but the full dataset is not directly available, + nor are filtering scripts. intended_uses: unknown - license: - explanation: "No license was found, though the underlying datasets are public\ - \ and have licenses.\n" - value: none - modality: - explanation: video, image-text - value: image, text, video + prohibited_uses: unknown monitoring: - explanation: "There is no information on how Meta is internally monitoring the\ - \ use of the dataset.\n" value: unknown - name: Make-A-Video dataset + explanation: > + There is no information on how Meta is internally monitoring + the use of the dataset. + feedback: + value: none + explanation: > + No feedback mechanism is mentioned by the authors. + +- type: model + name: Make-A-Video + # General organization: Meta - prohibited_uses: unknown - quality_control: "The authors exclude NSFW, toxic, and likely watermarked data\ - \ from LAION-5B.\n" - sample: [] - size: 20M video clips, 2.3B image-text pairs - type: dataset - url: https://arxiv.org/pdf/2209.14792.pdf -- access: - explanation: "The model has not been released; a form existed to potentially\ - \ acquire access but is now closed as of 2022-12-07 [[Access Form]](https://docs.google.com/forms/u/0/d/e/1FAIpQLSfMjC57wcXWUDV0UbS2Tn6VhjLEiCXaHvWZuWgWRa-Zx8-Few/closedform).\n" - value: closed - analysis: "Model performance was evaluated using automated (Frechet Video Distance;\ - \ Frechet Inception Distance) and human evaluation on two datasets (UCF-101,\ - \ MSR-VTT) in the zero-shot setting.\n" + description: > + Make-A-Video is a model for Text-to-Video Generation without Text-Video Data. created_date: - explanation: "The date that Make-A-Video was posted to arXiv [[arXiv]] (https://arxiv.org/abs/2209.14792).\n" value: 2022-09-29 + explanation: > + The date that Make-A-Video was posted to arXiv + [[arXiv]] + (https://arxiv.org/abs/2209.14792). 
+ url: https://arxiv.org/pdf/2209.14792.pdf + model_card: none + modality: text, video + size: + value: unknown + explanation: > + The authors do not state the model size in the paper. + analysis: > + Model performance was evaluated using automated (Frechet Video Distance; Frechet + Inception Distance) and human evaluation on two datasets (UCF-101, MSR-VTT) + in the zero-shot setting. + # Construction dependencies: [Make-A-Video dataset] - description: "Make-A-Video is a model for Text-to-Video Generation without Text-Video\ - \ Data.\n" - feedback: - explanation: "Authors do not mention or provide a feedback mechanism.\n" - value: none - intended_uses: - explanation: "Authors do not report the intended uses.\n" + training_emissions: value: unknown - license: - explanation: No license was found. - value: none - modality: - explanation: text, video - value: text, video; text, video - model_card: none - monitoring: - explanation: "Authors do not report the monitoring process for Make-A-Video\ - \ internally at Meta.\n" + explanation: > + Authors do not report the training emissions. + training_time: value: unknown - name: Make-A-Video - organization: Meta - prohibited_uses: - explanation: "Authors do not report the prohibited uses.\n" + explanation: > + Authors do not report the training time. + training_hardware: value: unknown + explanation: > + Authors do not report the training hardware or provider. quality_control: - explanation: "Authors do not report specific quality control steps taken in\ - \ modeling, though filtering is done in producing the Make-A-Video dataset.\n" value: none - size: unknown - training_emissions: - explanation: "Authors do not report the training emissions.\n" + explanation: > + Authors do not report specific quality control steps taken in modeling, though + filtering is done in producing the Make-A-Video dataset. + # Downstream + access: + value: closed + explanation: > + The model has not been released; a form existed to potentially acquire access + but is now closed as of 2022-12-07 + [[Access Form]](https://docs.google.com/forms/u/0/d/e/1FAIpQLSfMjC57wcXWUDV0UbS2Tn6VhjLEiCXaHvWZuWgWRa-Zx8-Few/closedform). + license: + value: none + explanation: No license was found. + intended_uses: value: unknown - training_hardware: - explanation: "Authors do not report the training hardware or provider.\n" + explanation: > + Authors do not report the intended uses. + prohibited_uses: value: unknown - training_time: - explanation: "Authors do not report the training time.\n" + explanation: > + Authors do not report the prohibited uses. + monitoring: value: unknown - type: model - url: https://arxiv.org/pdf/2209.14792.pdf -- access: open - analysis: '' + explanation: > + Authors do not report the monitoring process for Make-A-Video internally at + Meta. + feedback: + value: none + explanation: > + Authors do not mention or provide a feedback mechanism. 
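+
+# Illustrative sketch of the field layout that reordered model entries in this
+# file follow. All names and values below are placeholders, not a real asset:
+#
+#  - type: model
+#    name: Example-Model
+#    organization: Example Organization
+#    description: one-sentence summary of the model
+#    created_date: YYYY-MM-DD
+#    url: https://example.org/paper
+#    model_card: none
+#    modality: text
+#    analysis: ''
+#    size: unknown
+#    dependencies: []
+#    training_emissions: unknown
+#    training_time: unknown
+#    training_hardware: unknown
+#    quality_control: ''
+#    access: open
+#    license: unknown
+#    intended_uses: ''
+#    prohibited_uses: ''
+#    monitoring: ''
+#    feedback: ''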
+ +- type: model + name: LLaMA + organization: Meta + description: '' created_date: 2023-02-24 + url: https://arxiv.org/abs/2302.13971 + model_card: '' + modality: Text + analysis: '' + size: 65B parameters (dense model) dependencies: - CommonCrawl - C4 @@ -332,311 +386,306 @@ - BooksCorpus - arXiv - StackExchange - description: '' - feedback: '' - intended_uses: '' + training_emissions: '' + training_time: '' + training_hardware: '' + quality_control: '' + access: open license: LLaMa License (model weights), GPLv3 (code) - modality: - explanation: Text - value: text; text - model_card: '' + intended_uses: '' + prohibited_uses: '' monitoring: '' - name: LLaMA + feedback: '' + +- type: model + name: LLaMA 2 organization: Meta - prohibited_uses: '' - quality_control: '' - size: 65B parameters (dense) - training_emissions: '' - training_hardware: '' - training_time: '' - type: model - url: https://arxiv.org/abs/2302.13971 -- access: open - analysis: Evaluated on standard academic benchmarks and internal Meta libraries. - created_date: 2023-07-18 - dependencies: [] description: LLaMA 2 is an updated version of LLaMA trained on a new mix of publicly available data. - feedback: '' + created_date: 2023-07-18 + url: https://ai.meta.com/resources/models-and-libraries/llama/ + model_card: Can be found at appendix of paper at https://ai.meta.com/research/publications/llama-2-open-foundation-and-fine-tuned-chat-models/ + modality: text + analysis: Evaluated on standard academic benchmarks and internal Meta libraries. + size: 70B parameters (dense model) + dependencies: [] + training_emissions: 539 tCO2eq + training_time: '' + training_hardware: NVIDIA A100-80GB GPUs (TDP of 350-400W) + quality_control: '' + access: open + license: + value: custom + explanation: The license can be found at https://ai.meta.com/resources/models-and-libraries/llama-downloads/ intended_uses: LLaMA 2 is intended for commercial and research use in English. Tuned models are intended for assistant-like chat, whereas pretrained models can be adapted for a variety of natural language generation tasks. - license: - explanation: The license can be found at https://ai.meta.com/resources/models-and-libraries/llama-downloads/ - value: custom - modality: - explanation: text - value: text; text - model_card: Can be found at appendix of paper at https://ai.meta.com/research/publications/llama-2-open-foundation-and-fine-tuned-chat-models/ - monitoring: '' - name: LLaMA 2 - organization: Meta prohibited_uses: Use in any manner that violates applicable laws or regulations (including trade compliance laws). Use in languages other than English. Use in any other way that is prohibited by the Acceptable Use Policy and Licensing Agreement for LLaMA 2. 
- quality_control: '' - size: 70B parameters (dense) - training_emissions: 539 tCO2eq - training_hardware: NVIDIA A100-80GB GPUs (TDP of 350-400W) - training_time: '' - type: model - url: https://ai.meta.com/resources/models-and-libraries/llama/ -- access: open - analysis: '' - created_date: 2022-12-22 - dependencies: [OPT, OPT-IML Bench] - description: '' - feedback: '' - intended_uses: '' - license: OPT-IML 175B License - modality: - explanation: text - value: text; text - model_card: '' monitoring: '' + feedback: '' + +- type: model name: OPT-IML organization: Meta - prohibited_uses: '' - quality_control: '' + description: '' + created_date: 2022-12-22 + url: https://arxiv.org/abs/2212.12017 + model_card: '' + modality: text + analysis: '' size: 175B parameters (dense) + dependencies: [OPT, OPT-IML Bench] training_emissions: '' - training_hardware: '' training_time: '' - type: model - url: https://arxiv.org/abs/2212.12017 -- access: - explanation: "The full dataset can be downloaded at [[SA-1B Download]](https://ai.facebook.com/datasets/segment-anything-downloads/).\ - \ A 50k image preview of the full dataset is available [[here]](https://segment-anything.com/dataset/index.html).\n" - value: open - analysis: '' + training_hardware: '' + quality_control: '' + access: open + license: OPT-IML 175B License + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' + +- type: dataset + name: SA-1B + # General + organization: Meta + description: > + SA-1B (Segment Anything 1 Billion) is a dataset designed for training general-purpose + object segmentation models from open world images. It consists of 11M diverse, + high-resolution, privacy protecting images and 1.1B high-quality segmentation + masks. created_date: + value: 2023-04-05 explanation: The date the [[Meta blog post]](https://ai.facebook.com/blog/segment-anything-foundation-model-image-segmentation/) was released. - value: 2023-04-05 + url: https://ai.facebook.com/datasets/segment-anything/ datasheet: + value: https://arxiv.org/pdf/2304.02643.pdf#page=25 explanation: Datasheet can be found in the Appendix section of the Segment Anything paper. - value: https://arxiv.org/pdf/2304.02643.pdf#page=25 + modality: image + size: 11M images, 1.1B mask annotations + sample: [] + analysis: '' + # Construction dependencies: [] - description: "SA-1B (Segment Anything 1 Billion) is a dataset designed for training\ - \ general-purpose object segmentation models from open world images. It consists\ - \ of 11M diverse, high-resolution, privacy protecting images and 1.1B high-quality\ - \ segmentation masks.\n" - excluded: - explanation: See [[Datasheet]](https://arxiv.org/pdf/2304.02643.pdf#page=25) - value: "\"We withheld ~2k randomly selected images for testing purposes.\" \ - \ \"Each image is accompanied by a short caption that describes the content\ - \ and place of the photo in a free form text. Per our agreement with the photo\ - \ provider we are not allowed to release these captions.\"\n" - feedback: Feedback can be given via the feedback form on their website [segment-anything.com](https://segment-anything.com/) - or by emailing at segment-anything at meta.com. + license: + value: SA-1B Dataset Research License + explanation: SA-1B is released under a favorable license agreement for certain + research uses and with protections for researchers. See [[SA-1B Dataset Research + License]](https://ai.facebook.com/datasets/segment-anything-downloads/). 
included: + value: > + SA-1B consists of 11M diverse, high-resolution (averaging 1500×2250 pixels), + and privacy protecting images collected and licensed from a third party photo + company. The images are photos taken from a camera, i.e. not artwork. + The images vary in subject matter. Common themes of the images include: locations, + objects, scenes. + The dataset includes 1.1B high-quality segmentation masks collected with the + Segment Anything Data Engine. + SA-1B only includes automatically generated masks (99.1%), as the authors + conclude after experiments that the automatic masks are high quality and effective + for training models. The masks range from large scale objects such as buildings + to fine grained details such as door handles. + Masks are provided in the COCO run-length encoding (RLE) annotation format. explanation: According to section [[Segment Anything Dataset]](https://arxiv.org/pdf/2304.02643.pdf#section.5) of the paper and [[SA-1B website]](https://ai.facebook.com/datasets/segment-anything/). - value: "SA-1B consists of 11M diverse, high-resolution (averaging 1500×2250\ - \ pixels), and privacy protecting images collected and licensed from a third\ - \ party photo company. The images are photos taken from a camera, i.e. not\ - \ artwork. The images vary in subject matter. Common themes of the images\ - \ include: locations, objects, scenes. The dataset includes 1.1B high-quality\ - \ segmentation masks collected with the Segment Anything Data Engine. SA-1B\ - \ only includes automatically generated masks (99.1%), as the authors conclude\ - \ after experiments that the automatic masks are high quality and effective\ - \ for training models. The masks range from large scale objects such as buildings\ - \ to fine grained details such as door handles. Masks are provided in the\ - \ COCO run-length encoding (RLE) annotation format.\n" + excluded: + value: > + "We withheld ~2k randomly selected images for testing purposes." "Each image + is accompanied by a short caption that describes the content and place of + the photo in a free form text. Per our agreement with the photo provider we + are not allowed to release these captions." + explanation: See [[Datasheet]](https://arxiv.org/pdf/2304.02643.pdf#page=25) + quality_control: + value: | + - Dataset quality: + Due to potential accessibility and storage challenges, the original high-resolution images (averaging 3300×4950 pixels) were downsampled to an average resolution of 1500×2250 pixels. Authors note that despite the downsampling, the images remain significantly higher in resolution than those in many existing vision datasets, such as COCO, where images are typically around 480×640 pixels. + The images were processed to blur faces and license plates to protect the identities of those in the image. + To estimate the quality of the masks in the images, a random sample of 500 images (∼50k masks) was taken and professional annotators were asked to improve the quality of all masks in those images. + - Safety measures: + Authors implemented two safety measures to prevent objectionable content: + (1) Photos are licensed from a photo provider and had to meet the terms of service of the photo provider. Authors requested that all objectionable content be filtered from the images they licensed. + (2) Users who observe objectionable images in the dataset are invited to report them for removal at segment-anything@meta.com. 
+ Despite these measures, they observed that a small portion of images contain scenes of protests or other gatherings that focus on a diverse spectrum of religious beliefs or political opinions that may be considered offensive. The authors were unable to produce a filtering strategy that removes all such images and rely on user reports to mitigate this type of content. + explanation: According to sections [[Segment Anything Dataset]](https://arxiv.org/pdf/2304.02643.pdf#section.5) + and [[Datasheet]](https://arxiv.org/pdf/2304.02643.pdf#page=25) of the paper. + # Downstream + access: + value: open + explanation: > + The full dataset can be downloaded at [[SA-1B Download]](https://ai.facebook.com/datasets/segment-anything-downloads/). + A 50k image preview of the full dataset is available [[here]](https://segment-anything.com/dataset/index.html). intended_uses: - explanation: See [[SA-1B website]](https://ai.facebook.com/datasets/segment-anything/) value: SA-1B is intended to be used for research purposes only. It allows access to a privacy protecting and copyright friendly large-scale image dataset. Researchers can use it to train and evaluate generic object segmentation models. - license: - explanation: SA-1B is released under a favorable license agreement for certain - research uses and with protections for researchers. See [[SA-1B Dataset Research - License]](https://ai.facebook.com/datasets/segment-anything-downloads/). - value: SA-1B Dataset Research License - modality: image + explanation: See [[SA-1B website]](https://ai.facebook.com/datasets/segment-anything/) + prohibited_uses: + value: > + Authors note the following limitations of the dataset: + The masks are generated by a segmentation model, so there may be errors + or inconsistencies in the masks. + While no two images are the same, there are instances of images of the same + subject taken close together in time. + The dataset contains scenes of protests, or other gatherings that may suggest + religious beliefs, political opinions or union memberships that may be offensive. + + explanation: See [[Datasheet]](https://arxiv.org/pdf/2304.02643.pdf#page=25) monitoring: + value: > + The dataset will be hosted at https://ai.facebook.com/datasets/segment-anything + and maintained by Meta AI. + "If a user observes objectionable image(s) in the dataset, we invite them + to report the image(s) at segment-anything at meta.com for removal" + "To aid reproducibility of research using SA-1B, the only updates (to the + dataset) will be to remove reported images." + "We encourage users to gather further annotations for SA-1B. Any users who + generate annotations will be liable for hosting and distributing their annotations." explanation: See [[Datasheet]](https://arxiv.org/pdf/2304.02643.pdf#page=25) - value: "The dataset will be hosted at https://ai.facebook.com/datasets/segment-anything\ - \ and maintained by Meta AI. \"If a user observes objectionable image(s) in\ - \ the dataset, we invite them to report the image(s) at segment-anything at\ - \ meta.com for removal\" \"To aid reproducibility of research using SA-1B,\ - \ the only updates (to the dataset) will be to remove reported images.\" \"\ - We encourage users to gather further annotations for SA-1B. 
Any users who\ - \ generate annotations will be liable for hosting and distributing their annotations.\"\ - \n" - name: SA-1B + feedback: Feedback can be given via the feedback form on their website [segment-anything.com](https://segment-anything.com/) + or by emailing at segment-anything at meta.com. + +- type: model + name: SAM + # General organization: Meta - prohibited_uses: - explanation: See [[Datasheet]](https://arxiv.org/pdf/2304.02643.pdf#page=25) - value: "Authors note the following limitations of the dataset:\n The masks\ - \ are generated by a segmentation model, so there may be errors\nor inconsistencies\ - \ in the masks.\n While no two images are the same, there are instances of\ - \ images of the same\nsubject taken close together in time.\n The dataset\ - \ contains scenes of protests, or other gatherings that may suggest\nreligious\ - \ beliefs, political opinions or union memberships that may be offensive.\n" - quality_control: - explanation: According to sections [[Segment Anything Dataset]](https://arxiv.org/pdf/2304.02643.pdf#section.5) - and [[Datasheet]](https://arxiv.org/pdf/2304.02643.pdf#page=25) of the paper. - value: "- Dataset quality:\n Due to potential accessibility and storage challenges,\ - \ the original high-resolution images (averaging 3300×4950 pixels) were downsampled\ - \ to an average resolution of 1500×2250 pixels. Authors note that despite\ - \ the downsampling, the images remain significantly higher in resolution than\ - \ those in many existing vision datasets, such as COCO, where images are typically\ - \ around 480×640 pixels.\n The images were processed to blur faces and license\ - \ plates to protect the identities of those in the image.\n To estimate the\ - \ quality of the masks in the images, a random sample of 500 images (∼50k\ - \ masks) was taken and professional annotators were asked to improve the quality\ - \ of all masks in those images.\n- Safety measures:\n Authors implemented\ - \ two safety measures to prevent objectionable content:\n (1) Photos are\ - \ licensed from a photo provider and had to meet the terms of service of the\ - \ photo provider. Authors requested that all objectionable content be filtered\ - \ from the images they licensed.\n (2) Users who observe objectionable\ - \ images in the dataset are invited to report them for removal at segment-anything@meta.com.\n\ - \ Despite these measures, they observed that a small portion of images contain\ - \ scenes of protests or other gatherings that focus on a diverse spectrum\ - \ of religious beliefs or political opinions that may be considered offensive.\ - \ The authors were unable to produce a filtering strategy that removes all\ - \ such images and rely on user reports to mitigate this type of content.\n" - sample: [] - size: 11M images, 1.1B mask annotations - type: dataset - url: https://ai.facebook.com/datasets/segment-anything/ -- access: - explanation: "Inference code and model checkpoints are available on the model's\ - \ [[GitHub repository]](https://github.com/facebookresearch/segment-anything).\ - \ Its training dataset SA-1B can be used for research purposes and is available\ - \ for download [here](https://ai.facebook.com/datasets/segment-anything-downloads/).\n" - value: open - analysis: - explanation: See [[Zero-Shot Transfer Experiments]](https://arxiv.org/pdf/2304.02643.pdf#section.7) - for more details. - value: "\"We extensively evaluate SAM. 
First, using a diverse new suite of 23\ - \ segmentation datasets, we find that SAM produces high-quality masks from\ - \ a single foreground point, often only slightly below that of the manually\ - \ annotated ground truth. Second, we find consistently strong quantitative\ - \ and qualitative results on a variety of downstream tasks under a zero-shot\ - \ transfer protocol using prompt engineering, including edge detection, object\ - \ proposal generation, instance segmentation, and a preliminary exploration\ - \ of text-to-mask prediction.\"\n" - created_date: - explanation: The date the [[Meta blog post]](https://ai.facebook.com/blog/segment-anything-foundation-model-image-segmentation/) - was released. - value: 2023-04-05 - dependencies: [SA-1B] description: SAM (Segment Anything Model) is a foundation model for image segmentation. The model is designed and trained to be promptable, and supports flexible prompts (point, box, mask and free-form text) to compute masks in real-time to allow interactive use. - feedback: Feedback can be given via the feedback form on their website [segment-anything.com](https://segment-anything.com/) - or by emailing at segment-anything at meta.com. - intended_uses: - explanation: See [[Model card]](https://arxiv.org/pdf/2304.02643.pdf#page=28) - value: "\"SAM is intended to be used for any prompt-based segmentation task.\ - \ We explored its use in segmenting objects from a point, edge detection,\ - \ segmenting all objects, and segmenting detected objects. We explored how\ - \ SAM can integrate with other vision models to segment objects from text.\"\ - \n" - license: - explanation: See [[LICENSE]](https://github.com/facebookresearch/segment-anything/blob/main/LICENSE) - value: Apache 2.0 - modality: - explanation: image, text - value: image, text; image, text + created_date: + value: 2023-04-05 + explanation: The date the [[Meta blog post]](https://ai.facebook.com/blog/segment-anything-foundation-model-image-segmentation/) + was released. + url: https://arxiv.org/pdf/2304.02643.pdf model_card: - explanation: Model card can be found in the Appendix section of the paper. value: https://arxiv.org/pdf/2304.02643.pdf#page=28 - monitoring: '' - name: SAM - organization: Meta - prohibited_uses: - explanation: See [[Discussion]](https://arxiv.org/pdf/2304.02643.pdf#section.8) - value: "For out-of-scope use cases see terms of use in [[LICENSE]](https://github.com/facebookresearch/segment-anything/blob/main/LICENSE).\ - \ Authors also discuss the following limitations of the model: \"While SAM\ - \ performs well in general, it is not perfect. It can miss fine structures,\ - \ hallucinates small disconnected components at times, and does not produce\ - \ boundaries as crisply as more computationally intensive methods that “zoom-in”,\ - \ e.g. [18]. In general, we expect dedicated interactive segmentation methods\ - \ to outperform SAM when many points are provided, e.g. [67]. Unlike these\ - \ methods, SAM is designed for generality and breadth of use rather than high\ - \ IoU interactive segmentation. Moreover, SAM can process prompts in real-time,\ - \ but nevertheless SAM's overall performance is not real-time when using a\ - \ heavy image encoder. Our foray into the text-to-mask task is exploratory\ - \ and not entirely robust, although we believe it can be improved with more\ - \ effort. While SAM can perform many tasks, it is unclear how to design simple\ - \ prompts that implement semantic and panoptic segmentation. 
Finally, there\ - \ are domain-specific tools, such as [7], that we expect to outperform SAM\ - \ in their respective domains.\"\n" - quality_control: - explanation: See [[Segment Anything RAI Analysis]](https://arxiv.org/pdf/2304.02643.pdf#section.6) + explanation: Model card can be found in the Appendix section of the paper. + modality: image, text + analysis: + value: > + "We extensively evaluate SAM. + First, using a diverse new suite of 23 segmentation datasets, we find that + SAM produces high-quality masks from a single foreground point, often only + slightly below that of the manually annotated ground truth. + Second, we find consistently strong quantitative and qualitative results on + a variety of downstream tasks under a zero-shot transfer protocol using prompt + engineering, including edge detection, object proposal generation, instance + segmentation, and a preliminary exploration of text-to-mask prediction." + explanation: See [[Zero-Shot Transfer Experiments]](https://arxiv.org/pdf/2304.02643.pdf#section.7) for more details. - value: "\"We perform a Responsible AI (RAI) analysis of our work by investigating\ - \ potential fairness concerns and biases when using SA-1B and SAM. We focus\ - \ on the geographic and income distribution of SA-1B and fairness of SAM across\ - \ protected attributes of people.\"\n" size: unknown + # Construction + dependencies: [SA-1B] training_emissions: - explanation: See [[Model card]](https://arxiv.org/pdf/2304.02643.pdf#page=28) value: 2.8 metric tons of carbon dioxide - training_hardware: explanation: See [[Model card]](https://arxiv.org/pdf/2304.02643.pdf#page=28) - value: 256 A100 GPUs training_time: - explanation: See [[Model card]](https://arxiv.org/pdf/2304.02643.pdf#page=28) value: 68 hours - type: model - url: https://arxiv.org/pdf/2304.02643.pdf -- access: closed + explanation: See [[Model card]](https://arxiv.org/pdf/2304.02643.pdf#page=28) + training_hardware: + value: 256 A100 GPUs + explanation: See [[Model card]](https://arxiv.org/pdf/2304.02643.pdf#page=28) + quality_control: + value: > + "We perform a Responsible AI (RAI) analysis of our work by investigating potential + fairness concerns and biases when using SA-1B and SAM. We focus on the geographic + and income distribution of SA-1B and fairness of SAM across protected attributes + of people." + explanation: See [[Segment Anything RAI Analysis]](https://arxiv.org/pdf/2304.02643.pdf#section.6) + for more details. + # Downstream + access: + value: open + explanation: > + Inference code and model checkpoints are available on the model's + [[GitHub repository]](https://github.com/facebookresearch/segment-anything). + Its training dataset SA-1B can be used for research purposes and is available + for download [here](https://ai.facebook.com/datasets/segment-anything-downloads/). + license: + value: Apache 2.0 + explanation: See [[LICENSE]](https://github.com/facebookresearch/segment-anything/blob/main/LICENSE) + intended_uses: + value: > + "SAM is intended to be used for any prompt-based segmentation task. We explored + its use in segmenting objects from a point, edge detection, segmenting all + objects, and segmenting detected objects. We explored how SAM can integrate + with other vision models to segment objects from text." + explanation: See [[Model card]](https://arxiv.org/pdf/2304.02643.pdf#page=28) + prohibited_uses: + value: > + For out-of-scope use cases see terms of use in [[LICENSE]](https://github.com/facebookresearch/segment-anything/blob/main/LICENSE). 
+ Authors also discuss the following limitations of the model: "While SAM performs + well in general, it is not perfect. It can miss fine structures, hallucinates + small disconnected components at times, and does not produce boundaries as + crisply as more computationally intensive methods that “zoom-in”, e.g. [18]. + In general, we expect dedicated interactive segmentation methods to outperform + SAM when many points are provided, e.g. [67]. Unlike these methods, SAM is + designed for generality and breadth of use rather than high IoU interactive + segmentation. Moreover, SAM can process prompts in real-time, but nevertheless + SAM's overall performance is not real-time when using a heavy image encoder. + Our foray into the text-to-mask task is exploratory and not entirely robust, + although we believe it can be improved with more effort. While SAM can perform + many tasks, it is unclear how to design simple prompts that implement semantic + and panoptic segmentation. Finally, there are domain-specific tools, such + as [7], that we expect to outperform SAM in their respective domains." + explanation: See [[Discussion]](https://arxiv.org/pdf/2304.02643.pdf#section.8) + monitoring: '' + feedback: Feedback can be given via the feedback form on their website [segment-anything.com](https://segment-anything.com/) + or by emailing at segment-anything at meta.com. + +- type: model + name: Voicebox + organization: Meta + description: Voicebox is the first generative AI model for speech to generalize + across tasks with state-of-the-art performance. + created_date: 2023-06-16 + url: https://research.facebook.com/publications/voicebox-text-guided-multilingual-universal-speech-generation-at-scale/ + model_card: '' + modality: text and audio input and audio output analysis: Evaluated on zero-shot text-to-speech benchmarks, with Voicebox outperforming the current state-of-the-art English model VALL-E. - created_date: 2023-06-16 + size: 330M parameters dependencies: [] - description: Voicebox is the first generative AI model for speech to generalize - across tasks with state-of-the-art performance. - feedback: '' - intended_uses: '' + training_emissions: unknown + training_time: 750,000 iterations + training_hardware: 32 GPUs of unspecified type + quality_control: '' + access: closed license: '' - modality: - explanation: text and audio input and audio output - value: audio, text; audio - model_card: '' + intended_uses: '' + prohibited_uses: '' monitoring: '' - name: Voicebox + feedback: '' + +- type: model + name: PEER organization: Meta - prohibited_uses: '' - quality_control: '' - size: 330M parameters (dense) - training_emissions: unknown - training_hardware: 32 GPUs of unspecified type - training_time: 750,000 iterations - type: model - url: https://research.facebook.com/publications/voicebox-text-guided-multilingual-universal-speech-generation-at-scale/ -- access: open - analysis: PEER is evaluated on core research questions intended to gauge language - understanding, proper use of citations, instruction following, and iterative - use. - created_date: 2022-08-24 - dependencies: [] description: PEER is a collaborative language model that is trained to imitate the entire writing process itself. PEER can write drafts, add suggestions, propose edits and provide explanations for its actions. - feedback: '' - intended_uses: adapting LLMs to work with collaborative writing and updating. 
- license: '' - modality: - explanation: natural language text - value: text; text + created_date: 2022-08-24 + url: https://arxiv.org/pdf/2208.11663.pdf model_card: '' - monitoring: '' - name: PEER - organization: Meta - prohibited_uses: '' - quality_control: Heuristics and edit filtering was used on data set, which consisted - mostly of Wikipedia pages. - size: 3B parameters (dense) + modality: natural language text + analysis: PEER is evaluated on core research questions intended to gauge language + understanding, proper use of citations, instruction following, and iterative + use. + size: 3B parameters + dependencies: [] training_emissions: '' - training_hardware: 64 GPUs training_time: '' - type: model - url: https://arxiv.org/pdf/2208.11663.pdf + training_hardware: 64 GPUs + quality_control: Heuristics and edit filtering was used on data set, which consisted + mostly of Wikipedia pages. + access: open + license: '' + intended_uses: adapting LLMs to work with collaborative writing and updating. + prohibited_uses: '' + monitoring: '' + feedback: '' diff --git a/assets/microsoft.yaml b/assets/microsoft.yaml index 773e3bb4..8d324607 100644 --- a/assets/microsoft.yaml +++ b/assets/microsoft.yaml @@ -1,13 +1,17 @@ --- -- access: - explanation: Microsoft does not provide VLMo to external researchers. One author - commented that code would be pushed "soon" in [[November of 2021]](https://github.com/microsoft/unilm/issues/532), - but the repository does not contain relevant changes. - value: closed - analysis: '' + +- type: model + name: VLMo + organization: Microsoft + description: VLMo is a model for text-to-image generation created_date: - explanation: The date the model paper was released value: 2021-11-03 + explanation: The date the model paper was released + url: https://arxiv.org/abs/2111.02358 + model_card: none + modality: text (English) and image + size: 562M + analysis: '' dependencies: - Conceptual Captions - SBU Captions @@ -15,263 +19,300 @@ - Visual Genome - Wikipedia - BooksCorpus - description: VLMo is a model for text-to-image generation - feedback: '' - intended_uses: '' + training_emissions: '' + training_time: '' + training_hardware: '' + quality_control: '' + access: + value: closed + explanation: Microsoft does not provide VLMo to external researchers. One author + commented that code would be pushed "soon" in [[November of 2021]](https://github.com/microsoft/unilm/issues/532), + but the repository does not contain relevant changes. license: - explanation: No license was found. value: none - modality: - explanation: text (English) and image - value: image, text; image, text - model_card: none + explanation: No license was found. + intended_uses: '' + prohibited_uses: '' monitoring: '' - name: VLMo + feedback: '' + +- type: model + name: T-ULRv5 organization: Microsoft - prohibited_uses: '' - quality_control: '' - size: 562M parameters (dense) - training_emissions: '' - training_hardware: '' - training_time: '' - type: model - url: https://arxiv.org/abs/2111.02358 -- access: - explanation: Manual approval through early access request form required. 
- value: limited - analysis: '' + description: T-ULRv5 is a language model trained with two unique training objectives created_date: - explanation: Date model blog post was released value: 2022-09-28 - dependencies: [] - description: T-ULRv5 is a language model trained with two unique training objectives - feedback: '' - intended_uses: '' - license: - explanation: "The asset isn't released, and hence the license is unknown.\n" - value: unknown - modality: - explanation: Text (English, 93 other languages) - value: text; text + explanation: Date model blog post was released + url: https://www.microsoft.com/en-us/research/blog/microsoft-turing-universal-language-representation-model-t-ulrv5-tops-xtreme-leaderboard-and-trains-100x-faster/ model_card: '' - monitoring: '' - name: T-ULRv5 - organization: Microsoft - prohibited_uses: '' - quality_control: '' - size: 2.2B parameters (dense) + modality: Text (English, 93 other languages) + size: 2.2B parameters (dense model) + analysis: '' + dependencies: [] training_emissions: '' - training_hardware: 256 A100 training_time: Less than two weeks - type: model - url: https://www.microsoft.com/en-us/research/blog/microsoft-turing-universal-language-representation-model-t-ulrv5-tops-xtreme-leaderboard-and-trains-100x-faster/ -- access: - explanation: Manual approval through early access request form required. + training_hardware: 256 A100 + quality_control: '' + access: value: limited - analysis: '' - created_date: - explanation: Date model blog post was released - value: 2021-12-02 - dependencies: [] - description: '' - feedback: '' - intended_uses: '' + explanation: Manual approval through early access request form required. license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - modality: - explanation: text (English) - value: text; text - model_card: '' + explanation: > + The asset isn't released, and hence the license is unknown. 
+ intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' + +- type: model name: Turing NLR-v5 organization: Microsoft - prohibited_uses: '' - quality_control: '' - size: 5B parameters (dense) + description: '' + created_date: + value: 2021-12-02 + explanation: Date model blog post was released + url: https://www.microsoft.com/en-us/research/blog/efficiently-and-effectively-scaling-up-language-model-pretraining-for-best-language-representation-model-on-glue-and-superglue/?OCID=msr_blog_TNLRV5_tw + model_card: '' + modality: text (English) + analysis: '' + size: 5B parameters (dense model) + dependencies: [] training_emissions: '' - training_hardware: '' training_time: '' - type: model - url: https://www.microsoft.com/en-us/research/blog/efficiently-and-effectively-scaling-up-language-model-pretraining-for-best-language-representation-model-on-glue-and-superglue/?OCID=msr_blog_TNLRV5_tw -- access: - explanation: Megatron-Turing NLG can be accessed through the [[Turing Academic - Program]](https://www.microsoft.com/en-us/research/collaboration/microsoft-turing-academic-program/) + training_hardware: '' + quality_control: '' + access: value: limited - analysis: '' - created_date: - explanation: "The date of the Microsoft Research blog announcing MT-NLG [[Microsoft\ - \ Research Blog]](https://www.microsoft.com/en-us/research/blog/using-deepspeed-and-megatron-to-train-megatron-turing-nlg-530b-the-worlds-largest-and-most-powerful-generative-language-model/).\n" - value: 2022-01-28 - dependencies: [The Pile] - description: "Megatron-Turing NLG is a 530B parameter autoregressive language\ - \ model.\n" - feedback: '' - intended_uses: '' + explanation: Manual approval through early access request form required. license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - modality: - explanation: text (English) - value: text; text - model_card: '' + explanation: > + The asset isn't released, and hence the license is unknown. + intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' + +- type: model name: Megatron-Turing NLG organization: Microsoft, NVIDIA - prohibited_uses: '' - quality_control: '' - size: 530B parameters (dense) - training_emissions: '' - training_hardware: 4480 A100s (560 x 8) - training_time: '' - type: model + description: > + Megatron-Turing NLG is a 530B parameter autoregressive language model. + created_date: + value: 2022-01-28 + explanation: > + The date of the Microsoft Research blog announcing MT-NLG + [[Microsoft Research Blog]](https://www.microsoft.com/en-us/research/blog/using-deepspeed-and-megatron-to-train-megatron-turing-nlg-530b-the-worlds-largest-and-most-powerful-generative-language-model/). 
url: https://arxiv.org/abs/2201.11990 -- access: - explanation: Microsoft does not provide public access to VALL-E - value: closed + model_card: '' + modality: text (English) + size: 530B parameters (dense model) analysis: '' - created_date: - explanation: The date the [[model paper]](https://arxiv.org/abs/2301.02111) - was released - value: 2023-01-05 - dependencies: [] - description: Vall-E is a neural code model for text-to-speech synthesis - feedback: '' - intended_uses: '' + dependencies: [The Pile] + training_emissions: '' + training_time: '' + training_hardware: 4480 A100s (560 x 8) + quality_control: '' + access: + value: limited + explanation: Megatron-Turing NLG can be accessed through the [[Turing Academic + Program]](https://www.microsoft.com/en-us/research/collaboration/microsoft-turing-academic-program/) license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - modality: - explanation: text, audio - value: audio, text; audio, text - model_card: none + explanation: > + The asset isn't released, and hence the license is unknown. + intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' + +- type: model name: VALL-E organization: Microsoft - prohibited_uses: '' - quality_control: '' + description: Vall-E is a neural code model for text-to-speech synthesis + created_date: + value: 2023-01-05 + explanation: The date the [[model paper]](https://arxiv.org/abs/2301.02111) + was released + url: https://valle-demo.github.io/ + model_card: none + modality: text, audio size: unknown + analysis: '' + dependencies: [] training_emissions: '' - training_hardware: 16 V100 32GB GPUs training_time: '' - type: model - url: https://valle-demo.github.io/ -- access: - explanation: "The feature is available to developers in a restricted technical\ - \ preview [[GitHub CoPilot]](https://copilot.github.com/).\n" - value: limited - adaptation: unknown - created_date: - explanation: "Date of the blog post introducing CoPilot [[GitHub Blog Post]]\ - \ (https://github.blog/2021-06-29-introducing-github-copilot-ai-pair-programmer/).\n" - value: 2021-06-29 - dependencies: [Codex] - description: "GitHub CoPilot is a coding pair programmer assisting programmers\ - \ as they write code.\n" - failures: unknown - feedback: "Feedback can be provided in the CoPilot feedback project [[CoPilot\ - \ feedback]] (https://github.com/github/feedback/discussions/categories/copilot-feedback).\n" - intended_uses: "GitHub CoPilot is intended to be used as a coding assistant.\n" + training_hardware: 16 V100 32GB GPUs + quality_control: '' + access: + value: closed + explanation: Microsoft does not provide public access to VALL-E license: - explanation: "We weren't able to locate the license.\n" value: unknown - monitoring: "value: unknown explanation: >\n There may be internal monitoring\ - \ mechanisms unknown to the public.\n" - monthly_active_users: "GitHub Copilot reportedly has over 1 million sign-ups [[Tweet\ - \ Source]](https://twitter.com/sama/status/1539737789310259200?s=21&t=YPaYd0ZueJzrR6rLslUqzg).\n" + explanation: > + The asset isn't released, and hence the license is unknown. 
+  intended_uses: ''
+  prohibited_uses: ''
   monitoring: ''
+  feedback: ''
+
+# Applications
+
+- type: application
   name: GitHub CoPilot
+  # General
   organization: Microsoft
-  output_space: Code completions
-  prohibited_uses: "Access to GPT-3 is governed by GitHub Acceptable Use Policies\
-    \ and Terms of Service, both of which list a set of prohibited uses [[Use Policies]]\
-    \ (https://docs.github.com/en/site-policy/acceptable-use-policies/github-acceptable-use-policies)\
-    \ [[Terms of Service]] (https://docs.github.com/en/site-policy/github-terms/github-terms-of-service).\n"
-  quality_control: "GitHub is working on a filter to detect and suppress code generations\
-    \ that are verbatim from the training set [[GitHub Research Recitation]] (https://docs.github.com/en/github/copilot/research-recitation).\
-    \ According to the FAQ, GitHub implemented a simple filter that blocks emails\
-    \ in standard formats to protect personally identifiable data that may be present\
-    \ in the training data [[GitHub CoPilot]](https://copilot.github.com/).\n"
-  terms_of_service: "https://docs.github.com/en/site-policy/github-terms/github-terms-of-service\n"
-  type: application
+  description: >
+    GitHub CoPilot is a coding pair programmer assisting programmers as they
+    write code.
+  created_date:
+    value: 2021-06-29
+    explanation: >
+      Date of the blog post introducing CoPilot
+      [[GitHub Blog Post]]
+      (https://github.blog/2021-06-29-introducing-github-copilot-ai-pair-programmer/).
   url: https://copilot.github.com/
+  # Construction
+  dependencies: [Codex]
+  adaptation: unknown
+  output_space: Code completions
+  quality_control: >
+    GitHub is working on a filter to detect and suppress code generations that
+    are verbatim from the training set
+    [[GitHub Research Recitation]]
+    (https://docs.github.com/en/github/copilot/research-recitation).
+    According to the FAQ, GitHub implemented a simple filter that blocks emails
+    in standard formats to protect personally identifiable data that may be
+    present in the training data
+    [[GitHub CoPilot]](https://copilot.github.com/).
+  # Downstream
+  access:
+    value: limited
+    explanation: >
+      The feature is available to developers in a restricted
+      technical preview
+      [[GitHub CoPilot]](https://copilot.github.com/).
+  license:
+    value: unknown
+    explanation: >
+      We weren't able to locate the license.
+  terms_of_service: >
+    https://docs.github.com/en/site-policy/github-terms/github-terms-of-service
+  intended_uses: >
+    GitHub CoPilot is intended to be used as a coding assistant.
+  prohibited_uses: >
+    Access to GitHub CoPilot is governed by GitHub Acceptable Use Policies and
+    Terms of Service, both of which list a set of prohibited uses
+    [[Use Policies]]
+    (https://docs.github.com/en/site-policy/acceptable-use-policies/github-acceptable-use-policies)
+    [[Terms of Service]]
+    (https://docs.github.com/en/site-policy/github-terms/github-terms-of-service).
+  monitoring:
+    value: unknown
+    explanation: >
+      There may be internal monitoring mechanisms unknown to the public.
+  feedback: >
+    Feedback can be provided in the CoPilot feedback project
+    [[CoPilot feedback]]
+    (https://github.com/github/feedback/discussions/categories/copilot-feedback).
+  # Deployment
+  monthly_active_users: >
+    GitHub Copilot reportedly has over 1 million sign-ups
+    [[Tweet Source]](https://twitter.com/sama/status/1539737789310259200?s=21&t=YPaYd0ZueJzrR6rLslUqzg).
user_distribution: unknown -- access: open - analysis: '' + failures: unknown + +- type: model + name: BioGPT + organization: Microsoft + description: '' created_date: 2022-09-24 + url: https://academic.oup.com/bib/article/23/6/bbac409/6713511?guestAccessKey=a66d9b5d-4f83-4017-bb52-405815c907b9&login=true + model_card: '' + modality: text + analysis: '' + size: 1.5B parameters (dense model) dependencies: [PubMed] - description: '' - feedback: '' - intended_uses: '' + training_emissions: '' + training_time: '' + training_hardware: '' + quality_control: '' + access: open license: MIT - modality: - explanation: text - value: text; text - model_card: '' + intended_uses: '' + prohibited_uses: '' monitoring: '' - name: BioGPT + feedback: '' + +- type: application + name: Microsoft Teams organization: Microsoft - prohibited_uses: '' - quality_control: '' - size: 1.5B parameters (dense) - training_emissions: '' - training_hardware: '' - training_time: '' - type: model - url: https://academic.oup.com/bib/article/23/6/bbac409/6713511?guestAccessKey=a66d9b5d-4f83-4017-bb52-405815c907b9&login=true -- access: open - adaptation: '' - created_date: 2023-02-01 - dependencies: [ChatGPT API] description: Microsoft Teams Premium is generally available. Built on the familiar, all-in-one collaborative experience of Microsoft Teams, Teams Premium brings the latest technologies, powered by OpenAI’s GPT-3.5, to make meetings more intelligent, personalized, and protected—whether it’s one-on-one, large meetings, virtual appointments, or webinars. - failures: '' - feedback: '' - intended_uses: '' + created_date: 2023-02-01 + url: https://www.microsoft.com/en-us/microsoft-365/blog/2023/02/01/microsoft-teams-premium-cut-costs-and-add-ai-powered-productivity/ + dependencies: [ChatGPT API] + adaptation: '' + output_space: '' + quality_control: '' + access: open license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown + explanation: > + The asset isn't released, and hence the license is unknown. + terms_of_service: '' + intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' monthly_active_users: '' - name: Microsoft Teams - organization: Microsoft - output_space: '' - prohibited_uses: '' - quality_control: '' - terms_of_service: '' - type: application - url: https://www.microsoft.com/en-us/microsoft-365/blog/2023/02/01/microsoft-teams-premium-cut-costs-and-add-ai-powered-productivity/ user_distribution: '' -- access: limited - adaptation: unknown - created_date: 2023-02-07 - dependencies: [ChatGPT API] + failures: '' + +- type: application + name: Bing Search + organization: Microsoft description: AI-powered Bing search engine and Edge browser, available in preview now at Bing.com, to deliver better search, more complete answers, a new chat experience and the ability to generate content. We think of these tools as an AI copilot for the web. - failures: '' - feedback: "Feedback can be submitted at [bing.com](bing.com).\n" - intended_uses: Search engine + created_date: 2023-02-07 + url: https://blogs.microsoft.com/blog/2023/02/07/reinventing-search-with-a-new-ai-powered-microsoft-bing-and-edge-your-copilot-for-the-web/ + dependencies: [ChatGPT API] + adaptation: unknown + output_space: Search results + quality_control: '' + access: limited license: - explanation: "Software license as described in the Terms of Service document.\n" value: custom + explanation: > + Software license as described in the Terms of Service document. 
+ terms_of_service: https://www.microsoft.com/legal/terms-of-use + intended_uses: Search engine + prohibited_uses: '' monitoring: '' + feedback: > + Feedback can be submitted at [bing.com](bing.com). monthly_active_users: '' - name: Bing Search - organization: Microsoft - output_space: Search results - prohibited_uses: '' - quality_control: '' - terms_of_service: https://www.microsoft.com/legal/terms-of-use - type: application - url: https://blogs.microsoft.com/blog/2023/02/07/reinventing-search-with-a-new-ai-powered-microsoft-bing-and-edge-your-copilot-for-the-web/ user_distribution: '' -- access: closed - analysis: '' + failures: '' + +- type: model + name: KOSMOS-1 + organization: Microsoft + description: KOSMOS-1 is a multimodal language model that is capable of perceiving + multimodal input, following instructions, and performing in-context learning + for not only language tasks but also multimodal tasks. created_date: 2023-03-01 + url: https://arxiv.org/pdf/2302.14045.pdf + model_card: '' + modality: text, image + analysis: '' + size: 1.6B parameters (dense model) dependencies: - The Pile - CommonCrawl @@ -279,427 +320,439 @@ - LAION-400M - COYO-700M - Conceptual Captions - description: KOSMOS-1 is a multimodal language model that is capable of perceiving - multimodal input, following instructions, and performing in-context learning - for not only language tasks but also multimodal tasks. - feedback: '' - intended_uses: '' + training_emissions: '' + training_time: '' + training_hardware: '' + quality_control: '' + access: closed license: MIT - modality: - explanation: text, image - value: image, text; image, text - model_card: '' + intended_uses: '' + prohibited_uses: '' monitoring: '' - name: KOSMOS-1 + feedback: '' + +- type: model + name: Prometheus organization: Microsoft - prohibited_uses: '' - quality_control: '' - size: 1.6B parameters (dense) - training_emissions: '' - training_hardware: '' - training_time: '' - type: model - url: https://arxiv.org/pdf/2302.14045.pdf -- access: closed - analysis: '' - created_date: 2023-02-07 - dependencies: [] description: In the context of Bing, we have developed a proprietary way of working with the OpenAI model that allows us to best leverage its power. We call this collection of capabilities and techniques the Prometheus model. This combination gives you more relevant, timely and targeted results, with improved safety. 
- feedback: '' - intended_uses: '' - license: - explanation: "We weren't able to locate a license.\n" - value: unknown - modality: {} + created_date: 2023-02-07 + url: https://blogs.microsoft.com/blog/2023/02/07/reinventing-search-with-a-new-ai-powered-microsoft-bing-and-edge-your-copilot-for-the-web/ model_card: '' - monitoring: '' - name: Prometheus - organization: Microsoft - prohibited_uses: '' - quality_control: '' + modality: unknown + analysis: '' size: unknown + dependencies: [] training_emissions: '' - training_hardware: '' training_time: '' - type: model - url: https://blogs.microsoft.com/blog/2023/02/07/reinventing-search-with-a-new-ai-powered-microsoft-bing-and-edge-your-copilot-for-the-web/ -- access: closed - analysis: '' - created_date: 2022-11-23 - dependencies: [FLD-900M] - description: '' - feedback: '' - intended_uses: '' + training_hardware: '' + quality_control: '' + access: closed license: - explanation: "We weren't able to locate a license.\n" value: unknown - modality: - explanation: text, image - value: image, text; image, text - model_card: '' + explanation: > + We weren't able to locate a license. + intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' + +- type: model name: Florence organization: Microsoft - prohibited_uses: '' - quality_control: '' - size: 900M parameters (dense) - training_emissions: '' - training_hardware: '' - training_time: '' - type: model + description: '' + created_date: 2022-11-23 url: https://arxiv.org/abs/2111.11432 -- access: closed + model_card: '' + modality: text, image analysis: '' + size: 900M parameters (637M image, 256M text) + dependencies: [FLD-900M] + training_emissions: '' + training_time: '' + training_hardware: '' + quality_control: '' + access: closed + license: + value: unknown + explanation: > + We weren't able to locate a license. + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' + +- type: dataset + name: FLD-900M + organization: Microsoft + description: '' created_date: 2022-11-23 + url: https://arxiv.org/abs/2111.11432 datasheet: '' + modality: text, image + size: 900M image-text pairs + sample: [] + analysis: '' dependencies: [] - description: '' - excluded: '' - feedback: '' included: '' - intended_uses: '' + excluded: '' + quality_control: '' + access: closed license: - explanation: "We weren't able to locate a license.\n" value: unknown - modality: - explanation: text, image - value: image, text + explanation: > + We weren't able to locate a license. + intended_uses: '' + prohibited_uses: '' monitoring: '' - name: FLD-900M + feedback: '' + +- type: application + name: Azure Cognitive Services for Vision organization: Microsoft - prohibited_uses: '' - quality_control: '' - sample: [] - size: 900M image-text pairs - type: dataset - url: https://arxiv.org/abs/2111.11432 -- access: limited - adaptation: '' - created_date: 2023-03-07 - dependencies: [Florence] description: Cost-effective, production-ready computer vision services in Azure Cognitive Service for Vision. The improved Vision Services enables developers to create cutting-edge, market-ready, responsible computer vision applications across various industries. 
- failures: '' - feedback: '' - intended_uses: '' + created_date: 2023-03-07 + url: https://azure.microsoft.com/en-us/blog/announcing-a-renaissance-in-computer-vision-ai-with-microsofts-florence-foundation-model/?utm_content=buffer16fa0&utm_medium=social&utm_source=twitter.com&utm_campaign=buffer + dependencies: [Florence] + adaptation: '' + output_space: '' + quality_control: '' + access: limited license: - explanation: "Software license in the Microsoft Terms of Use govern the license\ - \ rules for Azure services as outlined in the Terms of Service document.\n" value: custom + explanation: > + Software license in the Microsoft Terms of Use govern the license rules + for Azure services as outlined in the Terms of Service document. + terms_of_service: https://azure.microsoft.com/en-us/support/legal/ + intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' monthly_active_users: '' - name: Azure Cognitive Services for Vision - organization: Microsoft - output_space: '' - prohibited_uses: '' - quality_control: '' - terms_of_service: https://azure.microsoft.com/en-us/support/legal/ - type: application - url: https://azure.microsoft.com/en-us/blog/announcing-a-renaissance-in-computer-vision-ai-with-microsofts-florence-foundation-model/?utm_content=buffer16fa0&utm_medium=social&utm_source=twitter.com&utm_campaign=buffer user_distribution: '' -- access: closed - analysis: '' + failures: '' + +- type: model + name: VisualChatGPT + organization: Microsoft + description: '' created_date: 2023-03-08 + url: https://arxiv.org/pdf/2303.04671.pdf + model_card: '' + modality: text, image + analysis: '' + size: unknown dependencies: [OpenAI API] - description: '' - feedback: '' - intended_uses: '' + training_emissions: '' + training_time: '' + training_hardware: '' + quality_control: '' + access: closed license: - explanation: No license was found. value: none - modality: - explanation: text, image - value: image, text; image, text - model_card: '' + explanation: No license was found. + intended_uses: '' + prohibited_uses: '' monitoring: '' - name: VisualChatGPT + feedback: '' + +- type: application + name: Microsoft 365 Copilot organization: Microsoft - prohibited_uses: '' - quality_control: '' - size: unknown - training_emissions: '' - training_hardware: '' - training_time: '' - type: model - url: https://arxiv.org/pdf/2303.04671.pdf -- access: limited - adaptation: '' - created_date: 2023-03-16 - dependencies: [GPT-4 API] description: It combines the power of language models with your data in the Microsoft Graph and the Microsoft 365 apps to turn your words into the most powerful productivity tool on the planet. - failures: '' - feedback: '' - intended_uses: '' + created_date: 2023-03-16 + url: https://blogs.microsoft.com/blog/2023/03/16/introducing-microsoft-365-copilot-your-copilot-for-work/ + dependencies: [GPT-4 API] + adaptation: '' + output_space: '' + quality_control: '' + access: limited license: - explanation: "Software license as described in the Terms of Service document.\n" value: custom + explanation: > + Software license as described in the Terms of Service document. 
+  terms_of_service: https://www.microsoft.com/legal/terms-of-use
+  intended_uses: ''
+  prohibited_uses: ''
   monitoring: ''
+  feedback: ''
   monthly_active_users: ''
-  name: Microsoft 365 Copilot
-  organization: Microsoft
-  output_space: ''
-  prohibited_uses: ''
-  quality_control: ''
-  terms_of_service: https://www.microsoft.com/legal/terms-of-use
-  type: application
-  url: https://blogs.microsoft.com/blog/2023/03/16/introducing-microsoft-365-copilot-your-copilot-for-work/
   user_distribution: ''
-- access: limited
-  adaptation: ''
-  created_date: 2023-03-16
-  dependencies: [Microsoft 365 Copilot]
+  failures: ''
+
+- type: application
+  name: Microsoft Business Chat
+  organization: Microsoft
   description: Business Chat works across the language model, the Microsoft 365
    apps, and your data — your calendar, emails, chats, documents, meetings and
    contacts — to do things you’ve never been able to do before. You can give it
    natural language prompts like “Tell my team how we updated the product strategy,”
    and it will generate a status update based on the morning’s meetings, emails
    and chat threads.
-  failures: ''
-  feedback: ''
-  intended_uses: ''
-  license:
-    explanation: "Software license as described in the Terms of Service document.\n"
-    value: custom
-  monitoring: ''
-  monthly_active_users: ''
-  name: Microsoft Business Chat
-  organization: Microsoft
-  output_space: ''
-  prohibited_uses: ''
-  quality_control: ''
-  terms_of_service: https://www.microsoft.com/legal/terms-of-use
-  type: application
+  created_date: 2023-03-16
   url: https://blogs.microsoft.com/blog/2023/03/16/introducing-microsoft-365-copilot-your-copilot-for-work/
-  user_distribution: ''
-- access: open
-  adaptation: ''
-  created_date: unknown
   dependencies: [Microsoft 365 Copilot]
-  description: Microsoft Excel is the industry leading spreadsheet software program,
-    a powerful data visualization and analysis tool.
-  failures: ''
-  feedback: ''
-  intended_uses: ''
+  adaptation: ''
+  output_space: ''
+  quality_control: ''
+  access: limited
   license:
-    explanation: "Software license as described in the Terms of Service document.\n"
    value: custom
+    explanation: >
+      Software license as described in the Terms of Service document.
+ terms_of_service: https://www.microsoft.com/legal/terms-of-use + intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' monthly_active_users: '' + user_distribution: '' + failures: '' + +- type: application name: Microsoft Outlook organization: Microsoft - output_space: '' - prohibited_uses: '' - quality_control: '' - terms_of_service: https://www.microsoft.com/legal/terms-of-use - type: application - url: https://www.microsoft.com/en-us/microsoft-365/outlook/email-and-calendar-software-microsoft-outlook - user_distribution: '' -- access: limited - adaptation: '' + description: Microsoft Outlook is a personal information manager software system + from Microsoft, available as a part of the Microsoft Office and Microsoft 365 + software suites. created_date: unknown + url: https://www.microsoft.com/en-us/microsoft-365/outlook/email-and-calendar-software-microsoft-outlook dependencies: [Microsoft 365 Copilot] - description: Microsoft Power Platform is a line of business intelligence, app - development, and app connectivity software applications. - failures: '' - feedback: '' - intended_uses: '' + adaptation: '' + output_space: '' + quality_control: '' + access: open license: - explanation: "Software license as described in the Terms of Service document.\n" value: custom + explanation: > + Software license as described in the Terms of Service document. + terms_of_service: https://www.microsoft.com/legal/terms-of-use + intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' monthly_active_users: '' + user_distribution: '' + failures: '' + +- type: application name: Microsoft Power Platform organization: Microsoft - output_space: '' - prohibited_uses: '' - quality_control: '' - terms_of_service: https://www.microsoft.com/legal/terms-of-use - type: application - url: https://powerplatform.microsoft.com/en-us/ - user_distribution: '' -- access: open - adaptation: '' + description: Microsoft Power Platform is a line of business intelligence, app + development, and app connectivity software applications. created_date: unknown + url: https://powerplatform.microsoft.com/en-us/ dependencies: [Microsoft 365 Copilot] - description: Microsoft PowerPoint empowers you to create clean slideshow presentations - and intricate pitch decks and gives you a powerful presentation maker. - failures: '' - feedback: '' - intended_uses: '' + adaptation: '' + output_space: '' + quality_control: '' + access: limited license: - explanation: "Software license as described in the Terms of Service document.\n" value: custom + explanation: > + Software license as described in the Terms of Service document. + terms_of_service: https://www.microsoft.com/legal/terms-of-use + intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' monthly_active_users: '' + user_distribution: '' + failures: '' + +- type: application name: Microsoft PowerPoint organization: Microsoft - output_space: '' - prohibited_uses: '' - quality_control: '' - terms_of_service: https://www.microsoft.com/legal/terms-of-use - type: application + description: Microsoft PowerPoint empowers you to create clean slideshow presentations + and intricate pitch decks and gives you a powerful presentation maker. 
+ created_date: unknown url: https://www.microsoft.com/en-us/microsoft-365/powerpoint - user_distribution: '' -- access: open + dependencies: [Microsoft 365 Copilot] adaptation: '' - created_date: unknown - dependencies: [Microsoft 365 Copilot, Microsoft Business Chat] - description: Microsoft Teams is a proprietary business communication platform - developed by Microsoft, as part of the Microsoft 365 family of products. - failures: '' - feedback: '' - intended_uses: '' + output_space: '' + quality_control: '' + access: open license: - explanation: "Software license as described in the Terms of Service document.\n" value: custom + explanation: > + Software license as described in the Terms of Service document. + terms_of_service: https://www.microsoft.com/legal/terms-of-use + intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' monthly_active_users: '' + user_distribution: '' + failures: '' + +- type: application name: Microsoft Teams organization: Microsoft - output_space: '' - prohibited_uses: '' - quality_control: '' - terms_of_service: https://www.microsoft.com/legal/terms-of-use - type: application + description: Microsoft Teams is a proprietary business communication platform + developed by Microsoft, as part of the Microsoft 365 family of products. + created_date: unknown url: https://www.microsoft.com/en-us/microsoft-teams/group-chat-software - user_distribution: '' -- access: open + dependencies: [Microsoft 365 Copilot, Microsoft Business Chat] adaptation: '' - created_date: unknown - dependencies: [Microsoft 365 Copilot] - description: Microsoft Word is a word processing software developed by Microsoft - failures: '' - feedback: '' - intended_uses: '' + output_space: '' + quality_control: '' + access: open license: - explanation: "Software license as described in the Terms of Service document.\n" value: custom + explanation: > + Software license as described in the Terms of Service document. + terms_of_service: https://www.microsoft.com/legal/terms-of-use + intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' monthly_active_users: '' + user_distribution: '' + failures: '' + +- type: application name: Microsoft Word organization: Microsoft + description: Microsoft Word is a word processing software developed by Microsoft + created_date: unknown + url: https://www.microsoft.com/en-us/microsoft-365/word + dependencies: [Microsoft 365 Copilot] + adaptation: '' output_space: '' - prohibited_uses: '' quality_control: '' - terms_of_service: https://www.microsoft.com/legal/terms-of-use - type: application - url: https://www.microsoft.com/en-us/microsoft-365/word - user_distribution: '' -- access: limited - adaptation: unknown - created_date: unknown - dependencies: [] - description: "Inside look is a Microsoft Office feature, composing document insights\ - \ highlighting key points, expected time to read, and popularity among others.\n" - failures: unknown - feedback: unknown - intended_uses: Providing document insights to users. + access: open license: - explanation: "Software license as described in the Terms of Service document.\n" value: custom - monitoring: unknown - monthly_active_users: unknown + explanation: > + Software license as described in the Terms of Service document. 
+ terms_of_service: https://www.microsoft.com/legal/terms-of-use + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' + monthly_active_users: '' + user_distribution: '' + failures: '' + +- type: application name: Microsoft Inside Look + # General organization: Microsoft - output_space: Document level insights for users. - prohibited_uses: unknown - quality_control: unknown - terms_of_service: https://www.microsoft.com/legal/terms-of-use - type: application - url: https://support.microsoft.com/en-us/office/see-file-insights-before-you-open-a-file-87a23bbc-a516-42e2-a7b6-0ecb8259e026 - user_distribution: unknown -- access: limited - adaptation: unknown + description: > + Inside look is a Microsoft Office feature, composing document insights + highlighting key points, expected time to read, and popularity among others. created_date: unknown + url: https://support.microsoft.com/en-us/office/see-file-insights-before-you-open-a-file-87a23bbc-a516-42e2-a7b6-0ecb8259e026 dependencies: [] - description: "Suggested replies is a Microsoft Outlook feature that suggests responses\ - \ to emails, available in: English, Spanish, Italian, French, German, Portuguese\ - \ Chinese Simplified, Chinese Traditional, Swedish, Russian, Korean, Czech,\ - \ Hungarian, Arabic, Hebrew, Thai, Turkish, Japanese, Dutch, Norwegian, Danish,\ - \ and Polish.\n" - failures: unknown - feedback: unknown - intended_uses: Suggesting email replies. + adaptation: unknown + output_space: Document level insights for users. + quality_control: unknown + # Downstream + access: limited license: - explanation: "Software license as described in the Terms of Service document.\n" value: custom + explanation: > + Software license as described in the Terms of Service document. + terms_of_service: https://www.microsoft.com/legal/terms-of-use + intended_uses: Providing document insights to users. + prohibited_uses: unknown monitoring: unknown + feedback: unknown + # Deployment monthly_active_users: unknown + user_distribution: unknown + failures: unknown + +- type: application name: Microsoft Suggested Replies + # General organization: Microsoft + description: > + Suggested replies is a Microsoft Outlook feature that suggests responses + to emails, available in: English, Spanish, Italian, French, German, + Portuguese Chinese Simplified, Chinese Traditional, Swedish, Russian, + Korean, Czech, Hungarian, Arabic, Hebrew, Thai, Turkish, Japanese, Dutch, + Norwegian, Danish, and Polish. + created_date: unknown + url: https://support.microsoft.com/en-us/office/use-suggested-replies-in-outlook-19316194-0434-43ba-a742-6b5890157379 + dependencies: [] + adaptation: unknown output_space: Suggested emails. - prohibited_uses: unknown quality_control: unknown + # Downstream + access: limited + license: + value: custom + explanation: > + Software license as described in the Terms of Service document. terms_of_service: https://www.microsoft.com/legal/terms-of-use - type: application - url: https://support.microsoft.com/en-us/office/use-suggested-replies-in-outlook-19316194-0434-43ba-a742-6b5890157379 + intended_uses: Suggesting email replies. 
+ prohibited_uses: unknown + monitoring: unknown + feedback: unknown + # Deployment + monthly_active_users: unknown user_distribution: unknown -- access: limited + failures: unknown + +- type: application + name: Microsoft Security Copilot + # General + organization: Microsoft + description: > + Microsoft Security Copilot is an AI-powered security analysis tool that enables + analysts to respond to threats quickly, process signals at machine speed, and + assess risk exposure in minutes. + created_date: + value: 2023-03-28 + explanation: The date Security Copilot was announced in the [[Microsoft blog + post]](https://blogs.microsoft.com/blog/2023/03/28/introducing-microsoft-security-copilot-empowering-defenders-at-the-speed-of-ai/). + url: https://blogs.microsoft.com/blog/2023/03/28/introducing-microsoft-security-copilot-empowering-defenders-at-the-speed-of-ai/ + # Construction + dependencies: [GPT-4, Microsoft security-specific model] adaptation: - explanation: See [[blog post]](https://blogs.microsoft.com/blog/2023/03/28/introducing-microsoft-security-copilot-empowering-defenders-at-the-speed-of-ai/). value: Security Copilot combines OpenAI's GPT-4 generative AI with a security-specific model from Microsoft. This security-specific model in turn incorporates a growing set of security-specific skills and is informed by Microsoft's unique global threat intelligence and more than 65 trillion daily signals. - created_date: - explanation: The date Security Copilot was announced in the [[Microsoft blog - post]](https://blogs.microsoft.com/blog/2023/03/28/introducing-microsoft-security-copilot-empowering-defenders-at-the-speed-of-ai/). - value: 2023-03-28 - dependencies: [GPT-4, Microsoft security-specific model] - description: "Microsoft Security Copilot is an AI-powered security analysis tool\ - \ that enables analysts to respond to threats quickly, process signals at machine\ - \ speed, and assess risk exposure in minutes.\n" - failures: unknown - feedback: unknown - intended_uses: Security Copilot is designed to enhance the capabilities of cybersecurity - professionals. It leverages machine speed and scale to accelerate response to - security incidents, discover and process threat signals, and assess risk exposure - within minutes. - license: - explanation: "Software license as described in the Terms of Service document.\n" - value: custom - monitoring: '' - monthly_active_users: unknown - name: Microsoft Security Copilot - organization: Microsoft + explanation: See [[blog post]](https://blogs.microsoft.com/blog/2023/03/28/introducing-microsoft-security-copilot-empowering-defenders-at-the-speed-of-ai/). output_space: - explanation: See [[product demo]](https://www.microsoft.com/en-us/security/business/ai-machine-learning/microsoft-security-copilot). value: Actionable responses to security-related questions (text and image). Security event, incident or threat reports (PowerPoint slide). - prohibited_uses: unknown + explanation: See [[product demo]](https://www.microsoft.com/en-us/security/business/ai-machine-learning/microsoft-security-copilot). quality_control: - explanation: See [[blog post]](https://blogs.microsoft.com/blog/2023/03/28/introducing-microsoft-security-copilot-empowering-defenders-at-the-speed-of-ai/). value: Security Copilot employs a closed-loop learning system that learns from user interactions and feedback, enabling it to provide more coherent, relevant, and useful answers that continually improve over time. 
Security Copilot is @@ -707,106 +760,119 @@ that customers' data and AI models are protected with enterprise compliance and security controls. Customer data is owned and controlled by them, and not used to train AI models for anyone outside their organization. + explanation: See [[blog post]](https://blogs.microsoft.com/blog/2023/03/28/introducing-microsoft-security-copilot-empowering-defenders-at-the-speed-of-ai/). + # Downstream + access: limited + license: + value: custom + explanation: > + Software license as described in the Terms of Service document. terms_of_service: https://www.microsoft.com/legal/terms-of-use - type: application - url: https://blogs.microsoft.com/blog/2023/03/28/introducing-microsoft-security-copilot-empowering-defenders-at-the-speed-of-ai/ + intended_uses: Security Copilot is designed to enhance the capabilities of cybersecurity + professionals. It leverages machine speed and scale to accelerate response to + security incidents, discover and process threat signals, and assess risk exposure + within minutes. + prohibited_uses: unknown + monitoring: '' + feedback: unknown + # Deployment + monthly_active_users: unknown user_distribution: unknown -- access: open - analysis: Evaluated on GLUE, SQuAD 2.0, and CoQA benchmarks. - created_date: 2019-10-01 - dependencies: [] + failures: unknown + +- type: model + name: UniLM + organization: Microsoft description: UniLM is a unified language model that can be fine-tuned for both natural language understanding and generation tasks. - feedback: '' - intended_uses: '' - license: MIT - modality: {} + created_date: 2019-10-01 + url: https://proceedings.neurips.cc/paper_files/paper/2019/file/c20bb2d9a50d5ac1f713f8b34d9aac5a-Paper.pdf model_card: '' - monitoring: '' - name: UniLM - organization: Microsoft - prohibited_uses: '' - quality_control: '' - size: '' + modality: text + analysis: Evaluated on GLUE, SQuAD 2.0, and CoQA benchmarks. + size: 340M parameters + dependencies: [] training_emissions: unknown - training_hardware: 8 NVIDIA Tesla V100 32GB GPUs training_time: 10,000 steps in 7 hours - type: model - url: https://proceedings.neurips.cc/paper_files/paper/2019/file/c20bb2d9a50d5ac1f713f8b34d9aac5a-Paper.pdf -- access: limited - analysis: '' - created_date: 2021-04-12 - dependencies: [] - description: Docugami is a LLM focused on writing business documents and data - using generative AI. - feedback: '' - intended_uses: analyzing, writing, and connecting business documents and data - license: '' - modality: - explanation: text - value: text; text - model_card: '' + training_hardware: 8 NVIDIA Tesla V100 32GB GPUs + quality_control: '' + access: open + license: MIT + intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' + +- type: model name: Docugami organization: Microsoft - prohibited_uses: '' - quality_control: '' - size: 20B parameters (dense) + description: Docugami is a LLM focused on writing business documents and data + using generative AI. + created_date: 2021-04-12 + url: https://www.docugami.com/generative-ai + model_card: '' + modality: text + analysis: '' + size: 20B parameters + dependencies: [] training_emissions: unknown - training_hardware: '' training_time: unknown - type: model - url: https://www.docugami.com/generative-ai -- access: open - analysis: Evaluated on a range of standardized vision benchmarks, and achieves - state of the art performance on all experimentally. 
- created_date: 2022-08-31 - dependencies: [Multiway Transformer network] - description: BEiT-3 is a general-purpose multimodal foundation model for vision - and vision-language tasks. - feedback: '' - intended_uses: '' + training_hardware: '' + quality_control: '' + access: limited license: '' - modality: - explanation: text and image input and output - value: image, text; image, text - model_card: '' + intended_uses: analyzing, writing, and connecting business documents and data + prohibited_uses: '' monitoring: '' + feedback: '' + +- type: model name: BEiT-3 organization: Microsoft - prohibited_uses: '' - quality_control: '' - size: 1.9B parameters (dense) + description: BEiT-3 is a general-purpose multimodal foundation model for vision + and vision-language tasks. + created_date: 2022-08-31 + url: https://arxiv.org/pdf/2208.10442.pdf + model_card: '' + modality: text and image input and output + analysis: Evaluated on a range of standardized vision benchmarks, and achieves + state of the art performance on all experimentally. + size: 1.9B parameters + dependencies: [Multiway Transformer network] training_emissions: unknown - training_hardware: '' training_time: '' - type: model - url: https://arxiv.org/pdf/2208.10442.pdf -- access: open - analysis: Reports results on standard LLM benchmarks in comparison to other LLMs - and test sets. - created_date: 2023-04-24 - dependencies: [LLaMA, Evol-Instruct, Alpaca dataset] + training_hardware: '' + quality_control: '' + access: open + license: '' + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' + +- type: model + name: WizardLM + organization: Microsoft description: Starting with an initial set of instructions, we use our proposed Evol-Instruct to rewrite them step by step into more complex instructions. Then, we mix all generated instruction data to fine-tune LLaMA. We call the resulting model WizardLM. - feedback: https://huggingface.co/datasets/WizardLM/evol_instruct_70k/discussions - intended_uses: Creating large amounts of instruction data, particularly with high - complexity - license: Apache 2.0 - modality: - explanation: natural language text - value: text; text + created_date: 2023-04-24 + url: https://arxiv.org/pdf/2304.12244v1.pdf model_card: https://huggingface.co/WizardLM/WizardLM-13B-1.0 - monitoring: '' - name: WizardLM - organization: Microsoft - prohibited_uses: '' - quality_control: '' - size: 7B parameters (dense) + modality: natural language text + analysis: Reports results on standard LLM benchmarks in comparison to other LLMs + and test sets. + size: 7B parameters + dependencies: [LLaMA, Evol-Instruct, Alpaca dataset] training_emissions: '' - training_hardware: 8 V100 GPUs training_time: 70 hours on 3 epochs - type: model - url: https://arxiv.org/pdf/2304.12244v1.pdf + training_hardware: 8 V100 GPUs + quality_control: '' + access: open + license: Apache 2.0 + intended_uses: Creating large amounts of instruction data, particularly with high + complexity + prohibited_uses: '' + monitoring: '' + feedback: https://huggingface.co/datasets/WizardLM/evol_instruct_70k/discussions diff --git a/assets/mosaic.yaml b/assets/mosaic.yaml index a58049f1..a887f3b4 100644 --- a/assets/mosaic.yaml +++ b/assets/mosaic.yaml @@ -1,25 +1,24 @@ --- -- access: open - analysis: Evaluated on a range of benchmarks and performed on par with LLaMA-7B. 
- created_date: 2023-05-05 - dependencies: [RedPajama-Data, C4, The Stack, Multimodal C4] + +- type: model + name: MPT + organization: Mosaic description: MPT is a series of large language models seeking to address the limitations of other open source models like LLaMA and Pythia. - feedback: '' - intended_uses: '' - license: Apache 2.0 - modality: - explanation: text - value: text; text + created_date: 2023-05-05 + url: https://www.mosaicml.com/blog/mpt-7b model_card: '' - monitoring: '' - name: MPT - organization: Mosaic - prohibited_uses: '' - quality_control: '' - size: 7B parameters (dense) + modality: text + analysis: Evaluated on a range of benchmarks and performed on par with LLaMA-7B. + size: 7B parameters + dependencies: [RedPajama-Data, C4, The Stack, Multimodal C4] training_emissions: unknown - training_hardware: 440 A100 40GB GPUs training_time: 9.5 days - type: model - url: https://www.mosaicml.com/blog/mpt-7b + training_hardware: 440 A100 40GB GPUs + quality_control: '' + access: open + license: Apache 2.0 + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' diff --git a/assets/nanyang.yaml b/assets/nanyang.yaml index e5b04469..02f40c24 100644 --- a/assets/nanyang.yaml +++ b/assets/nanyang.yaml @@ -1,28 +1,27 @@ --- -- access: open - analysis: Evaluated on researcher experiments to test deeper understanding and - advanced commonsense reasoning - created_date: 2023-05-05 - dependencies: [MIMIC-IT, OpenFlamingo] + +- type: model + name: Otter + organization: Nanyang Technological University description: Otter is a multi-modal model based on OpenFlamingo (open-sourced version of DeepMind’s Flamingo), trained on MIMIC-IT and showcasing improved instruction-following ability and in-context learning. - feedback: '' - intended_uses: Following and executing new instructions with few in-context learning - examples given image and textual input. - license: MIT - modality: - explanation: outputs text from text and image input - value: text; image, text + created_date: 2023-05-05 + url: https://arxiv.org/pdf/2305.03726v1.pdf model_card: https://github.com/Luodian/Otter/blob/main/docs/model_card.md - monitoring: '' - name: Otter - organization: Nanyang Technological University - prohibited_uses: '' - quality_control: '' - size: 1.3B parameters (dense) + modality: outputs text from text and image input + analysis: Evaluated on researcher experiments to test deeper understanding and + advanced commonsense reasoning + size: 1.3B parameters + dependencies: [MIMIC-IT, OpenFlamingo] training_emissions: '' - training_hardware: 4 RTX-3090 GPUs training_time: '' - type: model - url: https://arxiv.org/pdf/2305.03726v1.pdf + training_hardware: 4 RTX-3090 GPUs + quality_control: '' + access: open + license: MIT + intended_uses: Following and executing new instructions with few in-context learning + examples given image and textual input. 
+ prohibited_uses: '' + monitoring: '' + feedback: '' diff --git a/assets/naver.yaml b/assets/naver.yaml index 588ea0a4..a262bc3a 100644 --- a/assets/naver.yaml +++ b/assets/naver.yaml @@ -1,30 +1,31 @@ --- -- access: - explanation: The model is not available for download or direct API use - value: closed - analysis: '' + +# models +- type: model + name: HyperCLOVA + organization: Naver + description: HyperClova is an autoregressive language model created_date: - explanation: The date the model paper was submitted to arxiv value: 2021-05-21 + explanation: The date the model paper was submitted to arxiv + url: https://arxiv.org/abs/2109.04650 + model_card: '' + modality: text (Korean | 한국어) + size: 82B parameters (dense model) + analysis: '' dependencies: [] - description: HyperClova is an autoregressive language model - feedback: '' - intended_uses: '' + training_emissions: '' + training_time: 130.4 days + training_hardware: 1024 A100 GPUs + quality_control: '' + access: + value: closed + explanation: The model is not available for download or direct API use license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - modality: - explanation: text (Korean | 한국어) - value: text; text - model_card: '' - monitoring: '' - name: HyperCLOVA - organization: Naver + explanation: > + The asset isn't released, and hence the license is unknown. + intended_uses: '' prohibited_uses: '' - quality_control: '' - size: 82B parameters (dense) - training_emissions: '' - training_hardware: 1024 A100 GPUs - training_time: 130.4 days - type: model - url: https://arxiv.org/abs/2109.04650 + monitoring: '' + feedback: '' diff --git a/assets/neeva.yaml b/assets/neeva.yaml index c081acf3..18e64ffa 100644 --- a/assets/neeva.yaml +++ b/assets/neeva.yaml @@ -1,79 +1,85 @@ --- -- access: closed - analysis: '' + +- type: dataset + name: Neeva dataset + organization: Neeva + description: '' created_date: unknown + url: https://neeva.com/index datasheet: '' + modality: text + size: unknown + sample: [] + analysis: '' dependencies: [] - description: '' - excluded: '' - feedback: '' included: '' - intended_uses: '' + excluded: '' + quality_control: '' + access: closed license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - modality: {} + explanation: > + The asset isn't released, and hence the license is unknown. + intended_uses: '' + prohibited_uses: '' monitoring: '' - name: Neeva dataset + feedback: '' + +- type: model + name: Neeva model organization: Neeva - prohibited_uses: '' - quality_control: '' - sample: [] - size: unknown - type: dataset + description: '' + created_date: unknown url: https://neeva.com/index -- access: closed + model_card: '' + modality: text analysis: '' - created_date: unknown + size: unknown dependencies: [Neeva dataset] - description: '' - feedback: '' - intended_uses: '' + training_emissions: '' + training_time: '' + training_hardware: '' + quality_control: '' + access: closed license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - modality: {} - model_card: '' + explanation: > + The asset isn't released, and hence the license is unknown. 
+ intended_uses: '' + prohibited_uses: '' monitoring: '' - name: Neeva model + feedback: '' + +- type: application + name: NeevaAI organization: Neeva - prohibited_uses: '' - quality_control: '' - size: '' - training_emissions: '' - training_hardware: '' - training_time: '' - type: model - url: https://neeva.com/index -- access: open - adaptation: '' + description: NeevaAI is an AI-powered search tool that combines the capabilities + of LLMs with Neeva's independent in-house search stack to create a unique and + transformative search experience. created_date: + value: 2023-01-06 explanation: The [[Blog post]](https://neeva.com/blog/introducing-neevaai) announcing NeevaAI. - value: 2023-01-06 + url: https://neeva.com/blog/introducing-neevaai dependencies: [Neeva model] - description: NeevaAI is an AI-powered search tool that combines the capabilities - of LLMs with Neeva's independent in-house search stack to create a unique and - transformative search experience. - failures: '' - feedback: '' - intended_uses: '' + adaptation: '' + output_space: '' + quality_control: '' + access: open license: - explanation: "\"If you comply with these Terms, Neeva grants to you, during\ - \ the term of these Terms, a limited non-exclusive, non-transferable license,\ - \ with no right to sublicense, to download and install the App on your personal\ - \ computers, mobile handsets, tablets, wearable devices, and/or other devices\ - \ and to run the App solely for your own personal non-commercial purposes.\"\ - \ - excerpt from the Terms of Service document.\n" value: Custom + explanation: > + "If you comply with these Terms, Neeva grants to you, during the term of + these Terms, a limited non-exclusive, non-transferable license, with no + right to sublicense, to download and install the App on your personal + computers, mobile handsets, tablets, wearable devices, and/or other + devices and to run the App solely for your own personal non-commercial + purposes." - excerpt from the Terms of Service document. + terms_of_service: https://neeva.com/terms + intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' monthly_active_users: '' - name: NeevaAI - organization: Neeva - output_space: '' - prohibited_uses: '' - quality_control: '' - terms_of_service: https://neeva.com/terms - type: application - url: https://neeva.com/blog/introducing-neevaai user_distribution: '' + failures: '' diff --git a/assets/nextdoor.yaml b/assets/nextdoor.yaml index 1cb54b17..0dfafe79 100644 --- a/assets/nextdoor.yaml +++ b/assets/nextdoor.yaml @@ -1,23 +1,24 @@ --- -- access: open - adaptation: '' - created_date: 2023-05-02 - dependencies: [ChatGPT] + +- type: application + name: Nextdoor Assistant + organization: Nextdoor description: AI chatbot on Nextdoor that helps users write more clear and conscientious posts. 
- failures: '' - feedback: '' + created_date: 2023-05-02 + url: https://help.nextdoor.com/s/article/Introducing-Assistant + dependencies: [ChatGPT] + adaptation: '' + output_space: natural language text guidance + quality_control: '' + access: open + license: unknown + terms_of_service: '' intended_uses: to be used to help make the Nextdoor experience more positive for users - license: unknown + prohibited_uses: '' monitoring: '' + feedback: '' monthly_active_users: '' - name: Nextdoor Assistant - organization: Nextdoor - output_space: natural language text guidance - prohibited_uses: '' - quality_control: '' - terms_of_service: '' - type: application - url: https://help.nextdoor.com/s/article/Introducing-Assistant user_distribution: '' + failures: '' diff --git a/assets/nolano.yaml b/assets/nolano.yaml index 0f23e021..f2b8fe84 100644 --- a/assets/nolano.yaml +++ b/assets/nolano.yaml @@ -1,22 +1,23 @@ --- -- access: limited - adaptation: '' - created_date: 2023-03-19 - dependencies: [] - description: Cformers is a set of transformers that act as an API for AI inference - in code. - failures: '' - feedback: '' - intended_uses: '' - license: MIT - monitoring: '' - monthly_active_users: '' + +- type: application name: Cformers organization: Nolano + description: Cformers is a set of transformers that act as an API for AI inference + in code. + created_date: 2023-03-19 + url: https://www.nolano.org/services/Cformers/ + dependencies: [] + adaptation: '' output_space: '' - prohibited_uses: '' quality_control: '' + access: limited + license: MIT terms_of_service: '' - type: application - url: https://www.nolano.org/services/Cformers/ + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' + monthly_active_users: '' user_distribution: '' + failures: '' diff --git a/assets/notion.yaml b/assets/notion.yaml index d0f580c5..ba21e83f 100644 --- a/assets/notion.yaml +++ b/assets/notion.yaml @@ -1,23 +1,24 @@ --- -- access: limited - adaptation: '' - created_date: 2023-02-22 - dependencies: [Anthropic API] + +- type: application + name: Notion AI + organization: Notion description: Notion AI is a connected assistant that helps you think bigger, work faster, and augments your creativity, right inside the functional workspace you’re already familiar with. 
- failures: ''
- feedback: ''
- intended_uses: ''
- license: ''
- monitoring: ''
- monthly_active_users: ''
- name: Notion AI
- organization: Notion
+ created_date: 2023-02-22
+ url: https://www.notion.so/help/guides/notion-ai-for-docs
+ dependencies: [Anthropic API]
+ adaptation: ''
  output_space: ''
- prohibited_uses: ''
  quality_control: ''
+ access: limited
+ license: ''
  terms_of_service: ''
- type: application
- url: https://www.notion.so/help/guides/notion-ai-for-docs
+ intended_uses: ''
+ prohibited_uses: ''
+ monitoring: ''
+ feedback: ''
+ monthly_active_users: ''
  user_distribution: ''
+ failures: ''
diff --git a/assets/nvidia.yaml b/assets/nvidia.yaml
index 39e0f118..5f9e0978 100644
--- a/assets/nvidia.yaml
+++ b/assets/nvidia.yaml
@@ -1,100 +1,99 @@
 ---
-- access:
- explanation: "Neither the 8.3B parameter model trained to convergence nor the\ \ 1 trillion paramter model is available for download\n"
- value: closed
- analysis: ''
+
+# models
+- type: model
+ name: Megatron-LM
+ organization: NVIDIA
+ description: Megatron-LM is an autoregressive language model
  created_date:
- explanation: The date the paper for the 1 trillion parameter model was published
  value: 2021-04-09
+ explanation: The date the paper for the 1 trillion parameter model was published
+ url: https://arxiv.org/abs/2104.04473
+ model_card: none
+ modality: text (English)
+ analysis: ''
+ size: 1000B parameters (dense model)
  dependencies: []
- description: Megatron-LM is an autoregressive language model
- feedback: none
- intended_uses: none
+ training_emissions: unknown
+ training_time: 84 days
+ training_hardware: 3072 A100 GPUs
+ quality_control: unknown
+ access:
+ value: closed
+ explanation: >
+ Neither the 8.3B parameter model trained to convergence nor the 1 trillion
+ parameter model is available for download
  license:
- explanation: "The asset isn't released, and hence the license is unknown.\n"
  value: unknown
- modality:
- explanation: text (English)
- value: text; text
- model_card: none
+ explanation: >
+ The asset isn't released, and hence the license is unknown.
+ intended_uses: none + prohibited_uses: none monitoring: none - name: Megatron-LM + feedback: none + +- type: dataset + name: MineDojo organization: NVIDIA - prohibited_uses: none - quality_control: unknown - size: 1000B parameters (dense) - training_emissions: unknown - training_hardware: 3072 A100 GPUs - training_time: 84 days - type: model - url: https://arxiv.org/abs/2104.04473 -- access: open - analysis: '' + description: '' created_date: 2022-06-17 + url: https://arxiv.org/abs/2206.08853 datasheet: '' + modality: Videos, Text + size: 730k videos, 6k Wikipedia pages, 340k reddit posts + sample: [] + analysis: '' dependencies: [YouTube, Wikipedia, Reddit] - description: '' - excluded: '' - feedback: '' included: '' - intended_uses: '' + excluded: '' + quality_control: '' + access: open license: MIT - modality: - explanation: Videos, Text - value: text, video - monitoring: '' - name: MineDojo - organization: NVIDIA + intended_uses: '' prohibited_uses: '' - quality_control: '' - sample: [] - size: 730k videos, 6k Wikipedia pages, 340k reddit posts - type: dataset - url: https://arxiv.org/abs/2206.08853 -- access: open - analysis: '' + monitoring: '' + feedback: '' + +- type: dataset + name: VIMA dataset + organization: NVIDIA, Stanford + description: '' created_date: 2022-10-06 + url: https://vimalabs.github.io/ datasheet: '' + modality: text and image + size: 200M parameters (dense model) + sample: [] + analysis: '' dependencies: [T5, Mask R-CNN, VIMA dataset] - description: '' - excluded: '' - feedback: '' included: '' - intended_uses: '' + excluded: '' + quality_control: '' + access: open license: MIT - modality: - explanation: text and image - value: image, text + intended_uses: '' + prohibited_uses: '' monitoring: '' - name: VIMA dataset + feedback: '' + +- type: model + name: VIMA organization: NVIDIA, Stanford - prohibited_uses: '' - quality_control: '' - sample: [] - size: 200M parameters (dense model) - type: dataset + description: '' + created_date: 2022-10-06 url: https://vimalabs.github.io/ -- access: open + model_card: '' + modality: Simulation analysis: '' - created_date: 2022-10-06 + size: 650K successful trajectories dependencies: [] - description: '' - feedback: '' - intended_uses: '' - license: MIT - modality: - explanation: Simulation - value: other; other - model_card: '' - monitoring: '' - name: VIMA - organization: NVIDIA, Stanford - prohibited_uses: '' - quality_control: '' - size: 650K parameters (dense) training_emissions: '' - training_hardware: '' training_time: '' - type: model - url: https://vimalabs.github.io/ + training_hardware: '' + quality_control: '' + access: open + license: MIT + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' diff --git a/assets/openai.yaml b/assets/openai.yaml index ec2714f9..74e6779e 100644 --- a/assets/openai.yaml +++ b/assets/openai.yaml @@ -1,104 +1,118 @@ --- -- access: - explanation: "The GPT-3 dataset isn't released to the public, but it may be\ - \ available to Microsoft through the GPT-3 licencing agreement between OpenAI\ - \ and Microsoft [[OpenAI Blog Post]] (https://openai.com/blog/openai-licenses-gpt-3-technology-to-microsoft/).\n" - value: closed - analysis: "The GPT-3 paper, which also introduces the GPT-3 dataset, provides\ - \ a limited analysis on the GPT-3 dataset, reporting the dirtiness of the dataset\ - \ after the it was filtered for text occurring in common benchmarking tasks.\ - \ The authors report that \"as the dataset becomes more contaminated, the variance\ - \ of the clean over 
all fraction increases, but there is no apparent bias towards\ \ improved or degraded performance\" [[Appendix C]](https://arxiv.org/pdf/2005.14165.pdf#appendix.C).\n"
+
+# Datasets
+- type: dataset
+ name: GPT-3 dataset
+ # General
+ organization: OpenAI
+ description: >
+ The GPT-3 dataset is the text corpus that was used to train the GPT-3
+ model. Information on the GPT-3 dataset is limited to discussion in the
+ paper introducing GPT-3
+ [[Section 2.2]](https://arxiv.org/pdf/2005.14165.pdf#subsection.2.2).
  created_date:
- explanation: "The date for the public announcement of GPT-3. The GPT-3 dataset\ \ didn't have a specific release date separate from the model [[Open AI Blog\ \ Post]](https://openai.com/blog/openai-api/).\n"
  value: 2020-06-11
+ explanation: >
+ The date for the public announcement of GPT-3. The GPT-3
+ dataset didn't have a specific release date separate from the model
+ [[Open AI Blog Post]](https://openai.com/blog/openai-api/).
+ url: https://arxiv.org/pdf/2005.14165.pdf
  datasheet:
- explanation: No datasheet available as of 2022-04-04.
  value: none
+ explanation: No datasheet available as of 2022-04-04.
+ modality: text (English)
+ size: 570 GB
+ sample: []
+ analysis: >
+ The GPT-3 paper, which also introduces the GPT-3 dataset, provides a limited
+ analysis on the GPT-3 dataset, reporting the dirtiness of the dataset after
+ it was filtered for text occurring in common benchmarking tasks.
+ The authors report that "as the dataset becomes more contaminated, the
+ variance of the clean over all fraction increases, but there is no apparent
+ bias
+ towards improved or degraded performance"
+ [[Appendix C]](https://arxiv.org/pdf/2005.14165.pdf#appendix.C).
+ # Construction
  dependencies: [WebText]
- description: "The GPT-3 dataset is the text corpus that was used to train the\ \ GPT-3 model. Information on the GPT-3 dataset is limited to discussion in\ \ the paper introducing GPT-3 [[Section 2.2]](https://arxiv.org/pdf/2005.14165.pdf#subsection.2.2).\n"
- excluded: "The Common Crawl dataset was processed using a classifier that kept\ \ high quality documents and filtered low quality documents. WebText was used\ \ as a proxy for high quality documents [[Appendix A]](https://arxiv.org/pdf/2005.14165.pdf#appendix.A).\n"
- feedback:
- explanation: "There are no known (internal or external) feedback mechanisms\ \ for the GPT-3 dataset as of 2022-04-04.\n"
- value: unknown
- included: "The dataset is composed several NLP corpora: Common Crawl (filtered,\ \ 60%), WebText2 (22%), Books1 (8%), Books2 (8%), Wikipedia (3%) [[Section 2.2]](https://arxiv.org/pdf/2005.14165.pdf#subsection.2.2).\n"
- intended_uses: "The intended use of the GPT-3 dataset is to train language models.\n"
  license:
- explanation: "There is no known license specific to the GPT-3 dataset, however,\ \ the governing organization, OpenAI, licensed GPT-3 to Microsoft, which makes\ \ it likely that the GPT-3 dataset was also licensed [[OpenAI Blog Post]]\ \ (https://openai.com/blog/openai-licenses-gpt-3-technology-to-microsoft/).\n"
  value: unknown
- modality:
- explanation: text (English)
- value: text
+ explanation: >
+ There is no known license specific to the GPT-3 dataset, however,
+ the governing organization, OpenAI, licensed GPT-3 to Microsoft, which
+ makes it likely that the GPT-3 dataset was also licensed
+ [[OpenAI Blog Post]]
+ (https://openai.com/blog/openai-licenses-gpt-3-technology-to-microsoft/).
+ included: >
+ The dataset is composed of several NLP corpora: Common Crawl (filtered, 60%),
+ WebText2 (22%), Books1 (8%), Books2 (8%), Wikipedia (3%)
+ [[Section 2.2]](https://arxiv.org/pdf/2005.14165.pdf#subsection.2.2).
+ excluded: >
+ The Common Crawl dataset was processed using a classifier that kept high
+ quality documents and filtered low quality documents. WebText was used as a
+ proxy for high quality documents
+ [[Appendix A]](https://arxiv.org/pdf/2005.14165.pdf#appendix.A).
+ quality_control: >
+ In addition to excluding low quality documents from the Common Crawl
+ dataset, the authors fuzzily deduplicated documents within each dataset, by
+ removing documents that have high overlap with each other. The same
+ procedure was followed to fuzzily deduplicate WebText from Common Crawl
+ [[Appendix A]](https://arxiv.org/pdf/2005.14165.pdf#appendix.A).
+ Text occurring in benchmark datasets was also partially removed
+ [[Appendix C]](https://arxiv.org/pdf/2005.14165.pdf#appendix.C).
+ # Downstream
+ access:
+ value: closed
+ explanation: >
+ The GPT-3 dataset isn't released to the public, but it may be available
+ to Microsoft through the GPT-3 licensing agreement between OpenAI and
+ Microsoft [[OpenAI Blog Post]]
+ (https://openai.com/blog/openai-licenses-gpt-3-technology-to-microsoft/).
+ intended_uses: >
+ The intended use of the GPT-3 dataset is to train language models.
+ prohibited_uses:
+ value: unknown
+ explanation: >
+ OpenAI didn't provide a list of prohibited uses specifically for
+ the GPT-3 dataset. However, public OpenAI products are governed by the
+ OpenAI Terms of Use, which may also apply to the OpenAI dataset.
+ The OpenAI Terms of Use prohibit the following:
+ (i) Illegal activities, such as child pornography, gambling, cybercrime,
+ piracy, violating copyright, trademark or other intellectual property laws;
+ (ii) Accessing or authorizing anyone to access the APIs from an embargoed
+ country, region, or territory as prohibited by the U.S. government;
+ (iii) Threatening, stalking, defaming, defrauding, degrading, victimizing
+ or intimidating anyone for any reason
+ [[Open AI Terms of Use]](https://openai.com/api/policies/terms/).
  monitoring:
- explanation: "There are no known (internal or external) monitoring mechanisms\ \ that are in place for the use of the GPT-3 dataset as of 2022-04-04.\n"
  value: unknown
- name: GPT-3 dataset
- organization: OpenAI
- prohibited_uses:
- explanation: "OpenAI didn't provide a list of prohibited uses specifically for\ \ the GPT-3 dataset. However, public OpenAI products are governed by the OpenAI\ \ Terms of Use, which may also apply to the OpenAI dataset. The OpenAI Terms\ \ of Use prohibit the following: (i) Illegal activities, such as child pornography,\ \ gambling, cybercrime, piracy, violating copyright, trademark or other intellectual\ \ property laws; (ii) Accessing or authorizing anyone to access the APIs from\ \ an embargoed country, region, or territory as prohibited by the U.S. government;\ \ (iii) Threatening, stalking, defaming, defrauding, degrading, victimizing\ \ or intimidating anyone for any reason [[Open AI Terms of Use]](https://openai.com/api/policies/terms/).\n"
+ explanation: >
+ There are no known (internal or external) monitoring mechanisms
+ that are in place for the use of the GPT-3 dataset as of 2022-04-04.
+ feedback: value: unknown - quality_control: "In addition to excluding low quality documents from the Common\ - \ Crawl dataset, the authors fuzzily deduplicated documents within each dataset,\ - \ by removing documents that have high overlap with each other. The same procedure\ - \ was followed to fuzzily deduplicate WebText from Common Crawl [[Appendix A]](https://arxiv.org/pdf/2005.14165.pdf#appendix.A).\ - \ Text occuring in benchmark datasets were also partially removed [[Appendix\ - \ C]](https://arxiv.org/pdf/2005.14165.pdf#appendix.C).\n" - sample: [] - size: 570 GB - type: dataset - url: https://arxiv.org/pdf/2005.14165.pdf -- access: - explanation: "HumanEval dataset is publicly available and comes with a an evaluation\ - \ framework [[HumanEval GitHub Repository]](https://www.github.com/openai/human-eval).\n" - value: open - analysis: none + explanation: > + There are no known (internal or external) feedback mechanisms for + the GPT-3 dataset as of 2022-04-04. + +- type: dataset + name: HumanEval + # General + organization: OpenAI + description: > + HumanEval is a dataset of 164 programming problems hand-written to evaluate + their Codex model. created_date: - explanation: "The date that Codex, the model evaluated on the HumanEval dataset,\ - \ was announced to the public [[OpenAI Blog Post]](https://openai.com/blog/openai-codex/).\n" value: 2021-08-10 + explanation: > + The date that Codex, the model evaluated on the HumanEval + dataset, was announced to the public + [[OpenAI Blog Post]](https://openai.com/blog/openai-codex/). + url: https://arxiv.org/pdf/2107.03374.pdf datasheet: - explanation: No datasheet available as of 2022-04-10. value: none - dependencies: [] - description: "HumanEval is a dataset of 164 programming problems hand-written\ - \ to evaluate their Codex model.\n" - excluded: "Code problems easily found on the internet.\n" - feedback: "Email the authors [[Codex Paper]](https://arxiv.org/pdf/2107.03374.pdf).\n" - included: "164 hand-written questions.\n" - intended_uses: "Evaluating code generation capabilities of models.\n" - license: - explanation: https://github.com/openai/human-eval/blob/master/LICENSE - value: MIT - modality: - explanation: code (Python) - value: code - monitoring: none - name: HumanEval - organization: OpenAI - prohibited_uses: none - quality_control: "The evaluation dataset was handwritten to ensure that the evaluation\ - \ problems do not exist in the Codex dataset [[Section 2.2]](https://arxiv.org/pdf/2107.03374.pdf#subsection.2.2).\n" + explanation: No datasheet available as of 2022-04-10. + modality: code (Python) + size: 214 KB sample: - "\n\ndef string_sequence(n: int) -> str:\n \"\"\" Return a string containing\ \ space-delimited numbers starting from 0 upto n inclusive.\n >>> string_sequence(0)\n\ @@ -125,680 +139,936 @@ \ 'seven', 'eight' and 'nine'.\n Return the string with numbers sorted\ \ from smallest to largest\n >>> sort_numbers('three one five')\n 'one\ \ three five'\n \"\"\"\n" - size: 214 KB - type: dataset - url: https://arxiv.org/pdf/2107.03374.pdf -- access: - explanation: "The dataset might have been made available to Microsoft as part\ - \ of OpenAI giving Microsoft access to its Codex model [GitHub Copilot](https://copilot.github.com/).\n" - value: closed - analysis: - explanation: The paper doesn't provide an analysis on the training dataset. 
- value: none
+ analysis: none
+ # Construction
+ dependencies: []
+ license:
+ value: MIT
+ explanation: https://github.com/openai/human-eval/blob/master/LICENSE
+ included: >
+ 164 hand-written questions.
+ excluded: >
+ Code problems easily found on the internet.
+ quality_control: >
+ The evaluation dataset was handwritten to ensure that the evaluation
+ problems do not exist in the Codex dataset
+ [[Section 2.2]](https://arxiv.org/pdf/2107.03374.pdf#subsection.2.2).
+ # Downstream
+ access:
+ value: open
+ explanation: >
+ HumanEval dataset is publicly available and comes
+ with an evaluation framework
+ [[HumanEval GitHub Repository]](https://www.github.com/openai/human-eval).
+ intended_uses: >
+ Evaluating code generation capabilities of models.
+ prohibited_uses: none
+ monitoring: none
+ feedback: >
+ Email the authors [[Codex Paper]](https://arxiv.org/pdf/2107.03374.pdf).
+
+- type: dataset
+ name: Codex dataset
+ # General
+ organization: OpenAI
+ description: >
+ The dataset used to train the Codex model.
  created_date:
- explanation: "The date that Codex, the model trained on the Codex dataset, was\ \ announced to the public [[OpenAI Blog Post]](https://openai.com/blog/openai-codex/).\n"
  value: 2021-08-10
+ explanation: >
+ The date that Codex, the model trained on the Codex dataset,
+ was announced to the public
+ [[OpenAI Blog Post]](https://openai.com/blog/openai-codex/).
+ url: https://arxiv.org/pdf/2107.03374.pdf
  datasheet: none
+ modality: code
+ size:
+ value: 159 GB
+ explanation: As reported by the authors [[Section 3.1]](https://arxiv.org/pdf/2107.03374.pdf#subsection.3.1).
+ sample: []
+ analysis:
+ value: none
+ explanation: The paper doesn't provide an analysis on the training dataset.
+ # Construction
  dependencies: []
- description: "The dataset used to train the Codex model.\n"
- excluded: "Following were filtered from the dataset: autogenerated files; files\ \ with average line length > 100, maximum line length > 1000, or few alphanumeric\ \ characters [[Section 3.1]](https://arxiv.org/pdf/2107.03374.pdf#subsection.3.1).\n"
- feedback: "Email the authors [[Codex Paper]](https://arxiv.org/pdf/2107.03374.pdf).\n"
- included: "The dataset includes 54 million public software repositories hosted\ \ on GitHub as of an unspecified date in May 2020 [[Section 3.1]](https://arxiv.org/pdf/2107.03374.pdf#subsection.3.1).\n"
- intended_uses: Training language models on code.
  license:
- explanation: "The asset isn't released, and hence the license is unknown.\n"
  value: unknown
- modality: code
+ explanation: >
+ The asset isn't released, and hence the license is unknown.
+ included: >
+ The dataset includes 54 million public software repositories hosted on
+ GitHub as of an unspecified date in May 2020
+ [[Section 3.1]](https://arxiv.org/pdf/2107.03374.pdf#subsection.3.1).
+ excluded: >
+ Following were filtered from the dataset: autogenerated files; files with
+ average line length > 100, maximum line length > 1000, or few alphanumeric
+ characters
+ [[Section 3.1]](https://arxiv.org/pdf/2107.03374.pdf#subsection.3.1).
+ quality_control: >
+ Dataset was filtered using simple heuristics, as outlined in the excluded
+ field.
+ # Downstream
+ access:
+ value: closed
+ explanation: >
+ The dataset might have been made available to Microsoft
+ as part of OpenAI giving Microsoft access to its Codex model
+ [GitHub Copilot](https://copilot.github.com/).
+ intended_uses: Training language models on code.
+ prohibited_uses: unknown monitoring: unknown - name: Codex dataset + feedback: > + Email the authors + [[Codex Paper]](https://arxiv.org/pdf/2107.03374.pdf). + +- type: dataset + name: CLIP dataset + # General organization: OpenAI - prohibited_uses: unknown - quality_control: "Dataset was filtered using simple heuristics, as outlined in\ - \ the excluded field.\n" - sample: [] - size: - explanation: As reported by the authors [[Section 3.1]](https://arxiv.org/pdf/2107.03374.pdf#subsection.3.1). - value: 159 GB - type: dataset - url: https://arxiv.org/pdf/2107.03374.pdf -- access: - explanation: The dataset wasn't released to the public. - value: closed - analysis: "The dataset contained some overlap with the test sets of the benchmarks\ - \ used for evaluation, but the authors determined the impact to be small: \"\ - There is a median overlap of 2.2% and an average overlap of 3.2%. Due to this\ - \ small amount of overlap, overall accuracy is rarely shifted by more than 0.1%\ - \ with only 7 datasets above this threshold\" [[Section 5]](https://arxiv.org/pdf/2103.00020.pdf#section.5).\n" + description: > + CLIP dataset contains text-image pairs crawled from the internet. created_date: - explanation: "The date of the blog post announcing CLIP [[OpenAI Blog Post]](https://openai.com/blog/clip/).\n" value: 2021-01-05 + explanation: > + The date of the blog post announcing CLIP + [[OpenAI Blog Post]](https://openai.com/blog/clip/). + url: https://arxiv.org/pdf/2103.00020.pdf datasheet: none + modality: text (English) and image + size: 400M (image, text) pairs + sample: [] + analysis: > + The dataset contained some overlap with the test sets of the benchmarks used + for evaluation, but the authors determined the impact to be small: "There + is a median overlap of 2.2% and an average overlap of 3.2%. Due to this + small amount of overlap, overall accuracy is rarely shifted by more than + 0.1% with only 7 datasets above this threshold" + [[Section 5]](https://arxiv.org/pdf/2103.00020.pdf#section.5). + # Construction dependencies: [] - description: "CLIP dataset contains text-image pairs crawled from the internet.\n" - excluded: none - feedback: - explanation: The feedback mechanisms in place are unknown. + license: value: unknown - included: "Data crawled from the internet, without any filtering (including de-duplication)\ - \ or curation.\n" + explanation: > + The asset doesn't have a known license as it hasn't been publicly + released. + included: > + Data crawled from the internet, without any filtering (including + de-duplication) or curation. + excluded: none + quality_control: > + The data was "only crawled websites that had policies against excessively + violent and adult images and allowed us to filter out such content" + [[Model Card]](https://github.com/openai/CLIP/blob/main/model-card.md). + # Downstream + access: + value: closed + explanation: The dataset wasn't released to the public. intended_uses: Training multimodal vision models. - license: - explanation: "The asset doesn't have a known license as it hasn't been publicly\ - \ released.\n" + prohibited_uses: value: unknown - modality: - explanation: text (English) and image - value: image, text + explanation: The prohibited uses of the dataset are unknown. monitoring: + value: unknown explanation: The monitoring mechanisms in place are unknown. + feedback: value: unknown - name: CLIP dataset + explanation: The feedback mechanisms in place are unknown. 
+ +- type: dataset + name: DALL·E dataset + # General organization: OpenAI - prohibited_uses: - explanation: The prohibited uses of the dataset are unknown. - value: unknown - quality_control: "The data was \"only crawled websites that had policies against\ - \ excessively violent and adult images and allowed us to filter out such content\"\ - \ [[Model Card]](https://github.com/openai/CLIP/blob/main/model-card.md).\n" + description: > + DALL·E dataset is the training set consisting of image and text pairs + collected to train the DALL·E model. + created_date: + value: 2021-01-05 + explanation: > + The date of the blog post announcing DALL·E + [[OpenAI Blog Post]](https://openai.com/blog/dall-e/). + url: https://arxiv.org/abs/2102.12092 + datasheet: none + modality: text (English) and image + size: > + 250M (image, text) pairs sample: [] - size: 400M (image, text) pairs - type: dataset - url: https://arxiv.org/pdf/2103.00020.pdf -- access: - explanation: The dataset wasn't released to the public. - value: closed analysis: The authors found that the dataset contained 21% of the images in the MS-COCO validation set, but observed no significant changes in the performance of the accompanying DALL·E when tested on MS-COCO evaluation set with and without the said images [[Section 3.1]](https://arxiv.org/pdf/2102.12092.pdf#subsection.3.1). - created_date: - explanation: "The date of the blog post announcing DALL·E [[OpenAI Blog Post]](https://openai.com/blog/dall-e/).\n" - value: 2021-01-05 - datasheet: none + # Construction dependencies: [] - description: "DALL·E dataset is the training set consisting of image and text\ - \ pairs collected to train the DALL·E model.\n" - excluded: "MS-COCO was excluded from the dataset, but because MS-COCO was created\ - \ from YFCC100M, some of the test images (not the captions) were included.\n" - feedback: - explanation: The feedback mechanisms in place are unknown. + license: value: unknown - included: "Data from the internet, including Conceptual Captions and a filtered\ - \ subset of YFCC100M.\n" + explanation: > + The asset doesn't have a known license as it hasn't been publicly + released. + included: > + Data from the internet, including Conceptual Captions and a filtered subset + of YFCC100M. + excluded: > + MS-COCO was excluded from the dataset, but because MS-COCO was created from + YFCC100M, some of the test images (not the captions) were included. + quality_control: > + The data was de-duplicated + [[Section 3.2]](https://arxiv.org/pdf/2102.12092.pdf#subsection.3.2). + The data collected from the internet was filtered using image, text and + joint image and text filters, which included: "discarding instances whose + captions are too short, are classified as non-English by the Python package + cld3, or that consist primarily of boilerplate phrases such as “photographed + on ”, where matches various formats for dates that we found in + the data". The authors also discard "instances whose images have aspect + ratios not in [1/2, 2]" + [[Appendix C]](https://arxiv.org/pdf/2102.12092.pdf#appendix.C). + # Downstream + access: + value: closed + explanation: The dataset wasn't released to the public. intended_uses: Training multimodal vision models. - license: - explanation: "The asset doesn't have a known license as it hasn't been publicly\ - \ released.\n" + prohibited_uses: value: unknown - modality: - explanation: text (English) and image - value: image, text + explanation: The prohibited uses of the dataset are unknown. 
monitoring: + value: unknown explanation: The monitoring mechanisms in place are unknown. + feedback: value: unknown - name: DALL·E dataset + explanation: The feedback mechanisms in place are unknown. + +- type: dataset + name: Whisper dataset + # General organization: OpenAI - prohibited_uses: - explanation: The prohibited uses of the dataset are unknown. - value: unknown - quality_control: "The data was de-duplicated [[Section 3.2]](https://arxiv.org/pdf/2102.12092.pdf#subsection.3.2).\ - \ The data collected from the internet was filtered using image, text and joint\ - \ image and text filters, which included: \"discarding instances whose captions\ - \ are too short, are classified as non-English by the Python package cld3, or\ - \ that consist primarily of boilerplate phrases such as “photographed on ”,\ - \ where matches various formats for dates that we found in the data\"\ - . The authors also discard \"instances whose images have aspect ratios not in\ - \ [1/2, 2]\" [[Appendix C]](https://arxiv.org/pdf/2102.12092.pdf#appendix.C).\n" - sample: [] - size: "250M (image, text) pairs\n" - type: dataset - url: https://arxiv.org/abs/2102.12092 -- access: - explanation: "The dataset isn't released to the public.\n" - value: closed - analysis: "The Whisper paper provides limited details on preprocessing.\n" + description: > + The Whisper dataset is the speech corpus that was used to train the Whisper + model. Information on the dataset is limited to discussion in the + paper introducing Whisper. + [[Section 2.1]](https://cdn.openai.com/papers/whisper.pdf). created_date: - explanation: "The date for the public announcement of Whisper. The dataset didn't\ - \ have a specific release date separate from the model [[Open AI Blog Post]](https://openai.com/blog/whisper/).\n" value: 2022-09-21 + explanation: > + The date for the public announcement of Whisper. The + dataset didn't have a specific release date separate from the model + [[Open AI Blog Post]](https://openai.com/blog/whisper/). + url: https://cdn.openai.com/papers/whisper.pdf datasheet: - explanation: No datasheet available as of 2022-12-07. value: none + explanation: No datasheet available as of 2022-12-07. + modality: speech + size: 680k hours + sample: [] + analysis: > + The Whisper paper provides limited details on preprocessing. + # Construction dependencies: [] - description: "The Whisper dataset is the speech corpus that was used to train\ - \ the Whisper model. Information on the dataset is limited to discussion in\ - \ the paper introducing Whisper. [[Section 2.1]](https://cdn.openai.com/papers/whisper.pdf).\n" - excluded: "Automated filtering was conducted.\n" - feedback: - explanation: "There are no known (internal or external) feedback mechanisms\ - \ for the dataset as of 2022-12-07.\n" - value: unknown - included: "The dataset is composed three major sources: multilingual speech recognition\ - \ (17%), translation (18%), and English speech recognition (65%). [[Figure 11]](https://cdn.openai.com/papers/whisper.pdf).\n" - intended_uses: The intended use is to train speech models. license: - explanation: "The asset doesn't have a known license as it hasn't been publicly\ - \ released.\n" value: unknown - modality: - explanation: speech - value: audio + explanation: > + The asset doesn't have a known license as it hasn't been publicly + released. + included: > + The dataset is composed three major sources: multilingual speech recognition + (17%), translation (18%), and English speech recognition (65%). 
+ [[Figure 11]](https://cdn.openai.com/papers/whisper.pdf). + excluded: > + Automated filtering was conducted. + quality_control: > + In addition to filtering, basic text standardization was done. + # Downstream + access: + value: closed + explanation: > + The dataset isn't released to the public. + intended_uses: The intended use is to train speech models. + prohibited_uses: + value: none + explanation: > + No uses are explicitly prohibited. monitoring: - explanation: "There are no known (internal or external) monitoring mechanisms\ - \ that are in place for the use of the dataset as of 2022-12-07.\n" value: none - name: Whisper dataset + explanation: > + There are no known (internal or external) monitoring mechanisms + that are in place for the use of the dataset as of 2022-12-07. + feedback: + value: unknown + explanation: > + There are no known (internal or external) feedback mechanisms for + the dataset as of 2022-12-07. + +- type: dataset + name: WebText + # General organization: OpenAI - prohibited_uses: - explanation: "No uses are explicitly prohibited.\n" - value: none - quality_control: "In addition to filtering, basic text standardization was done.\n" - sample: [] - size: 680k hours - type: dataset - url: https://cdn.openai.com/papers/whisper.pdf -- access: closed - analysis: '' + description: '' created_date: + value: 2019-11-01 explanation: Due to the lack of information about the exact date, it is assumed to be the 1st of the known month of creation. - value: 2019-11-01 + url: https://cdn.openai.com/better-language-models/language_models_are_unsupervised_multitask_learners.pdf datasheet: '' + modality: text (English) + size: 40 GB + sample: [] + analysis: '' + # Construction dependencies: [] - description: '' - excluded: '' - feedback: '' - included: '' - intended_uses: '' license: - explanation: "The asset doesn't have a known license as it hasn't been publicly\ - \ released.\n" value: unknown - modality: {} + explanation: > + The asset doesn't have a known license as it hasn't been publicly + released. + included: '' + excluded: '' + quality_control: '' + # Downstream + access: closed + intended_uses: '' + prohibited_uses: '' monitoring: '' - name: WebText + feedback: '' + +# Models +- type: model + name: GPT-2 + # General organization: OpenAI - prohibited_uses: '' - quality_control: '' - sample: [] - size: 40 GB - type: dataset - url: https://cdn.openai.com/better-language-models/language_models_are_unsupervised_multitask_learners.pdf -- access: open - analysis: '' + description: '' created_date: + value: 2019-11-01 explanation: Due to the lack of information about the exact date, it is assumed to be the 1st of the known month of creation. - value: 2019-11-01 + url: https://cdn.openai.com/better-language-models/language_models_are_unsupervised_multitask_learners.pdf + model_card: https://github.com/openai/gpt-2/blob/master/model_card.md + modality: text (English) + size: 1.5B parameters (dense model) + analysis: '' + # Construction dependencies: [WebText] - description: '' - feedback: '' - intended_uses: '' + training_emissions: '' + training_time: '' + training_hardware: '' + quality_control: '' + # Downstream + access: open license: - explanation: "GPT-2 uses a [[modified MIT License]](https://github.com/openai/gpt-2/blob/master/LICENSE).\n" value: Modified MIT License - modality: {} - model_card: https://github.com/openai/gpt-2/blob/master/model_card.md + explanation: > + GPT-2 uses a + [[modified MIT License]](https://github.com/openai/gpt-2/blob/master/LICENSE). 
+ intended_uses: '' + prohibited_uses: '' monitoring: '' - name: GPT-2 + feedback: '' + +- type: model + name: GPT-3 + # General organization: OpenAI - prohibited_uses: '' - quality_control: '' - size: '' - training_emissions: '' - training_hardware: '' - training_time: '' - type: model - url: https://cdn.openai.com/better-language-models/language_models_are_unsupervised_multitask_learners.pdf -- access: - explanation: "The GPT-3 model isn't fully released to the public, but it was\ - \ made available to Microsoft through the licencing agreement between OpenAI\ - \ and Microsoft [[OpenAI Blog Post]] (https://openai.com/blog/openai-licenses-gpt-3-technology-to-microsoft/).\ - \ The public can access the model through the Open AI API, which is available\ - \ in supported countries [[Supported Countries]](https://beta.openai.com/docs/supported-countries)\ - \ [[OpenAI API]](https://openai.com/api/).\n" - value: limited - analysis: "The GPT-3 model was evaluated on language modeling, closed-book question\ - \ answering, translation, Winograd-style tasks, commonsense reasoning, reading\ - \ comprehension, SuperGLUE, NLI, synthetic tasks, and generation [[Section 4]](https://arxiv.org/pdf/2005.14165.pdf#section.4);\ - \ as well as on fairness and biases [[Section 6]](https://arxiv.org/pdf/2005.14165.pdf#section.6).\n" + description: > + GPT-3 is an autoregressive language model. created_date: - explanation: "The date that GPT-3 was announced to the public [[OpenAI Blog\ - \ Post]](https://openai.com/blog/openai-api/).\n" value: 2020-06-11 - dependencies: [GPT-3 dataset] - description: "GPT-3 is an autoregressive language model.\n" - feedback: "Feedback for GPT-3 can be provided on the feedback form linked in the\ - \ model card [[Model Card]](https://github.com/openai/gpt-3/blob/master/model-card.md).\ - \ The form is especially meant to collect feedback on concerns about misuse,\ - \ synthetic text detection, bias, and risk of generative language models.\n" - intended_uses: "GPT-3 was intended to be use through the OpenAI API by developers\ - \ for language applications. Other intended use of GPT-3 include researchers\ - \ accessing the model through the API to study its paradigms [[Model Card]](https://github.com/openai/gpt-3/blob/master/model-card.md).\n" - license: - explanation: "The asset doesn't have a known license as it hasn't been publicly\ - \ released.\n" - value: unknown - modality: - explanation: text (English) - value: text; text + explanation: > + The date that GPT-3 was announced to the public + [[OpenAI Blog Post]](https://openai.com/blog/openai-api/). + url: https://arxiv.org/pdf/2005.14165.pdf model_card: https://github.com/openai/gpt-3/blob/master/model-card.md - monitoring: "OpenAI reviews all use cases of the model [[Model Card]](https://github.com/openai/gpt-3/blob/master/model-card.md).\n" - name: GPT-3 - organization: OpenAI - prohibited_uses: "Access to GPT-3 is governed by Open AI API Usage Guidelines\ - \ and API Terms of Use, prohibiting the use of the API in a way that causes\ - \ societal harm. [[Usage Guidelines]] (https://beta.openai.com/docs/usage-guidelines/content-policy)\ - \ [[Terms of Use]](https://openai.com/api/policies/terms/). The list of disallowed\ - \ applications can be found in the usage guidelines [[Disallowed Applications]]\ - \ (https://beta.openai.com/docs/usage-guidelines/disallowed-applications).\n" - quality_control: "One quality control method OpenAI employed was releasing GPT-3\ - \ only through the OpenAI API. 
OpenAI states that it is easier to respond to\ - \ misuse when the access to the model is gated through the API. It also hints\ - \ that it plans to broaden the API access over time based on the amount of misuse\ - \ [[OpenAI API Blog Post]](https://openai.com/blog/openai-api/). The authors\ - \ identify potential misuses of GPT-3 in the paper and analyze it for fairness,\ - \ bias and representation issues, but do not identify mitigation strategies\ - \ [[Section 6]](https://arxiv.org/pdf/2005.14165.pdf#section.6).\n" - size: 175B parameters (dense) + modality: text (English) + size: + value: 175B parameters (dense model) + explanation: > + GPT-3 comes in several sizes. Here we report the size of the Davinci + model, the largest GPT-3 model served through the OpenAI API. Sizes of the + other models can be found in the paper + [[Table 2.1]](https://arxiv.org/pdf/2005.14165.pdf#table.caption.7). + analysis: > + The GPT-3 model was evaluated on language modeling, closed-book question + answering, translation, Winograd-style tasks, commonsense reasoning, + reading comprehension, SuperGLUE, NLI, synthetic tasks, and generation + [[Section 4]](https://arxiv.org/pdf/2005.14165.pdf#section.4); + as well as on fairness and biases + [[Section 6]](https://arxiv.org/pdf/2005.14165.pdf#section.6). + # Construction + dependencies: [GPT-3 dataset] training_emissions: - explanation: "Estimate of the CO2(e) emissions for GPT-3 were not provided by\ - \ OpenAI, but they were provided by a follow up work investigating the CO2\ - \ equivalent emissions (CO2e) of GPT-3 [[Patterson et al.]] (https://arxiv.org/ftp/arxiv/papers/2104/2104.10350.pdf).\n" value: 552.1 tCO2e - training_hardware: - explanation: "The original paper doesn't specify the training hardware for GPT-3,\ - \ but a follow up blog post indicates that it was trained on a cluster on\ - \ Azure cluster, using 10000 GPUs with 400 Gbps [[Microsoft Blog Post]] (https://blogs.microsoft.com/ai/openai-azure-supercomputer/).\n" - value: Azure + explanation: > + Estimates of the CO2(e) emissions for GPT-3 were not provided + by OpenAI, but they were provided by a follow up work investigating the CO2 + equivalent emissions (CO2e) of GPT-3 + [[Patterson et al.]] + (https://arxiv.org/ftp/arxiv/papers/2104/2104.10350.pdf). training_time: - explanation: "The time required to train different sized GPT-3 models are listed\ - \ in [[Table D.1]](https://arxiv.org/pdf/2005.14165.pdf#table.caption.50).\ - \ The time required to train the GPT-3 model with 175B parameters is reported\ - \ as 3.64E+03 petaflop/s-days.\n" value: 3640 petaflop/s-days - type: model - url: https://arxiv.org/pdf/2005.14165.pdf -- access: - explanation: "The model is made available via the OpenAI API [[OpenAI API]](https://openai.com/api/)\ - \ as code-cushman-001 according to the [[Model Index]](https://platform.openai.com/docs/model-index-for-researchers).\n" + explanation: > + The times required to train different sized GPT-3 models are listed in + [[Table D.1]](https://arxiv.org/pdf/2005.14165.pdf#table.caption.50). + The time required to train the GPT-3 model with 175B parameters is + reported as 3.64E+03 petaflop/s-days. + training_hardware: + value: Azure + explanation: > + The original paper doesn't specify the training hardware for GPT-3, + but a follow up blog post indicates that it was trained on an + Azure cluster, using 10000 GPUs with 400 Gbps + [[Microsoft Blog Post]] + (https://blogs.microsoft.com/ai/openai-azure-supercomputer/).
+ quality_control: > + One quality control method OpenAI employed was releasing GPT-3 only through + the OpenAI API. OpenAI states that it is easier to respond to misuse when + the access to the model is gated through the API. It also hints that it + plans to broaden the API access over time based on the amount of misuse + [[OpenAI API Blog Post]](https://openai.com/blog/openai-api/). + The authors identify potential misuses of GPT-3 in the paper and analyze + it for fairness, bias and representation issues, but do not identify + mitigation strategies + [[Section 6]](https://arxiv.org/pdf/2005.14165.pdf#section.6). + # Downstream + access: value: limited + explanation: > + The GPT-3 model isn't fully released to the public, but + it was made available to Microsoft through the licensing agreement between + OpenAI and Microsoft + [[OpenAI Blog Post]] + (https://openai.com/blog/openai-licenses-gpt-3-technology-to-microsoft/). + The public can access the model through the Open AI API, which is available + in supported countries + [[Supported Countries]](https://beta.openai.com/docs/supported-countries) + [[OpenAI API]](https://openai.com/api/). license: - explanation: "The asset doesn't have a known license as it hasn't been publicly\ - \ released.\n" - value: unknown - modality: - explanation: text (English) and code - value: code, text; code, text - model_card: none - monitoring: - explanation: "There isn't any known monitoring in place for the model, but there\ - \ may be internal mechanisms.\n" value: unknown + explanation: > + The asset doesn't have a known license as it hasn't been publicly + released. + intended_uses: > + GPT-3 was intended to be used through the OpenAI API by developers for + language applications. Other intended uses of GPT-3 include researchers + accessing the model through the API to study its paradigms + [[Model Card]](https://github.com/openai/gpt-3/blob/master/model-card.md). + prohibited_uses: > + Access to GPT-3 is governed by Open AI API Usage Guidelines and API Terms + of Use, prohibiting the use of the API in a way that causes societal harm. + [[Usage Guidelines]] + (https://beta.openai.com/docs/usage-guidelines/content-policy) + [[Terms of Use]](https://openai.com/api/policies/terms/). + The list of disallowed applications can be found in the usage guidelines + [[Disallowed Applications]] + (https://beta.openai.com/docs/usage-guidelines/disallowed-applications). + monitoring: > + OpenAI reviews all use cases of the model + [[Model Card]](https://github.com/openai/gpt-3/blob/master/model-card.md). + feedback: > + Feedback for GPT-3 can be provided on the feedback form linked in the + model card + [[Model Card]](https://github.com/openai/gpt-3/blob/master/model-card.md).
+ The form is especially meant to collect feedback on concerns about misuse, + synthetic text detection, bias, and risk of generative language models. + +- type: model name: Codex + # General organization: OpenAI - prohibited_uses: - explanation: The prohibited uses of the model aren't specified. - value: unknown - quality_control: "The model wasn't fully released to the public as a quality control\ - \ measure. The authors identify potential risks of Codex in their paper due\ - \ to the following: over-reliance, misalignment, bias and representation, economic\ - \ and labor market impacts, security implications, environmental impact and\ - \ legal implications. They also make suggestions for some of these, but do not\ - \ implement them in Codex [[Section 7]](https://arxiv.org/pdf/2107.03374.pdf#section.7).\n" - size: 12B parameters (dense) + description: > + Codex is a GPT language model fine-tuned on publicly available code from + GitHub. + created_date: + value: 2021-08-10 + explanation: > + The date that Codex was announced to the public + [[OpenAI Blog Post]](https://openai.com/blog/openai-codex/). + url: https://arxiv.org/pdf/2107.03374.pdf + model_card: none + modality: text (English) and code + size: 12B parameters (dense model) + analysis: > + The model was evaluated using the HumanEval dataset with pass@k metric and + BLEU scores + [[Section 2]](https://arxiv.org/pdf/2107.03374.pdf#section.2). + # Construction + dependencies: [GPT-3, Codex dataset, HumanEval] training_emissions: - explanation: Authors do not report the training emissions. value: unknown - training_hardware: - explanation: "The paper specifies that Azure was used, but the underlying architecture\ - \ wasn't specified.\n" - value: Azure + explanation: Authors do not report the training emissions. training_time: - explanation: "Authors estimate hundreds of petaflop/s-days of compute [[Section\ - \ 7.6]](https://arxiv.org/pdf/2107.03374.pdf#subsection.7.6), but don't provide\ - \ an exact number.\n" value: 100-1000 petaflop/s-days - type: model - url: https://arxiv.org/pdf/2107.03374.pdf -- access: - explanation: "The model is made available via the OpenAI API but the specific\ - \ endpoint (davinci-instruct-beta) is currently unavailable. [[Model Index]](https://platform.openai.com/docs/model-index-for-researchers).\n" - value: closed - analysis: The model was evaluated on human ratings to the InstructGPT answers - to the prompts submitted to the OpenAI API as well as on public NLP datasets - spanning truthfulness, toxicity, and bias, question answering, reading comprehension, - and summarization tasks. - created_date: - explanation: "Date of the public announcement introducing InstructGPT [[OpenAI\ - \ Blog Post]] (https://openai.com/blog/instruction-following/).\n" - value: 2022-01-27 - dependencies: [GPT-3, OpenAI API] - description: "InstructGPT is a family of GPT-3 based models fine-tuned on human\ - \ feedback, which allows for better instruction following capabilities than\ - \ GPT-3.\n" - feedback: "Email the authors [[InstructGPT Paper]](https://arxiv.org/pdf/2203.02155.pdf).\n" - intended_uses: "As stated in the model card: \"The intended direct users of InstructGPT\ - \ are developers who access its capabilities via the OpenAI API. Through the\ - \ OpenAI API, the model can be used by those who may not have AI development\ - \ experience, to build and explore language modeling systems across a wide range\ - \ of functions. 
We also anticipate that the model will continue to be used by\ - \ researchers to better understand the behaviors, capabilities, biases, and\ - \ constraints of large-scale language models\" [[Model Card]](https://github.com/openai/following-instructions-human-feedback/blob/main/model-card.md).\n" + explanation: > + Authors estimate hundreds of petaflop/s-days of compute + [[Section 7.6]](https://arxiv.org/pdf/2107.03374.pdf#subsection.7.6), but + don't provide an exact number. + training_hardware: + value: Azure + explanation: > + The paper specifies that Azure was used, but the underlying + architecture wasn't specified. + quality_control: > + The model wasn't fully released to the public as a quality control measure. + The authors identify potential risks of Codex in their paper due to the + following: over-reliance, misalignment, bias and representation, economic + and labor market impacts, security implications, environmental impact and + legal implications. They also make suggestions for some of these, but do not + implement them in Codex + [[Section 7]](https://arxiv.org/pdf/2107.03374.pdf#section.7). + # Downstream + access: + value: limited + explanation: > + The model is made available via the OpenAI API + [[OpenAI API]](https://openai.com/api/) as code-cushman-001 according to the + [[Model Index]](https://platform.openai.com/docs/model-index-for-researchers). license: - explanation: "The asset doesn't have a known license as it hasn't been publicly\ - \ released.\n" value: unknown - modality: - explanation: text (English) and code - value: code, text; code, text - model_card: https://github.com/openai/following-instructions-human-feedback/blob/main/model-card.md + explanation: > + The asset doesn't have a known license as it hasn't been publicly + released. + intended_uses: > + Codex is intended to be used for coding related language modelling tasks. + prohibited_uses: + value: unknown + explanation: The prohibited uses of the model aren't specified. monitoring: - explanation: "There isn't any known monitoring in place for the model, but there\ - \ may be internal mechanisms.\n" value: unknown + explanation: > + There isn't any known monitoring in place for the model, but there + may be internal mechanisms. + feedback: > + Email the authors + [[Codex Paper]](https://arxiv.org/pdf/2107.03374.pdf). + +- type: model name: InstructGPT + # General organization: OpenAI - prohibited_uses: "Access to InstructGPT is governed by Open AI API Usage Guidelines\ - \ and API Terms of Use, prohibiting the use of the API in a way that causes\ - \ societal harm. [[Usage Guidelines]] (https://beta.openai.com/docs/usage-guidelines/content-policy)\ - \ [[Terms of Use]](https://openai.com/api/policies/terms/). The list of disallowed\ - \ applications can be found in the usage guidelines [[Disallowed Applications]]\ - \ (https://beta.openai.com/docs/usage-guidelines/disallowed-applications).\n" - quality_control: "The model wasn't fully released to the public as a quality control\ - \ measure.\n" - size: 175B parameters (dense) - training_emissions: - explanation: The authors do not estimate the emissions of the model. - value: unknown - training_hardware: - explanation: The authors do not disclose the training hardware used. 
- value: unknown - training_time: - explanation: "175B SFT model required 4.9 petaflops/s-days; 175B PPO-ptx model\ - \ required 60 petaflops/s-days [[Section 5]](https://arxiv.org/pdf/2203.02155.pdf#section.5).\n" - value: 60 petaflops/s-days - type: model - url: https://arxiv.org/pdf/2203.02155.pdf -- access: - explanation: "Models are available at [https://github.com/openai/whisper](https://github.com/openai/whisper).\n" - value: open - analysis: The model was evaluated for zero-shot English and multingual speech - recognition, translation, language identification and robustness to noise. + description: > + InstructGPT is a family of GPT-3 based models fine-tuned on human feedback, + which allows for better instruction following capabilities than GPT-3. created_date: - explanation: "The date that Whisper was announced to the public [[OpenAI Blog\ - \ Post]](https://openai.com/blog/whisper/).\n" - value: 2022-09-21 - dependencies: [Whisper dataset] - description: "GPT-3 is a speech recognition model.\n" - feedback: "The discussions page of the codebase is not formally cited as a place\ - \ for feedback, but is being used in this way [[Discussions page]](https://github.com/openai/whisper/discussions)\n" - intended_uses: "Whisper is a general-purpose speech recognition model; it is a\ - \ multi-task model that can perform multilingual speech recognition as well\ - \ as speech translation and language identification.\n" + value: 2022-01-27 + explanation: > + Date of the public announcement introducing InstructGPT + [[OpenAI Blog Post]] (https://openai.com/blog/instruction-following/). + url: https://arxiv.org/pdf/2203.02155.pdf + model_card: https://github.com/openai/following-instructions-human-feedback/blob/main/model-card.md + modality: text (English) and code + size: + value: 175B parameters (dense model) + explanation: Size of the largest InstructGPT model. + analysis: The model was evaluated on human ratings to the InstructGPT answers + to the prompts submitted to the OpenAI API as well as on public NLP datasets + spanning truthfulness, toxicity, and bias, question answering, reading comprehension, + and summarization tasks. + # Construction + dependencies: [GPT-3, OpenAI API] + training_emissions: + value: unknown + explanation: The authors do not estimate the emissions of the model. + training_time: + value: 60 petaflops/s-days + explanation: > + 175B SFT model required 4.9 petaflops/s-days; 175B PPO-ptx model required + 60 petaflops/s-days + [[Section 5]](https://arxiv.org/pdf/2203.02155.pdf#section.5). + training_hardware: + value: unknown + explanation: The authors do not disclose the training hardware used. + quality_control: > + The model wasn't fully released to the public as a quality control measure. + # Downstream + access: + value: closed + explanation: > + The model is made available via the OpenAI API but the specific endpoint (davinci-instruct-beta) + is currently unavailable. + [[Model Index]](https://platform.openai.com/docs/model-index-for-researchers). license: - explanation: https://github.com/openai/whisper](https://github.com/openai/whisper - value: MIT - modality: - explanation: Speech (multilingual, though primarily English) - value: audio; audio - model_card: https://github.com/openai/whisper/blob/main/model-card.md + value: unknown + explanation: > + The asset doesn't have a known license as it hasn't been publicly + released. 
+ intended_uses: > + As stated in the model card: "The intended direct users of InstructGPT are + developers who access its capabilities via the OpenAI API. Through the + OpenAI API, the model can be used by those who may not have AI development + experience, to build and explore language modeling systems across a wide + range of functions. We also anticipate that the model will continue to be + used by researchers to better understand the behaviors, capabilities, + biases, and constraints of large-scale language models" + [[Model Card]](https://github.com/openai/following-instructions-human-feedback/blob/main/model-card.md). + prohibited_uses: > + Access to InstructGPT is governed by Open AI API Usage Guidelines and API + Terms + of Use, prohibiting the use of the API in a way that causes societal harm. + [[Usage Guidelines]] + (https://beta.openai.com/docs/usage-guidelines/content-policy) + [[Terms of Use]](https://openai.com/api/policies/terms/). + The list of disallowed applications can be found in the usage guidelines + [[Disallowed Applications]] + (https://beta.openai.com/docs/usage-guidelines/disallowed-applications). monitoring: - explanation: "There isn't any known monitoring in place for the model, but there\ - \ may be internal mechanisms.\n" value: unknown + explanation: > + There isn't any known monitoring in place for the model, but there + may be internal mechanisms. + feedback: > + Email the authors [[InstructGPT Paper]](https://arxiv.org/pdf/2203.02155.pdf). + +- type: model name: Whisper + # General organization: OpenAI - prohibited_uses: unknown - quality_control: No specific quality control methods are documented. - size: 1.5B parameters (dense) + description: > + Whisper is a speech recognition model. + created_date: + value: 2022-09-21 + explanation: > + The date that Whisper was announced to the public + [[OpenAI Blog Post]](https://openai.com/blog/whisper/). + url: https://cdn.openai.com/papers/whisper.pdf + model_card: https://github.com/openai/whisper/blob/main/model-card.md + modality: Speech (multilingual, though primarily English) + size: + value: 1.5B parameters (dense model) + explanation: > + Whisper comes in several sizes (39M, 74M, 244M, 769M, 1.55B). Here we report + the size of the largest + model [[Table 1]](https://cdn.openai.com/papers/whisper.pdf). + analysis: The model was evaluated for zero-shot English and multilingual speech + recognition, translation, language identification and robustness to noise. + # Construction + dependencies: [Whisper dataset] training_emissions: - explanation: "Authors do not report the training emissions.\n" value: unknown + explanation: > + Authors do not report the training emissions. training_time: - explanation: "Authors do not report the training time.\n" value: unknown + explanation: > + Authors do not report the training time.
+ training_hardware: + value: Azure + explanation: > + The original paper doesn't specify the training hardware for Whisper, + but we expect it is trained on Azure given other models from OpenAI are trained + using Azure + [[Microsoft Blog Post]] + (https://blogs.microsoft.com/ai/openai-azure-supercomputer/). + quality_control: No specific quality control methods are documented. + # Downstream + access: value: open - analysis: The model was evaluated on standard vision datasets (e.g. CIFAR10, ImageNet) - and showed robust state of the art results. - created_date: - explanation: "The date of the blog post announcing CLIP [[OpenAI Blog Post]](https://openai.com/blog/clip/).\n" - value: 2021-01-05 - dependencies: [CLIP dataset] - description: "\"CLIP (Contrastive Language-Image Pre-Training) is a neural network\ - \ trained on a variety of (image, text) pairs. It can be instructed in natural\ - \ language to predict the most relevant text snippet, given an image, without\ - \ directly optimizing for the task, similarly to the zero-shot capabilities\ - \ of GPT-2 and 3. We found CLIP matches the performance of the original ResNet50\ - \ on ImageNet “zero-shot” without using any of the original 1.28M labeled examples,\ - \ overcoming several major challenges in computer vision\" [[CLIP Repository]](https://github.com/openai/CLIP).\n" - feedback: Questions can be shared at the feedback form linked in the CLIP model - card [[Model Card]](https://github.com/openai/CLIP/blob/main/model-card.mdlicen). - intended_uses: "The model is intended to be used by AI researchers to better understand\ - \ \"robustness, generalization, and other capabilities, biases, and constraints\ - \ of computer vision models\" [[CLIP Model Card]](https://github.com/openai/CLIP/blob/main/model-card.md).\n" - license: MIT - modality: - explanation: Text (English) and Image - value: image, text; image, text - model_card: https://github.com/openai/CLIP/blob/main/model-card.md + explanation: > + Models are available at [https://github.com/openai/whisper](https://github.com/openai/whisper). + license: + value: MIT + explanation: https://github.com/openai/whisper](https://github.com/openai/whisper + intended_uses: > + Whisper is a general-purpose speech recognition model; it is a multi-task + model that can perform multilingual speech recognition as well as speech + translation and language identification. + prohibited_uses: unknown monitoring: - explanation: There are no monitoring mechanisms in place for CLIP. value: none + explanation: > + No monitoring is mentioned by the authors. + feedback: > + The discussions page of the codebase is not formally cited as a place + for feedback, but is being used in this way [[Discussions page]](https://github.com/openai/whisper/discussions) + +- type: model name: CLIP + # General organization: OpenAI - prohibited_uses: "\"Any deployed use case of the model - whether commercial or\ - \ not - is currently out of scope. Non-deployed use cases such as image search\ - \ in a constrained environment, are also not recommended unless there is thorough\ - \ in-domain testing of the model with a specific, fixed class taxonomy. This\ - \ is because our safety assessment demonstrated a high need for task specific\ - \ testing especially given the variability of CLIP’s performance with different\ - \ class taxonomies. 
This makes untested and unconstrained deployment of the\ - \ model in any use case currently potentially harmful.\nCertain use cases which\ - \ would fall under the domain of surveillance and facial recognition are always\ - \ out-of-scope regardless of performance of the model. This is because the use\ - \ of artificial intelligence for tasks such as these can be premature currently\ - \ given the lack of testing norms and checks to ensure its fair use.\nSince\ - \ the model has not been purposefully trained in or evaluated on any languages\ - \ other than English, its use should be limited to English language use cases\"\ - \ [[Model Card]](https://github.com/openai/CLIP/blob/main/model-card.mdlicen).\n" - quality_control: "The authors found that the performance of the model depended\ - \ heavily on which classes are included (and excluded) for a given task. They\ - \ reported significant race and gender based disparities on the Fairface dataset,\ - \ depending on how the classes were constructed. The authors also demonstrated\ - \ that the model was capable of racial profiling with high accuracy [[Section\ - \ 7]](https://arxiv.org/pdf/2103.00020.pdf#section.7).\n" - size: unknown - training_emissions: unknown - training_hardware: NVIDIA V100 GPUs - training_time: - explanation: "The exact training time of CLIP depends on the vision and language\ - \ encoders used: \"The largest ResNet model, RN50x64, took 18 days to train\ - \ on 592 V100 GPUs while the largest Vision Transformer took 12 days on 256\ - \ V100 GPUs. For the ViT-L/14 we also pre-train at a higher 336 pixel resolution\ - \ for one additional epoch to boost performance ... Unless otherwise specified,\ - \ all results reported in this paper as “CLIP” use this model which we found\ - \ to perform best\" [[CLIP paper]](https://arxiv.org/pdf/2103.00020.pdf).\n\ - Using the GPU Time method outlined in the [[OpenAI AI and Computer Blog]](https://openai.com/blog/ai-and-compute/#addendum),\ - \ we can compute the training time in petaflop/s-day for RN50x64 and ViT-L/14\ - \ with the following equation: Number of GPUs * (peta-flops/GPU) * days trained\ - \ * estimated utilization. We will assume that estimated utilization is 33%,\ - \ following [[OpenAI AI and Computer Blog]](https://openai.com/blog/ai-and-compute/#addendum).\ - \ The specific V100 GPU used isn't cleared from the paper, so we will assume\ - \ that V100 PCle was used. The V100 PCle GPUs have a single precision performance\ - \ of 15.7 teraflops [[V100 Datasheet]](https://images.nvidia.com/content/technologies/volta/pdf/volta-v100-datasheet-update-us-1165301-r5.pdf),\ - \ which is equal to 15.7 / 1000 = 0.0157 petaflops.\nFollowing the formula\ - \ \"Number of GPUs * (peta-flops/GPU) * days trained * estimated utilization\"\ - , we can compute the petaflop/s-day for RN50x64 as follows: 592 * 0.0157 *\ - \ 18 * 0.33. Similarly, for ViT-L/14, we can compute the same as: 256 * 0.0157\ - \ * 12 * 0.33. Adding the two, we estimate the total training time as 71.12\ - \ petaflop/s-day.\n" - value: 71.12 petaflop/s-day - type: model - url: https://arxiv.org/pdf/2103.00020.pdf -- access: - explanation: Available via the OpenAI API as of Q1 2023. - value: limited - analysis: "The model was evaluated against three prior approaches, AttnGAN, DM-GAN,\ - \ and DF-GAN using Inception Score and Fréchet Inception Distance on MS-COCO\ - \ as metrics. 
The model was also evaluated by humans and received the majority\ - \ of the votes in generating images that look realistic and better match the\ - \ caption when compared to the images generated by DF-GAN [[Section]](https://arxiv.org/pdf/2102.12092.pdf#section.3).\n" + description: > + "CLIP (Contrastive Language-Image Pre-Training) is a neural network trained + on a variety of (image, text) pairs. It can be instructed in natural + language to predict the most relevant text snippet, given an image, + without directly optimizing for the task, similarly to the zero-shot + capabilities of GPT-2 and 3. We found CLIP matches the performance of the + original ResNet50 on ImageNet “zero-shot” without using any of the original + 1.28M labeled examples, overcoming several major challenges in computer + vision" + [[CLIP Repository]](https://github.com/openai/CLIP). created_date: - explanation: "The date of the blog post announcing DALL·E [[OpenAI Blog Post]](https://openai.com/blog/dall-e/).\n" value: 2021-01-05 - dependencies: [DALL·E dataset] - description: "DALL·E is a GPT-3 based model trained to generate images from text\ - \ descriptions. The authors found that it had \"a diverse set of capabilities,\ - \ including creating anthropomorphized versions of animals and objects, combining\ - \ unrelated concepts in plausible ways, rendering text, and applying transformations\ - \ to existing images\" [[OpenAI Blog Post]](https://openai.com/blog/dall-e/).\n" - feedback: "Contact the paper author(s) specified on the paper [[Paper]](https://arxiv.org/pdf/2102.12092.pdf).\n" - intended_uses: "\"The model is intended for others to use for training their own\ - \ generative models\" [[Model Card]](https://github.com/openai/DALL-E/blob/master/model_card.md).\n" - license: - explanation: "The asset doesn't have a known license as it hasn't been publicly\ - \ released.\n" + explanation: > + The date of the blog post announcing CLIP + [[OpenAI Blog Post]](https://openai.com/blog/clip/). + url: https://arxiv.org/pdf/2103.00020.pdf + model_card: https://github.com/openai/CLIP/blob/main/model-card.md + modality: Text (English) and Image + size: value: unknown - modality: - explanation: Text (English) and Image - value: image, text; image, text - model_card: https://github.com/openai/DALL-E/blob/master/model_card.md + explanation: > + The total size is unknown, but the largest CLIP model is a + a combination of 63M-parameter (dense) text encoder and a 307M-parameter + vision encoder. + analysis: The model was evaluated on standard vision datasets (e.g. CIFAR10, ImageNet) + and showed robust state of the art results. + # Construction + dependencies: [CLIP dataset] + training_emissions: unknown + training_time: + value: 71.12 petaflop/s-day + explanation: > + The exact training time of CLIP depends on the vision and language + encoders used: "The largest ResNet model, RN50x64, took 18 days to train + on 592 V100 GPUs while the largest Vision Transformer took 12 days on 256 + V100 GPUs. For the ViT-L/14 we also pre-train at a higher 336 pixel + resolution for one additional epoch to boost performance ... Unless + otherwise specified, all results reported in this paper as “CLIP” use this + model which we found to perform best" + [[CLIP paper]](https://arxiv.org/pdf/2103.00020.pdf). 
+ + Using the GPU Time method outlined in the + [[OpenAI AI and Computer Blog]](https://openai.com/blog/ai-and-compute/#addendum), + we can compute the training time in petaflop/s-day for RN50x64 and + ViT-L/14 with the following equation: Number of GPUs * (peta-flops/GPU) * + days trained * estimated utilization. + We will assume that estimated utilization is 33%, following + [[OpenAI AI and Computer Blog]](https://openai.com/blog/ai-and-compute/#addendum). + The specific V100 GPU used isn't clear from the paper, so we will assume + that V100 PCIe was used. The V100 PCIe GPUs have a single precision + performance of 15.7 teraflops + [[V100 Datasheet]](https://images.nvidia.com/content/technologies/volta/pdf/volta-v100-datasheet-update-us-1165301-r5.pdf), + which is equal to 15.7 / 1000 = 0.0157 petaflops. + + Following the formula "Number of GPUs * (peta-flops/GPU) * days trained * + estimated utilization", we can compute the petaflop/s-day for RN50x64 as + follows: + 592 * 0.0157 * 18 * 0.33. + Similarly, for ViT-L/14, we can compute the same as: + 256 * 0.0157 * 12 * 0.33. + Adding the two, we estimate the total training time as 71.12 + petaflop/s-day. + training_hardware: NVIDIA V100 GPUs + quality_control: > + The authors found that the performance of the model depended heavily on + which classes are included (and excluded) for a given task. They reported + significant race and gender based disparities on the Fairface dataset, + depending on how the classes were constructed. The authors also demonstrated + that the model was capable of racial profiling with high accuracy + [[Section 7]](https://arxiv.org/pdf/2103.00020.pdf#section.7). + # Downstream + access: + value: open + explanation: > + Model checkpoints and the helper code can be accessed + at the official CLIP repository + [[CLIP Repository]](https://github.com/openai/CLIP). + license: MIT + intended_uses: > + The model is intended to be used by AI researchers to better understand + "robustness, generalization, and other capabilities, biases, and constraints + of computer vision models" + [[CLIP Model Card]](https://github.com/openai/CLIP/blob/main/model-card.md). + prohibited_uses: > + "Any deployed use case of the model - whether commercial or not - is + currently out of scope. Non-deployed use cases such as image search in a + constrained environment, are also not recommended unless there is thorough + in-domain testing of the model with a specific, fixed class taxonomy. + This is because our safety assessment demonstrated a high need for task + specific testing especially given the variability of CLIP’s performance + with different class taxonomies. This makes untested and unconstrained + deployment of the model in any use case currently potentially harmful. + + Certain use cases which would fall under the domain of surveillance and + facial recognition are always out-of-scope regardless of performance of the + model. This is because the use of artificial intelligence for tasks such as + these can be premature currently given the lack of testing norms and checks + to ensure its fair use. + + Since the model has not been purposefully trained in or evaluated on any + languages other than English, its use should be limited to English language + use cases" + [[Model Card]](https://github.com/openai/CLIP/blob/main/model-card.md). monitoring: - explanation: There are no monitoring mechanisms in place for CLIP. value: none
+ feedback: Questions can be shared at the feedback form linked in the CLIP model + card [[Model Card]](https://github.com/openai/CLIP/blob/main/model-card.md). + +- type: model name: DALL·E + # General organization: OpenAI - prohibited_uses: - explanation: The prohibited uses of the model are unknown. - value: unknown - quality_control: unknown - size: 12B parameters (dense) + description: > + DALL·E is a GPT-3 based model trained to generate images from text + descriptions. The authors found that it had "a diverse set of capabilities, + including creating anthropomorphized versions of animals and objects, + combining unrelated concepts in plausible ways, rendering text, and + applying transformations to existing images" + [[OpenAI Blog Post]](https://openai.com/blog/dall-e/). + created_date: + value: 2021-01-05 + explanation: > + The date of the blog post announcing DALL·E + [[OpenAI Blog Post]](https://openai.com/blog/dall-e/). + url: https://arxiv.org/pdf/2102.12092.pdf + model_card: https://github.com/openai/DALL-E/blob/master/model_card.md + modality: Text (English) and Image + size: 12B parameters (dense model) + analysis: > + The model was evaluated against three prior approaches, AttnGAN, DM-GAN, and + DF-GAN using Inception Score and Fréchet Inception Distance on MS-COCO as + metrics. The model was also evaluated by humans and received the majority + of the votes in generating images that look realistic and better match the + caption when compared to the images generated by DF-GAN + [[Section]](https://arxiv.org/pdf/2102.12092.pdf#section.3). + # Construction + dependencies: [DALL·E dataset] training_emissions: - explanation: The training emissions were not reported. value: unknown + explanation: The training emissions were not reported. training_time: + value: unknown explanation: The training emissions were not reported. + training_hardware: NVIDIA V100 GPUs + quality_control: unknown + # Downstream + access: + value: limited + explanation: Available via the OpenAI API as of Q1 2023. + license: value: unknown - type: model - url: https://arxiv.org/pdf/2102.12092.pdf -- access: - explanation: The model can be downloaded from the [Github repository](https://github.com/openai/jukebox) - value: open - analysis: Evaluations in paper are primarily considering the fidelity and novelty - of samples from Jukebox. + explanation: > + The asset doesn't have a known license as it hasn't been publicly + released. + intended_uses: > + "The model is intended for others to use for training their own generative + models" + [[Model Card]](https://github.com/openai/DALL-E/blob/master/model_card.md). + prohibited_uses: + value: unknown + explanation: The prohibited uses of the model are unknown. + monitoring: + value: none + explanation: There are no monitoring mechanisms in place for DALL·E. + feedback: > + Contact the paper author(s) specified on the paper + [[Paper]](https://arxiv.org/pdf/2102.12092.pdf). + + +- type: model + name: Jukebox + organization: OpenAI + description: Jukebox is a generative model that produces music created_date: - explanation: The date the model paper was released value: 2020-04-30 + explanation: The date the model paper was released + url: https://arxiv.org/abs/2005.00341 + model_card: + value: none + explanation: No model card found as of 2023-01-09 + modality: Audio (music) + size: 5B parameters + analysis: Evaluations in the paper primarily consider the fidelity and novelty + of samples from Jukebox.
dependencies: [Jukebox Dataset] - description: Jukebox is a generate model that produces music - feedback: none - intended_uses: '' + training_emissions: unknown + training_time: 4 weeks + training_hardware: 510 V100s + quality_control: '' + access: + value: open + explanation: The model can be downloaded from the [Github repository](https://github.com/openai/jukebox) license: - explanation: "The license is provided in the [Github repository](https://github.com/openai/jukebox).\n" value: Noncommercial Use License - modality: - explanation: Audio (music) - value: audio; audio - model_card: - explanation: No model card found as of 2023-01-09 - value: none + explanation: > + The license is provided in the [Github repository](https://github.com/openai/jukebox). + intended_uses: '' + prohibited_uses: none monitoring: none - name: Jukebox + feedback: none + +- type: model + name: DALL·E 2 + # General organization: OpenAI - prohibited_uses: none - quality_control: '' - size: 5B parameters (dense) - training_emissions: unknown - training_hardware: 510 V100s - training_time: 4 weeks - type: model - url: https://arxiv.org/abs/2005.00341 -- access: - explanation: "The model is available to OpenAI employees, researchers, creatives\ - \ and company friends. OpenAI opened a waitlist for DALL·E 2 access. [[System\ - \ Card]] (https://github.com/openai/dalle-2-preview/blob/main/system-card.md#access).\n" - value: limited - analysis: "The model is capable of generating explicit content and the researchers\ - \ found limited amount of spurious content generated. The researchers also found\ - \ that visual synonyms can be used to prompt the model to surface unwanted generations\ - \ [[Probes and Evaluations]] (https://github.com/openai/dalle-2-preview/blob/main/system-card.md#probes-and-evaluations).\n" + description: > + "DALL·E 2 is an artificial intelligence model that takes a text prompt + and/or existing image as an input and generates a new image as an output" + [[System Card]] + (https://github.com/openai/dalle-2-preview/blob/main/system-card.md). + The model wasn't fully released, but OpenAI released a version of the model + (DALL·E 2 Preview) to a select group of testers. created_date: - explanation: "OpenAI released in a blog post in April 2020 [[OpenAI Blog Post]](https://openai.com/dall-e-2/).\n" value: 2022-04-13 + explanation: > + OpenAI announced DALL·E 2 in a blog post in April 2022 + [[OpenAI Blog Post]](https://openai.com/dall-e-2/).
+ url: https://arxiv.org/abs/2204.06125 model_card: https://github.com/openai/dalle-2-preview/blob/main/system-card.md - monitoring: "Uses of the model are monitored. In the preview version, any user\ - \ can flag content. The specific policies for monitoring are not disclosed,\ - \ but possible measures include disabling of accounts violating the content\ - \ policies [[Monitoring and Reporting]] (https://github.com/openai/dalle-2-preview/blob/main/system-card.md#monitoring-and-reporting).\n" - name: DALL·E 2 - organization: OpenAI - prohibited_uses: "Use of the model is governed by the OpenAI Content Policy, which\ - \ prohibits posting of G rated content. Users are not allowed to utilize the\ - \ model in commercial products in the preview version [[Content Policy]] (https://github.com/openai/dalle-2-preview/blob/main/system-card.md#policies-and-enforcement).\n" - quality_control: "The model isn't fully released to the public as part of a quality\ - \ control measure. The usage of the model by testers is monitored and user provided\ - \ prompts are filtered [[Input filters]] (https://github.com/openai/dalle-2-preview/blob/main/system-card.md#input-filters).\n" + modality: Text (English) and Image size: unknown + analysis: > + The model is capable of generating explicit content and the researchers + found limited amount of spurious content generated. The researchers also + found that visual synonyms can be used to prompt the model to surface + unwanted generations + [[Probes and Evaluations]] + (https://github.com/openai/dalle-2-preview/blob/main/system-card.md#probes-and-evaluations). + # Construction + dependencies: [DALL·E dataset, CLIP dataset] training_emissions: unknown - training_hardware: unknown training_time: unknown - type: model - url: https://arxiv.org/abs/2204.06125 -- access: - explanation: "The OpenAI API is available to the public in supported countries\ - \ [[Supported Countries]](https://beta.openai.com/docs/supported-countries)\ - \ [[OpenAI API]](https://openai.com/api/).\n" + training_hardware: unknown + quality_control: > + The model isn't fully released to the public as part of a quality control + measure. The usage of the model by testers is monitored and user provided + prompts are filtered + [[Input filters]] + (https://github.com/openai/dalle-2-preview/blob/main/system-card.md#input-filters). + # Downstream + access: value: limited - adaptation: The API exposes the models fairly direclty with a range of hyperparameters - (e.g. temperature scaling). + explanation: > + The model is available to OpenAI employees, researchers, creatives + and company friends. OpenAI opened a waitlist for DALL·E 2 access. + [[System Card]] + (https://github.com/openai/dalle-2-preview/blob/main/system-card.md#access). + license: + value: unknown + explanation: > + The asset isn't released, and hence the license is unknown. + intended_uses: > + "The intended use of the DALL·E 2 Preview at this time is for personal, + non-commercial exploration and research purposes by people who are + interested in understanding the potential uses of these capabilities" + [[Use]] + (https://github.com/openai/dalle-2-preview/blob/main/system-card.md#use). + prohibited_uses: > + Use of the model is governed by the OpenAI Content Policy, which prohibits + posting of G rated content. + Users are not allowed to utilize the model in commercial products in the + preview version + [[Content Policy]] + (https://github.com/openai/dalle-2-preview/blob/main/system-card.md#policies-and-enforcement). 
+ monitoring: > + Uses of the model are monitored. In the preview version, any user can flag + content. The specific policies for monitoring are not disclosed, but + possible measures include disabling of accounts violating the content + policies + [[Monitoring and Reporting]] + (https://github.com/openai/dalle-2-preview/blob/main/system-card.md#monitoring-and-reporting). + feedback: Feedback can be provided at support at openai.com. + +- type: application + name: OpenAI API + # General + organization: OpenAI + description: > + OpenAI API is a general purpose "text in, text out" interface connecting + users with a suite of language models. The API was initially released + as a gateway to GPT-3, but it now supports access to other, more + specialized OpenAI models. + [[Open AI Blog Post]](https://openai.com/blog/openai-api/) created_date: - explanation: "The date that OpenAI API was announced to the public [[Open AI\ - \ Blog Post]](https://openai.com/blog/openai-api/).\n" value: 2020-06-11 + explanation: > + The date that OpenAI API was announced to the public + [[Open AI Blog Post]](https://openai.com/blog/openai-api/). + url: https://openai.com/api/ + # Construction dependencies: - GPT-3 - Codex @@ -809,559 +1079,588 @@ - Whisper - DALL·E - GPT-4 - description: "OpenAI API is a general purpose \"text in, text out\" interface\ - \ connecting users with a suite of language models. The API was initially released\ - \ as a gateway to GPT-3, but it now supports access to other, more specialized\ - \ OpenAI models. [[Open AI Blog Post]](https://openai.com/blog/openai-api/)\n" - failures: - explanation: "There are no known documented failures of the OpenAI API at the\ - \ time of writing.\n" - value: unknown - feedback: - explanation: "There is no known specific feedback channel for the OpenAI API,\ - \ but OpenAI support theme can be reached via email at support at openai.com.\n" - value: unknown - intended_uses: "OpenAI API was designed to be used by developers to empower applications,\ - \ and researchers to study language models [[Section 3]](https://openai.com/api/policies/terms/).\n" + adaptation: The API exposes the models fairly direclty with a range of hyperparameters + (e.g. temperature scaling). + output_space: > + Given a prompting text, the OpenAI API provides access to text completions, + and log probabilities. The support for text and code embeddings were added + on 2022-01-25 + [[OpenAI Blog Post]] + (https://openai.com/blog/introducing-text-and-code-embeddings/). + quality_control: > + Given a prompt, OpenAI API checks whether a completion contains unsafe + language using its filters and marks the completion accordingly if so. + The API also provides developers with special endpoints that scope the + API usage. OpenAI also developed user guidelines to help developers + understand safety issues + [[OpenAI API]](https://openai.com/api/). + # Downstream + access: + value: limited + explanation: > + The OpenAI API is available to the public in supported countries + [[Supported Countries]](https://beta.openai.com/docs/supported-countries) + [[OpenAI API]](https://openai.com/api/). + terms_of_service: https://openai.com/api/policies/terms/ license: + value: custom explanation: Per the Terms of Use, a limited license is provided to the users during their use of the API [[Section 2]](https://openai.com/api/policies/terms/). 
- value: custom - monitoring: "OpenAI may monitor the API use to ensure \"quality and improve OpenAI\ - \ systems, products and services; perform research; and ensure compliance\"\ - \ with the Terms of Service and all applicable laws. Users of the API will give\ - \ OpenAI reasonable access to their application to monitor compliance with the\ - \ terms listed in the Terms of Service [[Section 5(b)]](https://openai.com/api/policies/terms/).\ - \ Apps using the OpenAI API should submit an application once they are deployed\ - \ to real users. The review form takes 10 minutes to complete and over 97% of\ - \ the applications are directly accepted or conditionally accepted. The applicants\ - \ are notified of the decision within 2 business days [[App Review Guidelines]]\ - \ (https://beta.openai.com/docs/usage-guidelines/app-review).\n" + intended_uses: > + OpenAI API was designed to be used by developers to empower applications, + and researchers to study language models + [[Section 3]](https://openai.com/api/policies/terms/). + prohibited_uses: > + OpenAI API Terms of Use prohibits the use of the API in a way violating + the applicable law, including: (i) "Illegal activities, such as child + pornography, gambling, cybercrime, piracy, violating copyright, + trademark or other intellectual property laws"; (ii) "Accessing or + authorizing anyone to access the APIs from an embargoed country, region, or + territory as prohibited by the U.S. government"; (iii) "Threatening, + stalking, defaming, defrauding, degrading, victimizing or intimidating + anyone for any reason". + The usage requirements are detailed in the Terms of Use + [[Section 3]](https://openai.com/api/policies/terms/). + monitoring: > + OpenAI may monitor the API use to ensure "quality and improve OpenAI + systems, products and services; perform research; and ensure compliance" + with the Terms of Service and all applicable laws. Users of the API will + give OpenAI reasonable access to their application to monitor compliance + with the terms listed in the Terms of Service + [[Section 5(b)]](https://openai.com/api/policies/terms/). + Apps using the OpenAI API should submit an application once they are + deployed to real users. The review form takes 10 minutes to complete and + over 97% of the applications are directly accepted or conditionally + accepted. The applicants are notified of the decision within 2 business + days + [[App Review Guidelines]] + (https://beta.openai.com/docs/usage-guidelines/app-review). + feedback: + value: unknown + explanation: > + There is no known specific feedback channel for the OpenAI API, + but OpenAI support team can be reached via email at support at openai.com. + # Deployment monthly_active_users: - explanation: "The number of monthly active users is not known publicly, but\ - \ OpenAI mentioned that the API was being used by tens of thousands of developers\ - \ in a blog post from 2021-11-18 [[OpenAI Blog Post]](https://openai.com/blog/api-no-waitlist/).\n" value: unknown - name: OpenAI API - organization: OpenAI - output_space: "Given a prompting text, the OpenAI API provides access to text\ - \ completions, and log probabilities.
The support for text and code embeddings\ - \ were added on 2022-01-25 [[OpenAI Blog Post]] (https://openai.com/blog/introducing-text-and-code-embeddings/).\n" - prohibited_uses: "OpenAI API Terms of Use prohibits the use of the API in a way\ - \ violating the applicable law, including: (i) \"Illegal activities, such as\ - \ child pornography, gambling, cybercrime, piracy, violating copyright, trademark\ - \ or other intellectual property laws\"; (ii) \"Accessing or authorizing anyone\ - \ to access the APIs from an embargoed country, region, or territory as prohibited\ - \ by the U.S. government\"; (iii) \"Threatening, stalking, defaming, defrauding,\ - \ degrading, victimizing or intimidating anyone for any reason\". The usage\ - \ requirements are detailed in the Terms of Use [[Section 3]](https://openai.com/api/policies/terms/).\n" - quality_control: "Given a prompt, OpenAI API checks whether a completion contains\ - \ unsafe language using its filters and marks the completion accordingly if\ - \ so. The API also provides developers with special endpoints that scope the\ - \ API usage. OpenAI also developed user guidelines to help developers understand\ - \ safety issues [[OpenAI API]](https://openai.com/api/).\n" - terms_of_service: https://openai.com/api/policies/terms/ - type: application - url: https://openai.com/api/ + explanation: > + The number of monthly active users is not known publicly, but + OpenAI mentioned that the API was being used by tens of thousands of + developers in a blog post from 2021-11-18 + [[OpenAI Blog Post]](https://openai.com/blog/api-no-waitlist/). user_distribution: - explanation: "The distribution of the users is not known, but we estimate majority\ - \ of the users to be developers based in the United States.\n" value: unknown -- access: open - analysis: '' + explanation: > + The distribution of the users is not known, but we estimate + majority of the users to be developers based in the United States. + failures: + value: unknown + explanation: > + There are no known documented failures of the OpenAI API at the + time of writing. + +- type: model + name: VPT + organization: OpenAI + description: '' created_date: 2022-06-23 + url: https://arxiv.org/abs/2206.11795 + model_card: '' + modality: video + analysis: '' + size: 500M parameters (dense model) dependencies: [web_clean] - description: '' - feedback: '' - intended_uses: '' + training_emissions: '' + training_time: '' + training_hardware: '' + quality_control: '' + access: open license: MIT - modality: - explanation: video - value: video; video - model_card: '' + intended_uses: '' + prohibited_uses: '' monitoring: '' - name: VPT + feedback: '' + +- type: dataset + name: web_clean organization: OpenAI - prohibited_uses: '' - quality_control: '' - size: 500M parameters (dense) - training_emissions: '' - training_hardware: '' - training_time: '' - type: model - url: https://arxiv.org/abs/2206.11795 -- access: closed - analysis: '' + description: '' created_date: 2022-06-23 + url: https://arxiv.org/abs/2206.11795 datasheet: '' + modality: Video + size: 70k hours + sample: [] + analysis: '' dependencies: [] - description: '' - excluded: '' - feedback: '' included: '' - intended_uses: '' + excluded: '' + quality_control: '' + access: closed license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - modality: - explanation: Video - value: video + explanation: > + The asset isn't released, and hence the license is unknown. 
+ intended_uses: '' + prohibited_uses: '' monitoring: '' - name: web_clean + feedback: '' + +- type: application + name: ChatGPT organization: OpenAI - prohibited_uses: '' - quality_control: '' - sample: [] - size: 70k hours - type: dataset - url: https://arxiv.org/abs/2206.11795 -- access: open - adaptation: '' + description: ChatGPT is an artificial intelligence chatbot developed by OpenAI created_date: 2022-11-30 + url: https://openai.com/blog/chatgpt dependencies: [gpt-3.5-turbo, OpenAI toxicity classifier] - description: ChatGPT is an artificial intelligence chatbot developed by OpenAI - failures: '' - feedback: '' - intended_uses: '' + adaptation: '' + output_space: '' + quality_control: '' + access: open license: + value: custom explanation: Per the Terms of Use, a limited license is provided to the users during their use of the API [[Section 2]](https://openai.com/api/policies/terms/). - value: custom + terms_of_service: '' + intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' monthly_active_users: - explanation: https://www.reuters.com/technology/chatgpt-sets-record-fastest-growing-user-base-analyst-note-2023-02-01/ value: 100000000 - name: ChatGPT - organization: OpenAI - output_space: '' - prohibited_uses: '' - quality_control: '' - terms_of_service: '' - type: application - url: https://openai.com/blog/chatgpt + explanation: https://www.reuters.com/technology/chatgpt-sets-record-fastest-growing-user-base-analyst-note-2023-02-01/ user_distribution: '' -- access: limited - analysis: '' + failures: '' + +- type: model + name: gpt-3.5-turbo + organization: OpenAI + description: '' created_date: 2023-03-01 + url: https://platform.openai.com/docs/models/gpt-3-5 + model_card: '' + modality: text + analysis: '' + size: unknown dependencies: [gpt-3.5-turbo dataset] - description: '' - feedback: '' - intended_uses: '' + training_emissions: '' + training_time: '' + training_hardware: '' + quality_control: '' + access: limited license: + value: custom explanation: Per the Terms of Use, a limited license is provided to the users during their use of the API [[Section 2]](https://openai.com/api/policies/terms/). 
- value: custom - modality: - explanation: text - value: text; text - model_card: '' + intended_uses: '' + prohibited_uses: '' monitoring: '' - name: gpt-3.5-turbo + feedback: '' + +- type: dataset + name: gpt-3.5-turbo dataset organization: OpenAI - prohibited_uses: '' - quality_control: '' - size: unknown - training_emissions: '' - training_hardware: '' - training_time: '' - type: model - url: https://platform.openai.com/docs/models/gpt-3-5 -- access: limited - analysis: '' + description: '' created_date: 2023-03-01 + url: https://platform.openai.com/docs/models/gpt-3-5 datasheet: '' - dependencies: [] - description: '' - excluded: '' - feedback: '' - included: '' - intended_uses: '' - license: - explanation: "The asset isn't released, and hence the license is unknown.\n" - value: unknown modality: text - monitoring: '' - name: gpt-3.5-turbo dataset - organization: OpenAI - prohibited_uses: '' - quality_control: '' - sample: [] size: unknown - type: dataset - url: https://platform.openai.com/docs/models/gpt-3-5 -- access: limited + sample: [] analysis: '' - created_date: 2022-05-01 - datasheet: '' dependencies: [] - description: '' - excluded: '' - feedback: '' included: '' - intended_uses: '' + excluded: '' + quality_control: '' + access: limited license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - modality: - explanation: code and text - value: code, text + explanation: > + The asset isn't released, and hence the license is unknown. + intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' + +- type: dataset name: code-davinci-002 dataset organization: OpenAI - prohibited_uses: '' - quality_control: '' - sample: [] - size: unknown - type: dataset + description: '' + created_date: 2022-05-01 url: https://platform.openai.com/docs/model-index-for-researchers -- access: limited + datasheet: '' + modality: code and text + size: unknown + sample: [] analysis: '' - created_date: 2022-05-01 - dependencies: [code-davinci-002 dataset] - description: '' - feedback: '' - intended_uses: '' + dependencies: [] + included: '' + excluded: '' + quality_control: '' + access: limited license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - modality: - explanation: code and text - value: code, text; code, text - model_card: '' + explanation: > + The asset isn't released, and hence the license is unknown. + intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' + +- type: model name: code-davinci-002 organization: OpenAI - prohibited_uses: '' - quality_control: '' + description: '' + created_date: 2022-05-01 + url: https://platform.openai.com/docs/model-index-for-researchers + model_card: '' + modality: code and text + analysis: '' size: unknown + dependencies: [code-davinci-002 dataset] training_emissions: '' - training_hardware: '' training_time: '' - type: model - url: https://platform.openai.com/docs/model-index-for-researchers -- access: limited - analysis: '' - created_date: 2022-05-01 - dependencies: [code-davinci-002] - description: '' - feedback: '' - intended_uses: '' + training_hardware: '' + quality_control: '' + access: limited license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - modality: - explanation: text - value: text; text - model_card: '' + explanation: > + The asset isn't released, and hence the license is unknown. 
+ intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' + +- type: model name: text-davinci-002 organization: OpenAI - prohibited_uses: '' - quality_control: '' + description: '' + created_date: 2022-05-01 + url: https://platform.openai.com/docs/model-index-for-researchers + model_card: '' + modality: text + analysis: '' size: unknown + dependencies: [code-davinci-002] training_emissions: '' - training_hardware: '' training_time: '' - type: model - url: https://platform.openai.com/docs/model-index-for-researchers -- access: limited - analysis: '' - created_date: 2022-11-30 - dependencies: [text-davinci-002] - description: '' - feedback: '' - intended_uses: '' + training_hardware: '' + quality_control: '' + access: limited license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - modality: - explanation: text - value: text; text - model_card: '' + explanation: > + The asset isn't released, and hence the license is unknown. + intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' + +- type: model name: text-davinci-003 organization: OpenAI - prohibited_uses: '' - quality_control: '' + description: '' + created_date: 2022-11-30 + url: https://platform.openai.com/docs/model-index-for-researchers + model_card: '' + modality: text + analysis: '' size: unknown + dependencies: [text-davinci-002] training_emissions: '' - training_hardware: '' training_time: '' - type: model - url: https://platform.openai.com/docs/model-index-for-researchers -- access: open - adaptation: '' - created_date: 2023-03-01 - dependencies: [Whisper] - description: API to query OpenAI's Whisper model. - failures: '' - feedback: '' - intended_uses: '' + training_hardware: '' + quality_control: '' + access: limited license: - explanation: Per the Terms of Use, a limited license is provided to the users - during their use of the API [[Section 2]](https://openai.com/api/policies/terms/). - value: custom + value: unknown + explanation: > + The asset isn't released, and hence the license is unknown. + intended_uses: '' + prohibited_uses: '' monitoring: '' - monthly_active_users: '' + feedback: '' + +- type: application name: Whisper API organization: OpenAI - output_space: '' - prohibited_uses: '' - quality_control: '' - terms_of_service: '' - type: application + description: API to query OpenAI's Whisper model. + created_date: 2023-03-01 url: https://openai.com/blog/introducing-chatgpt-and-whisper-apis - user_distribution: '' -- access: open + dependencies: [Whisper] adaptation: '' - created_date: 2023-03-01 - dependencies: [ChatGPT] - description: API to query OpenAI's ChatGPT model. - failures: '' - feedback: '' - intended_uses: '' + output_space: '' + quality_control: '' + access: open license: + value: custom explanation: Per the Terms of Use, a limited license is provided to the users during their use of the API [[Section 2]](https://openai.com/api/policies/terms/). - value: custom + terms_of_service: '' + intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' monthly_active_users: '' + user_distribution: '' + failures: '' + +- type: application name: ChatGPT API organization: OpenAI - output_space: '' - prohibited_uses: '' - quality_control: '' - terms_of_service: '' - type: application + description: API to query OpenAI's ChatGPT model. 
+ created_date: 2023-03-01 url: https://openai.com/blog/introducing-chatgpt-and-whisper-apis - user_distribution: '' -- access: open + dependencies: [ChatGPT] adaptation: '' - created_date: 2022-08-10 - dependencies: [OpenAI toxicity classifier] - description: This endpoint provides OpenAI API developers with free access to - GPT-based classifiers that detect undesired content—an instance of using AI - systems to assist with human supervision of these systems. - failures: '' - feedback: '' - intended_uses: '' + output_space: '' + quality_control: '' + access: open license: + value: custom explanation: Per the Terms of Use, a limited license is provided to the users during their use of the API [[Section 2]](https://openai.com/api/policies/terms/). - value: custom + terms_of_service: '' + intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' monthly_active_users: '' + user_distribution: '' + failures: '' + +- type: application name: OpenAI Moderation API organization: OpenAI + description: This endpoint provides OpenAI API developers with free access to + GPT-based classifiers that detect undesired content—an instance of using AI + systems to assist with human supervision of these systems. + created_date: 2022-08-10 + url: https://openai.com/blog/new-and-improved-content-moderation-tooling + dependencies: [OpenAI toxicity classifier] + adaptation: '' output_space: '' - prohibited_uses: '' quality_control: '' + access: open + license: + value: custom + explanation: Per the Terms of Use, a limited license is provided to the users + during their use of the API [[Section 2]](https://openai.com/api/policies/terms/). terms_of_service: '' - type: application - url: https://openai.com/blog/new-and-improved-content-moderation-tooling - user_distribution: '' -- access: closed - analysis: '' - created_date: 2023-01-18 - dependencies: [OpenAI toxicity dataset] - description: '' - feedback: '' intended_uses: '' - license: - explanation: "The asset isn't released, and hence the license is unknown.\n" - value: unknown - modality: - explanation: Text - value: text; text - model_card: '' + prohibited_uses: '' monitoring: '' + feedback: '' + monthly_active_users: '' + user_distribution: '' + failures: '' + +- type: model name: OpenAI toxicity classifier organization: OpenAI - prohibited_uses: '' - quality_control: '' + description: '' + created_date: 2023-01-18 + url: https://time.com/6247678/openai-chatgpt-kenya-workers/#:~:text=In%20a%20statement%2C%20an%20OpenAI,datasets%20of%20tools%20like%20ChatGPT. + model_card: '' + modality: Text + analysis: '' size: unknown + dependencies: [OpenAI toxicity dataset] training_emissions: '' - training_hardware: '' training_time: '' - type: model - url: https://time.com/6247678/openai-chatgpt-kenya-workers/#:~:text=In%20a%20statement%2C%20an%20OpenAI,datasets%20of%20tools%20like%20ChatGPT. -- access: closed - analysis: '' - created_date: 2023-01-18 - datasheet: '' - dependencies: [] - description: '' - excluded: '' - feedback: '' - included: '' - intended_uses: '' + training_hardware: '' + quality_control: '' + access: closed license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - modality: text + explanation: > + The asset isn't released, and hence the license is unknown. 
+ intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' + +- type: dataset name: OpenAI toxicity dataset organization: OpenAI - prohibited_uses: '' - quality_control: '' - sample: [] - size: unknown - type: dataset + description: '' + created_date: 2023-01-18 url: https://time.com/6247678/openai-chatgpt-kenya-workers/#:~:text=In%20a%20statement%2C%20an%20OpenAI,datasets%20of%20tools%20like%20ChatGPT. -- access: limited - adaptation: '' - created_date: 2023-02-03 - dependencies: [Sage] - description: A chatbot language model available via Quora's Poe - failures: '' - feedback: '' + datasheet: '' + modality: text + size: unknown + sample: [] + analysis: '' + dependencies: [] + included: '' + excluded: '' + quality_control: '' + access: closed + license: + value: unknown + explanation: > + The asset isn't released, and hence the license is unknown. intended_uses: '' - license: unknown + prohibited_uses: '' monitoring: '' - monthly_active_users: '' + feedback: '' + +- type: application name: Sage API organization: OpenAI + description: A chatbot language model available via Quora's Poe + created_date: 2023-02-03 + url: https://quorablog.quora.com/Poe-1 + dependencies: [Sage] + adaptation: '' output_space: '' - prohibited_uses: '' quality_control: '' + access: limited + license: unknown terms_of_service: '' - type: application - url: https://quorablog.quora.com/Poe-1 - user_distribution: '' -- access: limited - adaptation: '' - created_date: 2023-02-03 - dependencies: [Dragonfly] - description: A chatbot language model available via Quora's Poe - failures: '' - feedback: '' intended_uses: '' - license: unknown + prohibited_uses: '' monitoring: '' + feedback: '' monthly_active_users: '' + user_distribution: '' + failures: '' + +- type: application name: Dragonfly API organization: OpenAI + description: A chatbot language model available via Quora's Poe + created_date: 2023-02-03 + url: https://quorablog.quora.com/Poe-1 + dependencies: [Dragonfly] + adaptation: '' output_space: '' - prohibited_uses: '' quality_control: '' + access: limited + license: unknown terms_of_service: '' - type: application - url: https://quorablog.quora.com/Poe-1 - user_distribution: '' -- access: limited - analysis: '' - created_date: 2023-02-03 - dependencies: [] - description: A chatbot language model available via Quora's Poe - feedback: '' intended_uses: '' - license: unknown - modality: - explanation: text - value: text; text - model_card: '' + prohibited_uses: '' monitoring: '' + feedback: '' + monthly_active_users: '' + user_distribution: '' + failures: '' + +- type: model name: Sage organization: OpenAI - prohibited_uses: '' - quality_control: '' - size: unknown - training_emissions: '' - training_hardware: '' - training_time: '' - type: model + description: A chatbot language model available via Quora's Poe + created_date: 2023-02-03 url: https://quorablog.quora.com/Poe-1 -- access: limited + model_card: '' + modality: text analysis: '' - created_date: 2023-02-03 + size: unknown dependencies: [] - description: A chatbot language model available via Quora's Poe - feedback: '' - intended_uses: '' + training_emissions: '' + training_time: '' + training_hardware: '' + quality_control: '' + access: limited license: unknown - modality: - explanation: text - value: text; text - model_card: '' + intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' + +- type: model name: Dragonfly organization: OpenAI - prohibited_uses: '' - quality_control: '' + description: A chatbot language model 
available via Quora's Poe + created_date: 2023-02-03 + url: https://quorablog.quora.com/Poe-1 + model_card: '' + modality: text + analysis: '' size: unknown + dependencies: [] training_emissions: '' - training_hardware: '' training_time: '' - type: model - url: https://quorablog.quora.com/Poe-1 -- access: limited - adaptation: '' - created_date: 2023-03-07 - dependencies: [ChatGPT API] - description: The app integrates ChatGPT’s powerful AI technology to deliver instant - conversation summaries, research tools, and writing assistance directly in Slack - to help millions of companies work more productively. - failures: '' - feedback: '' - intended_uses: '' + training_hardware: '' + quality_control: '' + access: limited license: unknown + intended_uses: '' + prohibited_uses: '' monitoring: '' - monthly_active_users: '' + feedback: '' + +- type: application name: ChatGPT for Slack organization: OpenAI, Salesforce + description: The app integrates ChatGPT’s powerful AI technology to deliver instant + conversation summaries, research tools, and writing assistance directly in Slack + to help millions of companies work more productively. + created_date: 2023-03-07 + url: https://www.salesforce.com/news/stories/chatgpt-app-for-slack/ + dependencies: [ChatGPT API] + adaptation: '' output_space: '' - prohibited_uses: '' quality_control: '' + access: limited + license: unknown terms_of_service: '' - type: application - url: https://www.salesforce.com/news/stories/chatgpt-app-for-slack/ - user_distribution: '' -- access: limited - analysis: '' - created_date: 2023-03-14 - dependencies: [] - description: '' - feedback: '' intended_uses: '' - license: - explanation: "The asset isn't released, and hence the license is unknown.\n" - value: unknown - modality: - explanation: text and image - value: image, text; image, text - model_card: '' + prohibited_uses: '' monitoring: '' + feedback: '' + monthly_active_users: '' + user_distribution: '' + failures: '' + +- type: model name: GPT-4 organization: OpenAI - prohibited_uses: '' - quality_control: '' + description: '' + created_date: 2023-03-14 + url: https://arxiv.org/abs/2303.08774 + model_card: '' + modality: text and image + analysis: '' size: unknown + dependencies: [] training_emissions: '' - training_hardware: '' training_time: '' - type: model - url: https://arxiv.org/abs/2303.08774 -- access: limited - adaptation: '' - created_date: 2023-03-14 - dependencies: [GPT-4] - description: GPT-4 is OpenAI’s most advanced system, producing safer and more - useful responses - failures: '' - feedback: '' - intended_uses: '' + training_hardware: '' + quality_control: '' + access: limited license: - explanation: Per the Terms of Use, a limited license is provided to the users - during their use of the API [[Section 2]](https://openai.com/api/policies/terms/). - value: custom + value: unknown + explanation: > + The asset isn't released, and hence the license is unknown. 
+ intended_uses: '' + prohibited_uses: '' monitoring: '' - monthly_active_users: '' + feedback: '' + +- type: application name: GPT-4 API organization: OpenAI + description: GPT-4 is OpenAI’s most advanced system, producing safer and more + useful responses + created_date: 2023-03-14 + url: https://openai.com/product/gpt-4 + dependencies: [GPT-4] + adaptation: '' output_space: '' - prohibited_uses: '' quality_control: '' + access: limited + license: + value: custom + explanation: Per the Terms of Use, a limited license is provided to the users + during their use of the API [[Section 2]](https://openai.com/api/policies/terms/). terms_of_service: '' - type: application - url: https://openai.com/product/gpt-4 + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' + monthly_active_users: '' user_distribution: '' + failures: '' diff --git a/assets/othersideai.yaml b/assets/othersideai.yaml index 7d9b0f05..0753d45c 100644 --- a/assets/othersideai.yaml +++ b/assets/othersideai.yaml @@ -1,31 +1,42 @@ --- -- access: limited - adaptation: unknown - created_date: unknown - dependencies: [OpenAI API] - description: "HyperWrite is a writing assistant that generates text based on a\ - \ user's request, as well as style and tone choices.\n" - failures: unknown - feedback: unknown - intended_uses: "HyperWrite is intended to be used as a writing assistant.\n" - license: - explanation: "\"The Software, including software embedded in the Software, is\ - \ licensed, not sold, to you by Otherside only under the terms of this Agreement;\ - \ and Otherside reserves all rights not expressly granted under this Agreemen\"\ - \ - excerpt from the Terms of Service.\n" - value: custom - monitoring: unknown - monthly_active_users: unknown + +- type: application name: HyperWrite + # General organization: OthersideAI + description: > + HyperWrite is a writing assistant that generates text based on a user's + request, as well as style and tone choices. + created_date: unknown + url: https://hyperwriteai.com/ + # Construction + dependencies: [OpenAI API] + adaptation: unknown output_space: Generation - prohibited_uses: unknown quality_control: - explanation: "There is a HyperWrite blog warning the users about the risks of\ - \ writing with an AI assistant, but the company doesn't list the quality control\ - \ measures taken to prevent the listed risks, if any [[HyperWrite Blog]] (https://blog.hyperwrite.ai/what-are-the-risks-of-ai-writing/).\n" value: unknown + explanation: > + There is a HyperWrite blog warning the users about the risks of writing + with an AI assistant, but the company doesn't list the quality control + measures taken to prevent the listed risks, if any + [[HyperWrite Blog]] + (https://blog.hyperwrite.ai/what-are-the-risks-of-ai-writing/). + # Downstream + access: limited + license: + value: custom + explanation: > + "The Software, including software embedded in the Software, is licensed, + not sold, to you by Otherside only under the terms of this Agreement; and + Otherside reserves all rights not expressly granted under this Agreemen" + - excerpt from the Terms of Service. terms_of_service: https://hyperwriteai.com/terms - type: application - url: https://hyperwriteai.com/ + intended_uses: > + HyperWrite is intended to be used as a writing assistant. 
+ prohibited_uses: unknown + monitoring: unknown + feedback: unknown + # Deployment + monthly_active_users: unknown user_distribution: unknown + failures: unknown diff --git a/assets/oxford.yaml b/assets/oxford.yaml index c6f1477c..d57f98bc 100644 --- a/assets/oxford.yaml +++ b/assets/oxford.yaml @@ -1,110 +1,133 @@ --- -- access: - explanation: The captions and video url's can be found at the [[dataset page]](https://m-bain.github.io/webvid-dataset/). - CLIP ViT-B/32 features of this dataset can be found at the [[WebVid github - repository]](https://github.com/m-bain/webvid#download-clip-features-%EF%B8%8F). - value: open - analysis: unknown + +- type: dataset + name: WebVid-10M + # General + organization: University of Oxford + description: > + WebVid-10M is a large-scale dataset of short videos with textual descriptions + sourced from stock footage sites. created_date: - explanation: "The date the [[paper]](https://arxiv.org/abs/2104.00650) was released.\n" value: 2021-04-01 + explanation: > + The date the [[paper]](https://arxiv.org/abs/2104.00650) was released. + url: https://m-bain.github.io/webvid-dataset/ datasheet: none + modality: text (English) and video + size: 10.7M video-text pairs, 52K hours video + sample: [] + analysis: unknown + # Construction dependencies: [] - description: "WebVid-10M is a large-scale dataset of short videos with textual\ - \ descriptions sourced from stock footage sites.\n" - excluded: unknown - feedback: Feedback can be given by emailing at maxbain at robots.ox.ac.uk + license: + value: WebVid Dataset Terms + explanation: > + [WebVid Dataset Terms](https://github.com/m-bain/webvid/blob/main/TERMS.md) included: + value: Video URLs and textual description annotations explanation: Authors note in the paper:"We scrape the web for a new dataset of videos with textual description annotations, called WebVid-2M. 
The data was scraped from the web following a similar procedure to Google Conceptual Captions" - value: Video URLs and textual description annotations - intended_uses: - explanation: See [[Terms of Access]](https://github.com/m-bain/webvid/blob/main/TERMS.md) - value: "\"Provided you keep to these rules, the University grants you (the researcher)\ - \ a non-exclusive and non-transferable licence to use the content free of\ - \ charge strictly for non-commercial research (i.e., whose output artefacts\ - \ are not incorporated in commercial products) for 12 months.\"\n" - license: - explanation: "[WebVid Dataset Terms](https://github.com/m-bain/webvid/blob/main/TERMS.md)\n" - value: WebVid Dataset Terms - modality: - explanation: text (English) and video - value: text, video - monitoring: '' - name: WebVid-10M - organization: University of Oxford - prohibited_uses: "- Authors note the following prohibited uses: \"You must not\ - \ use the content other than for the Permitted Purpose in strict conformity\ - \ with these terms and any other reasonable instructions of the University.\ - \ You must not, except as may be strictly necessary for carrying out the Permitted\ - \ Purpose, provide or otherwise make available content to any third party or\ - \ allow use of it or them by or on behalf of any third party, in whole or in\ - \ part, whether by way of sale, resale, loan, transfer, hire or any other form\ - \ of exploitation; or attempt to identify any living or deceased individual\ - \ from the content.\" [[Terms of Access]](https://github.com/m-bain/webvid/blob/main/TERMS.md)\n\ - - Authors also note the following limitations of the dataset: \"We note that\ - \ data sourced from the web may be prone to biases and may contain graphic content.\ - \ Please be careful of unintended societal, gender, racial and other biases\ - \ when training or deploying models trained on this data.\" [[Disclaimer]](https://github.com/m-bain/webvid#disclaimer-%EF%B8%8F)\n" + excluded: unknown quality_control: unknown - sample: [] - size: 10.7M video-text pairs, 52K hours video - type: dataset - url: https://m-bain.github.io/webvid-dataset/ -- access: + # Downstream + access: + value: open explanation: The captions and video url's can be found at the [[dataset page]](https://m-bain.github.io/webvid-dataset/). CLIP ViT-B/32 features of this dataset can be found at the [[WebVid github repository]](https://github.com/m-bain/webvid#download-clip-features-%EF%B8%8F). - value: open - analysis: unknown + intended_uses: + value: > + "Provided you keep to these rules, the University grants you (the researcher) + a non-exclusive and non-transferable licence to use the content free of charge + strictly for non-commercial research (i.e., whose output artefacts are not + incorporated + in commercial products) for 12 months." + explanation: See [[Terms of Access]](https://github.com/m-bain/webvid/blob/main/TERMS.md) + prohibited_uses: > + - Authors note the following prohibited uses: "You must not use the content + other than for the Permitted Purpose in strict + conformity with these terms and any other reasonable instructions of the University. 
+ You must not, except as may be strictly necessary for carrying out the Permitted + Purpose, + provide or otherwise make available content to any third party or allow use + of it or them by or on behalf of any third party, in whole or in part, whether + by way of sale, resale, loan, transfer, hire or any other form of exploitation; + or attempt to identify any living or deceased individual from the content." + [[Terms of Access]](https://github.com/m-bain/webvid/blob/main/TERMS.md) + + - Authors also note the following limitations of the dataset: "We note that + data + sourced from the web may be prone to biases and may contain graphic content. + Please be careful of unintended societal, gender, racial and other biases when + training or deploying models trained on this data." + [[Disclaimer]](https://github.com/m-bain/webvid#disclaimer-%EF%B8%8F) + monitoring: '' + feedback: Feedback can be given by emailing at maxbain at robots.ox.ac.uk + +- type: dataset + name: WebVid-2M + # General + organization: University of Oxford + description: > + WebVid-2M is a large-scale dataset of 2.5M short videos with textual descriptions + sourced from stock footage sites. A subset of the WebVid-10M dataset. created_date: - explanation: "The date the [[paper]](https://arxiv.org/abs/2104.00650) was released.\n" value: 2021-04-01 + explanation: > + The date the [[paper]](https://arxiv.org/abs/2104.00650) was released. + url: https://m-bain.github.io/webvid-dataset/ datasheet: none + modality: text (English) and video + size: 2.5M video-text pairs, 13K hours video + sample: [] + analysis: unknown + # Construction dependencies: [WebVid-10M] - description: "WebVid-2M is a large-scale dataset of 2.5M short videos with textual\ - \ descriptions sourced from stock footage sites. A subset of the WebVid-10M\ - \ dataset.\n" - excluded: unknown - feedback: Feedback can be given by emailing at maxbain at robots.ox.ac.uk + license: + value: WebVid Dataset Terms + explanation: > + [WebVid Dataset Terms](https://github.com/m-bain/webvid/blob/main/TERMS.md) included: + value: Video URLs and textual description annotations explanation: Authors note in the paper:"We scrape the web for a new dataset of videos with textual description annotations, called WebVid-2M. The data was scraped from the web following a similar procedure to Google Conceptual Captions" - value: Video URLs and textual description annotations + excluded: unknown + quality_control: unknown + # Downstream + access: + value: open + explanation: The captions and video url's can be found at the [[dataset page]](https://m-bain.github.io/webvid-dataset/). + CLIP ViT-B/32 features of this dataset can be found at the [[WebVid github + repository]](https://github.com/m-bain/webvid#download-clip-features-%EF%B8%8F). intended_uses: + value: > + "Provided you keep to these rules, the University grants you (the researcher) + a non-exclusive and non-transferable licence to use the content free of charge + strictly for non-commercial research (i.e., whose output artefacts are not + incorporated + in commercial products) for 12 months." 
explanation: See [[Terms of Access]](https://github.com/m-bain/webvid/blob/main/TERMS.md) - value: "\"Provided you keep to these rules, the University grants you (the researcher)\ - \ a non-exclusive and non-transferable licence to use the content free of\ - \ charge strictly for non-commercial research (i.e., whose output artefacts\ - \ are not incorporated in commercial products) for 12 months.\"\n" - license: - explanation: "[WebVid Dataset Terms](https://github.com/m-bain/webvid/blob/main/TERMS.md)\n" - value: WebVid Dataset Terms - modality: - explanation: text (English) and video - value: text, video + prohibited_uses: > + - Authors note the following prohibited uses: "You must not use the content + other than for the Permitted Purpose in strict + conformity with these terms and any other reasonable instructions of the University. + You must not, except as may be strictly necessary for carrying out the Permitted + Purpose, + provide or otherwise make available content to any third party or allow use + of it or them by or on behalf of any third party, in whole or in part, whether + by way of sale, resale, loan, transfer, hire or any other form of exploitation; + or attempt to identify any living or deceased individual from the content." + [[Terms of Access]](https://github.com/m-bain/webvid/blob/main/TERMS.md) + + - Authors also note the following limitations of the dataset: "We note that + data + sourced from the web may be prone to biases and may contain graphic content. + Please be careful of unintended societal, gender, racial and other biases when + training or deploying models trained on this data." + [[Disclaimer]](https://github.com/m-bain/webvid#disclaimer-%EF%B8%8F) monitoring: '' - name: WebVid-2M - organization: University of Oxford - prohibited_uses: "- Authors note the following prohibited uses: \"You must not\ - \ use the content other than for the Permitted Purpose in strict conformity\ - \ with these terms and any other reasonable instructions of the University.\ - \ You must not, except as may be strictly necessary for carrying out the Permitted\ - \ Purpose, provide or otherwise make available content to any third party or\ - \ allow use of it or them by or on behalf of any third party, in whole or in\ - \ part, whether by way of sale, resale, loan, transfer, hire or any other form\ - \ of exploitation; or attempt to identify any living or deceased individual\ - \ from the content.\" [[Terms of Access]](https://github.com/m-bain/webvid/blob/main/TERMS.md)\n\ - - Authors also note the following limitations of the dataset: \"We note that\ - \ data sourced from the web may be prone to biases and may contain graphic content.\ - \ Please be careful of unintended societal, gender, racial and other biases\ - \ when training or deploying models trained on this data.\" [[Disclaimer]](https://github.com/m-bain/webvid#disclaimer-%EF%B8%8F)\n" - quality_control: unknown - sample: [] - size: 2.5M video-text pairs, 13K hours video - type: dataset - url: https://m-bain.github.io/webvid-dataset/ + feedback: Feedback can be given by emailing at maxbain at robots.ox.ac.uk diff --git a/assets/paladin.yaml b/assets/paladin.yaml index 80aec963..76870f97 100644 --- a/assets/paladin.yaml +++ b/assets/paladin.yaml @@ -1,21 +1,22 @@ --- -- access: limited - adaptation: '' - created_date: 2023-02-01 - dependencies: [] - description: UnderwriteGPT is the world's first generative AI underwriting tool. 
- failures: '' - feedback: '' - intended_uses: '' - license: '' - monitoring: '' - monthly_active_users: '' + +- type: application name: UnderwriteGPT organization: Paladin Group and Dais Technology + description: UnderwriteGPT is the world's first generative AI underwriting tool. + created_date: 2023-02-01 + url: https://dais.com/underwritegpt/ + dependencies: [] + adaptation: '' output_space: '' - prohibited_uses: '' quality_control: '' + access: limited + license: '' terms_of_service: '' - type: application - url: https://dais.com/underwritegpt/ + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' + monthly_active_users: '' user_distribution: '' + failures: '' diff --git a/assets/perplexity.yaml b/assets/perplexity.yaml index 8bf35081..117a7acc 100644 --- a/assets/perplexity.yaml +++ b/assets/perplexity.yaml @@ -1,50 +1,52 @@ --- -- access: open - adaptation: '' + +- type: application + name: Perplexity Ask + organization: Perplexity + description: Perplexity Ask is a new search interface that uses advanced artificial + intelligence technologies created_date: + value: 2022-12-07 explanation: Perplexity Ask was announced in a [[Twitter post]](https://twitter.com/perplexity_ai/status/1600551871554338816?s=20) by the company. - value: 2022-12-07 + url: https://www.perplexity.ai/ dependencies: [GPT-3.5, Bing Search] - description: Perplexity Ask is a new search interface that uses advanced artificial - intelligence technologies - failures: '' - feedback: '' - intended_uses: '' - license: - explanation: No license was found. - value: none - monitoring: '' - monthly_active_users: '' - name: Perplexity Ask - organization: Perplexity + adaptation: '' output_space: '' - prohibited_uses: '' quality_control: '' - terms_of_service: '' - type: application - url: https://www.perplexity.ai/ - user_distribution: '' -- access: deprecated - adaptation: '' - created_date: 2022-12-15 - dependencies: [Perplexity Ask, OpenAI API] - description: Twitter search interface that is powered by Perplexity's structured - search engine. - failures: '' - feedback: '' - intended_uses: '' + access: open license: - explanation: No license was found. value: none + explanation: No license was found. + terms_of_service: '' + intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' monthly_active_users: '' + user_distribution: '' + failures: '' + +- type: application name: Bird SQL organization: Perplexity + description: Twitter search interface that is powered by Perplexity's structured + search engine. + created_date: 2022-12-15 + url: https://www.perplexity.ai/sql + dependencies: [Perplexity Ask, OpenAI API] + adaptation: '' output_space: '' - prohibited_uses: '' quality_control: '' + access: deprecated + license: + value: none + explanation: No license was found. 
terms_of_service: '' - type: application - url: https://www.perplexity.ai/sql + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' + monthly_active_users: '' user_distribution: '' + failures: '' diff --git a/assets/portkey.yaml b/assets/portkey.yaml index 51fac815..2fca2885 100644 --- a/assets/portkey.yaml +++ b/assets/portkey.yaml @@ -1,22 +1,23 @@ --- -- access: open - adaptation: '' - created_date: 2023-05-06 - dependencies: [] - description: Portkey is a hosted middleware that allows users to create generative - AI applications - failures: '' - feedback: '' - intended_uses: '' - license: '' - monitoring: '' - monthly_active_users: '' + +- type: application name: Portkey organization: Portkey + description: Portkey is a hosted middleware that allows users to create generative + AI applications + created_date: 2023-05-06 + url: https://portkey.ai/ + dependencies: [] + adaptation: '' output_space: generative AI apps - prohibited_uses: '' quality_control: '' + access: open + license: '' terms_of_service: https://portkey.ai/terms - type: application - url: https://portkey.ai/ + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' + monthly_active_users: '' user_distribution: '' + failures: '' diff --git a/assets/quizlet.yaml b/assets/quizlet.yaml index a7580d0b..cfd5d9c6 100644 --- a/assets/quizlet.yaml +++ b/assets/quizlet.yaml @@ -1,25 +1,26 @@ --- -- access: open - adaptation: '' - created_date: 2023-03-01 - dependencies: [ChatGPT API] + +- type: application + name: Q-Chat + organization: Quizlet description: Quizlet is introducing Q-Chat, a fully-adaptive AI tutor that engages students with adaptive questions based on relevant study materials delivered through a fun chat experience. - failures: '' - feedback: '' - intended_uses: '' + created_date: 2023-03-01 + url: https://openai.com/blog/introducing-chatgpt-and-whisper-apis + dependencies: [ChatGPT API] + adaptation: '' + output_space: '' + quality_control: '' + access: open license: - explanation: No license was found. value: none + explanation: No license was found. + terms_of_service: https://quizlet.com/tos + intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' monthly_active_users: '' - name: Q-Chat - organization: Quizlet - output_space: '' - prohibited_uses: '' - quality_control: '' - terms_of_service: https://quizlet.com/tos - type: application - url: https://openai.com/blog/introducing-chatgpt-and-whisper-apis user_distribution: '' + failures: '' diff --git a/assets/quora.yaml b/assets/quora.yaml index 2b32f96d..e28e31a2 100644 --- a/assets/quora.yaml +++ b/assets/quora.yaml @@ -1,31 +1,32 @@ --- -- access: limited - adaptation: '' + +- type: application + name: Poe + organization: Quora + description: Poe lets people ask questions, get instant answers, and have back-and-forth + conversations with several AI-powered bots. It is initially available on iOS, + but we will be adding support for all major platforms in the next few months, + along with more bots. created_date: 2023-02-03 + url: https://quorablog.quora.com/Poe-1 dependencies: - ChatGPT API - GPT-4 API - Claude API - Dragonfly API - Sage API - description: Poe lets people ask questions, get instant answers, and have back-and-forth - conversations with several AI-powered bots. It is initially available on iOS, - but we will be adding support for all major platforms in the next few months, - along with more bots. 
- failures: '' - feedback: '' - intended_uses: '' + adaptation: '' + output_space: '' + quality_control: '' + access: limited license: - explanation: No license was found. value: none + explanation: No license was found. + terms_of_service: https://poe.com/tos + intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' monthly_active_users: '' - name: Poe - organization: Quora - output_space: '' - prohibited_uses: '' - quality_control: '' - terms_of_service: https://poe.com/tos - type: application - url: https://quorablog.quora.com/Poe-1 user_distribution: '' + failures: '' diff --git a/assets/robin.yaml b/assets/robin.yaml index 813dc634..123cc804 100644 --- a/assets/robin.yaml +++ b/assets/robin.yaml @@ -1,26 +1,27 @@ --- -- access: limited - adaptation: '' - created_date: unknown - dependencies: [Anthropic API] + +- type: application + name: Robin AI + organization: Robin AI description: Robin AI uses Claude and Anthropic's models to understand language - including in technical domains like legal language. It's also very confident at drafting, summarising, translations, and explaining complex concepts in simple terms - failures: '' - feedback: '' - intended_uses: '' + created_date: unknown + url: https://www.robinai.co.uk/ + dependencies: [Anthropic API] + adaptation: '' + output_space: '' + quality_control: '' + access: limited license: - explanation: No license was found. value: none + explanation: No license was found. + terms_of_service: https://www.robinai.co.uk/terms + intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' monthly_active_users: '' - name: Robin AI - organization: Robin AI - output_space: '' - prohibited_uses: '' - quality_control: '' - terms_of_service: https://www.robinai.co.uk/terms - type: application - url: https://www.robinai.co.uk/ user_distribution: '' + failures: '' diff --git a/assets/salesforce.yaml b/assets/salesforce.yaml index f6c12de0..19a1d021 100644 --- a/assets/salesforce.yaml +++ b/assets/salesforce.yaml @@ -1,35 +1,42 @@ --- -- access: - explanation: Model weights are available for download with links in the [[GitHub - repo]](https://github.com/salesforce/CodeGen) - value: open - analysis: '' + +- type: model + name: CodeGen + organization: Salesforce + description: CodeGen is a language model for code created_date: - explanation: Date model paper was released value: 2022-03-25 + explanation: Date model paper was released + url: https://arxiv.org/abs/2203.13474 + model_card: '' + modality: text (English) and code + analysis: '' + size: 16B parameters (dense model) dependencies: [] - description: CodeGen is a language model for code - feedback: '' - intended_uses: '' + training_emissions: '' + training_time: '' + training_hardware: Unspecified Salesforce Compute (TPU-V4s) + quality_control: '' + access: + value: open + explanation: Model weights are available for download with links in the [[GitHub + repo]](https://github.com/salesforce/CodeGen) license: none (model weights), BSD-3-Clause (code) - modality: - explanation: text (English) and code - value: code, text; code, text - model_card: '' + intended_uses: '' + prohibited_uses: '' monitoring: '' - name: CodeGen + feedback: '' + +- type: model + name: BLIP organization: Salesforce - prohibited_uses: '' - quality_control: '' - size: 16B parameters (dense) - training_emissions: '' - training_hardware: Unspecified Salesforce Compute (TPU-V4s) - training_time: '' - type: model - url: https://arxiv.org/abs/2203.13474 -- access: open - analysis: '' + description: '' 
created_date: 2022-01-28 + url: https://arxiv.org/abs/2201.12086 + model_card: '' + modality: text and image + analysis: '' + size: unknown dependencies: - ViT-B - BERT @@ -39,68 +46,60 @@ - Conceptual 12M - SBU Captions - LAION-115M - description: '' - feedback: '' - intended_uses: '' + training_emissions: '' + training_time: '' + training_hardware: '' + quality_control: '' + access: open license: BSD-3-Clause - modality: - explanation: text and image - value: image, text; image, text - model_card: '' + intended_uses: '' + prohibited_uses: '' monitoring: '' - name: BLIP + feedback: '' + +- type: dataset + name: LAION-115M organization: Salesforce - prohibited_uses: '' - quality_control: '' - size: unknown - training_emissions: '' - training_hardware: '' - training_time: '' - type: model - url: https://arxiv.org/abs/2201.12086 -- access: open - analysis: '' + description: '' created_date: 2022-01-28 + url: https://arxiv.org/abs/2201.12086 datasheet: '' + modality: text and image + size: 115M image-text pairs + sample: [] + analysis: '' dependencies: [LAION-400M] - description: '' - excluded: '' - feedback: '' included: '' - intended_uses: '' + excluded: '' + quality_control: '' + access: open license: BSD-3-Clause - modality: - explanation: text and image - value: image, text + intended_uses: '' + prohibited_uses: '' monitoring: '' - name: LAION-115M + feedback: '' + +- type: application + name: EinsteinGPT organization: Salesforce - prohibited_uses: '' - quality_control: '' - sample: [] - size: 115M image-text pairs - type: dataset - url: https://arxiv.org/abs/2201.12086 -- access: limited - adaptation: '' - created_date: 2023-03-07 - dependencies: [ChatGPT API] description: EinsteinGPT is generative AI for customer relationship management (CRFM). - failures: '' - feedback: '' - intended_uses: '' + created_date: 2023-03-07 + url: https://www.salesforce.com/products/einstein/overview/?d=cta-body-promo-8 + dependencies: [ChatGPT API] + adaptation: '' + output_space: '' + quality_control: '' + access: limited license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown + explanation: > + The asset isn't released, and hence the license is unknown. + terms_of_service: '' + intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' monthly_active_users: '' - name: EinsteinGPT - organization: Salesforce - output_space: '' - prohibited_uses: '' - quality_control: '' - terms_of_service: '' - type: application - url: https://www.salesforce.com/products/einstein/overview/?d=cta-body-promo-8 user_distribution: '' + failures: '' diff --git a/assets/sana.yaml b/assets/sana.yaml index 22234f8f..09bda05f 100644 --- a/assets/sana.yaml +++ b/assets/sana.yaml @@ -1,40 +1,55 @@ --- -- access: limited - adaptation: "Customized GPT-3, fine-tuned on private data [[Sana GPT-3 Demo]](https://gpt3demo.com/apps/sanalabs).\n" + +- type: application + name: Sana + # General + organization: Sana + description: > + "Sana is your all-in-one, AI-assisted, online learning platform (LMS). + Author employee training courses and measure team development with Sana's + powerful analytics. Sana partners with the world's most important + organizations and fastest-growing startups to make personalized, adaptive + learning available for everyone, everywhere" + [[Sana GPT-3 Demo]](https://gpt3demo.com/apps/sanalabs). 
created_date: - explanation: "The company was founded in 2016 according to [[Crunchbase]](https://www.crunchbase.com/organization/sana-2).\ - \ It is unclear when Sana adapted the OpenAI API to their products.\n" value: unknown + explanation: > + The company was founded in 2016 according to + [[Crunchbase]](https://www.crunchbase.com/organization/sana-2). It is + unclear when Sana adapted the OpenAI API to their products. + url: https://www.sanalabs.com/ + # Construction dependencies: [OpenAI API] - description: "\"Sana is your all-in-one, AI-assisted, online learning platform\ - \ (LMS). Author employee training courses and measure team development with\ - \ Sana's powerful analytics. Sana partners with the world's most important organizations\ - \ and fastest-growing startups to make personalized, adaptive learning available\ - \ for everyone, everywhere\" [[Sana GPT-3 Demo]](https://gpt3demo.com/apps/sanalabs).\n" - failures: unknown - feedback: unknown - intended_uses: "Sana is intended to be used by employers to provide a learning\ - \ service for their employees.\n" + adaptation: > + Customized GPT-3, fine-tuned on private data + [[Sana GPT-3 Demo]](https://gpt3demo.com/apps/sanalabs). + output_space: question and answer, summarization, sentiment analysis,topic identification + quality_control: unknown + # Downstream + access: limited license: - explanation: "\"Sana Labs grants Subscriber a limited non-exclusive, non-transferable,\ - \ non-sublicensable license to use Sana Labs' web-based personalized and collaborative\ - \ learning platform (“Sana Platform”) solely in connection with the subscribed\ - \ Services in accordance with the Agreement.\" - excerpt from the Terms of\ - \ Service.\n" value: custom - monitoring: unknown - monthly_active_users: unknown - name: Sana - organization: Sana - output_space: question and answer, summarization, sentiment analysis,topic identification + explanation: > + "Sana Labs grants Subscriber a limited non-exclusive, non-transferable, + non-sublicensable license to use Sana Labs' web-based personalized and + collaborative learning platform (“Sana Platform”) solely in connection + with the subscribed Services in accordance with the Agreement." - + excerpt from the Terms of Service. + terms_of_service: https://www.sanalabs.com/legal/ + intended_uses: > + Sana is intended to be used by employers to provide a learning service for + their employees. prohibited_uses: - explanation: "\"Sana provides an extensive set of legal documents [[Sana Legal]](https://www.sanalabs.com/legal/),\ - \ but missing from the documents are prohibited uses of the Sana platform,\ - \ beyond a mention of what is impermissible under relevant law.\" - exceprt\ - \ from the Terms of Service document.\n" value: none - quality_control: unknown - terms_of_service: https://www.sanalabs.com/legal/ - type: application - url: https://www.sanalabs.com/ + explanation: > + "Sana provides an extensive set of legal documents + [[Sana Legal]](https://www.sanalabs.com/legal/), but missing from the + documents are prohibited uses of the Sana platform, beyond a mention of + what is impermissible under relevant law." - exceprt from the Terms of + Service document. 
+ monitoring: unknown + feedback: unknown + # Deployment + monthly_active_users: unknown user_distribution: unknown + failures: unknown diff --git a/assets/shanghai.yaml b/assets/shanghai.yaml index 80208ec7..8dae5081 100644 --- a/assets/shanghai.yaml +++ b/assets/shanghai.yaml @@ -1,7 +1,15 @@ --- -- access: open - analysis: '' + +- type: model + name: InternVideo + organization: Shanghai AI Laboratory + description: '' created_date: 2022-12-06 + url: https://arxiv.org/pdf/2212.03191.pdf + model_card: '' + modality: text and video + analysis: '' + size: 1.3B parameters (dense model) dependencies: - Kinetics-400 - WebVid-2M @@ -10,46 +18,36 @@ - AVA - Something-Something-v2 - Kinetics-710 - description: '' - feedback: '' - intended_uses: '' - license: Apache 2.0 - modality: - explanation: text and video - value: text, video; text, video - model_card: '' - monitoring: '' - name: InternVideo - organization: Shanghai AI Laboratory - prohibited_uses: '' - quality_control: '' - size: 1.3B parameters (dense) training_emissions: '' - training_hardware: '' training_time: '' - type: model - url: https://arxiv.org/pdf/2212.03191.pdf -- access: open - analysis: Evaluated based on own constructed dataset covering 433 languages. - created_date: 2023-05-29 - dependencies: [OPUS] - description: Lego-MT is a multilingual large language model which uses a more - efficient approach of being an effective detachable model. - feedback: '' + training_hardware: '' + quality_control: '' + access: open + license: Apache 2.0 intended_uses: '' - license: '' - modality: - explanation: multilingual text - value: text; text - model_card: '' + prohibited_uses: '' monitoring: '' + feedback: '' + +- type: model name: Lego-MT organization: Shanghai AI Laboratory - prohibited_uses: '' - quality_control: '' - size: 1.2B parameters (dense) + description: Lego-MT is a multilingual large language model which uses a more + efficient approach of being an effective detachable model. + created_date: 2023-05-29 + url: https://arxiv.org/pdf/2212.10551.pdf + model_card: '' + modality: multilingual text + analysis: Evaluated based on own constructed dataset covering 433 languages. + size: 1.2B parameters + dependencies: [OPUS] training_emissions: unknown - training_hardware: 32 A100 GPUs training_time: 15 days - type: model - url: https://arxiv.org/pdf/2212.10551.pdf + training_hardware: 32 A100 GPUs + quality_control: '' + access: open + license: '' + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' diff --git a/assets/shop.yaml b/assets/shop.yaml index 60403fc0..5700ad95 100644 --- a/assets/shop.yaml +++ b/assets/shop.yaml @@ -1,24 +1,25 @@ --- -- access: open - adaptation: '' - created_date: 2023-03-01 - dependencies: [ChatGPT API] + +- type: application + name: Shop Assistant + organization: Shop description: When shoppers search for products, the shopping assistant makes personalized recommendations based on their requests. Shop’s new AI-powered shopping assistant will streamline in-app shopping by scanning millions of products to quickly find what buyers are looking for—or help them discover something new. 
- failures: '' - feedback: '' - intended_uses: '' - license: '' - monitoring: '' - monthly_active_users: '' - name: Shop Assistant - organization: Shop + created_date: 2023-03-01 + url: https://openai.com/blog/introducing-chatgpt-and-whisper-apis + dependencies: [ChatGPT API] + adaptation: '' output_space: '' - prohibited_uses: '' quality_control: '' + access: open + license: '' terms_of_service: '' - type: application - url: https://openai.com/blog/introducing-chatgpt-and-whisper-apis + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' + monthly_active_users: '' user_distribution: '' + failures: '' diff --git a/assets/singapore.yaml b/assets/singapore.yaml index 3e378eb9..5e0ab85c 100644 --- a/assets/singapore.yaml +++ b/assets/singapore.yaml @@ -1,28 +1,27 @@ --- -- access: open + +- type: model + name: GOAT + organization: National University of Singapore + description: GOAT is a fine-tuned LLaMA model which uses the tokenization of numbers + to significantly outperform benchmark standards on a range of arithmetic tasks. + created_date: 2023-05-23 + url: https://arxiv.org/pdf/2305.14201.pdf + model_card: none + modality: textual input, numerical data output analysis: Performance assessed on BIG-bench arithmetic sub-task, and various elementary arithmetic tasks. - created_date: 2023-05-23 + size: 7B parameters dependencies: [LLaMA, GOAT dataset] - description: GOAT is a fine-tuned LLaMA model which uses the tokenization of numbers - to significantly outperform benchmark standards on a range of arithmetic tasks. - feedback: '' + training_emissions: unknown + training_time: unknown + training_hardware: 24 GB VRAM GPU + quality_control: Number data is randomly generated from log space to reduce likelihood + of redundancy and range of magnitudes. + access: open + license: Apache 2.0 intended_uses: Integration into other instruction-tuned LLMs to further enhance arithmetic reasoning abilities in solving math word problems. - license: Apache 2.0 - modality: - explanation: textual input, numerical data output - value: text; other - model_card: none - monitoring: '' - name: GOAT - organization: National University of Singapore prohibited_uses: '' - quality_control: Number data is randomly generated from log space to reduce likelihood - of redundancy and range of magnitudes. - size: 7B parameters (dense) - training_emissions: unknown - training_hardware: 24 GB VRAM GPU - training_time: unknown - type: model - url: https://arxiv.org/pdf/2305.14201.pdf + monitoring: '' + feedback: '' diff --git a/assets/snap.yaml b/assets/snap.yaml index d5a384d3..67727722 100644 --- a/assets/snap.yaml +++ b/assets/snap.yaml @@ -1,28 +1,30 @@ --- -- access: open - adaptation: '' - created_date: 2023-03-01 - dependencies: [ChatGPT API] + +- type: application + name: My AI for Snapchat + organization: Snap description: My AI offers Snapchatters a friendly, customizable chatbot at their fingertips that offers recommendations, and can even write a haiku for friends in seconds. Snapchat, where communication and messaging is a daily behavior, has 750 million monthly Snapchatters. 
- failures: '' - feedback: '' - intended_uses: '' + created_date: 2023-03-01 + url: https://openai.com/blog/introducing-chatgpt-and-whisper-apis + dependencies: [ChatGPT API] + adaptation: '' + output_space: '' + quality_control: '' + access: open license: - explanation: "\"Snap grants you a worldwide, royalty-free, non-assignable, non-exclusive,\ - \ revocable, and non-sublicensable license to use the Services.\" - excerpt\ - \ from the Terms of Service document.\n" value: custom + explanation: > + "Snap grants you a worldwide, royalty-free, non-assignable, non-exclusive, revocable, + and non-sublicensable license to use the Services." - excerpt + from the Terms of Service document. + terms_of_service: https://snap.com/terms + intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' monthly_active_users: '' - name: My AI for Snapchat - organization: Snap - output_space: '' - prohibited_uses: '' - quality_control: '' - terms_of_service: https://snap.com/terms - type: application - url: https://openai.com/blog/introducing-chatgpt-and-whisper-apis user_distribution: '' + failures: '' diff --git a/assets/speak.yaml b/assets/speak.yaml index 94e61ac3..8771590f 100644 --- a/assets/speak.yaml +++ b/assets/speak.yaml @@ -1,23 +1,24 @@ --- -- access: open - adaptation: '' - created_date: 2023-03-01 - dependencies: [Whisper API] + +- type: application + name: Speak + organization: Speak description: Speak is an AI-powered language learning app focused on building the best path to spoken fluency and is the the fastest-growing English app in South Korea. - failures: '' - feedback: '' - intended_uses: '' - license: '' - monitoring: '' - monthly_active_users: '' - name: Speak - organization: Speak + created_date: 2023-03-01 + url: https://openai.com/blog/introducing-chatgpt-and-whisper-apis + dependencies: [Whisper API] + adaptation: '' output_space: '' - prohibited_uses: '' quality_control: '' + access: open + license: '' terms_of_service: '' - type: application - url: https://openai.com/blog/introducing-chatgpt-and-whisper-apis + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' + monthly_active_users: '' user_distribution: '' + failures: '' diff --git a/assets/spotify.yaml b/assets/spotify.yaml index 6e436586..f7016bf5 100644 --- a/assets/spotify.yaml +++ b/assets/spotify.yaml @@ -1,29 +1,32 @@ --- -- access: limited - adaptation: '' - created_date: 2023-02-23 - dependencies: [ChatGPT API, Sonantic AI] + +- type: application + name: AI DJ + organization: Spotify description: The DJ is a personalized AI guide that knows you and your music taste so well that it can choose what to play for you. This feature, first rolling out in beta, will deliver a curated lineup of music alongside commentary around the tracks and artists we think you’ll like in a stunningly realistic voice. 
- failures: '' - feedback: '' - intended_uses: '' + created_date: 2023-02-23 + url: https://newsroom.spotify.com/2023-02-22/spotify-debuts-a-new-ai-dj-right-in-your-pocket/ + dependencies: [ChatGPT API, Sonantic AI] + adaptation: '' + output_space: '' + quality_control: '' + access: limited license: - explanation: "\"The Spotify software applications and the Content are licensed,\ - \ not sold or transferred to you, and Spotify and its licensors retain ownership\ - \ of all copies of the Spotify software applications and Content even after\ - \ installation on your Devices.\" - excerpt from the Terms of Service document.\n" value: custom + explanation: > + "The Spotify software applications and the Content are licensed, not sold + or transferred to you, and Spotify and its licensors retain ownership of + all copies of the Spotify software applications and Content even after + installation on your Devices." - excerpt from the Terms of Service + document. + terms_of_service: https://www.spotify.com/us/legal/end-user-agreement/ + intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' monthly_active_users: '' - name: AI DJ - organization: Spotify - output_space: '' - prohibited_uses: '' - quality_control: '' - terms_of_service: https://www.spotify.com/us/legal/end-user-agreement/ - type: application - url: https://newsroom.spotify.com/2023-02-22/spotify-debuts-a-new-ai-dj-right-in-your-pocket/ user_distribution: '' + failures: '' diff --git a/assets/stability.yaml b/assets/stability.yaml index 8b137891..8c572529 100644 --- a/assets/stability.yaml +++ b/assets/stability.yaml @@ -1 +1,102 @@ +--- +- type: model + name: DeepFloyd IF + organization: Stability AI + description: A text-to-image cascaded pixel diffusion model released in conjunction + with AI research lab DeepFloyd. + created_date: 2023-04-28 + url: https://stability.ai/blog/deepfloyd-if-text-to-image-model + model_card: https://huggingface.co/DeepFloyd/IF-I-XL-v1.0 + modality: textual input with image output + analysis: Evaluated on the COCO dataset. + size: 4.3B parameters + dependencies: [LAION-5B] + training_emissions: '' + training_time: '' + training_hardware: '' + quality_control: '' + access: open + license: + value: custom + explanation: License agreement can be found on model card https://huggingface.co/DeepFloyd/IF-I-XL-v1.0 + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: https://huggingface.co/DeepFloyd/IF-I-XL-v1.0/discussions + + +- type: model + name: StableLM + organization: Stability AI + description: Large language models trained on up to 1.5 trillion tokens. + created_date: 2023-04-20 + url: https://github.com/Stability-AI/StableLM + model_card: '' + modality: natural language text + analysis: '' + size: 7B parameters + dependencies: + - StableLM-Alpha dataset + - Alpaca dataset + - gpt4all dataset + - ShareGPT52K dataset + - Dolly dataset + - HH dataset + training_emissions: '' + training_time: '' + training_hardware: '' + quality_control: '' + access: open + license: Apache 2.0 + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' + + +- type: application + name: Stable Diffusion + organization: Stability AI + description: Stable Diffusion is a generative software that creates images from + text prompts. 
+ created_date: 2022-08-22 + url: https://stability.ai/blog/stable-diffusion-public-release + dependencies: [] + adaptation: '' + output_space: image + quality_control: '' + access: open + license: + value: custom + explanation: License agreement can be found on https://github.com/CompVis/stable-diffusion/blob/main/LICENSE + terms_of_service: '' + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: https://huggingface.co/CompVis/stable-diffusion/discussions + monthly_active_users: '' + user_distribution: '' + failures: '' + +- type: application + name: Stable Diffusion XL + organization: Stability AI + description: Stable Diffusion XL is an updated version of Stable Diffusion, and + creates descriptive images with shorter prompts and can generate words within images. + created_date: 2023-07-26 + url: https://stability.ai/stablediffusion + dependencies: [] + adaptation: '' + output_space: image + quality_control: '' + access: limited + license: MIT + terms_of_service: '' + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' + monthly_active_users: '' + user_distribution: '' + failures: '' diff --git a/assets/stanford.yaml b/assets/stanford.yaml index 37f02528..f54d0d1e 100644 --- a/assets/stanford.yaml +++ b/assets/stanford.yaml @@ -1,77 +1,74 @@ --- -- access: open - analysis: '' - created_date: 2022-12-15 - dependencies: [The Pile] - description: '' - feedback: '' - intended_uses: '' - license: bigscience-bloom-rail-1.0 - modality: - explanation: text - value: text; text - model_card: '' - monitoring: '' + +- type: model name: BioMedLM organization: Stanford - prohibited_uses: '' - quality_control: '' - size: 2.7B parameters (dense) + description: '' + created_date: 2022-12-15 + url: https://crfm.stanford.edu/2022/12/15/pubmedgpt.html + model_card: '' + modality: text + analysis: '' + size: 2.7B parameters (dense model) + dependencies: [The Pile] training_emissions: '' - training_hardware: '' training_time: '' - type: model - url: https://crfm.stanford.edu/2022/12/15/pubmedgpt.html -- access: open - analysis: Evaluated on own framework that tests domain-specific tasks in medical - field. - created_date: 2022-11-23 - dependencies: [Stable Diffusion, RoentGen radiology dataset] - description: RoentGen is a generative medical imaging model that can create visually - convincing X-ray images. - feedback: '' + training_hardware: '' + quality_control: '' + access: open + license: bigscience-bloom-rail-1.0 intended_uses: '' - license: '' - modality: - explanation: text input and image output - value: text; image - model_card: '' + prohibited_uses: '' monitoring: '' + feedback: '' + +- type: model name: RoentGen organization: Stanford - prohibited_uses: '' - quality_control: '' - size: 330M parameters (dense) + description: RoentGen is a generative medical imaging model that can create visually + convincing X-ray images. + created_date: 2022-11-23 + url: https://arxiv.org/pdf/2211.12737.pdf + model_card: '' + modality: text input and image output + analysis: Evaluated on own framework that tests domain-specific tasks in medical + field.
+ size: 330M parameters + dependencies: [Stable Diffusion, RoentGen radiology dataset] training_emissions: unknown - training_hardware: 64 A100 GPUs training_time: 60k training steps per day - type: model - url: https://arxiv.org/pdf/2211.12737.pdf -- access: open - analysis: Evaluated on three physical control tasks, drawing, steering, and human - body movement on various dynamics - created_date: 2023-06-12 - dependencies: [GPT-2, BABEL, text-davinci-003] - description: Model trained to generate language corrections for physical control - tasks. - feedback: '' + training_hardware: 64 A100 GPUs + quality_control: '' + access: open + license: '' intended_uses: '' - license: MIT - modality: - explanation: trajectory input with text output - value: other; text - model_card: '' + prohibited_uses: '' monitoring: '' + feedback: '' + +- type: model name: CORGI organization: Stanford - prohibited_uses: '' - quality_control: '' - size: 124M parameters (dense) + description: Model trained to generate language corrections for physical control + tasks. + created_date: 2023-06-12 + url: https://arxiv.org/pdf/2306.07012.pdf + model_card: '' + modality: trajectory input with text output + analysis: Evaluated on three physical control tasks, drawing, steering, and human + body movement on various dynamics + size: 124M parameters + dependencies: [GPT-2, BABEL, text-davinci-003] training_emissions: '' - training_hardware: one NVIDIA A40 GPU training_time: + value: unknown explanation: The authors do not report the training time, but do report that they train for 200 epochs. - value: unknown - type: model - url: https://arxiv.org/pdf/2306.07012.pdf + training_hardware: one NVIDIA A40 GPU + quality_control: '' + access: open + license: MIT + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' diff --git a/assets/stonybrook.yaml b/assets/stonybrook.yaml index a160a186..ce4a4657 100644 --- a/assets/stonybrook.yaml +++ b/assets/stonybrook.yaml @@ -1,51 +1,62 @@ --- -- access: - explanation: The dataset is available for download on the [[SBU Captions Dataset - website]](https://www.cs.rice.edu/~vo9/sbucaptions/), along with additional - resources. - value: open - analysis: - explanation: See [[Section 5]](https://proceedings.neurips.cc/paper/2011/file/5dd9db5e033da9c6fb5ba83c7a7ebea9-Paper.pdf) - value: "Authors perform two quantitative evaluations for image captioning -\ - \ direct user ratings of relevance and BLEU score. They also propose a new\ - \ evaluation task: \"we propose a new evaluation task where a user is presented\ - \ with two photographs and one caption. The user must assign the caption to\ - \ the most relevant image. For evaluation we use a query image, a random image\ - \ and a generated caption.\"\n" + +- type: dataset + name: SBU Captions + # General + organization: Stony Brook University + description: > + SBU Captions Dataset is a collection of 1 million images and associated captions + from Flickr, filtered so that the descriptions are likely to refer to visual + content. created_date: - explanation: "The date the [[paper]](https://papers.nips.cc/paper_files/paper/2011/hash/5dd9db5e033da9c6fb5ba83c7a7ebea9-Abstract.html)\ - \ was published.\n" value: 2011-12-12 + explanation: > + The date the [[paper]](https://papers.nips.cc/paper_files/paper/2011/hash/5dd9db5e033da9c6fb5ba83c7a7ebea9-Abstract.html) + was published. 
+ url: https://proceedings.neurips.cc/paper/2011/file/5dd9db5e033da9c6fb5ba83c7a7ebea9-Paper.pdf datasheet: none + modality: text (English) and image + size: 1M image-text pairs + sample: [] + analysis: + value: > + Authors perform two quantitative evaluations for image captioning - direct + user + ratings of relevance and BLEU score. They also propose a new evaluation task: + "we propose a new evaluation task where a user is presented with two photographs + and one caption. The user must assign the caption to the most relevant image. + For evaluation we use a query image, a random image and a generated caption." + explanation: See [[Section 5]](https://proceedings.neurips.cc/paper/2011/file/5dd9db5e033da9c6fb5ba83c7a7ebea9-Paper.pdf) + # Construction dependencies: [Flickr] - description: "SBU Captions Dataset is a collection of 1 million images and associated\ - \ captions from Flickr, filtered so that the descriptions are likely to refer\ - \ to visual content.\n" - excluded: - explanation: See [[Section 2]](https://proceedings.neurips.cc/paper/2011/file/5dd9db5e033da9c6fb5ba83c7a7ebea9-Paper.pdf) - value: "\"This produces a very large, but noisy initial set of photographs with\ - \ associated text. We filter this set of photos so that the descriptions attached\ - \ to a picture are relevant and visually descriptive.\"\n" - feedback: '' + license: + value: none + explanation: No license was found. included: + value: > + "To encourage visual descriptiveness in our collection, we select only those + images with descriptions of satisfactory length based on observed lengths + in + visual descriptions. We also enforce that retained descriptions contain at + least + 2 words belonging to our term lists and at least one prepositional word, e.g. + “on”, “under” which often indicate visible spatial relationships." explanation: See [[Section 2]](https://proceedings.neurips.cc/paper/2011/file/5dd9db5e033da9c6fb5ba83c7a7ebea9-Paper.pdf) - value: "\"To encourage visual descriptiveness in our collection, we select only\ - \ those images with descriptions of satisfactory length based on observed\ - \ lengths in visual descriptions. We also enforce that retained descriptions\ - \ contain at least 2 words belonging to our term lists and at least one prepositional\ - \ word, e.g. “on”, “under” which often indicate visible spatial relationships.\"\ - \n" + excluded: + value: > + "This produces a very large, but noisy initial set of photographs with associated + text. We filter this set of photos so that the descriptions attached to a + picture + are relevant and visually descriptive." + explanation: See [[Section 2]](https://proceedings.neurips.cc/paper/2011/file/5dd9db5e033da9c6fb5ba83c7a7ebea9-Paper.pdf) + quality_control: unknown + # Downstream + access: + value: open + explanation: The dataset is available for download on the [[SBU Captions Dataset + website]](https://www.cs.rice.edu/~vo9/sbucaptions/), along with additional + resources. intended_uses: '' - license: - explanation: No license was found. 
- value: none - modality: {} - monitoring: none - name: SBU Captions - organization: Stony Brook University prohibited_uses: '' - quality_control: unknown - sample: [] - size: 1M image-text pairs - type: dataset - url: https://proceedings.neurips.cc/paper/2011/file/5dd9db5e033da9c6fb5ba83c7a7ebea9-Paper.pdf + monitoring: none + feedback: '' diff --git a/assets/suno.yaml b/assets/suno.yaml index 5e4e59e6..6e277fb6 100644 --- a/assets/suno.yaml +++ b/assets/suno.yaml @@ -1,25 +1,24 @@ --- -- access: open - analysis: '' - created_date: 2023-04-20 - dependencies: [AudioLM] + +- type: model + name: Bark + organization: Suno description: Bark is a text-to-audio model that can generate multilingual speech as well as other noises. - feedback: https://huggingface.co/spaces/suno/bark/discussions - intended_uses: '' - license: MIT - modality: - explanation: text input with audio output - value: text; audio + created_date: 2023-04-20 + url: https://github.com/suno-ai/bark model_card: https://github.com/suno-ai/bark/blob/main/model-card.md - monitoring: '' - name: Bark - organization: Suno - prohibited_uses: '' - quality_control: '' + modality: text input with audio output + analysis: '' size: '' + dependencies: [AudioLM] training_emissions: unknown - training_hardware: '' training_time: unknown - type: model - url: https://github.com/suno-ai/bark + training_hardware: '' + quality_control: '' + access: open + license: MIT + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: https://huggingface.co/spaces/suno/bark/discussions diff --git a/assets/together.yaml b/assets/together.yaml index 1ec0ab7c..2bb3cb97 100644 --- a/assets/together.yaml +++ b/assets/together.yaml @@ -1,134 +1,134 @@ --- -- access: open - analysis: '' - created_date: 2022-11-29 - dependencies: [GPT-J, P3, NaturalInstructions-v2] - description: '' - feedback: '' - intended_uses: '' - license: Apache 2.0 - modality: - explanation: text - value: text; text - model_card: '' - monitoring: '' + +- type: model name: GPT-JT organization: Together - prohibited_uses: '' - quality_control: '' - size: 6B parameters (dense) - training_emissions: '' - training_hardware: '' - training_time: '' - type: model + description: '' + created_date: 2022-11-29 url: https://www.together.xyz/blog/releasing-v1-of-gpt-jt-powered-by-open-source-ai -- access: open + model_card: '' + modality: text analysis: '' - created_date: 2023-03-10 - dependencies: [GPT-NeoX, OIG-43M] - description: '' - feedback: '' - intended_uses: '' + size: 6B parameters (dense model) + dependencies: [GPT-J, P3, NaturalInstructions-v2] + training_emissions: '' + training_time: '' + training_hardware: '' + quality_control: '' + access: open license: Apache 2.0 - modality: - explanation: text - value: text; text - model_card: '' + intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' + +- type: model name: GPT-NeoXT-Chat-Base organization: Together - prohibited_uses: '' - quality_control: '' - size: 20B parameters (dense) - training_emissions: '' - training_hardware: '' - training_time: '' - type: model + description: '' + created_date: 2023-03-10 url: https://www.together.xyz/blog/openchatkit -- access: open + model_card: '' + modality: text analysis: '' - created_date: 2023-03-10 - dependencies: [GPT-JT, OIG-moderation] - description: '' - feedback: '' - intended_uses: '' + size: 20B parameters (dense model) + dependencies: [GPT-NeoX, OIG-43M] + training_emissions: '' + training_time: '' + training_hardware: '' + quality_control: '' + access: open license: 
Apache 2.0 - modality: - explanation: text - value: text; text - model_card: '' + intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' + +- type: model name: OpenChatKit moderation model organization: Together - prohibited_uses: '' - quality_control: '' - size: 6B parameters (dense) - training_emissions: '' - training_hardware: '' - training_time: '' - type: model + description: '' + created_date: 2023-03-10 url: https://www.together.xyz/blog/openchatkit -- access: open + model_card: '' + modality: text analysis: '' - created_date: 2023-03-10 - datasheet: '' - dependencies: [P3, NaturalInstructions-v2, FLAN dataset] - description: '' - excluded: '' - feedback: '' - included: '' - intended_uses: '' + size: 6B parameters (dense model) + dependencies: [GPT-JT, OIG-moderation] + training_emissions: '' + training_time: '' + training_hardware: '' + quality_control: '' + access: open license: Apache 2.0 - modality: text + intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' + +- type: dataset name: OIG-43M organization: Together, LAION, Ontocord - prohibited_uses: '' - quality_control: '' - sample: [] - size: 43M instructions - type: dataset - url: https://laion.ai/blog/oig-dataset/ -- access: open - analysis: '' + description: '' created_date: 2023-03-10 + url: https://laion.ai/blog/oig-dataset/ datasheet: '' - dependencies: [] - description: '' - excluded: '' - feedback: '' + modality: text + size: 43M instructions + sample: [] + analysis: '' + dependencies: [P3, NaturalInstructions-v2, FLAN dataset] included: '' - intended_uses: '' + excluded: '' + quality_control: '' + access: open license: Apache 2.0 - modality: text + intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' + +- type: dataset name: OIG-moderation organization: Together, LAION, Ontocord - prohibited_uses: '' - quality_control: '' - sample: [] - size: unknown - type: dataset + description: '' + created_date: 2023-03-10 url: https://laion.ai/blog/oig-dataset/ -- access: open + datasheet: '' + modality: text + size: unknown + sample: [] analysis: '' - created_date: 2022-04-17 - datasheet: https://huggingface.co/datasets/togethercomputer/RedPajama-Data-1T - dependencies: [GitHub, Wikipedia] - description: The RedPajama base dataset is a 1.2 trillion token fully-open dataset - created by following the recipe described in the LLaMA paper - excluded: '' - feedback: '' + dependencies: [] included: '' - intended_uses: '' + excluded: '' + quality_control: '' + access: open license: Apache 2.0 - modality: text + intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' + +- type: dataset name: RedPajama-Data organization: Together - prohibited_uses: '' - quality_control: '' - sample: [] - size: 1.2 trillion tokens - type: dataset + description: The RedPajama base dataset is a 1.2 trillion token fully-open dataset + created by following the recipe described in the LLaMA paper + created_date: 2022-04-17 url: https://www.together.xyz/blog/redpajama + datasheet: https://huggingface.co/datasets/togethercomputer/RedPajama-Data-1T + modality: text + size: 1.2 trillion tokens + sample: [] + analysis: '' + dependencies: [GitHub, Wikipedia] + included: '' + excluded: '' + quality_control: '' + access: open + license: Apache 2.0 + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' diff --git a/assets/transformify.yaml b/assets/transformify.yaml index 59a59c34..118b559f 100644 --- a/assets/transformify.yaml +++ b/assets/transformify.yaml @@ -1,22 +1,23 @@ --- -- 
access: open - adaptation: '' - created_date: 2023-05-30 - dependencies: [GPT-4] - description: Transformify Automate is a platform for automated task integration - using natural language prompts. - failures: '' - feedback: '' - intended_uses: '' - license: '' - monitoring: '' - monthly_active_users: '' + +- type: application name: Transformify Automate organization: Transformify + description: Transformify Automate is a platform for automated task integration + using natural language prompts. + created_date: 2023-05-30 + url: https://www.transformify.ai/automate + dependencies: [GPT-4] + adaptation: '' output_space: text and code - prohibited_uses: '' quality_control: '' + access: open + license: '' terms_of_service: https://www.transformify.ai/legal-stuff - type: application - url: https://www.transformify.ai/automate + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' + monthly_active_users: '' user_distribution: '' + failures: '' diff --git a/assets/trevor.yaml b/assets/trevor.yaml index 02126721..9c21533d 100644 --- a/assets/trevor.yaml +++ b/assets/trevor.yaml @@ -1,38 +1,50 @@ --- -- access: - explanation: "The tool can only be accessed by the Trevor Project will be counsolors\ - \ for training purposes.\n" - value: closed - adaptation: - explanation: "The base GPT-2 model was fine-tuned on the previous conversations\ - \ between new trainees and the Trevor Project counselors posing to be a fictional\ - \ persona in crisis.\n" - value: Fine-tuning + +- type: application + name: Crisis Contact Simulator + # General + organization: The Trevor Project + description: > + Crisis Contact Simulator, developed as part of a collaboration with + Google.org, helps train The Trevor Project counselors by + mimicking a teen in crisis. Crisis Contact Simulator is used + as part of the training programs for the Trevor Project's 24/7 digital + crisis services that support LGBTQ youth + [[Trevor Project Blog]](https://www.thetrevorproject.org/blog/the-trevor-project-launches-new-ai-tool-to-support-crisis-counselor-training/). created_date: - explanation: "The release date of the blog post introducing Crisis Contact Simulator\ - \ [[Trevor Project Blog]](https://www.thetrevorproject.org/blog/the-trevor-project-launches-new-ai-tool-to-support-crisis-counselor-training/).\n" value: 2021-03-24 + explanation: > + The release date of the blog post introducing Crisis Contact Simulator + [[Trevor Project Blog]](https://www.thetrevorproject.org/blog/the-trevor-project-launches-new-ai-tool-to-support-crisis-counselor-training/). + url: https://www.thetrevorproject.org/ + # Construction dependencies: [OpenAI API] - description: "Crisis Contact Simulator, developed as part of a collaboration with\ - \ Google.org, helps train The Trevor Project counselors by mimicking to be a\ - \ teen in crisis.
Crisis Contact Simulator is used as part of the training programs\ - \ for the Trevor Project's 24/7 digital crisis services that supports LGBTQ\ - \ youth [[Trevor Project Blog]](https://www.thetrevorproject.org/blog/the-trevor-project-launches-new-ai-tool-to-support-crisis-counselor-training/).\n" - failures: unknown - feedback: unknown - intended_uses: "Training counselors\n" - license: unknown - monitoring: unknown - monthly_active_users: unknown - name: Crisis Contact Simulator - organization: The Trevor Project + adaptation: + value: Fine-tuning + explanation: > + The base GPT-2 model was fine-tuned on the previous conversations between + new trainees and the Trevor Project counselors posing to be a fictional + persona in crisis. output_space: Dialogue - prohibited_uses: unknown quality_control: - explanation: "The tool can only be accessed by the Trevor Project will be counsolors\ - \ for training purposes.\n" value: Limited release + explanation: > + The tool can only be accessed by the Trevor Project's counselors + for training purposes. + # Downstream + access: + value: closed + explanation: > + The tool can only be accessed by the Trevor Project's counselors + for training purposes. + license: unknown terms_of_service: unknown - type: application - url: https://www.thetrevorproject.org/ + intended_uses: > + Training counselors + prohibited_uses: unknown + monitoring: unknown + feedback: unknown + # Deployment + monthly_active_users: unknown user_distribution: unknown + failures: unknown diff --git a/assets/tsinghua.yaml b/assets/tsinghua.yaml index c8b21ee6..38fbaa3a 100644 --- a/assets/tsinghua.yaml +++ b/assets/tsinghua.yaml @@ -1,149 +1,148 @@ --- -- access: - explanation: Model weights are available but gated via an [[application form]](https://models.aminer.cn/codegeex/download/request) - value: limited - analysis: none + +# models +- type: model + name: CodeGeeX + organization: Tsinghua + description: CodeGeeX is an autoregressive language model trained on code created_date: 2022-09-20 + url: https://github.com/THUDM/CodeGeeX + model_card: none + modality: code + analysis: none + size: 13B parameters (dense model) dependencies: [] - description: CodeGeeX is an autoregressive language model trained on code - feedback: none - intended_uses: none + training_emissions: unknown + training_time: unknown + training_hardware: THUDM 1536 Ascend 910 (32GB) Cluster + quality_control: none + access: + value: limited + explanation: Model weights are available but gated via an [[application form]](https://models.aminer.cn/codegeex/download/request) license: - explanation: The license is provided in the [[Github repository]](https://github.com/THUDM/CodeGeeX) value: Apache 2.0 - modality: - explanation: code - value: code; code - model_card: none + explanation: The license is provided in the [[Github repository]](https://github.com/THUDM/CodeGeeX) + intended_uses: none + prohibited_uses: none monitoring: none - name: CodeGeeX + feedback: none + +- type: model + name: CogView organization: Tsinghua - prohibited_uses: none - quality_control: none - size: 13B parameters (dense) - training_emissions: unknown - training_hardware: THUDM 1536 Ascend 910 (32GB) Cluster - training_time: unknown - type: model - url: https://github.com/THUDM/CodeGeeX -- access: - explanation: Model checkpoints available from [[Wudao-Wenhui]](https://resource.wudaoai.cn/home?ind=2&name=WuDao%20WenHui&id=1399364355975327744) - value: open - analysis: '' + description: CogView is a transformer model for
text-to-image generation created_date: - explanation: The date the model paper was released value: 2021-05-26 - dependencies: [] - description: CogView is a transformer model for text-to-image generation - feedback: '' - intended_uses: '' - license: - explanation: "The license is provided in the [[Github repository]](https://github.com/THUDM/CogView)\n" - value: Apache 2.0 - modality: - explanation: text (Chinese) and image - value: image, text; image, text + explanation: The date the model paper was released + url: https://arxiv.org/abs/2105.13290 model_card: none - monitoring: '' - name: CogView - organization: Tsinghua - prohibited_uses: '' - quality_control: '' - size: 4B parameters (dense) + modality: text (Chinese) and image + size: 4B parameters + analysis: '' + dependencies: [] training_emissions: '' - training_hardware: '' training_time: '' - type: model - url: https://arxiv.org/abs/2105.13290 -- access: - explanation: The model checkpoints are available for download from [[BAAI]](https://model.baai.ac.cn/model-detail/100041) + training_hardware: '' + quality_control: '' + access: value: open - analysis: '' - created_date: - explanation: The date the model paper was released - value: 2022-04-28 - dependencies: [] - description: CogView 2 is a hierarchical transformer for text-to-image generation - feedback: '' - intended_uses: '' + explanation: Model checkpoints available from [[Wudao-Wenhui]](https://resource.wudaoai.cn/home?ind=2&name=WuDao%20WenHui&id=1399364355975327744) license: - explanation: "The license is provided in the [[Github repository]](https://github.com/THUDM/CogView2)\n" value: Apache 2.0 - modality: - explanation: text (Chinese, English) and image - value: image, text; image, text - model_card: none + explanation: > + The license is provided in the [[Github repository]](https://github.com/THUDM/CogView) + intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' + +- type: model name: CogView 2 organization: Tsinghua - prohibited_uses: '' - quality_control: '' - size: 6B parameters (dense) - training_emissions: '' - training_hardware: '' - training_time: '' - type: model - url: https://arxiv.org/abs/2204.14217 -- access: - explanation: Model checkpoints are available for download from https://github.com/THUDM/CogVideo - value: open - analysis: '' + description: CogView 2 is a hierarchical transformer for text-to-image generation created_date: + value: 2022-04-28 explanation: The date the model paper was released - value: 2022-05-29 + url: https://arxiv.org/abs/2204.14217 + model_card: none + modality: text (Chinese, English) and image + size: 6B parameters + analysis: '' dependencies: [] - description: CogVideo is a transformer model for text-to-video generation - feedback: '' - intended_uses: '' + training_emissions: '' + training_time: '' + training_hardware: '' + quality_control: '' + access: + value: open + explanation: The model checkpoints are available for download from [[BAAI]](https://model.baai.ac.cn/model-detail/100041) license: - explanation: "The license is provided in the [[Github repository]](https://github.com/THUDM/CogVideo)\n" value: Apache 2.0 - modality: - explanation: text (Chinese) and video - value: text, video; text, video - model_card: none + explanation: > + The license is provided in the [[Github repository]](https://github.com/THUDM/CogView2) + intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' + +- type: model name: CogVideo organization: Tsinghua - prohibited_uses: '' - quality_control: '' + 
description: CogVideo is a transformer model for text-to-video generation + created_date: + value: 2022-05-29 + explanation: The date the model paper was released + url: https://arxiv.org/abs/2205.15868 + model_card: none + modality: text (Chinese) and video size: unknown + analysis: '' + dependencies: [] training_emissions: '' - training_hardware: '' training_time: '' - type: model - url: https://arxiv.org/abs/2205.15868 -- access: - explanation: Model checkpoints are available from the [[GitHub repository]](https://github.com/THUDM/GLM-130B/blob/main/MODEL_LICENSE) + training_hardware: '' + quality_control: '' + access: value: open - analysis: '' + explanation: Model checkpoints are available for download from https://github.com/THUDM/CogVideo + license: + value: Apache 2.0 + explanation: > + The license is provided in the [[Github repository]](https://github.com/THUDM/CogVideo) + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' + +- type: model + name: GLM-130B + organization: Tsinghua + description: GLM-130B is a bidirectional language model trained on English and + Chinese created_date: - explanation: The date the model website was made public value: 2022-08-04 + explanation: The date the model website was made public + url: https://keg.cs.tsinghua.edu.cn/glm-130b/ + model_card: none + modality: text (Chinese | English) + size: 130B parameters (dense model) + analysis: '' dependencies: - The Pile - GLM-130B Chinese corpora - P3 - DeepStruct finetuning dataset - description: GLM-130B is a bidirectional language model trained on English and - Chinese - feedback: '' - intended_uses: '' + training_emissions: '' + training_time: '' + training_hardware: THUDM 96 DGX-A100 (40G) cluster + quality_control: '' + access: + value: open + explanation: Model checkpoints are available from the [[GitHub repository]](https://github.com/THUDM/GLM-130B/blob/main/MODEL_LICENSE) license: - explanation: Unique model license. See the [[GitHub repository]](https://github.com/THUDM/GLM-130B/blob/main/MODEL_LICENSE) value: GLM-130B License - modality: - explanation: text (Chinese | English) - value: text; text - model_card: none - monitoring: '' - name: GLM-130B - organization: Tsinghua + explanation: Unique model license. See the [[GitHub repository]](https://github.com/THUDM/GLM-130B/blob/main/MODEL_LICENSE) + intended_uses: '' prohibited_uses: '' - quality_control: '' - size: 130B parameters (dense) - training_emissions: '' - training_hardware: THUDM 96 DGX-A100 (40G) cluster - training_time: '' - type: model - url: https://keg.cs.tsinghua.edu.cn/glm-130b/ + monitoring: '' + feedback: '' diff --git a/assets/uae.yaml b/assets/uae.yaml index 9ed62419..127d82d0 100644 --- a/assets/uae.yaml +++ b/assets/uae.yaml @@ -1,50 +1,50 @@ --- -- access: open - analysis: '' - created_date: 2023-06-14 - dependencies: [RefinedWeb] + +- type: model + name: Falcon + organization: UAE Technology Innovation Institute description: Falcon-40B is a 40B parameters causal decoder-only model built by TII and trained on 1,000B tokens of RefinedWeb enhanced with curated corpora. 
- feedback: https://huggingface.co/tiiuae/falcon-40b/discussions - intended_uses: '' - license: Apache 2.0 - modality: - explanation: natural language text - value: text; text + created_date: 2023-06-14 + url: https://huggingface.co/tiiuae/falcon-40b model_card: https://huggingface.co/tiiuae/falcon-40b - monitoring: None - name: Falcon - organization: UAE Technology Innovation Institute - prohibited_uses: irresponsible or harmful use or production use without adequate - assessment of risks and mitigation - quality_control: '' - size: 40B parameters (dense) + modality: natural language text + analysis: '' + size: 40B parameters + dependencies: [RefinedWeb] training_emissions: '' - training_hardware: 384 A100 40GB GPUs training_time: 2 months - type: model - url: https://huggingface.co/tiiuae/falcon-40b -- access: open - analysis: '' + training_hardware: 384 A100 40GB GPUs + quality_control: '' + access: open + license: Apache 2.0 + intended_uses: '' + prohibited_uses: irresponsible or harmful use or production use without adequate + assessment of risks and mitigation + monitoring: None + feedback: https://huggingface.co/tiiuae/falcon-40b/discussions + +- type: dataset + name: RefinedWeb + organization: UAE Technology Innovation Institute + description: RefinedWeb is a high-quality five trillion tokens web-only English + pretraining dataset. created_date: 2023-06-01 + url: https://arxiv.org/pdf/2306.01116.pdf datasheet: https://huggingface.co/datasets/tiiuae/falcon-refinedweb + modality: text + size: 600B tokens + sample: [] + analysis: '' dependencies: [] - description: RefinedWeb is a high-quality five trillion tokens web-only English - pretraining dataset. - excluded: '' - feedback: '' included: '' - intended_uses: '' + excluded: '' + quality_control: '' + access: open license: - explanation: License can be found at https://huggingface.co/datasets/tiiuae/falcon-refinedweb value: unknown - modality: text - monitoring: '' - name: RefinedWeb - organization: UAE Technology Innovation Institute + explanation: License can be found at https://huggingface.co/datasets/tiiuae/falcon-refinedweb + intended_uses: '' prohibited_uses: '' - quality_control: '' - sample: [] - size: 600B tokens - type: dataset - url: https://arxiv.org/pdf/2306.01116.pdf + monitoring: '' + feedback: '' diff --git a/assets/uw.yaml b/assets/uw.yaml index 15d2832c..29a49c7d 100644 --- a/assets/uw.yaml +++ b/assets/uw.yaml @@ -1,22 +1,23 @@ --- -- access: open - analysis: '' + +- type: dataset + name: YT-Temporal-1B + organization: University of Washington + description: '' created_date: 2022-01-07 + url: https://arxiv.org/abs/2201.02639 datasheet: '' + modality: video + size: 20M videos + sample: [] + analysis: '' dependencies: [YouTube] - description: '' - excluded: '' - feedback: '' included: '' - intended_uses: '' + excluded: '' + quality_control: '' + access: open license: MIT - modality: video - monitoring: '' - name: YT-Temporal-1B - organization: University of Washington + intended_uses: '' prohibited_uses: '' - quality_control: '' - sample: [] - size: 20M videos - type: dataset - url: https://arxiv.org/abs/2201.02639 + monitoring: '' + feedback: '' diff --git a/assets/uwashington.yaml b/assets/uwashington.yaml index aded82ab..32208f00 100644 --- a/assets/uwashington.yaml +++ b/assets/uwashington.yaml @@ -1,27 +1,26 @@ --- -- access: open - analysis: Reports results on the Vicuna benchmark and compares performance level - and time expenditure with ChatGPT - created_date: 2023-05-23 - dependencies: [QLoRA, OASST1] + +- 
type: model + name: Guanaco + organization: University of Washington description: Guanaco is a model family trained with QLORA, an efficient finetuning approach that reduces memory usage enough to finetune a 65B parameter model on a single 48GB GPU while preserving full 16-bit finetuning task performance. - feedback: '' - intended_uses: '' - license: MIT - modality: - explanation: natural language text - value: text; text + created_date: 2023-05-23 + url: https://arxiv.org/pdf/2305.14314v1.pdf model_card: '' - monitoring: '' - name: Guanaco - organization: University of Washington - prohibited_uses: '' - quality_control: '' - size: 33B parameters (dense) + modality: natural language text + analysis: Reports results on the Vicuna benchmark and compares performance level + and time expenditure with ChatGPT + size: 33B parameters + dependencies: [QLoRA, OASST1] training_emissions: '' - training_hardware: A single 24 GB GPU training_time: '' - type: model - url: https://arxiv.org/pdf/2305.14314v1.pdf + training_hardware: A single 24 GB GPU + quality_control: '' + access: open + license: MIT + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' diff --git a/assets/viable.yaml b/assets/viable.yaml index 35ab68cd..c29dce6e 100644 --- a/assets/viable.yaml +++ b/assets/viable.yaml @@ -1,28 +1,38 @@ --- -- access: limited - adaptation: unknown - created_date: unknown - dependencies: [OpenAI API] - description: "Viable analyzes qualitative consumer feedback and provides summary\ - \ feedback to companies.\n" - failures: unknown - feedback: - explanation: "The feedback mechanism isn't explicitly listed. Company contact\ - \ information is provided in the Terms of Service [[Terms of Service]](https://www.askviable.com/terms-of-service).\n" - value: unknown - intended_uses: "Intended to be used by companies to digest qualitative consumer\ - \ feedback.\n" - license: unknown - monitoring: unknown - monthly_active_users: unknown + +- type: application name: Viable + # General organization: Viable + description: > + Viable analyzes qualitative consumer feedback and provides summary feedback + to companies. + created_date: unknown + url: https://www.askviable.com/ + # Construction + dependencies: [OpenAI API] + adaptation: unknown output_space: Question and answer, summarization, sentiment analysis, topic identification - prohibited_uses: "Prohibited uses are listed in the Terms of Service [[Terms of\ - \ Service]](https://www.askviable.com/terms-of-service). The terms don't include\ - \ statements specific to the use of the content generated by the system or GPT-3.\n" quality_control: unknown + # Downstream + access: limited + license: unknown terms_of_service: https://www.askviable.com/terms-of-service - type: application - url: https://www.askviable.com/ + intended_uses: > + Intended to be used by companies to digest qualitative consumer feedback. + prohibited_uses: > + Prohibited uses are listed in the Terms of Service + [[Terms of Service]](https://www.askviable.com/terms-of-service). + The terms don't include statements specific to the use of the content + generated by the system or GPT-3. + monitoring: unknown + feedback: + value: unknown + explanation: > + The feedback mechanism isn't explicitly listed. Company contact + information is provided in the Terms of Service + [[Terms of Service]](https://www.askviable.com/terms-of-service). 
+ # Deployment + monthly_active_users: unknown user_distribution: unknown + failures: unknown diff --git a/assets/writer.yaml b/assets/writer.yaml index 9879f42f..a56591b9 100644 --- a/assets/writer.yaml +++ b/assets/writer.yaml @@ -1,55 +1,53 @@ --- -- access: open - analysis: Evaluated on the SuperGLUE benchmark + +- type: model + name: Palmyra + organization: Writer + description: Palmyra is a privacy-first LLM for enterprises trained on business + and marketing writing. created_date: + value: 2023-01-01 explanation: The model was stated to be published in January, but which day is not specified on the website. - value: 2023-01-01 + url: https://gpt3demo.com/apps/palmyra + model_card: https://huggingface.co/Writer/palmyra-base + modality: text + analysis: Evaluated on the SuperGLUE benchmark + size: 128M (Small), 5B (Base), 20B (Large) parameters dependencies: [Writer dataset] - description: Palmyra is a privacy-first LLM for enterprises trained on business - and marketing writing. - feedback: https://huggingface.co/Writer/palmyra-base/discussions - intended_uses: generating text from a prompt + training_emissions: unknown + training_time: unknown + training_hardware: '' + quality_control: '' + access: open license: Apache 2.0 - modality: - explanation: text - value: text; text - model_card: https://huggingface.co/Writer/palmyra-base + intended_uses: generating text from a prompt + prohibited_uses: '' monitoring: '' - name: Palmyra + feedback: https://huggingface.co/Writer/palmyra-base/discussions + +- type: model + name: Camel organization: Writer - prohibited_uses: '' - quality_control: '' - size: 128M parameters (dense) - training_emissions: unknown - training_hardware: '' - training_time: unknown - type: model - url: https://gpt3demo.com/apps/palmyra -- access: open - analysis: '' + description: Camel is an instruction-following large language model tailored for + advanced NLP and comprehension capabilities. created_date: + value: 2023-04-01 explanation: The model was stated to be published in April, but which day is not specified on the website. - value: 2023-04-01 - dependencies: [Palmyra, Camel dataset] - description: Camel is an instruction-following large language model tailored for - advanced NLP and comprehension capabilities. - feedback: https://huggingface.co/Writer/camel-5b-hf/discussions - intended_uses: '' - license: Apache 2.0 - modality: - explanation: text - value: text; text + url: https://chatcamel.vercel.app/ model_card: https://huggingface.co/Writer/camel-5b-hf - monitoring: '' - name: Camel - organization: Writer - prohibited_uses: '' - quality_control: '' - size: 5B parameters (dense) + modality: text + analysis: '' + size: 5B parameters + dependencies: [Palmyra, Camel dataset] training_emissions: unknown - training_hardware: '' training_time: unknown - type: model - url: https://chatcamel.vercel.app/ + training_hardware: '' + quality_control: '' + access: open + license: Apache 2.0 + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: https://huggingface.co/Writer/camel-5b-hf/discussions diff --git a/assets/yandex.yaml b/assets/yandex.yaml index 0090546b..becdb8e5 100644 --- a/assets/yandex.yaml +++ b/assets/yandex.yaml @@ -1,59 +1,63 @@ --- -- access: open - analysis: '' + +- type: model + name: YaLM + organization: Yandex + description: YaLM is a 100B parameter autoregressive model trained on 25% English + and 75% Russian text. created_date: - explanation: The date the blog post about YaLM was published. 
value: 2022-06-22 + explanation: The date the blog post about YaLM was published. + url: https://medium.com/yandex/yandex-publishes-yalm-100b-its-the-largest-gpt-like-neural-network-in-open-source-d1df53d0e9a6 + model_card: '' + modality: text (English, Russian) + analysis: '' + size: 100B parameters (dense model) dependencies: [The Pile, Yandex Russian Pretraining Dataset] - description: YaLM is a 100B parameter autoregressive model trained on 25% English - and 75% Russian text. - feedback: '' - intended_uses: '' + training_emissions: '' + training_time: '' + training_hardware: Yandex 800 A100 Cluster + quality_control: '' + access: open license: - explanation: '' value: Apache 2.0 - modality: - explanation: text (English, Russian) - value: text; text - model_card: '' + explanation: '' + intended_uses: '' + prohibited_uses: '' monitoring: '' - name: YaLM + feedback: '' + +- type: application + name: Yandex Search organization: Yandex - prohibited_uses: '' - quality_control: '' - size: 100B parameters (dense) - training_emissions: '' - training_hardware: Yandex 800 A100 Cluster - training_time: '' - type: model - url: https://medium.com/yandex/yandex-publishes-yalm-100b-its-the-largest-gpt-like-neural-network-in-open-source-d1df53d0e9a6 -- access: open - adaptation: '' - created_date: 2022-06-23 - dependencies: [YaLM] description: Yandex is a search engine and web portal. Yandex offers internet search and other services - failures: '' - feedback: '' - intended_uses: '' + created_date: 2022-06-23 + url: https://medium.com/yandex/yandex-publishes-yalm-100b-its-the-largest-gpt-like-neural-network-in-open-source-d1df53d0e9a6 + dependencies: [YaLM] + adaptation: '' + output_space: '' + quality_control: '' + access: open license: - explanation: "\"3.1. The Rightholder, on the basis of a simple (non-exclusive)\ - \ license, shall grant the User a free, worldwide, non-transferable right\ - \ to use the Software as set forth below:\n3.1.1. To use the Software in accordance\ - \ with its functionality by copying and installing it on the User's personal\ - \ device(s). The User shall be entitled to install the Software on any number\ - \ of personal devices.\n3.1.2. To reproduce and to distribute this Software\ - \ in an unmodified form and solely for non-commercial purposes (free of charge).\"\ - \ - excerpt from the Terms of Service document for browsers.\n" value: custom + explanation: > + "3.1. The Rightholder, on the basis of a simple (non-exclusive) license, + shall grant the User a free, worldwide, non-transferable right to use the + Software as set forth below: + + 3.1.1. To use the Software in accordance with its functionality by + copying and installing it on the User's personal device(s). The User shall + be entitled to install the Software on any number of personal devices. + + 3.1.2. To reproduce and to distribute this Software in an unmodified form + and solely for non-commercial purposes (free of charge)." - excerpt + from the Terms of Service document for browsers. 
+ terms_of_service: https://yandex.com/legal/browser_agreement/ + intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' monthly_active_users: '' - name: Yandex Search - organization: Yandex - output_space: '' - prohibited_uses: '' - quality_control: '' - terms_of_service: https://yandex.com/legal/browser_agreement/ - type: application - url: https://medium.com/yandex/yandex-publishes-yalm-100b-its-the-largest-gpt-like-neural-network-in-open-source-d1df53d0e9a6 user_distribution: '' + failures: '' diff --git a/assets/you.yaml b/assets/you.yaml index f7a1ffe3..60bf2a22 100644 --- a/assets/you.yaml +++ b/assets/you.yaml @@ -1,71 +1,77 @@ --- -- access: closed - analysis: '' + +- type: dataset + name: You dataset + organization: You + description: '' created_date: unknown + url: https://you.com/ datasheet: '' + modality: text + size: unknown + sample: [] + analysis: '' dependencies: [] - description: '' - excluded: '' - feedback: '' included: '' - intended_uses: '' + excluded: '' + quality_control: '' + access: closed license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - modality: {} + explanation: > + The asset isn't released, and hence the license is unknown. + intended_uses: '' + prohibited_uses: '' monitoring: '' - name: You dataset + feedback: '' + +- type: model + name: You model organization: You - prohibited_uses: '' - quality_control: '' - sample: [] - size: unknown - type: dataset + description: '' + created_date: unknown url: https://you.com/ -- access: closed + model_card: '' + modality: Text analysis: '' - created_date: unknown + size: unknown dependencies: [You dataset] - description: '' - feedback: '' - intended_uses: '' + training_emissions: '' + training_time: '' + training_hardware: '' + quality_control: '' + access: closed license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - modality: {} - model_card: '' + explanation: > + The asset isn't released, and hence the license is unknown. + intended_uses: '' + prohibited_uses: '' monitoring: '' - name: You model + feedback: '' + +- type: application + name: You Search organization: You - prohibited_uses: '' - quality_control: '' - size: '' - training_emissions: '' - training_hardware: '' - training_time: '' - type: model - url: https://you.com/ -- access: open - adaptation: '' - created_date: unknown - dependencies: [You model] description: You.com is a search engine built on artificial intelligence that provides users with a customized search experience while keeping their data 100% private. - failures: '' - feedback: '' - intended_uses: '' + created_date: unknown + url: https://you.com/ + dependencies: [You model] + adaptation: '' + output_space: '' + quality_control: '' + access: open license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown + explanation: > + The asset isn't released, and hence the license is unknown. + terms_of_service: '' + intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' monthly_active_users: '' - name: You Search - organization: You - output_space: '' - prohibited_uses: '' - quality_control: '' - terms_of_service: '' - type: application - url: https://you.com/ user_distribution: '' + failures: ''