diff --git a/assets/ai2.yaml b/assets/ai2.yaml index dc9e232b..2c890050 100644 --- a/assets/ai2.yaml +++ b/assets/ai2.yaml @@ -21,3 +21,75 @@ prohibited_uses: '' monitoring: '' feedback: '' + +- type: dataset + name: SODA + organization: AI2 + description: SODA is the first publicly available, million-scale, high-quality + dialogue dataset covering a wide range of social interactions. + created_date: 2023-05-24 + url: https://arxiv.org/pdf/2212.10465.pdf + datasheet: https://huggingface.co/datasets/allenai/soda + modality: text + size: 1.5M dialogues + sample: [] + analysis: Randomly sampled dialogues from dataset are evaluated according to six + established criteria of natural flow, context dependence, topic consistency, + speaker consistency, specificity, and overall. + dependencies: [] + included: '' + excluded: '' + quality_control: '' + access: open + license: CC BY 4.0 + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' + +- type: dataset + name: Multimodal C4 + organization: AI2 + description: An augmentation of C4 with images added and made openly available. + created_date: 2023-06-09 + url: https://arxiv.org/pdf/2304.06939.pdf + datasheet: '' + modality: text and images + size: 43B English tokens with 101.2M documents and 571M images + sample: [] + analysis: Conducted experiments on models trained with Multimodal C4 in comparison + to models trained on single image/caption datasets + dependencies: [C4] + included: '' + excluded: '' + quality_control: '' + access: open + license: MIT + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' + +- type: model + name: COSMO + organization: AI2 + description: COSMO is a conversation agent with greater generalizability on both + in- and out-of-domain chitchat datasets + created_date: 2023-05-24 + url: https://arxiv.org/pdf/2212.10465.pdf + model_card: https://huggingface.co/allenai/cosmo-xl + modality: text + analysis: Evaluated by human testers on generalization capabilities and responses + compared to other chatbots. + size: 11B parameters + dependencies: [SODA, ProsocialDialog, T5] + training_emissions: unknown + training_time: unknown + training_hardware: v3-128 TPU accelerators with batch size 256 + quality_control: '' + access: open + license: '' + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: https://huggingface.co/allenai/cosmo-xl/discussions diff --git a/assets/aleph_alpha.yaml b/assets/aleph_alpha.yaml index b7b635f6..587a9aca 100644 --- a/assets/aleph_alpha.yaml +++ b/assets/aleph_alpha.yaml @@ -100,3 +100,26 @@ monthly_active_users: unknown user_distribution: unknown failures: unknown + +- type: model + name: MAGMA + organization: Aleph Alpha + description: An autoregressive VL model that is able to generate text from an + arbitrary combination of visual and textual input + created_date: 2022-10-24 + url: https://arxiv.org/pdf/2112.05253.pdf + model_card: '' + modality: image and text input with natural language text output + analysis: Evaluated on the OKVQA benchmark as a fully open-ended generative task. 
+ size: 6B parameters + dependencies: [GPT-J, CLIP] + training_emissions: '' + training_time: '' + training_hardware: 32 A100 GPUs + quality_control: '' + access: open + license: MIT + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' diff --git a/assets/amazon.yaml b/assets/amazon.yaml new file mode 100644 index 00000000..94a67a4a --- /dev/null +++ b/assets/amazon.yaml @@ -0,0 +1,32 @@ +--- + +- type: application + name: Bedrock + organization: Amazon + description: Bedrock is a new service that makes FMs from AI21 Labs, Anthropic, + Stability AI, and Amazon accessible via an API. Bedrock is intended for customers + to build and scale generative AI-based applications using FMs, democratizing + access for all builders. + created_date: 2023-04-13 + url: https://aws.amazon.com/bedrock/ + dependencies: + - Jurassic-2 + - Claude + - Stable Diffusion + - Amazon Titan + - Claude 2 + - Cohere Command + adaptation: '' + output_space: foundation models made accessible via an API + quality_control: '' + access: limited + license: unknown + terms_of_service: https://aws.amazon.com/service-terms/ + intended_uses: allowing companies to incorporate generative AI into their business + models + prohibited_uses: '' + monitoring: '' + feedback: '' + monthly_active_users: '' + user_distribution: '' + failures: '' diff --git a/assets/anthropic.yaml b/assets/anthropic.yaml index 385d53ed..b47dc7b3 100644 --- a/assets/anthropic.yaml +++ b/assets/anthropic.yaml @@ -594,3 +594,36 @@ prohibited_uses: '' monitoring: '' feedback: '' + +- type: model + name: Claude 2 + organization: Anthropic + description: Claude 2 is a more evolved and refined version of Claude, which is + a general-purpose large language model using a transformer architecture and + trained via unsupervised learning. + created_date: 2023-07-11 + url: https://www.anthropic.com/index/claude-2 + model_card: https://www-files.anthropic.com/production/images/Model-Card-Claude-2.pdf + modality: text + analysis: Evaluated with human feedback on helpfulness, harmfulness, and honesty + and on the Bias Benchmark for QA. + size: '' + dependencies: + - Claude human feedback data + - Unknown licensed third party datasets + training_emissions: '' + training_time: '' + training_hardware: unknown + quality_control: '' + access: open + license: '' + intended_uses: Claude 2 tends to perform well at general, open-ended conversation; + search, writing, editing, outlining, and summarizing text; coding; and providing + helpful advice about a broad range of subjects. Claude 2 is particularly well + suited to support creative or literary use cases. It can take direction on + tone and “personality,” and users have described it as feeling steerable and + conversational. + prohibited_uses: Claude 2 should not be used on its own in high-stakes situations + where an incorrect answer would cause harm. + monitoring: '' + feedback: '' diff --git a/assets/autogpt.yaml b/assets/autogpt.yaml new file mode 100644 index 00000000..5a3a040f --- /dev/null +++ b/assets/autogpt.yaml @@ -0,0 +1,23 @@ +--- + +- type: application + name: Auto-GPT + organization: Auto-GPT + description: Auto-GPT is an experimental open-source application showcasing the + capabilities of the GPT-4 language model.
+ created_date: 2023-04-16 + url: https://news.agpt.co/ + dependencies: [GPT-4 API] + adaptation: GPT-4 adapted to run autonomously by chaining together LLM "thoughts" + output_space: text + quality_control: '' + access: open + license: MIT + terms_of_service: '' + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' + monthly_active_users: '' + user_distribution: '' + failures: '' diff --git a/assets/berkeley.yaml b/assets/berkeley.yaml new file mode 100644 index 00000000..e8bab3a0 --- /dev/null +++ b/assets/berkeley.yaml @@ -0,0 +1,72 @@ +--- + +- type: model + name: Koala + organization: Berkeley + description: A relatively small chatbot trained by fine-tuning Meta’s LLaMA on + dialogue data gathered from the web. + created_date: 2023-04-03 + url: https://bair.berkeley.edu/blog/2023/04/03/koala/ + model_card: https://huggingface.co/TheBloke/koala-7B-GPTQ-4bit-128g + modality: natural language text + analysis: Evaluated in comparison with ChatGPT and Stanford Alpaca. + size: 13B parameters + dependencies: [LLaMA, web-scraped dialogue data] + training_emissions: '' + training_time: 6 hours + training_hardware: 8 A100 GPUs + quality_control: '' + access: open + license: Apache 2.0 + intended_uses: academic research + prohibited_uses: '' + monitoring: '' + feedback: https://huggingface.co/TheBloke/koala-7B-GPTQ-4bit-128g/discussions + +- type: model + name: Gorilla + organization: Berkeley + description: Gorilla is a finetuned LLaMA-based model that surpasses the performance + of GPT-4 on writing API calls. + created_date: 2023-05-24 + url: https://arxiv.org/pdf/2305.15334v1.pdf + model_card: '' + modality: outputs API from natural language input + analysis: Evaluated using AST sub-tree matching technique and compared to other + models in terms of API functionality accuracy. + size: 7B parameters + dependencies: [LLaMA, Gorilla document retriever] + training_emissions: '' + training_time: '' + training_hardware: '' + quality_control: No specific quality control is mentioned in model training, though + details on data processing and collection are provided in the paper. + access: open + license: Apache 2.0 + intended_uses: In conjunction with a LLM to improve its capability for using API + calls. + prohibited_uses: '' + monitoring: '' + feedback: '' + +- type: model + name: OpenLLaMA + organization: Berkeley + description: OpenLlama is an open source reproduction of Meta's LLaMA model. + created_date: 2023-05-03 + url: https://github.com/openlm-research/open_llama + model_card: '' + modality: text + analysis: Evaluated on wide range of tasks using own evaluation benchmarks. + size: 17B parameters + dependencies: [RedPajama] + training_emissions: unknown + training_time: unknown + training_hardware: '' + quality_control: '' + access: open + license: Apache 2.0 + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' diff --git a/assets/bigcode.yaml b/assets/bigcode.yaml new file mode 100644 index 00000000..4a5362ea --- /dev/null +++ b/assets/bigcode.yaml @@ -0,0 +1,76 @@ +--- + +- type: model + name: StarCoder + organization: BigCode + description: StarCoder is a Large Language Model for Code (Code LLM) trained on + permissively licensed data from GitHub, including from 80+ programming languages, + Git commits, GitHub issues, and Jupyter notebooks. 
+ created_date: 2023-05-09 + url: https://arxiv.org/pdf/2305.06161.pdf + model_card: https://huggingface.co/bigcode/starcoder + modality: code (80+ programming languages) + analysis: Tested on several benchmarks, most notably Python benchmark HumanEval. + size: 15.5B parameters + dependencies: [The Stack] + training_emissions: 16.68 tons of CO2eq + training_time: 2 days + training_hardware: 64 NVIDIA A100 GPUs + quality_control: No specific quality control is mentioned in model training, though + details on data processing and how the tokenizer was trained are provided in + the paper. + access: open + license: Apache 2.0 + intended_uses: To be used with a Tech Assistant prompt and not as an instruction model, given + training limitations. + prohibited_uses: '' + monitoring: '' + feedback: https://huggingface.co/bigcode/starcoder/discussions + +- type: model + name: SantaCoder + organization: BigCode + description: Multilingual code model derived from findings of BigCode Project + analysis on GitHub stars' association with data quality. + created_date: 2023-02-24 + url: https://arxiv.org/pdf/2301.03988.pdf + model_card: '' + modality: code + analysis: Evaluated on MultiPL-E system benchmarks. + size: 1.1B parameters + dependencies: [The Stack, BigCode Dataset] + training_emissions: '' + training_time: 3.1 days + training_hardware: 96 NVIDIA Tesla V100 GPUs + quality_control: '' + access: open + license: Apache 2.0 + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' + +- type: dataset + name: The Stack + organization: BigCode + description: The Stack is a 3.1 TB dataset consisting of permissively licensed + source code intended for use in creating code LLMs. + created_date: 2022-11-20 + url: https://arxiv.org/pdf/2211.15533.pdf + datasheet: https://huggingface.co/datasets/bigcode/the-stack + modality: code (358 programming languages) + size: 3.1 TB + sample: [] + analysis: Evaluated models trained on The Stack on HumanEval and MBPP and compared + against similarly-sized models.
+ dependencies: [GitHub] + included: '' + excluded: '' + quality_control: allowed users whose data were part of The Stack's training data + to opt-out + access: open + license: Apache 2.0 + intended_uses: creating code LLMs + prohibited_uses: '' + monitoring: '' + feedback: '' diff --git a/assets/casia.yaml b/assets/casia.yaml new file mode 100644 index 00000000..0634f801 --- /dev/null +++ b/assets/casia.yaml @@ -0,0 +1,25 @@ +--- + +- type: model + name: BigTrans + organization: Institute of Automation Chinese Academy of Sciences + description: BigTrans is a model which adapts LLaMA that covers only 20 languages + and enhances it with multilingual translation capability on more than 100 languages + created_date: 2023-05-29 + url: https://arxiv.org/pdf/2305.18098v1.pdf + model_card: https://huggingface.co/James-WYang/BigTrans + modality: text + analysis: Reports results on standard translation benchmarks across 102 languages + in comparison with Google Translate and ChatGPT + size: 13B parameters (dense model) + dependencies: [LLaMA, CLUE, BigTrans parallel dataset] + training_emissions: unknown + training_time: unknown + training_hardware: 16 A100 GPUs with 80 GB of RAM + quality_control: '' + access: open + license: Apache 2.0 + intended_uses: Advancing future research in multilingual LLMs + prohibited_uses: '' + monitoring: '' + feedback: https://huggingface.co/James-WYang/BigTrans/discussions diff --git a/assets/chatglm.yaml b/assets/chatglm.yaml new file mode 100644 index 00000000..5bb6d82e --- /dev/null +++ b/assets/chatglm.yaml @@ -0,0 +1,24 @@ +--- + +- type: model + name: ChatGLM + organization: ChatGLM + description: ChatGLM is a Chinese-English language model with question and answer + and dialogue functions, and is aimed at a Chinese audience. + created_date: 2023-03-14 + url: https://chatglm.cn/blog + model_card: '' + modality: text + analysis: Performance evaluated on English and Chinese language benchmark tests. + size: 6B parameters + dependencies: [] + training_emissions: unknown + training_time: unknown + training_hardware: '' + quality_control: '' + access: open + license: Apache 2.0 + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' diff --git a/assets/columbia.yaml b/assets/columbia.yaml new file mode 100644 index 00000000..2ca39802 --- /dev/null +++ b/assets/columbia.yaml @@ -0,0 +1,23 @@ +--- + +- type: model + name: OpenFold + organization: Columbia + description: OpenFold is an open source recreation of AlphaFold2. + created_date: 2022-11-20 + url: https://www.biorxiv.org/content/10.1101/2022.11.20.517210v2 + model_card: '' + modality: protein structures + analysis: Evaluated on wide range of tasks using own evaluation benchmarks. + size: '' + dependencies: [AlphaFold2, OpenProteinSet] + training_emissions: unknown + training_time: 50,000 GPU hours + training_hardware: Single A100 NVIDIA GPU + quality_control: '' + access: open + license: CC BY 4.0 + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' diff --git a/assets/eleutherai.yaml b/assets/eleutherai.yaml index c79165bb..524c8010 100644 --- a/assets/eleutherai.yaml +++ b/assets/eleutherai.yaml @@ -267,3 +267,51 @@ monthly_active_users: unknown user_distribution: unknown failures: unknown + +- type: model + name: VQGAN-CLIP + organization: EleutherAI + description: VQGAN-CLIP is a model that better generates and edits images using + a multimodal encoder to guide image generation. 
+ created_date: 2022-09-04 + url: https://arxiv.org/pdf/2204.08583.pdf + model_card: '' + modality: text input with image output + analysis: Evaluated by human testers rating alignment of text input, image output + pairs. + size: 227M parameters + dependencies: [VQGAN, CLIP] + training_emissions: unknown + training_time: Less than 1 V100-hour + training_hardware: 1 NVIDIA Tesla K80 GPU + quality_control: '' + access: open + license: MIT + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' + +- type: model + name: Pythia + organization: Eleuther AI + description: A suite of 16 LLMs all trained on public data seen in the exact same + order and ranging in size from 70M to 12B parameters + created_date: 2023-05-31 + url: https://arxiv.org/pdf/2304.01373.pdf + model_card: https://huggingface.co/EleutherAI/pythia-12b + modality: natural language text + analysis: Evaluated on a variety of NLP benchmarks and found to perform similarly + to OPT and BLOOM models. + size: 12B parameters + dependencies: [The Pile] + training_emissions: '' + training_time: '' + training_hardware: 64 A100 GPUs + quality_control: '' + access: open + license: Apache 2.0 + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: https://huggingface.co/EleutherAI/pythia-6.9b/discussions diff --git a/assets/faraday.yaml b/assets/faraday.yaml new file mode 100644 index 00000000..7c986cb4 --- /dev/null +++ b/assets/faraday.yaml @@ -0,0 +1,27 @@ +--- + +- type: application + name: ARES + organization: Faraday Lab + description: ARES is a text-to-image generator based on Stable Diffusion. The + goal is to provide a simple tool with a user interface allowing mainstream AI + access for artists and creators. + created_date: + value: 2023-04-26 + explanation: The date Faraday Lab's hugging face card was last updated. Date + for ARES release is not published on site. + url: https://faradaylab.fr/ + dependencies: [Stable Diffusion] + adaptation: '' + output_space: generated images + quality_control: '' + access: open + license: unknown + terms_of_service: '' + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' + monthly_active_users: '' + user_distribution: '' + failures: '' diff --git a/assets/google.yaml b/assets/google.yaml index 09556eee..fb56a8dc 100644 --- a/assets/google.yaml +++ b/assets/google.yaml @@ -615,7 +615,7 @@ training_time: unknown training_hardware: 128 TPU-v4 quality_control: '' - access: closed + access: open license: value: unknown explanation: > @@ -770,6 +770,32 @@ monitoring: '' feedback: '' +- type: model + name: Med-PaLM Multimodal + organization: Google + description: '' + created_date: 2023-07-26 + url: https://arxiv.org/pdf/2307.14334.pdf + model_card: '' + modality: text, image, and genomics input with text output + analysis: Evaluated on MultiMedBench tasks and radiologist evaluations of model-generated + chest X-ray reports + size: 562B parameters (dense) + dependencies: [PaLM-E, MultiMedBench] + training_emissions: '' + training_time: '' + training_hardware: '' + quality_control: '' + access: closed + license: + value: unknown + explanation: > + The asset isn't released, and hence the license is unknown. 
+ intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' + - type: model name: MultiMedQA organization: Google @@ -1904,3 +1930,56 @@ prohibited_uses: '' monitoring: '' feedback: '' + +- type: model + name: Google Joint SLM + organization: Google + description: Joint speech and language model using a Speech2Text adapter and using + a CTC-based blank-filtering. + created_date: 2023-06-08 + url: https://arxiv.org/pdf/2306.07944.pdf + model_card: '' + modality: speech input with textual output + analysis: evaluated on DSTC11 Challenge Task, based on MultiWoz 2.1, with a focus + on dialog state tracking. + size: '' + dependencies: [CTC blank-filtering, Speech2Text adapter] + training_emissions: '' + training_time: '' + training_hardware: '' + quality_control: '' + access: open + license: Google + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' + +- type: model + name: PaLM 2 + organization: Google + description: PaLM2 is a new state-of-the-art language model that has better multilingual + and reasoning capabilities and is more compute-efficient than its predecessor + PaLM. PaLM 2 is a Transformer-based model trained using a mixture of objectives + similar to UL2. + created_date: 2023-05-10 + url: https://blog.google/technology/ai/google-palm-2-ai-large-language-model/ + model_card: https://ai.google/static/documents/palm2techreport.pdf + modality: text (input and output) + analysis: Reports results on standard code benchmarks across a variety of programming + languages. + size: 3.6T parameters + dependencies: [palm 2 dataset] + training_emissions: '' + training_time: '' + training_hardware: TPU v4 (number unspecified) + quality_control: Employed de-duplication, removal of sensitive-PII and filtering. + Added control tokens marking toxicity of text. + access: open + license: Google + intended_uses: general use large language model that can be used for language, + reasoning, and code tasks. + prohibited_uses: becoming part of a general-purpose service or product or use + within specific downstream applications without prior assessment + monitoring: Google internal monitoring + feedback: Specific queries provided by annotators diff --git a/assets/h2oai.yaml b/assets/h2oai.yaml new file mode 100644 index 00000000..601e629d --- /dev/null +++ b/assets/h2oai.yaml @@ -0,0 +1,23 @@ +--- + +- type: model + name: h2oGPT + organization: H2O AI + description: Series of models fine-tuned on well-known LLMs using the h2oGPT repositories. + created_date: 2023-06-16 + url: https://arxiv.org/pdf/2306.08161.pdf + model_card: https://huggingface.co/h2oai/h2ogpt-oasst1-512-20b + modality: text + analysis: Evaluated on EleutherAI evaluation harness. + size: 20B parameters + dependencies: [GPT-NeoX, H2O AI OpenAssistant, h2oGPT Repositories] + training_emissions: unknown + training_time: unknown + training_hardware: unspecified number of 48GB A100 NVIDIA GPUs + quality_control: '' + access: open + license: Apache 2.0 + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: https://huggingface.co/h2oai/h2ogpt-oasst1-512-20b/discussions diff --git a/assets/inflection.yaml b/assets/inflection.yaml new file mode 100644 index 00000000..5113a5f1 --- /dev/null +++ b/assets/inflection.yaml @@ -0,0 +1,49 @@ +--- + +- type: model + name: Inflection-1 + organization: Inflection AI + description: Inflection AI's in-house LLM, which powers Pi and will soon be available + via Inflection AI's conversational API. 
+ created_date: 2023-06-22 + url: https://inflection.ai/inflection-1 + model_card: '' + modality: text + analysis: Evaluated on wide range of language benchmarks like MMLU 5-shot, GSM-8K, + and HellaSwag 10-shot among others. + size: + value: unknown + explanation: Designed to be roughly around Chat-GPT's size (175B parameters). + dependencies: [] + training_emissions: '' + training_time: '' + training_hardware: unknown + quality_control: '' + access: limited + license: unknown + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' + +- type: application + name: Pi + organization: Inflection AI + description: Personal AI chatbot designed to be conversational and specialized + in emotional intelligence. + created_date: 2023-05-02 + url: https://inflection.ai/press + dependencies: [Inflection-1] + adaptation: '' + output_space: natural language text responses + quality_control: '' + access: limited + license: unknown + terms_of_service: '' + intended_uses: to be used as a personal assistant chatbot for everyday activities + prohibited_uses: '' + monitoring: '' + feedback: '' + monthly_active_users: '' + user_distribution: '' + failures: '' diff --git a/assets/laion.yaml b/assets/laion.yaml index 9d026ebc..6a62325e 100644 --- a/assets/laion.yaml +++ b/assets/laion.yaml @@ -154,3 +154,27 @@ from using LAION-2B-en for non-research purposes. monitoring: '' feedback: '' + +- type: model + name: OpenFlamingo + organization: LAION + description: An open-source reproduction of DeepMind's Flamingo model. At its + core, OpenFlamingo is a framework that enables training and evaluation of large + multimodal models (LMMs). + created_date: 2023-03-28 + url: https://laion.ai/blog/open-flamingo/ + model_card: https://github.com/mlfoundations/open_flamingo/blob/main/MODEL_CARD.md + modality: image and text input with textual output + analysis: Evaluated on COCO captioning and VQAv2 vision-language tasks. + size: 9B parameters + dependencies: [LLaMA, CLIP] + training_emissions: '' + training_time: '' + training_hardware: '' + quality_control: '' + access: open + license: MIT + intended_uses: academic research purposes + prohibited_uses: commercial use + monitoring: '' + feedback: '' diff --git a/assets/lehigh.yaml b/assets/lehigh.yaml new file mode 100644 index 00000000..9e8df418 --- /dev/null +++ b/assets/lehigh.yaml @@ -0,0 +1,30 @@ +--- + +- type: model + name: BiomedGPT + organization: Lehigh University + description: BiomedGPT leverages self-supervision on large and diverse datasets + to accept multi-modal inputs and perform a range of downstream tasks. + created_date: 2023-05-26 + url: https://arxiv.org/pdf/2305.17100.pdf + model_card: '' + modality: image and text input, text output + analysis: outperforms majority of preceding state-of-the-art models over 15 unique + biomedical modalities. + size: 472M parameters + dependencies: + - GPT-style autoregressive decoder + - BiomedGPT biomedical datasets + training_emissions: unknown + training_time: unknown + training_hardware: 10 NVIDIA A5000 GPUs + quality_control: No specific quality control is mentioned in model training, though + details on data processing and how the model was trained are provided in the + paper. + access: open + license: Apache 2.0 + intended_uses: furthering research in developing unified and generalist models + for biomedicine. 
+ prohibited_uses: '' + monitoring: '' + feedback: '' diff --git a/assets/lmsys.yaml b/assets/lmsys.yaml new file mode 100644 index 00000000..6f07781e --- /dev/null +++ b/assets/lmsys.yaml @@ -0,0 +1,24 @@ +--- + +- type: model + name: Vicuna + organization: LMSYS + description: An open-source chatbot trained by fine-tuning LLaMA on user-shared + conversations collected from ShareGPT. + created_date: 2023-03-30 + url: https://lmsys.org/blog/2023-03-30-vicuna/ + model_card: https://huggingface.co/lmsys/vicuna-13b-delta-v0 + modality: natural language text + analysis: Evaluated against similar LLMs using GPT-4 as a judge. + size: 13B parameters + dependencies: [LLaMA, ShareGPT conversations data] + training_emissions: '' + training_time: 1 day + training_hardware: 8 A100 GPUs + quality_control: '' + access: open + license: Apache 2.0 + intended_uses: research on LLMs and chatbots + prohibited_uses: '' + monitoring: '' + feedback: https://huggingface.co/datasets/bigcode/the-stack/discussions diff --git a/assets/meta.yaml b/assets/meta.yaml index 0fdf0efa..c44d7474 100644 --- a/assets/meta.yaml +++ b/assets/meta.yaml @@ -369,7 +369,7 @@ Authors do not mention or provide a feedback mechanism. - type: model - name: LLaMa + name: LLaMA organization: Meta description: '' created_date: 2023-02-24 @@ -397,6 +397,36 @@ monitoring: '' feedback: '' +- type: model + name: LLaMA 2 + organization: Meta + description: LLaMA 2 is an updated version of LLaMA trained on a new mix of publicly + available data. + created_date: 2023-07-18 + url: https://ai.meta.com/resources/models-and-libraries/llama/ + model_card: Can be found at appendix of paper at https://ai.meta.com/research/publications/llama-2-open-foundation-and-fine-tuned-chat-models/ + modality: text + analysis: Evaluated on standard academic benchmarks and internal Meta libraries. + size: 70B parameters (dense model) + dependencies: [] + training_emissions: 539 tCO2eq + training_time: '' + training_hardware: NVIDIA A100-80GB GPUs (TDP of 350-400W) + quality_control: '' + access: open + license: + value: custom + explanation: The license can be found at https://ai.meta.com/resources/models-and-libraries/llama-downloads/ + intended_uses: LLaMA 2 is intended for commercial and research use in English. + Tuned models are intended for assistant-like chat, whereas pretrained models + can be adapted for a variety of natural language generation tasks. + prohibited_uses: Use in any manner that violates applicable laws or regulations + (including trade compliance laws). Use in languages other than English. Use + in any other way that is prohibited by the Acceptable Use Policy and Licensing + Agreement for LLaMA 2. + monitoring: '' + feedback: '' + - type: model name: OPT-IML organization: Meta @@ -608,3 +638,54 @@ monitoring: '' feedback: Feedback can be given via the feedback form on their website [segment-anything.com](https://segment-anything.com/) or by emailing at segment-anything at meta.com. + +- type: model + name: Voicebox + organization: Meta + description: Voicebox is the first generative AI model for speech to generalize + across tasks with state-of-the-art performance. + created_date: 2023-06-16 + url: https://research.facebook.com/publications/voicebox-text-guided-multilingual-universal-speech-generation-at-scale/ + model_card: '' + modality: text and audio input and audio output + analysis: Evaluated on zero-shot text-to-speech benchmarks, with Voicebox outperforming + the current state-of-the-art English model VALL-E. 
+ size: 330M parameters + dependencies: [] + training_emissions: unknown + training_time: 750,000 iterations + training_hardware: 32 GPUs of unspecified type + quality_control: '' + access: closed + license: '' + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' + +- type: model + name: PEER + organization: Meta + description: PEER is a collaborative language model that is trained to imitate + the entire writing process itself. PEER can write drafts, add suggestions, propose + edits and provide explanations for its actions. + created_date: 2022-08-24 + url: https://arxiv.org/pdf/2208.11663.pdf + model_card: '' + modality: natural language text + analysis: PEER is evaluated on core research questions intended to gauge language + understanding, proper use of citations, instruction following, and iterative + use. + size: 3B parameters + dependencies: [] + training_emissions: '' + training_time: '' + training_hardware: 64 GPUs + quality_control: Heuristics and edit filtering were used on the dataset, which consisted + mostly of Wikipedia pages. + access: open + license: '' + intended_uses: adapting LLMs to work with collaborative writing and updating. + prohibited_uses: '' + monitoring: '' + feedback: '' diff --git a/assets/microsoft.yaml b/assets/microsoft.yaml index 298c806e..8d324607 100644 --- a/assets/microsoft.yaml +++ b/assets/microsoft.yaml @@ -779,3 +779,100 @@ monthly_active_users: unknown user_distribution: unknown failures: unknown + +- type: model + name: UniLM + organization: Microsoft + description: UniLM is a unified language model that can be fine-tuned for both + natural language understanding and generation tasks. + created_date: 2019-10-01 + url: https://proceedings.neurips.cc/paper_files/paper/2019/file/c20bb2d9a50d5ac1f713f8b34d9aac5a-Paper.pdf + model_card: '' + modality: text + analysis: Evaluated on GLUE, SQuAD 2.0, and CoQA benchmarks. + size: 340M parameters + dependencies: [] + training_emissions: unknown + training_time: 10,000 steps in 7 hours + training_hardware: 8 NVIDIA Tesla V100 32GB GPUs + quality_control: '' + access: open + license: MIT + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' + +- type: model + name: Docugami + organization: Microsoft + description: Docugami is an LLM focused on writing business documents and data + using generative AI. + created_date: 2021-04-12 + url: https://www.docugami.com/generative-ai + model_card: '' + modality: text + analysis: '' + size: 20B parameters + dependencies: [] + training_emissions: unknown + training_time: unknown + training_hardware: '' + quality_control: '' + access: limited + license: '' + intended_uses: analyzing, writing, and connecting business documents and data + prohibited_uses: '' + monitoring: '' + feedback: '' + +- type: model + name: BEiT-3 + organization: Microsoft + description: BEiT-3 is a general-purpose multimodal foundation model for vision + and vision-language tasks. + created_date: 2022-08-31 + url: https://arxiv.org/pdf/2208.10442.pdf + model_card: '' + modality: text and image input and output + analysis: Evaluated on a range of standardized vision benchmarks, and achieves + state-of-the-art performance on all of them experimentally.
+ size: 1.9B parameters + dependencies: [Multiway Transformer network] + training_emissions: unknown + training_time: '' + training_hardware: '' + quality_control: '' + access: open + license: '' + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' + +- type: model + name: WizardLM + organization: Microsoft + description: WizardLM is an instruction-following model built by using the proposed + Evol-Instruct method to rewrite an initial set of instructions step by step into + more complex instructions, and then fine-tuning LLaMA on the mixture of all generated + instruction data. + created_date: 2023-04-24 + url: https://arxiv.org/pdf/2304.12244v1.pdf + model_card: https://huggingface.co/WizardLM/WizardLM-13B-1.0 + modality: natural language text + analysis: Reports results on standard LLM benchmarks in comparison to other LLMs + and test sets. + size: 7B parameters + dependencies: [LLaMA, Evol-Instruct, Alpaca dataset] + training_emissions: '' + training_time: 70 hours on 3 epochs + training_hardware: 8 V100 GPUs + quality_control: '' + access: open + license: Apache 2.0 + intended_uses: Creating large amounts of instruction data, particularly with high + complexity + prohibited_uses: '' + monitoring: '' + feedback: https://huggingface.co/datasets/WizardLM/evol_instruct_70k/discussions diff --git a/assets/mosaic.yaml b/assets/mosaic.yaml new file mode 100644 index 00000000..a887f3b4 --- /dev/null +++ b/assets/mosaic.yaml @@ -0,0 +1,24 @@ +--- + +- type: model + name: MPT + organization: Mosaic + description: MPT is a series of large language models seeking to address the limitations + of other open source models like LLaMA and Pythia. + created_date: 2023-05-05 + url: https://www.mosaicml.com/blog/mpt-7b + model_card: '' + modality: text + analysis: Evaluated on a range of benchmarks and performed on par with LLaMA-7B. + size: 7B parameters + dependencies: [RedPajama-Data, C4, The Stack, Multimodal C4] + training_emissions: unknown + training_time: 9.5 days + training_hardware: 440 A100 40GB GPUs + quality_control: '' + access: open + license: Apache 2.0 + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' diff --git a/assets/nanyang.yaml b/assets/nanyang.yaml new file mode 100644 index 00000000..02f40c24 --- /dev/null +++ b/assets/nanyang.yaml @@ -0,0 +1,27 @@ +--- + +- type: model + name: Otter + organization: Nanyang Technological University + description: Otter is a multi-modal model based on OpenFlamingo (open-sourced + version of DeepMind’s Flamingo), trained on MIMIC-IT and showcasing improved + instruction-following ability and in-context learning. + created_date: 2023-05-05 + url: https://arxiv.org/pdf/2305.03726v1.pdf + model_card: https://github.com/Luodian/Otter/blob/main/docs/model_card.md + modality: outputs text from text and image input + analysis: Evaluated on researcher experiments to test deeper understanding and + advanced commonsense reasoning. + size: 1.3B parameters + dependencies: [MIMIC-IT, OpenFlamingo] + training_emissions: '' + training_time: '' + training_hardware: 4 RTX-3090 GPUs + quality_control: '' + access: open + license: MIT + intended_uses: Following and executing new instructions with few in-context learning + examples given image and textual input.
+ prohibited_uses: '' + monitoring: '' + feedback: '' diff --git a/assets/nextdoor.yaml b/assets/nextdoor.yaml new file mode 100644 index 00000000..0dfafe79 --- /dev/null +++ b/assets/nextdoor.yaml @@ -0,0 +1,24 @@ +--- + +- type: application + name: Nextdoor Assistant + organization: Nextdoor + description: AI chatbot on Nextdoor that helps users write more clear and conscientious + posts. + created_date: 2023-05-02 + url: https://help.nextdoor.com/s/article/Introducing-Assistant + dependencies: [ChatGPT] + adaptation: '' + output_space: natural language text guidance + quality_control: '' + access: open + license: unknown + terms_of_service: '' + intended_uses: to be used to help make the Nextdoor experience more positive for + users + prohibited_uses: '' + monitoring: '' + feedback: '' + monthly_active_users: '' + user_distribution: '' + failures: '' diff --git a/assets/nolano.yaml b/assets/nolano.yaml new file mode 100644 index 00000000..f2b8fe84 --- /dev/null +++ b/assets/nolano.yaml @@ -0,0 +1,23 @@ +--- + +- type: application + name: Cformers + organization: Nolano + description: Cformers is a set of transformers that act as an API for AI inference + in code. + created_date: 2023-03-19 + url: https://www.nolano.org/services/Cformers/ + dependencies: [] + adaptation: '' + output_space: '' + quality_control: '' + access: limited + license: MIT + terms_of_service: '' + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' + monthly_active_users: '' + user_distribution: '' + failures: '' diff --git a/assets/paladin.yaml b/assets/paladin.yaml new file mode 100644 index 00000000..76870f97 --- /dev/null +++ b/assets/paladin.yaml @@ -0,0 +1,22 @@ +--- + +- type: application + name: UnderwriteGPT + organization: Paladin Group and Dais Technology + description: UnderwriteGPT is the world's first generative AI underwriting tool. + created_date: 2023-02-01 + url: https://dais.com/underwritegpt/ + dependencies: [] + adaptation: '' + output_space: '' + quality_control: '' + access: limited + license: '' + terms_of_service: '' + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' + monthly_active_users: '' + user_distribution: '' + failures: '' diff --git a/assets/portkey.yaml b/assets/portkey.yaml new file mode 100644 index 00000000..2fca2885 --- /dev/null +++ b/assets/portkey.yaml @@ -0,0 +1,23 @@ +--- + +- type: application + name: Portkey + organization: Portkey + description: Portkey is a hosted middleware that allows users to create generative + AI applications + created_date: 2023-05-06 + url: https://portkey.ai/ + dependencies: [] + adaptation: '' + output_space: generative AI apps + quality_control: '' + access: open + license: '' + terms_of_service: https://portkey.ai/terms + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' + monthly_active_users: '' + user_distribution: '' + failures: '' diff --git a/assets/shanghai.yaml b/assets/shanghai.yaml index b4a206a7..8dae5081 100644 --- a/assets/shanghai.yaml +++ b/assets/shanghai.yaml @@ -28,3 +28,26 @@ prohibited_uses: '' monitoring: '' feedback: '' + +- type: model + name: Lego-MT + organization: Shanghai AI Laboratory + description: Lego-MT is a multilingual large language model which uses a more + efficient approach of being an effective detachable model. + created_date: 2023-05-29 + url: https://arxiv.org/pdf/2212.10551.pdf + model_card: '' + modality: multilingual text + analysis: Evaluated based on own constructed dataset covering 433 languages. 
+ size: 1.2B parameters + dependencies: [OPUS] + training_emissions: unknown + training_time: 15 days + training_hardware: 32 A100 GPUs + quality_control: '' + access: open + license: '' + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' diff --git a/assets/singapore.yaml b/assets/singapore.yaml new file mode 100644 index 00000000..5e0ab85c --- /dev/null +++ b/assets/singapore.yaml @@ -0,0 +1,27 @@ +--- + +- type: model + name: GOAT + organization: National University of Singapore + description: GOAT is a fine-tuned LLaMA model which uses the tokenization of numbers + to significantly outperform benchmark standards on a range of arithmetic tasks. + created_date: 2023-05-23 + url: https://arxiv.org/pdf/2305.14201.pdf + model_card: none + modality: textual input, numerical data output + analysis: Performance assessed on BIG-bench arithmetic sub-task, and various elementary + arithmetic tasks. + size: 7B parameters + dependencies: [LLaMA, GOAT dataset] + training_emissions: unknown + training_time: unknown + training_hardware: 24 GB VRAM GPU + quality_control: Number data is randomly generated from log space to reduce likelihood + of redundancy and range of magnitudes. + access: open + license: Apache 2.0 + intended_uses: Integration into other instruction-tuned LLMs to further enhance + arithmetic reasoning abilities in solving math word problems. + prohibited_uses: '' + monitoring: '' + feedback: '' diff --git a/assets/stability.yaml b/assets/stability.yaml index fc209b52..8c572529 100644 --- a/assets/stability.yaml +++ b/assets/stability.yaml @@ -1,31 +1,102 @@ --- - type: model - name: Stable Diffusion + name: DeepFloyd IF organization: Stability AI - description: Stable Diffusion is an open source text-to-image model - created_date: - value: 2022-08-22 - explanation: Date the model was made publicly available - url: https://github.com/CompVis/stable-diffusion - model_card: https://huggingface.co/CompVis/stable-diffusion-v1-4?text=A+mecha+robot+in+a+favela+in+expressionist+style - modality: text and image - size: 890M parameters - analysis: '' + description: A text-to-image cascaded pixel diffusion model released in conjunction + with AI research lab DeepFloyd. + created_date: 2023-04-28 + url: https://stability.ai/blog/deepfloyd-if-text-to-image-model + model_card: https://huggingface.co/DeepFloyd/IF-I-XL-v1.0 + modality: textual input with image output + analysis: Evaluated on the COCO dataset. + size: 4.3B parameters dependencies: [LAION-5B] training_emissions: '' - training_time: 25 days according to Emad Mostaque (CEO of Stability) on [[Twitter]](https://twitter.com/emostaque/status/1563870674111832066) - training_hardware: 256 A100 GPUs according to Emad Mostaque (CEO of Stability) - on [[Twitter]](https://twitter.com/emostaque/status/1563870674111832066) + training_time: '' + training_hardware: '' + quality_control: '' + access: open + license: + value: custom + explanation: License agreement can be found on model card https://huggingface.co/DeepFloyd/IF-I-XL-v1.0 + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: https://huggingface.co/DeepFloyd/IF-I-XL-v1.0/discussions + + +- type: model + name: StableLM + organization: Stability AI + description: Large language models trained on up to 1.5 trillion tokens. 
+ created_date: 2023-04-20 + url: https://github.com/Stability-AI/StableLM + model_card: '' + modality: natural language text + analysis: '' + size: 7B parameters + dependencies: + - StableLM-Alpha dataset + - Alpaca dataset + - gpt4all dataset + - ShareGPT52K dataset + - Dolly dataset + - HH dataset + training_emissions: '' + training_time: '' + training_hardware: '' + quality_control: '' + access: open + license: Apache 2.0 + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' + + +- type: application + name: Stable Diffusion + organization: Stability AI + description: Stable Diffusion is a generative software that creates images from + text prompts. + created_date: 2022-08-22 + url: https://stability.ai/blog/stable-diffusion-public-release + dependencies: [] + adaptation: '' + output_space: image quality_control: '' - access: - value: open - explanation: Model weights are available for download from the [[Github repo]](https://github.com/CompVis/stable-diffusion) + access: open license: - value: CreativeML Open RAIL-M - explanation: > - [[CreativeML Open RAIL-M]](https://github.com/CompVis/stable-diffusion/blob/main/LICENSE) + value: custom + explanation: License agreement can be found on https://github.com/CompVis/stable-diffusion/blob/main/LICENSE + terms_of_service: '' + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: https://huggingface.co/CompVis/stable-diffusion/discussions + monthly_active_users: '' + user_distribution: '' + failures: '' + +- type: application + name: Stable Diffusion XL + organization: Stability AI + description: Stable Diffusion XL is an updated version of Stable Diffusion, and + creates descriptive images with shorter prompts and generate words within images. + created_date: 2023-07-26 + url: https://stability.ai/stablediffusion + dependencies: [] + adaptation: '' + output_space: image + quality_control: '' + access: limited + license: MIT + terms_of_service: '' intended_uses: '' prohibited_uses: '' monitoring: '' feedback: '' + monthly_active_users: '' + user_distribution: '' + failures: '' diff --git a/assets/stanford.yaml b/assets/stanford.yaml index 88454882..f54d0d1e 100644 --- a/assets/stanford.yaml +++ b/assets/stanford.yaml @@ -21,3 +21,54 @@ prohibited_uses: '' monitoring: '' feedback: '' + +- type: model + name: RoentGen + organization: Stanford + description: RoentGen is a generative medical imaging model that can create visually + convincing X-ray images. + created_date: 2022-11-23 + url: https://arxiv.org/pdf/2211.12737.pdf + model_card: '' + modality: text input and image output + analysis: Evaluated on own framework that tests domain-specific tasks in medical + field. + size: 330M parameters + dependencies: [Stable Diffusion, RoentGen radiology dataset] + training_emissions: unknown + training_time: 60k training steps per day + training_hardware: 64 A100 GPUs + quality_control: '' + access: open + license: '' + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' + +- type: model + name: CORGI + organization: Stanford + description: Model trained to generate language corrections for physical control + tasks. 
+ created_date: 2023-06-12 + url: https://arxiv.org/pdf/2306.07012.pdf + model_card: '' + modality: trajectory input with text output + analysis: Evaluated on three physical control tasks (drawing, steering, and human + body movement) under various dynamics. + size: 124M parameters + dependencies: [GPT-2, BABEL, text-davinci-003] + training_emissions: '' + training_time: + value: unknown + explanation: The authors do not report the training time, but do report that + they train for 200 epochs. + training_hardware: one NVIDIA A40 GPU + quality_control: '' + access: open + license: MIT + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' diff --git a/assets/suno.yaml b/assets/suno.yaml new file mode 100644 index 00000000..6e277fb6 --- /dev/null +++ b/assets/suno.yaml @@ -0,0 +1,24 @@ +--- + +- type: model + name: Bark + organization: Suno + description: Bark is a text-to-audio model that can generate multilingual speech + as well as other noises. + created_date: 2023-04-20 + url: https://github.com/suno-ai/bark + model_card: https://github.com/suno-ai/bark/blob/main/model-card.md + modality: text input with audio output + analysis: '' + size: '' + dependencies: [AudioLM] + training_emissions: unknown + training_time: unknown + training_hardware: '' + quality_control: '' + access: open + license: MIT + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: https://huggingface.co/spaces/suno/bark/discussions diff --git a/assets/together.yaml b/assets/together.yaml index 191dd5fc..2bb3cb97 100644 --- a/assets/together.yaml +++ b/assets/together.yaml @@ -109,3 +109,26 @@ prohibited_uses: '' monitoring: '' feedback: '' + +- type: dataset + name: RedPajama-Data + organization: Together + description: The RedPajama base dataset is a 1.2 trillion token fully-open dataset + created by following the recipe described in the LLaMA paper. + created_date: 2023-04-17 + url: https://www.together.xyz/blog/redpajama + datasheet: https://huggingface.co/datasets/togethercomputer/RedPajama-Data-1T + modality: text + size: 1.2 trillion tokens + sample: [] + analysis: '' + dependencies: [GitHub, Wikipedia] + included: '' + excluded: '' + quality_control: '' + access: open + license: Apache 2.0 + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' diff --git a/assets/transformify.yaml b/assets/transformify.yaml new file mode 100644 index 00000000..118b559f --- /dev/null +++ b/assets/transformify.yaml @@ -0,0 +1,23 @@ +--- + +- type: application + name: Transformify Automate + organization: Transformify + description: Transformify Automate is a platform for automated task integration + using natural language prompts. + created_date: 2023-05-30 + url: https://www.transformify.ai/automate + dependencies: [GPT-4] + adaptation: '' + output_space: text and code + quality_control: '' + access: open + license: '' + terms_of_service: https://www.transformify.ai/legal-stuff + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' + monthly_active_users: '' + user_distribution: '' + failures: '' diff --git a/assets/uae.yaml b/assets/uae.yaml new file mode 100644 index 00000000..127d82d0 --- /dev/null +++ b/assets/uae.yaml @@ -0,0 +1,50 @@ +--- + +- type: model + name: Falcon + organization: UAE Technology Innovation Institute + description: Falcon-40B is a 40B-parameter causal decoder-only model built by + TII and trained on 1,000B tokens of RefinedWeb enhanced with curated corpora.
+ created_date: 2023-06-14 + url: https://huggingface.co/tiiuae/falcon-40b + model_card: https://huggingface.co/tiiuae/falcon-40b + modality: natural language text + analysis: '' + size: 40B parameters + dependencies: [RefinedWeb] + training_emissions: '' + training_time: 2 months + training_hardware: 384 A100 40GB GPUs + quality_control: '' + access: open + license: Apache 2.0 + intended_uses: '' + prohibited_uses: irresponsible or harmful use or production use without adequate + assessment of risks and mitigation + monitoring: None + feedback: https://huggingface.co/tiiuae/falcon-40b/discussions + +- type: dataset + name: RefinedWeb + organization: UAE Technology Innovation Institute + description: RefinedWeb is a high-quality five trillion tokens web-only English + pretraining dataset. + created_date: 2023-06-01 + url: https://arxiv.org/pdf/2306.01116.pdf + datasheet: https://huggingface.co/datasets/tiiuae/falcon-refinedweb + modality: text + size: 600B tokens + sample: [] + analysis: '' + dependencies: [] + included: '' + excluded: '' + quality_control: '' + access: open + license: + value: unknown + explanation: License can be found at https://huggingface.co/datasets/tiiuae/falcon-refinedweb + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' diff --git a/assets/uwashington.yaml b/assets/uwashington.yaml new file mode 100644 index 00000000..32208f00 --- /dev/null +++ b/assets/uwashington.yaml @@ -0,0 +1,26 @@ +--- + +- type: model + name: Guanaco + organization: University of Washington + description: Guanaco is a model family trained with QLORA, an efficient finetuning + approach that reduces memory usage enough to finetune a 65B parameter model + on a single 48GB GPU while preserving full 16-bit finetuning task performance. + created_date: 2023-05-23 + url: https://arxiv.org/pdf/2305.14314v1.pdf + model_card: '' + modality: natural language text + analysis: Reports results on the Vicuna benchmark and compares performance level + and time expenditure with ChatGPT + size: 33B parameters + dependencies: [QLoRA, OASST1] + training_emissions: '' + training_time: '' + training_hardware: A single 24 GB GPU + quality_control: '' + access: open + license: MIT + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' diff --git a/assets/writer.yaml b/assets/writer.yaml new file mode 100644 index 00000000..a56591b9 --- /dev/null +++ b/assets/writer.yaml @@ -0,0 +1,53 @@ +--- + +- type: model + name: Palmyra + organization: Writer + description: Palmyra is a privacy-first LLM for enterprises trained on business + and marketing writing. + created_date: + value: 2023-01-01 + explanation: The model was stated to be published in January, but which day + is not specified on the website. + url: https://gpt3demo.com/apps/palmyra + model_card: https://huggingface.co/Writer/palmyra-base + modality: text + analysis: Evaluated on the SuperGLUE benchmark + size: 128M (Small), 5B (Base), 20B (Large) parameters + dependencies: [Writer dataset] + training_emissions: unknown + training_time: unknown + training_hardware: '' + quality_control: '' + access: open + license: Apache 2.0 + intended_uses: generating text from a prompt + prohibited_uses: '' + monitoring: '' + feedback: https://huggingface.co/Writer/palmyra-base/discussions + +- type: model + name: Camel + organization: Writer + description: Camel is an instruction-following large language model tailored for + advanced NLP and comprehension capabilities. 
+ created_date: + value: 2023-04-01 + explanation: The model was stated to be published in April, but which day is + not specified on the website. + url: https://chatcamel.vercel.app/ + model_card: https://huggingface.co/Writer/camel-5b-hf + modality: text + analysis: '' + size: 5B parameters + dependencies: [Palmyra, Camel dataset] + training_emissions: unknown + training_time: unknown + training_hardware: '' + quality_control: '' + access: open + license: Apache 2.0 + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: https://huggingface.co/Writer/camel-5b-hf/discussions diff --git a/js/main.js b/js/main.js index c62ac282..5434dd3c 100644 --- a/js/main.js +++ b/js/main.js @@ -629,6 +629,30 @@ function loadAssetsAndRenderPageContent() { const paths = [ 'assets/adept.yaml', + 'assets/writer.yaml', + 'assets/berkeley.yaml', + 'assets/faraday.yaml', + 'assets/autogpt.yaml', + 'assets/suno.yaml', + 'assets/amazon.yaml', + 'assets/casia.yaml', + 'assets/lehigh.yaml', + 'assets/nolano.yaml', + 'assets/chatglm.yaml', + 'assets/uae.yaml', + 'assets/singapore.yaml', + 'assets/uwashington.yaml', + 'assets/h2oai.yaml', + 'assets/inflection.yaml', + 'assets/mosaic.yaml', + 'assets/nextdoor.yaml', + 'assets/columbia.yaml', + 'assets/nanyang.yaml', + 'assets/portkey.yaml', + 'assets/bigcode.yaml', + 'assets/transformify.yaml', + 'assets/paladin.yaml', + 'assets/lmsys.yaml', 'assets/ai2.yaml', 'assets/ai21.yaml', 'assets/aleph_alpha.yaml',