From dfd1e9fff696729a323bf9269f2360f3a7748cdc Mon Sep 17 00:00:00 2001 From: jxue16 <105090474+jxue16@users.noreply.github.com> Date: Mon, 9 Sep 2024 00:05:09 -0700 Subject: [PATCH 01/29] add notable summer assets --- assets/360.yaml | 8 ++- assets/ai21.yaml | 33 +++++++++ assets/aleph_alpha.yaml | 36 ++++++++++ assets/anthropic.yaml | 40 +++++++++++ assets/aspia_space,_institu.yaml | 35 ++++++++++ assets/cartesia.yaml | 14 +++- assets/deepmind.yaml | 64 +++++++++++++---- assets/evolutionaryscale.yaml | 36 ++++++++++ assets/google.yaml | 68 ++++++++++++++++++ assets/laion_e.v..yaml | 39 +++++++++++ assets/lg_ai_research.yaml | 40 +++++++++++ assets/meta.yaml | 49 ++++++++++++- assets/microsoft.yaml | 52 +++++++++++++- assets/mistral.yaml | 113 +++++++++++++++++++++++++++++- assets/openai.yaml | 12 ++-- assets/qwen_team.yaml | 42 +++++++++++ assets/roblox.yaml | 35 ++++++++++ assets/runway_ai,_inc..yaml | 34 +++++++++ assets/samba.yaml | 27 +++++++ assets/stability_ai.yaml | 116 +++++++++++++++++++++++++++++++ assets/stanford.yaml | 39 +++++++++++ assets/team_glm,_zhipu_ai,_.yaml | 35 ++++++++++ assets/unknown.yaml | 38 ++++++++++ assets/writer.yaml | 92 ++++++++++++++++++++++++ assets/xai.yaml | 36 ++++++++++ js/main.js | 10 +++ 26 files changed, 1111 insertions(+), 32 deletions(-) create mode 100644 assets/aspia_space,_institu.yaml create mode 100644 assets/evolutionaryscale.yaml create mode 100644 assets/laion_e.v..yaml create mode 100644 assets/lg_ai_research.yaml create mode 100644 assets/qwen_team.yaml create mode 100644 assets/roblox.yaml create mode 100644 assets/runway_ai,_inc..yaml create mode 100644 assets/stability_ai.yaml create mode 100644 assets/team_glm,_zhipu_ai,_.yaml create mode 100644 assets/unknown.yaml diff --git a/assets/360.yaml b/assets/360.yaml index 0926d33a..6ed6bcc0 100644 --- a/assets/360.yaml +++ b/assets/360.yaml @@ -2,12 +2,14 @@ - type: model name: 360 Zhinao organization: 360 Security - description: 360 Zhinao is a multilingual LLM in Chinese and English with chat capabilities. + description: 360 Zhinao is a multilingual LLM in Chinese and English with chat + capabilities. created_date: 2024-05-23 url: https://arxiv.org/pdf/2405.13386 model_card: none modality: text; text - analysis: Achieved competitive performance on relevant benchmarks against other 7B models in Chinese, English, and coding tasks. + analysis: Achieved competitive performance on relevant benchmarks against other + 7B models in Chinese, English, and coding tasks. size: 7B parameters dependencies: [] training_emissions: unknown @@ -19,4 +21,4 @@ intended_uses: '' prohibited_uses: '' monitoring: '' - feedback: none \ No newline at end of file + feedback: none diff --git a/assets/ai21.yaml b/assets/ai21.yaml index f78e4283..33520f40 100644 --- a/assets/ai21.yaml +++ b/assets/ai21.yaml @@ -318,3 +318,36 @@ prohibited_uses: '' monitoring: '' feedback: https://huggingface.co/ai21labs/Jamba-v0.1/discussions +- type: model + name: Jamba 1.5 Open Model Family (Jamba 1.5 Mini, Jamba 1.5 Large) + organization: AI21 + description: A family of models that demonstrate superior long context handling, + speed, and quality. Built on a novel SSM-Transformer architecture, they surpass + other models in their size class. These models are useful for enterprise applications, + such as lengthy document summarization and analysis. The Jamba 1.5 family also + includes the longest context window, at 256K, among open models. They are fast, + quality-focused, and handle long contexts efficiently. 
+ created_date: 2024-08-22 + url: https://www.ai21.com/blog/announcing-jamba-model-family + model_card: unknown + modality: Unknown + analysis: The models were evaluated based on their ability to handle long contexts, + speed, and quality. They outperformed competitors in their size class, scoring + high on the Arena Hard benchmark. + size: Unknown + dependencies: [SSM-Transformer architecture, Mamba] + training_emissions: Unknown + training_time: Unknown + training_hardware: For speed comparisons, Jamba 1.5 Mini used 2xA100 80GB GPUs, + and Jamba 1.5 Large used 8xA100 80GB GPUs. + quality_control: The models were evaluated on the Arena Hard benchmark. For maintaining + long context performance, they were tested on the RULER benchmark. + access: Open + license: Jamba Open Model License + intended_uses: The models are built for enterprise scale AI applications. They + are purpose-built for efficiency, speed, and ability to solve critical tasks + that businesses care about, such as lengthy document summarization and analysis. + They can also be used for RAG and agentic workflows. + prohibited_uses: Unknown + monitoring: Unknown + feedback: Unknown diff --git a/assets/aleph_alpha.yaml b/assets/aleph_alpha.yaml index 24fc2fe9..ae6059a0 100644 --- a/assets/aleph_alpha.yaml +++ b/assets/aleph_alpha.yaml @@ -99,3 +99,39 @@ prohibited_uses: '' monitoring: '' feedback: '' +- type: model + name: Pharia-1-LLM-7B + organization: Aleph Alpha + description: Pharia-1-LLM-7B is a model that falls within the Pharia-1-LLM model + family. It is designed to deliver short, controlled responses that match the + performance of leading open-source models around 7-8 billion parameters. The + model is culturally and linguistically tuned for German, French, and Spanish + languages. It is trained on carefully curated data in line with relevant EU + and national regulations. The model shows improved token efficiency and is particularly + effective in domain-specific applications, especially in the automotive and + engineering industries. It can also be aligned to user preferences, making it + appropriate for critical applications without the risk of shut-down behaviour. + created_date: 2024-09-08 + url: https://aleph-alpha.com/introducing-pharia-1-llm-transparent-and-compliant/#:~:text=Pharia%2D1%2DLLM%2D7B + model_card: unknown + modality: Text; text + analysis: Extensive evaluations were done with ablation experiments performed + on pre-training benchmarks such as lambada, triviaqa, hellaswag, winogrande, + webqs, arc, and boolq. Direct comparisons were also performed with applications + like GPT and Llama 2. + size: 7B parameters + dependencies: [GPT, Llama 2] + training_emissions: Unknown + training_time: Unknown + training_hardware: Unknown + quality_control: The model comes with additional safety guardrails via alignment + methods to ensure safe usage. Training data is carefully curated to ensure compliance + with EU and national regulations. + access: Open + license: Aleph Open + intended_uses: The model is intended for use in domain-specific applications, + particularly in the automotive and engineering industries. It can also be tailored + to user preferences. + prohibited_uses: Unknown + monitoring: Unknown + feedback: Feedback can be sent to support@aleph-alpha.com. diff --git a/assets/anthropic.yaml b/assets/anthropic.yaml index ac94a976..740d24f9 100644 --- a/assets/anthropic.yaml +++ b/assets/anthropic.yaml @@ -598,3 +598,43 @@ and decisions related to financing, employment, and housing. 
  monitoring: ''
  feedback: none
+- type: model
+  name: Claude 3.5 Sonnet
+  organization: Anthropic PBC
+  description: Claude 3.5 Sonnet is an AI model with advanced understanding and
+    generation abilities in text, vision, and code. It sets new industry benchmarks
+    for graduate-level reasoning (GPQA), undergrad-level knowledge (MMLU), coding
+    proficiency (HumanEval), and visual reasoning. The model operates at twice the
+    speed of its predecessor, Claude 3 Opus, and is designed to tackle tasks like
+    context-sensitive customer support, orchestrating multi-step workflows, interpreting
+    charts and graphs, and transcribing text from images.
+  created_date: 2024-06-21
+  url: https://www.anthropic.com/news/claude-3-5-sonnet
+  model_card: unknown
+  modality: image, text; text
+  analysis: The model has been evaluated on a range of tests including graduate-level
+    reasoning (GPQA), undergraduate-level knowledge (MMLU), coding proficiency (HumanEval),
+    and standard vision benchmarks. In an internal agentic coding evaluation, Claude
+    3.5 Sonnet solved 64% of problems, outperforming the previous version, Claude
+    3 Opus, which solved 38%.
+  size: Unknown
+  dependencies: []
+  training_emissions: Unknown
+  training_time: Unknown
+  training_hardware: Unknown
+  quality_control: The model underwent a red-teaming assessment, and has been tested
+    and refined by external experts. It was also provided to the UK's AI Safety
+    Institute (UK AISI) for a pre-deployment safety evaluation.
+  access: limited
+  license: unknown
+  intended_uses: The model is intended for complex tasks such as context-sensitive
+    customer support, orchestrating multi-step workflows, interpreting charts and
+    graphs, transcribing text from images, as well as writing, editing, and executing
+    code.
+  prohibited_uses: Misuse of the model is discouraged though specific use cases
+    are not mentioned.
+  monitoring: Measures have been taken to evaluate the model against various types
+    of misuse, and policy feedback from external experts has been integrated to
+    ensure robustness of evaluations.
+  feedback: Feedback on Claude 3.5 Sonnet can be submitted directly in-product to
+    inform the development roadmap and improve user experience.
diff --git a/assets/aspia_space,_institu.yaml b/assets/aspia_space,_institu.yaml
new file mode 100644
index 00000000..c8475605
--- /dev/null
+++ b/assets/aspia_space,_institu.yaml
@@ -0,0 +1,35 @@
+---
+- type: model
+  name: AstroPT
+  organization: Aspia Space, Instituto de Astrofísica de Canarias (IAC), UniverseTBD,
+    Astrophysics Research Institute, Liverpool John Moores University, Departamento
+    Astrofísica, Universidad de la Laguna, Observatoire de Paris, LERMA, PSL University,
+    and Université Paris-Cité.
+  description: AstroPT is an autoregressive pretrained transformer developed with
+    astronomical use-cases in mind. The models have been pretrained on 8.6 million
+    512x512 pixel grz-band galaxy postage stamp observations from the DESI Legacy
+    Survey DR8. They have created a range of models with varying complexity, ranging
+    from 1 million to 2.1 billion parameters.
+  created_date: 2024-09-08
+  url: https://arxiv.org/pdf/2405.14930v1
+  model_card: unknown
+  modality: Image; Image (The model takes an image as input and processes it to
+    provide insights)
+  analysis: The models’ performance on downstream tasks was evaluated by linear
+    probing.
The models follow a similar saturating log-log scaling law to textual + models, their performance improves with the increase in model size up to the + saturation point of parameters. + size: Ranges from 1 million to 2.1 billion parameters. + dependencies: [DESI Legacy Survey DR8] + training_emissions: Unknown + training_time: Unknown + training_hardware: Unknown + quality_control: The models’ performances were evaluated on downstream tasks as + measured by linear probing. + access: open + license: MIT + intended_uses: The models are intended for astronomical use-cases, particularly + in handling and interpreting large observation data from astronomical sources. + prohibited_uses: Unknown + monitoring: Unknown + feedback: Any problem with the model can be reported to Michael J. Smith at mike@mjjsmith.com. diff --git a/assets/cartesia.yaml b/assets/cartesia.yaml index a3490c7d..573e1b64 100644 --- a/assets/cartesia.yaml +++ b/assets/cartesia.yaml @@ -2,12 +2,18 @@ - type: model name: Sonic organization: Cartesia - description: Sonic is a low-latency voice model that generates lifelike speech. Developed by Cartesia, it was designed to be an efficient real-time AI capable of processing any-sized contexts and running on any device. + description: Sonic is a low-latency voice model that generates lifelike speech. + Developed by Cartesia, it was designed to be an efficient real-time AI capable + of processing any-sized contexts and running on any device. created_date: 2024-05-29 url: https://cartesia.ai/blog/sonic model_card: none modality: text; audio - analysis: Extensive testing on Multilingual Librispeech dataset resulted in 20% lower validation perplexity. In downstream evaluations, this leads to a 2x lower word error rate and a 1 point higher quality score. Sonic also displays impressive performance metrics at inference, achieving lower latency (1.5x lower time-to-first-audio), faster inference speed (2x lower real-time factor), and higher throughput (4x). + analysis: Extensive testing on Multilingual Librispeech dataset resulted in 20% + lower validation perplexity. In downstream evaluations, this leads to a 2x lower + word error rate and a 1 point higher quality score. Sonic also displays impressive + performance metrics at inference, achieving lower latency (1.5x lower time-to-first-audio), + faster inference speed (2x lower real-time factor), and higher throughput (4x). size: 2024-05-29 dependencies: [Multilingual Librispeech dataset] training_emissions: unknown @@ -16,7 +22,9 @@ quality_control: '' access: limited license: unknown - intended_uses: Sonic has potential applications across customer support, entertainment, and content creation and is a part of Cartesias broader mission to bring real-time multimodal intelligence to every device. + intended_uses: Sonic has potential applications across customer support, entertainment, + and content creation and is a part of Cartesias broader mission to bring real-time + multimodal intelligence to every device. prohibited_uses: unknown monitoring: unknown feedback: Contact through the provided form or via email at join@cartesia.ai. diff --git a/assets/deepmind.yaml b/assets/deepmind.yaml index db9a6c38..319323ee 100644 --- a/assets/deepmind.yaml +++ b/assets/deepmind.yaml @@ -687,28 +687,50 @@ - type: model name: Imagen 3 organization: Google DeepMind - description: Imagen 3 is a high-quality text-to-image model, capable of generating images with better detail, richer lighting, and fewer distracting artifacts compared to previous models. 
Improved understanding of prompts allows for a wide range of visual styles and captures small details from longer prompts. It also understands prompts written in natural, everyday language, making it easier to use. Imagen 3 is available in multiple versions, optimized for different types of tasks, from generating quick sketches to high-resolution images. + description: Imagen 3 is a high-quality text-to-image model, capable of generating + images with better detail, richer lighting, and fewer distracting artifacts + compared to previous models. Improved understanding of prompts allows for a + wide range of visual styles and captures small details from longer prompts. + It also understands prompts written in natural, everyday language, making it + easier to use. Imagen 3 is available in multiple versions, optimized for different + types of tasks, from generating quick sketches to high-resolution images. created_date: 2024-05-14 url: https://deepmind.google/technologies/imagen-3/ model_card: none modality: text; image - analysis: The model was tested and evaluated on various prompts to assess its understanding of natural language, its ability to generate high-quality images in various formats and styles and generate fine details and complex textures. Red teaming and evaluations were conducted on topics including fairness, bias, and content safety. + analysis: The model was tested and evaluated on various prompts to assess its + understanding of natural language, its ability to generate high-quality images + in various formats and styles and generate fine details and complex textures. + Red teaming and evaluations were conducted on topics including fairness, bias, + and content safety. size: unknown dependencies: [] training_emissions: unknown training_time: unknown training_hardware: unknown - quality_control: Extensive filtering and data labeling were used to minimize harmful content in datasets and reduce the likelihood of harmful outputs. Privacy, safety, and security technologies were leveraged in deploying the model, including watermarking tool SynthID. + quality_control: Extensive filtering and data labeling were used to minimize harmful + content in datasets and reduce the likelihood of harmful outputs. Privacy, safety, + and security technologies were leveraged in deploying the model, including watermarking + tool SynthID. access: limited license: unknown - intended_uses: Generate high-quality images for various purposes, from photorealistic landscapes to textured oil paintings or whimsical claymation scenes. It is useful in situations where detailed visual representation is required based on the textual description. + intended_uses: Generate high-quality images for various purposes, from photorealistic + landscapes to textured oil paintings or whimsical claymation scenes. It is useful + in situations where detailed visual representation is required based on the + textual description. prohibited_uses: unknown - monitoring: Through digital watermarking tool SynthID embedded in pixels for detection and identification. + monitoring: Through digital watermarking tool SynthID embedded in pixels for detection + and identification. feedback: unknown - type: model name: Veo organization: Google DeepMind - description: Veo is Google DeepMind's most capable video generation model to date. It generates high-quality, 1080p resolution videos that can go beyond a minute, in a wide range of cinematic and visual styles. 
It accurately captures the nuance and tone of a prompt, and provides an unprecedented level of creative control. The model is also capable of maintaining visual consistency in video frames, and supports masked editing. + description: Veo is Google DeepMind's most capable video generation model to date. + It generates high-quality, 1080p resolution videos that can go beyond a minute, + in a wide range of cinematic and visual styles. It accurately captures the nuance + and tone of a prompt, and provides an unprecedented level of creative control. + The model is also capable of maintaining visual consistency in video frames, + and supports masked editing. created_date: 2024-05-14 url: https://deepmind.google/technologies/veo/ model_card: none @@ -719,31 +741,47 @@ training_emissions: unknown training_time: unknown training_hardware: unknown - quality_control: Videos created by Veo are watermarked using SynthID, DeepMinds tool for watermarking and identifying AI-generated content, and passed through safety filters and memorization checking processes to mitigate privacy, copyright and bias risks. + quality_control: Videos created by Veo are watermarked using SynthID, DeepMinds + tool for watermarking and identifying AI-generated content, and passed through + safety filters and memorization checking processes to mitigate privacy, copyright + and bias risks. access: closed license: unknown - intended_uses: Veo is intended to help create tools that make video production accessible to everyone. It can be used by filmmakers, creators, or educators for storytelling, education and more. Some of its features will be also brought to products like YouTube Shorts. + intended_uses: Veo is intended to help create tools that make video production + accessible to everyone. It can be used by filmmakers, creators, or educators + for storytelling, education and more. Some of its features will be also brought + to products like YouTube Shorts. prohibited_uses: unknown monitoring: unknown - feedback: Feedback from leading creators and filmmakers is incorporated to improve Veo's generative video technologies. + feedback: Feedback from leading creators and filmmakers is incorporated to improve + Veo's generative video technologies. - type: model name: Gemini 1.5 Flash organization: Google DeepMind - description: Gemini Flash is a lightweight model, optimized for speed and efficiency. It features multimodal reasoning and a breakthrough long context window of up to one million tokens. It's designed to serve at scale and is efficient on cost, providing quality results at a fraction of the cost of larger models. + description: Gemini Flash is a lightweight model, optimized for speed and efficiency. + It features multimodal reasoning and a breakthrough long context window of up + to one million tokens. It's designed to serve at scale and is efficient on cost, + providing quality results at a fraction of the cost of larger models. created_date: 2024-05-30 url: https://deepmind.google/technologies/gemini/flash/ model_card: none modality: audio, image, text, video; text - analysis: The model was evaluated on various benchmarks like General MMLU, Code Natural2Code, MATH, GPQA, Big-Bench, WMT23, MMMU, and MathVista providing performance across various domains like multilingual translation, image processing, and code generation. 
+ analysis: The model was evaluated on various benchmarks like General MMLU, Code + Natural2Code, MATH, GPQA, Big-Bench, WMT23, MMMU, and MathVista providing performance + across various domains like multilingual translation, image processing, and + code generation. size: unknown dependencies: [] training_emissions: unknown training_time: unknown training_hardware: unknown - quality_control: The research team is continually exploring new ideas at the frontier of AI and building innovative products for consistent progress. + quality_control: The research team is continually exploring new ideas at the frontier + of AI and building innovative products for consistent progress. access: limited license: Googles Terms and Conditions - intended_uses: The model is intended for developer and enterprise use cases. It can process hours of video and audio, and hundreds of thousands of words or lines of code, making it beneficial for a wide range of tasks. + intended_uses: The model is intended for developer and enterprise use cases. It + can process hours of video and audio, and hundreds of thousands of words or + lines of code, making it beneficial for a wide range of tasks. prohibited_uses: '' monitoring: unknown feedback: none diff --git a/assets/evolutionaryscale.yaml b/assets/evolutionaryscale.yaml new file mode 100644 index 00000000..0ae4658c --- /dev/null +++ b/assets/evolutionaryscale.yaml @@ -0,0 +1,36 @@ +--- +- type: model + name: ESM3 + organization: EvolutionaryScale + description: ESM3 is the first generative model for biology that simultaneously + reasons over the sequence, structure, and function of proteins. It is trained + across the natural diversity of Earth, reasoning over billions of proteins from + diverse environments. It advances the ability to program and create with the + code of life, simulating evolution, and making biology programmable. ESM3 is + generative, and scientists can guide the model to create proteins for various + applications. + created_date: 2024-06-25 + url: https://www.evolutionaryscale.ai/blog/esm3-release + model_card: unknown + modality: Text; Text (the model takes textual instructions as inputs and generates + textual descriptions of proteins as outputs) + analysis: The model was tested in the generation of a new green fluorescent protein. + Its effectiveness was compared to natural evolutionary processes, and it was + deemed to simulate over 500 million years of evolution. + size: 98B parameters (Dense) + dependencies: [ESM2(base model), largest dataset of proteins] + training_emissions: Unknown + training_time: Unknown + training_hardware: One of the highest throughput GPU clusters in the world. + quality_control: The creators have put in place a responsible development framework + to ensure transparency and accountability from the start. ESM3 was tested in + the generation of a new protein, ensuring its quality and effectiveness. + access: Open + license: Unknown + intended_uses: To engineer biology from first principles. It functions as a tool + for scientists to create proteins for various applications, including medicine, + biology research, and clean energy. + prohibited_uses: Unknown + monitoring: Measures associated with their responsible development framework, + though specific measures are not specified. + feedback: Unknown diff --git a/assets/google.yaml b/assets/google.yaml index 965e3289..574a551c 100644 --- a/assets/google.yaml +++ b/assets/google.yaml @@ -1811,3 +1811,71 @@ domain. 
monitoring: '' feedback: none +- type: model + name: Imagen 3 + organization: Google DeepMind + description: Imagen 3 is a high-quality text-to-image model capable of generating + images with improved detail, richer lighting, and fewer distracting artifacts. + It features improved prompt understanding and can be used to generate a wide + array of visual styles from quick sketches to high-resolution images. The model + is available in multiple versions, each optimized for particular types of tasks. + Imagen 3 has been trained to capture nuances like specific camera angles or + compositions in long, complex prompts, making it a versatile tool for image + generation from textual inputs. + created_date: 2024-09-05 + url: https://deepmind.google/technologies/imagen-3/ + model_card: unknown + modality: Text; Image + analysis: Unknown + size: Unknown + dependencies: [] + training_emissions: Unknown + training_time: Unknown + training_hardware: Unknown + quality_control: Unknown + access: open + license: Unknown + intended_uses: Imagen 3 is intended to be used for generation of high-resolution + images from textual prompts, from photorealistic landscapes to richly textured + oil paintings or whimsical claymation scenes. It can also be used for stylized + birthday cards, presentations, and more, due to its improved text rendering + capabilities. + prohibited_uses: Unknown + monitoring: Unknown + feedback: Unknown +- type: model + name: Gemma 2 + organization: Google DeepMind + description: Gemma 2 is an open model that offers best-in-class performance and + runs at incredible speed across different hardware. It easily integrates with + other AI tools. This model is built on a redesigned architecture engineered + for exceptional performance and inference efficiency. It is available in both + 9 billion (9B) and 27 billion (27B) parameter sizes. Gemma 2 is optimized to + run at incredible speed across a range of hardware, from powerful gaming laptops + and high-end desktops, to cloud-based setups. + created_date: 2024-06-27 + url: https://blog.google/technology/developers/google-gemma-2/ + model_card: unknown + modality: text; text + analysis: The 27B Gemma 2 model outperforms other open models in its size category + offering cutting-edge performance. Specific details can be found in the provided + technical report. + size: 27B parameters (dense) + dependencies: [Gemma, CodeGemma, RecurrentGemma, PaliGemma] + training_emissions: Unknown + training_time: Unknown + training_hardware: Google Cloud TPU host, NVIDIA A100 80GB Tensor Core GPU, NVIDIA + H100 Tensor Core GPU + quality_control: Google DeepMind implemented a refined architecture for Gemma + 2. The model has improvements in safety and efficiency over the first generation. + The deployment of Gemma 2 on Vertex AI, scheduled for the next month, will offer + effortless management of the model. + access: Open + license: Gemma (commercially-friendly license given by Google DeepMind) + intended_uses: Gemma 2 is designed for developers and researchers for various + AI tasks. It can be used via the integrations it offers with other AI tools/platforms + and can additionally be deployed for more accessible and budget-friendly AI + deployments. + prohibited_uses: Not specified + monitoring: Unknown + feedback: Unknown diff --git a/assets/laion_e.v..yaml b/assets/laion_e.v..yaml new file mode 100644 index 00000000..6cd18f23 --- /dev/null +++ b/assets/laion_e.v..yaml @@ -0,0 +1,39 @@ +--- +- type: model + name: Re-LAION-5B + organization: LAION e.V. 
+ description: Re-LAION-5B is an updated version of LAION-5B, the first web-scale, + text-link to images pair dataset to be thoroughly cleaned of known links to + suspected CSAM. It is an open dataset for fully reproducible research on language-vision + learning. This model was developed in response to issues identified by the Stanford + Internet Observatory in December 2023. The updates were made in collaboration + with multiple organizations like the Internet Watch Foundation (IWF), the Canadian + Center for Child Protection (C3P), and Stanford Internet Observatory. + created_date: 2024-08-30 + url: https://laion.ai/blog/relaion-5b/ + model_card: unknown + modality: Text; Image + analysis: Re-LAION-5B aims to fix the issues as reported by Stanford Internet + Observatory for the original LAION-5B. It is available for download in two versions, + research and research-safe. In total, 2236 links that potentially led to inappropriate + content were removed. + size: 5.5B (5,526,641,167) text-link to images pairs + dependencies: [LAION-5B] + training_emissions: Unknown + training_time: Unknown + training_hardware: Unknown + quality_control: The model utilized lists of link and image hashes provided by + partner organizations. These were used to remove inappropriate links from the + original LAION-5B dataset to create Re-LAION-5B. + access: Open + license: Apache 2.0 + intended_uses: Re-LAION-5B is designed for research on language-vision learning. + It can also be used by third parties to clean existing derivatives of LAION-5B + by generating diffs and removing all matched content from their versions. + prohibited_uses: The dataset should not be utilized for purposes that breach legal + parameters or ethical standards, such as dealing with illegal content. + monitoring: This version is a response to continuous scrutiny & safety revisions. + It's also meant to allow inspection and validation by a broad community. + feedback: Problems with the dataset should be reported to the LAION organization. + They have open lines for communication with their partners and the broader research + community. diff --git a/assets/lg_ai_research.yaml b/assets/lg_ai_research.yaml new file mode 100644 index 00000000..e669c028 --- /dev/null +++ b/assets/lg_ai_research.yaml @@ -0,0 +1,40 @@ +--- +- type: model + name: EXAONE 3.0 Instruction Tuned Language Model + organization: LG AI Research + description: EXAONE 3.0 is an instruction-tuned large language model developed + by LG AI Research. It demonstrates notably robust performance across a range + of tasks and benchmarks. It has been fine-tuned to be capable of complex reasoning + and has a particular proficiency in Korean. The released 7.8B parameter model + is designed to promote open research and innovation. + created_date: 2024-09-08 + url: https://arxiv.org/pdf/2408.03541 + model_card: unknown + modality: Text; text + analysis: The model was evaluated extensively across a wide range of public and + in-house benchmarks. The comparative analysis showed that the performance of + EXAONE 3.0 was competitive in English and excellent in Korean compared to other + large language models of a similar size. + size: 7.8B parameters (dense) + dependencies: + - GQA + - SwiGLU + - Rotary Position Embeddings + - MeCab + - BBPE + training_emissions: Unknown + training_time: Unknown + training_hardware: Unknown + quality_control: Extensive pre-training on a diverse dataset, and advanced post-training + techniques were employed to enhance instruction-following capabilities. 
The + model was also trained to fully comply with data handling standards. + access: Open + license: Unknown + intended_uses: The model was intended for non-commercial and research purposes. + The capabilities of the model allow for use cases that involve advanced AI and + language processing tasks, particularly in fields requiring proficiency in English + and Korean. + prohibited_uses: Commercial use is not intended for this model. Its intended use + is for non-commercial research and innovation. + monitoring: Unknown + feedback: Unknown diff --git a/assets/meta.yaml b/assets/meta.yaml index 9b831737..f5f115c8 100644 --- a/assets/meta.yaml +++ b/assets/meta.yaml @@ -830,17 +830,24 @@ - type: model name: Chameleon organization: Meta FAIR - description: Chameleon is a family of early-fusion token-based mixed-modal models capable of understanding and generating images and text in any arbitrary sequence. + description: Chameleon is a family of early-fusion token-based mixed-modal models + capable of understanding and generating images and text in any arbitrary sequence. created_date: 2024-05-17 url: https://arxiv.org/pdf/2405.09818 model_card: none modality: image, text; image, text - analysis: Evaluated on a comprehensive range of tasks, including visual question answering, image captioning, text generation, image generation, and long-form mixed modal generation. Chameleon demonstrates broad and general capabilities, including state-of-the-art performance in image captioning tasks, outperforms Llama-2 in text-only tasks while being competitive with models such as Mixtral 8x7B and Gemini-Pro. + analysis: Evaluated on a comprehensive range of tasks, including visual question + answering, image captioning, text generation, image generation, and long-form + mixed modal generation. Chameleon demonstrates broad and general capabilities, + including state-of-the-art performance in image captioning tasks, outperforms + Llama-2 in text-only tasks while being competitive with models such as Mixtral + 8x7B and Gemini-Pro. size: 34B parameters dependencies: [] training_emissions: unknown training_time: unknown - training_hardware: Meta's Research Super Cluster (powered by NVIDIA A100 80GB GPUs) + training_hardware: Meta's Research Super Cluster (powered by NVIDIA A100 80GB + GPUs) quality_control: '' access: open license: unknown @@ -848,3 +855,39 @@ prohibited_uses: '' monitoring: '' feedback: none +- type: model + name: Llama 3.1 405B + organization: Meta AI + description: Llama 3.1 405B is the first openly available model that rivals the + top AI models when it comes to state-of-the-art capabilities in general knowledge, + steerability, math, tool use, and multilingual translation. With the release + of the 405B model, the Llama versions support advanced use cases, such as long-form + text summarization, multilingual conversational agents, and coding assistants. + It is the largest and most capable openly available foundation model. + created_date: 2024-07-23 + url: https://ai.meta.com/blog/meta-llama-3-1/?utm_source=twitter&utm_medium=organic_social&utm_content=video&utm_campaign=llama31 + model_card: unknown + modality: text; text + analysis: The model was evaluated on over 150 benchmark datasets that span a wide + range of languages. An experimental evaluation suggests that the model is competitive + with leading foundation models across a range of tasks. Also, smaller models + of Llama 3.1 405B are competitive with closed and open models that have a similar + number of parameters. 
+ size: 405B parameters (dense) + dependencies: [Unknown] + training_emissions: Unknown + training_time: Unknown + training_hardware: Over 16 thousand H100 GPUs + quality_control: The development process was focused on keeping the model scalable + and straightforward. It adopted an iterative post-training procedure, where + each round uses supervised fine-tuning and direct preference optimization. The + model also underwent quality assurance and filtering for pre-and post-training + data. + access: open + license: Unknown + intended_uses: For advanced use cases, such as long-form text summarization, multilingual + conversational agents, and coding assistants. May also be useful in the development + of custom offerings and systems by developers. + prohibited_uses: Unknown + monitoring: Unknown + feedback: Unknown diff --git a/assets/microsoft.yaml b/assets/microsoft.yaml index eb538758..cf431631 100644 --- a/assets/microsoft.yaml +++ b/assets/microsoft.yaml @@ -947,7 +947,8 @@ - type: model name: Aurora organization: Microsoft - description: Aurora is a large-scale foundation model of the atmosphere trained on over a million hours of diverse weather and climate data. + description: Aurora is a large-scale foundation model of the atmosphere trained + on over a million hours of diverse weather and climate data. created_date: 2024-05-28 url: https://arxiv.org/pdf/2405.13063 model_card: none @@ -968,12 +969,15 @@ - type: model name: Prov-GigaPath organization: Microsoft - description: Prov-GigaPath is a whole-slide pathology foundation model pretrained on 1.3 billion 256 × 256 pathology image tiles. + description: Prov-GigaPath is a whole-slide pathology foundation model pretrained + on 1.3 billion 256 × 256 pathology image tiles. created_date: 2024-05-22 url: https://www.nature.com/articles/s41586-024-07441-w model_card: none modality: image; embeddings - analysis: Evaluated on a digital pathology benchmark comprising 9 cancer subtyping tasks and 17 pathomics tasks, with Prov-GigaPath demonstrating SoTA performance in 25 out of 26 tasks. + analysis: Evaluated on a digital pathology benchmark comprising 9 cancer subtyping + tasks and 17 pathomics tasks, with Prov-GigaPath demonstrating SoTA performance + in 25 out of 26 tasks. size: unknown dependencies: [GigaPath] training_emissions: unknown @@ -986,3 +990,45 @@ prohibited_uses: '' monitoring: '' feedback: none +- type: model + name: Phi-3.5-MoE + organization: Microsoft + description: Phi-3.5-MoE is a lightweight, state-of-the-art open model built upon + datasets used for Phi-3 - synthetic data and filtered publicly available documents, + with a focus on very high-quality, reasoning dense data. It supports multilingual + and has a 128K context length in tokens. The model underwent a rigorous enhancement + process, incorporating supervised fine-tuning, proximal policy optimization, + and direct preference optimization to ensure instruction adherence and robust + safety measures. + created_date: 2024-09-08 + url: https://huggingface.co/microsoft/Phi-3.5-MoE-instruct + model_card: https://huggingface.co/microsoft/Phi-3.5-MoE-instruct + modality: Unknown + analysis: The model was evaluated across a variety of public benchmarks, comparing + with a set of models including Mistral-Nemo-12B-instruct-2407, Llama-3.1-8B-instruct, + Gemma-2-9b-It, Gemini-1.5-Flash, and GPT-4o-mini-2024-07-18. It achieved a similar + level of language understanding and math as much larger models. 
It also displayed
+    superior performance in reasoning capability, even with only 6.6B active parameters.
+    It was also evaluated for multilingual tasks.
+  size: 6.6B active parameters
+  dependencies: [Phi-3]
+  training_emissions: Unknown
+  training_time: Unknown
+  training_hardware: Unknown
+  quality_control: The model was enhanced through supervised fine-tuning, proximal
+    policy optimization, and direct preference optimization processes for safety
+    measures.
+  access: Open
+  license: Unknown
+  intended_uses: The model is intended for commercial and research use in multiple
+    languages. It is designed to accelerate research on language and multimodal
+    models, and for use as a building block for generative AI powered features.
+    It is suitable for general purpose AI systems and applications which require
+    memory/compute constrained environments, latency bound scenarios, and strong
+    reasoning.
+  prohibited_uses: The model should not be used for downstream purposes it was not
+    specifically designed or evaluated for. Developers should evaluate and mitigate
+    for accuracy, safety, and fairness before using within a specific downstream
+    use case, particularly for high risk scenarios.
+  monitoring: Unknown
+  feedback: Unknown
diff --git a/assets/mistral.yaml b/assets/mistral.yaml
index f8fd24ad..0c9f7d7a 100644
--- a/assets/mistral.yaml
+++ b/assets/mistral.yaml
@@ -67,12 +67,18 @@
 - type: model
   name: Codestral
   organization: Mistral AI
-  description: Codestral is an open-weight generative AI model explicitly designed for code generation tasks. It helps developers write and interact with code through a shared instruction and completion API endpoint. Mastering code and English, it can be used to design advanced AI applications for software developers. It is fluent in 80+ programming languages.
+  description: Codestral is an open-weight generative AI model explicitly designed
+    for code generation tasks. It helps developers write and interact with code
+    through a shared instruction and completion API endpoint. Mastering code and
+    English, it can be used to design advanced AI applications for software developers.
+    It is fluent in 80+ programming languages.
   created_date: 2024-05-29
   url: https://mistral.ai/news/codestral/
   model_card: none
   modality: text; code
-  analysis: Performance of Codestral is evaluated in Python, SQL, and additional languages, C++, bash, Java, PHP, Typescript, and C#. Fill-in-the-middle performance is assessed using HumanEval pass@1 in Python, JavaScript, and Java.
+  analysis: Performance of Codestral is evaluated in Python, SQL, and additional
+    languages, C++, bash, Java, PHP, Typescript, and C#. Fill-in-the-middle performance
+    is assessed using HumanEval pass@1 in Python, JavaScript, and Java.
   size: 22B parameters
   dependencies: []
   training_emissions: unknown
@@ -81,7 +87,108 @@
   quality_control: ''
   access: open
   license: Mistral AI Non-Production License
-  intended_uses: Helps developers write and interact with code, design advanced AI applications for software developers, integrated into LlamaIndex and LangChain for building applications, integrated in VSCode and JetBrains environments for code generation and interactive conversation.
+  intended_uses: Helps developers write and interact with code, design advanced
+    AI applications for software developers, integrated into LlamaIndex and LangChain
+    for building applications, integrated in VSCode and JetBrains environments for
+    code generation and interactive conversation.
prohibited_uses: unknown monitoring: unknown feedback: none +- type: model + name: Mistral NeMo + organization: Mistral AI, NVIDIA + description: The Mistral NeMo model is a state-of-the-art 12B model built in collaboration + with NVIDIA, offering a large context window of up to 128k tokens. The model + is suitable for multilingual applications and exhibits excellent reasoning, + world knowledge, and coding accuracy. It's easy to use and a drop-in replacement + in a system that uses Mistral 7B. The model uses a new tokenizer, Tekken, based + on Tiktoken, which is trained on over 100 languages. It compresses natural language + text and source code more efficiently than previously used tokenizers. + created_date: 2024-07-18 + url: https://mistral.ai/news/mistral-nemo/ + model_card: unknown + modality: Text; Text + analysis: The model underwent an advanced fine-tuning and alignment phase. Its + performance was evaluated using GPT4o as a judge on official references. It + was compared to recent open-source pre-trained models Gemma 2 9B, Llama 3 8B + regarding multilingual performance and coding accuracy. Tekken tokenizer's compression + ability was compared with previous tokenizers like SentencePiece and the Llama + 3 tokenizer. + size: 12B parameters + dependencies: [GPT4o, Mistral 7B, Tekken] + training_emissions: Unknown + training_time: Unknown + training_hardware: NVIDIA hardware, specifics unknown + quality_control: The model underwent an advanced fine-tuning and alignment phase. + Various measures such as accuracy comparisons with other models and instruction-tuning + were implemented to ensure its quality. + access: Open + license: Apache 2.0 + intended_uses: The model can be used for multilingual applications, understanding + and generating natural language as well as source code, handling multi-turn + conversations, and providing more precise instruction following. + prohibited_uses: Unknown + monitoring: Unknown + feedback: Problems should be reported to the Mistral AI team, though the specific + method of reporting is unknown. +- type: model + name: Codestral Mamba + organization: Mistral AI + description: Codestral Mamba is a Mamba2 language model that is specialized in + code generation. It has a theoretical ability to model sequences of infinite + length and offers linear time inference. This makes it effective for extensive + user engagement and is especially practical for code productivity use cases. + Codestral Mamba can be deployed using the mistral-inference SDK or through TensorRT-LLM, + and users can download the raw weights from HuggingFace. + created_date: 2024-07-16 + url: https://mistral.ai/news/codestral-mamba/ + model_card: unknown + modality: Text; Text + analysis: The model has been tested for in-context retrieval capabilities up to + 256k tokens. It has been created with advanced code and reasoning capabilities, + which enables it to perform on par with SOTA transformer-based models. + size: 7.3B parameters + dependencies: [Mamba's GitHub repository, HuggingFace] + training_emissions: Unknown + training_time: Unknown + training_hardware: Unknown + quality_control: Unknown + access: Open + license: Apache 2.0 + intended_uses: The model is intended for code generation and can be utilized as + a local code assistant. + prohibited_uses: Unknown + monitoring: Unknown + feedback: Problems with the model can be reported through the organization's website. 
+- type: model + name: MathΣtral + organization: Mistral AI + description: MathΣtral is a 7B model designed for math reasoning and scientific + discovery. It achieves state-of-the-art reasoning capacities in its size category + across various industry-standard benchmarks. This model stands on the shoulders + of Mistral 7B and specializes in STEM subjects. It is designed to assist efforts + in advanced mathematical problems requiring complex, multi-step logical reasoning. + It particularly achieves 56.6% on MATH and 63.47% on MMLU. + created_date: 2024-07-16 + url: https://mistral.ai/news/mathstral/ + model_card: unknown + modality: Text-to-text (presumed based on description) + analysis: The model's performance has been evaluated on the MATH and MMLU industry-standard + benchmarks. It scored notably higher on both these tests than the base model + Mistral 7B. + size: 7B parameters + dependencies: [Mistral 7B] + training_emissions: Unknown + training_time: Unknown + training_hardware: Unknown + quality_control: This model has been fine-tuned from a base model and its inference + and performance have been tested on several industry benchmarks. + access: open + license: Apache 2.0 + intended_uses: The model is intended for use in solving advanced mathematical + problems requiring complex, multi-step logical reasoning or any math-related + STEM subjects challenges. + prohibited_uses: Unknown + monitoring: Unknown + feedback: Feedback is likely expected to be given through the HuggingFace platform + where the model's weights are hosted or directly to the Mistral AI team. diff --git a/assets/openai.yaml b/assets/openai.yaml index 192a1518..02b0d6cc 100644 --- a/assets/openai.yaml +++ b/assets/openai.yaml @@ -1424,21 +1424,25 @@ - type: model name: GPT-4o organization: OpenAI - description: GPT-4o is OpenAI's new flagship model, as of release, that can reason across audio, vision, and text in real time. + description: GPT-4o is OpenAI's new flagship model, as of release, that can reason + across audio, vision, and text in real time. created_date: 2024-05-13 url: https://openai.com/index/hello-gpt-4o/ model_card: none modality: audio, image, text, video; audio, image, text - analysis: When evaluated on standard performance benchmarks, achieves similar levels of performance to GPT-4 Turbo. + analysis: When evaluated on standard performance benchmarks, achieves similar + levels of performance to GPT-4 Turbo. size: unknown dependencies: [] training_emissions: unknown training_time: unknown training_hardware: unknown - quality_control: Training data filtering and post-training refinement act as additional guardrails for preventing harmful outputs. + quality_control: Training data filtering and post-training refinement act as additional + guardrails for preventing harmful outputs. access: limited license: unknown intended_uses: '' prohibited_uses: '' - monitoring: Internal monitoring of risk for non-text outputs before a public release (currently only image, text inputs and text outputs are available). + monitoring: Internal monitoring of risk for non-text outputs before a public release + (currently only image, text inputs and text outputs are available). 
  feedback: none
diff --git a/assets/qwen_team.yaml b/assets/qwen_team.yaml
new file mode 100644
index 00000000..067fa4b5
--- /dev/null
+++ b/assets/qwen_team.yaml
@@ -0,0 +1,42 @@
+---
+- type: model
+  name: Qwen2-Math
+  organization: Qwen Team
+  description: Qwen2-Math is a series of specialized math language models built
+    upon the Qwen2 large language models, with a focus on enhancing the reasoning
+    and mathematical capabilities. Their intended use is for solving complex mathematical
+    problems. They significantly outperform both open-source and closed-source models
+    in terms of mathematical capabilities.
+  created_date: 2024-08-08
+  url: https://qwenlm.github.io/blog/qwen2-math/
+  model_card: unknown
+  modality: Text; Text
+  analysis: Models have been evaluated on a series of math benchmarks, demonstrating
+    outperformance of the state-of-the-art models in both the English and Chinese
+    languages.
+  size: The size of the largest model in the Qwen2-Math series is 72B parameters.
+  dependencies:
+  - GSM8K
+  - Math
+  - MMLU-STEM
+  - CMATH
+  - GaoKao Math Cloze
+  - GaoKao Math QA
+  - OlympiadBench
+  - CollegeMath
+  - GaoKao
+  - AIME2024
+  - AMC2023
+  - CN Middle School 24
+  training_emissions: Unknown
+  training_time: Unknown
+  training_hardware: Unknown
+  quality_control: The models were tested with few-shot chain-of-thought prompting
+    and evaluated across mathematical benchmarks in both English and Chinese.
+  access: open
+  license: Unknown
+  intended_uses: These models are intended for solving complex mathematical problems.
+  prohibited_uses: Uses that go against the ethical usage policies of Qwen Team.
+  monitoring: Unknown
+  feedback: Problems with the model should be reported to the Qwen Team via their
+    official channels.
diff --git a/assets/roblox.yaml b/assets/roblox.yaml
new file mode 100644
index 00000000..f19a5e75
--- /dev/null
+++ b/assets/roblox.yaml
@@ -0,0 +1,35 @@
+---
+- type: model
+  name: Voice Safety Classifier
+  organization: Roblox
+  description: A large classification model for toxicity detection in voice chats.
+    The model is trained on a manually curated real-world dataset comprising 2,374
+    hours of voice chat audio clips and was fine-tuned from the WavLM Base Plus
+    model. It classifies each piece of content across multiple labels such as
+    Profanity, DatingAndSexting, Racist, Bullying, Other, NoViolation.
+  created_date: 2024-09-08
+  url: https://huggingface.co/Roblox/voice-safety-classifier
+  model_card: https://huggingface.co/Roblox/voice-safety-classifier
+  modality: Audio; Text
+  analysis: The model was evaluated on a dataset with human annotated labels containing
+    9,795 samples. Binarized average precision is calculated for each of the toxicity
+    classes and reaches up to 94.48%.
+  size: Unknown
+  dependencies:
+  - WavLM base plus
+  - Python
+  - HuggingFace
+  - Voice chat audio clips
+  training_emissions: Unknown
+  training_time: Unknown
+  training_hardware: Unknown
+  quality_control: Manually curated real-world dataset to reflect actual usage.
+    Evaluated using human annotated samples. The model also calculates precision
+    for each of the classes.
+  access: open
+  license: Unknown
+  intended_uses: The model is intended to be used for detecting and classifying
+    toxicity in voice chat content.
+  prohibited_uses: Unknown
+  monitoring: Unknown
+  feedback: The feedback mechanism was not provided as part of the description.
diff --git a/assets/runway_ai,_inc..yaml b/assets/runway_ai,_inc..yaml new file mode 100644 index 00000000..4d628ccb --- /dev/null +++ b/assets/runway_ai,_inc..yaml @@ -0,0 +1,34 @@ +--- +- type: model + name: Gen-3 Alpha + organization: Runway AI, Inc. + description: Gen-3 Alpha is a foundation model trained for large-scale multimodal + tasks. It is a major improvement in fidelity, consistency, and motion over the + previous generation, Gen-2. Gen-3 Alpha can power various tools, such as Text + to Video, Image to Video, and Text to Image. The model excels at generating + expressive human characters with a wide range of actions, gestures, and emotions, + and is capable of interpreting a wide range of styles and cinematic terminology. + It is also a step towards building General World Models. It has been designed + for use by research scientists, engineers, and artists, and can be fine-tuned + for customization according to specific stylistic and narrative requirements. + created_date: 2024-06-17 + url: https://runwayml.com/research/introducing-gen-3-alpha?utm_source=xinquji + model_card: unknown + modality: Text, image, video; video + analysis: Unknown + size: Unknown + dependencies: [] + training_emissions: Unknown + training_time: Unknown + training_hardware: Unknown + quality_control: It will be released with a set of new safeguards, including an + improved in-house visual moderation system and C2PA provenance standards. + access: open + license: Terms of Use listed on Runway AI, Inc.'s website, specific license unknown + intended_uses: Can be used to create expressive human characters, interpret a + wide range of styles and cinematic terminology, and power tools for Text to + Video, Image to Video, and Text to Image tasks. + prohibited_uses: Unknown + monitoring: The model includes a new and improved in-house visual moderation system. + feedback: Companies interested in fine-tuning and custom models can reach out + to Runway AI, Inc. using a form on their website. diff --git a/assets/samba.yaml b/assets/samba.yaml index 1776b9d2..07092214 100644 --- a/assets/samba.yaml +++ b/assets/samba.yaml @@ -57,3 +57,30 @@ prohibited_uses: '' monitoring: unknown feedback: none +- type: model + name: sarvam-2b + organization: sarvamAI + description: This is an early checkpoint of sarvam-2b, a small, yet powerful language + model pre-trained from scratch on 2 trillion tokens. It is designed to be proficient + in 10 Indic languages (Bengali, Gujarati, Hindi, Kannada, Malayalam, Marathi, + Oriya, Punjabi, Tamil, and Telugu) + English. + created_date: 2024-08-15 + url: https://huggingface.co/sarvamai/sarvam-2b-v0.5 + model_card: https://huggingface.co/sarvamai/sarvam-2b-v0.5 + modality: text; text + analysis: Analysis for the model is not yet provided; however, it has been reported + that more technical details like evaluations and benchmarking will be posted + soon. + size: Unknown + dependencies: [] + training_emissions: Unknown + training_time: Unknown + training_hardware: NVIDIA NeMo™ Framework, Yotta Shakti Cloud, HGX H100 systems. + quality_control: Unknown + access: Open + license: Unknown + intended_uses: The model can be used for text completion and supervised fine-tuning, + particularly in the languages it was trained on. 
+ prohibited_uses: Unknown + monitoring: Unknown + feedback: Unknown diff --git a/assets/stability_ai.yaml b/assets/stability_ai.yaml new file mode 100644 index 00000000..8cdfe766 --- /dev/null +++ b/assets/stability_ai.yaml @@ -0,0 +1,116 @@ +--- +- type: model + name: Stable Diffusion 3 Medium + organization: Stability AI + description: Stable Diffusion 3 Medium is Stability AI’s advanced text-to-image + open model. It's suitable for running on consumer PCs and laptops as well as + enterprise-tier GPUs. The model is known for its overall Quality and Photorealism, + prompt understanding, typography, being resource-efficient, and being fine-tuned. + The model in collaboration with NVIDIA and AMD has enhanced performance. + created_date: 2024-06-12 + url: https://stability.ai/news/stable-diffusion-3-medium + model_card: unknown + modality: text; image + analysis: The model was tested extensively internally and externally. It has developed + and implemented numerous safeguards to prevent harms. They have also received + user feedback to make continuous improvements. + size: 2B parameters + dependencies: [] + training_emissions: Unknown + training_time: Unknown + training_hardware: NVIDIA RTX GPUs, TensorRT, AMD’s APUs, consumer GPUs and MI-300X + Enterprise GPUs + quality_control: They have conducted extensive internal and external testing of + this model and have implemented numerous safeguards to prevent harms. Safety + measures were implemented from the start of training the model and continued + throughout testing, evaluation, and deployment. + access: Open + license: Stability Non-Commercial Research Community License + intended_uses: The model can be used by professional artists, designers, developers, + and AI enthusiasts for creating high-quality image outputs from text inputs. + prohibited_uses: Large-scale commercial use requires contacting the organization + for licensing details. The model should not be used for any purpose that does + not adhere to the usage guidelines. + monitoring: Continuous collaboration with researchers, experts, and the community + to ensure that the model is being used appropriately. + feedback: Feedback can be given through Twitter, Instagram, LinkedIn, or Discord + Community. +- type: model + name: Stable Video 4D + organization: Stability AI + description: Stable Video 4D is our latest AI model for dynamic multi-angle video + generation. It allows users to upload a single video and receive novel-view + videos of eight new angles/views. This advancement moves from image-based video + generation to full 3D dynamic video synthesis. Users can specify camera angles, + tailoring the output to meet specific creative needs. The model is currently + available on Hugging Face and can generate 5-frame videos across the 8 views + in about 40 seconds. + created_date: 2024-07-24 + url: https://stability.ai/news/stable-video-4d + model_card: unknown + modality: video; video + analysis: Consistency across the spatial and temporal axes greatly improves with + this model. Stable Video 4D is able to generate novel view videos that are more + detailed, faithful to the input video, and are consistent across frames and + views compared to existing works. + size: Unknown + dependencies: [Stable Video Diffusion Model] + training_emissions: Unknown + training_time: Unknown + training_hardware: Unknown + quality_control: The Stability AI team is dedicated to continuous innovation and + exploration of real-world use-cases for this model and others. 
They are actively + working to refine and optimize the model beyond the current synthetic datasets + it has been trained on. + access: Open + license: Stable AI License + intended_uses: This model can be used for creating dynamic multi-angle videos, + with applications in game development, video editing, and virtual reality. It + allows professionals in these fields to visualize objects from multiple angles, + enhancing the realism and immersion of their products. + prohibited_uses: Unknown + monitoring: Continuous monitoring by the Stability AI team for improvements and + refinements. + feedback: Feedback and reports about the progress should be shared via their social + channels like Twitter, Instagram, LinkedIn or their Discord Community. +- type: model + name: Stable Fast 3D + organization: Stability AI + description: Stable Fast 3D is a ground-breaking model in 3D asset generation + technology. It can transform a single input image into a highly detailed 3D + asset in around half a second, setting new standards in terms of speed and quality + in the realm of 3D reconstruction. Users start the process by uploading an image + of an object. Stable Fast 3D then swiftly generates a complete 3D asset, which + includes, UV unwrapped mesh, material parameters, albedo colors with reduced + illumination bake-in, and optional quad or triangle remeshing. This model has + various applications, notably for game and virtual reality developers, as well + as professionals in retail, architecture, design, and other graphic-intensive + professions. + created_date: 2024-08-01 + url: https://stability.ai/news/introducing-stable-fast-3d + model_card: unknown + modality: image; 3D + analysis: The model was evaluated on its ability to quickly and accurately transform + a single image into a detailed 3D asset. This evaluation highlighted the model's + unprecedented speed and quality, marking it as a valuable tool for rapid prototyping + in 3D work. Compared to the previous SV3D model, Stable Fast 3D offers significantly + reduced inference times--0.5 seconds versus 10 minutes--while maintaining high-quality + output. + size: Unknown + dependencies: [TripoSR] + training_emissions: Unknown + training_time: Unknown + training_hardware: GPU with 7GB VRAM + quality_control: Unknown + access: open + license: Stability AI Community + intended_uses: The model is intended for use in game development, virtual reality, + retail, architecture, design and other graphically intense professions. It allows + for rapid prototyping in 3D work, assisting both enterprises and indie developers. + It's also used in movie production for creating static assets for games and + 3D models for e-commerce, as well as fast model creation for AR/VR. + prohibited_uses: Use by individuals or organizations with over $1M in annual revenue + without obtaining an Enterprise License. + monitoring: Unknown + feedback: Information on any downstream issues with the model can be reported + to Stability AI through their support request system. diff --git a/assets/stanford.yaml b/assets/stanford.yaml index 0d795d95..94a67866 100644 --- a/assets/stanford.yaml +++ b/assets/stanford.yaml @@ -143,3 +143,42 @@ prohibited_uses: '' monitoring: '' feedback: Feedback can be provided on [[GitHub Issues]](https://github.com/tatsu-lab/stanford_alpaca/issues). 
+- type: model
+  name: Merlin
+  organization: Stanford Center for Artificial Intelligence in Medicine and Imaging,
+    Stanford University
+  description: Merlin is a 3D Vision Language Model that's designed for interpretation
+    of abdominal computed tomography (CT) scans. It uses both structured Electronic
+    Health Record (EHR) and unstructured radiology reports for supervision without
+    requiring additional manual annotations. The model was trained on a high-quality
+    clinical dataset of paired CT scans, EHR diagnosis codes, and radiology reports
+    and was evaluated on 6 task types and 752 individual tasks.
+  created_date: 2024-09-08
+  url: https://arxiv.org/pdf/2406.06512
+  model_card: unknown
+  modality: Image; text
+  analysis: Merlin has been comprehensively evaluated on 6 task types and 752 individual
+    tasks. The non-adapted (off-the-shelf) tasks include zero-shot findings classification,
+    phenotype classification, and zero-shot cross-modal retrieval, while model adapted
+    tasks include 5-year chronic disease prediction, radiology report generation,
+    and 3D semantic segmentation. It has undergone internal validation on a test
+    set of 5,137 CTs, and external validation on 7,000 clinical CTs and on two public
+    CT datasets (VerSe, TotalSegmentator).
+  size: Unknown
+  dependencies: ['VerSe, TotalSegmentator']
+  training_emissions: Unknown
+  training_time: Unknown
+  training_hardware: Single GPU.
+  quality_control: The model has undergone extensive evaluations and also internal
+    and external validation tests.
+  access: open
+  license: Unknown
+  intended_uses: This model is intended for use in the interpretation of abdominal
+    computed tomography (CT) scans, chronic disease prediction, radiology report
+    generation, and 3D semantic segmentation.
+  prohibited_uses: The model should not be used outside of healthcare-related contexts,
+    such as for personal or non-medical commercial purposes.
+  monitoring: Unknown
+  feedback: Feedback and reports for problems with the model should likely be routed
+    to Stanford Center for Artificial Intelligence in Medicine and Imaging, or the
+    corresponding author of the research (louis.blankemeier@stanford.edu).
diff --git a/assets/team_glm,_zhipu_ai,_.yaml b/assets/team_glm,_zhipu_ai,_.yaml
new file mode 100644
index 00000000..855dd563
--- /dev/null
+++ b/assets/team_glm,_zhipu_ai,_.yaml
@@ -0,0 +1,35 @@
+---
+- type: model
+  name: ChatGLM
+  organization: Team GLM, Zhipu AI, Tsinghua University
+  description: ChatGLM is an evolving family of large language models that have
+    been developed over time. The GLM-4 language series includes GLM-4, GLM-4-Air,
+    and GLM-4-9B. They are pre-trained on ten trillion tokens, mostly in Chinese
+    and English, and are aligned primarily for Chinese and English usage. The high-quality
+    alignment is achieved via a multi-stage post-training process, which involves
+    supervised fine-tuning and learning from human feedback. The GLM-4 All Tools model
+    is further aligned to understand user intent and autonomously decide when and
+    which tool(s) to use.
+ created_date: 2023-07-02 + url: https://arxiv.org/pdf/2406.12793 + model_card: unknown + modality: text; text + analysis: Evaluations show that GLM-4, 1) closely rivals or outperforms GPT-4 + in terms of general metrics such as MMLU, GSM8K, MATH, BBH, GPQA, and HumanEval, + 2) gets close to GPT-4-Turbo in instruction following as measured by IFEval, + 3) matches GPT-4 Turbo (128K) and Claude 3 for long context tasks, and 4) outperforms + GPT-4 in Chinese alignments as measured by AlignBench. + size: From 6.2 billion parameters to 9 billion parameters and 130 billion parameters. + dependencies: [GPT models, GLM-10B, GLM-130B] + training_emissions: Unknown + training_time: Unknown + training_hardware: Unknown + quality_control: High-quality alignment is achieved via a multi-stage post-training + process, which involves supervised fine-tuning and learning from human feedback. + access: Open + license: Unknown + intended_uses: General language modeling, complex tasks like accessing online + information via web browsing and solving math problems using Python interpreter. + prohibited_uses: Unknown + monitoring: Unknown + feedback: Unknown diff --git a/assets/unknown.yaml b/assets/unknown.yaml new file mode 100644 index 00000000..ecc09eb5 --- /dev/null +++ b/assets/unknown.yaml @@ -0,0 +1,38 @@ +--- +- type: model + name: Dragonfly + organization: Unknown (Team members include, Kezhen Chen, Rahul Thapa, Rahul Chalamala, + Ben Athiwaratkun, Shuaiwen Leon Song, James Zou) + description: A large vision-language model with multi-resolution zoom that enhances + fine-grained visual understanding and reasoning about image regions. The Dragonfly + model comes in two variants, the general-domain model ("Llama-3-8b-Dragonfly-v1") + trained on 5.5 million image-instruction pairs, and the biomedical variant ("Llama-3-8b-Dragonfly-Med-v1") + fine-tuned on an additional 1.4 million biomedical image-instruction pairs. + Dragonfly demonstrates promising performance on vision-language benchmarks like + commonsense visual QA and image captioning. + created_date: 2024-06-06 + url: https://www.together.ai/blog/dragonfly-v1 + model_card: unknown + modality: Image; text + analysis: The model was evaluated using five popular vision-language benchmarks + that require strong commonsense reasoning and detailed image understanding, + AI2D, ScienceQA, MMMU, MMVet, and POPE. It demonstrated competitive performance + in these evaluations compared to other vision-language models. + size: 8B parameters (Unknown if sparse or dense) + dependencies: [LLaMA] + training_emissions: Unknown + training_time: Unknown + training_hardware: Unknown + quality_control: The model employs two key strategies (multi-resolution visual + encoding and zoom-in patch selection) that enable it to efficiently focus on + fine-grained details in image regions and provide better commonsense reasoning. + Its performance was evaluated on several benchmark tasks for quality assurance. + access: open + license: Unknown + intended_uses: Dragonfly is designed for image-text tasks, including commonsense + visual question answering and image captioning. It is further focused on tasks + that require fine-grained understanding of high-resolution image regions, such + as in medical imaging. 
+  prohibited_uses: Unknown
+  monitoring: Unknown
+  feedback: Unknown
diff --git a/assets/writer.yaml b/assets/writer.yaml
index 88b8d47c..4b5109ff 100644
--- a/assets/writer.yaml
+++ b/assets/writer.yaml
@@ -49,3 +49,95 @@
   prohibited_uses: ''
   monitoring: ''
   feedback: https://huggingface.co/Writer/camel-5b-hf/discussions
+- type: model
+  name: Palmyra-Med-70b-32k
+  organization: Writer
+  description: Palmyra-Med-70b-32k is a language model designed specifically for
+    healthcare and biomedical applications. It builds upon the foundation of Palmyra-Med-70b
+    and offers an extended context length. This model integrates the DPO dataset,
+    a custom medical instruction dataset, and has been fine-tuned to meet the unique
+    requirements of the medical and life sciences sectors. It is ranked as the leading
+    LLM on biomedical benchmarks with an average score of 85.87%.
+  created_date: 2024-09-08
+  url: https://huggingface.co/Writer/Palmyra-Med-70B-32K
+  model_card: https://huggingface.co/Writer/Palmyra-Med-70B-32K
+  modality: text; text
+  analysis: The model was evaluated across 9 diverse biomedical datasets where it
+    achieved state-of-the-art results with an average score of 85.9%. It also demonstrated
+    robust capability in efficiently processing extensive medical documents, as
+    showcased by its near-perfect score in the NIH evaluation.
+  size: Unknown
+  dependencies: [Palmyra-X-004]
+  training_emissions: Unknown
+  training_time: Unknown
+  training_hardware: Unknown
+  quality_control: The model has been refined using Policy Optimization and a finely
+    crafted fine-tuning dataset. It contains watermarks to detect and prevent misuse
+    and illegal use.
+  access: open
+  license: Writer open model
+  intended_uses: Palmyra-Med-70b-32k is intended for non-commercial and research
+    use in English. Specifically, it can be used for tasks like clinical entity
+    recognition and knowledge discovery from EHRs, research articles, and other
+    biomedical sources. It excels in analyzing and summarizing complex clinical
+    notes, EHR data, and discharge summaries.
+  prohibited_uses: The model should not be used in any manner that violates applicable
+    laws or regulations. It is not to be used in direct patient care, clinical decision
+    support, or professional medical purposes. The model should not replace professional
+    medical judgment.
+  monitoring: Measures in place to monitor misuse include the addition of watermarks
+    in all models built by Writer.com to detect and prevent misuse and illegal use.
+  feedback: Downstream problems with this model should be reported via email to
+    Hello@writer.com.
+- type: model
+  name: Palmyra-Fin-70B-32K
+  organization: Writer
+  description: Palmyra-Fin-70B-32K is a leading LLM built specifically to meet the
+    needs of the financial industry. It has been fine-tuned on an extensive collection
+    of high-quality financial data and is highly adept at handling the specific
+    needs of the finance field. It outperforms other large language models in various
+    financial tasks and evaluations, achieving state-of-the-art results across various
+    financial datasets. Its strong performance in tasks like financial document
+    analysis, market trend prediction, and risk assessment underscores its effective
+    grasp of financial knowledge.
+  created_date: 2024-09-08
+  url: https://huggingface.co/Writer/Palmyra-Fin-70B-32K
+  model_card: https://huggingface.co/Writer/Palmyra-Fin-70B-32K
+  modality: text; text
+  analysis: The model has been evaluated internally, showing state-of-the-art results
+    on various financial datasets. It has shown 100% accuracy in needle-in-haystack
+    tasks and superior performance in comparison to other models in the organization's
+    internal finance evaluations. It passed the CFA Level III test with a score
+    of 73% and has shown superior performance compared to other models in the long-fin-eval,
+    an internally created benchmark that simulates real-world financial scenarios.
+  size: 70 Billion parameters
+  dependencies:
+  - Palmyra-X-004
+  - Writer in-house financial instruction dataset
+  training_emissions: Unknown
+  training_time: Unknown
+  training_hardware: Unknown
+  quality_control: The model was trained with a proprietary internal database and
+    a fine-tuning recipe to ensure a greater level of domain-specific accuracy and
+    fluency. Still, the model may contain inaccuracies, biases, or misalignments
+    and its usage for direct financial decision-making or professional financial
+    advice without human oversight is not recommended. It has not been rigorously
+    evaluated in real-world financial settings and it requires further testing,
+    regulatory compliance, bias mitigation, and human oversight for more critical
+    financial applications.
+  access: open
+  license: Writer open model license
+  intended_uses: The model is intended for use in English for financial analysis,
+    market trend prediction, risk assessment, financial report generation, automated
+    financial advice, and answering questions from long financial documents. It
+    can be used for entity recognition, identifying key financial concepts such
+    as market trends, economic indicators, and financial instruments from unstructured
+    text.
+  prohibited_uses: The model should not be used in ways that violate applicable
+    laws or regulations (including trade compliance laws), for uses prohibited by
+    Writer's acceptable use policy or the Writer open model license, or in languages
+    other than English. It is advised not to use the model for direct financial decision-making
+    or professional financial advice without human oversight. Always consult a qualified
+    financial professional for personal financial needs.
+  monitoring: Unknown
+  feedback: Downstream problems with this model should be reported to Hello@writer.com.
diff --git a/assets/xai.yaml b/assets/xai.yaml
index b656754a..8e8a1857 100644
--- a/assets/xai.yaml
+++ b/assets/xai.yaml
@@ -52,3 +52,39 @@
   prohibited_uses: unknown
   monitoring: unknown
   feedback: none
+- type: model
+  name: Grok-2
+  organization: xAI
+  description: Grok-2 is a state-of-the-art language model with advanced capabilities
+    in both text and vision understanding. It demonstrates significant improvements
+    in reasoning with retrieved content and tool use capabilities over its predecessor,
+    Grok-1.5. It also excels in vision-based tasks and delivers high performance
+    in document-based question answering and visual math reasoning (MathVista).
+    Grok-2 mini, a smaller version of Grok-2, is also introduced, offering a balance
+    between speed and answer quality.
+ created_date: 2024-08-13 + url: https://x.ai/blog/grok-2 + model_card: unknown + modality: text; text, vision + analysis: The Grok-2 models were evaluated across a series of academic benchmarks + that included reasoning, reading comprehension, math, science, and coding. They + showed significant improvements over the earlier model Grok-1.5 and achieved + performance levels competitive to other frontier models in areas such as graduate-level + science knowledge (GPQA), general knowledge (MMLU, MMLU-Pro), and math competition + problems (MATH). + size: Unknown + dependencies: [Grok-1.5, FLUX.1] + training_emissions: Unknown + training_time: Unknown + training_hardware: Unknown + quality_control: Grok-2 models were tested in real-world scenarios using AI tutors + that engaged with the models across a variety of tasks and selected the superior + response based on specific criteria outlined in the guidelines. + access: Limited + license: Unknown + intended_uses: The model is intended to be used for understanding text and vision, + answering questions, collaborating on writing, solving coding tasks, and enhancing + search capabilities. + prohibited_uses: Unknown + monitoring: Unknown + feedback: Issues with the model should be reported to xAI. diff --git a/js/main.js b/js/main.js index 6b7611e5..90c545b0 100644 --- a/js/main.js +++ b/js/main.js @@ -635,9 +635,19 @@ function loadAssetsAndRenderPageContent() { const paths = [ 'assets/adept.yaml', + 'assets/aspia_space,_institu.yaml', + 'assets/evolutionaryscale.yaml', + 'assets/laion_e.v..yaml', + 'assets/lg_ai_research.yaml', 'assets/mila.yaml', + 'assets/qwen_team.yaml', + 'assets/roblox.yaml', + 'assets/runway_ai,_inc..yaml', 'assets/soochow.yaml', 'assets/baichuan.yaml', + 'assets/stability_ai.yaml', + 'assets/team_glm,_zhipu_ai,_.yaml', + 'assets/unknown.yaml', 'assets/xwin.yaml', 'assets/mistral.yaml', 'assets/adobe.yaml', From ddb02b24c5e2f0c38da509841acad5646e5d08f4 Mon Sep 17 00:00:00 2001 From: Jonathan Xue <105090474+jxue16@users.noreply.github.com> Date: Thu, 26 Sep 2024 17:03:49 -0700 Subject: [PATCH 02/29] Update assets/ai21.yaml --- assets/ai21.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/assets/ai21.yaml b/assets/ai21.yaml index 33520f40..e1b18337 100644 --- a/assets/ai21.yaml +++ b/assets/ai21.yaml @@ -330,7 +330,7 @@ created_date: 2024-08-22 url: https://www.ai21.com/blog/announcing-jamba-model-family model_card: unknown - modality: Unknown + modality: text; text analysis: The models were evaluated based on their ability to handle long contexts, speed, and quality. They outperformed competitors in their size class, scoring high on the Arena Hard benchmark. From b84e4dd855746ecc8b2f475e9c1e35de7268ce04 Mon Sep 17 00:00:00 2001 From: Jonathan Xue <105090474+jxue16@users.noreply.github.com> Date: Thu, 26 Sep 2024 17:03:55 -0700 Subject: [PATCH 03/29] Update assets/ai21.yaml --- assets/ai21.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/assets/ai21.yaml b/assets/ai21.yaml index e1b18337..874a42dc 100644 --- a/assets/ai21.yaml +++ b/assets/ai21.yaml @@ -334,7 +334,7 @@ analysis: The models were evaluated based on their ability to handle long contexts, speed, and quality. They outperformed competitors in their size class, scoring high on the Arena Hard benchmark. 
- size: Unknown + size: 94B parameters dependencies: [SSM-Transformer architecture, Mamba] training_emissions: Unknown training_time: Unknown From d6dc9917348c65cb7cce65d3205d273374483bea Mon Sep 17 00:00:00 2001 From: Jonathan Xue <105090474+jxue16@users.noreply.github.com> Date: Thu, 26 Sep 2024 17:04:01 -0700 Subject: [PATCH 04/29] Update assets/ai21.yaml --- assets/ai21.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/assets/ai21.yaml b/assets/ai21.yaml index 874a42dc..6ed488c3 100644 --- a/assets/ai21.yaml +++ b/assets/ai21.yaml @@ -335,7 +335,7 @@ speed, and quality. They outperformed competitors in their size class, scoring high on the Arena Hard benchmark. size: 94B parameters - dependencies: [SSM-Transformer architecture, Mamba] + dependencies: [] training_emissions: Unknown training_time: Unknown training_hardware: For speed comparisons, Jamba 1.5 Mini used 2xA100 80GB GPUs, From 5e84affcbfed6366a0f9f83d5b54117062e181d8 Mon Sep 17 00:00:00 2001 From: Jonathan Xue <105090474+jxue16@users.noreply.github.com> Date: Thu, 26 Sep 2024 17:04:08 -0700 Subject: [PATCH 05/29] Update assets/aleph_alpha.yaml --- assets/aleph_alpha.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/assets/aleph_alpha.yaml b/assets/aleph_alpha.yaml index ae6059a0..8a3949ad 100644 --- a/assets/aleph_alpha.yaml +++ b/assets/aleph_alpha.yaml @@ -120,7 +120,7 @@ webqs, arc, and boolq. Direct comparisons were also performed with applications like GPT and Llama 2. size: 7B parameters - dependencies: [GPT, Llama 2] + dependencies: [] training_emissions: Unknown training_time: Unknown training_hardware: Unknown From 4e79b131b4c299d21fe003c4425b69560725195c Mon Sep 17 00:00:00 2001 From: Jonathan Xue <105090474+jxue16@users.noreply.github.com> Date: Thu, 26 Sep 2024 17:04:31 -0700 Subject: [PATCH 06/29] Update assets/google.yaml Co-authored-by: Rishi Bommasani <47439426+rishibommasani@users.noreply.github.com> --- assets/google.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/assets/google.yaml b/assets/google.yaml index 574a551c..01fb7809 100644 --- a/assets/google.yaml +++ b/assets/google.yaml @@ -1825,7 +1825,7 @@ created_date: 2024-09-05 url: https://deepmind.google/technologies/imagen-3/ model_card: unknown - modality: Text; Image + modality: text; image analysis: Unknown size: Unknown dependencies: [] From f4e8451fc6e3de9cb9d5f52040599b311563a213 Mon Sep 17 00:00:00 2001 From: Jonathan Xue <105090474+jxue16@users.noreply.github.com> Date: Thu, 26 Sep 2024 17:05:17 -0700 Subject: [PATCH 07/29] Update assets/writer.yaml Co-authored-by: Rishi Bommasani <47439426+rishibommasani@users.noreply.github.com> --- assets/writer.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/assets/writer.yaml b/assets/writer.yaml index 4b5109ff..77179848 100644 --- a/assets/writer.yaml +++ b/assets/writer.yaml @@ -110,7 +110,7 @@ internal finance evaluations. It passed the CFA Level III test with a score of 73% and has shown superior performance compared to other models in the long-fin-eval, an internally created benchmark that simulates real-world financial scenarios. 
- size: 70 Billion parameters + size: 70B parameters (dense) dependencies: - Palmyra-X-004 - Writer in-house financial instruction dataset From 55da7acf7ac0a741e7a7f3dee6def7101db09586 Mon Sep 17 00:00:00 2001 From: Jonathan Xue <105090474+jxue16@users.noreply.github.com> Date: Thu, 26 Sep 2024 17:05:28 -0700 Subject: [PATCH 08/29] Update assets/anthropic.yaml --- assets/anthropic.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/assets/anthropic.yaml b/assets/anthropic.yaml index 740d24f9..31f83c8f 100644 --- a/assets/anthropic.yaml +++ b/assets/anthropic.yaml @@ -611,7 +611,7 @@ created_date: 2024-06-21 url: https://www.anthropic.com/news/claude-3-5-sonnet model_card: unknown - modality: text; vision; code + modality: text; image, text analysis: The model has been evaluated on a range of tests including graduate-level reasoning (GPQA), undergraduate-level knowledge (MMLU), coding proficiency (HumanEval), and standard vision benchmarks. In an internal agentic coding evaluation, Claude From d5a008f9d891922279df9dbb66542eaa40fc6a86 Mon Sep 17 00:00:00 2001 From: Jonathan Xue <105090474+jxue16@users.noreply.github.com> Date: Thu, 26 Sep 2024 17:05:35 -0700 Subject: [PATCH 09/29] Update assets/anthropic.yaml --- assets/anthropic.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/assets/anthropic.yaml b/assets/anthropic.yaml index 31f83c8f..0fa0c76a 100644 --- a/assets/anthropic.yaml +++ b/assets/anthropic.yaml @@ -626,7 +626,7 @@ and refined by external experts. It was also provided to the UK's AI Safety Institute (UK AISI) for a pre-deployment safety evaluation. access: Open - license: {{Unknown: null}: null} + license: unknown intended_uses: The model is intended for complex tasks such as context-sensitive customer support, orchestrating multi-step workflows, interpreting charts and graphs, transcribing text from images, as well as writing, editing, and executing From 86f064fa4cff8aa546b0cd7b3678dffbc5231753 Mon Sep 17 00:00:00 2001 From: Jonathan Xue <105090474+jxue16@users.noreply.github.com> Date: Thu, 26 Sep 2024 17:05:41 -0700 Subject: [PATCH 10/29] Update assets/aleph_alpha.yaml Co-authored-by: Rishi Bommasani <47439426+rishibommasani@users.noreply.github.com> --- assets/aleph_alpha.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/assets/aleph_alpha.yaml b/assets/aleph_alpha.yaml index 8a3949ad..e7acf73e 100644 --- a/assets/aleph_alpha.yaml +++ b/assets/aleph_alpha.yaml @@ -114,7 +114,7 @@ created_date: 2024-09-08 url: https://aleph-alpha.com/introducing-pharia-1-llm-transparent-and-compliant/#:~:text=Pharia%2D1%2DLLM%2D7B model_card: unknown - modality: Text; text + modality: text; text analysis: Extensive evaluations were done with ablation experiments performed on pre-training benchmarks such as lambada, triviaqa, hellaswag, winogrande, webqs, arc, and boolq. 
Direct comparisons were also performed with applications From 4d688225dd2c7c15b9139ebff111ecc9a05a398c Mon Sep 17 00:00:00 2001 From: Jonathan Xue <105090474+jxue16@users.noreply.github.com> Date: Thu, 26 Sep 2024 17:05:47 -0700 Subject: [PATCH 11/29] Update assets/anthropic.yaml Co-authored-by: Rishi Bommasani <47439426+rishibommasani@users.noreply.github.com> --- assets/anthropic.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/assets/anthropic.yaml b/assets/anthropic.yaml index 0fa0c76a..74b3bfcb 100644 --- a/assets/anthropic.yaml +++ b/assets/anthropic.yaml @@ -600,7 +600,7 @@ feedback: none - type: model name: Claude 3.5 Sonnet - organization: Anthropic PBC + organization: Anthropic description: Claude 3.5 Sonnet is an AI model with advanced understanding and generation abilities in text, vision, and code. It sets new industry benchmarks for graduate-level reasoning (GPQA), undergrad-level knowledge (MMLU), coding From 1d69f1171d94c6939517b43cb65e6c9d7b714911 Mon Sep 17 00:00:00 2001 From: Jonathan Xue <105090474+jxue16@users.noreply.github.com> Date: Thu, 26 Sep 2024 17:05:54 -0700 Subject: [PATCH 12/29] Update assets/anthropic.yaml Co-authored-by: Rishi Bommasani <47439426+rishibommasani@users.noreply.github.com> --- assets/anthropic.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/assets/anthropic.yaml b/assets/anthropic.yaml index 74b3bfcb..31659d5a 100644 --- a/assets/anthropic.yaml +++ b/assets/anthropic.yaml @@ -633,7 +633,7 @@ code. prohibited_uses: Misuse of the model is discouraged though specific use cases are not mentioned. - monitoring: Measures have been taken to evaluate the model against various types + monitoring: Unknown of misuse, and policy feedback from external experts has been integrated to ensure robustness of evaluations. feedback: Feedback on Claude 3.5 Sonnet can be submitted directly in-product to From b2c89196e55e5e13c051933918a29b85907fb0e0 Mon Sep 17 00:00:00 2001 From: Jonathan Xue <105090474+jxue16@users.noreply.github.com> Date: Thu, 26 Sep 2024 17:06:00 -0700 Subject: [PATCH 13/29] Update assets/aspia_space,_institu.yaml Co-authored-by: Rishi Bommasani <47439426+rishibommasani@users.noreply.github.com> --- assets/aspia_space,_institu.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/assets/aspia_space,_institu.yaml b/assets/aspia_space,_institu.yaml index c8475605..89a74d92 100644 --- a/assets/aspia_space,_institu.yaml +++ b/assets/aspia_space,_institu.yaml @@ -13,7 +13,7 @@ created_date: 2024-09-08 url: https://arxiv.org/pdf/2405.14930v1 model_card: unknown - modality: Image; Image (The model takes an image as input and processes it to + modality: image; image provide insights) analysis: The models’ performance on downstream tasks was evaluated by linear probing. 
The models follow a similar saturating log-log scaling law to textual From 48b4a3b6390d476b1e5f5086b00dde19afa8078c Mon Sep 17 00:00:00 2001 From: Jonathan Xue <105090474+jxue16@users.noreply.github.com> Date: Thu, 26 Sep 2024 17:06:20 -0700 Subject: [PATCH 14/29] Update assets/evolutionaryscale.yaml Co-authored-by: Rishi Bommasani <47439426+rishibommasani@users.noreply.github.com> --- assets/evolutionaryscale.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/assets/evolutionaryscale.yaml b/assets/evolutionaryscale.yaml index 0ae4658c..6079db7a 100644 --- a/assets/evolutionaryscale.yaml +++ b/assets/evolutionaryscale.yaml @@ -12,7 +12,7 @@ created_date: 2024-06-25 url: https://www.evolutionaryscale.ai/blog/esm3-release model_card: unknown - modality: Text; Text (the model takes textual instructions as inputs and generates + modality: text; text textual descriptions of proteins as outputs) analysis: The model was tested in the generation of a new green fluorescent protein. Its effectiveness was compared to natural evolutionary processes, and it was From 43e96a9a1f9c850a7d59984afa57f109bf4f69c1 Mon Sep 17 00:00:00 2001 From: Jonathan Xue <105090474+jxue16@users.noreply.github.com> Date: Thu, 26 Sep 2024 17:06:38 -0700 Subject: [PATCH 15/29] Update assets/evolutionaryscale.yaml Co-authored-by: Rishi Bommasani <47439426+rishibommasani@users.noreply.github.com> --- assets/evolutionaryscale.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/assets/evolutionaryscale.yaml b/assets/evolutionaryscale.yaml index 6079db7a..1ed26835 100644 --- a/assets/evolutionaryscale.yaml +++ b/assets/evolutionaryscale.yaml @@ -31,6 +31,6 @@ for scientists to create proteins for various applications, including medicine, biology research, and clean energy. prohibited_uses: Unknown - monitoring: Measures associated with their responsible development framework, + monitoring: Unknown though specific measures are not specified. feedback: Unknown From 9b699aed5daf25c1483b41e9e7d787a2982cc0f4 Mon Sep 17 00:00:00 2001 From: Jonathan Xue <105090474+jxue16@users.noreply.github.com> Date: Thu, 26 Sep 2024 17:06:53 -0700 Subject: [PATCH 16/29] Update assets/laion_e.v..yaml Co-authored-by: Rishi Bommasani <47439426+rishibommasani@users.noreply.github.com> --- assets/laion_e.v..yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/assets/laion_e.v..yaml b/assets/laion_e.v..yaml index 6cd18f23..f73dcb58 100644 --- a/assets/laion_e.v..yaml +++ b/assets/laion_e.v..yaml @@ -12,7 +12,7 @@ created_date: 2024-08-30 url: https://laion.ai/blog/relaion-5b/ model_card: unknown - modality: Text; Image + modality: text; image analysis: Re-LAION-5B aims to fix the issues as reported by Stanford Internet Observatory for the original LAION-5B. It is available for download in two versions, research and research-safe. In total, 2236 links that potentially led to inappropriate From fed56b057afe817f63925ed277a2f3949d1160d2 Mon Sep 17 00:00:00 2001 From: Jonathan Xue <105090474+jxue16@users.noreply.github.com> Date: Thu, 26 Sep 2024 21:53:20 -0700 Subject: [PATCH 17/29] Update assets/laion_e.v..yaml Co-authored-by: Rishi Bommasani <47439426+rishibommasani@users.noreply.github.com> --- assets/laion_e.v..yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/assets/laion_e.v..yaml b/assets/laion_e.v..yaml index f73dcb58..228208c3 100644 --- a/assets/laion_e.v..yaml +++ b/assets/laion_e.v..yaml @@ -17,7 +17,7 @@ Observatory for the original LAION-5B. 
It is available for download in two versions, research and research-safe. In total, 2236 links that potentially led to inappropriate content were removed. - size: 5.5B (5,526,641,167) text-link to images pairs + size: 5.5B (text, image) pairs dependencies: [LAION-5B] training_emissions: Unknown training_time: Unknown From af54757e2f7d7323556dbe0788b23077ba87e912 Mon Sep 17 00:00:00 2001 From: Jonathan Xue <105090474+jxue16@users.noreply.github.com> Date: Thu, 26 Sep 2024 21:53:35 -0700 Subject: [PATCH 18/29] Update assets/lg_ai_research.yaml Co-authored-by: Rishi Bommasani <47439426+rishibommasani@users.noreply.github.com> --- assets/lg_ai_research.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/assets/lg_ai_research.yaml b/assets/lg_ai_research.yaml index e669c028..ceda7a44 100644 --- a/assets/lg_ai_research.yaml +++ b/assets/lg_ai_research.yaml @@ -10,7 +10,7 @@ created_date: 2024-09-08 url: https://arxiv.org/pdf/2408.03541 model_card: unknown - modality: Text; text + modality: text; text analysis: The model was evaluated extensively across a wide range of public and in-house benchmarks. The comparative analysis showed that the performance of EXAONE 3.0 was competitive in English and excellent in Korean compared to other From 217a079eb3471539ffd28214410137a0a00e709d Mon Sep 17 00:00:00 2001 From: Jonathan Xue <105090474+jxue16@users.noreply.github.com> Date: Thu, 26 Sep 2024 21:53:52 -0700 Subject: [PATCH 19/29] Update assets/mistral.yaml Co-authored-by: Rishi Bommasani <47439426+rishibommasani@users.noreply.github.com> --- assets/mistral.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/assets/mistral.yaml b/assets/mistral.yaml index 0c9f7d7a..00794b30 100644 --- a/assets/mistral.yaml +++ b/assets/mistral.yaml @@ -107,7 +107,7 @@ created_date: 2024-07-18 url: https://mistral.ai/news/mistral-nemo/ model_card: unknown - modality: Text; Text + modality: text; text analysis: The model underwent an advanced fine-tuning and alignment phase. Its performance was evaluated using GPT4o as a judge on official references. It was compared to recent open-source pre-trained models Gemma 2 9B, Llama 3 8B From 69482c26008e7c6a4fc4a88f315037b228d77229 Mon Sep 17 00:00:00 2001 From: Jonathan Xue <105090474+jxue16@users.noreply.github.com> Date: Thu, 26 Sep 2024 21:54:22 -0700 Subject: [PATCH 20/29] Update assets/mistral.yaml Co-authored-by: Rishi Bommasani <47439426+rishibommasani@users.noreply.github.com> --- assets/mistral.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/assets/mistral.yaml b/assets/mistral.yaml index 00794b30..adafa68c 100644 --- a/assets/mistral.yaml +++ b/assets/mistral.yaml @@ -115,7 +115,7 @@ ability was compared with previous tokenizers like SentencePiece and the Llama 3 tokenizer. 
size: 12B parameters - dependencies: [GPT4o, Mistral 7B, Tekken] + dependencies: [] training_emissions: Unknown training_time: Unknown training_hardware: NVIDIA hardware, specifics unknown From 9f24c6eed212887b5ca2d1f0e36bb0c1b5367821 Mon Sep 17 00:00:00 2001 From: Jonathan Xue <105090474+jxue16@users.noreply.github.com> Date: Thu, 26 Sep 2024 21:54:42 -0700 Subject: [PATCH 21/29] Update assets/mistral.yaml Co-authored-by: Rishi Bommasani <47439426+rishibommasani@users.noreply.github.com> --- assets/mistral.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/assets/mistral.yaml b/assets/mistral.yaml index adafa68c..124f35d8 100644 --- a/assets/mistral.yaml +++ b/assets/mistral.yaml @@ -143,7 +143,7 @@ created_date: 2024-07-16 url: https://mistral.ai/news/codestral-mamba/ model_card: unknown - modality: Text; Text + modality: text; text analysis: The model has been tested for in-context retrieval capabilities up to 256k tokens. It has been created with advanced code and reasoning capabilities, which enables it to perform on par with SOTA transformer-based models. From a7eeba71a7c07cfe24d9235abea6f85a59f72266 Mon Sep 17 00:00:00 2001 From: Jonathan Xue <105090474+jxue16@users.noreply.github.com> Date: Thu, 26 Sep 2024 21:54:58 -0700 Subject: [PATCH 22/29] Update assets/mistral.yaml Co-authored-by: Rishi Bommasani <47439426+rishibommasani@users.noreply.github.com> --- assets/mistral.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/assets/mistral.yaml b/assets/mistral.yaml index 124f35d8..ce0bd91e 100644 --- a/assets/mistral.yaml +++ b/assets/mistral.yaml @@ -148,7 +148,7 @@ 256k tokens. It has been created with advanced code and reasoning capabilities, which enables it to perform on par with SOTA transformer-based models. size: 7.3B parameters - dependencies: [Mamba's GitHub repository, HuggingFace] + dependencies: [] training_emissions: Unknown training_time: Unknown training_hardware: Unknown From a5ef84cf8f7b7212e71910df5fe96c281631fb39 Mon Sep 17 00:00:00 2001 From: Jonathan Xue <105090474+jxue16@users.noreply.github.com> Date: Thu, 26 Sep 2024 21:55:25 -0700 Subject: [PATCH 23/29] Update assets/qwen_team.yaml Co-authored-by: Rishi Bommasani <47439426+rishibommasani@users.noreply.github.com> --- assets/qwen_team.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/assets/qwen_team.yaml b/assets/qwen_team.yaml index 067fa4b5..61f34aa6 100644 --- a/assets/qwen_team.yaml +++ b/assets/qwen_team.yaml @@ -10,7 +10,7 @@ created_date: 2024-08-08 url: https://qwenlm.github.io/blog/qwen2-math/ model_card: unknown - modality: Text; Text + modality: text; text analysis: Models have been evaluated on a series of math benchmarks, demonstrating outperformance of the state-of-the-art models in both the English and Chinese language. 
From f94b0f723da28ece12395d5b9f90655ce2d80d2e Mon Sep 17 00:00:00 2001 From: Jonathan Xue <105090474+jxue16@users.noreply.github.com> Date: Thu, 26 Sep 2024 21:55:48 -0700 Subject: [PATCH 24/29] Update assets/runway_ai,_inc..yaml Co-authored-by: Rishi Bommasani <47439426+rishibommasani@users.noreply.github.com> --- assets/runway_ai,_inc..yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/assets/runway_ai,_inc..yaml b/assets/runway_ai,_inc..yaml index 4d628ccb..973d1be1 100644 --- a/assets/runway_ai,_inc..yaml +++ b/assets/runway_ai,_inc..yaml @@ -14,7 +14,7 @@ created_date: 2024-06-17 url: https://runwayml.com/research/introducing-gen-3-alpha?utm_source=xinquji model_card: unknown - modality: Text, image, video; video + modality: text, image, video; video analysis: Unknown size: Unknown dependencies: [] From f831f562ead47ed450268cecb91767e992fe1870 Mon Sep 17 00:00:00 2001 From: Jonathan Xue <105090474+jxue16@users.noreply.github.com> Date: Thu, 26 Sep 2024 21:56:24 -0700 Subject: [PATCH 25/29] Update assets/stanford.yaml Co-authored-by: Rishi Bommasani <47439426+rishibommasani@users.noreply.github.com> --- assets/stanford.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/assets/stanford.yaml b/assets/stanford.yaml index 94a67866..6feaa27d 100644 --- a/assets/stanford.yaml +++ b/assets/stanford.yaml @@ -165,7 +165,7 @@ set of 5,137 CTs, and external validation on 7,000 clinical CTs and on two public CT datasets (VerSe, TotalSegmentator). size: Unknown - dependencies: ['VerSe, TotalSegmentator'] + dependencies: [] training_emissions: Unknown training_time: Unknown training_hardware: Single GPU. From 9c4b5161c7ccb9369c2d48eab3d42b9b501a15b6 Mon Sep 17 00:00:00 2001 From: Jonathan Xue <105090474+jxue16@users.noreply.github.com> Date: Thu, 26 Sep 2024 21:59:40 -0700 Subject: [PATCH 26/29] Update assets/stability_ai.yaml Co-authored-by: Rishi Bommasani <47439426+rishibommasani@users.noreply.github.com> --- assets/stability_ai.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/assets/stability_ai.yaml b/assets/stability_ai.yaml index 8cdfe766..d512b3cc 100644 --- a/assets/stability_ai.yaml +++ b/assets/stability_ai.yaml @@ -24,7 +24,7 @@ this model and have implemented numerous safeguards to prevent harms. Safety measures were implemented from the start of training the model and continued throughout testing, evaluation, and deployment. - access: Open + access: open license: Stability Non-Commercial Research Community License intended_uses: The model can be used by professional artists, designers, developers, and AI enthusiasts for creating high-quality image outputs from text inputs. From ef56f7d697138561131c9cee2c71f260ab121f13 Mon Sep 17 00:00:00 2001 From: Jonathan Xue <105090474+jxue16@users.noreply.github.com> Date: Thu, 26 Sep 2024 22:00:03 -0700 Subject: [PATCH 27/29] Update assets/stability_ai.yaml Co-authored-by: Rishi Bommasani <47439426+rishibommasani@users.noreply.github.com> --- assets/stability_ai.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/assets/stability_ai.yaml b/assets/stability_ai.yaml index d512b3cc..443e5184 100644 --- a/assets/stability_ai.yaml +++ b/assets/stability_ai.yaml @@ -62,7 +62,7 @@ exploration of real-world use-cases for this model and others. They are actively working to refine and optimize the model beyond the current synthetic datasets it has been trained on. 
- access: Open + access: open license: Stable AI License intended_uses: This model can be used for creating dynamic multi-angle videos, with applications in game development, video editing, and virtual reality. It From 2195c62806995d04c9586389aa0e2c01b6cb2d78 Mon Sep 17 00:00:00 2001 From: Jonathan Xue <105090474+jxue16@users.noreply.github.com> Date: Thu, 26 Sep 2024 22:00:48 -0700 Subject: [PATCH 28/29] Update assets/stability_ai.yaml Co-authored-by: Rishi Bommasani <47439426+rishibommasani@users.noreply.github.com> --- assets/stability_ai.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/assets/stability_ai.yaml b/assets/stability_ai.yaml index 443e5184..73eba3f9 100644 --- a/assets/stability_ai.yaml +++ b/assets/stability_ai.yaml @@ -100,7 +100,7 @@ dependencies: [TripoSR] training_emissions: Unknown training_time: Unknown - training_hardware: GPU with 7GB VRAM + training_hardware: unknown quality_control: Unknown access: open license: Stability AI Community From 3517dc03c84a91a68b3e106249207b6ca2363520 Mon Sep 17 00:00:00 2001 From: jxue16 <105090474+jxue16@users.noreply.github.com> Date: Fri, 27 Sep 2024 17:28:57 -0700 Subject: [PATCH 29/29] clean up changes --- assets/ai21.yaml | 4 +- assets/aleph_alpha.yaml | 2 +- assets/anthropic.yaml | 7 ++-- assets/aspia_space,_institu.yaml | 3 +- assets/evolutionaryscale.yaml | 12 +++--- .../{team_glm,_zhipu_ai,_.yaml => glm.yaml} | 8 ++-- assets/google.yaml | 2 +- assets/laion_e.v..yaml | 5 +-- assets/lg_ai_research.yaml | 9 +---- assets/meta.yaml | 4 +- assets/microsoft.yaml | 10 ++--- assets/mistral.yaml | 6 +-- assets/qwen_team.yaml | 20 ++-------- assets/roblox.yaml | 35 ----------------- assets/samba.yaml | 27 ------------- assets/stability_ai.yaml | 13 +++---- assets/stanford.yaml | 2 +- assets/together.yaml | 36 ++++++++++++++++++ assets/unknown.yaml | 38 ------------------- assets/writer.yaml | 2 +- assets/xai.yaml | 6 +-- js/main.js | 4 +- 22 files changed, 83 insertions(+), 172 deletions(-) rename assets/{team_glm,_zhipu_ai,_.yaml => glm.yaml} (90%) delete mode 100644 assets/roblox.yaml delete mode 100644 assets/unknown.yaml diff --git a/assets/ai21.yaml b/assets/ai21.yaml index 6ed488c3..0c9bb91b 100644 --- a/assets/ai21.yaml +++ b/assets/ai21.yaml @@ -319,7 +319,7 @@ monitoring: '' feedback: https://huggingface.co/ai21labs/Jamba-v0.1/discussions - type: model - name: Jamba 1.5 Open Model Family (Jamba 1.5 Mini, Jamba 1.5 Large) + name: Jamba 1.5 organization: AI21 description: A family of models that demonstrate superior long context handling, speed, and quality. Built on a novel SSM-Transformer architecture, they surpass @@ -342,7 +342,7 @@ and Jamba 1.5 Large used 8xA100 80GB GPUs. quality_control: The models were evaluated on the Arena Hard benchmark. For maintaining long context performance, they were tested on the RULER benchmark. - access: Open + access: open license: Jamba Open Model License intended_uses: The models are built for enterprise scale AI applications. They are purpose-built for efficiency, speed, and ability to solve critical tasks diff --git a/assets/aleph_alpha.yaml b/assets/aleph_alpha.yaml index e7acf73e..fe9cbecb 100644 --- a/assets/aleph_alpha.yaml +++ b/assets/aleph_alpha.yaml @@ -127,7 +127,7 @@ quality_control: The model comes with additional safety guardrails via alignment methods to ensure safe usage. Training data is carefully curated to ensure compliance with EU and national regulations. 
- access: Open + access: open license: Aleph Open intended_uses: The model is intended for use in domain-specific applications, particularly in the automotive and engineering industries. It can also be tailored diff --git a/assets/anthropic.yaml b/assets/anthropic.yaml index 31659d5a..1fe22119 100644 --- a/assets/anthropic.yaml +++ b/assets/anthropic.yaml @@ -625,7 +625,7 @@ quality_control: The model underwent a red-teaming assessment, and has been tested and refined by external experts. It was also provided to the UK's AI Safety Institute (UK AISI) for a pre-deployment safety evaluation. - access: Open + access: open license: unknown intended_uses: The model is intended for complex tasks such as context-sensitive customer support, orchestrating multi-step workflows, interpreting charts and @@ -633,8 +633,7 @@ code. prohibited_uses: Misuse of the model is discouraged though specific use cases are not mentioned. - monitoring: Unknown - of misuse, and policy feedback from external experts has been integrated to - ensure robustness of evaluations. + monitoring: Unknown of misuse, and policy feedback from external experts has been + integrated to ensure robustness of evaluations. feedback: Feedback on Claude 3.5 Sonnet can be submitted directly in-product to inform the development roadmap and improve user experience. diff --git a/assets/aspia_space,_institu.yaml b/assets/aspia_space,_institu.yaml index 89a74d92..5925ea60 100644 --- a/assets/aspia_space,_institu.yaml +++ b/assets/aspia_space,_institu.yaml @@ -14,12 +14,11 @@ url: https://arxiv.org/pdf/2405.14930v1 model_card: unknown modality: image; image - provide insights) analysis: The models’ performance on downstream tasks was evaluated by linear probing. The models follow a similar saturating log-log scaling law to textual models, their performance improves with the increase in model size up to the saturation point of parameters. - size: Ranges from 1 million to 2.1 billion parameters. + size: 2.1B parameters dependencies: [DESI Legacy Survey DR8] training_emissions: Unknown training_time: Unknown diff --git a/assets/evolutionaryscale.yaml b/assets/evolutionaryscale.yaml index 1ed26835..f3ec7171 100644 --- a/assets/evolutionaryscale.yaml +++ b/assets/evolutionaryscale.yaml @@ -12,25 +12,23 @@ created_date: 2024-06-25 url: https://www.evolutionaryscale.ai/blog/esm3-release model_card: unknown - modality: text; text - textual descriptions of proteins as outputs) + modality: text; image, text analysis: The model was tested in the generation of a new green fluorescent protein. Its effectiveness was compared to natural evolutionary processes, and it was deemed to simulate over 500 million years of evolution. size: 98B parameters (Dense) - dependencies: [ESM2(base model), largest dataset of proteins] + dependencies: [] training_emissions: Unknown training_time: Unknown - training_hardware: One of the highest throughput GPU clusters in the world. + training_hardware: unknown quality_control: The creators have put in place a responsible development framework to ensure transparency and accountability from the start. ESM3 was tested in the generation of a new protein, ensuring its quality and effectiveness. - access: Open + access: open license: Unknown intended_uses: To engineer biology from first principles. It functions as a tool for scientists to create proteins for various applications, including medicine, biology research, and clean energy. prohibited_uses: Unknown - monitoring: Unknown - though specific measures are not specified. 
+ monitoring: Unknown though specific measures are not specified. feedback: Unknown diff --git a/assets/team_glm,_zhipu_ai,_.yaml b/assets/glm.yaml similarity index 90% rename from assets/team_glm,_zhipu_ai,_.yaml rename to assets/glm.yaml index 855dd563..bc9010ab 100644 --- a/assets/team_glm,_zhipu_ai,_.yaml +++ b/assets/glm.yaml @@ -12,22 +12,22 @@ which tool(s) to use. created_date: 2023-07-02 url: https://arxiv.org/pdf/2406.12793 - model_card: unknown + model_card: https://huggingface.co/THUDM/glm-4-9b modality: text; text analysis: Evaluations show that GLM-4, 1) closely rivals or outperforms GPT-4 in terms of general metrics such as MMLU, GSM8K, MATH, BBH, GPQA, and HumanEval, 2) gets close to GPT-4-Turbo in instruction following as measured by IFEval, 3) matches GPT-4 Turbo (128K) and Claude 3 for long context tasks, and 4) outperforms GPT-4 in Chinese alignments as measured by AlignBench. - size: From 6.2 billion parameters to 9 billion parameters and 130 billion parameters. - dependencies: [GPT models, GLM-10B, GLM-130B] + size: 9B parameters + dependencies: [] training_emissions: Unknown training_time: Unknown training_hardware: Unknown quality_control: High-quality alignment is achieved via a multi-stage post-training process, which involves supervised fine-tuning and learning from human feedback. access: Open - license: Unknown + license: Apache 2.0 intended_uses: General language modeling, complex tasks like accessing online information via web browsing and solving math problems using Python interpreter. prohibited_uses: Unknown diff --git a/assets/google.yaml b/assets/google.yaml index 01fb7809..9c0d6397 100644 --- a/assets/google.yaml +++ b/assets/google.yaml @@ -1870,7 +1870,7 @@ 2. The model has improvements in safety and efficiency over the first generation. The deployment of Gemma 2 on Vertex AI, scheduled for the next month, will offer effortless management of the model. - access: Open + access: open license: Gemma (commercially-friendly license given by Google DeepMind) intended_uses: Gemma 2 is designed for developers and researchers for various AI tasks. It can be used via the integrations it offers with other AI tools/platforms diff --git a/assets/laion_e.v..yaml b/assets/laion_e.v..yaml index 228208c3..42e6593d 100644 --- a/assets/laion_e.v..yaml +++ b/assets/laion_e.v..yaml @@ -25,15 +25,14 @@ quality_control: The model utilized lists of link and image hashes provided by partner organizations. These were used to remove inappropriate links from the original LAION-5B dataset to create Re-LAION-5B. - access: Open + access: open license: Apache 2.0 intended_uses: Re-LAION-5B is designed for research on language-vision learning. It can also be used by third parties to clean existing derivatives of LAION-5B by generating diffs and removing all matched content from their versions. prohibited_uses: The dataset should not be utilized for purposes that breach legal parameters or ethical standards, such as dealing with illegal content. - monitoring: This version is a response to continuous scrutiny & safety revisions. - It's also meant to allow inspection and validation by a broad community. + monitoring: unknown feedback: Problems with the dataset should be reported to the LAION organization. They have open lines for communication with their partners and the broader research community. 
diff --git a/assets/lg_ai_research.yaml b/assets/lg_ai_research.yaml index ceda7a44..0ad50d6d 100644 --- a/assets/lg_ai_research.yaml +++ b/assets/lg_ai_research.yaml @@ -16,19 +16,14 @@ EXAONE 3.0 was competitive in English and excellent in Korean compared to other large language models of a similar size. size: 7.8B parameters (dense) - dependencies: - - GQA - - SwiGLU - - Rotary Position Embeddings - - MeCab - - BBPE + dependencies: [MeCab] training_emissions: Unknown training_time: Unknown training_hardware: Unknown quality_control: Extensive pre-training on a diverse dataset, and advanced post-training techniques were employed to enhance instruction-following capabilities. The model was also trained to fully comply with data handling standards. - access: Open + access: open license: Unknown intended_uses: The model was intended for non-commercial and research purposes. The capabilities of the model allow for use cases that involve advanced AI and diff --git a/assets/meta.yaml b/assets/meta.yaml index f5f115c8..0b7653ec 100644 --- a/assets/meta.yaml +++ b/assets/meta.yaml @@ -865,8 +865,8 @@ text summarization, multilingual conversational agents, and coding assistants. It is the largest and most capable openly available foundation model. created_date: 2024-07-23 - url: https://ai.meta.com/blog/meta-llama-3-1/?utm_source=twitter&utm_medium=organic_social&utm_content=video&utm_campaign=llama31 - model_card: unknown + url: https://ai.meta.com/blog/meta-llama-3-1/ + model_card: https://github.com/meta-llama/llama-models/blob/main/models/llama3_1/MODEL_CARD.md modality: text; text analysis: The model was evaluated on over 150 benchmark datasets that span a wide range of languages. An experimental evaluation suggests that the model is competitive diff --git a/assets/microsoft.yaml b/assets/microsoft.yaml index cf431631..137996a4 100644 --- a/assets/microsoft.yaml +++ b/assets/microsoft.yaml @@ -1003,23 +1003,23 @@ created_date: 2024-09-08 url: https://huggingface.co/microsoft/Phi-3.5-MoE-instruct model_card: https://huggingface.co/microsoft/Phi-3.5-MoE-instruct - modality: Unknown + modality: text; text analysis: The model was evaluated across a variety of public benchmarks, comparing with a set of models including Mistral-Nemo-12B-instruct-2407, Llama-3.1-8B-instruct, Gemma-2-9b-It, Gemini-1.5-Flash, and GPT-4o-mini-2024-07-18. It achieved a similar level of language understanding and math as much larger models. It also displayed superior performance in reasoning capability, even with only 6.6B active parameters. It was also evaluated for multilingual tasks. - size: 6.6B active parameters - dependencies: [Phi-3] + size: 61B parameters (sparse); 6.6B active parameters + dependencies: [Phi-3 dataset] training_emissions: Unknown training_time: Unknown training_hardware: Unknown quality_control: The model was enhanced through supervised fine-tuning, proximal policy optimization, and direct preference optimization processes for safety measures. - access: Open - license: Unknown + access: open + license: MIT intended_uses: The model is intended for commercial and research use in multiple languages. It is designed to accelerate research on language and multimodal models, and for use as a building block for generative AI powered features. diff --git a/assets/mistral.yaml b/assets/mistral.yaml index ce0bd91e..3c78ac23 100644 --- a/assets/mistral.yaml +++ b/assets/mistral.yaml @@ -122,7 +122,7 @@ quality_control: The model underwent an advanced fine-tuning and alignment phase. 
Various measures such as accuracy comparisons with other models and instruction-tuning were implemented to ensure its quality. - access: Open + access: open license: Apache 2.0 intended_uses: The model can be used for multilingual applications, understanding and generating natural language as well as source code, handling multi-turn @@ -153,7 +153,7 @@ training_time: Unknown training_hardware: Unknown quality_control: Unknown - access: Open + access: open license: Apache 2.0 intended_uses: The model is intended for code generation and can be utilized as a local code assistant. @@ -172,7 +172,7 @@ created_date: 2024-07-16 url: https://mistral.ai/news/mathstral/ model_card: unknown - modality: Text-to-text (presumed based on description) + modality: text; text analysis: The model's performance has been evaluated on the MATH and MMLU industry-standard benchmarks. It scored notably higher on both these tests than the base model Mistral 7B. diff --git a/assets/qwen_team.yaml b/assets/qwen_team.yaml index 61f34aa6..9f21ab6d 100644 --- a/assets/qwen_team.yaml +++ b/assets/qwen_team.yaml @@ -9,32 +9,20 @@ in terms of mathematical capabilities. created_date: 2024-08-08 url: https://qwenlm.github.io/blog/qwen2-math/ - model_card: unknown + model_card: https://huggingface.co/Qwen/Qwen2-Math-72B modality: text; text analysis: Models have been evaluated on a series of math benchmarks, demonstrating outperformance of the state-of-the-art models in both the English and Chinese language. - size: The size of the largest model in the Qwen2-Math series is 72B parameters. - dependencies: - - GSM8K - - Math - - MMLU-STEM - - CMATH - - GaoKao Math Cloze - - GaoKao Math QA - - OlympiadBench - - CollegeMath - - GaoKao - - AIME2024 - - AMC2023 - - CN Middle School 24 + size: 72B parameters + dependencies: [] training_emissions: Unknown training_time: Unknown training_hardware: Unknown quality_control: The models were tested with few-shot chain-of-thought prompting and evaluated across mathematical benchmarks in both English and Chinese. access: open - license: Unknown + license: Tongyi Qianwen intended_uses: These models are intended for solving complex mathematical problems. prohibited_uses: Uses that go against the ethical usage policies of Qwen Team. monitoring: Unknown diff --git a/assets/roblox.yaml b/assets/roblox.yaml deleted file mode 100644 index f19a5e75..00000000 --- a/assets/roblox.yaml +++ /dev/null @@ -1,35 +0,0 @@ ---- -- type: model - name: Voice Safety Classifier - organization: Roblox - description: A large classification model for toxicity detection in voice chats. - The model is trained on a manually curated real-world dataset comprising 2,374 - hours of voice chat audio clips and was fine-tuned from the WavLM base plus - it offers. It classifies each piece of content across multiple labels such as - Profanity, DatingAndSexting, Racist, Bullying, Other, NoViolation. - created_date: 2024-09-08 - url: https://huggingface.co/Roblox/voice-safety-classifier - model_card: https://huggingface.co/Roblox/voice-safety-classifier - modality: Audio; Text - analysis: The model was evaluated on a dataset with human annotated labels containing - 9,795 samples. Binarized average precision is calculated for each of the toxicity - classes and reaches up to 94.48%. 
-  size: Unknown
-  dependencies:
-  - WavLM base plus
-  - Python
-  - HuggingFace
-  - Voice chat audio clips
-  training_emissions: Unknown
-  training_time: Unknown
-  training_hardware: Unknown
-  quality_control: Manually curated real-world dataset to reflect actual usage.
-    Evaluated using human annotated samples. The model also calculates precision
-    for each of the classes.
-  access: open
-  license: Unknown
-  intended_uses: The model is intended to be used for detecting and classifying
-    toxicity in voice chat content.
-  prohibited_uses: Unknown
-  monitoring: Unknown
-  feedback: The feedback mechanism was not provided as part of the description.
diff --git a/assets/samba.yaml b/assets/samba.yaml
index 07092214..1776b9d2 100644
--- a/assets/samba.yaml
+++ b/assets/samba.yaml
@@ -57,30 +57,3 @@
   prohibited_uses: ''
   monitoring: unknown
   feedback: none
-- type: model
-  name: sarvam-2b
-  organization: sarvamAI
-  description: This is an early checkpoint of sarvam-2b, a small, yet powerful language
-    model pre-trained from scratch on 2 trillion tokens. It is designed to be proficient
-    in 10 Indic languages (Bengali, Gujarati, Hindi, Kannada, Malayalam, Marathi,
-    Oriya, Punjabi, Tamil, and Telugu) + English.
-  created_date: 2024-08-15
-  url: https://huggingface.co/sarvamai/sarvam-2b-v0.5
-  model_card: https://huggingface.co/sarvamai/sarvam-2b-v0.5
-  modality: text; text
-  analysis: Analysis for the model is not yet provided; however, it has been reported
-    that more technical details like evaluations and benchmarking will be posted
-    soon.
-  size: Unknown
-  dependencies: []
-  training_emissions: Unknown
-  training_time: Unknown
-  training_hardware: NVIDIA NeMo™ Framework, Yotta Shakti Cloud, HGX H100 systems.
-  quality_control: Unknown
-  access: Open
-  license: Unknown
-  intended_uses: The model can be used for text completion and supervised fine-tuning,
-    particularly in the languages it was trained on.
-  prohibited_uses: Unknown
-  monitoring: Unknown
-  feedback: Unknown
diff --git a/assets/stability_ai.yaml b/assets/stability_ai.yaml
index 73eba3f9..3ff8188c 100644
--- a/assets/stability_ai.yaml
+++ b/assets/stability_ai.yaml
@@ -18,14 +18,13 @@
   dependencies: []
   training_emissions: Unknown
   training_time: Unknown
-  training_hardware: NVIDIA RTX GPUs, TensorRT, AMD’s APUs, consumer GPUs and MI-300X
-    Enterprise GPUs
+  training_hardware: unknown
   quality_control: They have conducted extensive internal and external testing of
     this model and have implemented numerous safeguards to prevent harms. Safety
     measures were implemented from the start of training the model and continued
     throughout testing, evaluation, and deployment.
   access: open
-  license: Stability Non-Commercial Research Community License
+  license: Stability Community License
   intended_uses: The model can be used by professional artists, designers, developers,
     and AI enthusiasts for creating high-quality image outputs from text inputs.
   prohibited_uses: Large-scale commercial use requires contacting the organization
@@ -63,7 +62,7 @@
     working to refine and optimize the model beyond the current synthetic datasets
     it has been trained on.
   access: open
-  license: Stable AI License
+  license: Stability Community License
   intended_uses: This model can be used for creating dynamic multi-angle videos,
     with applications in game development, video editing, and virtual reality. It
     allows professionals in these fields to visualize objects from multiple angles,
@@ -88,7 +87,7 @@
     professions.
   created_date: 2024-08-01
   url: https://stability.ai/news/introducing-stable-fast-3d
-  model_card: unknown
+  model_card: https://huggingface.co/stabilityai/stable-fast-3d
   modality: image; 3D
   analysis: The model was evaluated on its ability to quickly and accurately transform
     a single image into a detailed 3D asset. This evaluation highlighted the model's
@@ -96,14 +95,14 @@
     in 3D work. Compared to the previous SV3D model, Stable Fast 3D offers significantly
     reduced inference times--0.5 seconds versus 10 minutes--while maintaining high-quality
     output.
-  size: Unknown
+  size: unknown
   dependencies: [TripoSR]
   training_emissions: Unknown
   training_time: Unknown
   training_hardware: unknown
   quality_control: Unknown
   access: open
-  license: Stability AI Community
+  license: Stability Community License
   intended_uses: The model is intended for use in game development, virtual reality,
     retail, architecture, design and other graphically intense professions. It allows
     for rapid prototyping in 3D work, assisting both enterprises and indie developers.
diff --git a/assets/stanford.yaml b/assets/stanford.yaml
index 6feaa27d..c171b063 100644
--- a/assets/stanford.yaml
+++ b/assets/stanford.yaml
@@ -156,7 +156,7 @@
   created_date: 2024-09-08
   url: https://arxiv.org/pdf/2406.06512
   model_card: unknown
-  modality: Image; text
+  modality: image; text
   analysis: Merlin has been comprehensively evaluated on 6 task types and 752 individual
     tasks. The non-adapted (off-the-shelf) tasks include zero-shot findings classification,
     phenotype classification, and zero-shot cross-modal retrieval, while model adapted
diff --git a/assets/together.yaml b/assets/together.yaml
index 7defabd2..8610083c 100644
--- a/assets/together.yaml
+++ b/assets/together.yaml
@@ -222,3 +222,39 @@
   prohibited_uses: ''
   monitoring: ''
   feedback: https://huggingface.co/togethercomputer/StripedHyena-Nous-7B/discussions
+- type: model
+  name: Dragonfly
+  organization: Together
+  description: A large vision-language model with multi-resolution zoom that enhances
+    fine-grained visual understanding and reasoning about image regions. The Dragonfly
+    model comes in two variants: the general-domain model ("Llama-3-8b-Dragonfly-v1")
+    trained on 5.5 million image-instruction pairs, and the biomedical variant ("Llama-3-8b-Dragonfly-Med-v1")
+    fine-tuned on an additional 1.4 million biomedical image-instruction pairs.
+    Dragonfly demonstrates promising performance on vision-language benchmarks like
+    commonsense visual QA and image captioning.
+  created_date: 2024-06-06
+  url: https://www.together.ai/blog/dragonfly-v1
+  model_card: unknown
+  modality: image, text; text
+  analysis: The model was evaluated using five popular vision-language benchmarks
+    that require strong commonsense reasoning and detailed image understanding:
+    AI2D, ScienceQA, MMMU, MMVet, and POPE. It demonstrated competitive performance
+    in these evaluations compared to other vision-language models.
+  size: 8B parameters
+  dependencies: [LLaMA]
+  training_emissions: unknown
+  training_time: unknown
+  training_hardware: unknown
+  quality_control: The model employs two key strategies (multi-resolution visual
+    encoding and zoom-in patch selection) that enable it to efficiently focus on
+    fine-grained details in image regions and provide better commonsense reasoning.
+    Its performance was evaluated on several benchmark tasks for quality assurance.
+  access: open
+  license: unknown
+  intended_uses: Dragonfly is designed for image-text tasks, including commonsense
+    visual question answering and image captioning. It is further focused on tasks
+    that require fine-grained understanding of high-resolution image regions, such
+    as in medical imaging.
+  prohibited_uses: Unknown
+  monitoring: Unknown
+  feedback: Unknown
diff --git a/assets/unknown.yaml b/assets/unknown.yaml
deleted file mode 100644
index ecc09eb5..00000000
--- a/assets/unknown.yaml
+++ /dev/null
@@ -1,38 +0,0 @@
----
-- type: model
-  name: Dragonfly
-  organization: Unknown (Team members include, Kezhen Chen, Rahul Thapa, Rahul Chalamala,
-    Ben Athiwaratkun, Shuaiwen Leon Song, James Zou)
-  description: A large vision-language model with multi-resolution zoom that enhances
-    fine-grained visual understanding and reasoning about image regions. The Dragonfly
-    model comes in two variants, the general-domain model ("Llama-3-8b-Dragonfly-v1")
-    trained on 5.5 million image-instruction pairs, and the biomedical variant ("Llama-3-8b-Dragonfly-Med-v1")
-    fine-tuned on an additional 1.4 million biomedical image-instruction pairs.
-    Dragonfly demonstrates promising performance on vision-language benchmarks like
-    commonsense visual QA and image captioning.
-  created_date: 2024-06-06
-  url: https://www.together.ai/blog/dragonfly-v1
-  model_card: unknown
-  modality: Image; text
-  analysis: The model was evaluated using five popular vision-language benchmarks
-    that require strong commonsense reasoning and detailed image understanding,
-    AI2D, ScienceQA, MMMU, MMVet, and POPE. It demonstrated competitive performance
-    in these evaluations compared to other vision-language models.
-  size: 8B parameters (Unknown if sparse or dense)
-  dependencies: [LLaMA]
-  training_emissions: Unknown
-  training_time: Unknown
-  training_hardware: Unknown
-  quality_control: The model employs two key strategies (multi-resolution visual
-    encoding and zoom-in patch selection) that enable it to efficiently focus on
-    fine-grained details in image regions and provide better commonsense reasoning.
-    Its performance was evaluated on several benchmark tasks for quality assurance.
-  access: open
-  license: Unknown
-  intended_uses: Dragonfly is designed for image-text tasks, including commonsense
-    visual question answering and image captioning. It is further focused on tasks
-    that require fine-grained understanding of high-resolution image regions, such
-    as in medical imaging.
-  prohibited_uses: Unknown
-  monitoring: Unknown
-  feedback: Unknown
diff --git a/assets/writer.yaml b/assets/writer.yaml
index 77179848..f50caa93 100644
--- a/assets/writer.yaml
+++ b/assets/writer.yaml
@@ -66,7 +66,7 @@
     achieved state-of-the-art results with an average score of 85.9%. It also
     demonstrated robust capability in efficiently processing extensive medical documents,
     as showcased by its near-perfect score in the NIH evaluation.
-  size: Unknown
+  size: 70B parameters
   dependencies: [Palmyra-X-004]
   training_emissions: Unknown
   training_time: Unknown
diff --git a/assets/xai.yaml b/assets/xai.yaml
index 8e8a1857..8e099c99 100644
--- a/assets/xai.yaml
+++ b/assets/xai.yaml
@@ -72,15 +72,15 @@
     performance levels competitive to other frontier models in areas such as graduate-level
     science knowledge (GPQA), general knowledge (MMLU, MMLU-Pro), and math competition
     problems (MATH).
-  size: Unknown
-  dependencies: [Grok-1.5, FLUX.1]
+  size: unknown
+  dependencies: []
   training_emissions: Unknown
   training_time: Unknown
   training_hardware: Unknown
   quality_control: Grok-2 models were tested in real-world scenarios using AI tutors
     that engaged with the models across a variety of tasks and selected the superior
     response based on specific criteria outlined in the guidelines.
-  access: Limited
+  access: limited
   license: Unknown
   intended_uses: The model is intended to be used for understanding text and vision,
     answering questions, collaborating on writing, solving coding tasks, and enhancing
diff --git a/js/main.js b/js/main.js
index 90c545b0..595c652f 100644
--- a/js/main.js
+++ b/js/main.js
@@ -641,13 +641,11 @@ function loadAssetsAndRenderPageContent() {
     'assets/lg_ai_research.yaml',
     'assets/mila.yaml',
     'assets/qwen_team.yaml',
-    'assets/roblox.yaml',
     'assets/runway_ai,_inc..yaml',
     'assets/soochow.yaml',
     'assets/baichuan.yaml',
     'assets/stability_ai.yaml',
-    'assets/team_glm,_zhipu_ai,_.yaml',
-    'assets/unknown.yaml',
+    'assets/glm.yaml',
     'assets/xwin.yaml',
     'assets/mistral.yaml',
     'assets/adobe.yaml',