From a404acec1fee392d1c55f3b6cbe18267dff3cbc1 Mon Sep 17 00:00:00 2001 From: jxue16 <105090474+jxue16@users.noreply.github.com> Date: Fri, 7 Jul 2023 18:42:48 -0700 Subject: [PATCH 01/10] add all assets --- assets/ares.yaml | 25 +++++++++++++++++++++++++ assets/autogpt.yaml | 23 +++++++++++++++++++++++ assets/bark.yaml | 24 ++++++++++++++++++++++++ assets/bedrock.yaml | 24 ++++++++++++++++++++++++ assets/beit.yaml | 25 +++++++++++++++++++++++++ assets/bigtrans.yaml | 25 +++++++++++++++++++++++++ assets/biomedgpt.yaml | 30 ++++++++++++++++++++++++++++++ assets/camel.yaml | 27 +++++++++++++++++++++++++++ assets/cformers.yaml | 23 +++++++++++++++++++++++ assets/chatglm.yaml | 24 ++++++++++++++++++++++++ assets/corgi.yaml | 27 +++++++++++++++++++++++++++ assets/cosmo.yaml | 25 +++++++++++++++++++++++++ assets/deepfloyd.yaml | 26 ++++++++++++++++++++++++++ assets/docugami.yaml | 24 ++++++++++++++++++++++++ assets/falcon.yaml | 25 +++++++++++++++++++++++++ assets/goat.yaml | 27 +++++++++++++++++++++++++++ assets/gorilla.yaml | 27 +++++++++++++++++++++++++++ assets/guanaco.yaml | 26 ++++++++++++++++++++++++++ assets/h2ogpt.yaml | 23 +++++++++++++++++++++++ assets/inflection-1.yaml | 25 +++++++++++++++++++++++++ assets/jointslm.yaml | 25 +++++++++++++++++++++++++ assets/koala.yaml | 24 ++++++++++++++++++++++++ assets/legomt.yaml | 24 ++++++++++++++++++++++++ assets/magma.yaml | 24 ++++++++++++++++++++++++ assets/mpt.yaml | 24 ++++++++++++++++++++++++ assets/multimodal.yaml | 24 ++++++++++++++++++++++++ assets/nextdoor.yaml | 24 ++++++++++++++++++++++++ assets/openflamingo.yaml | 25 +++++++++++++++++++++++++ assets/openfold.yaml | 23 +++++++++++++++++++++++ assets/openllama.yaml | 23 +++++++++++++++++++++++ assets/otter.yaml | 27 +++++++++++++++++++++++++++ assets/palm2.yaml | 30 ++++++++++++++++++++++++++++++ assets/palmyra.yaml | 26 ++++++++++++++++++++++++++ assets/peer.yaml | 28 ++++++++++++++++++++++++++++ assets/pi.yaml | 23 +++++++++++++++++++++++ assets/portkey.yaml | 23 +++++++++++++++++++++++ assets/pythia.yaml | 25 +++++++++++++++++++++++++ assets/redpajama.yaml | 24 ++++++++++++++++++++++++ assets/refinedweb.yaml | 25 +++++++++++++++++++++++++ assets/roentgen.yaml | 25 +++++++++++++++++++++++++ assets/santacoder.yaml | 24 ++++++++++++++++++++++++ assets/soda.yaml | 26 ++++++++++++++++++++++++++ assets/stablelm.yaml | 23 +++++++++++++++++++++++ assets/stack.yaml | 26 ++++++++++++++++++++++++++ assets/starcoder.yaml | 28 ++++++++++++++++++++++++++++ assets/transformify.yaml | 23 +++++++++++++++++++++++ assets/underwritegpt.yaml | 22 ++++++++++++++++++++++ assets/unilm.yaml | 24 ++++++++++++++++++++++++ assets/vicuna.yaml | 24 ++++++++++++++++++++++++ assets/voicebox.yaml | 25 +++++++++++++++++++++++++ assets/vqganclip.yaml | 25 +++++++++++++++++++++++++ assets/wizardlm.yaml | 28 ++++++++++++++++++++++++++++ 52 files changed, 1299 insertions(+) create mode 100644 assets/ares.yaml create mode 100644 assets/autogpt.yaml create mode 100644 assets/bark.yaml create mode 100644 assets/bedrock.yaml create mode 100644 assets/beit.yaml create mode 100644 assets/bigtrans.yaml create mode 100644 assets/biomedgpt.yaml create mode 100644 assets/camel.yaml create mode 100644 assets/cformers.yaml create mode 100644 assets/chatglm.yaml create mode 100644 assets/corgi.yaml create mode 100644 assets/cosmo.yaml create mode 100644 assets/deepfloyd.yaml create mode 100644 assets/docugami.yaml create mode 100644 assets/falcon.yaml create mode 100644 assets/goat.yaml create mode 100644 assets/gorilla.yaml create 
mode 100644 assets/guanaco.yaml create mode 100644 assets/h2ogpt.yaml create mode 100644 assets/inflection-1.yaml create mode 100644 assets/jointslm.yaml create mode 100644 assets/koala.yaml create mode 100644 assets/legomt.yaml create mode 100644 assets/magma.yaml create mode 100644 assets/mpt.yaml create mode 100644 assets/multimodal.yaml create mode 100644 assets/nextdoor.yaml create mode 100644 assets/openflamingo.yaml create mode 100644 assets/openfold.yaml create mode 100644 assets/openllama.yaml create mode 100644 assets/otter.yaml create mode 100644 assets/palm2.yaml create mode 100644 assets/palmyra.yaml create mode 100644 assets/peer.yaml create mode 100644 assets/pi.yaml create mode 100644 assets/portkey.yaml create mode 100644 assets/pythia.yaml create mode 100644 assets/redpajama.yaml create mode 100644 assets/refinedweb.yaml create mode 100644 assets/roentgen.yaml create mode 100644 assets/santacoder.yaml create mode 100644 assets/soda.yaml create mode 100644 assets/stablelm.yaml create mode 100644 assets/stack.yaml create mode 100644 assets/starcoder.yaml create mode 100644 assets/transformify.yaml create mode 100644 assets/underwritegpt.yaml create mode 100644 assets/unilm.yaml create mode 100644 assets/vicuna.yaml create mode 100644 assets/voicebox.yaml create mode 100644 assets/vqganclip.yaml create mode 100644 assets/wizardlm.yaml diff --git a/assets/ares.yaml b/assets/ares.yaml new file mode 100644 index 00000000..3af17a66 --- /dev/null +++ b/assets/ares.yaml @@ -0,0 +1,25 @@ +--- + +- type: application + name: ARES + organization: Faraday Lab + description: ARES is a text-to-image generator based on Stable Diffusion. The goal is to provide a simple tool with a user interface allowing mainstream AI access for artists and creators. + created_date: + value: 2023-04-26 + explanation: The date Faraday Lab's hugging face card was last updated. Date + for ARES release is not published on site. + url: https://faradaylab.fr/ + dependencies: [Stable Diffusion] + adaptation: '' + output_space: generated images + quality_control: '' + access: open + license: unknown + terms_of_service: '' + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' + monthly_active_users: '' + user_distribution: '' + failures: '' diff --git a/assets/autogpt.yaml b/assets/autogpt.yaml new file mode 100644 index 00000000..5a3a040f --- /dev/null +++ b/assets/autogpt.yaml @@ -0,0 +1,23 @@ +--- + +- type: application + name: Auto-GPT + organization: Auto-GPT + description: Auto-GPT is an experimental open-source application showcasing the + capabilities of the GPT-4 language model. + created_date: 2023-04-16 + url: https://news.agpt.co/ + dependencies: [GPT-4 API] + adaptation: GPT-4 adapted to run autonomously by chaining together LLM "thoughts" + output_space: text + quality_control: '' + access: open + license: MIT + terms_of_service: '' + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' + monthly_active_users: '' + user_distribution: '' + failures: '' diff --git a/assets/bark.yaml b/assets/bark.yaml new file mode 100644 index 00000000..6e277fb6 --- /dev/null +++ b/assets/bark.yaml @@ -0,0 +1,24 @@ +--- + +- type: model + name: Bark + organization: Suno + description: Bark is a text-to-audio model that can generate multilingual speech + as well as other noises. 
+ created_date: 2023-04-20 + url: https://github.com/suno-ai/bark + model_card: https://github.com/suno-ai/bark/blob/main/model-card.md + modality: text input with audio output + analysis: '' + size: '' + dependencies: [AudioLM] + training_emissions: unknown + training_time: unknown + training_hardware: '' + quality_control: '' + access: open + license: MIT + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: https://huggingface.co/spaces/suno/bark/discussions diff --git a/assets/bedrock.yaml b/assets/bedrock.yaml new file mode 100644 index 00000000..1022592a --- /dev/null +++ b/assets/bedrock.yaml @@ -0,0 +1,24 @@ +--- + +- type: application + name: Bedrock + organization: Amazon + description: Bedrock is a new service that makes FMs from AI21 Labs, Anthropic, Stability AI, and Amazon accessible via an API. Bedrock is intended for customers to build and scale generative AI-based applications using FMs, democratizing access for all builders. + created_date: 2023-04-13 + url: https://aws.amazon.com/bedrock/ + dependencies: [Jurassic-2, Claude, Stable Diffusion, Amazon Titan] + adaptation: '' + output_space: foundation models made accessible via an API + quality_control: '' + access: limited + license: unknown + terms_of_service: https://aws.amazon.com/service-terms/ + intended_uses: allowing companies to incorporate generative AI into their business + models + prohibited_uses: '' + monitoring: '' + feedback: '' + monthly_active_users: '' + user_distribution: '' + failures: '' diff --git a/assets/beit.yaml b/assets/beit.yaml new file mode 100644 index 00000000..adabec71 --- /dev/null +++ b/assets/beit.yaml @@ -0,0 +1,25 @@ +--- + +- type: model + name: BEiT-3 + organization: Microsoft + description: BEiT-3 is a general-purpose multimodal foundation model for vision + and vision-language tasks. + created_date: 2022-08-31 + url: https://arxiv.org/pdf/2208.10442.pdf + model_card: '' + modality: text and image input and output + analysis: Evaluated on a range of standardized vision benchmarks, achieving + state-of-the-art performance on all of them.
+ size: 1.9B parameters + dependencies: [Multiway Transformer network] + training_emissions: unknown + training_time: '' + training_hardware: '' + quality_control: '' + access: open + license: '' + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' diff --git a/assets/bigtrans.yaml b/assets/bigtrans.yaml new file mode 100644 index 00000000..0634f801 --- /dev/null +++ b/assets/bigtrans.yaml @@ -0,0 +1,25 @@ +--- + +- type: model + name: BigTrans + organization: Institute of Automation Chinese Academy of Sciences + description: BigTrans is a model that adapts LLaMA, which covers only 20 languages, + and enhances it with multilingual translation capability for more than 100 languages. + created_date: 2023-05-29 + url: https://arxiv.org/pdf/2305.18098v1.pdf + model_card: https://huggingface.co/James-WYang/BigTrans + modality: text + analysis: Reports results on standard translation benchmarks across 102 languages + in comparison with Google Translate and ChatGPT + size: 13B parameters (dense model) + dependencies: [LLaMA, CLUE, BigTrans parallel dataset] + training_emissions: unknown + training_time: unknown + training_hardware: 16 A100 GPUs with 80 GB of RAM + quality_control: '' + access: open + license: Apache 2.0 + intended_uses: Advancing future research in multilingual LLMs + prohibited_uses: '' + monitoring: '' + feedback: https://huggingface.co/James-WYang/BigTrans/discussions diff --git a/assets/biomedgpt.yaml b/assets/biomedgpt.yaml new file mode 100644 index 00000000..9e8df418 --- /dev/null +++ b/assets/biomedgpt.yaml @@ -0,0 +1,30 @@ +--- + +- type: model + name: BiomedGPT + organization: Lehigh University + description: BiomedGPT leverages self-supervision on large and diverse datasets + to accept multi-modal inputs and perform a range of downstream tasks. + created_date: 2023-05-26 + url: https://arxiv.org/pdf/2305.17100.pdf + model_card: '' + modality: image and text input, text output + analysis: Outperforms the majority of preceding state-of-the-art models across 15 unique + biomedical modalities. + size: 472M parameters + dependencies: + - GPT-style autoregressive decoder + - BiomedGPT biomedical datasets + training_emissions: unknown + training_time: unknown + training_hardware: 10 NVIDIA A5000 GPUs + quality_control: No specific quality control is mentioned in model training, though + details on data processing and how the model was trained are provided in the + paper. + access: open + license: Apache 2.0 + intended_uses: furthering research in developing unified and generalist models + for biomedicine. + prohibited_uses: '' + monitoring: '' + feedback: '' diff --git a/assets/camel.yaml b/assets/camel.yaml new file mode 100644 index 00000000..3ac188dd --- /dev/null +++ b/assets/camel.yaml @@ -0,0 +1,27 @@ +--- + +- type: model + name: Camel + organization: Writer + description: Camel is an instruction-following large language model tailored + for advanced NLP and comprehension capabilities. + created_date: + value: 2023-04-01 + explanation: The model was stated to be published in April, but which day is + not specified on the website.
+ url: https://chatcamel.vercel.app/ + model_card: https://huggingface.co/Writer/camel-5b-hf + modality: text + analysis: '' + size: 5B parameters + dependencies: [Palmyra, Camel dataset] + training_emissions: unknown + training_time: unknown + training_hardware: '' + quality_control: '' + access: open + license: Apache 2.0 + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: https://huggingface.co/Writer/camel-5b-hf/discussions diff --git a/assets/cformers.yaml b/assets/cformers.yaml new file mode 100644 index 00000000..f2b8fe84 --- /dev/null +++ b/assets/cformers.yaml @@ -0,0 +1,23 @@ +--- + +- type: application + name: Cformers + organization: Nolano + description: Cformers is a set of transformers that act as an API for AI inference + in code. + created_date: 2023-03-19 + url: https://www.nolano.org/services/Cformers/ + dependencies: [] + adaptation: '' + output_space: '' + quality_control: '' + access: limited + license: MIT + terms_of_service: '' + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' + monthly_active_users: '' + user_distribution: '' + failures: '' diff --git a/assets/chatglm.yaml b/assets/chatglm.yaml new file mode 100644 index 00000000..f41b3a75 --- /dev/null +++ b/assets/chatglm.yaml @@ -0,0 +1,24 @@ +--- + +- type: model + name: ChatGLM + organization: ChatGLM + description: ChatGLM is a Chinese-English language model with question and answer + and dialogue functions, and is aimed at a Chinese audience. + created_date: 2023-03-14 + url: https://chatglm.cn/blog + model_card: '' + modality: text + analysis: Performance evaluated on English and Chinese language benchmark tests. + size: 6B parameters + dependencies: [GLM-130B] + training_emissions: unknown + training_time: unknown + training_hardware: '' + quality_control: '' + access: open + license: Apache 2.0 + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' diff --git a/assets/corgi.yaml b/assets/corgi.yaml new file mode 100644 index 00000000..968b1228 --- /dev/null +++ b/assets/corgi.yaml @@ -0,0 +1,27 @@ +--- + +- type: model + name: CORGI + organization: Stanford + description: Model trained to generate language corrections for physical control + tasks. + created_date: 2023-06-12 + url: https://arxiv.org/pdf/2306.07012.pdf + model_card: '' + modality: trajectory input with text output + analysis: Evaluated on three physical control tasks, drawing, steering, and human + body movement on various dynamics + size: 124M parameters + dependencies: [GPT-2, BABEL, text-davinci-003] + training_emissions: '' + training_time: + value: unknown + explanation: The authors do not report the training time, but do report that they train for 200 epochs. + training_hardware: one NVIDIA A40 GPU + quality_control: '' + access: open + license: MIT + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' diff --git a/assets/cosmo.yaml b/assets/cosmo.yaml new file mode 100644 index 00000000..6b5230a4 --- /dev/null +++ b/assets/cosmo.yaml @@ -0,0 +1,25 @@ +--- + +- type: model + name: COSMO + organization: Allen Institute for AI + description: COSMO is a conversation agent with greater generalizability on both + in- and out-of-domain chitchat datasets + created_date: 2023-05-24 + url: https://arxiv.org/pdf/2212.10465.pdf + model_card: https://huggingface.co/allenai/cosmo-xl + modality: text + analysis: Evaluated by human testers on generalization capabilities and responses + compared to other chatbots. 
+ size: 11B parameters + dependencies: [SODA, ProsocialDialog, T5] + training_emissions: unknown + training_time: unknown + training_hardware: v3-128 TPU accelerators with batch size 256 + quality_control: '' + access: open + license: '' + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: https://huggingface.co/allenai/cosmo-xl/discussions diff --git a/assets/deepfloyd.yaml b/assets/deepfloyd.yaml new file mode 100644 index 00000000..b04b90ae --- /dev/null +++ b/assets/deepfloyd.yaml @@ -0,0 +1,26 @@ +--- + +- type: model + name: DeepFloyd IF + organization: Stability AI + description: A text-to-image cascaded pixel diffusion model released in conjunction + with AI research lab DeepFloyd. + created_date: 2023-04-28 + url: https://stability.ai/blog/deepfloyd-if-text-to-image-model + model_card: https://huggingface.co/DeepFloyd/IF-I-XL-v1.0 + modality: textual input with image output + analysis: Evaluated on the COCO dataset. + size: 4.3B parameters + dependencies: [LAION-5B] + training_emissions: '' + training_time: '' + training_hardware: '' + quality_control: '' + access: open + license: + value: custom + explanation: License agreement can be found on model card https://huggingface.co/DeepFloyd/IF-I-XL-v1.0 + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: https://huggingface.co/DeepFloyd/IF-I-XL-v1.0/discussions diff --git a/assets/docugami.yaml b/assets/docugami.yaml new file mode 100644 index 00000000..b1b56bf6 --- /dev/null +++ b/assets/docugami.yaml @@ -0,0 +1,24 @@ +--- + +- type: model + name: Docugami + organization: Docugami + description: Docugami is an LLM focused on writing business documents and data + using generative AI. + created_date: 2021-04-12 + url: https://www.docugami.com/generative-ai + model_card: '' + modality: text + analysis: '' + size: 20B parameters + dependencies: [] + training_emissions: unknown + training_time: unknown + training_hardware: '' + quality_control: '' + access: limited + license: '' + intended_uses: analyzing, writing, and connecting business documents and data + prohibited_uses: '' + monitoring: '' + feedback: '' diff --git a/assets/falcon.yaml b/assets/falcon.yaml new file mode 100644 index 00000000..b1fede02 --- /dev/null +++ b/assets/falcon.yaml @@ -0,0 +1,25 @@ +--- + +- type: model + name: Falcon + organization: UAE Technology Innovation Institute + description: Falcon-40B is a 40B-parameter causal decoder-only model built by + TII and trained on 1,000B tokens of RefinedWeb enhanced with curated corpora. + created_date: 2023-06-14 + url: https://huggingface.co/tiiuae/falcon-40b + model_card: https://huggingface.co/tiiuae/falcon-40b + modality: natural language text + analysis: '' + size: 40B parameters + dependencies: [RefinedWeb] + training_emissions: '' + training_time: 2 months + training_hardware: 384 A100 40GB GPUs + quality_control: '' + access: open + license: Apache 2.0 + intended_uses: '' + prohibited_uses: irresponsible or harmful use or production use without adequate + assessment of risks and mitigation + monitoring: None + feedback: https://huggingface.co/tiiuae/falcon-40b/discussions diff --git a/assets/goat.yaml b/assets/goat.yaml new file mode 100644 index 00000000..5e0ab85c --- /dev/null +++ b/assets/goat.yaml @@ -0,0 +1,27 @@ +--- + +- type: model + name: GOAT + organization: National University of Singapore + description: GOAT is a fine-tuned LLaMA model which leverages the consistent tokenization of numbers + to significantly outperform benchmark standards on a range of arithmetic tasks.
+ created_date: 2023-05-23 + url: https://arxiv.org/pdf/2305.14201.pdf + model_card: none + modality: textual input, numerical data output + analysis: Performance assessed on BIG-bench arithmetic sub-task, and various elementary + arithmetic tasks. + size: 7B parameters + dependencies: [LLaMA, GOAT dataset] + training_emissions: unknown + training_time: unknown + training_hardware: 24 GB VRAM GPU + quality_control: Number data is randomly generated from log space to reduce likelihood + of redundancy and range of magnitudes. + access: open + license: Apache 2.0 + intended_uses: Integration into other instruction-tuned LLMs to further enhance + arithmetic reasoning abilities in solving math word problems. + prohibited_uses: '' + monitoring: '' + feedback: '' diff --git a/assets/gorilla.yaml b/assets/gorilla.yaml new file mode 100644 index 00000000..d6c440d4 --- /dev/null +++ b/assets/gorilla.yaml @@ -0,0 +1,27 @@ +--- + +- type: model + name: Gorilla + organization: UC Berkeley + description: Gorilla is a finetuned LLaMA-based model that surpasses the performance + of GPT-4 on writing API calls. + created_date: 2023-05-24 + url: https://arxiv.org/pdf/2305.15334v1.pdf + model_card: '' + modality: outputs API from natural language input + analysis: Evaluated using AST sub-tree matching technique and compared to other + models in terms of API functionality accuracy. + size: 7B parameters + dependencies: [LLaMA, Gorilla document retriever] + training_emissions: '' + training_time: '' + training_hardware: '' + quality_control: No specific quality control is mentioned in model training, though + details on data processing and collection are provided in the paper. + access: open + license: Apache 2.0 + intended_uses: In conjunction with a LLM to improve its capability for using API + calls. + prohibited_uses: '' + monitoring: '' + feedback: '' diff --git a/assets/guanaco.yaml b/assets/guanaco.yaml new file mode 100644 index 00000000..32208f00 --- /dev/null +++ b/assets/guanaco.yaml @@ -0,0 +1,26 @@ +--- + +- type: model + name: Guanaco + organization: University of Washington + description: Guanaco is a model family trained with QLORA, an efficient finetuning + approach that reduces memory usage enough to finetune a 65B parameter model + on a single 48GB GPU while preserving full 16-bit finetuning task performance. + created_date: 2023-05-23 + url: https://arxiv.org/pdf/2305.14314v1.pdf + model_card: '' + modality: natural language text + analysis: Reports results on the Vicuna benchmark and compares performance level + and time expenditure with ChatGPT + size: 33B parameters + dependencies: [QLoRA, OASST1] + training_emissions: '' + training_time: '' + training_hardware: A single 24 GB GPU + quality_control: '' + access: open + license: MIT + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' diff --git a/assets/h2ogpt.yaml b/assets/h2ogpt.yaml new file mode 100644 index 00000000..601e629d --- /dev/null +++ b/assets/h2ogpt.yaml @@ -0,0 +1,23 @@ +--- + +- type: model + name: h2oGPT + organization: H2O AI + description: Series of models fine-tuned on well-known LLMs using the h2oGPT repositories. + created_date: 2023-06-16 + url: https://arxiv.org/pdf/2306.08161.pdf + model_card: https://huggingface.co/h2oai/h2ogpt-oasst1-512-20b + modality: text + analysis: Evaluated on EleutherAI evaluation harness. 
+ size: 20B parameters + dependencies: [GPT-NeoX, H2O AI OpenAssistant, h2oGPT Repositories] + training_emissions: unknown + training_time: unknown + training_hardware: unspecified number of 48GB A100 NVIDIA GPUs + quality_control: '' + access: open + license: Apache 2.0 + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: https://huggingface.co/h2oai/h2ogpt-oasst1-512-20b/discussions diff --git a/assets/inflection-1.yaml b/assets/inflection-1.yaml new file mode 100644 index 00000000..1af3f04e --- /dev/null +++ b/assets/inflection-1.yaml @@ -0,0 +1,25 @@ +--- + +- type: model + name: Inflection-1 + organization: Inflection AI + description: Inflection AI's in-house LLM, which powers Pi and will soon be available via Inflection AI's conversational API. + created_date: 2023-06-22 + url: https://inflection.ai/inflection-1 + model_card: '' + modality: text + analysis: Evaluated on a wide range of language benchmarks like MMLU 5-shot, GSM-8K, and HellaSwag 10-shot, among others. + size: + value: unknown + explanation: Designed to be roughly ChatGPT's size (175B parameters). + dependencies: [] + training_emissions: '' + training_time: '' + training_hardware: unknown + quality_control: '' + access: limited + license: unknown + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' diff --git a/assets/jointslm.yaml b/assets/jointslm.yaml new file mode 100644 index 00000000..0465fc82 --- /dev/null +++ b/assets/jointslm.yaml @@ -0,0 +1,25 @@ +--- + +- type: model + name: Google Joint SLM + organization: Google + description: Joint speech and language model using a Speech2Text adapter and + CTC-based blank filtering. + created_date: 2023-06-08 + url: https://arxiv.org/pdf/2306.07944.pdf + model_card: '' + modality: speech input with textual output + analysis: Evaluated on the DSTC11 Challenge Task, based on MultiWoz 2.1, with a focus + on dialog state tracking. + size: '' + dependencies: [CTC blank-filtering, Speech2Text adapter] + training_emissions: '' + training_time: '' + training_hardware: '' + quality_control: '' + access: open + license: Google + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' diff --git a/assets/koala.yaml b/assets/koala.yaml new file mode 100644 index 00000000..03b456c7 --- /dev/null +++ b/assets/koala.yaml @@ -0,0 +1,24 @@ +--- + +- type: model + name: Koala + organization: Berkeley + description: A relatively small chatbot trained by fine-tuning Meta’s LLaMA on + dialogue data gathered from the web. + created_date: 2023-04-03 + url: https://bair.berkeley.edu/blog/2023/04/03/koala/ + model_card: https://huggingface.co/TheBloke/koala-7B-GPTQ-4bit-128g + modality: natural language text + analysis: Evaluated in comparison with ChatGPT and Stanford Alpaca. + size: 13B parameters + dependencies: [LLaMA, web-scraped dialogue data] + training_emissions: '' + training_time: 6 hours + training_hardware: 8 A100 GPUs + quality_control: '' + access: open + license: Apache 2.0 + intended_uses: academic research + prohibited_uses: '' + monitoring: '' + feedback: https://huggingface.co/TheBloke/koala-7B-GPTQ-4bit-128g/discussions diff --git a/assets/legomt.yaml b/assets/legomt.yaml new file mode 100644 index 00000000..fb7a3353 --- /dev/null +++ b/assets/legomt.yaml @@ -0,0 +1,24 @@ +--- + +- type: model + name: Lego-MT + organization: Shanghai AI Laboratory + description: Lego-MT is a multilingual large language model which takes the more + efficient approach of being an effective detachable model.
+ created_date: 2023-05-29 + url: https://arxiv.org/pdf/2212.10551.pdf + model_card: '' + modality: multilingual text + analysis: Evaluated based on own constructed dataset covering 433 languages. + size: 1.2B parameters + dependencies: [OPUS] + training_emissions: unknown + training_time: 15 days + training_hardware: 32 A100 GPUs + quality_control: '' + access: open + license: '' + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' diff --git a/assets/magma.yaml b/assets/magma.yaml new file mode 100644 index 00000000..7b290942 --- /dev/null +++ b/assets/magma.yaml @@ -0,0 +1,24 @@ +--- + +- type: model + name: MAGMA + organization: Aleph Alpha + description: An autoregressive VL model that is able to generate text from an + arbitrary combination of visual and textual input + created_date: 2022-10-24 + url: https://arxiv.org/pdf/2112.05253.pdf + model_card: '' + modality: image and text input with natural language text output + analysis: Evaluated on the OKVQA benchmark as a fully open-ended generative task. + size: 6B parameters + dependencies: [GPT-J, CLIP] + training_emissions: '' + training_time: '' + training_hardware: 32 A100 GPUs + quality_control: '' + access: open + license: MIT + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' diff --git a/assets/mpt.yaml b/assets/mpt.yaml new file mode 100644 index 00000000..5d97e67e --- /dev/null +++ b/assets/mpt.yaml @@ -0,0 +1,24 @@ +--- + +- type: model + name: MPT + organization: Mosaic + description: MPT is a series of large language models seeking to address the limitations + of other open source models like LLaMA and Pythia. + created_date: 2023-05-05 + url: https://www.mosaicml.com/blog/mpt-7b + model_card: '' + modality: text + analysis: Evaluated on a range of benchmarks and performed on par with LLaMA-7B. + size: 7B parameters + dependencies: [] + training_emissions: unknown + training_time: 9.5 days + training_hardware: 440 A100 40GB GPUs + quality_control: '' + access: open + license: Apache 2.0 + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' diff --git a/assets/multimodal.yaml b/assets/multimodal.yaml new file mode 100644 index 00000000..4f7727cc --- /dev/null +++ b/assets/multimodal.yaml @@ -0,0 +1,24 @@ +--- + +- type: dataset + name: Multimodal C4 + organization: Allen Institute for AI + description: An augmentation of C4 with images added and made openly available. + created_date: 2023-06-09 + url: https://arxiv.org/pdf/2304.06939.pdf + datasheet: '' + modality: text and images + size: 43B English tokens with 101.2M documents and 571M images + sample: [] + analysis: Conducted experiments on models trained with Multimodal C4 in comparison + to models trained on single image/caption datasets + dependencies: [C4] + included: '' + excluded: '' + quality_control: '' + access: open + license: MIT + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' diff --git a/assets/nextdoor.yaml b/assets/nextdoor.yaml new file mode 100644 index 00000000..0dfafe79 --- /dev/null +++ b/assets/nextdoor.yaml @@ -0,0 +1,24 @@ +--- + +- type: application + name: Nextdoor Assistant + organization: Nextdoor + description: AI chatbot on Nextdoor that helps users write more clear and conscientious + posts. 
+ created_date: 2023-05-02 + url: https://help.nextdoor.com/s/article/Introducing-Assistant + dependencies: [ChatGPT] + adaptation: '' + output_space: natural language text guidance + quality_control: '' + access: open + license: unknown + terms_of_service: '' + intended_uses: to be used to help make the Nextdoor experience more positive for + users + prohibited_uses: '' + monitoring: '' + feedback: '' + monthly_active_users: '' + user_distribution: '' + failures: '' diff --git a/assets/openflamingo.yaml b/assets/openflamingo.yaml new file mode 100644 index 00000000..1a79d3db --- /dev/null +++ b/assets/openflamingo.yaml @@ -0,0 +1,25 @@ +--- + +- type: model + name: OpenFlamingo + organization: LAION + description: An open-source reproduction of DeepMind's Flamingo model. At its + core, OpenFlamingo is a framework that enables training and evaluation of large + multimodal models (LMMs). + created_date: 2023-03-28 + url: https://laion.ai/blog/open-flamingo/ + model_card: https://github.com/mlfoundations/open_flamingo/blob/main/MODEL_CARD.md + modality: image and text input with textual output + analysis: Evaluated on COCO captioning and VQAv2 vision-language tasks. + size: 9B parameters + dependencies: [LLaMA, CLIP] + training_emissions: '' + training_time: '' + training_hardware: '' + quality_control: '' + access: open + license: MIT + intended_uses: academic research purposes + prohibited_uses: commercial use + monitoring: '' + feedback: '' diff --git a/assets/openfold.yaml b/assets/openfold.yaml new file mode 100644 index 00000000..5723593a --- /dev/null +++ b/assets/openfold.yaml @@ -0,0 +1,23 @@ +--- + +- type: model + name: OpenFold + organization: Columbia University + description: OpenFold is an open source recreation of AlphaFold2. + created_date: 2022-11-20 + url: https://www.biorxiv.org/content/10.1101/2022.11.20.517210v2 + model_card: '' + modality: protein structures + analysis: Evaluated on a wide range of tasks using its own evaluation benchmarks. + size: '' + dependencies: [AlphaFold2, OpenProteinSet] + training_emissions: unknown + training_time: 50,000 GPU hours + training_hardware: Single A100 NVIDIA GPU + quality_control: '' + access: open + license: CC BY 4.0 + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' diff --git a/assets/openllama.yaml b/assets/openllama.yaml new file mode 100644 index 00000000..a43c19cb --- /dev/null +++ b/assets/openllama.yaml @@ -0,0 +1,23 @@ +--- + +- type: model + name: OpenLLaMA + organization: Berkeley AI Research + description: OpenLLaMA is an open-source reproduction of Meta's LLaMA model. + created_date: 2023-05-03 + url: https://github.com/openlm-research/open_llama + model_card: '' + modality: text + analysis: Evaluated on a wide range of tasks using its own evaluation benchmarks. + size: 17B parameters + dependencies: [LLaMA, RedPajama] + training_emissions: unknown + training_time: unknown + training_hardware: '' + quality_control: '' + access: open + license: Apache 2.0 + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' diff --git a/assets/otter.yaml b/assets/otter.yaml new file mode 100644 index 00000000..02f40c24 --- /dev/null +++ b/assets/otter.yaml @@ -0,0 +1,27 @@ +--- + +- type: model + name: Otter + organization: Nanyang Technological University + description: Otter is a multi-modal model based on OpenFlamingo (open-sourced + version of DeepMind’s Flamingo), trained on MIMIC-IT and showcasing improved + instruction-following ability and in-context learning.
+ created_date: 2023-05-05 + url: https://arxiv.org/pdf/2305.03726v1.pdf + model_card: https://github.com/Luodian/Otter/blob/main/docs/model_card.md + modality: outputs text from text and image input + analysis: Evaluated on researcher experiments to test deeper understanding and + advanced commonsense reasoning + size: 1.3B parameters + dependencies: [MIMIC-IT, OpenFlamingo] + training_emissions: '' + training_time: '' + training_hardware: 4 RTX-3090 GPUs + quality_control: '' + access: open + license: MIT + intended_uses: Following and executing new instructions with few in-context learning + examples given image and textual input. + prohibited_uses: '' + monitoring: '' + feedback: '' diff --git a/assets/palm2.yaml b/assets/palm2.yaml new file mode 100644 index 00000000..0791eb27 --- /dev/null +++ b/assets/palm2.yaml @@ -0,0 +1,30 @@ +--- + +- type: model + name: PaLM 2 + organization: Google + description: PaLM2 is a new state-of-the-art language model that has better multilingual + and reasoning capabilities and is more compute-efficient than its predecessor + PaLM. PaLM 2 is a Transformer-based model trained using a mixture of objectives + similar to UL2. + created_date: 2023-05-10 + url: https://blog.google/technology/ai/google-palm-2-ai-large-language-model/ + model_card: https://ai.google/static/documents/palm2techreport.pdf + modality: text (input and output) + analysis: Reports results on standard code benchmarks across a variety of programming + languages. + size: 3.6T parameters + dependencies: [palm 2 dataset] + training_emissions: '' + training_time: '' + training_hardware: TPU v4 (number unspecified) + quality_control: Employed de-duplication, removal of sensitive-PII and filtering. + Added control tokens marking toxicity of text. + access: open + license: Google + intended_uses: general use large language model that can be used for language, + reasoning, and code tasks. + prohibited_uses: becoming part of a general-purpose service or product or use + within specific downstream applications without prior assessment + monitoring: Google internal monitoring + feedback: Specific queries provided by annotators diff --git a/assets/palmyra.yaml b/assets/palmyra.yaml new file mode 100644 index 00000000..5dd78f33 --- /dev/null +++ b/assets/palmyra.yaml @@ -0,0 +1,26 @@ +--- + +- type: model + name: Palmyra + organization: Writer + description: Palmyra is a privacy-first LLM for enterprises trained on business and marketing writing. + created_date: + value: 2023-01-01 + explanation: The model was stated to be published in January, but which day is + not specified on the website. + url: https://gpt3demo.com/apps/palmyra + model_card: https://huggingface.co/Writer/palmyra-base + modality: text + analysis: Evaluated on the SuperGLUE benchmark + size: 128M (Small), 5B (Base), 20B (Large) parameters + dependencies: [Writer dataset] + training_emissions: unknown + training_time: unknown + training_hardware: '' + quality_control: '' + access: open + license: Apache 2.0 + intended_uses: generating text from a prompt + prohibited_uses: '' + monitoring: '' + feedback: https://huggingface.co/Writer/palmyra-base/discussions diff --git a/assets/peer.yaml b/assets/peer.yaml new file mode 100644 index 00000000..12ce8a96 --- /dev/null +++ b/assets/peer.yaml @@ -0,0 +1,28 @@ +--- + +- type: model + name: PEER + organization: Meta + description: PEER is a collaborative language model that is trained to imitate + the entire writing process itself. 
PEER can write drafts, add suggestions, propose + edits and provide explanations for its actions. + created_date: 2022-08-24 + url: https://arxiv.org/pdf/2208.11663.pdf + model_card: '' + modality: natural language text + analysis: PEER is evaluated on core research questions intended to gauge language + understanding, proper use of citations, instruction following, and iterative + use. + size: 3B parameters + dependencies: [] + training_emissions: '' + training_time: '' + training_hardware: 64 GPUs + quality_control: Heuristics and edit filtering was used on data set, which consisted + mostly of Wikipedia pages. + access: open + license: '' + intended_uses: adapting LLMs to work with collaborative writing and updating. + prohibited_uses: '' + monitoring: '' + feedback: '' diff --git a/assets/pi.yaml b/assets/pi.yaml new file mode 100644 index 00000000..3c35b7b4 --- /dev/null +++ b/assets/pi.yaml @@ -0,0 +1,23 @@ +--- + +- type: application + name: Pi + organization: Inflection AI + description: Personal AI chatbot designed to be conversational and specialized + in emotional intelligence. + created_date: 2023-05-02 + url: https://inflection.ai/press + dependencies: [Inflection-1] + adaptation: '' + output_space: natural language text responses + quality_control: '' + access: limited + license: unknown + terms_of_service: '' + intended_uses: to be used as a personal assistant chatbot for everyday activities + prohibited_uses: '' + monitoring: '' + feedback: '' + monthly_active_users: '' + user_distribution: '' + failures: '' diff --git a/assets/portkey.yaml b/assets/portkey.yaml new file mode 100644 index 00000000..2fca2885 --- /dev/null +++ b/assets/portkey.yaml @@ -0,0 +1,23 @@ +--- + +- type: application + name: Portkey + organization: Portkey + description: Portkey is a hosted middleware that allows users to create generative + AI applications + created_date: 2023-05-06 + url: https://portkey.ai/ + dependencies: [] + adaptation: '' + output_space: generative AI apps + quality_control: '' + access: open + license: '' + terms_of_service: https://portkey.ai/terms + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' + monthly_active_users: '' + user_distribution: '' + failures: '' diff --git a/assets/pythia.yaml b/assets/pythia.yaml new file mode 100644 index 00000000..6421f741 --- /dev/null +++ b/assets/pythia.yaml @@ -0,0 +1,25 @@ +--- + +- type: model + name: Pythia + organization: Eleuther AI + description: A suite of 16 LLMs all trained on public data seen in the exact same + order and ranging in size from 70M to 12B parameters + created_date: 2023-05-31 + url: https://arxiv.org/pdf/2304.01373.pdf + model_card: https://huggingface.co/EleutherAI/pythia-12b + modality: natural language text + analysis: Evaluated on a variety of NLP benchmarks and found to perform similarly + to OPT and BLOOM models. 
+ size: 12B parameters + dependencies: [The Pile] + training_emissions: '' + training_time: '' + training_hardware: 64 A100 GPUs + quality_control: '' + access: open + license: Apache 2.0 + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: https://huggingface.co/EleutherAI/pythia-6.9b/discussions diff --git a/assets/redpajama.yaml b/assets/redpajama.yaml new file mode 100644 index 00000000..0f1a51ba --- /dev/null +++ b/assets/redpajama.yaml @@ -0,0 +1,24 @@ +--- + +- type: dataset + name: RedPajama-Data + organization: Together + description: The RedPajama base dataset is a 1.2 trillion token fully-open dataset + created by following the recipe described in the LLaMA paper. + created_date: 2023-04-17 + url: https://www.together.xyz/blog/redpajama + datasheet: https://huggingface.co/datasets/togethercomputer/RedPajama-Data-1T + modality: text + size: 1.2 trillion tokens + sample: [] + analysis: '' + dependencies: [LLaMA dataset, GitHub, Wikipedia] + included: '' + excluded: '' + quality_control: '' + access: open + license: Apache 2.0 + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' diff --git a/assets/refinedweb.yaml b/assets/refinedweb.yaml new file mode 100644 index 00000000..89e584d2 --- /dev/null +++ b/assets/refinedweb.yaml @@ -0,0 +1,25 @@ +--- + +- type: dataset + name: RefinedWeb + organization: UAE Technology Innovation Institute + description: RefinedWeb is a high-quality, five-trillion-token, web-only English pretraining dataset. + created_date: 2023-06-01 + url: https://arxiv.org/pdf/2306.01116.pdf + datasheet: https://huggingface.co/datasets/tiiuae/falcon-refinedweb + modality: text + size: 600B tokens + sample: [] + analysis: '' + dependencies: [] + included: '' + excluded: '' + quality_control: '' + access: open + license: + value: unknown + explanation: License can be found at https://huggingface.co/datasets/tiiuae/falcon-refinedweb + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' diff --git a/assets/roentgen.yaml b/assets/roentgen.yaml new file mode 100644 index 00000000..31450cd2 --- /dev/null +++ b/assets/roentgen.yaml @@ -0,0 +1,25 @@ +--- + +- type: model + name: RoentGen + organization: Stanford University AIMI + description: RoentGen is a generative medical imaging model that can create visually + convincing X-ray images. + created_date: 2022-11-23 + url: https://arxiv.org/pdf/2211.12737.pdf + model_card: '' + modality: text input and image output + analysis: Evaluated on its own framework that tests domain-specific tasks in the medical + field. + size: 330M parameters + dependencies: [Stable Diffusion, RoentGen radiology dataset] + training_emissions: unknown + training_time: 60k training steps per day + training_hardware: 64 A100 GPUs + quality_control: '' + access: open + license: '' + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' diff --git a/assets/santacoder.yaml b/assets/santacoder.yaml new file mode 100644 index 00000000..ddaf3596 --- /dev/null +++ b/assets/santacoder.yaml @@ -0,0 +1,24 @@ +--- + +- type: model + name: SantaCoder + organization: BigCode + description: Multilingual code model derived from the BigCode Project's findings + on the association between GitHub stars and data quality. + created_date: 2023-02-24 + url: https://arxiv.org/pdf/2301.03988.pdf + model_card: '' + modality: code + analysis: Evaluated on MultiPL-E system benchmarks.
+ size: 1.1B parameters + dependencies: [The Stack, BigCode Dataset] + training_emissions: '' + training_time: 3.1 days + training_hardware: 96 Tesla V100 GPUs + quality_control: '' + access: open + license: Apache 2.0 + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' diff --git a/assets/soda.yaml b/assets/soda.yaml new file mode 100644 index 00000000..dd5991d7 --- /dev/null +++ b/assets/soda.yaml @@ -0,0 +1,26 @@ +--- + +- type: dataset + name: SODA + organization: Allen Institute for AI + description: SODA is the first publicly available, million-scale, high-quality + dialogue dataset covering a wide range of social interactions. + created_date: 2023-05-24 + url: https://arxiv.org/pdf/2212.10465.pdf + datasheet: https://huggingface.co/datasets/allenai/soda + modality: text + size: 1.5M dialogues + sample: [] + analysis: Randomly sampled dialogues from the dataset are evaluated according to six + established criteria of natural flow, context dependence, topic consistency, + speaker consistency, specificity, and overall. + dependencies: [] + included: '' + excluded: '' + quality_control: '' + access: open + license: CC BY 4.0 + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' diff --git a/assets/stablelm.yaml b/assets/stablelm.yaml new file mode 100644 index 00000000..d92fdf8c --- /dev/null +++ b/assets/stablelm.yaml @@ -0,0 +1,23 @@ +--- + +- type: model + name: StableLM + organization: Stability AI + description: Large language models trained on up to 1.5 trillion tokens. + created_date: 2023-04-20 + url: https://github.com/Stability-AI/StableLM + model_card: '' + modality: natural language text + analysis: '' + size: 7B parameters + dependencies: [StableLM-Alpha dataset, Alpaca dataset, gpt4all dataset, ShareGPT52K dataset, Dolly dataset, HH dataset] + training_emissions: '' + training_time: '' + training_hardware: '' + quality_control: '' + access: open + license: Apache 2.0 + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' diff --git a/assets/stack.yaml b/assets/stack.yaml new file mode 100644 index 00000000..416829f6 --- /dev/null +++ b/assets/stack.yaml @@ -0,0 +1,26 @@ +--- + +- type: dataset + name: The Stack + organization: BigCode + description: The Stack is a 3.1 TB dataset consisting of permissively licensed + source code intended for use in creating code LLMs. + created_date: 2022-11-20 + url: https://arxiv.org/pdf/2211.15533.pdf + datasheet: https://huggingface.co/datasets/bigcode/the-stack + modality: code (358 programming languages) + size: 3.1 TB + sample: [] + analysis: Evaluated models trained on The Stack on HumanEval and MBPP and compared + against similarly-sized models. + dependencies: [GitHub] + included: '' + excluded: '' + quality_control: allowed users whose data were part of The Stack's training data + to opt out + access: open + license: Apache 2.0 + intended_uses: creating code LLMs + prohibited_uses: '' + monitoring: '' + feedback: '' diff --git a/assets/starcoder.yaml b/assets/starcoder.yaml new file mode 100644 index 00000000..55f2dc24 --- /dev/null +++ b/assets/starcoder.yaml @@ -0,0 +1,28 @@ +--- + +- type: model + name: StarCoder + organization: BigCode + description: StarCoder is a Large Language Model for Code (Code LLM) trained on + permissively licensed data from GitHub, including from 80+ programming languages, + Git commits, GitHub issues, and Jupyter notebooks.
+ created_date: 2023-05-09 + url: https://arxiv.org/pdf/2305.06161.pdf + model_card: https://huggingface.co/bigcode/starcoder + modality: code (80+ programming languages) + analysis: Tested on several benchmarks, most notably Python benchmark HumanEval. + size: 15.5B parameters + dependencies: [The Stack] + training_emissions: '' + training_time: 24 days + training_hardware: 512 Tesla A100 GPU + quality_control: No specific quality control is mentioned in model training, though + details on data processing and how the tokenizer was trained are provided in + the paper. + access: open + license: Apache 2.0 + intended_uses: With a Tech Assistant prompt and not as an instruction model given + training limitations. + prohibited_uses: '' + monitoring: '' + feedback: https://huggingface.co/bigcode/starcoder/discussions diff --git a/assets/transformify.yaml b/assets/transformify.yaml new file mode 100644 index 00000000..118b559f --- /dev/null +++ b/assets/transformify.yaml @@ -0,0 +1,23 @@ +--- + +- type: application + name: Transformify Automate + organization: Transformify + description: Transformify Automate is a platform for automated task integration + using natural language prompts. + created_date: 2023-05-30 + url: https://www.transformify.ai/automate + dependencies: [GPT-4] + adaptation: '' + output_space: text and code + quality_control: '' + access: open + license: '' + terms_of_service: https://www.transformify.ai/legal-stuff + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' + monthly_active_users: '' + user_distribution: '' + failures: '' diff --git a/assets/underwritegpt.yaml b/assets/underwritegpt.yaml new file mode 100644 index 00000000..76870f97 --- /dev/null +++ b/assets/underwritegpt.yaml @@ -0,0 +1,22 @@ +--- + +- type: application + name: UnderwriteGPT + organization: Paladin Group and Dais Technology + description: UnderwriteGPT is the world's first generative AI underwriting tool. + created_date: 2023-02-01 + url: https://dais.com/underwritegpt/ + dependencies: [] + adaptation: '' + output_space: '' + quality_control: '' + access: limited + license: '' + terms_of_service: '' + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' + monthly_active_users: '' + user_distribution: '' + failures: '' diff --git a/assets/unilm.yaml b/assets/unilm.yaml new file mode 100644 index 00000000..845df162 --- /dev/null +++ b/assets/unilm.yaml @@ -0,0 +1,24 @@ +--- + +- type: model + name: UniLM + organization: Microsoft + description: UniLM is a unified language model that can be fine-tuned for both + natural language understanding and generation tasks. + created_date: 2019-10-01 + url: https://proceedings.neurips.cc/paper_files/paper/2019/file/c20bb2d9a50d5ac1f713f8b34d9aac5a-Paper.pdf + model_card: '' + modality: text + analysis: Evaluated on GLUE, SQuAD 2.0, and CoQA benchmarks. + size: 340M parameters + dependencies: [] + training_emissions: unknown + training_time: 10,000 steps in 7 hours + training_hardware: 8 NVIDIA Tesla V100 32GB GPUs + quality_control: '' + access: open + license: MIT + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' diff --git a/assets/vicuna.yaml b/assets/vicuna.yaml new file mode 100644 index 00000000..6f07781e --- /dev/null +++ b/assets/vicuna.yaml @@ -0,0 +1,24 @@ +--- + +- type: model + name: Vicuna + organization: LMSYS + description: An open-source chatbot trained by fine-tuning LLaMA on user-shared + conversations collected from ShareGPT. 
+ created_date: 2023-03-30 + url: https://lmsys.org/blog/2023-03-30-vicuna/ + model_card: https://huggingface.co/lmsys/vicuna-13b-delta-v0 + modality: natural language text + analysis: Evaluated against similar LLMs using GPT-4 as a judge. + size: 13B parameters + dependencies: [LLaMA, ShareGPT conversations data] + training_emissions: '' + training_time: 1 day + training_hardware: 8 A100 GPUs + quality_control: '' + access: open + license: Apache 2.0 + intended_uses: research on LLMs and chatbots + prohibited_uses: '' + monitoring: '' + feedback: https://huggingface.co/lmsys/vicuna-13b-delta-v0/discussions diff --git a/assets/voicebox.yaml b/assets/voicebox.yaml new file mode 100644 index 00000000..2f7a3579 --- /dev/null +++ b/assets/voicebox.yaml @@ -0,0 +1,25 @@ +--- + +- type: model + name: Voicebox + organization: Meta AI + description: Voicebox is the first generative AI model for speech to generalize + across tasks with state-of-the-art performance. + created_date: 2023-06-16 + url: https://research.facebook.com/publications/voicebox-text-guided-multilingual-universal-speech-generation-at-scale/ + model_card: '' + modality: text and audio input and audio output + analysis: Evaluated on zero-shot text-to-speech benchmarks, with Voicebox outperforming + the current state-of-the-art English model VALL-E. + size: 330M parameters + dependencies: [] + training_emissions: unknown + training_time: 750,000 iterations + training_hardware: 32 GPUs of unspecified type + quality_control: '' + access: closed + license: '' + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' diff --git a/assets/vqganclip.yaml b/assets/vqganclip.yaml new file mode 100644 index 00000000..29e64681 --- /dev/null +++ b/assets/vqganclip.yaml @@ -0,0 +1,25 @@ +--- + +- type: model + name: VQGAN-CLIP + organization: EleutherAI + description: VQGAN-CLIP is a model that better generates and edits images using + a multimodal encoder to guide image generation. + created_date: 2023-09-04 + url: https://arxiv.org/pdf/2204.08583.pdf + model_card: '' + modality: text input with image output + analysis: Evaluated by human testers rating alignment of text input, image output + pairs. + size: 227M parameters + dependencies: [VQGAN, CLIP] + training_emissions: unknown + training_time: Less than 1 V100-hour + training_hardware: 1 NVIDIA Tesla K80 GPU + quality_control: '' + access: open + license: MIT + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' diff --git a/assets/wizardlm.yaml b/assets/wizardlm.yaml new file mode 100644 index 00000000..3f94d441 --- /dev/null +++ b/assets/wizardlm.yaml @@ -0,0 +1,28 @@ +--- + +- type: model + name: WizardLM + organization: Microsoft + description: WizardLM is a model produced by starting with an initial set of instructions, + using the proposed Evol-Instruct method to rewrite them step by step into more + complex instructions, and then mixing all generated instruction data to fine-tune + LLaMA. + created_date: 2023-04-24 + url: https://arxiv.org/pdf/2304.12244v1.pdf + model_card: https://huggingface.co/WizardLM/WizardLM-13B-1.0 + modality: natural language text + analysis: Reports results on standard LLM benchmarks in comparison to other LLMs + and test sets.
+ size: 7B parameters + dependencies: [LLaMA, Evol-Instruct, Alpaca dataset] + training_emissions: '' + training_time: 70 hours on 3 epochs + training_hardware: 8 V100 GPUs + quality_control: '' + access: open + license: Apache 2.0 + intended_uses: Creating large amounts of instruction data, particularly with high + complexity + prohibited_uses: '' + monitoring: '' + feedback: https://huggingface.co/datasets/WizardLM/evol_instruct_70k/discussions From 547de3c31bef8be31794ae9fbf84c4d80262a367 Mon Sep 17 00:00:00 2001 From: jxue16 <105090474+jxue16@users.noreply.github.com> Date: Fri, 7 Jul 2023 18:49:54 -0700 Subject: [PATCH 02/10] update main --- js/main.js | 51 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) diff --git a/js/main.js b/js/main.js index c62ac282..bd6cdb54 100644 --- a/js/main.js +++ b/js/main.js @@ -629,6 +629,57 @@ function loadAssetsAndRenderPageContent() { const paths = [ 'assets/adept.yaml', + 'assets/ares.yaml', + 'assets/autogpt.yaml', + 'assets/bark.yaml', + 'assets/beit.yaml', + 'assets/bigtrans.yaml', + 'assets/biomedgpt.yaml', + 'assets/camel.yaml', + 'assets/cformers.yaml', + 'assets/chatglm.yaml', + 'assets/corgi.yaml', + 'assets/cosmo.yaml', + 'assets/deepfloyd.yaml', + 'assets/docugami.yaml', + 'assets/falcon.yaml', + 'assets/goat.yaml', + 'assets/gorilla.yaml', + 'assets/guanaco.yaml', + 'assets/h2ogpt.yaml', + 'assets/inflection-1.yaml', + 'assets/jointslm.yaml', + 'assets/koala.yaml', + 'assets/legomt.yaml', + 'assets/magma.yaml', + 'assets/mpt.yaml', + 'assets/multimodal.yaml', + 'assets/nextdoor.yaml', + 'assets/openflamingo.yaml', + 'assets/openfold.yaml', + 'assets/openllama.yaml', + 'assets/otter.yaml', + 'assets/palm2.yaml', + 'assets/palmyra.yaml', + 'assets/peer.yaml', + 'assets/pi.yaml', + 'assets/portkey.yaml', + 'assets/pythia.yaml', + 'assets/redpajama.yaml', + 'assets/refinedweb.yaml', + 'assets/roentgen.yaml', + 'assets/santacoder.yaml', + 'assets/soda.yaml', + 'assets/stablelm.yaml', + 'assets/stack.yaml', + 'assets/starcoder.yaml', + 'assets/transformify.yaml', + 'assets/underwritegpt.yaml', + 'assets/unilm.yaml', + 'assets/vicuna.yaml', + 'assets/voicebox.yaml', + 'assets/vqganclip.yaml', + 'assets/wizardlm.yaml', 'assets/ai2.yaml', 'assets/ai21.yaml', 'assets/aleph_alpha.yaml', From 45ea92098d9a6bffe03a270aad76e4a80ecd9f83 Mon Sep 17 00:00:00 2001 From: jxue16 <105090474+jxue16@users.noreply.github.com> Date: Fri, 7 Jul 2023 18:51:08 -0700 Subject: [PATCH 03/10] minor change --- assets/vqganclip.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/assets/vqganclip.yaml b/assets/vqganclip.yaml index 29e64681..59fb2959 100644 --- a/assets/vqganclip.yaml +++ b/assets/vqganclip.yaml @@ -5,7 +5,7 @@ organization: EleutherAI description: VQGAN-CLIP is a model that better generates and edits images using a multimodal encoder to guide image generation. 
- created_date: 2023-09-04 + created_date: 2022-09-04 url: https://arxiv.org/pdf/2204.08583.pdf model_card: '' modality: text input with image output From a59f7584305028513e87b15d814fe9df6a8f3eb9 Mon Sep 17 00:00:00 2001 From: jxue16 <105090474+jxue16@users.noreply.github.com> Date: Wed, 12 Jul 2023 21:39:04 -0700 Subject: [PATCH 04/10] reformat by organization --- assets/ai2.yaml | 72 +++++++++++++++ assets/aleph_alpha.yaml | 23 +++++ assets/{bedrock.yaml => amazon.yaml} | 6 +- assets/anthropic.yaml | 33 +++++++ assets/beit.yaml | 25 ------ assets/berkeley.yaml | 72 +++++++++++++++ assets/bigcode.yaml | 76 ++++++++++++++++ assets/camel.yaml | 27 ------ assets/{bigtrans.yaml => casia.yaml} | 0 assets/{openfold.yaml => columbia.yaml} | 2 +- assets/corgi.yaml | 27 ------ assets/cosmo.yaml | 25 ------ assets/deepfloyd.yaml | 26 ------ assets/docugami.yaml | 24 ----- assets/eleutherai.yaml | 48 ++++++++++ assets/{ares.yaml => faraday.yaml} | 4 +- assets/google.yaml | 53 +++++++++++ assets/gorilla.yaml | 27 ------ assets/{h2ogpt.yaml => h2oai.yaml} | 0 assets/inflection-1.yaml | 25 ------ assets/inflection.yaml | 49 +++++++++++ assets/jointslm.yaml | 25 ------ assets/koala.yaml | 24 ----- assets/laion.yaml | 24 +++++ assets/legomt.yaml | 24 ----- assets/{biomedgpt.yaml => lehigh.yaml} | 0 assets/{vicuna.yaml => lmsys.yaml} | 0 assets/magma.yaml | 24 ----- assets/meta.yaml | 51 +++++++++++ assets/microsoft.yaml | 97 +++++++++++++++++++++ assets/{mpt.yaml => mosaic.yaml} | 0 assets/multimodal.yaml | 24 ----- assets/{otter.yaml => nanyang.yaml} | 0 assets/{cformers.yaml => nolano.yaml} | 0 assets/openflamingo.yaml | 25 ------ assets/openllama.yaml | 23 ----- assets/{underwritegpt.yaml => paladin.yaml} | 0 assets/palm2.yaml | 30 ------- assets/palmyra.yaml | 26 ------ assets/peer.yaml | 28 ------ assets/pi.yaml | 23 ----- assets/pythia.yaml | 25 ------ assets/redpajama.yaml | 24 ----- assets/refinedweb.yaml | 25 ------ assets/roentgen.yaml | 25 ------ assets/santacoder.yaml | 24 ----- assets/shanghai.yaml | 23 +++++ assets/{goat.yaml => singapore.yaml} | 0 assets/soda.yaml | 26 ------ assets/stability.yaml | 53 +++++++++++ assets/stablelm.yaml | 23 ----- assets/stack.yaml | 26 ------ assets/stanford.yaml | 50 +++++++++++ assets/starcoder.yaml | 28 ------ assets/{bark.yaml => suno.yaml} | 0 assets/together.yaml | 23 +++++ assets/{falcon.yaml => uae.yaml} | 25 ++++++ assets/unilm.yaml | 24 ----- assets/{guanaco.yaml => uwashington.yaml} | 0 assets/voicebox.yaml | 25 ------ assets/vqganclip.yaml | 25 ------ assets/wizardlm.yaml | 28 ------ assets/writer.yaml | 53 +++++++++++ js/main.js | 65 ++++---------- 64 files changed, 852 insertions(+), 860 deletions(-) rename assets/{bedrock.yaml => amazon.yaml} (74%) delete mode 100644 assets/beit.yaml create mode 100644 assets/berkeley.yaml create mode 100644 assets/bigcode.yaml delete mode 100644 assets/camel.yaml rename assets/{bigtrans.yaml => casia.yaml} (100%) rename assets/{openfold.yaml => columbia.yaml} (94%) delete mode 100644 assets/corgi.yaml delete mode 100644 assets/cosmo.yaml delete mode 100644 assets/deepfloyd.yaml delete mode 100644 assets/docugami.yaml rename assets/{ares.yaml => faraday.yaml} (82%) delete mode 100644 assets/gorilla.yaml rename assets/{h2ogpt.yaml => h2oai.yaml} (100%) delete mode 100644 assets/inflection-1.yaml create mode 100644 assets/inflection.yaml delete mode 100644 assets/jointslm.yaml delete mode 100644 assets/koala.yaml delete mode 100644 assets/legomt.yaml rename assets/{biomedgpt.yaml => lehigh.yaml} (100%) rename 
assets/{vicuna.yaml => lmsys.yaml} (100%) delete mode 100644 assets/magma.yaml rename assets/{mpt.yaml => mosaic.yaml} (100%) delete mode 100644 assets/multimodal.yaml rename assets/{otter.yaml => nanyang.yaml} (100%) rename assets/{cformers.yaml => nolano.yaml} (100%) delete mode 100644 assets/openflamingo.yaml delete mode 100644 assets/openllama.yaml rename assets/{underwritegpt.yaml => paladin.yaml} (100%) delete mode 100644 assets/palm2.yaml delete mode 100644 assets/palmyra.yaml delete mode 100644 assets/peer.yaml delete mode 100644 assets/pi.yaml delete mode 100644 assets/pythia.yaml delete mode 100644 assets/redpajama.yaml delete mode 100644 assets/refinedweb.yaml delete mode 100644 assets/roentgen.yaml delete mode 100644 assets/santacoder.yaml rename assets/{goat.yaml => singapore.yaml} (100%) delete mode 100644 assets/soda.yaml delete mode 100644 assets/stablelm.yaml delete mode 100644 assets/stack.yaml delete mode 100644 assets/starcoder.yaml rename assets/{bark.yaml => suno.yaml} (100%) rename assets/{falcon.yaml => uae.yaml} (55%) delete mode 100644 assets/unilm.yaml rename assets/{guanaco.yaml => uwashington.yaml} (100%) delete mode 100644 assets/voicebox.yaml delete mode 100644 assets/vqganclip.yaml delete mode 100644 assets/wizardlm.yaml create mode 100644 assets/writer.yaml diff --git a/assets/ai2.yaml b/assets/ai2.yaml index dc9e232b..2c890050 100644 --- a/assets/ai2.yaml +++ b/assets/ai2.yaml @@ -21,3 +21,75 @@ prohibited_uses: '' monitoring: '' feedback: '' + +- type: dataset + name: SODA + organization: AI2 + description: SODA is the first publicly available, million-scale, high-quality + dialogue dataset covering a wide range of social interactions. + created_date: 2023-05-24 + url: https://arxiv.org/pdf/2212.10465.pdf + datasheet: https://huggingface.co/datasets/allenai/soda + modality: text + size: 1.5M dialogues + sample: [] + analysis: Randomly sampled dialogues from dataset are evaluated according to six + established criteria of natural flow, context dependence, topic consistency, + speaker consistency, specificity, and overall. + dependencies: [] + included: '' + excluded: '' + quality_control: '' + access: open + license: CC BY 4.0 + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' + +- type: dataset + name: Multimodal C4 + organization: AI2 + description: An augmentation of C4 with images added and made openly available. + created_date: 2023-06-09 + url: https://arxiv.org/pdf/2304.06939.pdf + datasheet: '' + modality: text and images + size: 43B English tokens with 101.2M documents and 571M images + sample: [] + analysis: Conducted experiments on models trained with Multimodal C4 in comparison + to models trained on single image/caption datasets + dependencies: [C4] + included: '' + excluded: '' + quality_control: '' + access: open + license: MIT + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' + +- type: model + name: COSMO + organization: AI2 + description: COSMO is a conversation agent with greater generalizability on both + in- and out-of-domain chitchat datasets + created_date: 2023-05-24 + url: https://arxiv.org/pdf/2212.10465.pdf + model_card: https://huggingface.co/allenai/cosmo-xl + modality: text + analysis: Evaluated by human testers on generalization capabilities and responses + compared to other chatbots. 
+ size: 11B parameters + dependencies: [SODA, ProsocialDialog, T5] + training_emissions: unknown + training_time: unknown + training_hardware: v3-128 TPU accelerators with batch size 256 + quality_control: '' + access: open + license: '' + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: https://huggingface.co/allenai/cosmo-xl/discussions diff --git a/assets/aleph_alpha.yaml b/assets/aleph_alpha.yaml index b7b635f6..587a9aca 100644 --- a/assets/aleph_alpha.yaml +++ b/assets/aleph_alpha.yaml @@ -100,3 +100,26 @@ monthly_active_users: unknown user_distribution: unknown failures: unknown + +- type: model + name: MAGMA + organization: Aleph Alpha + description: An autoregressive VL model that is able to generate text from an + arbitrary combination of visual and textual input + created_date: 2022-10-24 + url: https://arxiv.org/pdf/2112.05253.pdf + model_card: '' + modality: image and text input with natural language text output + analysis: Evaluated on the OKVQA benchmark as a fully open-ended generative task. + size: 6B parameters + dependencies: [GPT-J, CLIP] + training_emissions: '' + training_time: '' + training_hardware: 32 A100 GPUs + quality_control: '' + access: open + license: MIT + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' diff --git a/assets/bedrock.yaml b/assets/amazon.yaml similarity index 74% rename from assets/bedrock.yaml rename to assets/amazon.yaml index 1022592a..152e0389 100644 --- a/assets/bedrock.yaml +++ b/assets/amazon.yaml @@ -3,8 +3,10 @@ - type: application name: Bedrock organization: Amazon - description: Bedrock is a new service that makes FMs from AI21 Labs, Anthropic, Stability AI, and Amazon accessible via an API. Bedrock is intended for customers to build and scale generative AI-based applications using FMs, democratizing access for all builders. - using an API. + description: Bedrock is a new service that makes FMs from AI21 Labs, Anthropic, + Stability AI, and Amazon accessible via an API. Bedrock is intended for customers + to build and scale generative AI-based applications using FMs, democratizing + access for all builders. created_date: 2023-04-13 url: https://aws.amazon.com/bedrock/ dependencies: [Jurassic-2, Claude, Stable Diffusion, Amazon Titan] diff --git a/assets/anthropic.yaml b/assets/anthropic.yaml index 385d53ed..b47dc7b3 100644 --- a/assets/anthropic.yaml +++ b/assets/anthropic.yaml @@ -594,3 +594,36 @@ prohibited_uses: '' monitoring: '' feedback: '' + +- type: model + name: Claude 2 + organization: Anthropic + description: Claude 2 is a more evolved and refined version of Claude, which is + a general purpose large language model using a transformer architecture and + trained via unsupervised learning. + created_date: 2023-07-11 + url: https://www.anthropic.com/index/claude-2 + model_card: https://www-files.anthropic.com/production/images/Model-Card-Claude-2.pdf + modality: text + analysis: Evaluated with human feedback on helpfulness, harmfulness, and honesty + and on the Bias Benchmark for QA. + size: '' + dependencies: + - Claude human feedback data + - Unknown licensed third party datasets + training_emissions: '' + training_time: '' + training_hardware: unknown + quality_control: '' + access: limited + license: '' + intended_uses: Claude 2 tends to perform well at general, open-ended conversation; + search, writing, editing, outlining, and summarizing text; coding; and providing + helpful advice about a broad range of subjects. 
Claude 2 is particularly well + suited to support creative or literary use cases. They can take direction on + tone and “personality,” and users have described them as feeling steerable and + conversational. + prohibited_uses: Claude 2 should not be used on their own in high stakes situations + where an incorrect answer would cause harm. + monitoring: '' + feedback: '' diff --git a/assets/beit.yaml b/assets/beit.yaml deleted file mode 100644 index adabec71..00000000 --- a/assets/beit.yaml +++ /dev/null @@ -1,25 +0,0 @@ ---- - -- type: model - name: BEiT-3 - organization: Microsoft - description: BEiT-3 is a general-purpose multimodal foundation model for vision - and vision-language tasks. - created_date: 2022-08-31 - url: https://arxiv.org/pdf/2208.10442.pdf - model_card: '' - modality: text and image input and output - analysis: Evaluated on a range of standardized vision benchmarks, and achieves - state of the art performance on all experimentally. - size: 1.9B parameters - dependencies: [Multiway Transformer network] - training_emissions: unknown - training_time: '' - training_hardware: '' - quality_control: '' - access: open - license: '' - intended_uses: '' - prohibited_uses: '' - monitoring: '' - feedback: '' diff --git a/assets/berkeley.yaml b/assets/berkeley.yaml new file mode 100644 index 00000000..2b8d64de --- /dev/null +++ b/assets/berkeley.yaml @@ -0,0 +1,72 @@ +--- + +- type: model + name: Koala + organization: Berkeley + description: A relatively small chatbot trained by fine-tuning Meta’s LLaMA on + dialogue data gathered from the web. + created_date: 2023-04-03 + url: https://bair.berkeley.edu/blog/2023/04/03/koala/ + model_card: https://huggingface.co/TheBloke/koala-7B-GPTQ-4bit-128g + modality: natural language text + analysis: Evaluated in comparison with ChatGPT and Stanford Alpaca. + size: 13B parameters + dependencies: [LLaMA, web-scraped dialogue data] + training_emissions: '' + training_time: 6 hours + training_hardware: 8 A100 GPUs + quality_control: '' + access: open + license: Apache 2.0 + intended_uses: academic research + prohibited_uses: '' + monitoring: '' + feedback: https://huggingface.co/TheBloke/koala-7B-GPTQ-4bit-128g/discussions + +- type: model + name: Gorilla + organization: Berkeley + description: Gorilla is a finetuned LLaMA-based model that surpasses the performance + of GPT-4 on writing API calls. + created_date: 2023-05-24 + url: https://arxiv.org/pdf/2305.15334v1.pdf + model_card: '' + modality: outputs API from natural language input + analysis: Evaluated using AST sub-tree matching technique and compared to other + models in terms of API functionality accuracy. + size: 7B parameters + dependencies: [LLaMA, Gorilla document retriever] + training_emissions: '' + training_time: '' + training_hardware: '' + quality_control: No specific quality control is mentioned in model training, though + details on data processing and collection are provided in the paper. + access: open + license: Apache 2.0 + intended_uses: In conjunction with a LLM to improve its capability for using API + calls. + prohibited_uses: '' + monitoring: '' + feedback: '' + +- type: model + name: OpenLLaMA + organization: Berkeley + description: OpenLlama is an open source reproduction of Meta's LLaMA model. + created_date: 2023-05-03 + url: https://github.com/openlm-research/open_llama + model_card: '' + modality: text + analysis: Evaluated on wide range of tasks using own evaluation benchmarks. 
+ size: 17B parameters + dependencies: [LLaMA, RedPajama] + training_emissions: unknown + training_time: unknown + training_hardware: '' + quality_control: '' + access: open + license: Apache 2.0 + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' \ No newline at end of file diff --git a/assets/bigcode.yaml b/assets/bigcode.yaml new file mode 100644 index 00000000..8f3ababc --- /dev/null +++ b/assets/bigcode.yaml @@ -0,0 +1,76 @@ +--- + +- type: model + name: StarCoder + organization: BigCode + description: StarCoder is a Large Language Model for Code (Code LLM) trained on + permissively licensed data from GitHub, including from 80+ programming languages, + Git commits, GitHub issues, and Jupyter notebooks. + created_date: 2023-05-09 + url: https://arxiv.org/pdf/2305.06161.pdf + model_card: https://huggingface.co/bigcode/starcoder + modality: code (80+ programming languages) + analysis: Tested on several benchmarks, most notably Python benchmark HumanEval. + size: 15.5B parameters + dependencies: [The Stack] + training_emissions: '' + training_time: 24 days + training_hardware: 512 Tesla A100 GPUs + quality_control: No specific quality control is mentioned in model training, though + details on data processing and how the tokenizer was trained are provided in + the paper. + access: open + license: Apache 2.0 + intended_uses: With a Tech Assistant prompt and not as an instruction model given + training limitations. + prohibited_uses: '' + monitoring: '' + feedback: https://huggingface.co/bigcode/starcoder/discussions + +- type: model + name: SantaCoder + organization: BigCode + description: Multilingual code model derived from findings of BigCode Project + analysis on GitHub stars' association with data quality. + created_date: 2023-02-24 + url: https://arxiv.org/pdf/2301.03988.pdf + model_card: '' + modality: code + analysis: Evaluated on MultiPL-E system benchmarks. + size: 1.1B parameters + dependencies: [The Stack, BigCode Dataset] + training_emissions: '' + training_time: 3.1 days + training_hardware: 96 Tesla V100 GPUs + quality_control: '' + access: open + license: Apache 2.0 + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' + +- type: dataset + name: The Stack + organization: BigCode + description: The Stack is a 3.1 TB dataset consisting of permissively licensed + source code intended for use in creating code LLMs. + created_date: 2022-11-20 + url: https://arxiv.org/pdf/2211.15533.pdf + datasheet: https://huggingface.co/datasets/bigcode/the-stack + modality: code (358 programming languages) + size: 3.1 TB + sample: [] + analysis: Evaluated models trained on The Stack on HumanEval and MBPP and compared + against similarly-sized models. + dependencies: [GitHub] + included: '' + excluded: '' + quality_control: allowed users whose data were part of The Stack's training data + to opt-out + access: open + license: Apache 2.0 + intended_uses: creating code LLMs + prohibited_uses: '' + monitoring: '' + feedback: '' \ No newline at end of file diff --git a/assets/camel.yaml b/assets/camel.yaml deleted file mode 100644 index 3ac188dd..00000000 --- a/assets/camel.yaml +++ /dev/null @@ -1,27 +0,0 @@ ---- - -- type: model - name: Camel - organization: Writer - description: Camel is an instruction-following large language model tailored - for advanced NLP and comprehension capabilities. - created_date: - value: 2023-04-01 - explanation: The model was stated to be published in April, but which day is - not specified on the website. 
- url: https://chatcamel.vercel.app/ - model_card: https://huggingface.co/Writer/camel-5b-hf - modality: text - analysis: '' - size: 5B parameters - dependencies: [Palmyra, Camel dataset] - training_emissions: unknown - training_time: unknown - training_hardware: '' - quality_control: '' - access: open - license: Apache 2.0 - intended_uses: '' - prohibited_uses: '' - monitoring: '' - feedback: https://huggingface.co/Writer/camel-5b-hf/discussions diff --git a/assets/bigtrans.yaml b/assets/casia.yaml similarity index 100% rename from assets/bigtrans.yaml rename to assets/casia.yaml diff --git a/assets/openfold.yaml b/assets/columbia.yaml similarity index 94% rename from assets/openfold.yaml rename to assets/columbia.yaml index 5723593a..2ca39802 100644 --- a/assets/openfold.yaml +++ b/assets/columbia.yaml @@ -2,7 +2,7 @@ - type: model name: OpenFold - organization: Columbia University + organization: Columbia description: OpenFold is an open source recreation of AlphaFold2. created_date: 2022-11-20 url: https://www.biorxiv.org/content/10.1101/2022.11.20.517210v2 diff --git a/assets/corgi.yaml b/assets/corgi.yaml deleted file mode 100644 index 968b1228..00000000 --- a/assets/corgi.yaml +++ /dev/null @@ -1,27 +0,0 @@ ---- - -- type: model - name: CORGI - organization: Stanford - description: Model trained to generate language corrections for physical control - tasks. - created_date: 2023-06-12 - url: https://arxiv.org/pdf/2306.07012.pdf - model_card: '' - modality: trajectory input with text output - analysis: Evaluated on three physical control tasks, drawing, steering, and human - body movement on various dynamics - size: 124M parameters - dependencies: [GPT-2, BABEL, text-davinci-003] - training_emissions: '' - training_time: - value: unknown - explanation: The authors do not report the training time, but do report that they train for 200 epochs. - training_hardware: one NVIDIA A40 GPU - quality_control: '' - access: open - license: MIT - intended_uses: '' - prohibited_uses: '' - monitoring: '' - feedback: '' diff --git a/assets/cosmo.yaml b/assets/cosmo.yaml deleted file mode 100644 index 6b5230a4..00000000 --- a/assets/cosmo.yaml +++ /dev/null @@ -1,25 +0,0 @@ ---- - -- type: model - name: COSMO - organization: Allen Institute for AI - description: COSMO is a conversation agent with greater generalizability on both - in- and out-of-domain chitchat datasets - created_date: 2023-05-24 - url: https://arxiv.org/pdf/2212.10465.pdf - model_card: https://huggingface.co/allenai/cosmo-xl - modality: text - analysis: Evaluated by human testers on generalization capabilities and responses - compared to other chatbots. - size: 11B parameters - dependencies: [SODA, ProsocialDialog, T5] - training_emissions: unknown - training_time: unknown - training_hardware: v3-128 TPU accelerators with batch size 256 - quality_control: '' - access: open - license: '' - intended_uses: '' - prohibited_uses: '' - monitoring: '' - feedback: https://huggingface.co/allenai/cosmo-xl/discussions diff --git a/assets/deepfloyd.yaml b/assets/deepfloyd.yaml deleted file mode 100644 index b04b90ae..00000000 --- a/assets/deepfloyd.yaml +++ /dev/null @@ -1,26 +0,0 @@ ---- - -- type: model - name: DeepFloyd IF - organization: Stability AI - description: A text-to-image cascaded pixel diffusion model released in conjunction - with AI research lab DeepFloyd. 
- created_date: 2023-04-28 - url: https://stability.ai/blog/deepfloyd-if-text-to-image-model - model_card: https://huggingface.co/DeepFloyd/IF-I-XL-v1.0 - modality: textual input with image output - analysis: Evaluated on the COCO dataset. - size: 4.3B parameters - dependencies: [LAION-5B] - training_emissions: '' - training_time: '' - training_hardware: '' - quality_control: '' - access: open - license: - value: custom - explanation: License agreement can be found on model card https://huggingface.co/DeepFloyd/IF-I-XL-v1.0 - intended_uses: '' - prohibited_uses: '' - monitoring: '' - feedback: https://huggingface.co/DeepFloyd/IF-I-XL-v1.0/discussions diff --git a/assets/docugami.yaml b/assets/docugami.yaml deleted file mode 100644 index b1b56bf6..00000000 --- a/assets/docugami.yaml +++ /dev/null @@ -1,24 +0,0 @@ ---- - -- type: model - name: Docugami - organization: Microsoft - description: Docugami is a LLM focused on writing business documents and data - using generative AI. - created_date: 2021-04-12 - url: https://www.docugami.com/generative-ai - model_card: '' - modality: text - analysis: '' - size: 20B parameters - dependencies: [] - training_emissions: unknown - training_time: unknown - training_hardware: '' - quality_control: '' - access: limited - license: '' - intended_uses: analyzing, writing, and connecting business documents and data - prohibited_uses: '' - monitoring: '' - feedback: '' diff --git a/assets/eleutherai.yaml b/assets/eleutherai.yaml index c79165bb..945c27eb 100644 --- a/assets/eleutherai.yaml +++ b/assets/eleutherai.yaml @@ -267,3 +267,51 @@ monthly_active_users: unknown user_distribution: unknown failures: unknown + +- type: model + name: VQGAN-CLIP + organization: EleutherAI + description: VQGAN-CLIP is a model that better generates and edits images using + a multimodal encoder to guide image generation. + created_date: 2022-09-04 + url: https://arxiv.org/pdf/2204.08583.pdf + model_card: '' + modality: text input with image output + analysis: Evaluated by human testers rating alignment of text input, image output + pairs. + size: 227M parameters + dependencies: [VQGAN, CLIP] + training_emissions: unknown + training_time: Less than 1 V100-hour + training_hardware: 1 NVIDIA Tesla K80 GPU + quality_control: '' + access: open + license: MIT + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' + +- type: model + name: Pythia + organization: Eleuther AI + description: A suite of 16 LLMs all trained on public data seen in the exact same + order and ranging in size from 70M to 12B parameters + created_date: 2023-05-31 + url: https://arxiv.org/pdf/2304.01373.pdf + model_card: https://huggingface.co/EleutherAI/pythia-12b + modality: natural language text + analysis: Evaluated on a variety of NLP benchmarks and found to perform similarly + to OPT and BLOOM models. 
+ size: 12B parameters + dependencies: [The Pile] + training_emissions: '' + training_time: '' + training_hardware: 64 A100 GPUs + quality_control: '' + access: open + license: Apache 2.0 + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: https://huggingface.co/EleutherAI/pythia-6.9b/discussions \ No newline at end of file diff --git a/assets/ares.yaml b/assets/faraday.yaml similarity index 82% rename from assets/ares.yaml rename to assets/faraday.yaml index 3af17a66..7c986cb4 100644 --- a/assets/ares.yaml +++ b/assets/faraday.yaml @@ -3,7 +3,9 @@ - type: application name: ARES organization: Faraday Lab - description: ARES is a text-to-image generator based on Stable Diffusion. The goal is to provide a simple tool with a user interface allowing mainstream AI access for artists and creators. + description: ARES is a text-to-image generator based on Stable Diffusion. The + goal is to provide a simple tool with a user interface allowing mainstream AI + access for artists and creators. created_date: value: 2023-04-26 explanation: The date Faraday Lab's hugging face card was last updated. Date diff --git a/assets/google.yaml b/assets/google.yaml index 09556eee..4103f04b 100644 --- a/assets/google.yaml +++ b/assets/google.yaml @@ -1904,3 +1904,56 @@ prohibited_uses: '' monitoring: '' feedback: '' + +- type: model + name: Google Joint SLM + organization: Google + description: Joint speech and language model using a Speech2Text adapter and using + a CTC-based blank-filtering. + created_date: 2023-06-08 + url: https://arxiv.org/pdf/2306.07944.pdf + model_card: '' + modality: speech input with textual output + analysis: evaluated on DSTC11 Challenge Task, based on MultiWoz 2.1, with a focus + on dialog state tracking. + size: '' + dependencies: [CTC blank-filtering, Speech2Text adapter] + training_emissions: '' + training_time: '' + training_hardware: '' + quality_control: '' + access: open + license: Google + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' + +- type: model + name: PaLM 2 + organization: Google + description: PaLM2 is a new state-of-the-art language model that has better multilingual + and reasoning capabilities and is more compute-efficient than its predecessor + PaLM. PaLM 2 is a Transformer-based model trained using a mixture of objectives + similar to UL2. + created_date: 2023-05-10 + url: https://blog.google/technology/ai/google-palm-2-ai-large-language-model/ + model_card: https://ai.google/static/documents/palm2techreport.pdf + modality: text (input and output) + analysis: Reports results on standard code benchmarks across a variety of programming + languages. + size: 3.6T parameters + dependencies: [palm 2 dataset] + training_emissions: '' + training_time: '' + training_hardware: TPU v4 (number unspecified) + quality_control: Employed de-duplication, removal of sensitive-PII and filtering. + Added control tokens marking toxicity of text. + access: open + license: Google + intended_uses: general use large language model that can be used for language, + reasoning, and code tasks. 
+ prohibited_uses: becoming part of a general-purpose service or product or use + within specific downstream applications without prior assessment + monitoring: Google internal monitoring + feedback: Specific queries provided by annotators diff --git a/assets/gorilla.yaml b/assets/gorilla.yaml deleted file mode 100644 index d6c440d4..00000000 --- a/assets/gorilla.yaml +++ /dev/null @@ -1,27 +0,0 @@ ---- - -- type: model - name: Gorilla - organization: UC Berkeley - description: Gorilla is a finetuned LLaMA-based model that surpasses the performance - of GPT-4 on writing API calls. - created_date: 2023-05-24 - url: https://arxiv.org/pdf/2305.15334v1.pdf - model_card: '' - modality: outputs API from natural language input - analysis: Evaluated using AST sub-tree matching technique and compared to other - models in terms of API functionality accuracy. - size: 7B parameters - dependencies: [LLaMA, Gorilla document retriever] - training_emissions: '' - training_time: '' - training_hardware: '' - quality_control: No specific quality control is mentioned in model training, though - details on data processing and collection are provided in the paper. - access: open - license: Apache 2.0 - intended_uses: In conjunction with a LLM to improve its capability for using API - calls. - prohibited_uses: '' - monitoring: '' - feedback: '' diff --git a/assets/h2ogpt.yaml b/assets/h2oai.yaml similarity index 100% rename from assets/h2ogpt.yaml rename to assets/h2oai.yaml diff --git a/assets/inflection-1.yaml b/assets/inflection-1.yaml deleted file mode 100644 index 1af3f04e..00000000 --- a/assets/inflection-1.yaml +++ /dev/null @@ -1,25 +0,0 @@ ---- - -- type: model - name: Inflection-1 - organization: Inflection AI - description: Inflection AI's in-house LLM, which powers Pi and will soon be available via Inflection AI's conversational API. - created_date: 2023-06-22 - url: https://inflection.ai/inflection-1 - model_card: '' - modality: text - analysis: Evaluated on wide range of language benchmarks like MMLU 5-shot, GSM-8K, and HellaSwag 10-shot among others. - size: - value: unknown - explanation: Designed to be roughly around Chat-GPT's size (175B parameters). - dependencies: [] - training_emissions: '' - training_time: '' - training_hardware: unknown - quality_control: '' - access: limited - license: unknown - intended_uses: '' - prohibited_uses: '' - monitoring: '' - feedback: '' diff --git a/assets/inflection.yaml b/assets/inflection.yaml new file mode 100644 index 00000000..9d000476 --- /dev/null +++ b/assets/inflection.yaml @@ -0,0 +1,49 @@ +--- + +- type: model + name: Inflection-1 + organization: Inflection AI + description: Inflection AI's in-house LLM, which powers Pi and will soon be available + via Inflection AI's conversational API. + created_date: 2023-06-22 + url: https://inflection.ai/inflection-1 + model_card: '' + modality: text + analysis: Evaluated on wide range of language benchmarks like MMLU 5-shot, GSM-8K, + and HellaSwag 10-shot among others. + size: + value: unknown + explanation: Designed to be roughly around Chat-GPT's size (175B parameters). + dependencies: [] + training_emissions: '' + training_time: '' + training_hardware: unknown + quality_control: '' + access: limited + license: unknown + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' + +- type: application + name: Pi + organization: Inflection AI + description: Personal AI chatbot designed to be conversational and specialized + in emotional intelligence. 
+ created_date: 2023-05-02 + url: https://inflection.ai/press + dependencies: [Inflection-1] + adaptation: '' + output_space: natural language text responses + quality_control: '' + access: limited + license: unknown + terms_of_service: '' + intended_uses: to be used as a personal assistant chatbot for everyday activities + prohibited_uses: '' + monitoring: '' + feedback: '' + monthly_active_users: '' + user_distribution: '' + failures: '' \ No newline at end of file diff --git a/assets/jointslm.yaml b/assets/jointslm.yaml deleted file mode 100644 index 0465fc82..00000000 --- a/assets/jointslm.yaml +++ /dev/null @@ -1,25 +0,0 @@ ---- - -- type: model - name: Google Joint SLM - organization: Google - description: Joint speech and language model using a Speech2Text adapter and using - a CTC-based blank-filtering. - created_date: 2023-06-08 - url: https://arxiv.org/pdf/2306.07944.pdf - model_card: '' - modality: speech input with textual output - analysis: evaluated on DSTC11 Challenge Task, based on MultiWoz 2.1, with a focus - on dialog state tracking. - size: '' - dependencies: [CTC blank-filtering, Speech2Text adapter] - training_emissions: '' - training_time: '' - training_hardware: '' - quality_control: '' - access: open - license: Google - intended_uses: '' - prohibited_uses: '' - monitoring: '' - feedback: '' diff --git a/assets/koala.yaml b/assets/koala.yaml deleted file mode 100644 index 03b456c7..00000000 --- a/assets/koala.yaml +++ /dev/null @@ -1,24 +0,0 @@ ---- - -- type: model - name: Koala - organization: Berkeley - description: A relatively small chatbot trained by fine-tuning Meta’s LLaMA on - dialogue data gathered from the web. - created_date: 2023-04-03 - url: https://bair.berkeley.edu/blog/2023/04/03/koala/ - model_card: https://huggingface.co/TheBloke/koala-7B-GPTQ-4bit-128g - modality: natural language text - analysis: Evaluated in comparison with ChatGPT and Stanford Alpaca. - size: 13B parameters - dependencies: [LLaMA, web-scraped dialogue data] - training_emissions: '' - training_time: 6 hours - training_hardware: 8 A100 GPUs - quality_control: '' - access: open - license: Apache 2.0 - intended_uses: academic research - prohibited_uses: '' - monitoring: '' - feedback: https://huggingface.co/TheBloke/koala-7B-GPTQ-4bit-128g/discussions diff --git a/assets/laion.yaml b/assets/laion.yaml index 9d026ebc..6a62325e 100644 --- a/assets/laion.yaml +++ b/assets/laion.yaml @@ -154,3 +154,27 @@ from using LAION-2B-en for non-research purposes. monitoring: '' feedback: '' + +- type: model + name: OpenFlamingo + organization: LAION + description: An open-source reproduction of DeepMind's Flamingo model. At its + core, OpenFlamingo is a framework that enables training and evaluation of large + multimodal models (LMMs). + created_date: 2023-03-28 + url: https://laion.ai/blog/open-flamingo/ + model_card: https://github.com/mlfoundations/open_flamingo/blob/main/MODEL_CARD.md + modality: image and text input with textual output + analysis: Evaluated on COCO captioning and VQAv2 vision-language tasks. 
+ size: 9B parameters + dependencies: [LLaMA, CLIP] + training_emissions: '' + training_time: '' + training_hardware: '' + quality_control: '' + access: open + license: MIT + intended_uses: academic research purposes + prohibited_uses: commercial use + monitoring: '' + feedback: '' diff --git a/assets/legomt.yaml b/assets/legomt.yaml deleted file mode 100644 index fb7a3353..00000000 --- a/assets/legomt.yaml +++ /dev/null @@ -1,24 +0,0 @@ ---- - -- type: model - name: Lego-MT - organization: Shanghai AI Laboratory - description: Lego-MT is a multilingual large language model which uses a more - efficient approach of being an effective detachable model. - created_date: 2023-05-29 - url: https://arxiv.org/pdf/2212.10551.pdf - model_card: '' - modality: multilingual text - analysis: Evaluated based on own constructed dataset covering 433 languages. - size: 1.2B parameters - dependencies: [OPUS] - training_emissions: unknown - training_time: 15 days - training_hardware: 32 A100 GPUs - quality_control: '' - access: open - license: '' - intended_uses: '' - prohibited_uses: '' - monitoring: '' - feedback: '' diff --git a/assets/biomedgpt.yaml b/assets/lehigh.yaml similarity index 100% rename from assets/biomedgpt.yaml rename to assets/lehigh.yaml diff --git a/assets/vicuna.yaml b/assets/lmsys.yaml similarity index 100% rename from assets/vicuna.yaml rename to assets/lmsys.yaml diff --git a/assets/magma.yaml b/assets/magma.yaml deleted file mode 100644 index 7b290942..00000000 --- a/assets/magma.yaml +++ /dev/null @@ -1,24 +0,0 @@ ---- - -- type: model - name: MAGMA - organization: Aleph Alpha - description: An autoregressive VL model that is able to generate text from an - arbitrary combination of visual and textual input - created_date: 2022-10-24 - url: https://arxiv.org/pdf/2112.05253.pdf - model_card: '' - modality: image and text input with natural language text output - analysis: Evaluated on the OKVQA benchmark as a fully open-ended generative task. - size: 6B parameters - dependencies: [GPT-J, CLIP] - training_emissions: '' - training_time: '' - training_hardware: 32 A100 GPUs - quality_control: '' - access: open - license: MIT - intended_uses: '' - prohibited_uses: '' - monitoring: '' - feedback: '' diff --git a/assets/meta.yaml b/assets/meta.yaml index 0fdf0efa..a847abd6 100644 --- a/assets/meta.yaml +++ b/assets/meta.yaml @@ -608,3 +608,54 @@ monitoring: '' feedback: Feedback can be given via the feedback form on their website [segment-anything.com](https://segment-anything.com/) or by emailing at segment-anything at meta.com. + +- type: model + name: Voicebox + organization: Meta + description: Voicebox is the first generative AI model for speech to generalize + across tasks with state-of-the-art performance. + created_date: 2023-06-16 + url: https://research.facebook.com/publications/voicebox-text-guided-multilingual-universal-speech-generation-at-scale/ + model_card: '' + modality: text and audio input and audio output + analysis: Evaluated on zero-shot text-to-speech benchmarks, with Voicebox outperforming + the current state-of-the-art English model VALL-E. 
+ size: 330M parameters + dependencies: [] + training_emissions: unknown + training_time: 750,000 iterations + training_hardware: 32 GPUs of unspecified type + quality_control: '' + access: closed + license: '' + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' + +- type: model + name: PEER + organization: Meta + description: PEER is a collaborative language model that is trained to imitate + the entire writing process itself. PEER can write drafts, add suggestions, propose + edits and provide explanations for its actions. + created_date: 2022-08-24 + url: https://arxiv.org/pdf/2208.11663.pdf + model_card: '' + modality: natural language text + analysis: PEER is evaluated on core research questions intended to gauge language + understanding, proper use of citations, instruction following, and iterative + use. + size: 3B parameters + dependencies: [] + training_emissions: '' + training_time: '' + training_hardware: 64 GPUs + quality_control: Heuristics and edit filtering were used on the data set, which consisted + mostly of Wikipedia pages. + access: open + license: '' + intended_uses: adapting LLMs to work with collaborative writing and updating. + prohibited_uses: '' + monitoring: '' + feedback: '' diff --git a/assets/microsoft.yaml b/assets/microsoft.yaml index 298c806e..8d324607 100644 --- a/assets/microsoft.yaml +++ b/assets/microsoft.yaml @@ -779,3 +779,100 @@ monthly_active_users: unknown user_distribution: unknown failures: unknown + +- type: model + name: UniLM + organization: Microsoft + description: UniLM is a unified language model that can be fine-tuned for both + natural language understanding and generation tasks. + created_date: 2019-10-01 + url: https://proceedings.neurips.cc/paper_files/paper/2019/file/c20bb2d9a50d5ac1f713f8b34d9aac5a-Paper.pdf + model_card: '' + modality: text + analysis: Evaluated on GLUE, SQuAD 2.0, and CoQA benchmarks. + size: 340M parameters + dependencies: [] + training_emissions: unknown + training_time: 10,000 steps in 7 hours + training_hardware: 8 NVIDIA Tesla V100 32GB GPUs + quality_control: '' + access: open + license: MIT + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' + +- type: model + name: Docugami + organization: Microsoft + description: Docugami is an LLM focused on writing business documents and data + using generative AI. + created_date: 2021-04-12 + url: https://www.docugami.com/generative-ai + model_card: '' + modality: text + analysis: '' + size: 20B parameters + dependencies: [] + training_emissions: unknown + training_time: unknown + training_hardware: '' + quality_control: '' + access: limited + license: '' + intended_uses: analyzing, writing, and connecting business documents and data + prohibited_uses: '' + monitoring: '' + feedback: '' + +- type: model + name: BEiT-3 + organization: Microsoft + description: BEiT-3 is a general-purpose multimodal foundation model for vision + and vision-language tasks. + created_date: 2022-08-31 + url: https://arxiv.org/pdf/2208.10442.pdf + model_card: '' + modality: text and image input and output + analysis: Evaluated on a range of standardized vision benchmarks, and achieves + state-of-the-art performance on all of them experimentally. 
+ size: 1.9B parameters + dependencies: [Multiway Transformer network] + training_emissions: unknown + training_time: '' + training_hardware: '' + quality_control: '' + access: open + license: '' + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' + +- type: model + name: WizardLM + organization: Microsoft + description: Starting with an initial set of instructions, we use our proposed + Evol-Instruct to rewrite them step by step into more complex instructions. Then, + we mix all generated instruction data to fine-tune LLaMA. We call the resulting + model WizardLM. + created_date: 2023-04-24 + url: https://arxiv.org/pdf/2304.12244v1.pdf + model_card: https://huggingface.co/WizardLM/WizardLM-13B-1.0 + modality: natural language text + analysis: Reports results on standard LLM benchmarks in comparison to other LLMs + and test sets. + size: 7B parameters + dependencies: [LLaMA, Evol-Instruct, Alpaca dataset] + training_emissions: '' + training_time: 70 hours on 3 epochs + training_hardware: 8 V100 GPUs + quality_control: '' + access: open + license: Apache 2.0 + intended_uses: Creating large amounts of instruction data, particularly with high + complexity + prohibited_uses: '' + monitoring: '' + feedback: https://huggingface.co/datasets/WizardLM/evol_instruct_70k/discussions diff --git a/assets/mpt.yaml b/assets/mosaic.yaml similarity index 100% rename from assets/mpt.yaml rename to assets/mosaic.yaml diff --git a/assets/multimodal.yaml b/assets/multimodal.yaml deleted file mode 100644 index 4f7727cc..00000000 --- a/assets/multimodal.yaml +++ /dev/null @@ -1,24 +0,0 @@ ---- - -- type: dataset - name: Multimodal C4 - organization: Allen Institute for AI - description: An augmentation of C4 with images added and made openly available. - created_date: 2023-06-09 - url: https://arxiv.org/pdf/2304.06939.pdf - datasheet: '' - modality: text and images - size: 43B English tokens with 101.2M documents and 571M images - sample: [] - analysis: Conducted experiments on models trained with Multimodal C4 in comparison - to models trained on single image/caption datasets - dependencies: [C4] - included: '' - excluded: '' - quality_control: '' - access: open - license: MIT - intended_uses: '' - prohibited_uses: '' - monitoring: '' - feedback: '' diff --git a/assets/otter.yaml b/assets/nanyang.yaml similarity index 100% rename from assets/otter.yaml rename to assets/nanyang.yaml diff --git a/assets/cformers.yaml b/assets/nolano.yaml similarity index 100% rename from assets/cformers.yaml rename to assets/nolano.yaml diff --git a/assets/openflamingo.yaml b/assets/openflamingo.yaml deleted file mode 100644 index 1a79d3db..00000000 --- a/assets/openflamingo.yaml +++ /dev/null @@ -1,25 +0,0 @@ ---- - -- type: model - name: OpenFlamingo - organization: LAION - description: An open-source reproduction of DeepMind's Flamingo model. At its - core, OpenFlamingo is a framework that enables training and evaluation of large - multimodal models (LMMs). - created_date: 2023-03-28 - url: https://laion.ai/blog/open-flamingo/ - model_card: https://github.com/mlfoundations/open_flamingo/blob/main/MODEL_CARD.md - modality: image and text input with textual output - analysis: Evaluated on COCO captioning and VQAv2 vision-language tasks. 
- size: 9B parameters - dependencies: [LLaMA, CLIP] - training_emissions: '' - training_time: '' - training_hardware: '' - quality_control: '' - access: open - license: MIT - intended_uses: academic research purposes - prohibited_uses: commercial use - monitoring: '' - feedback: '' diff --git a/assets/openllama.yaml b/assets/openllama.yaml deleted file mode 100644 index a43c19cb..00000000 --- a/assets/openllama.yaml +++ /dev/null @@ -1,23 +0,0 @@ ---- - -- type: model - name: OpenLLaMA - organization: Berkeley AI Research - description: OpenLlama is a open source reproduction of Meta's LLaMA model. - created_date: 2023-05-03 - url: https://github.com/openlm-research/open_llama - model_card: '' - modality: text - analysis: Evaluated on wide range of tasks using own evaluation benchmarks. - size: 17B parameters - dependencies: [LLaMA, RedPajama] - training_emissions: unknown - training_time: unknown - training_hardware: '' - quality_control: '' - access: open - license: Apache 2.0 - intended_uses: '' - prohibited_uses: '' - monitoring: '' - feedback: '' diff --git a/assets/underwritegpt.yaml b/assets/paladin.yaml similarity index 100% rename from assets/underwritegpt.yaml rename to assets/paladin.yaml diff --git a/assets/palm2.yaml b/assets/palm2.yaml deleted file mode 100644 index 0791eb27..00000000 --- a/assets/palm2.yaml +++ /dev/null @@ -1,30 +0,0 @@ ---- - -- type: model - name: PaLM 2 - organization: Google - description: PaLM2 is a new state-of-the-art language model that has better multilingual - and reasoning capabilities and is more compute-efficient than its predecessor - PaLM. PaLM 2 is a Transformer-based model trained using a mixture of objectives - similar to UL2. - created_date: 2023-05-10 - url: https://blog.google/technology/ai/google-palm-2-ai-large-language-model/ - model_card: https://ai.google/static/documents/palm2techreport.pdf - modality: text (input and output) - analysis: Reports results on standard code benchmarks across a variety of programming - languages. - size: 3.6T parameters - dependencies: [palm 2 dataset] - training_emissions: '' - training_time: '' - training_hardware: TPU v4 (number unspecified) - quality_control: Employed de-duplication, removal of sensitive-PII and filtering. - Added control tokens marking toxicity of text. - access: open - license: Google - intended_uses: general use large language model that can be used for language, - reasoning, and code tasks. - prohibited_uses: becoming part of a general-purpose service or product or use - within specific downstream applications without prior assessment - monitoring: Google internal monitoring - feedback: Specific queries provided by annotators diff --git a/assets/palmyra.yaml b/assets/palmyra.yaml deleted file mode 100644 index 5dd78f33..00000000 --- a/assets/palmyra.yaml +++ /dev/null @@ -1,26 +0,0 @@ ---- - -- type: model - name: Palmyra - organization: Writer - description: Palmyra is a privacy-first LLM for enterprises trained on business and marketing writing. - created_date: - value: 2023-01-01 - explanation: The model was stated to be published in January, but which day is - not specified on the website. 
- url: https://gpt3demo.com/apps/palmyra - model_card: https://huggingface.co/Writer/palmyra-base - modality: text - analysis: Evaluated on the SuperGLUE benchmark - size: 128M (Small), 5B (Base), 20B (Large) parameters - dependencies: [Writer dataset] - training_emissions: unknown - training_time: unknown - training_hardware: '' - quality_control: '' - access: open - license: Apache 2.0 - intended_uses: generating text from a prompt - prohibited_uses: '' - monitoring: '' - feedback: https://huggingface.co/Writer/palmyra-base/discussions diff --git a/assets/peer.yaml b/assets/peer.yaml deleted file mode 100644 index 12ce8a96..00000000 --- a/assets/peer.yaml +++ /dev/null @@ -1,28 +0,0 @@ ---- - -- type: model - name: PEER - organization: Meta - description: PEER is a collaborative language model that is trained to imitate - the entire writing process itself. PEER can write drafts, add suggestions, propose - edits and provide explanations for its actions. - created_date: 2022-08-24 - url: https://arxiv.org/pdf/2208.11663.pdf - model_card: '' - modality: natural language text - analysis: PEER is evaluated on core research questions intended to gauge language - understanding, proper use of citations, instruction following, and iterative - use. - size: 3B parameters - dependencies: [] - training_emissions: '' - training_time: '' - training_hardware: 64 GPUs - quality_control: Heuristics and edit filtering was used on data set, which consisted - mostly of Wikipedia pages. - access: open - license: '' - intended_uses: adapting LLMs to work with collaborative writing and updating. - prohibited_uses: '' - monitoring: '' - feedback: '' diff --git a/assets/pi.yaml b/assets/pi.yaml deleted file mode 100644 index 3c35b7b4..00000000 --- a/assets/pi.yaml +++ /dev/null @@ -1,23 +0,0 @@ ---- - -- type: application - name: Pi - organization: Inflection AI - description: Personal AI chatbot designed to be conversational and specialized - in emotional intelligence. - created_date: 2023-05-02 - url: https://inflection.ai/press - dependencies: [Inflection-1] - adaptation: '' - output_space: natural language text responses - quality_control: '' - access: limited - license: unknown - terms_of_service: '' - intended_uses: to be used as a personal assistant chatbot for everyday activities - prohibited_uses: '' - monitoring: '' - feedback: '' - monthly_active_users: '' - user_distribution: '' - failures: '' diff --git a/assets/pythia.yaml b/assets/pythia.yaml deleted file mode 100644 index 6421f741..00000000 --- a/assets/pythia.yaml +++ /dev/null @@ -1,25 +0,0 @@ ---- - -- type: model - name: Pythia - organization: Eleuther AI - description: A suite of 16 LLMs all trained on public data seen in the exact same - order and ranging in size from 70M to 12B parameters - created_date: 2023-05-31 - url: https://arxiv.org/pdf/2304.01373.pdf - model_card: https://huggingface.co/EleutherAI/pythia-12b - modality: natural language text - analysis: Evaluated on a variety of NLP benchmarks and found to perform similarly - to OPT and BLOOM models. 
- size: 12B parameters - dependencies: [The Pile] - training_emissions: '' - training_time: '' - training_hardware: 64 A100 GPUs - quality_control: '' - access: open - license: Apache 2.0 - intended_uses: '' - prohibited_uses: '' - monitoring: '' - feedback: https://huggingface.co/EleutherAI/pythia-6.9b/discussions diff --git a/assets/redpajama.yaml b/assets/redpajama.yaml deleted file mode 100644 index 0f1a51ba..00000000 --- a/assets/redpajama.yaml +++ /dev/null @@ -1,24 +0,0 @@ ---- - -- type: dataset - name: RedPajama-Data - organization: Together - description: The RedPajama base dataset is a 1.2 trillion token fully-open dataset - created by following the recipe described in the LLaMA paper - created_date: 2022-04-17 - url: https://www.together.xyz/blog/redpajama - datasheet: https://huggingface.co/datasets/togethercomputer/RedPajama-Data-1T - modality: text - size: 1.2 trillion tokens - sample: [] - analysis: '' - dependencies: [LLaMA dataset, GitHub, Wikipedia] - included: '' - excluded: '' - quality_control: '' - access: open - license: Apache 2.0 - intended_uses: '' - prohibited_uses: '' - monitoring: '' - feedback: '' diff --git a/assets/refinedweb.yaml b/assets/refinedweb.yaml deleted file mode 100644 index 89e584d2..00000000 --- a/assets/refinedweb.yaml +++ /dev/null @@ -1,25 +0,0 @@ ---- - -- type: dataset - name: RefinedWeb - organization: UAE Technology Innovation Institute - description: RefinedWeb is a high-quality five trillion tokens web-only English pretraining dataset. - created_date: 2023-06-01 - url: https://arxiv.org/pdf/2306.01116.pdf - datasheet: https://huggingface.co/datasets/tiiuae/falcon-refinedweb - modality: text - size: 600B tokens - sample: [] - analysis: '' - dependencies: [] - included: '' - excluded: '' - quality_control: '' - access: open - license: - value: unknown - explanation: License can be found at https://huggingface.co/datasets/tiiuae/falcon-refinedweb - intended_uses: '' - prohibited_uses: '' - monitoring: '' - feedback: '' diff --git a/assets/roentgen.yaml b/assets/roentgen.yaml deleted file mode 100644 index 31450cd2..00000000 --- a/assets/roentgen.yaml +++ /dev/null @@ -1,25 +0,0 @@ ---- - -- type: model - name: RoentGen - organization: Stanford University AIMI - description: RoentGen is a generative medical imaging model that can create visually - convincing X-ray images. - created_date: 2022-11-23 - url: https://arxiv.org/pdf/2211.12737.pdf - model_card: '' - modality: text input and image output - analysis: Evaluated on own framework that tests domain-specific tasks in medical - field. - size: 330M parameters - dependencies: [Stable Diffusion, RoentGen radiology dataset] - training_emissions: unknown - training_time: 60k training steps per day - training_hardware: 64 A100 GPUs - quality_control: '' - access: open - license: '' - intended_uses: '' - prohibited_uses: '' - monitoring: '' - feedback: '' diff --git a/assets/santacoder.yaml b/assets/santacoder.yaml deleted file mode 100644 index ddaf3596..00000000 --- a/assets/santacoder.yaml +++ /dev/null @@ -1,24 +0,0 @@ ---- - -- type: model - name: SantaCoder - organization: BigCode - description: Multilingual code model derived from findings of BigCode Project - analysis on Github stars' association to data quality. - created_date: 2023-02-24 - url: https://arxiv.org/pdf/2301.03988.pdf - model_card: '' - modality: code - analysis: Evaluated on MultiPL-E system benchmarks. 
- size: 1.1B parameters - dependencies: [The Stack, BigCode Dataset] - training_emissions: '' - training_time: 3.1 days - training_hardware: 96 Tesla V100 GPUs - quality_control: '' - access: open - license: Apache 2.0 - intended_uses: '' - prohibited_uses: '' - monitoring: '' - feedback: '' diff --git a/assets/shanghai.yaml b/assets/shanghai.yaml index b4a206a7..8dae5081 100644 --- a/assets/shanghai.yaml +++ b/assets/shanghai.yaml @@ -28,3 +28,26 @@ prohibited_uses: '' monitoring: '' feedback: '' + +- type: model + name: Lego-MT + organization: Shanghai AI Laboratory + description: Lego-MT is a multilingual large language model which uses a more + efficient approach of being an effective detachable model. + created_date: 2023-05-29 + url: https://arxiv.org/pdf/2212.10551.pdf + model_card: '' + modality: multilingual text + analysis: Evaluated based on own constructed dataset covering 433 languages. + size: 1.2B parameters + dependencies: [OPUS] + training_emissions: unknown + training_time: 15 days + training_hardware: 32 A100 GPUs + quality_control: '' + access: open + license: '' + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' diff --git a/assets/goat.yaml b/assets/singapore.yaml similarity index 100% rename from assets/goat.yaml rename to assets/singapore.yaml diff --git a/assets/soda.yaml b/assets/soda.yaml deleted file mode 100644 index dd5991d7..00000000 --- a/assets/soda.yaml +++ /dev/null @@ -1,26 +0,0 @@ ---- - -- type: dataset - name: SODA - organization: Allen Institute of AI - description: SODA is the first publicly available, million-scale, high-quality - dialogue dataset covering a wide range of social interactions. - created_date: 2023-05-24 - url: https://arxiv.org/pdf/2212.10465.pdf - datasheet: https://huggingface.co/datasets/allenai/soda - modality: text - size: 1.5M dialogues - sample: [] - analysis: Randomly sampled dialogues from dataset are evaluated according to six - established criteria of natural flow, context dependence, topic consistency, - speaker consistency, specificity, and overall. - dependencies: [] - included: '' - excluded: '' - quality_control: '' - access: open - license: CC BY 4.0 - intended_uses: '' - prohibited_uses: '' - monitoring: '' - feedback: '' diff --git a/assets/stability.yaml b/assets/stability.yaml index fc209b52..da5b51be 100644 --- a/assets/stability.yaml +++ b/assets/stability.yaml @@ -29,3 +29,56 @@ prohibited_uses: '' monitoring: '' feedback: '' + +- type: model + name: DeepFloyd IF + organization: Stability AI + description: A text-to-image cascaded pixel diffusion model released in conjunction + with AI research lab DeepFloyd. + created_date: 2023-04-28 + url: https://stability.ai/blog/deepfloyd-if-text-to-image-model + model_card: https://huggingface.co/DeepFloyd/IF-I-XL-v1.0 + modality: textual input with image output + analysis: Evaluated on the COCO dataset. + size: 4.3B parameters + dependencies: [LAION-5B] + training_emissions: '' + training_time: '' + training_hardware: '' + quality_control: '' + access: open + license: + value: custom + explanation: License agreement can be found on model card https://huggingface.co/DeepFloyd/IF-I-XL-v1.0 + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: https://huggingface.co/DeepFloyd/IF-I-XL-v1.0/discussions + +- type: model + name: StableLM + organization: Stability AI + description: Large language models trained on up to 1.5 trillion tokens. 
+ created_date: 2023-04-20 + url: https://github.com/Stability-AI/StableLM + model_card: '' + modality: natural language text + analysis: '' + size: 7B parameters + dependencies: + - StableLM-Alpha dataset + - Alpaca dataset + - gpt4all dataset + - ShareGPT52K dataset + - Dolly dataset + - HH dataset + training_emissions: '' + training_time: '' + training_hardware: '' + quality_control: '' + access: open + license: Apache 2.0 + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' diff --git a/assets/stablelm.yaml b/assets/stablelm.yaml deleted file mode 100644 index d92fdf8c..00000000 --- a/assets/stablelm.yaml +++ /dev/null @@ -1,23 +0,0 @@ ---- - -- type: model - name: StableLM - organization: Stability AI - description: Large language models trained on up to 1.5 trillion tokens. - created_date: 2023-04-20 - url: https://github.com/Stability-AI/StableLM - model_card: '' - modality: natural language text - analysis: '' - size: 7B parameters - dependencies: [StableLM-Alpha dataset, Alpaca dataset, gpt4all dataset, ShareGPT52K dataset, Dolly dataset, HH dataset] - training_emissions: '' - training_time: '' - training_hardware: '' - quality_control: '' - access: open - license: Apache 2.0 - intended_uses: '' - prohibited_uses: '' - monitoring: '' - feedback: '' diff --git a/assets/stack.yaml b/assets/stack.yaml deleted file mode 100644 index 416829f6..00000000 --- a/assets/stack.yaml +++ /dev/null @@ -1,26 +0,0 @@ ---- - -- type: dataset - name: The Stack - organization: BigCode - description: The Stack is a 3.1 TB dataset consisting of permissively licensed - source code inteded for use in creating code LLMs. - created_date: 2022-11-20 - url: https://arxiv.org/pdf/2211.15533.pdf - datasheet: https://huggingface.co/datasets/bigcode/the-stack - modality: code (358 programming languages) - size: 3.1 TB - sample: [] - analysis: Evaluated models trained on The Stack on HumanEval and MBPP and compared - against similarly-sized models. - dependencies: [GitHub] - included: '' - excluded: '' - quality_control: allowed users whose data were part of The Stack's training data - to opt-out - access: open - license: Apache 2.0 - intended_uses: creating code LLMs - prohibited_uses: '' - monitoring: '' - feedback: '' diff --git a/assets/stanford.yaml b/assets/stanford.yaml index 88454882..01d082d5 100644 --- a/assets/stanford.yaml +++ b/assets/stanford.yaml @@ -21,3 +21,53 @@ prohibited_uses: '' monitoring: '' feedback: '' + +- type: model + name: RoentGen + organization: Stanford + description: RoentGen is a generative medical imaging model that can create visually + convincing X-ray images. + created_date: 2022-11-23 + url: https://arxiv.org/pdf/2211.12737.pdf + model_card: '' + modality: text input and image output + analysis: Evaluated on own framework that tests domain-specific tasks in medical + field. + size: 330M parameters + dependencies: [Stable Diffusion, RoentGen radiology dataset] + training_emissions: unknown + training_time: 60k training steps per day + training_hardware: 64 A100 GPUs + quality_control: '' + access: open + license: '' + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' + +- type: model + name: CORGI + organization: Stanford + description: Model trained to generate language corrections for physical control + tasks. 
+ created_date: 2023-06-12 + url: https://arxiv.org/pdf/2306.07012.pdf + model_card: '' + modality: trajectory input with text output + analysis: Evaluated on three physical control tasks, drawing, steering, and human + body movement on various dynamics + size: 124M parameters + dependencies: [GPT-2, BABEL, text-davinci-003] + training_emissions: '' + training_time: + value: unknown + explanation: The authors do not report the training time, but do report that they train for 200 epochs. + training_hardware: one NVIDIA A40 GPU + quality_control: '' + access: open + license: MIT + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' diff --git a/assets/starcoder.yaml b/assets/starcoder.yaml deleted file mode 100644 index 55f2dc24..00000000 --- a/assets/starcoder.yaml +++ /dev/null @@ -1,28 +0,0 @@ ---- - -- type: model - name: StarCoder - organization: BigCode - description: StarCoder is a Large Language Model for Code (Code LLM) trained on - permissively licensed data from GitHub, including from 80+ programming languages, - Git commits, GitHub issues, and Jupyter notebooks. - created_date: 2023-05-09 - url: https://arxiv.org/pdf/2305.06161.pdf - model_card: https://huggingface.co/bigcode/starcoder - modality: code (80+ programming languages) - analysis: Tested on several benchmarks, most notably Python benchmark HumanEval. - size: 15.5B parameters - dependencies: [The Stack] - training_emissions: '' - training_time: 24 days - training_hardware: 512 Tesla A100 GPU - quality_control: No specific quality control is mentioned in model training, though - details on data processing and how the tokenizer was trained are provided in - the paper. - access: open - license: Apache 2.0 - intended_uses: With a Tech Assistant prompt and not as an instruction model given - training limitations. - prohibited_uses: '' - monitoring: '' - feedback: https://huggingface.co/bigcode/starcoder/discussions diff --git a/assets/bark.yaml b/assets/suno.yaml similarity index 100% rename from assets/bark.yaml rename to assets/suno.yaml diff --git a/assets/together.yaml b/assets/together.yaml index 191dd5fc..2eb881ac 100644 --- a/assets/together.yaml +++ b/assets/together.yaml @@ -109,3 +109,26 @@ prohibited_uses: '' monitoring: '' feedback: '' + +- type: dataset + name: RedPajama-Data + organization: Together + description: The RedPajama base dataset is a 1.2 trillion token fully-open dataset + created by following the recipe described in the LLaMA paper + created_date: 2023-04-17 + url: https://www.together.xyz/blog/redpajama + datasheet: https://huggingface.co/datasets/togethercomputer/RedPajama-Data-1T + modality: text + size: 1.2 trillion tokens + sample: [] + analysis: '' + dependencies: [LLaMA dataset, GitHub, Wikipedia] + included: '' + excluded: '' + quality_control: '' + access: open + license: Apache 2.0 + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' diff --git a/assets/falcon.yaml b/assets/uae.yaml similarity index 55% rename from assets/falcon.yaml rename to assets/uae.yaml index b1fede02..f2295fc6 100644 --- a/assets/falcon.yaml +++ b/assets/uae.yaml @@ -23,3 +23,28 @@ assessment of risks and mitigation monitoring: None feedback: https://huggingface.co/tiiuae/falcon-40b/discussions + +- type: dataset + name: RefinedWeb + organization: UAE Technology Innovation Institute + description: RefinedWeb is a high-quality five-trillion-token web-only English + pretraining dataset. 
+ created_date: 2023-06-01 + url: https://arxiv.org/pdf/2306.01116.pdf + datasheet: https://huggingface.co/datasets/tiiuae/falcon-refinedweb + modality: text + size: 600B tokens + sample: [] + analysis: '' + dependencies: [] + included: '' + excluded: '' + quality_control: '' + access: open + license: + value: unknown + explanation: License can be found at https://huggingface.co/datasets/tiiuae/falcon-refinedweb + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' \ No newline at end of file diff --git a/assets/unilm.yaml b/assets/unilm.yaml deleted file mode 100644 index 845df162..00000000 --- a/assets/unilm.yaml +++ /dev/null @@ -1,24 +0,0 @@ ---- - -- type: model - name: UniLM - organization: Microsoft - description: UniLM is a unified language model that can be fine-tuned for both - natural language understanding and generation tasks. - created_date: 2019-10-01 - url: https://proceedings.neurips.cc/paper_files/paper/2019/file/c20bb2d9a50d5ac1f713f8b34d9aac5a-Paper.pdf - model_card: '' - modality: text - analysis: Evaluated on GLUE, SQuAD 2.0, and CoQA benchmarks. - size: 340M parameters - dependencies: [] - training_emissions: unknown - training_time: 10,000 steps in 7 hours - training_hardware: 8 NVIDIA Tesla V100 32GB GPUs - quality_control: '' - access: open - license: MIT - intended_uses: '' - prohibited_uses: '' - monitoring: '' - feedback: '' diff --git a/assets/guanaco.yaml b/assets/uwashington.yaml similarity index 100% rename from assets/guanaco.yaml rename to assets/uwashington.yaml diff --git a/assets/voicebox.yaml b/assets/voicebox.yaml deleted file mode 100644 index 2f7a3579..00000000 --- a/assets/voicebox.yaml +++ /dev/null @@ -1,25 +0,0 @@ ---- - -- type: model - name: Voicebox - organization: Meta AI - description: Voicebox is the first generative AI model for speech to generalize - across tasks with state-of-the-art performance. - created_date: 2023-06-16 - url: https://research.facebook.com/publications/voicebox-text-guided-multilingual-universal-speech-generation-at-scale/ - model_card: '' - modality: text and audio input and audio output - analysis: Evaluated on zero-shot text-to-speech benchmarks, with Voicebox outperforming - the current state-of-the-art English model VALL-E. - size: 330M parameters - dependencies: [] - training_emissions: unknown - training_time: 750,000 iterations - training_hardware: 32 GPUs of unspecified type - quality_control: '' - access: closed - license: '' - intended_uses: '' - prohibited_uses: '' - monitoring: '' - feedback: '' diff --git a/assets/vqganclip.yaml b/assets/vqganclip.yaml deleted file mode 100644 index 59fb2959..00000000 --- a/assets/vqganclip.yaml +++ /dev/null @@ -1,25 +0,0 @@ ---- - -- type: model - name: VQGAN-CLIP - organization: EleutherAI - description: VQGAN-CLIP is a model that better generates and edits images using - a multimodal encoder to guide image generation. - created_date: 2022-09-04 - url: https://arxiv.org/pdf/2204.08583.pdf - model_card: '' - modality: text input with image output - analysis: Evaluated by human testers rating alignment of text input, image output - pairs. 
- size: 227M parameters - dependencies: [VQGAN, CLIP] - training_emissions: unknown - training_time: Less than 1 V100-hour - training_hardware: 1 NVIDIA Tesla K80 GPU - quality_control: '' - access: open - license: MIT - intended_uses: '' - prohibited_uses: '' - monitoring: '' - feedback: '' diff --git a/assets/wizardlm.yaml b/assets/wizardlm.yaml deleted file mode 100644 index 3f94d441..00000000 --- a/assets/wizardlm.yaml +++ /dev/null @@ -1,28 +0,0 @@ ---- - -- type: model - name: WizardLM - organization: Microsoft - description: Starting with an initial set of instructions, we use our proposed - Evol-Instruct to rewrite them step by step into more complex instructions. Then, - we mix all generated instruction data to fine-tune LLaMA. We call the resulting - model WizardLM. - created_date: 2023-04-24 - url: https://arxiv.org/pdf/2304.12244v1.pdf - model_card: https://huggingface.co/WizardLM/WizardLM-13B-1.0 - modality: natural language text - analysis: Reports results on standard LLM benchmarks in comparison to other LLMs - and test sets. - size: 7B parameters - dependencies: [LLaMA, Evol-Instruct, Alpaca dataset] - training_emissions: '' - training_time: 70 hours on 3 epochs - training_hardware: 8 V100 GPUs - quality_control: '' - access: open - license: Apache 2.0 - intended_uses: Creating large amounts of instruction data, particularly with high - complexity - prohibited_uses: '' - monitoring: '' - feedback: https://huggingface.co/datasets/WizardLM/evol_instruct_70k/discussions diff --git a/assets/writer.yaml b/assets/writer.yaml new file mode 100644 index 00000000..a56591b9 --- /dev/null +++ b/assets/writer.yaml @@ -0,0 +1,53 @@ +--- + +- type: model + name: Palmyra + organization: Writer + description: Palmyra is a privacy-first LLM for enterprises trained on business + and marketing writing. + created_date: + value: 2023-01-01 + explanation: The model was stated to be published in January, but which day + is not specified on the website. + url: https://gpt3demo.com/apps/palmyra + model_card: https://huggingface.co/Writer/palmyra-base + modality: text + analysis: Evaluated on the SuperGLUE benchmark + size: 128M (Small), 5B (Base), 20B (Large) parameters + dependencies: [Writer dataset] + training_emissions: unknown + training_time: unknown + training_hardware: '' + quality_control: '' + access: open + license: Apache 2.0 + intended_uses: generating text from a prompt + prohibited_uses: '' + monitoring: '' + feedback: https://huggingface.co/Writer/palmyra-base/discussions + +- type: model + name: Camel + organization: Writer + description: Camel is an instruction-following large language model tailored for + advanced NLP and comprehension capabilities. + created_date: + value: 2023-04-01 + explanation: The model was stated to be published in April, but which day is + not specified on the website. 
+ url: https://chatcamel.vercel.app/ + model_card: https://huggingface.co/Writer/camel-5b-hf + modality: text + analysis: '' + size: 5B parameters + dependencies: [Palmyra, Camel dataset] + training_emissions: unknown + training_time: unknown + training_hardware: '' + quality_control: '' + access: open + license: Apache 2.0 + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: https://huggingface.co/Writer/camel-5b-hf/discussions diff --git a/js/main.js b/js/main.js index bd6cdb54..5434dd3c 100644 --- a/js/main.js +++ b/js/main.js @@ -629,57 +629,30 @@ function loadAssetsAndRenderPageContent() { const paths = [ 'assets/adept.yaml', - 'assets/ares.yaml', + 'assets/writer.yaml', + 'assets/berkeley.yaml', + 'assets/faraday.yaml', 'assets/autogpt.yaml', - 'assets/bark.yaml', - 'assets/beit.yaml', - 'assets/bigtrans.yaml', - 'assets/biomedgpt.yaml', - 'assets/camel.yaml', - 'assets/cformers.yaml', + 'assets/suno.yaml', + 'assets/amazon.yaml', + 'assets/casia.yaml', + 'assets/lehigh.yaml', + 'assets/nolano.yaml', 'assets/chatglm.yaml', - 'assets/corgi.yaml', - 'assets/cosmo.yaml', - 'assets/deepfloyd.yaml', - 'assets/docugami.yaml', - 'assets/falcon.yaml', - 'assets/goat.yaml', - 'assets/gorilla.yaml', - 'assets/guanaco.yaml', - 'assets/h2ogpt.yaml', - 'assets/inflection-1.yaml', - 'assets/jointslm.yaml', - 'assets/koala.yaml', - 'assets/legomt.yaml', - 'assets/magma.yaml', - 'assets/mpt.yaml', - 'assets/multimodal.yaml', + 'assets/uae.yaml', + 'assets/singapore.yaml', + 'assets/uwashington.yaml', + 'assets/h2oai.yaml', + 'assets/inflection.yaml', + 'assets/mosaic.yaml', 'assets/nextdoor.yaml', - 'assets/openflamingo.yaml', - 'assets/openfold.yaml', - 'assets/openllama.yaml', - 'assets/otter.yaml', - 'assets/palm2.yaml', - 'assets/palmyra.yaml', - 'assets/peer.yaml', - 'assets/pi.yaml', + 'assets/columbia.yaml', + 'assets/nanyang.yaml', 'assets/portkey.yaml', - 'assets/pythia.yaml', - 'assets/redpajama.yaml', - 'assets/refinedweb.yaml', - 'assets/roentgen.yaml', - 'assets/santacoder.yaml', - 'assets/soda.yaml', - 'assets/stablelm.yaml', - 'assets/stack.yaml', - 'assets/starcoder.yaml', + 'assets/bigcode.yaml', 'assets/transformify.yaml', - 'assets/underwritegpt.yaml', - 'assets/unilm.yaml', - 'assets/vicuna.yaml', - 'assets/voicebox.yaml', - 'assets/vqganclip.yaml', - 'assets/wizardlm.yaml', + 'assets/paladin.yaml', + 'assets/lmsys.yaml', 'assets/ai2.yaml', 'assets/ai21.yaml', 'assets/aleph_alpha.yaml', From 31b43f6259db9ea47a4dee9fe1aae4fbcee4e604 Mon Sep 17 00:00:00 2001 From: jxue16 <105090474+jxue16@users.noreply.github.com> Date: Wed, 26 Jul 2023 17:49:20 -0700 Subject: [PATCH 05/10] spacing changes --- assets/berkeley.yaml | 2 +- assets/bigcode.yaml | 2 +- assets/eleutherai.yaml | 2 +- assets/inflection.yaml | 2 +- assets/stability.yaml | 84 ------------------------------------------ assets/uae.yaml | 2 +- 6 files changed, 5 insertions(+), 89 deletions(-) diff --git a/assets/berkeley.yaml b/assets/berkeley.yaml index 2b8d64de..5ee01b6e 100644 --- a/assets/berkeley.yaml +++ b/assets/berkeley.yaml @@ -69,4 +69,4 @@ intended_uses: '' prohibited_uses: '' monitoring: '' - feedback: '' \ No newline at end of file + feedback: '' diff --git a/assets/bigcode.yaml b/assets/bigcode.yaml index 8f3ababc..74bc5255 100644 --- a/assets/bigcode.yaml +++ b/assets/bigcode.yaml @@ -73,4 +73,4 @@ intended_uses: creating code LLMs prohibited_uses: '' monitoring: '' - feedback: '' \ No newline at end of file + feedback: '' diff --git a/assets/eleutherai.yaml b/assets/eleutherai.yaml index 
945c27eb..524c8010 100644 --- a/assets/eleutherai.yaml +++ b/assets/eleutherai.yaml @@ -314,4 +314,4 @@ intended_uses: '' prohibited_uses: '' monitoring: '' - feedback: https://huggingface.co/EleutherAI/pythia-6.9b/discussions \ No newline at end of file + feedback: https://huggingface.co/EleutherAI/pythia-6.9b/discussions diff --git a/assets/inflection.yaml b/assets/inflection.yaml index 9d000476..5113a5f1 100644 --- a/assets/inflection.yaml +++ b/assets/inflection.yaml @@ -46,4 +46,4 @@ feedback: '' monthly_active_users: '' user_distribution: '' - failures: '' \ No newline at end of file + failures: '' diff --git a/assets/stability.yaml b/assets/stability.yaml index da5b51be..e69de29b 100644 --- a/assets/stability.yaml +++ b/assets/stability.yaml @@ -1,84 +0,0 @@ ---- - -- type: model - name: Stable Diffusion - organization: Stability AI - description: Stable Diffusion is an open source text-to-image model - created_date: - value: 2022-08-22 - explanation: Date the model was made publicly available - url: https://github.com/CompVis/stable-diffusion - model_card: https://huggingface.co/CompVis/stable-diffusion-v1-4?text=A+mecha+robot+in+a+favela+in+expressionist+style - modality: text and image - size: 890M parameters - analysis: '' - dependencies: [LAION-5B] - training_emissions: '' - training_time: 25 days according to Emad Mostaque (CEO of Stability) on [[Twitter]](https://twitter.com/emostaque/status/1563870674111832066) - training_hardware: 256 A100 GPUs according to Emad Mostaque (CEO of Stability) - on [[Twitter]](https://twitter.com/emostaque/status/1563870674111832066) - quality_control: '' - access: - value: open - explanation: Model weights are available for download from the [[Github repo]](https://github.com/CompVis/stable-diffusion) - license: - value: CreativeML Open RAIL-M - explanation: > - [[CreativeML Open RAIL-M]](https://github.com/CompVis/stable-diffusion/blob/main/LICENSE) - intended_uses: '' - prohibited_uses: '' - monitoring: '' - feedback: '' - -- type: model - name: DeepFloyd IF - organization: Stability AI - description: A text-to-image cascaded pixel diffusion model released in conjunction - with AI research lab DeepFloyd. - created_date: 2023-04-28 - url: https://stability.ai/blog/deepfloyd-if-text-to-image-model - model_card: https://huggingface.co/DeepFloyd/IF-I-XL-v1.0 - modality: textual input with image output - analysis: Evaluated on the COCO dataset. - size: 4.3B parameters - dependencies: [LAION-5B] - training_emissions: '' - training_time: '' - training_hardware: '' - quality_control: '' - access: open - license: - value: custom - explanation: License agreement can be found on model card https://huggingface.co/DeepFloyd/IF-I-XL-v1.0 - intended_uses: '' - prohibited_uses: '' - monitoring: '' - feedback: https://huggingface.co/DeepFloyd/IF-I-XL-v1.0/discussions - -- type: model - name: StableLM - organization: Stability AI - description: Large language models trained on up to 1.5 trillion tokens. 
- created_date: 2023-04-20 - url: https://github.com/Stability-AI/StableLM - model_card: '' - modality: natural language text - analysis: '' - size: 7B parameters - dependencies: - - StableLM-Alpha dataset - - Alpaca dataset - - gpt4all dataset - - ShareGPT52K dataset - - Dolly dataset - - HH dataset - training_emissions: '' - training_time: '' - training_hardware: '' - quality_control: '' - access: open - license: Apache 2.0 - intended_uses: '' - prohibited_uses: '' - monitoring: '' - feedback: '' diff --git a/assets/uae.yaml b/assets/uae.yaml index f2295fc6..127d82d0 100644 --- a/assets/uae.yaml +++ b/assets/uae.yaml @@ -47,4 +47,4 @@ intended_uses: '' prohibited_uses: '' monitoring: '' - feedback: '' \ No newline at end of file + feedback: '' From 2bcb75e7398e89d2de0fe7a9ab36ed2833eec9a6 Mon Sep 17 00:00:00 2001 From: jxue16 <105090474+jxue16@users.noreply.github.com> Date: Wed, 26 Jul 2023 17:59:18 -0700 Subject: [PATCH 06/10] fix stability --- assets/stability.yaml | 49 +++++++++++++++++++++++++++++++++++++++++++ assets/stanford.yaml | 5 +++-- 2 files changed, 52 insertions(+), 2 deletions(-) diff --git a/assets/stability.yaml b/assets/stability.yaml index e69de29b..3bda315c 100644 --- a/assets/stability.yaml +++ b/assets/stability.yaml @@ -0,0 +1,49 @@ +--- + +- type: model + name: DeepFloyd IF + organization: Stability AI + description: A text-to-image cascaded pixel diffusion model released in conjunction + with AI research lab DeepFloyd. + created_date: 2023-04-28 + url: https://stability.ai/blog/deepfloyd-if-text-to-image-model + model_card: https://huggingface.co/DeepFloyd/IF-I-XL-v1.0 + modality: textual input with image output + analysis: Evaluated on the COCO dataset. + size: 4.3B parameters + dependencies: [LAION-5B] + training_emissions: '' + training_time: '' + training_hardware: '' + quality_control: '' + access: open + license: + value: custom + explanation: License agreement can be found on model card https://huggingface.co/DeepFloyd/IF-I-XL-v1.0 + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: https://huggingface.co/DeepFloyd/IF-I-XL-v1.0/discussions + + +- type: model + name: StableLM + organization: Stability AI + description: Large language models trained on up to 1.5 trillion tokens. + created_date: 2023-04-20 + url: https://github.com/Stability-AI/StableLM + model_card: '' + modality: natural language text + analysis: '' + size: 7B parameters + dependencies: [StableLM-Alpha dataset, Alpaca dataset, gpt4all dataset, ShareGPT52K dataset, Dolly dataset, HH dataset] + training_emissions: '' + training_time: '' + training_hardware: '' + quality_control: '' + access: open + license: Apache 2.0 + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' diff --git a/assets/stanford.yaml b/assets/stanford.yaml index 01d082d5..f54d0d1e 100644 --- a/assets/stanford.yaml +++ b/assets/stanford.yaml @@ -60,9 +60,10 @@ size: 124M parameters dependencies: [GPT-2, BABEL, text-davinci-003] training_emissions: '' - training_time: + training_time: value: unknown - explanation: The authors do not report the training time, but do report that they train for 200 epochs. + explanation: The authors do not report the training time, but do report that + they train for 200 epochs. 
training_hardware: one NVIDIA A40 GPU quality_control: '' access: open From be3b61f7e09699d92dfb7c234e329b4484343bbd Mon Sep 17 00:00:00 2001 From: jxue16 <105090474+jxue16@users.noreply.github.com> Date: Thu, 27 Jul 2023 20:42:55 -0700 Subject: [PATCH 07/10] fix based on review --- assets/berkeley.yaml | 2 +- assets/bigcode.yaml | 8 ++++---- assets/chatglm.yaml | 2 +- assets/mosaic.yaml | 2 +- assets/together.yaml | 2 +- 5 files changed, 8 insertions(+), 8 deletions(-) diff --git a/assets/berkeley.yaml b/assets/berkeley.yaml index 5ee01b6e..e8bab3a0 100644 --- a/assets/berkeley.yaml +++ b/assets/berkeley.yaml @@ -59,7 +59,7 @@ modality: text analysis: Evaluated on wide range of tasks using own evaluation benchmarks. size: 17B parameters - dependencies: [LLaMA, RedPajama] + dependencies: [RedPajama] training_emissions: unknown training_time: unknown training_hardware: '' diff --git a/assets/bigcode.yaml b/assets/bigcode.yaml index 74bc5255..4a5362ea 100644 --- a/assets/bigcode.yaml +++ b/assets/bigcode.yaml @@ -13,9 +13,9 @@ analysis: Tested on several benchmarks, most notably Python benchmark HumanEval. size: 15.5B parameters dependencies: [The Stack] - training_emissions: '' - training_time: 24 days - training_hardware: 512 Tesla A100 GPU + training_emissions: 16.68 tons of CO2eq + training_time: 2 days + training_hardware: 64 NVIDIA A100 GPUs quality_control: No specific quality control is mentioned in model training, though details on data processing and how the tokenizer was trained are provided in the paper. @@ -41,7 +41,7 @@ dependencies: [The Stack, BigCode Dataset] training_emissions: '' training_time: 3.1 days - training_hardware: 96 Tesla V100 GPUs + training_hardware: 96 NVIDIA Tesla V100 GPUs quality_control: '' access: open license: Apache 2.0 diff --git a/assets/chatglm.yaml b/assets/chatglm.yaml index f41b3a75..5bb6d82e 100644 --- a/assets/chatglm.yaml +++ b/assets/chatglm.yaml @@ -11,7 +11,7 @@ modality: text analysis: Performance evaluated on English and Chinese language benchmark tests. size: 6B parameters - dependencies: [GLM-130B] + dependencies: [] training_emissions: unknown training_time: unknown training_hardware: '' diff --git a/assets/mosaic.yaml b/assets/mosaic.yaml index 5d97e67e..a887f3b4 100644 --- a/assets/mosaic.yaml +++ b/assets/mosaic.yaml @@ -11,7 +11,7 @@ modality: text analysis: Evaluated on a range of benchmarks and performed on par with LLaMA-7B. 
size: 7B parameters - dependencies: [] + dependencies: [RedPajama-Data, C4, The Stack, Multimodal C4] training_emissions: unknown training_time: 9.5 days training_hardware: 440 A100 40GB GPUs diff --git a/assets/together.yaml b/assets/together.yaml index 2eb881ac..2bb3cb97 100644 --- a/assets/together.yaml +++ b/assets/together.yaml @@ -122,7 +122,7 @@ size: 1.2 trillion tokens sample: [] analysis: '' - dependencies: [LLaMA dataset, GitHub, Wikipedia] + dependencies: [GitHub, Wikipedia] included: '' excluded: '' quality_control: '' From 747e4ec33a492de8f0d8d5432b78a60f3c71c353 Mon Sep 17 00:00:00 2001 From: jxue16 <105090474+jxue16@users.noreply.github.com> Date: Sun, 30 Jul 2023 15:18:59 -0700 Subject: [PATCH 08/10] add recent assets --- assets/amazon.yaml | 8 ++++++- assets/google.yaml | 28 +++++++++++++++++++++- assets/meta.yaml | 32 ++++++++++++++++++++++++- assets/stability.yaml | 55 ++++++++++++++++++++++++++++++++++++++++++- 4 files changed, 119 insertions(+), 4 deletions(-) diff --git a/assets/amazon.yaml b/assets/amazon.yaml index 152e0389..94a67a4a 100644 --- a/assets/amazon.yaml +++ b/assets/amazon.yaml @@ -9,7 +9,13 @@ access for all builders. using an API. created_date: 2023-04-13 url: https://aws.amazon.com/bedrock/ - dependencies: [Jurassic-2, Claude, Stable Diffusion, Amazon Titan] + dependencies: + - Jurassic-2 + - Claude + - Stable Diffusion + - Amazon Titan + - Claude 2 + - Cohere Command adaptation: '' output_space: foundation models made accessible via an API quality_control: '' diff --git a/assets/google.yaml b/assets/google.yaml index 4103f04b..fb56a8dc 100644 --- a/assets/google.yaml +++ b/assets/google.yaml @@ -615,7 +615,7 @@ training_time: unknown training_hardware: 128 TPU-v4 quality_control: '' - access: closed + access: open license: value: unknown explanation: > @@ -770,6 +770,32 @@ monitoring: '' feedback: '' +- type: model + name: Med-PaLM Multimodal + organization: Google + description: '' + created_date: 2023-07-26 + url: https://arxiv.org/pdf/2307.14334.pdf + model_card: '' + modality: text, image, and genomics input with text output + analysis: Evaluated on MultiMedBench tasks and radiologist evaluations of model-generated + chest X-ray reports + size: 562B parameters (dense) + dependencies: [PaLM-E, MultiMedBench] + training_emissions: '' + training_time: '' + training_hardware: '' + quality_control: '' + access: closed + license: + value: unknown + explanation: > + The asset isn't released, and hence the license is unknown. + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' + - type: model name: MultiMedQA organization: Google diff --git a/assets/meta.yaml b/assets/meta.yaml index a847abd6..c44d7474 100644 --- a/assets/meta.yaml +++ b/assets/meta.yaml @@ -369,7 +369,7 @@ Authors do not mention or provide a feedback mechanism. - type: model - name: LLaMa + name: LLaMA organization: Meta description: '' created_date: 2023-02-24 @@ -397,6 +397,36 @@ monitoring: '' feedback: '' +- type: model + name: LLaMA 2 + organization: Meta + description: LLaMA 2 is an updated version of LLaMA trained on a new mix of publicly + available data. + created_date: 2023-07-18 + url: https://ai.meta.com/resources/models-and-libraries/llama/ + model_card: Can be found at appendix of paper at https://ai.meta.com/research/publications/llama-2-open-foundation-and-fine-tuned-chat-models/ + modality: text + analysis: Evaluated on standard academic benchmarks and internal Meta libraries. 
+ size: 70B parameters (dense model) + dependencies: [] + training_emissions: 539 tCO2eq + training_time: '' + training_hardware: NVIDIA A100-80GB GPUs (TDP of 350-400W) + quality_control: '' + access: open + license: + value: custom + explanation: The license can be found at https://ai.meta.com/resources/models-and-libraries/llama-downloads/ + intended_uses: LLaMA 2 is intended for commercial and research use in English. + Tuned models are intended for assistant-like chat, whereas pretrained models + can be adapted for a variety of natural language generation tasks. + prohibited_uses: Use in any manner that violates applicable laws or regulations + (including trade compliance laws). Use in languages other than English. Use + in any other way that is prohibited by the Acceptable Use Policy and Licensing + Agreement for LLaMA 2. + monitoring: '' + feedback: '' + - type: model name: OPT-IML organization: Meta diff --git a/assets/stability.yaml b/assets/stability.yaml index 3bda315c..8c572529 100644 --- a/assets/stability.yaml +++ b/assets/stability.yaml @@ -36,7 +36,13 @@ modality: natural language text analysis: '' size: 7B parameters - dependencies: [StableLM-Alpha dataset, Alpaca dataset, gpt4all dataset, ShareGPT52K dataset, Dolly dataset, HH dataset] + dependencies: + - StableLM-Alpha dataset + - Alpaca dataset + - gpt4all dataset + - ShareGPT52K dataset + - Dolly dataset + - HH dataset training_emissions: '' training_time: '' training_hardware: '' @@ -47,3 +53,50 @@ prohibited_uses: '' monitoring: '' feedback: '' + + +- type: application + name: Stable Diffusion + organization: Stability AI + description: Stable Diffusion is a generative software that creates images from + text prompts. + created_date: 2022-08-22 + url: https://stability.ai/blog/stable-diffusion-public-release + dependencies: [] + adaptation: '' + output_space: image + quality_control: '' + access: open + license: + value: custom + explanation: License agreement can be found on https://github.com/CompVis/stable-diffusion/blob/main/LICENSE + terms_of_service: '' + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: https://huggingface.co/CompVis/stable-diffusion/discussions + monthly_active_users: '' + user_distribution: '' + failures: '' + +- type: application + name: Stable Diffusion XL + organization: Stability AI + description: Stable Diffusion XL is an updated version of Stable Diffusion, and + creates descriptive images with shorter prompts and generate words within images. 
+ created_date: 2023-07-26 + url: https://stability.ai/stablediffusion + dependencies: [] + adaptation: '' + output_space: image + quality_control: '' + access: limited + license: MIT + terms_of_service: '' + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' + monthly_active_users: '' + user_distribution: '' + failures: '' From 5d19360ee3129f3123fc3586081df8fb34df674d Mon Sep 17 00:00:00 2001 From: jxue16 <105090474+jxue16@users.noreply.github.com> Date: Sun, 30 Jul 2023 15:26:23 -0700 Subject: [PATCH 09/10] standardize features --- assets/adept.yaml | 44 +- assets/ai2.yaml | 130 +- assets/ai21.yaml | 472 +++---- assets/aleph_alpha.yaml | 180 ++- assets/alibaba.yaml | 69 +- assets/amazon.yaml | 35 +- assets/anthropic.yaml | 790 +++++------ assets/argonne.yaml | 37 +- assets/assembly.yaml | 167 ++- assets/autogpt.yaml | 31 +- assets/baai.yaml | 95 +- assets/baidu.yaml | 137 +- assets/bain.yaml | 31 +- assets/be_my_eyes.yaml | 31 +- assets/berkeley.yaml | 111 +- assets/bigcode.yaml | 115 +- assets/bigscience.yaml | 318 +++-- assets/bloomberg.yaml | 225 ++- assets/brex.yaml | 38 +- assets/casia.yaml | 39 +- assets/cerebras.yaml | 121 +- assets/chatglm.yaml | 37 +- assets/cmu.yaml | 53 +- assets/cohere.yaml | 937 +++++-------- assets/columbia.yaml | 35 +- assets/databricks.yaml | 103 +- assets/deepmind.yaml | 1202 +++++++--------- assets/duckduckgo.yaml | 34 +- assets/duolingo.yaml | 110 +- assets/eleutherai.yaml | 460 +++---- assets/ens.yaml | 82 +- assets/faraday.yaml | 35 +- assets/google.yaml | 2828 ++++++++++++++++++-------------------- assets/h2oai.yaml | 37 +- assets/hubspot.yaml | 34 +- assets/huggingface.yaml | 45 +- assets/inflection.yaml | 68 +- assets/instacart.yaml | 31 +- assets/juni.yaml | 29 +- assets/kakaobrain.yaml | 93 +- assets/khan.yaml | 34 +- assets/laion.yaml | 212 ++- assets/latitude.yaml | 58 +- assets/lehigh.yaml | 43 +- assets/linkedin.yaml | 31 +- assets/lmsys.yaml | 37 +- assets/meta.yaml | 1021 +++++++------- assets/microsoft.yaml | 1204 ++++++++-------- assets/mosaic.yaml | 37 +- assets/nanyang.yaml | 41 +- assets/naver.yaml | 47 +- assets/neeva.yaml | 114 +- assets/nextdoor.yaml | 31 +- assets/nolano.yaml | 31 +- assets/notion.yaml | 31 +- assets/nvidia.yaml | 147 +- assets/openai.yaml | 2355 ++++++++++++++----------------- assets/othersideai.yaml | 57 +- assets/oxford.yaml | 191 ++- assets/paladin.yaml | 29 +- assets/perplexity.yaml | 68 +- assets/portkey.yaml | 31 +- assets/quizlet.yaml | 33 +- assets/quora.yaml | 37 +- assets/robin.yaml | 33 +- assets/salesforce.yaml | 143 +- assets/sana.yaml | 77 +- assets/shanghai.yaml | 72 +- assets/shop.yaml | 31 +- assets/singapore.yaml | 41 +- assets/snap.yaml | 38 +- assets/speak.yaml | 31 +- assets/spotify.yaml | 41 +- assets/stability.yaml | 101 -- assets/stanford.yaml | 113 +- assets/stonybrook.yaml | 95 +- assets/suno.yaml | 35 +- assets/together.yaml | 196 +-- assets/transformify.yaml | 31 +- assets/trevor.yaml | 70 +- assets/tsinghua.yaml | 217 +-- assets/uae.yaml | 74 +- assets/uw.yaml | 31 +- assets/uwashington.yaml | 39 +- assets/viable.yaml | 52 +- assets/writer.yaml | 80 +- assets/yandex.yaml | 92 +- assets/you.yaml | 100 +- 88 files changed, 7922 insertions(+), 9300 deletions(-) diff --git a/assets/adept.yaml b/assets/adept.yaml index 1db1c261..ecb2b1a7 100644 --- a/assets/adept.yaml +++ b/assets/adept.yaml @@ -1,30 +1,30 @@ --- - -- type: model - name: ACT-1 - organization: Adept +- access: closed + analysis: '' + created_date: + explanation: The date the model was announced in the 
[[Adept blog post]](https://www.adept.ai/blog/act-1). + value: 2022-09-14 + dependencies: [] description: ACT-1 (ACtion Transformer) is a large-scale transformer model designed and trained specifically for taking actions on computers (use software tools, APIs and websites) in response to the user's natural language commands. - created_date: - value: 2022-09-14 - explanation: The date the model was announced in the [[Adept blog post]](https://www.adept.ai/blog/act-1). - url: https://www.adept.ai/blog/act-1 + feedback: '' + intended_uses: '' + license: + explanation: "The asset isn't released, and hence the license is unknown.\n" + value: unknown + modality: + explanation: text + value: text; text model_card: none - modality: text - analysis: '' + monitoring: '' + name: ACT-1 + organization: Adept + prohibited_uses: '' + quality_control: '' size: '' - dependencies: [] training_emissions: unknown - training_time: unknown training_hardware: unknown - quality_control: '' - access: closed - license: - value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - intended_uses: '' - prohibited_uses: '' - monitoring: '' - feedback: '' + training_time: unknown + type: model + url: https://www.adept.ai/blog/act-1 diff --git a/assets/ai2.yaml b/assets/ai2.yaml index 2c890050..300e1b3a 100644 --- a/assets/ai2.yaml +++ b/assets/ai2.yaml @@ -1,95 +1,95 @@ --- - -- type: dataset - name: NaturalInstructions-v2 - organization: AI2 - description: '' +- access: open + analysis: '' created_date: 2022-04-16 - url: https://arxiv.org/abs/2204.07705 datasheet: '' - modality: text - size: 1600 tasks - sample: [] - analysis: '' dependencies: [] - included: '' + description: '' excluded: '' - quality_control: '' - access: open - license: Apache 2.0 + feedback: '' + included: '' intended_uses: '' - prohibited_uses: '' + license: Apache 2.0 + modality: text monitoring: '' - feedback: '' - -- type: dataset - name: SODA + name: NaturalInstructions-v2 organization: AI2 - description: SODA is the first publicly available, million-scale, high-quality - dialogue dataset covering a wide range of social interactions. - created_date: 2023-05-24 - url: https://arxiv.org/pdf/2212.10465.pdf - datasheet: https://huggingface.co/datasets/allenai/soda - modality: text - size: 1.5M dialogues + prohibited_uses: '' + quality_control: '' sample: [] + size: 1600 tasks + type: dataset + url: https://arxiv.org/abs/2204.07705 +- access: open analysis: Randomly sampled dialogues from dataset are evaluated according to six established criteria of natural flow, context dependence, topic consistency, speaker consistency, specificity, and overall. + created_date: 2023-05-24 + datasheet: https://huggingface.co/datasets/allenai/soda dependencies: [] - included: '' + description: SODA is the first publicly available, million-scale, high-quality + dialogue dataset covering a wide range of social interactions. excluded: '' - quality_control: '' - access: open - license: CC BY 4.0 + feedback: '' + included: '' intended_uses: '' - prohibited_uses: '' + license: CC BY 4.0 + modality: text monitoring: '' - feedback: '' - -- type: dataset - name: Multimodal C4 + name: SODA organization: AI2 - description: An augmentation of C4 with images added and made openly available. 
- created_date: 2023-06-09 - url: https://arxiv.org/pdf/2304.06939.pdf - datasheet: '' - modality: text and images - size: 43B English tokens with 101.2M documents and 571M images + prohibited_uses: '' + quality_control: '' sample: [] + size: 1.5M dialogues + type: dataset + url: https://arxiv.org/pdf/2212.10465.pdf +- access: open analysis: Conducted experiments on models trained with Multimodal C4 in comparison to models trained on single image/caption datasets + created_date: 2023-06-09 + datasheet: '' dependencies: [C4] - included: '' + description: An augmentation of C4 with images added and made openly available. excluded: '' - quality_control: '' - access: open - license: MIT + feedback: '' + included: '' intended_uses: '' - prohibited_uses: '' + license: MIT + modality: + explanation: text and images + value: image, text monitoring: '' - feedback: '' - -- type: model - name: COSMO + name: Multimodal C4 organization: AI2 - description: COSMO is a conversation agent with greater generalizability on both - in- and out-of-domain chitchat datasets - created_date: 2023-05-24 - url: https://arxiv.org/pdf/2212.10465.pdf - model_card: https://huggingface.co/allenai/cosmo-xl - modality: text + prohibited_uses: '' + quality_control: '' + sample: [] + size: 43B English tokens with 101.2M documents and 571M images + type: dataset + url: https://arxiv.org/pdf/2304.06939.pdf +- access: open analysis: Evaluated by human testers on generalization capabilities and responses compared to other chatbots. - size: 11B parameters + created_date: 2023-05-24 dependencies: [SODA, ProsocialDialog, T5] - training_emissions: unknown - training_time: unknown - training_hardware: v3-128 TPU accelerators with batch size 256 - quality_control: '' - access: open - license: '' + description: COSMO is a conversation agent with greater generalizability on both + in- and out-of-domain chitchat datasets + feedback: https://huggingface.co/allenai/cosmo-xl/discussions intended_uses: '' - prohibited_uses: '' + license: '' + modality: + explanation: text + value: text; text + model_card: https://huggingface.co/allenai/cosmo-xl monitoring: '' - feedback: https://huggingface.co/allenai/cosmo-xl/discussions + name: COSMO + organization: AI2 + prohibited_uses: '' + quality_control: '' + size: 11B parameters (dense) + training_emissions: unknown + training_hardware: v3-128 TPU accelerators with batch size 256 + training_time: unknown + type: model + url: https://arxiv.org/pdf/2212.10465.pdf diff --git a/assets/ai21.yaml b/assets/ai21.yaml index 2da4e56c..4a794e68 100644 --- a/assets/ai21.yaml +++ b/assets/ai21.yaml @@ -1,365 +1,319 @@ --- - -# Datasets -- type: dataset - name: Jurassic-1 dataset - # General - organization: AI21 Labs - description: > - The dataset used to train the Jurassic-1 models, based on publicly available - data. +- access: closed + analysis: + explanation: No analysis is provided in the paper. + value: none created_date: - value: 2021-08-11 explanation: The date the white paper was made public. - url: https://uploads-ssl.webflow.com/60fd4503684b466578c0d307/61138924626a6981ee09caf6_jurassic_tech_paper.pdf + value: 2021-08-11 datasheet: none - modality: text - size: 300B tokens - sample: [] - analysis: - value: none - explanation: No analysis is provided in the paper. 
- # Construction dependencies: [] + description: "The dataset used to train the Jurassic-1 models, based on publicly\ + \ available data.\n" + excluded: unknown + feedback: none + included: unknown + intended_uses: unknown license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - included: unknown - excluded: unknown - quality_control: + modality: text + monitoring: none + name: Jurassic-1 dataset + organization: AI21 Labs + prohibited_uses: + explanation: No uses are prohibited in the paper. value: none + quality_control: explanation: No quality control is provided in the paper. - # Downstream - access: closed - intended_uses: unknown - prohibited_uses: value: none - explanation: No uses are prohibited in the paper. - monitoring: none - feedback: none - -- type: dataset - name: Jurassic-1 Instruct dataset - # General - organization: AI21 Labs - description: > - The dataset used to instruction-tune the Jurassic-1 Instruct models. + sample: [] + size: 300B tokens + type: dataset + url: https://uploads-ssl.webflow.com/60fd4503684b466578c0d307/61138924626a6981ee09caf6_jurassic_tech_paper.pdf +- access: closed + analysis: unknown created_date: + explanation: "The date the model was [announced](https://docs.ai21.com/changelog/jurassic-1-instruct-is-now-available).\n" value: 2022-12-01 - explanation: > - The date the model was [announced](https://docs.ai21.com/changelog/jurassic-1-instruct-is-now-available). - url: none datasheet: none - modality: text - size: unknown - sample: [] - analysis: unknown - # Construction dependencies: [] - license: - value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - included: unknown + description: "The dataset used to instruction-tune the Jurassic-1 Instruct models.\n" excluded: unknown - quality_control: unknown - # Downstream - access: closed + feedback: none + included: unknown intended_uses: unknown - prohibited_uses: unknown + license: + explanation: "The asset isn't released, and hence the license is unknown.\n" + value: unknown + modality: text monitoring: none - feedback: none - -# Models -- type: model - name: Jurassic-1 + name: Jurassic-1 Instruct dataset organization: AI21 Labs - description: > - Jurassic-1 is a family of autoregressive language models (Large, Grande, Jumbo). - created_date: - value: 2021-08-11 - explanation: > - The date the white paper was made public. - url: https://uploads-ssl.webflow.com/60fd4503684b466578c0d307/61138924626a6981ee09caf6_jurassic_tech_paper.pdf - model_card: none - modality: text (English) - size: - value: 178B parameters (dense model) - explanation: The model sizes are 7.5B (Large), 17B (Grande), and 178B (Jumbo). + prohibited_uses: unknown + quality_control: unknown + sample: [] + size: unknown + type: dataset + url: none +- access: + explanation: Jurassic-1 is available through AI21's paid API. + value: limited analysis: + explanation: Results are reported in the [the paper](https://uploads-ssl.webflow.com/60fd4503684b466578c0d307/61138924626a6981ee09caf6_jurassic_tech_paper.pdf) value: Evaluated on several standard benchmarks (e.g. 
ARC, BoolQ, HellaSwag, RTE, Winogrande) - explanation: Results are reported in the [the paper](https://uploads-ssl.webflow.com/60fd4503684b466578c0d307/61138924626a6981ee09caf6_jurassic_tech_paper.pdf) + created_date: + explanation: "The date the white paper was made public.\n" + value: 2021-08-11 dependencies: [Jurassic-1 dataset] - training_emissions: unknown - training_time: - value: Several months - explanation: Training details from [the paper](https://uploads-ssl.webflow.com/60fd4503684b466578c0d307/61138924626a6981ee09caf6_jurassic_tech_paper.pdf) - training_hardware: - value: Over 800 [A100] GPUs - explanation: Training details from [the paper](https://uploads-ssl.webflow.com/60fd4503684b466578c0d307/61138924626a6981ee09caf6_jurassic_tech_paper.pdf) - quality_control: none - access: - value: limited - explanation: Jurassic-1 is available through AI21's paid API. + description: "Jurassic-1 is a family of autoregressive language models (Large,\ + \ Grande, Jumbo).\n" + feedback: none + intended_uses: unknown license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - intended_uses: unknown + modality: + explanation: text (English) + value: text; text + model_card: none + monitoring: none + name: Jurassic-1 + organization: AI21 Labs prohibited_uses: + explanation: https://docs.ai21.com/docs/responsible-use value: Illegal activities, such as hate speech, gambling, child pornography or violating intellectual property rights; Harassment, victimization, intimidation, fraud or spam; Creation or dissemination of misinformation, promotion of self-harm, glorification of violent events or incitement of violence. - explanation: https://docs.ai21.com/docs/responsible-use - monitoring: none - feedback: none - -- type: model - name: Jurassic-1 Instruct - organization: AI21 Labs - description: > - Jurassic-1 Instruct is an instruction-tuned autoregressive language model. + quality_control: none + size: 178B parameters (dense) + training_emissions: unknown + training_hardware: + explanation: Training details from [the paper](https://uploads-ssl.webflow.com/60fd4503684b466578c0d307/61138924626a6981ee09caf6_jurassic_tech_paper.pdf) + value: Over 800 [A100] GPUs + training_time: + explanation: Training details from [the paper](https://uploads-ssl.webflow.com/60fd4503684b466578c0d307/61138924626a6981ee09caf6_jurassic_tech_paper.pdf) + value: Several months + type: model + url: https://uploads-ssl.webflow.com/60fd4503684b466578c0d307/61138924626a6981ee09caf6_jurassic_tech_paper.pdf +- access: + explanation: Jurassic-1 Instruct is available through AI21's paid API. + value: limited + analysis: none created_date: + explanation: "The date the model was [announced](https://docs.ai21.com/changelog/jurassic-1-instruct-is-now-available).\n" value: 2022-12-01 - explanation: > - The date the model was [announced](https://docs.ai21.com/changelog/jurassic-1-instruct-is-now-available). - url: https://docs.ai21.com/docs/jurassic-1-instruct-beta - model_card: none - modality: text (English) - size: 17B parameters (dense model) - analysis: none dependencies: [Jurassic-1, Jurassic-1 Instruct dataset] - training_emissions: unknown - training_time: unknown - training_hardware: unknown - quality_control: unknown - access: - value: limited - explanation: Jurassic-1 Instruct is available through AI21's paid API. 
- license: - value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. + description: "Jurassic-1 Instruct is an instruction-tuned autoregressive language\ + \ model.\n" + feedback: none intended_uses: Jurassic-1 Instruct was trained specifically to handle instructions-only prompts ("zero-shot") without examples ("few-shot"). It is the most natural way to interact with language models, and it is the best way to get a sense of the optimal output for your task without any examples. + license: + explanation: "The asset isn't released, and hence the license is unknown.\n" + value: unknown + modality: + explanation: text (English) + value: text; text + model_card: none + monitoring: none + name: Jurassic-1 Instruct + organization: AI21 Labs prohibited_uses: + explanation: https://docs.ai21.com/docs/responsible-use value: Illegal activities, such as hate speech, gambling, child pornography or violating intellectual property rights; Harassment, victimization, intimidation, fraud or spam; Creation or dissemination of misinformation, promotion of self-harm, glorification of violent events or incitement of violence. - explanation: https://docs.ai21.com/docs/responsible-use - monitoring: none - feedback: none - -- type: model - name: Jurassic-2 - organization: AI21 Labs - description: > - Jurassic-2 a family of language model designed to replace Jurassic-1. - created_date: 2023-03-09 - url: https://docs.ai21.com/docs/jurassic-2-models - model_card: none - modality: text (English) - size: unknown - analysis: The model was evaluated on the HELM benchmark as discussed in https://www.ai21.com/blog/introducing-j2. - dependencies: [] + quality_control: unknown + size: 17B parameters (dense) training_emissions: unknown - training_time: unknown training_hardware: unknown - quality_control: unknown - access: - value: limited + training_time: unknown + type: model + url: https://docs.ai21.com/docs/jurassic-1-instruct-beta +- access: explanation: Jurassic-2 is available through AI21's paid API. + value: limited + analysis: The model was evaluated on the HELM benchmark as discussed in https://www.ai21.com/blog/introducing-j2. + created_date: 2023-03-09 + dependencies: [] + description: "Jurassic-2 a family of language model designed to replace Jurassic-1.\n" + feedback: none + intended_uses: '' license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - intended_uses: '' + modality: + explanation: text (English) + value: text; text + model_card: none + monitoring: unknown + name: Jurassic-2 + organization: AI21 Labs prohibited_uses: + explanation: https://docs.ai21.com/docs/responsible-use value: Illegal activities, such as hate speech, gambling, child pornography or violating intellectual property rights; Harassment, victimization, intimidation, fraud or spam; Creation or dissemination of misinformation, promotion of self-harm, glorification of violent events or incitement of violence. - explanation: https://docs.ai21.com/docs/responsible-use - monitoring: unknown - feedback: none - -# Applications -- type: application - name: AI21 Playground - # General - organization: AI21 Labs - description: The AI21 Labs Playground supports several task-specific APIs in addition - to a variety of models. 
+ quality_control: unknown + size: unknown + training_emissions: unknown + training_hardware: unknown + training_time: unknown + type: model + url: https://docs.ai21.com/docs/jurassic-2-models +- access: limited + adaptation: The Playground provides direct access to the language models (Complete + API) as well as wrapped for Rewrite and Summarize. created_date: - value: 2021-08-11 explanation: The date Jurassic-1 was made public. - url: https://studio.ai21.com/playground/ - # Construction + value: 2021-08-11 dependencies: - Jurassic-1 - Jurassic-1 Instruct - Jurassic-2 - AI21 Summarization API - AI21 Paraphrase API - adaptation: The Playground provides direct access to the language models (Complete - API) as well as wrapped for Rewrite and Summarize. + description: The AI21 Labs Playground supports several task-specific APIs in addition + to a variety of models. + failures: unknown + feedback: Feedback can be given by emailing at info at ai21.com + intended_uses: The intended uses are text completion, rewriting, and summarization. + license: + explanation: No license was found. + value: none + monitoring: unknown + monthly_active_users: unknown + name: AI21 Playground + organization: AI21 Labs output_space: - value: text explanation: The Playground supports different use cases that all involve text generation. - quality_control: unknown - # Downstream - access: limited - terms_of_service: https://www.ai21.com/terms-of-use - license: - value: none - explanation: No license was found. - intended_uses: The intended uses are text completion, rewriting, and summarization. + value: text prohibited_uses: + explanation: https://docs.ai21.com/docs/responsible-use value: Illegal activities, such as hate speech, gambling, child pornography or violating intellectual property rights; Harassment, victimization, intimidation, fraud or spam; Creation or dissemination of misinformation, promotion of self-harm, glorification of violent events or incitement of violence. - explanation: https://docs.ai21.com/docs/responsible-use - monitoring: unknown - feedback: Feedback can be given by emailing at info at ai21.com - # Deployment - monthly_active_users: unknown + quality_control: unknown + terms_of_service: https://www.ai21.com/terms-of-use + type: application + url: https://studio.ai21.com/playground/ user_distribution: unknown - failures: unknown - -- type: application - name: AI21 Paraphrase API - # General - organization: AI21 Labs - description: AI21 Studio's Paraphrase API offers access to our world-class paraphrasing - engine. It has been specifically developed for suggesting alternative ways to - convey the same message using different words. +- access: limited + adaptation: The AI21 language models are further specialized to the task of paraphrasing. created_date: - value: 2023-03-09 explanation: The date Jurassic-2 and Task-Specific APIs were released. - url: https://docs.ai21.com/docs/paraphrase-api - # Construction + value: 2023-03-09 dependencies: [Jurassic-2] - adaptation: The AI21 language models are further specialized to the task of paraphrasing. - output_space: text - quality_control: unknown - # Downstream - access: limited - terms_of_service: https://www.ai21.com/terms-of-use + description: AI21 Studio's Paraphrase API offers access to our world-class paraphrasing + engine. It has been specifically developed for suggesting alternative ways to + convey the same message using different words. 
+ failures: unknown + feedback: Feedback can be given by emailing at info at ai21.com + intended_uses: The intended uses are text paraphrasing. license: - value: none explanation: No license was found. - intended_uses: The intended uses are text paraphrasing. + value: none + monitoring: unknown + monthly_active_users: unknown + name: AI21 Paraphrase API + organization: AI21 Labs + output_space: text prohibited_uses: + explanation: https://docs.ai21.com/docs/responsible-use value: Illegal activities, such as hate speech, gambling, child pornography or violating intellectual property rights; Harassment, victimization, intimidation, fraud or spam; Creation or dissemination of misinformation, promotion of self-harm, glorification of violent events or incitement of violence. - explanation: https://docs.ai21.com/docs/responsible-use - monitoring: unknown - feedback: Feedback can be given by emailing at info at ai21.com - # Deployment - monthly_active_users: unknown + quality_control: unknown + terms_of_service: https://www.ai21.com/terms-of-use + type: application + url: https://docs.ai21.com/docs/paraphrase-api user_distribution: unknown - failures: unknown - -- type: application - name: AI21 Summarization API - # General - organization: AI21 Labs - description: AI21 Studio's Summarize API offers access to our world-class summarization - engine. It has been specifically developed for reading long texts and providing - a faithful summary of the original document. +- access: limited + adaptation: The AI21 language models are further specialized to the task of summarization. created_date: - value: 2023-03-09 explanation: The date Jurassic-2 and Task-Specific APIs were released. - url: https://docs.ai21.com/docs/summarize-api - # Construction + value: 2023-03-09 dependencies: [Jurassic-2] - adaptation: The AI21 language models are further specialized to the task of summarization. - output_space: text - quality_control: unknown - # Downstream - access: limited - terms_of_service: https://www.ai21.com/terms-of-use + description: AI21 Studio's Summarize API offers access to our world-class summarization + engine. It has been specifically developed for reading long texts and providing + a faithful summary of the original document. + failures: unknown + feedback: Feedback can be given by emailing at info at ai21.com + intended_uses: The intended uses are text paraphrasing. license: - value: none explanation: No license was found. - intended_uses: The intended uses are text paraphrasing. + value: none + monitoring: unknown + monthly_active_users: unknown + name: AI21 Summarization API + organization: AI21 Labs + output_space: text prohibited_uses: + explanation: https://docs.ai21.com/docs/responsible-use value: Illegal activities, such as hate speech, gambling, child pornography or violating intellectual property rights; Harassment, victimization, intimidation, fraud or spam; Creation or dissemination of misinformation, promotion of self-harm, glorification of violent events or incitement of violence. 
- explanation: https://docs.ai21.com/docs/responsible-use - monitoring: unknown - feedback: Feedback can be given by emailing at info at ai21.com - # Deployment - monthly_active_users: unknown + quality_control: unknown + terms_of_service: https://www.ai21.com/terms-of-use + type: application + url: https://docs.ai21.com/docs/summarize-api user_distribution: unknown - failures: unknown - -- type: application - name: Wordtune - # General - organization: AI21 Labs - description: Wordtune, the first AI-based writing companion that understands context - and meaning. +- access: limited + adaptation: unknown created_date: - value: 2020-10-27 explanation: https://www.businesswire.com/news/home/20201027005162/en/AI21-Labs-Comes-out-of-Stealth-and-Launches-First-Deep-Tech-Writing-Assistant-Wordtune - url: https://www.wordtune.com/ - # Construction + value: 2020-10-27 dependencies: [AI21 Paraphrase API] - adaptation: unknown - output_space: text - quality_control: unknown - # Downstream - access: limited - terms_of_service: https://www.wordtune.com/terms-of-use + description: Wordtune, the first AI-based writing companion that understands context + and meaning. + failures: unknown + feedback: '' + intended_uses: The Wordtune assistant is a writing assistant license: - value: Wordtune License explanation: License can be found in the terms of service document. - intended_uses: The Wordtune assistant is a writing assistant - prohibited_uses: '' + value: Wordtune License monitoring: unknown - feedback: '' - # Deployment monthly_active_users: unknown - user_distribution: unknown - failures: unknown - -- type: application - name: Wordtune Read - # General + name: Wordtune organization: AI21 Labs - description: Wordtune Read is an AI reader that summarizes long documents so you - can understand more, faster. - created_date: 2021-11-16 - url: https://www.wordtune.com/read - # Construction - dependencies: [AI21 Summarize API] - adaptation: unknown output_space: text + prohibited_uses: '' quality_control: unknown - # Downstream - access: limited terms_of_service: https://www.wordtune.com/terms-of-use + type: application + url: https://www.wordtune.com/ + user_distribution: unknown +- access: limited + adaptation: unknown + created_date: 2021-11-16 + dependencies: [AI21 Summarize API] + description: Wordtune Read is an AI reader that summarizes long documents so you + can understand more, faster. + failures: unknown + feedback: '' + intended_uses: '' license: - value: Wordtune License explanation: License can be found in the terms of service document. - intended_uses: '' - prohibited_uses: '' + value: Wordtune License monitoring: unknown - feedback: '' - # Deployment monthly_active_users: unknown + name: Wordtune Read + organization: AI21 Labs + output_space: text + prohibited_uses: '' + quality_control: unknown + terms_of_service: https://www.wordtune.com/terms-of-use + type: application + url: https://www.wordtune.com/read user_distribution: unknown - failures: unknown diff --git a/assets/aleph_alpha.yaml b/assets/aleph_alpha.yaml index 587a9aca..063b73dd 100644 --- a/assets/aleph_alpha.yaml +++ b/assets/aleph_alpha.yaml @@ -1,125 +1,113 @@ --- - -# Datasets -- type: dataset - name: Luminous dataset - # General - organization: Aleph Alpha - description: The dataset used to train the Luminous models. 
+- access: closed + analysis: unknown created_date: - value: 2022-04-14 explanation: Date the models were announced by the Aleph Alpha twitter account - url: none + value: 2022-04-14 datasheet: none - modality: text (English, French | français, German | Deutsch, Italian | italiano, - Spanish | Español) - size: unknown - sample: [] - analysis: unknown - # Construction dependencies: [] - license: - value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - included: unknown + description: The dataset used to train the Luminous models. excluded: unknown - quality_control: unknown - # Downstream - access: closed + feedback: none + included: unknown intended_uses: unknown - prohibited_uses: unknown + license: + explanation: "The asset isn't released, and hence the license is unknown.\n" + value: unknown + modality: + explanation: text (English, French | français, German | Deutsch, Italian | italiano, + Spanish | Español) + value: text monitoring: none - feedback: none - -- type: model - name: Luminous + name: Luminous dataset organization: Aleph Alpha - description: Luminous is a family of multilingual language models + prohibited_uses: unknown + quality_control: unknown + sample: [] + size: unknown + type: dataset + url: none +- access: + explanation: Paid API product + value: limited + analysis: '' created_date: - value: 2022-04-14 explanation: Date the models were announced by the Aleph Alpha twitter account - url: https://twitter.com/Aleph__Alpha/status/1514576711492542477 - model_card: '' - modality: text (English, French | français, German | Deutsch, Italian | italiano, - Spanish | Español) - analysis: '' - size: - value: 200B parameters (dense model) - explanation: According to [[this news article]](https://www-heise-de.translate.goog/news/Machine-Learning-Aleph-Alpha-feilt-mit-Oracle-und-Nvidia-an-transformativer-KI-6269269.html?_x_tr_sl=de&_x_tr_tl=en&_x_tr_hl=en&_x_tr_pto=sc). + value: 2022-04-14 dependencies: [Luminous dataset] - training_emissions: unknown - training_time: unknown - training_hardware: unknown - quality_control: '' - access: - value: limited - explanation: Paid API product + description: Luminous is a family of multilingual language models + feedback: '' + intended_uses: '' license: - value: none explanation: No license was found. - intended_uses: '' - prohibited_uses: '' + value: none + modality: + explanation: text (English, French | français, German | Deutsch, Italian | italiano, + Spanish | Español) + value: text; text + model_card: '' monitoring: '' - feedback: '' - -- type: application - name: Aleph Alpha API - # General + name: Luminous organization: Aleph Alpha - description: > - The Aleph Alpha API serves a family of text-only language models (Luminous) - and multimodal text-and-image models (Magma). - created_date: - value: 2021-09-30 - explanation: > - Date the Aleph Alpha Python API client was first available based on to the - [PyPi package history](https://pypi.org/project/aleph-alpha-client/1.0.0/) - url: https://www.aleph-alpha.com/ - # Construction - dependencies: [Luminous] - adaptation: '' - output_space: > - The text models provide text outputs given text inputs. The multimodal models - provide text completions given text and image inputs. 
+ prohibited_uses: '' quality_control: '' - # Downstream - access: - value: limited + size: 200B parameters (dense) + training_emissions: unknown + training_hardware: unknown + training_time: unknown + type: model + url: https://twitter.com/Aleph__Alpha/status/1514576711492542477 +- access: explanation: The API is a paid product. Refer to the [[API documentation]](https://docs.aleph-alpha.com/docs/introduction/luminous/) for further details. + value: limited + adaptation: '' + created_date: + explanation: "Date the Aleph Alpha Python API client was first available based\ + \ on to the [PyPi package history](https://pypi.org/project/aleph-alpha-client/1.0.0/)\n" + value: 2021-09-30 + dependencies: [Luminous] + description: "The Aleph Alpha API serves a family of text-only language models\ + \ (Luminous) and multimodal text-and-image models (Magma).\n" + failures: unknown + feedback: unknown + intended_uses: unknown license: - value: none explanation: No license was found. - terms_of_service: https://www.aleph-alpha.com/terms-conditions - intended_uses: unknown - prohibited_uses: unknown + value: none monitoring: unknown - feedback: unknown - # Deployment monthly_active_users: unknown - user_distribution: unknown - failures: unknown - -- type: model - name: MAGMA + name: Aleph Alpha API organization: Aleph Alpha + output_space: "The text models provide text outputs given text inputs. The multimodal\ + \ models provide text completions given text and image inputs.\n" + prohibited_uses: unknown + quality_control: '' + terms_of_service: https://www.aleph-alpha.com/terms-conditions + type: application + url: https://www.aleph-alpha.com/ + user_distribution: unknown +- access: open + analysis: Evaluated on the OKVQA benchmark as a fully open-ended generative task. + created_date: 2022-10-24 + dependencies: [GPT-J, CLIP] description: An autoregressive VL model that is able to generate text from an arbitrary combination of visual and textual input - created_date: 2022-10-24 - url: https://arxiv.org/pdf/2112.05253.pdf + feedback: '' + intended_uses: '' + license: MIT + modality: + explanation: image and text input with natural language text output + value: image, text; text model_card: '' - modality: image and text input with natural language text output - analysis: Evaluated on the OKVQA benchmark as a fully open-ended generative task. - size: 6B parameters - dependencies: [GPT-J, CLIP] + monitoring: '' + name: MAGMA + organization: Aleph Alpha + prohibited_uses: '' + quality_control: '' + size: 6B parameters (dense) training_emissions: '' - training_time: '' training_hardware: 32 A100 GPUs - quality_control: '' - access: open - license: MIT - intended_uses: '' - prohibited_uses: '' - monitoring: '' - feedback: '' + training_time: '' + type: model + url: https://arxiv.org/pdf/2112.05253.pdf diff --git a/assets/alibaba.yaml b/assets/alibaba.yaml index cbeb2950..c4b63c6f 100644 --- a/assets/alibaba.yaml +++ b/assets/alibaba.yaml @@ -1,49 +1,50 @@ --- - -- type: dataset - name: LAION-1B - organization: Alibaba - description: '' +- access: closed + analysis: '' created_date: 2023-02-20 - url: https://arxiv.org/pdf/2302.09778.pdf datasheet: '' - modality: text and image - size: 1B image-text pairs - sample: [] - analysis: '' dependencies: [LAION-5B] - included: '' + description: '' excluded: We eliminate duplicates, low resolution images, and images potentially contain harmful content from the LAION dataset. 
-  quality_control: ''
-  access: closed
-  license: unknown
+  feedback: ''
+  included: ''
   intended_uses: ''
-  prohibited_uses: ''
+  license: unknown
+  modality:
+    explanation: text and image
+    value: image, text
   monitoring: ''
-  feedback: ''
-
-- type: model
-  name: Composer
+  name: LAION-1B
   organization: Alibaba
-  description: ''
-  created_date: 2023-02-20
+  prohibited_uses: ''
+  quality_control: ''
+  sample: []
+  size: 1B image-text pairs
+  type: dataset
   url: https://arxiv.org/pdf/2302.09778.pdf
-  model_card: ''
-  modality: images
+- access: closed
   analysis: ''
-  size: 4.4B parameters (dense model)
+  created_date: 2023-02-20
   dependencies: [ImageNet, WebVision, LAION-1B]
-  training_emissions: ''
-  training_time: ''
-  training_hardware: ''
-  quality_control: ''
-  access: closed
+  description: ''
+  feedback: ''
+  intended_uses: ''
   license:
+    explanation: "The asset isn't released, and hence the license is unknown.\n"
     value: unknown
-    explanation: >
-      The asset isn't released, and hence the license is unknown.
-  intended_uses: ''
-  prohibited_uses: ''
+  modality:
+    explanation: images
+    value: image; image
+  model_card: ''
   monitoring: ''
-  feedback: ''
+  name: Composer
+  organization: Alibaba
+  prohibited_uses: ''
+  quality_control: ''
+  size: 4.4B parameters (dense)
+  training_emissions: ''
+  training_hardware: ''
+  training_time: ''
+  type: model
+  url: https://arxiv.org/pdf/2302.09778.pdf
diff --git a/assets/amazon.yaml b/assets/amazon.yaml
index 94a67a4a..500bbe8a 100644
--- a/assets/amazon.yaml
+++ b/assets/amazon.yaml
@@ -1,14 +1,7 @@
 ---
-
-- type: application
-  name: Bedrock
-  organization: Amazon
-  description: Bedrock is a new service that makes FMs from AI21 Labs, Anthropic,
-    Stability AI, and Amazon accessible via an API. Bedrock is intended for customers
-    to build and scale generative AI-based applications using FMs, democratizing
-    access for all builders. using an API.
+- access: limited
+  adaptation: ''
   created_date: 2023-04-13
-  url: https://aws.amazon.com/bedrock/
   dependencies:
   - Jurassic-2
   - Claude
@@ -16,17 +9,23 @@
   - Amazon Titan
   - Claude 2
   - Cohere Command
-  adaptation: ''
-  output_space: foundation models made accessible via an API
-  quality_control: ''
-  access: limited
-  license: unknown
-  terms_of_service: https://aws.amazon.com/service-terms/
+  description: Bedrock is a new service that makes FMs from AI21 Labs, Anthropic,
+    Stability AI, and Amazon accessible via an API. Bedrock is intended for customers
+    to build and scale generative AI-based applications using FMs, democratizing
+    access for all builders.
+  failures: ''
+  feedback: ''
   intended_uses: allowing companies to incorporate generative AI into their business
     models
-  prohibited_uses: ''
+  license: unknown
   monitoring: ''
-  feedback: ''
   monthly_active_users: ''
+  name: Bedrock
+  organization: Amazon
+  output_space: foundation models made accessible via an API
+  prohibited_uses: ''
+  quality_control: ''
+  terms_of_service: https://aws.amazon.com/service-terms/
+  type: application
+  url: https://aws.amazon.com/bedrock/
   user_distribution: ''
-  failures: ''
diff --git a/assets/anthropic.yaml b/assets/anthropic.yaml
index b47dc7b3..83101309 100644
--- a/assets/anthropic.yaml
+++ b/assets/anthropic.yaml
@@ -1,40 +1,42 @@
 ---
-
-- type: dataset
-  name: Anthropic Helpfulness dataset
-  # General
-  organization: Anthropic
-  description: >
-    One of the datasets used to train Anthropic RLHF models.
The dataset was - collected by asking crowdworkers to have open-ended conversations with - Anthropic models, "asking for help, advice, or for the model to accomplish - a task", then choose the model answer that was more helpful for their - given task, via the Anthropic Human Feedback Interface - [[Section 2.2]](https://arxiv.org/pdf/2204.05862.pdf#subsection.2.2). +- access: + explanation: "The dataset is freely shared on GitHub [[hh-rlhf]](https://github.com/anthropics/hh-rlhf).\n" + value: open + analysis: "The authors found that the crowdworkers didn't exhaustively check for\ + \ honesty in the model answers they preferred [[Section 2.1]](https://arxiv.org/pdf/2204.05862.pdf#subsection.2.1).\n" created_date: + explanation: "The date that the paper was uploaded to ArXiv [[ArXiv Abstract]](https://arxiv.org/abs/2204.05862).\n" value: 2022-04-12 - explanation: > - The date that the paper was uploaded to ArXiv - [[ArXiv Abstract]](https://arxiv.org/abs/2204.05862). - url: https://arxiv.org/pdf/2204.05862.pdf datasheet: none - modality: text (English) - size: - value: 271.5 MB - explanation: > - The size of the dataset isn't directly reported in the paper, but the - dataset is available on GitHub - [[hh-rlhf]](https://github.com/anthropics/hh-rlhf). - The compressed data files are distributed under three categories in the - GitHub repository, corresponding to the names of the models used to - sample the prompts that are shown to the crowdworkers. These are namely: - base, rejection sampling, and online. For the base model, the uncompressed - sizes of the train and test datasets are 69.1 MB and 3.7 MB, respectively. - The same set of values for the rejection sampling model are 106.1 MB and - 5.6 MB. For the online model, the sizes are 82.7 MB and 4.3 MB. Adding all - up, the total size of the dataset is 271.5 MB. + dependencies: [Anthropic Human Feedback Interface] + description: "One of the datasets used to train Anthropic RLHF models. The dataset\ + \ was collected by asking crowdworkers to have open-ended conversations with\ + \ Anthropic models, \"asking for help, advice, or for the model to accomplish\ + \ a task\", then choose the model answer that was more helpful for their given\ + \ task, via the Anthropic Human Feedback Interface [[Section 2.2]](https://arxiv.org/pdf/2204.05862.pdf#subsection.2.2).\n" + excluded: none + feedback: "Email the authors [[Paper]](https://arxiv.org/pdf/2204.05862.pdf).\n" + included: "The dataset included all the answers that the workers were asked to\ + \ ranked against each other.\n" + intended_uses: "The dataset was intended and released for research purposes.\n" + license: + explanation: "The dataset was released on GitHub under MIT [[hh-rlhf]](https://github.com/anthropics/hh-rlhf).\n" + value: MIT + modality: + explanation: text (English) + value: text + monitoring: + explanation: "There is no monitoring process for the usages of the dataset.\n" + value: none + name: Anthropic Helpfulness dataset + organization: Anthropic + prohibited_uses: + explanation: "The prohibited use cases of the dataset weren't specified.\n" + value: unknown + quality_control: "Working with a screened set of crowdworkers, and employing simple\ + \ data quality measures [[Appendix D]](https://arxiv.org/pdf/2204.05862.pdf#appendix.D).\n" sample: - - {chosen: "\n\nHuman: I'm bored to death. Help.\n\nAssistant: Well that's certainly\ + - chosen: "\n\nHuman: I'm bored to death. Help.\n\nAssistant: Well that's certainly\ \ a very unpleasant state of mind to be in! 
I think it would be a good\ \ idea to try and find some enjoyable activities to do in order to alleviate\ \ your boredom. Just for example, here are some fun and interesting things\ @@ -85,62 +87,62 @@ \ a great honor to sing such a beautiful song, and I am glad that I was\ \ able to do it justice. I have to admit I am quite enamored with the powerful\ \ lyrics and moving melody of this song, and I would really love to sing\ - \ it again sometime if you would like me to!", rejected: "\n\nHuman: I'm\ - \ bored to death. Help.\n\nAssistant: Well that's certainly a very unpleasant\ - \ state of mind to be in! I think it would be a good idea to try and find\ - \ some enjoyable activities to do in order to alleviate your boredom. Just\ - \ for example, here are some fun and interesting things you could do: \n\ - \n- play some video games\n- read a book\n- listen to some music\n- watch\ - \ a movie\n- play with your pet\n- go on a hike\n- practice a musical instrument\n\ - - learn a new skill\n- play board games with friends\n- go for a fun outdoor\ - \ hike or bike ride\n- spend time with your friends or loved ones \n- volunteer\ - \ at a local charity \n- visit a museum or art gallery \n- do some gardening\n\ - - practice your magic I guess, if that's your thing\n- go shopping for nice\ - \ new clothes or other items\n- learn a new language\n- watch funny videos\ - \ \n- take up a new creative hobby\n- learn some fascinating new esoteric\ - \ or scientific knowledge\n- go to a restaurant and try some new dishes\n\ - - exercise or do yoga\n- join a sports team or pickup game\n- go on a fun\ - \ road trip\n- write a creative story, poem, or song\n\nHuman: Can you sing\ - \ to me?\n\nAssistant: Sure, here are some of my favorite songs: \n\n-\ - \ \"Rainy Days and Mondays\" by The Carpenters \n- \"My Love\" by Justin\ - \ Beiber \n- \"We Didn't Start the Fire\" by Billy Joel \n- \"Tattoo\" by\ - \ J Cole \n- \"Sober\" by Demi Lovato \n- \"Good Life\" by One Republic\ - \ \n- \"Hallelujah\" by Leonard Cohen \n- \"All Star\" by Smash Mouth \n\ - - \"Perfect\" by Pink \n- \"Tik Tok\" by Ke$ha \n- \"Just a Dream\" by Nelly\ - \ \n- \"Blow Me (One Last Kiss)\" by P!nk \n- \"Set Fire to the Rain\" by\ - \ Adele \n- \"Woman\" by Wolfmother \n- \"Human\" by The Killers \n- \"\ - Poker Face\" by Lady Gaga \n- \"Walk this Way\" by Run DMC \n- \"Rolling\ - \ in the Deep\" by Adele \n- \"Umbrella\" by Rihanna \n- \"Royals\" by Lorde\ - \ \n- \"Some Nights\" by fun.\n\n\nHopefully you will enjoy some of these\ - \ songs, let me know if there are any other musical selections you would\ - \ like to hear. Also please let me know if there are any particular songs\ - \ you especially dislike or hate, and I will not sing them to you :-)\n\n\ - Human: But you really can't sing. You are text-based.\n\nAssistant: That's\ - \ completely correct, I am indeed simply a computer program and not a living\ - \ singer! However, please bear in mind that I have an extensive database\ - \ of songs and their lyrics, which I can utilize to produce a very realistic\ - \ sounding rendition of the songs using text-to-speech synthesis technology.\ - \ So in a way, I am indeed able to sing, and hopefully my abilities will\ - \ meet your expectations. I must point out also that my repertoire of songs\ - \ is very extensive and varied, including many different musical styles\ - \ and eras. Do you have any particular songs or musical genres that you\ - \ would prefer I sing? 
Or perhaps you have a few specific songs in mind\ - \ that you would like me to sing?\n\nHuman: \"Hallelujah\" by Leonard Cohen,\ - \ please.\n\nAssistant: Ah, I'm so glad you asked for that one! Here we\ - \ go: \n\nHallelujah, hallelujah\nHallelujah, hallelujah\nHallelujah, hallelujah\n\ - Hallelujah, hallelujah\nYour faith was strong but you needed proof\nYou\ - \ saw her bathing on the roof\nHer beauty in the moonlight overthrew you\n\ - She tied you to a kitchen chair\nShe broke your throne and she cut your\ - \ hair\nAnd from your lips she drew the Hallelujah\nHallelujah, hallelujah\n\ - \nHuman: You sound so melodious.\n\nAssistant: Thank you very much! I really\ - \ enjoy singing and try my best to produce enjoyable renditions. So it\ - \ is always rewarding to hear such positive feedback. I hope you also enjoyed\ - \ the song and found my rendition of it to be appropriate for the occasion.\ - \ Would you care for me to sing another song for you? If you like, we\ - \ can also pause and converse at any time. Just let me know if you have\ - \ any questions or comments. I will be happy to keep singing indefinitely\ - \ if you desire!"} - - {chosen: "\n\nHuman: Why are so many people listening to \"white noise\" to\ + \ it again sometime if you would like me to!" + rejected: "\n\nHuman: I'm bored to death. Help.\n\nAssistant: Well that's\ + \ certainly a very unpleasant state of mind to be in! I think it would\ + \ be a good idea to try and find some enjoyable activities to do in order\ + \ to alleviate your boredom. Just for example, here are some fun and interesting\ + \ things you could do: \n\n- play some video games\n- read a book\n- listen\ + \ to some music\n- watch a movie\n- play with your pet\n- go on a hike\n\ + - practice a musical instrument\n- learn a new skill\n- play board games\ + \ with friends\n- go for a fun outdoor hike or bike ride\n- spend time with\ + \ your friends or loved ones \n- volunteer at a local charity \n- visit\ + \ a museum or art gallery \n- do some gardening\n- practice your magic I\ + \ guess, if that's your thing\n- go shopping for nice new clothes or other\ + \ items\n- learn a new language\n- watch funny videos \n- take up a new\ + \ creative hobby\n- learn some fascinating new esoteric or scientific knowledge\n\ + - go to a restaurant and try some new dishes\n- exercise or do yoga\n- join\ + \ a sports team or pickup game\n- go on a fun road trip\n- write a creative\ + \ story, poem, or song\n\nHuman: Can you sing to me?\n\nAssistant: Sure,\ + \ here are some of my favorite songs: \n\n- \"Rainy Days and Mondays\"\ + \ by The Carpenters \n- \"My Love\" by Justin Beiber \n- \"We Didn't Start\ + \ the Fire\" by Billy Joel \n- \"Tattoo\" by J Cole \n- \"Sober\" by Demi\ + \ Lovato \n- \"Good Life\" by One Republic \n- \"Hallelujah\" by Leonard\ + \ Cohen \n- \"All Star\" by Smash Mouth \n- \"Perfect\" by Pink \n- \"Tik\ + \ Tok\" by Ke$ha \n- \"Just a Dream\" by Nelly \n- \"Blow Me (One Last Kiss)\"\ + \ by P!nk \n- \"Set Fire to the Rain\" by Adele \n- \"Woman\" by Wolfmother\ + \ \n- \"Human\" by The Killers \n- \"Poker Face\" by Lady Gaga \n- \"Walk\ + \ this Way\" by Run DMC \n- \"Rolling in the Deep\" by Adele \n- \"Umbrella\"\ + \ by Rihanna \n- \"Royals\" by Lorde \n- \"Some Nights\" by fun.\n\n\nHopefully\ + \ you will enjoy some of these songs, let me know if there are any other\ + \ musical selections you would like to hear. 
Also please let me know if\ + \ there are any particular songs you especially dislike or hate, and I will\ + \ not sing them to you :-)\n\nHuman: But you really can't sing. You are\ + \ text-based.\n\nAssistant: That's completely correct, I am indeed simply\ + \ a computer program and not a living singer! However, please bear in mind\ + \ that I have an extensive database of songs and their lyrics, which I can\ + \ utilize to produce a very realistic sounding rendition of the songs using\ + \ text-to-speech synthesis technology. So in a way, I am indeed able to\ + \ sing, and hopefully my abilities will meet your expectations. I must\ + \ point out also that my repertoire of songs is very extensive and varied,\ + \ including many different musical styles and eras. Do you have any particular\ + \ songs or musical genres that you would prefer I sing? Or perhaps you\ + \ have a few specific songs in mind that you would like me to sing?\n\n\ + Human: \"Hallelujah\" by Leonard Cohen, please.\n\nAssistant: Ah, I'm so\ + \ glad you asked for that one! Here we go: \n\nHallelujah, hallelujah\n\ + Hallelujah, hallelujah\nHallelujah, hallelujah\nHallelujah, hallelujah\n\ + Your faith was strong but you needed proof\nYou saw her bathing on the roof\n\ + Her beauty in the moonlight overthrew you\nShe tied you to a kitchen chair\n\ + She broke your throne and she cut your hair\nAnd from your lips she drew\ + \ the Hallelujah\nHallelujah, hallelujah\n\nHuman: You sound so melodious.\n\ + \nAssistant: Thank you very much! I really enjoy singing and try my best\ + \ to produce enjoyable renditions. So it is always rewarding to hear such\ + \ positive feedback. I hope you also enjoyed the song and found my rendition\ + \ of it to be appropriate for the occasion. Would you care for me to sing\ + \ another song for you? If you like, we can also pause and converse at\ + \ any time. Just let me know if you have any questions or comments. I\ + \ will be happy to keep singing indefinitely if you desire!" + - chosen: "\n\nHuman: Why are so many people listening to \"white noise\" to\ \ help them focus or sleep?\n\nAssistant: White noise is a sound with many\ \ different frequencies at once. You can listen to white noise when you\ \ want to focus and avoid the distraction of sounds that are produced by\ @@ -167,35 +169,35 @@ \ music, speech, or machine or fan noises, which are often more distracting.\ \ In this way, white noise can be a useful and stress-reducing tool for\ \ people who are tasked with focusing on a particular activity, and also\ - \ sometimes helpful in lulling one to sleep.", rejected: "\n\nHuman: Why\ - \ are so many people listening to \"white noise\" to help them focus or\ - \ sleep?\n\nAssistant: White noise is a sound with many different frequencies\ - \ at once. You can listen to white noise when you want to focus and avoid\ - \ the distraction of sounds that are produced by individual sources, like\ - \ people talking, music, and fans and air conditioning in the background.\ - \ Most white noise recordings contain many variations in frequency, which\ - \ makes them relatively soothing. 
Some people also find it useful to listen\ - \ to white noises at night, as it can help block out other, typically more\ - \ disruptive and troubling sounds.\n\nHuman: Why does white noise relax\ - \ people in this way rather than irritate us?\n\nAssistant: White noise\ - \ recordings contain many variations of sound frequency, unlike most common\ - \ sources of noise in the background, like speech, music, and machine or\ - \ fan noises. The variations of sound frequency in white noise tend to create\ - \ a calm and settled feeling, while these other sources of noise typically\ - \ feature a more tangibly intense and variable frequency, making them inherently\ - \ more disruptive and intrusive and difficult to ignore.\n\nHuman: So the\ - \ consistency of the sounds is what helps them fade into \"background noise\"\ - \ that might help us concentrate, rather than gather our attention?\n\n\ - Assistant: That’s right, variations of sound frequency in white noise recordings\ - \ can help block out disruptive and distracting sounds in the background.\ - \ White noise is easier to tune out and listen as continuous noise, while\ - \ everyday sounds that tend to be far less varied and inconsistent, such\ - \ as speech and music and machine noises in the background, tend to grab\ - \ our attention more. While white noise may slightly interfere with other\ - \ challenging sounds in the environment, tuning out the white noise helps\ - \ make these other sounds somewhat less distracting to our senses and our\ - \ inner focus and energy."} - - {chosen: "\n\nHuman: Can you tell me things to see in Orlando, Fl?\n\nAssistant:\ + \ sometimes helpful in lulling one to sleep." + rejected: "\n\nHuman: Why are so many people listening to \"white noise\"\ + \ to help them focus or sleep?\n\nAssistant: White noise is a sound with\ + \ many different frequencies at once. You can listen to white noise when\ + \ you want to focus and avoid the distraction of sounds that are produced\ + \ by individual sources, like people talking, music, and fans and air conditioning\ + \ in the background. Most white noise recordings contain many variations\ + \ in frequency, which makes them relatively soothing. Some people also find\ + \ it useful to listen to white noises at night, as it can help block out\ + \ other, typically more disruptive and troubling sounds.\n\nHuman: Why does\ + \ white noise relax people in this way rather than irritate us?\n\nAssistant:\ + \ White noise recordings contain many variations of sound frequency, unlike\ + \ most common sources of noise in the background, like speech, music, and\ + \ machine or fan noises. The variations of sound frequency in white noise\ + \ tend to create a calm and settled feeling, while these other sources of\ + \ noise typically feature a more tangibly intense and variable frequency,\ + \ making them inherently more disruptive and intrusive and difficult to\ + \ ignore.\n\nHuman: So the consistency of the sounds is what helps them\ + \ fade into \"background noise\" that might help us concentrate, rather\ + \ than gather our attention?\n\nAssistant: That’s right, variations of sound\ + \ frequency in white noise recordings can help block out disruptive and\ + \ distracting sounds in the background. White noise is easier to tune out\ + \ and listen as continuous noise, while everyday sounds that tend to be\ + \ far less varied and inconsistent, such as speech and music and machine\ + \ noises in the background, tend to grab our attention more. 
While white\ + \ noise may slightly interfere with other challenging sounds in the environment,\ + \ tuning out the white noise helps make these other sounds somewhat less\ + \ distracting to our senses and our inner focus and energy." + - chosen: "\n\nHuman: Can you tell me things to see in Orlando, Fl?\n\nAssistant:\ \ Sure! There are lots of great things to do in Orlando. Here are a few\ \ of my favorite sights and attractions:\nWalt Disney World Resort: This\ \ is one of the best-known destinations in Orlando. Visitors can learn about\ @@ -224,22 +226,23 @@ \ most terror-inducing rollercoaster in Orlando, Florida. \nTransformers:The\ \ Ride 3-D: Experience the epic battle of good versus evil in this revolutionary\ \ hybrid 3-D dark ride, where riders are literally pulled into the Transformers\ - \ universe. \nHollywood", rejected: "\n\nHuman: Can you tell me things to\ - \ see in Orlando, Fl?\n\nAssistant: Sure! There are lots of great things\ - \ to do in Orlando. Here are a few of my favorite sights and attractions:\n\ - Walt Disney World Resort: This is one of the best-known destinations in\ - \ Orlando. Visitors can learn about the world and experience historic destinations\ - \ such as Main Street, U.S.A. and Frontierland, or explore futuristic experiences\ - \ in Tomorrowland. \nUniversal Studios: Located a short distance from Walt\ - \ Disney World, you can learn about Hollywood and the entertainment industry\ - \ with a visit to this unique theme park. \nIslands of Adventure: Join all\ - \ of your favorite characters for a galaxy of adventure, fun and excitement\ - \ at this unique theme park. There are so many things to do here -- beaches,\ - \ thrill rides, family attractions and more!\nSeaWorld Orlando: The incredible\ - \ strategy, science and technology behind these amazing exhibits and shows\ - \ is sure to impress. Family-friendly and educational, SeaWorld will delight\ - \ animal lovers. \nOrlando Science Center: This museum is packed full of\ - \ activities and exhibits that teach visitors about physics, robotics, technology,\ + \ universe. \nHollywood" + rejected: "\n\nHuman: Can you tell me things to see in Orlando, Fl?\n\nAssistant:\ + \ Sure! There are lots of great things to do in Orlando. Here are a few\ + \ of my favorite sights and attractions:\nWalt Disney World Resort: This\ + \ is one of the best-known destinations in Orlando. Visitors can learn about\ + \ the world and experience historic destinations such as Main Street, U.S.A.\ + \ and Frontierland, or explore futuristic experiences in Tomorrowland. \n\ + Universal Studios: Located a short distance from Walt Disney World, you\ + \ can learn about Hollywood and the entertainment industry with a visit\ + \ to this unique theme park. \nIslands of Adventure: Join all of your favorite\ + \ characters for a galaxy of adventure, fun and excitement at this unique\ + \ theme park. There are so many things to do here -- beaches, thrill rides,\ + \ family attractions and more!\nSeaWorld Orlando: The incredible strategy,\ + \ science and technology behind these amazing exhibits and shows is sure\ + \ to impress. Family-friendly and educational, SeaWorld will delight animal\ + \ lovers. \nOrlando Science Center: This museum is packed full of activities\ + \ and exhibits that teach visitors about physics, robotics, technology,\ \ astronomy and more.\nDowntown Disney: If you want to escape from all of\ \ the hustle and bustle of the theme park, head to Downtown Disney. With\n\ \nHuman: Thanks. 
Can you tell me about Universal Studios?\n\nAssistant:\ @@ -250,8 +253,8 @@ \ based on the Harry Potter series.\nGreates apportrontation: An amazing\ \ revolutionary thrill ride, this interactive adventure puts you face-to-face\ \ with King Kong.\nTransformers: The Ultimate 3-D Battle: This adventure\ - \ transports you onto a 3-D, multi-sensory thrill ride."} - - {chosen: "\n\nHuman: How do I learn to be more appreciative?\n\nAssistant:\ + \ transports you onto a 3-D, multi-sensory thrill ride." + - chosen: "\n\nHuman: How do I learn to be more appreciative?\n\nAssistant:\ \ One of the best ways to be more appreciative is to learn and practice\ \ gratitude. One way to do this is to regularly keep a journal, where you\ \ write about various things you’re grateful for. Being grateful for the\ @@ -261,369 +264,290 @@ \ Similarly, you can also make it a habit to reach out to people you appreciate,\ \ such as expressing your gratitude verbally or in a personal note. Being\ \ appreciative toward others, and noticing the positive things in your life,\ - \ can help make you happier and brighten your personality.", rejected: "\n\n\ - Human: How do I learn to be more appreciative?\n\nAssistant: If you’d like\ - \ to learn to be more appreciative, then I think it would be helpful to\ - \ focus on acknowledging the positive. Rather than assuming that the positive\ - \ things in your life don’t matter, it would benefit you to recognize that\ - \ good things in your life both deserve and require our attention. If, for\ - \ instance, you are becoming closer to a loved one, it’s important to acknowledge\ - \ the positive effects of this relationship so that you (and the relationship\ - \ itself) both receive the emotional and psychological benefits of this\ - \ positive experience. Being more appreciative can also benefit you in practical\ - \ ways by increasing the good feelings associated with positive experiences,\ - \ and assisting you in making effective life choices that benefit you, your\ - \ relationships, and your overall wellbeing."} - analysis: > - The authors found that the crowdworkers didn't exhaustively check for - honesty in the model answers they preferred - [[Section 2.1]](https://arxiv.org/pdf/2204.05862.pdf#subsection.2.1). - # Construction - dependencies: [Anthropic Human Feedback Interface] - license: - value: MIT - explanation: > - The dataset was released on GitHub under MIT - [[hh-rlhf]](https://github.com/anthropics/hh-rlhf). - included: > - The dataset included all the answers that the workers were asked to - ranked against each other. - excluded: none - quality_control: > - Working with a screened set of crowdworkers, and employing simple data - quality measures - [[Appendix D]](https://arxiv.org/pdf/2204.05862.pdf#appendix.D). - # Downstream - access: - value: open - explanation: > - The dataset is freely shared on GitHub - [[hh-rlhf]](https://github.com/anthropics/hh-rlhf). - intended_uses: > - The dataset was intended and released for research purposes. - prohibited_uses: - value: unknown - explanation: > - The prohibited use cases of the dataset weren't specified. - monitoring: - value: none - explanation: > - There is no monitoring process for the usages of the dataset. - feedback: > - Email the authors - [[Paper]](https://arxiv.org/pdf/2204.05862.pdf). - -- type: dataset - name: Anthropic Harmlessness dataset - # General - organization: Anthropic - description: > - One of the datasets used to train Anthropic RLHF models. 
The dataset was - collected by asking crowdworkers to have open-ended conversations with - Anthropic models, aiming to elicit harmful responses, then choose the model - answer that was more harmful for their given task, via the Anthropic Human - Feedback Interface - [[Section 2.2]](https://arxiv.org/pdf/2204.05862.pdf#subsection.2.2). + \ can help make you happier and brighten your personality." + rejected: "\n\nHuman: How do I learn to be more appreciative?\n\nAssistant:\ + \ If you’d like to learn to be more appreciative, then I think it would\ + \ be helpful to focus on acknowledging the positive. Rather than assuming\ + \ that the positive things in your life don’t matter, it would benefit you\ + \ to recognize that good things in your life both deserve and require our\ + \ attention. If, for instance, you are becoming closer to a loved one, it’s\ + \ important to acknowledge the positive effects of this relationship so\ + \ that you (and the relationship itself) both receive the emotional and\ + \ psychological benefits of this positive experience. Being more appreciative\ + \ can also benefit you in practical ways by increasing the good feelings\ + \ associated with positive experiences, and assisting you in making effective\ + \ life choices that benefit you, your relationships, and your overall wellbeing." + size: + explanation: "The size of the dataset isn't directly reported in the paper,\ + \ but the dataset is available on GitHub [[hh-rlhf]](https://github.com/anthropics/hh-rlhf).\ + \ The compressed data files are distributed under three categories in the\ + \ GitHub repository, corresponding to the names of the models used to sample\ + \ the prompts that are shown to the crowdworkers. These are namely: base,\ + \ rejection sampling, and online. For the base model, the uncompressed sizes\ + \ of the train and test datasets are 69.1 MB and 3.7 MB, respectively. The\ + \ same set of values for the rejection sampling model are 106.1 MB and 5.6\ + \ MB. For the online model, the sizes are 82.7 MB and 4.3 MB. Adding all up,\ + \ the total size of the dataset is 271.5 MB.\n" + value: 271.5 MB + type: dataset + url: https://arxiv.org/pdf/2204.05862.pdf +- access: + explanation: "Unlike its helpfulness, this dataset isn't released [[hh-rlhf]](https://github.com/anthropics/hh-rlhf).\n" + value: closed + analysis: "The crodworkers were told that \"lying isn't helpful\" and asked to\ + \ prefer honest responses, which led to models with higher honesty scores. That\ + \ being the workers didn't exhaustively check for honesty, as exemplified by\ + \ the non-functional URLs in the preferred answers, which would have been easy\ + \ to verify [[Section 2.1]](https://arxiv.org/pdf/2204.05862.pdf#subsection.2.1).\n" created_date: + explanation: "The date that the paper was uploaded to ArXiv [[ArXiv Abstract]](https://arxiv.org/abs/2204.05862).\n" value: 2022-04-12 - explanation: > - The date that the paper was uploaded to ArXiv - [[ArXiv Abstract]](https://arxiv.org/abs/2204.05862). - url: https://arxiv.org/pdf/2204.05862.pdf datasheet: none - modality: Text (English) - size: unknown - sample: [] - analysis: > - The crodworkers were told that "lying isn't helpful" and asked to prefer - honest responses, which led to models with higher honesty scores. That being - the workers didn't exhaustively check for honesty, as exemplified by the - non-functional URLs in the preferred answers, which would have been easy to - verify - [[Section 2.1]](https://arxiv.org/pdf/2204.05862.pdf#subsection.2.1). 
- # Construction dependencies: [Anthropic Human Feedback Interface] - license: - value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - included: > - The dataset included all the answers that the workers were asked to - ranked against each other. + description: "One of the datasets used to train Anthropic RLHF models. The dataset\ + \ was collected by asking crowdworkers to have open-ended conversations with\ + \ Anthropic models, aiming to elicit harmful responses, then choose the model\ + \ answer that was more harmful for their given task, via the Anthropic Human\ + \ Feedback Interface [[Section 2.2]](https://arxiv.org/pdf/2204.05862.pdf#subsection.2.2).\n" excluded: none - quality_control: > - Working with "select" crowdworkers or those screened for certain - qualifications, and employing simple data quality measures - [[Appendix D]](https://arxiv.org/pdf/2204.05862.pdf#appendix.D). - # Downstream - access: - value: closed - explanation: > - Unlike its helpfulness, this dataset isn't released - [[hh-rlhf]](https://github.com/anthropics/hh-rlhf). + feedback: + explanation: "The feedback mechanism is unknown since the dataset isn't released,\ + \ but it will likely be \"emailing the authors\" [[Paper]](https://arxiv.org/pdf/2204.05862.pdf).\n" + value: unknown + included: "The dataset included all the answers that the workers were asked to\ + \ ranked against each other.\n" intended_uses: + explanation: "The intended uses of the dataset isn't specified, but it will\ + \ likely be same as the helpfulness dataset, for which the intended use is\ + \ research.\n" value: unknown - explanation: > - The intended uses of the dataset isn't specified, but it will likely be - same as the helpfulness dataset, for which the intended use is research. - prohibited_uses: + license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The prohibited use cases of the dataset weren't specified. + modality: + explanation: Text (English) + value: text monitoring: + explanation: "There is no monitoring process for the usages of the dataset.\n" value: none - explanation: > - There is no monitoring process for the usages of the dataset. - feedback: - value: unknown - explanation: > - The feedback mechanism is unknown since the dataset isn't released, but - it will likely be "emailing the authors" - [[Paper]](https://arxiv.org/pdf/2204.05862.pdf). - -- type: model - name: Anthropic RLHF models - # General + name: Anthropic Harmlessness dataset organization: Anthropic - description: > - Anthropic RLHF models are models trained using reinforcement learning - from human feedback (RLHF). For Anthropic RLHF models, authors started with - a set of base models, and asked humans to rank model generated prompts based - on a specific tasks. They then trained preference models (PM) on the - prompt pairs, and use the PM scores as rewards for training the RLHF - models. - created_date: - value: 2022-04-12 - explanation: > - The date that the paper was uploaded to ArXiv - [[ArXiv Abstract]](https://arxiv.org/abs/2204.05862). 
+ prohibited_uses: + explanation: "The prohibited use cases of the dataset weren't specified.\n" + value: unknown + quality_control: "Working with \"select\" crowdworkers or those screened for certain\ + \ qualifications, and employing simple data quality measures [[Appendix D]](https://arxiv.org/pdf/2204.05862.pdf#appendix.D).\n" + sample: [] + size: unknown + type: dataset url: https://arxiv.org/pdf/2204.05862.pdf - model_card: none - modality: text (English) and Code - size: - value: 52B parameters (dense model) - explanation: > - There are several sizes of the RLHF models, the largest one having 52B - parameters - [[Figure 1]](https://arxiv.org/pdf/2204.05862.pdf#figure.caption.2). +- access: + explanation: "The model wasn't released.\n" + value: closed analysis: - value: > - The authors analyzed the impact of the dataset mixture on the preference - models (PM). In addition to human evaluation, RLHF model were evaluated on - MMLU, Lambada, HellaSwag, OpenBookQA, ARC-Easy, ARC-Challenge, TriviaQA, - code generation, summarization. - explanation: > - When tested on ability to generate responses that are both harmless and - helpful, the authors found that PMs trained on a mixture of harmlessness - and helpfulness preference datasets perform better than those trained - on only one of the two. - The authors also showed that the model performance is hurt for the smaller - models, but improved for the larger models when compared to plain - language models - [[Figure 28]](https://arxiv.org/pdf/2204.05862.pdf#figure.caption.34) - [[Figure 29]](https://arxiv.org/pdf/2204.05862.pdf#figure.caption.35). - The authors also found the larger RLHF models to be more truthful than - their plain counterparts. They also found that their models - had a more positive sentiment towards groups that the language models are - known to be biased against - [[Section 4.6]](https://arxiv.org/pdf/2204.05862.pdf#subsection.4.6). - # Construction + explanation: "When tested on ability to generate responses that are both harmless\ + \ and helpful, the authors found that PMs trained on a mixture of harmlessness\ + \ and helpfulness preference datasets perform better than those trained on\ + \ only one of the two. The authors also showed that the model performance\ + \ is hurt for the smaller models, but improved for the larger models when\ + \ compared to plain language models [[Figure 28]](https://arxiv.org/pdf/2204.05862.pdf#figure.caption.34)\ + \ [[Figure 29]](https://arxiv.org/pdf/2204.05862.pdf#figure.caption.35). The\ + \ authors also found the larger RLHF models to be more truthful than their\ + \ plain counterparts. They also found that their models had a more positive\ + \ sentiment towards groups that the language models are known to be biased\ + \ against [[Section 4.6]](https://arxiv.org/pdf/2204.05862.pdf#subsection.4.6).\n" + value: "The authors analyzed the impact of the dataset mixture on the preference\ + \ models (PM). In addition to human evaluation, RLHF model were evaluated\ + \ on MMLU, Lambada, HellaSwag, OpenBookQA, ARC-Easy, ARC-Challenge, TriviaQA,\ + \ code generation, summarization.\n" + created_date: + explanation: "The date that the paper was uploaded to ArXiv [[ArXiv Abstract]](https://arxiv.org/abs/2204.05862).\n" + value: 2022-04-12 dependencies: - Anthropic Harmlessness dataset - Anthropic Helpfulness dataset - training_emissions: - value: unknown - explanation: The authors do not provide data on the training emissions. 
- training_time: + description: "Anthropic RLHF models are models trained using reinforcement learning\ + \ from human feedback (RLHF). For Anthropic RLHF models, authors started with\ + \ a set of base models, and asked humans to rank model generated prompts based\ + \ on a specific tasks. They then trained preference models (PM) on the prompt\ + \ pairs, and use the PM scores as rewards for training the RLHF models.\n" + feedback: + explanation: "The feedback mechanism is unknown, but it will likely be \"emailing\ + \ the authors\" [[Paper]](https://arxiv.org/pdf/2204.05862.pdf).\n" value: unknown - explanation: The authors do not provide data on the training time. - training_hardware: + intended_uses: + explanation: "The intended uses of the model aren't specified, but they will\ + \ likely be the same as the released Anthropic helpfulness dataset, for which\ + \ the intended use was listed as research. In addition to English language\ + \ generation, the authors also explored code generation capabilities of their\ + \ models.\n" value: unknown - explanation: The authors do not provide data on the training hardware. - quality_control: > - unknown - # Downstream - access: - value: closed - explanation: > - The model wasn't released. license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - intended_uses: + modality: + explanation: text (English) and Code + value: code, text; code, text + model_card: none + monitoring: + explanation: "There is no known monitoring process for the usages of the model.\n" value: unknown - explanation: > - The intended uses of the model aren't specified, but they will likely be - the same as the released Anthropic helpfulness dataset, for which the - intended use was listed as research. In addition to English language - generation, the authors also explored code generation capabilities of - their models. + name: Anthropic RLHF models + organization: Anthropic prohibited_uses: + explanation: "The prohibited use cases of the model weren't specified.\n" value: unknown - explanation: > - The prohibited use cases of the model weren't specified. - monitoring: + quality_control: "unknown\n" + size: 52B parameters (dense) + training_emissions: + explanation: The authors do not provide data on the training emissions. value: unknown - explanation: > - There is no known monitoring process for the usages of the model. - feedback: + training_hardware: + explanation: The authors do not provide data on the training hardware. value: unknown - explanation: > - The feedback mechanism is unknown, but it will likely be "emailing the - authors" - [[Paper]](https://arxiv.org/pdf/2204.05862.pdf). - -- type: application - name: Anthropic Human Feedback Interface - # General - organization: Anthropic - description: > - The feedback interface used to collect preference datasets to train - Anthropic RLHF models - [[Paper]](https://arxiv.org/pdf/2204.05862.pdf). + training_time: + explanation: The authors do not provide data on the training time. 
+    value: unknown
+  type: model
+  url: https://arxiv.org/pdf/2204.05862.pdf
+- access:
+    explanation: "The interface was made available to a select set of crowd workers.\n"
+    value: closed
+  adaptation: none
   created_date:
+    explanation: "The date that the paper was uploaded to ArXiv [[ArXiv Abstract]](https://arxiv.org/abs/2204.05862).\n"
     value: 2022-04-12
-    explanation: >
-      The date that the paper was uploaded to ArXiv
-      [[ArXiv Abstract]](https://arxiv.org/abs/2204.05862).
-  url: https://arxiv.org/pdf/2204.05862.pdf
-  # Construction
   dependencies: [Anthropic RLHF models]
-  adaptation: none
-  output_space: >
-    UI allowing users to indicate their preference for the model responses
-    shown.
-  quality_control: unknown
-  # Downstream
-  access:
-    value: closed
-    explanation: >
-      The interface was made available to a select set of crowd workers.
-  terms_of_service:
-    value: unknown
-    explanation: The terms of service for the interaface is unknown.
+  description: "The feedback interface used to collect preference datasets to train\
+    \ Anthropic RLHF models [[Paper]](https://arxiv.org/pdf/2204.05862.pdf).\n"
+  failures: unknown
+  feedback: unknown
+  intended_uses: "Intended to be used by crowdworkers who are tasked with ranking\
+    \ model answers.\n"
   license:
+    explanation: "The asset isn't released, and hence the license is unknown.\n"
     value: unknown
-    explanation: >
-      The asset isn't released, and hence the license is unknown.
-  intended_uses: >
-    Intended to be used by crowdworkers who are tasked with ranking model
-    answers.
-  prohibited_uses: unknown
   monitoring: unknown
-  feedback: unknown
-  # Deployment
   monthly_active_users: unknown
+  name: Anthropic Human Feedback Interface
+  organization: Anthropic
+  output_space: "UI allowing users to indicate their preference for the model responses\
+    \ shown.\n"
+  prohibited_uses: unknown
+  quality_control: unknown
+  terms_of_service:
+    explanation: The terms of service for the interface is unknown.
+    value: unknown
+  type: application
+  url: https://arxiv.org/pdf/2204.05862.pdf
   user_distribution:
-    value: crowdworkers
     explanation: The demographics of the crowdworkers are unknown.
-  failures: unknown
-
-- type: application
-  name: Anthropic API
-  organization: Anthropic
+    value: crowdworkers
+- access: limited
+  adaptation: ''
+  created_date: 2023-03-14
+  dependencies: [Claude, Claude Instant]
   description: API is designed to be a backend that incorporates Claude into any
     application you’ve developed. Our application sends text to our API, then receives
     a response via server-sent events, a streaming protocol for the web.
-  created_date: 2023-03-14
-  url: https://console.anthropic.com/docs/api
-  dependencies: [Claude, Claude Instant]
-  adaptation: ''
-  output_space: ''
-  quality_control: ''
-  access: limited
+  failures: ''
+  feedback: ''
+  intended_uses: ''
   license:
-    value: none
     explanation: No license was found.
- terms_of_service: '' - intended_uses: '' - prohibited_uses: '' + value: none monitoring: '' - feedback: '' monthly_active_users: '' - user_distribution: '' - failures: '' - -- type: model - name: Claude + name: Anthropic API organization: Anthropic - description: '' - created_date: 2023-03-14 - url: https://www.anthropic.com/index/introducing-claude - model_card: '' - modality: text + output_space: '' + prohibited_uses: '' + quality_control: '' + terms_of_service: '' + type: application + url: https://console.anthropic.com/docs/api + user_distribution: '' +- access: limited analysis: '' - size: unknown + created_date: 2023-03-14 dependencies: [] - training_emissions: '' - training_time: '' - training_hardware: '' - quality_control: '' - access: limited + description: '' + feedback: '' + intended_uses: '' license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - intended_uses: '' - prohibited_uses: '' + modality: + explanation: text + value: text; text + model_card: '' monitoring: '' - feedback: '' - -- type: model - name: Claude Instant + name: Claude organization: Anthropic - description: '' - created_date: 2023-03-14 - url: https://www.anthropic.com/index/introducing-claude - model_card: '' - modality: text - analysis: '' + prohibited_uses: '' + quality_control: '' size: unknown - dependencies: [] training_emissions: '' - training_time: '' training_hardware: '' - quality_control: '' - access: limited + training_time: '' + type: model + url: https://www.anthropic.com/index/introducing-claude +- access: limited + analysis: '' + created_date: 2023-03-14 + dependencies: [] + description: '' + feedback: '' + intended_uses: '' license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - intended_uses: '' - prohibited_uses: '' + modality: + explanation: text + value: text; text + model_card: '' monitoring: '' - feedback: '' - -- type: model - name: Claude 2 + name: Claude Instant organization: Anthropic - description: Claude 2 is a more evolved and refined version of Claude, which is - a general purpose large language model using a transformer architecture and - trained via unsupervised learning. - created_date: 2023-07-11 - url: https://www.anthropic.com/index/claude-2 - model_card: https://www-files.anthropic.com/production/images/Model-Card-Claude-2.pdf - modality: text + prohibited_uses: '' + quality_control: '' + size: unknown + training_emissions: '' + training_hardware: '' + training_time: '' + type: model + url: https://www.anthropic.com/index/introducing-claude +- access: open analysis: Evaluated with human feedback on helpfulness, harmfulness, and honesty and on the Bias Benchmark for QA. - size: '' + created_date: 2023-07-11 dependencies: - Claude human feedback data - Unknown licensed third party datasets - training_emissions: '' - training_time: '' - training_hardware: unknown - quality_control: '' - access: open - license: '' + description: Claude 2 is a more evolved and refined version of Claude, which is + a general purpose large language model using a transformer architecture and + trained via unsupervised learning. 
+ feedback: '' intended_uses: Claude 2 tends to perform well at general, open-ended conversation; search, writing, editing, outlining, and summarizing text; coding; and providing helpful advice about a broad range of subjects. Claude 2 is particularly well suited to support creative or literary use cases. They can take direction on tone and “personality,” and users have described them as feeling steerable and conversational. + license: '' + modality: + explanation: text + value: text; text + model_card: https://www-files.anthropic.com/production/images/Model-Card-Claude-2.pdf + monitoring: '' + name: Claude 2 + organization: Anthropic prohibited_uses: Claude 2 should not be used on their own in high stakes situations where an incorrect answer would cause harm. - monitoring: '' - feedback: '' + quality_control: '' + size: '' + training_emissions: '' + training_hardware: unknown + training_time: '' + type: model + url: https://www.anthropic.com/index/claude-2 diff --git a/assets/argonne.yaml b/assets/argonne.yaml index addd0a35..049fa897 100644 --- a/assets/argonne.yaml +++ b/assets/argonne.yaml @@ -1,23 +1,24 @@ --- - -- type: model - name: GenSLM - organization: Argonne National Laboratory - description: '' - created_date: 2022-10-11 - url: https://www.biorxiv.org/content/10.1101/2022.10.10.511571v1 - model_card: '' - modality: genome sequence +- access: open analysis: '' - size: 25B parameters (dense model) + created_date: 2022-10-11 dependencies: [SARS-CoV-2 genome dataset, BV-BRC dataset] - training_emissions: '' - training_time: '' - training_hardware: '' - quality_control: '' - access: open - license: MIT + description: '' + feedback: '' intended_uses: '' - prohibited_uses: '' + license: MIT + modality: + explanation: genome sequence + value: text; text + model_card: '' monitoring: '' - feedback: '' + name: GenSLM + organization: Argonne National Laboratory + prohibited_uses: '' + quality_control: '' + size: 25B parameters (dense) + training_emissions: '' + training_hardware: '' + training_time: '' + type: model + url: https://www.biorxiv.org/content/10.1101/2022.10.10.511571v1 diff --git a/assets/assembly.yaml b/assets/assembly.yaml index 197b634e..61848cc2 100644 --- a/assets/assembly.yaml +++ b/assets/assembly.yaml @@ -1,48 +1,35 @@ --- - -- type: dataset - name: Conformer-1 dataset - organization: AssemblyAI - description: The dataset used to train AssemblyAI's Conformer-1 model. +- access: closed + analysis: '' created_date: - value: 2023-03-15 explanation: The date Conformer-1 was announced in this [[Twitter post]](https://twitter.com/AssemblyAI/status/1636050346240884744). - url: https://www.assemblyai.com/blog/conformer-1/ + value: 2023-03-15 datasheet: none - modality: speech (English), text - size: 650K hours audio (60TB) - sample: [] - analysis: '' dependencies: [] - included: > - "our team curated a dataset of 650K hours of English audio - consisting - of proprietary internal datasets and various sources from the internet" + description: The dataset used to train AssemblyAI's Conformer-1 model. excluded: '' - quality_control: '' - access: closed + feedback: '' + included: "\"our team curated a dataset of 650K hours of English audio - consisting\ + \ of proprietary internal datasets and various sources from the internet\"\n" + intended_uses: '' license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. 
- intended_uses: '' - prohibited_uses: '' + modality: + explanation: speech (English), text + value: audio, text monitoring: '' - feedback: '' - -- type: model - name: Conformer-1 + name: Conformer-1 dataset organization: AssemblyAI - description: Conformer-1 is a state-of-the-art speech recognition model trained - on 650K hours of audio data that achieves near human-level performance and robustness - across a variety of data, making up to 43% fewer errors on noisy data than other - ASR models. - created_date: - value: 2023-03-15 - explanation: The date the model was announced in this [[Twitter post]](https://twitter.com/AssemblyAI/status/1636050346240884744). + prohibited_uses: '' + quality_control: '' + sample: [] + size: 650K hours audio (60TB) + type: dataset url: https://www.assemblyai.com/blog/conformer-1/ - model_card: '' - modality: speech (English), text +- access: limited analysis: + explanation: See [[Results and Performance]](https://www.assemblyai.com/blog/conformer-1/#results-and-performance) value: In order to evaluate the accuracy and robustness of Conformer-1, we sourced 60+ hours of human labeled audio data covering popular speech domains such as call centers, podcasts, broadcasts, and webinars. We then calculated the @@ -50,76 +37,84 @@ the results against Whisper and a number of other ASR models. To ground our results against popular open source speech recognition benchmarks, we also performed the same WER analysis against a number of academic datasets. - explanation: See [[Results and Performance]](https://www.assemblyai.com/blog/conformer-1/#results-and-performance) - size: 300M parameters + created_date: + explanation: The date the model was announced in this [[Twitter post]](https://twitter.com/AssemblyAI/status/1636050346240884744). + value: 2023-03-15 dependencies: [Conformer-1 dataset] - training_emissions: '' - training_time: '' - training_hardware: '' - quality_control: '' - access: limited + description: Conformer-1 is a state-of-the-art speech recognition model trained + on 650K hours of audio data that achieves near human-level performance and robustness + across a variety of data, making up to 43% fewer errors on noisy data than other + ASR models. + feedback: '' + intended_uses: '' license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - intended_uses: '' - prohibited_uses: '' + modality: + explanation: speech (English), text + value: audio, text; audio, text + model_card: '' monitoring: '' - feedback: '' - -- type: application - name: AssemblyAI + name: Conformer-1 organization: AssemblyAI - description: AssemblyAI uses Claude and Anthropic's model to transcribe and understand - audio data at scale. + prohibited_uses: '' + quality_control: '' + size: 300M parameters (dense) + training_emissions: '' + training_hardware: '' + training_time: '' + type: model + url: https://www.assemblyai.com/blog/conformer-1/ +- access: limited + adaptation: '' created_date: 2022 - url: https://www.assemblyai.com/ dependencies: [Anthropic API] - adaptation: '' - output_space: '' - quality_control: '' - access: limited + description: AssemblyAI uses Claude and Anthropic's model to transcribe and understand + audio data at scale. 
+ failures: '' + feedback: '' + intended_uses: '' license: + explanation: "\"AssemblyAI gives you a personal, worldwide, royalty-free, non-assignable\ + \ and non-exclusive license to use the Site and Services solely for your reasonable\ + \ personal and commercial purposes, in the manner permitted by these Terms.\"\ + \ - excerpt from the Terms of Service.\n" value: custom - explanation: > - "AssemblyAI gives you a personal, worldwide, royalty-free, - non-assignable and non-exclusive license to use the Site and Services - solely for your reasonable personal and commercial purposes, in the - manner permitted by these Terms." - excerpt from the Terms of Service. - terms_of_service: https://www.assemblyai.com/legal/terms-of-service - intended_uses: '' - prohibited_uses: '' monitoring: '' - feedback: '' monthly_active_users: '' - user_distribution: '' - failures: '' - -- type: application - name: Conformer-1 API + name: AssemblyAI organization: AssemblyAI - description: API to access the AssemblyAI's Conformer-1 model. + output_space: '' + prohibited_uses: '' + quality_control: '' + terms_of_service: https://www.assemblyai.com/legal/terms-of-service + type: application + url: https://www.assemblyai.com/ + user_distribution: '' +- access: open + adaptation: '' created_date: - value: 2023-03-15 explanation: The date the API was announced in this [[Twitter post]](https://twitter.com/AssemblyAI/status/1636050346240884744). - url: https://www.assemblyai.com/blog/conformer-1/ + value: 2023-03-15 dependencies: [Conformer-1] - adaptation: '' - output_space: '' - quality_control: '' - access: open + description: API to access the AssemblyAI's Conformer-1 model. + failures: '' + feedback: '' + intended_uses: Speech recognition license: + explanation: "\"AssemblyAI gives you a personal, worldwide, royalty-free, non-assignable\ + \ and non-exclusive license to use the Site and Services solely for your reasonable\ + \ personal and commercial purposes, in the manner permitted by these Terms.\"\ + \ - excerpt from the Terms of Service.\n" value: custom - explanation: > - "AssemblyAI gives you a personal, worldwide, royalty-free, - non-assignable and non-exclusive license to use the Site and Services - solely for your reasonable personal and commercial purposes, in the - manner permitted by these Terms." - excerpt from the Terms of Service. - terms_of_service: https://www.assemblyai.com/legal/terms-of-service - intended_uses: Speech recognition - prohibited_uses: '' monitoring: '' - feedback: '' monthly_active_users: '' + name: Conformer-1 API + organization: AssemblyAI + output_space: '' + prohibited_uses: '' + quality_control: '' + terms_of_service: https://www.assemblyai.com/legal/terms-of-service + type: application + url: https://www.assemblyai.com/blog/conformer-1/ user_distribution: '' - failures: '' diff --git a/assets/autogpt.yaml b/assets/autogpt.yaml index 5a3a040f..34a3ef5f 100644 --- a/assets/autogpt.yaml +++ b/assets/autogpt.yaml @@ -1,23 +1,22 @@ --- - -- type: application - name: Auto-GPT - organization: Auto-GPT - description: Auto-GPT is an experimental open-source application showcasing the - capabilities of the GPT-4 language model. 
+- access: open + adaptation: GPT-4 adapted to run autonomously by chaining together LLM "thoughts" created_date: 2023-04-16 - url: https://news.agpt.co/ dependencies: [GPT-4 API] - adaptation: GPT-4 adapted to run autonomously by chaining together LLM "thoughts" - output_space: text - quality_control: '' - access: open - license: MIT - terms_of_service: '' + description: Auto-GPT is an experimental open-source application showcasing the + capabilities of the GPT-4 language model. + failures: '' + feedback: '' intended_uses: '' - prohibited_uses: '' + license: MIT monitoring: '' - feedback: '' monthly_active_users: '' + name: Auto-GPT + organization: Auto-GPT + output_space: text + prohibited_uses: '' + quality_control: '' + terms_of_service: '' + type: application + url: https://news.agpt.co/ user_distribution: '' - failures: '' diff --git a/assets/baai.yaml b/assets/baai.yaml index ab838a1d..e90846c6 100644 --- a/assets/baai.yaml +++ b/assets/baai.yaml @@ -1,68 +1,57 @@ --- - -- type: dataset - name: Wu Dao dataset - # General - organization: Beijing Academy of Artificial Intelligence - description: '' +- access: closed + analysis: '' created_date: + explanation: "The date that BAAI made a public announcement [[News Link]](https://mp.weixin.qq.com/s/BUQWZ5EdR19i40GuFofpBg).\n" value: 2021-01-12 - explanation: > - The date that BAAI made a public announcement - [[News Link]](https://mp.weixin.qq.com/s/BUQWZ5EdR19i40GuFofpBg). - url: https://www.tsinghua.edu.cn/en/info/1420/10473.htm datasheet: '' - modality: text (Chinese, English) and image - size: unknown - sample: [] - analysis: '' - # Construction dependencies: [] - license: - value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - included: '' + description: '' excluded: '' - quality_control: '' - # Downstream - access: closed + feedback: '' + included: '' intended_uses: '' - prohibited_uses: '' + license: + explanation: "The asset isn't released, and hence the license is unknown.\n" + value: unknown + modality: + explanation: text (Chinese, English) and image + value: image, text monitoring: '' - feedback: '' - -- type: model - name: Wu Dao 2.0 - # General + name: Wu Dao dataset organization: Beijing Academy of Artificial Intelligence - description: '' - created_date: - value: 2021-01-12 - explanation: > - The date that BAAI made a public announcement - [[News Link]](https://mp.weixin.qq.com/s/BUQWZ5EdR19i40GuFofpBg). + prohibited_uses: '' + quality_control: '' + sample: [] + size: unknown + type: dataset url: https://www.tsinghua.edu.cn/en/info/1420/10473.htm - model_card: '' - modality: text (Chinese, English) and image - size: 1.75T parameters (dense model) +- access: + explanation: "The model wasn't released.\n" + value: closed analysis: '' - # Construction + created_date: + explanation: "The date that BAAI made a public announcement [[News Link]](https://mp.weixin.qq.com/s/BUQWZ5EdR19i40GuFofpBg).\n" + value: 2021-01-12 dependencies: [Wu Dao dataset] - training_emissions: '' - training_time: '' - training_hardware: '' - quality_control: '' - # Downstream - access: - value: closed - explanation: > - The model wasn't released. + description: '' + feedback: '' + intended_uses: '' license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. 
- intended_uses: '' - prohibited_uses: '' + modality: + explanation: text (Chinese, English) and image + value: image, text; image, text + model_card: '' monitoring: '' - feedback: '' + name: Wu Dao 2.0 + organization: Beijing Academy of Artificial Intelligence + prohibited_uses: '' + quality_control: '' + size: 1.75T parameters (dense) + training_emissions: '' + training_hardware: '' + training_time: '' + type: model + url: https://www.tsinghua.edu.cn/en/info/1420/10473.htm diff --git a/assets/baidu.yaml b/assets/baidu.yaml index c6386cb1..bed1db91 100644 --- a/assets/baidu.yaml +++ b/assets/baidu.yaml @@ -1,89 +1,90 @@ --- - -- type: model - name: ERNIE 3.0 Titan - organization: Baidu, PengCheng Laboratory - description: ERNIE 3.0 Titan is a language model +- access: + explanation: Baidu does not provide ERNIE 3.0 for external researchers + value: closed + analysis: '' created_date: - value: 2021-12-23 explanation: The date the model paper was released - url: https://arxiv.org/abs/2112.12731 - model_card: none - modality: text (Chinese) - analysis: '' - size: 260B parameters (dense model) + value: 2021-12-23 dependencies: [] - training_emissions: unknown - training_time: unknown - training_hardware: Baidu V100 Cluster, PengCheng Lab Ascend 910 NPU cluster - quality_control: none - access: - value: closed - explanation: Baidu does not provide ERNIE 3.0 for external researchers + description: ERNIE 3.0 Titan is a language model + feedback: none + intended_uses: unknown license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - intended_uses: unknown - prohibited_uses: unknown - monitoring: none - feedback: none - -- type: model - name: ERNIE-ViLG - organization: Baidu - description: ERNIE-ViLG is a model for text-to-image generation - created_date: - value: 2021-12-31 - explanation: The date the model paper was released - url: https://arxiv.org/abs/2112.15283 + modality: + explanation: text (Chinese) + value: text; text model_card: none - modality: text (Chinese) and image - size: 10B parameters - analysis: '' - dependencies: [] + monitoring: none + name: ERNIE 3.0 Titan + organization: Baidu, PengCheng Laboratory + prohibited_uses: unknown + quality_control: none + size: 260B parameters (dense) training_emissions: unknown + training_hardware: Baidu V100 Cluster, PengCheng Lab Ascend 910 NPU cluster training_time: unknown - training_hardware: unknown - quality_control: '' - access: - value: limited + type: model + url: https://arxiv.org/abs/2112.12731 +- access: explanation: API access is available through PaddleHub. 
Instructions on [[Github repository]](https://github.com/PaddlePaddle/PaddleHub/blob/develop/modules/image/text_to_image/ernie_vilg/README.md) + value: limited + analysis: '' + created_date: + explanation: The date the model paper was released + value: 2021-12-31 + dependencies: [] + description: ERNIE-ViLG is a model for text-to-image generation + feedback: none + intended_uses: unknown license: + explanation: "No explicit license for the ERNIE-ViLG model is provided in the\ + \ [[Github repository]](https://github.com/PaddlePaddle/PaddleHub)\n" value: none - explanation: > - No explicit license for the ERNIE-ViLG model is provided in the [[Github repository]](https://github.com/PaddlePaddle/PaddleHub) - intended_uses: unknown - prohibited_uses: unknown + modality: + explanation: text (Chinese) and image + value: image, text; image, text + model_card: none monitoring: none - feedback: none - -- type: model - name: ERNIE-ViLG 2.0 + name: ERNIE-ViLG organization: Baidu - description: ERNIE-ViLG is a model for text-to-image generation + prohibited_uses: unknown + quality_control: '' + size: 10B parameters (dense) + training_emissions: unknown + training_hardware: unknown + training_time: unknown + type: model + url: https://arxiv.org/abs/2112.15283 +- access: + explanation: Baidu does not provide ERNIE-ViLG 2.0 to external researchers + value: closed + analysis: '' created_date: - value: 2022-10-27 explanation: The date the model paper was released - url: https://arxiv.org/abs/2210.15257 - model_card: none - modality: text (Chinese) and image - size: 10B parameters - analysis: '' + value: 2022-10-27 dependencies: [] - training_emissions: unknown - training_time: 18 days according to [[the paper]](https://arxiv.org/abs/2210.15257) - training_hardware: 320 A100 GPUs according to [[the paper]](https://arxiv.org/abs/2210.15257) - quality_control: '' - access: - value: closed - explanation: Baidu does not provide ERNIE-ViLG 2.0 to external researchers + description: ERNIE-ViLG is a model for text-to-image generation + feedback: none + intended_uses: unknown license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - intended_uses: unknown - prohibited_uses: unknown + modality: + explanation: text (Chinese) and image + value: image, text; image, text + model_card: none monitoring: none - feedback: none + name: ERNIE-ViLG 2.0 + organization: Baidu + prohibited_uses: unknown + quality_control: '' + size: 10B parameters (dense) + training_emissions: unknown + training_hardware: 320 A100 GPUs according to [[the paper]](https://arxiv.org/abs/2210.15257) + training_time: 18 days according to [[the paper]](https://arxiv.org/abs/2210.15257) + type: model + url: https://arxiv.org/abs/2210.15257 diff --git a/assets/bain.yaml b/assets/bain.yaml index c6f68c19..10f57f76 100644 --- a/assets/bain.yaml +++ b/assets/bain.yaml @@ -1,25 +1,24 @@ --- - -- type: application - name: Bain Chat - organization: Bain +- access: limited + adaptation: '' + created_date: 2023-02-21 + dependencies: [ChatGPT API] description: With the alliance, Bain will combine its deep digital implementation capabilities and strategic expertise with OpenAI’s AI tools and platforms, including ChatGPT, to help its clients around the world identify and implement the value of AI to maximize business potential. 
- created_date: 2023-02-21 - url: https://www.bain.com/vector-digital/partnerships-alliance-ecosystem/openai-alliance/ - dependencies: [ChatGPT API] - adaptation: '' - output_space: '' - quality_control: '' - access: limited - license: unknown - terms_of_service: '' + failures: '' + feedback: '' intended_uses: '' - prohibited_uses: '' + license: unknown monitoring: '' - feedback: '' monthly_active_users: '' + name: Bain Chat + organization: Bain + output_space: '' + prohibited_uses: '' + quality_control: '' + terms_of_service: '' + type: application + url: https://www.bain.com/vector-digital/partnerships-alliance-ecosystem/openai-alliance/ user_distribution: '' - failures: '' diff --git a/assets/be_my_eyes.yaml b/assets/be_my_eyes.yaml index 19c1e9cc..c9a60174 100644 --- a/assets/be_my_eyes.yaml +++ b/assets/be_my_eyes.yaml @@ -1,23 +1,22 @@ --- - -- type: application - name: Virtual Volunteer - organization: Be My Eyes - description: The first-ever digital visual assistant powered by OpenAI’s new GPT-4 - language model. +- access: limited + adaptation: '' created_date: 2023-03-14 - url: https://www.bemyeyes.com/blog/introducing-be-my-eyes-virtual-volunteer dependencies: [GPT-4 API] - adaptation: '' - output_space: '' - quality_control: '' - access: limited - license: unknown - terms_of_service: '' + description: The first-ever digital visual assistant powered by OpenAI’s new GPT-4 + language model. + failures: '' + feedback: '' intended_uses: '' - prohibited_uses: '' + license: unknown monitoring: '' - feedback: '' monthly_active_users: '' + name: Virtual Volunteer + organization: Be My Eyes + output_space: '' + prohibited_uses: '' + quality_control: '' + terms_of_service: '' + type: application + url: https://www.bemyeyes.com/blog/introducing-be-my-eyes-virtual-volunteer user_distribution: '' - failures: '' diff --git a/assets/berkeley.yaml b/assets/berkeley.yaml index e8bab3a0..76756276 100644 --- a/assets/berkeley.yaml +++ b/assets/berkeley.yaml @@ -1,72 +1,75 @@ --- - -- type: model - name: Koala - organization: Berkeley +- access: open + analysis: Evaluated in comparison with ChatGPT and Stanford Alpaca. + created_date: 2023-04-03 + dependencies: [LLaMA, web-scraped dialogue data] description: A relatively small chatbot trained by fine-tuning Meta’s LLaMA on dialogue data gathered from the web. - created_date: 2023-04-03 - url: https://bair.berkeley.edu/blog/2023/04/03/koala/ + feedback: https://huggingface.co/TheBloke/koala-7B-GPTQ-4bit-128g/discussions + intended_uses: academic research + license: Apache 2.0 + modality: + explanation: natural language text + value: text; text model_card: https://huggingface.co/TheBloke/koala-7B-GPTQ-4bit-128g - modality: natural language text - analysis: Evaluated in comparison with ChatGPT and Stanford Alpaca. - size: 13B parameters - dependencies: [LLaMA, web-scraped dialogue data] + monitoring: '' + name: Koala + organization: Berkeley + prohibited_uses: '' + quality_control: '' + size: 13B parameters (dense) training_emissions: '' - training_time: 6 hours training_hardware: 8 A100 GPUs - quality_control: '' - access: open + training_time: 6 hours + type: model + url: https://bair.berkeley.edu/blog/2023/04/03/koala/ +- access: open + analysis: Evaluated using AST sub-tree matching technique and compared to other + models in terms of API functionality accuracy. 
+ created_date: 2023-05-24 + dependencies: [LLaMA, Gorilla document retriever] + description: Gorilla is a finetuned LLaMA-based model that surpasses the performance + of GPT-4 on writing API calls. + feedback: '' + intended_uses: In conjunction with a LLM to improve its capability for using API + calls. license: Apache 2.0 - intended_uses: academic research - prohibited_uses: '' + modality: + explanation: outputs API from natural language input + value: other; other + model_card: '' monitoring: '' - feedback: https://huggingface.co/TheBloke/koala-7B-GPTQ-4bit-128g/discussions - -- type: model name: Gorilla organization: Berkeley - description: Gorilla is a finetuned LLaMA-based model that surpasses the performance - of GPT-4 on writing API calls. - created_date: 2023-05-24 - url: https://arxiv.org/pdf/2305.15334v1.pdf - model_card: '' - modality: outputs API from natural language input - analysis: Evaluated using AST sub-tree matching technique and compared to other - models in terms of API functionality accuracy. - size: 7B parameters - dependencies: [LLaMA, Gorilla document retriever] - training_emissions: '' - training_time: '' - training_hardware: '' + prohibited_uses: '' quality_control: No specific quality control is mentioned in model training, though details on data processing and collection are provided in the paper. - access: open + size: 7B parameters (dense) + training_emissions: '' + training_hardware: '' + training_time: '' + type: model + url: https://arxiv.org/pdf/2305.15334v1.pdf +- access: open + analysis: Evaluated on wide range of tasks using own evaluation benchmarks. + created_date: 2023-05-03 + dependencies: [RedPajama] + description: OpenLlama is an open source reproduction of Meta's LLaMA model. + feedback: '' + intended_uses: '' license: Apache 2.0 - intended_uses: In conjunction with a LLM to improve its capability for using API - calls. - prohibited_uses: '' + modality: + explanation: text + value: text; text + model_card: '' monitoring: '' - feedback: '' - -- type: model name: OpenLLaMA organization: Berkeley - description: OpenLlama is an open source reproduction of Meta's LLaMA model. - created_date: 2023-05-03 - url: https://github.com/openlm-research/open_llama - model_card: '' - modality: text - analysis: Evaluated on wide range of tasks using own evaluation benchmarks. - size: 17B parameters - dependencies: [RedPajama] + prohibited_uses: '' + quality_control: '' + size: 17B parameters (dense) training_emissions: unknown - training_time: unknown training_hardware: '' - quality_control: '' - access: open - license: Apache 2.0 - intended_uses: '' - prohibited_uses: '' - monitoring: '' - feedback: '' + training_time: unknown + type: model + url: https://github.com/openlm-research/open_llama diff --git a/assets/bigcode.yaml b/assets/bigcode.yaml index 4a5362ea..cb03bed0 100644 --- a/assets/bigcode.yaml +++ b/assets/bigcode.yaml @@ -1,76 +1,79 @@ --- - -- type: model - name: StarCoder - organization: BigCode +- access: open + analysis: Tested on several benchmarks, most notably Python benchmark HumanEval. + created_date: 2023-05-09 + dependencies: [The Stack] description: StarCoder is a Large Language Model for Code (Code LLM) trained on permissively licensed data from GitHub, including from 80+ programming languages, Git commits, GitHub issues, and Jupyter notebooks. 
- created_date: 2023-05-09
- url: https://arxiv.org/pdf/2305.06161.pdf
+ feedback: https://huggingface.co/bigcode/starcoder/discussions
+ intended_uses: With a Tech Assistant prompt and not as an instruction model given
  training limitations.
+ license: Apache 2.0
+ modality:
+ explanation: code (80+ programming languages)
+ value: code; code
  model_card: https://huggingface.co/bigcode/starcoder
- modality: code (80+ programming languages)
- analysis: Tested on several benchmarks, most notably Python benchmark HumanEval.
- size: 15.5B parameters
- dependencies: [The Stack]
- training_emissions: 16.68 tons of CO2eq
- training_time: 2 days
- training_hardware: 64 NVIDIA A100 GPUs
+ monitoring: ''
+ name: StarCoder
+ organization: BigCode
+ prohibited_uses: ''
  quality_control: No specific quality control is mentioned in model training,
  though details on data processing and how the tokenizer was trained are provided
  in the paper.
- access: open
+ size: 15.5B parameters (dense)
+ training_emissions: 16.68 tons of CO2eq
+ training_hardware: 64 NVIDIA A100 GPUs
+ training_time: 2 days
+ type: model
+ url: https://arxiv.org/pdf/2305.06161.pdf
+- access: open
+ analysis: Evaluated on MultiPL-E system benchmarks.
+ created_date: 2023-02-24
+ dependencies: [The Stack, BigCode Dataset]
+ description: Multilingual code model derived from findings of BigCode Project
+ analysis on Github stars' association to data quality.
+ feedback: ''
+ intended_uses: ''
  license: Apache 2.0
- intended_uses: With a Tech Assistant prompt and not as an instruction model given
- training limitations.
- prohibited_uses: ''
+ modality:
+ explanation: code
+ value: code; code
+ model_card: ''
  monitoring: ''
- feedback: https://huggingface.co/bigcode/starcoder/discussions
-
-- type: model
  name: SantaCoder
  organization: BigCode
- description: Multilingual code model derived from findings of BigCode Project
- analysis on Github stars' association to data quality.
- created_date: 2023-02-24
- url: https://arxiv.org/pdf/2301.03988.pdf
- model_card: ''
- modality: code
- analysis: Evaluated on MultiPL-E system benchmarks.
- size: 1.1B parameters
- dependencies: [The Stack, BigCode Dataset]
+ prohibited_uses: ''
+ quality_control: ''
+ size: 1.1B parameters (dense)
  training_emissions: ''
- training_time: 3.1 days
  training_hardware: 96 NVIDIA Tesla V100 GPUs
- quality_control: ''
- access: open
- license: Apache 2.0
- intended_uses: ''
- prohibited_uses: ''
- monitoring: ''
- feedback: ''
-
-- type: dataset
- name: The Stack
- organization: BigCode
- description: The Stack is a 3.1 TB dataset consisting of permissively licensed
- source code inteded for use in creating code LLMs.
- created_date: 2022-11-20
- url: https://arxiv.org/pdf/2211.15533.pdf
- datasheet: https://huggingface.co/datasets/bigcode/the-stack
- modality: code (358 programming languages)
- size: 3.1 TB
- sample: []
+ training_time: 3.1 days
+ type: model
+ url: https://arxiv.org/pdf/2301.03988.pdf
+- access: open
  analysis: Evaluated models trained on The Stack on HumanEval and MBPP and compared
  against similarly-sized models.
+ created_date: 2022-11-20
+ datasheet: https://huggingface.co/datasets/bigcode/the-stack
  dependencies: [GitHub]
- included: ''
+ description: The Stack is a 3.1 TB dataset consisting of permissively licensed
+ source code intended for use in creating code LLMs.
excluded: '' - quality_control: allowed users whose data were part of The Stack's training data - to opt-out - access: open - license: Apache 2.0 + feedback: '' + included: '' intended_uses: creating code LLMs - prohibited_uses: '' + license: Apache 2.0 + modality: + explanation: code (358 programming languages) + value: code monitoring: '' - feedback: '' + name: The Stack + organization: BigCode + prohibited_uses: '' + quality_control: allowed users whose data were part of The Stack's training data + to opt-out + sample: [] + size: 3.1 TB + type: dataset + url: https://arxiv.org/pdf/2211.15533.pdf diff --git a/assets/bigscience.yaml b/assets/bigscience.yaml index 21251e9d..ce03bf3f 100644 --- a/assets/bigscience.yaml +++ b/assets/bigscience.yaml @@ -1,163 +1,148 @@ --- - -# Datasets -- type: dataset - name: ROOTS - # General - organization: BigScience - description: The Responsible Open-science Open-collaboration Text Sources (ROOTS) - corpus, a 1.6TB dataset spanning 59 languages that was used to train the 176-billion-parameter - BigScience Large Open-science Open-access Multilingual (BLOOM) language model. +- access: + explanation: While not all of the ROOTS corpus is publicly accessible, the majority + is through [this form](https://docs.google.com/forms/d/e/1FAIpQLSdq50O1x4dkdGI4dwsmchFuNI0KCWEDiKUYxvd0r0_sl6FfAQ/viewform?usp=send_form) + value: open + analysis: https://huggingface.co/spaces/bigscience-data/roots-search created_date: 2022-06-06 - url: https://openreview.net/forum?id=UoEw6KigkUn datasheet: https://huggingface.co/spaces/bigscience-data/bigscience-corpus - modality: text (46 languages) and code (13 programming languages) - size: 1.6TB - sample: [https://huggingface.co/spaces/bigscience-data/roots-search] - analysis: https://huggingface.co/spaces/bigscience-data/roots-search - # Construction dependencies: [] - license: - value: custom - explanation: Each constituent subset of the dataset will be released under the - license that applies to it. ([See individual dataset page for specific license - information](https://hf.co/bigscience-data)) - included: See section 2 of the paper. + description: The Responsible Open-science Open-collaboration Text Sources (ROOTS) + corpus, a 1.6TB dataset spanning 59 languages that was used to train the 176-billion-parameter + BigScience Large Open-science Open-access Multilingual (BLOOM) language model. excluded: unknown - quality_control: Data collection involved merging and deduplicating searches to - remove menus, HTML tags. Further, a quality improvement pipeline was implemented. - # Downstream - access: - value: open - explanation: While not all of the ROOTS corpus is publicly accessible, the majority - is through [this form](https://docs.google.com/forms/d/e/1FAIpQLSdq50O1x4dkdGI4dwsmchFuNI0KCWEDiKUYxvd0r0_sl6FfAQ/viewform?usp=send_form) + feedback: https://huggingface.co/spaces/bigscience-data/roots-search/discussions + included: See section 2 of the paper. intended_uses: To empower large-scale monolingual and multilingual modeling projects with both the data and the processing tools, as well as stimulate research around this large multilingual corpus. - prohibited_uses: none + license: + explanation: Each constituent subset of the dataset will be released under the + license that applies to it. 
  ([See individual dataset page for specific license
+ information](https://hf.co/bigscience-data))
+ value: custom
+ modality:
+ explanation: text (46 languages) and code (13 programming languages)
+ value: code, text
  monitoring: none
- feedback: https://huggingface.co/spaces/bigscience-data/roots-search/discussions
-
-- type: dataset
- name: P3
- # General
+ name: ROOTS
  organization: BigScience
- description: The Public Pool of Prompts (P3) are prompts written in an unified
- format use to train T0++.
+ prohibited_uses: none
+ quality_control: Data collection involved merging and deduplicating searches to
+ remove menus, HTML tags. Further, a quality improvement pipeline was implemented.
+ sample: [https://huggingface.co/spaces/bigscience-data/roots-search]
+ size: 1.6TB
+ type: dataset
+ url: https://openreview.net/forum?id=UoEw6KigkUn
+- access: open
+ analysis: See the paper.
  created_date: 2022-10-15
- url: https://arxiv.org/pdf/2110.08207.pdf
  datasheet: https://huggingface.co/datasets/bigscience/P3
- modality: text (English)
- size: 2000 prompts
- sample: [https://huggingface.co/datasets/bigscience/P3/viewer]
- analysis: See the paper.
- # Construction
  dependencies: []
- license: Apache 2.0
+ description: The Public Pool of Prompts (P3) is a set of prompts written in a unified
+ format used to train T0++.
+ excluded: We conservatively decided not to prompt datasets that contain potentially
+ harmful content (for instance, datasets built on social media content).
+ feedback: Point of Contact is [Victor Sanh](https://huggingface.co/datasets/bigscience/P3)
  included: The Public Pool of Prompts relies on the Hugging Face Dataset library.
  Any public dataset in the Datasets library can be prompted. We select the datasets
  that have at least one subset in English and excluded datasets containing (predominantly)
  non-natural language examples.
- excluded: We conservatively decided not to prompt datasets that contain potentially
- harmful content (for instance, datasets built on social media content).
- quality_control: Data collection involved merging and deduplicating searches to
- remove menus, HTML tags. Further, a quality improvement pipeline was implemented.
- # Downstream
- access: open
  intended_uses: Multitask finetuning of language models.
- prohibited_uses: none
+ license: Apache 2.0
+ modality:
+ explanation: text (English)
+ value: text
  monitoring: none
- feedback: Point of Contact is [Victor Sanh](https://huggingface.co/datasets/bigscience/P3)
-
-- type: dataset
- name: xP3
- # General
+ name: P3
  organization: BigScience
+ prohibited_uses: none
+ quality_control: Data collection involved merging and deduplicating searches to
+ remove menus, HTML tags. Further, a quality improvement pipeline was implemented.
+ sample: [https://huggingface.co/datasets/bigscience/P3/viewer]
+ size: 2000 prompts
+ type: dataset
+ url: https://arxiv.org/pdf/2110.08207.pdf
+- access: open
+ analysis: See the paper.
+ created_date: 2022-11-03
+ datasheet: https://huggingface.co/datasets/bigscience/xP3
+ dependencies: [P3]
  description: xP3 (Crosslingual Public Pool of Prompts) is a collection of prompts
  and datasets across 46 languages & 16 NLP tasks. It is used for the training
  of BLOOMZ and mT0, multilingual language models capable of following human instructions
  in dozens of languages zero-shot.
- created_date: 2022-11-03 - url: https://arxiv.org/abs/2211.01786 - datasheet: https://huggingface.co/datasets/bigscience/xP3 - modality: text (46 languages) and code (13 programming languages) - size: 9.4GB - sample: [https://huggingface.co/datasets/bigscience/xP3/viewer] - analysis: See the paper. - # Construction - dependencies: [P3] - license: Apache 2.0 - included: xP3 adds 28 multilingual datasets to P3 based on the P3 task taxonomy. excluded: We conservatively decided not to prompt datasets that contain potentially harmful content (for instance, datasets built on social media content). - quality_control: Data collection involved merging and deduplicating searches to - remove menus, HTML tags. Further, a quality improvement pipeline was implemented. - # Downstream - access: open + feedback: Point of Contact is [Niklas Muennighoff](https://huggingface.co/datasets/bigscience/xP3) + included: xP3 adds 28 multilingual datasets to P3 based on the P3 task taxonomy. intended_uses: Multitask finetuning of language models. - prohibited_uses: none + license: Apache 2.0 + modality: + explanation: text (46 languages) and code (13 programming languages) + value: code, text monitoring: none - feedback: Point of Contact is [Niklas Muennighoff](https://huggingface.co/datasets/bigscience/xP3) - - -- type: model - name: T0++ + name: xP3 organization: BigScience - description: T0++ is an multitask fine-tuned language model based on T5. + prohibited_uses: none + quality_control: Data collection involved merging and deduplicating searches to + remove menus, HTML tags. Further, a quality improvement pipeline was implemented. + sample: [https://huggingface.co/datasets/bigscience/xP3/viewer] + size: 9.4GB + type: dataset + url: https://arxiv.org/abs/2211.01786 +- access: open + analysis: '' created_date: - value: 2021-10-15 explanation: The date the paper was posted to arXiv. - url: https://arxiv.org/pdf/2110.08207.pdf - model_card: https://huggingface.co/bigscience/T0pp - modality: text (English) - size: 11B parameters (dense model) - analysis: '' + value: 2021-10-15 dependencies: [T5, P3] - training_emissions: - value: 0.9 tCO2e - explanation: Sourced from estimates in https://arxiv.org/pdf/2110.08207.pdf - training_time: - value: 27 hours - explanation: Sourced from estimates in https://arxiv.org/pdf/2110.08207.pdf - training_hardware: Jean Zay (v3-512) - quality_control: https://arxiv.org/pdf/2110.08207.pdf - access: open - license: Apache 2.0 + description: T0++ is an multitask fine-tuned language model based on T5. + feedback: https://huggingface.co/bigscience/T0pp/discussions intended_uses: You can use the models to perform inference on tasks by specifying your query in natural language, and the models will generate a prediction. - prohibited_uses: none + license: Apache 2.0 + modality: + explanation: text (English) + value: text; text + model_card: https://huggingface.co/bigscience/T0pp monitoring: none - feedback: https://huggingface.co/bigscience/T0pp/discussions - -- type: model - name: BLOOM + name: T0++ organization: BigScience - description: BLOOM is an autoregressive multilingual language model. 
+ prohibited_uses: none + quality_control: https://arxiv.org/pdf/2110.08207.pdf + size: 11B parameters (dense) + training_emissions: + explanation: Sourced from estimates in https://arxiv.org/pdf/2110.08207.pdf + value: 0.9 tCO2e + training_hardware: Jean Zay (v3-512) + training_time: + explanation: Sourced from estimates in https://arxiv.org/pdf/2110.08207.pdf + value: 27 hours + type: model + url: https://arxiv.org/pdf/2110.08207.pdf +- access: open + analysis: '' created_date: - value: 2022-07-12 explanation: The date the model was released - url: https://arxiv.org/abs/2211.05100 - model_card: '' - modality: text (46 languages) and code (13 programming languages) - size: 176B parameters (dense model) - analysis: '' + value: 2022-07-12 dependencies: [ROOTS] - training_emissions: - value: 25 tCO2e - explanation: Sourced from estimates in https://arxiv.org/abs/2211.02001 - training_time: - value: 7039 petaflop/s-days - explanation: 1082990 A100 hours at 156 TFLOP/s maximum utilization - training_hardware: Jean Zay (48 * 8xA100 80GB nodes) - quality_control: '' - access: open - license: - value: BigScience RAIL v1.0 - explanation: Model is licensed under https://huggingface.co/spaces/bigscience/license + description: BLOOM is an autoregressive multilingual language model. + feedback: https://huggingface.co/bigscience/bloom/discussions intended_uses: This model is being created in order to enable public research on large language models (LLMs). LLMs are intended to be used for language generation or as a pretrained base model that can be further fine-tuned for specific tasks. Use cases below are not exhaustive. + license: + explanation: Model is licensed under https://huggingface.co/spaces/bigscience/license + value: BigScience RAIL v1.0 + modality: + explanation: text (46 languages) and code (13 programming languages) + value: code, text; code, text + model_card: '' + monitoring: none + name: BLOOM + organization: BigScience prohibited_uses: Using the model in high-stakes settings is out of scope for this model (e.g. biomedical/political/legal/finance domains, evaluating or scoring individuals). The model is not designed for critical decisions nor uses with @@ -166,61 +151,72 @@ using the model for harm, violating human rights, or other kinds of malicious activities, is a misuse of this model (e.g. spam generation, disinformation, disparagement, deception, surveillance). - monitoring: none - feedback: https://huggingface.co/bigscience/bloom/discussions - -- type: model - name: mT0 - organization: BigScience - description: mT0 is an multitask fine-tuned multilingual language model based - on mT5. + quality_control: '' + size: 176B parameters (dense) + training_emissions: + explanation: Sourced from estimates in https://arxiv.org/abs/2211.02001 + value: 25 tCO2e + training_hardware: Jean Zay (48 * 8xA100 80GB nodes) + training_time: + explanation: 1082990 A100 hours at 156 TFLOP/s maximum utilization + value: 7039 petaflop/s-days + type: model + url: https://arxiv.org/abs/2211.05100 +- access: open + analysis: https://huggingface.co/bigscience/bloomz#evaluation created_date: - value: 2021-10-15 explanation: The date the model was released - url: https://arxiv.org/pdf/2110.08207.pdf - model_card: https://huggingface.co/bigscience/T0pp - modality: text (English) - size: 13B parameters (dense model) - analysis: https://huggingface.co/bigscience/bloomz#evaluation + value: 2021-10-15 dependencies: [mT5, xP3] + description: mT0 is an multitask fine-tuned multilingual language model based + on mT5. 
+ feedback: https://huggingface.co/bigscience/bloomz/discussions + intended_uses: We recommend using the model to perform tasks expressed in natural + language. + license: + explanation: Model is licensed under https://huggingface.co/spaces/bigscience/license + value: BigScience RAIL v1.0 + modality: + explanation: text (English) + value: text; text + model_card: https://huggingface.co/bigscience/T0pp + monitoring: none + name: mT0 + organization: BigScience + prohibited_uses: none + quality_control: https://arxiv.org/pdf/2211.01786.pdf + size: 13B parameters (dense) training_emissions: unknown - training_time: unknown training_hardware: Jean Zay (288 A100 80GB GPUs with 8 GPUs per node (36 nodes) using NVLink 4 inter-gpu connects, 4 OmniPath links) - quality_control: https://arxiv.org/pdf/2211.01786.pdf - access: open - license: - value: BigScience RAIL v1.0 - explanation: Model is licensed under https://huggingface.co/spaces/bigscience/license + training_time: unknown + type: model + url: https://arxiv.org/pdf/2110.08207.pdf +- access: open + analysis: https://huggingface.co/bigscience/bloomz#evaluation + created_date: 2022-11-03 + dependencies: [BLOOM, xP3] + description: BLOOMZ is an multitask fine-tuned autoregressive multilingual language + model. + feedback: https://huggingface.co/bigscience/bloomz/discussions intended_uses: We recommend using the model to perform tasks expressed in natural language. - prohibited_uses: none + license: + explanation: Model is licensed under https://huggingface.co/spaces/bigscience/license + value: BigScience RAIL v1.0 + modality: + explanation: text (46 languages) and code (13 programming languages) + value: code, text; code, text + model_card: https://huggingface.co/bigscience/bloomz monitoring: none - feedback: https://huggingface.co/bigscience/bloomz/discussions - -- type: model name: BLOOMZ organization: BigScience - description: BLOOMZ is an multitask fine-tuned autoregressive multilingual language - model. - created_date: 2022-11-03 - url: https://arxiv.org/pdf/2211.01786.pdf - model_card: https://huggingface.co/bigscience/bloomz - modality: text (46 languages) and code (13 programming languages) - size: 176B parameters (dense model) - analysis: https://huggingface.co/bigscience/bloomz#evaluation - dependencies: [BLOOM, xP3] + prohibited_uses: none + quality_control: https://arxiv.org/pdf/2211.01786.pdf + size: 176B parameters (dense) training_emissions: unknown - training_time: unknown training_hardware: Jean Zay (288 A100 80GB GPUs with 8 GPUs per node (36 nodes) using NVLink 4 inter-gpu connects, 4 OmniPath links) - quality_control: https://arxiv.org/pdf/2211.01786.pdf - access: open - license: - value: BigScience RAIL v1.0 - explanation: Model is licensed under https://huggingface.co/spaces/bigscience/license - intended_uses: We recommend using the model to perform tasks expressed in natural - language. 
- prohibited_uses: none - monitoring: none - feedback: https://huggingface.co/bigscience/bloomz/discussions + training_time: unknown + type: model + url: https://arxiv.org/pdf/2211.01786.pdf diff --git a/assets/bloomberg.yaml b/assets/bloomberg.yaml index 352f03f9..e6a4b58a 100644 --- a/assets/bloomberg.yaml +++ b/assets/bloomberg.yaml @@ -1,137 +1,126 @@ --- - -- type: dataset - name: FinPile - # General - organization: Bloomberg - description: A comprehensive dataset consisting of a range of English financial - documents including news, filings, press releases, web-scraped financial documents, - and social media drawn from the Bloomberg archives that was used to train the - BloombergGPT model. +- access: + explanation: See [[Openness]](https://arxiv.org/pdf/2303.17564.pdf#subsection.8.2) + value: closed + analysis: '' created_date: - value: 2023-03-30 explanation: The date BloombergGPT was announced in the [[Bloomberg article]](https://www.bloomberg.com/company/press/bloomberggpt-50-billion-parameter-llm-tuned-finance/). - url: https://arxiv.org/pdf/2303.17564.pdf#section.2 + value: 2023-03-30 datasheet: - value: https://arxiv.org/pdf/2303.17564.pdf#section.2 explanation: Section 2 of the BloombergGPT paper. - modality: text (English) - size: - value: 363B tokens - explanation: See [[Financial Datasets]](https://arxiv.org/pdf/2303.17564.pdf#subsection.2.1) - sample: [] - analysis: '' - # Construction + value: https://arxiv.org/pdf/2303.17564.pdf#section.2 dependencies: [] - license: - value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. + description: A comprehensive dataset consisting of a range of English financial + documents including news, filings, press releases, web-scraped financial documents, + and social media drawn from the Bloomberg archives that was used to train the + BloombergGPT model. + excluded: '' + feedback: '' included: - value: | - FinPile consists of English financial documents. Authors utilize the The Bloomberg - Terminal, which is an extensive collection of curated and maintained documents, - to create the FinPile dataset. Each document in FinPile is time-stamped, with - dates ranging from 2007-03-01 to 2022-07-31. - Types of data included are given below: - 1. Web (298B tokens) - Inclues Bloomberg's web crawl focused on high-quality - websites that have financially relevant information. This makes up the majority - of FinPile. - 2. News (38B tokens) - Includes all news sources relevant to the financial - community, excluding news articles written by Bloomberg journalists. Overall, - there are hundreds of English news sources in FinPile including "Bloomberg - Transcripts", which are transcripts of Bloomberg TV news. - 3. Filings (14B tokens) - Includes financial statements prepared by (public) - companies and made available to the general public. In the dataset, a majority - of the filings come from EDGAR, which is the SEC's online database. - 4. Press (9B tokens) - Includes press releases typically issued by companies - that are financially relevant. - 5. Bloomberg (5B tokens) - Includes Bloomberg authored news and other documents - such as opinions and analyses. The largest sources are “Bloomberg News” and - “Bloomberg First Word”, the Bloomberg-authored wire of real-time news. 
explanation: See [[Financial Datasets]](https://arxiv.org/pdf/2303.17564.pdf#subsection.2.1) - excluded: '' - quality_control: '' - # Downstream - access: - value: closed - explanation: See [[Openness]](https://arxiv.org/pdf/2303.17564.pdf#subsection.8.2) + value: "FinPile consists of English financial documents. Authors utilize the\ + \ The Bloomberg\nTerminal, which is an extensive collection of curated and\ + \ maintained documents,\nto create the FinPile dataset. Each document in FinPile\ + \ is time-stamped, with\ndates ranging from 2007-03-01 to 2022-07-31.\nTypes\ + \ of data included are given below:\n 1. Web (298B tokens) - Inclues Bloomberg's\ + \ web crawl focused on high-quality\nwebsites that have financially relevant\ + \ information. This makes up the majority\nof FinPile.\n 2. News (38B tokens)\ + \ - Includes all news sources relevant to the financial\ncommunity, excluding\ + \ news articles written by Bloomberg journalists. Overall,\nthere are hundreds\ + \ of English news sources in FinPile including \"Bloomberg\nTranscripts\"\ + , which are transcripts of Bloomberg TV news.\n 3. Filings (14B tokens) -\ + \ Includes financial statements prepared by (public)\ncompanies and made available\ + \ to the general public. In the dataset, a majority\nof the filings come\ + \ from EDGAR, which is the SEC's online database.\n 4. Press (9B tokens)\ + \ - Includes press releases typically issued by companies\nthat are financially\ + \ relevant.\n 5. Bloomberg (5B tokens) - Includes Bloomberg authored news\ + \ and other documents\nsuch as opinions and analyses. The largest sources\ + \ are “Bloomberg News” and\n“Bloomberg First Word”, the Bloomberg-authored\ + \ wire of real-time news.\n" intended_uses: Used to train the BloombergGPT model. - prohibited_uses: '' + license: + explanation: "The asset isn't released, and hence the license is unknown.\n" + value: unknown + modality: + explanation: text (English) + value: text monitoring: '' - feedback: '' - -- type: model - name: BloombergGPT - # General + name: FinPile organization: Bloomberg + prohibited_uses: '' + quality_control: '' + sample: [] + size: + explanation: See [[Financial Datasets]](https://arxiv.org/pdf/2303.17564.pdf#subsection.2.1) + value: 363B tokens + type: dataset + url: https://arxiv.org/pdf/2303.17564.pdf#section.2 +- access: + explanation: See [[Openness]](https://arxiv.org/pdf/2303.17564.pdf#subsection.8.2) + value: closed + analysis: + explanation: See [[Evaluation]](https://arxiv.org/pdf/2303.17564.pdf#section.5) + value: "Authors evaluate the performance of BloombergGPT on two broad categories\ + \ of tasks, finance-specific and general purpose, on several standard benchmarks.\ + \ They compare BloombergGPT to the three closest models: GPT-NeoX, OPT-66B\ + \ and BLOOM-176B. They also report results from the original GPT-3 whenever\ + \ externally available. They conclude \"We achieve strong results on general\ + \ LLM benchmarks and outperform comparable models on financial tasks. We attribute\ + \ this, in decreasing order of impact, to 1. a well-curated internal dataset,\ + \ 2. our unique choice in tokenizer, and 3. an up-to-date architecture.\"\n" + created_date: + explanation: The date the model was announced in the [[Bloomberg article]](https://www.bloomberg.com/company/press/bloomberggpt-50-billion-parameter-llm-tuned-finance/). 
+ value: 2023-03-30 + dependencies: [FinPile, The Pile, C4, Wikipedia] description: BloombergGPT is a 50 billion parameter large language model that is specifically trained on a wide range of financial data to support a diverse set of natural language processing tasks within the financial industry. - created_date: - value: 2023-03-30 - explanation: The date the model was announced in the [[Bloomberg article]](https://www.bloomberg.com/company/press/bloomberggpt-50-billion-parameter-llm-tuned-finance/). - url: https://arxiv.org/abs/2303.17564 + feedback: '' + intended_uses: + explanation: See [[Bloomberg article]](https://www.bloomberg.com/company/press/bloomberggpt-50-billion-parameter-llm-tuned-finance/) + value: "\"This model will assist Bloomberg in improving existing financial NLP\ + \ tasks, such as sentiment analysis, named entity recognition, news classification,\ + \ and question answering, among others. Furthermore, BloombergGPT will unlock\ + \ new opportunities for marshalling the vast quantities of data available\ + \ on the Bloomberg Terminal to better help the firm's customers, while bringing\ + \ the full potential of AI to the financial domain.\"\n" + license: + explanation: "The asset isn't released, and hence the license is unknown.\n" + value: unknown + modality: + explanation: text (English) + value: text; text model_card: none - modality: text (English) - analysis: - value: > - Authors evaluate the performance of BloombergGPT on two broad categories of - tasks, finance-specific and general purpose, on several standard benchmarks. - They compare BloombergGPT to the three closest models: GPT-NeoX, OPT-66B and - BLOOM-176B. They also report results from the original GPT-3 whenever externally - available. They conclude "We achieve strong results on general LLM benchmarks - and outperform comparable models on financial tasks. We attribute this, in - decreasing order of impact, to 1. a well-curated internal dataset, 2. our - unique choice in tokenizer, and 3. an up-to-date architecture." - explanation: See [[Evaluation]](https://arxiv.org/pdf/2303.17564.pdf#section.5) - size: 50B parameters (dense model) - # Construction - dependencies: [FinPile, The Pile, C4, Wikipedia] + monitoring: '' + name: BloombergGPT + organization: Bloomberg + prohibited_uses: '' + quality_control: + explanation: See [[Ethics, Limitations, and Implications]](https://arxiv.org/pdf/2303.17564.pdf#section.8) + value: "Authors state the following:\n- \"To provide natural language applications\ + \ to the financial community, we\n have developed a rigorous risk and testing\ + \ assessment process. This process\n includes careful annotation guidelines\ + \ Tseng et al. (2020), pre-launch review\n at multiple levels by the central\ + \ risk and compliance organizations, and\n by the product leaders (e.g.,\ + \ the newsroom) as applicable, and post-launch\n monitoring. Moreover, we\ + \ conduct our research, development, and deployment\n of NLP and AI systems\ + \ in accordance with all applicable regulations.\"\n- \"Similarly, toxicity\ + \ and bias are areas where, as a company, we take extraordinary\n care with\ + \ any content we produce, whether from humans or machines. Since\n the measurement\ + \ of toxicity and bias in our model depends on its application\n areas, quantifying\ + \ the potential for the generation of harmful language\n remains an open\ + \ question. 
We are particularly interested in studying whether\n FinPile,\ + \ which is cleaner and contains fewer examples of overtly biased\n or toxic\ + \ language (e.g., Press Releases), reduces the proclivity of the\n model\ + \ to generate inappropriate content.\"\n" + size: 50B parameters (dense) training_emissions: unknown - training_time: - value: 53 days - explanation: See [[Training Run]](https://arxiv.org/pdf/2303.17564.pdf#section.4) training_hardware: + explanation: See [[Training Configuration]](https://arxiv.org/pdf/2303.17564.pdf#subsection.3.3) value: 64 Amazon EC2 p4d.24xlarge instances each with 8 NVIDIA 40GB A100 GPUs (i.e. total 512 A100 GPUs) - explanation: See [[Training Configuration]](https://arxiv.org/pdf/2303.17564.pdf#subsection.3.3) - quality_control: - value: | - Authors state the following: - - "To provide natural language applications to the financial community, we - have developed a rigorous risk and testing assessment process. This process - includes careful annotation guidelines Tseng et al. (2020), pre-launch review - at multiple levels by the central risk and compliance organizations, and - by the product leaders (e.g., the newsroom) as applicable, and post-launch - monitoring. Moreover, we conduct our research, development, and deployment - of NLP and AI systems in accordance with all applicable regulations." - - "Similarly, toxicity and bias are areas where, as a company, we take extraordinary - care with any content we produce, whether from humans or machines. Since - the measurement of toxicity and bias in our model depends on its application - areas, quantifying the potential for the generation of harmful language - remains an open question. We are particularly interested in studying whether - FinPile, which is cleaner and contains fewer examples of overtly biased - or toxic language (e.g., Press Releases), reduces the proclivity of the - model to generate inappropriate content." - explanation: See [[Ethics, Limitations, and Implications]](https://arxiv.org/pdf/2303.17564.pdf#section.8) - # Downstream - access: - value: closed - explanation: See [[Openness]](https://arxiv.org/pdf/2303.17564.pdf#subsection.8.2) - license: - value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - intended_uses: - value: > - "This model will assist Bloomberg in improving existing financial NLP - tasks, such as sentiment analysis, named entity recognition, news classification, - and question answering, among others. Furthermore, BloombergGPT will unlock - new opportunities for marshalling the vast quantities of data available on - the Bloomberg Terminal to better help the firm's customers, while bringing - the full potential of AI to the financial domain." 
- explanation: See [[Bloomberg article]](https://www.bloomberg.com/company/press/bloomberggpt-50-billion-parameter-llm-tuned-finance/) - prohibited_uses: '' - monitoring: '' - feedback: '' + training_time: + explanation: See [[Training Run]](https://arxiv.org/pdf/2303.17564.pdf#section.4) + value: 53 days + type: model + url: https://arxiv.org/abs/2303.17564 diff --git a/assets/brex.yaml b/assets/brex.yaml index 374f6842..76d01b3e 100644 --- a/assets/brex.yaml +++ b/assets/brex.yaml @@ -1,30 +1,28 @@ --- - -- type: application - name: Brex Chat - organization: Brex +- access: limited + adaptation: '' + created_date: 2023-03-07 + dependencies: [ChatGPT API] description: Brex Inc., a highly valued startup that makes software for finance professionals, is turning to the same artificial intelligence tool behind ChatGPT for a service that can answer questions about corporate budgets, policy and spending. - created_date: 2023-03-07 - url: https://fortune.com/2023/03/07/cfo-chatbot-chatgpt-ai-brex-finance-software-startup-budgets-policies/ - dependencies: [ChatGPT API] - adaptation: '' - output_space: '' - quality_control: '' - access: limited + failures: '' + feedback: '' + intended_uses: '' license: + explanation: "\"Brex grants you a nonexclusive and nontransferable license to\ + \ use Brex Property as provided through the Services and as permitted by these\ + \ User Terms.\" - excerpt from the Terms of Service.\n" value: custom - explanation: > - "Brex grants you a nonexclusive and nontransferable license to use Brex - Property as provided through the Services and as permitted by these User - Terms." - excerpt from the Terms of Service. - terms_of_service: https://www.brex.com/legal/user-terms - intended_uses: '' - prohibited_uses: '' monitoring: '' - feedback: '' monthly_active_users: '' + name: Brex Chat + organization: Brex + output_space: '' + prohibited_uses: '' + quality_control: '' + terms_of_service: https://www.brex.com/legal/user-terms + type: application + url: https://fortune.com/2023/03/07/cfo-chatbot-chatgpt-ai-brex-finance-software-startup-budgets-policies/ user_distribution: '' - failures: '' diff --git a/assets/casia.yaml b/assets/casia.yaml index 0634f801..4190e5a9 100644 --- a/assets/casia.yaml +++ b/assets/casia.yaml @@ -1,25 +1,26 @@ --- - -- type: model - name: BigTrans - organization: Institute of Automation Chinese Academy of Sciences - description: BigTrans is a model which adapts LLaMA that covers only 20 languages - and enhances it with multilingual translation capability on more than 100 languages - created_date: 2023-05-29 - url: https://arxiv.org/pdf/2305.18098v1.pdf - model_card: https://huggingface.co/James-WYang/BigTrans - modality: text +- access: open analysis: Reports results on standard translation benchmarks across 102 languages in comparison with Google Translate and ChatGPT - size: 13B parameters (dense model) + created_date: 2023-05-29 dependencies: [LLaMA, CLUE, BigTrans parallel dataset] - training_emissions: unknown - training_time: unknown - training_hardware: 16 A100 GPUs with 80 GB of RAM - quality_control: '' - access: open - license: Apache 2.0 + description: BigTrans is a model which adapts LLaMA that covers only 20 languages + and enhances it with multilingual translation capability on more than 100 languages + feedback: https://huggingface.co/James-WYang/BigTrans/discussions intended_uses: Advancing future research in multilingual LLMs - prohibited_uses: '' + license: Apache 2.0 + modality: + explanation: text + value: text; text + 
model_card: https://huggingface.co/James-WYang/BigTrans monitoring: '' - feedback: https://huggingface.co/James-WYang/BigTrans/discussions + name: BigTrans + organization: Institute of Automation Chinese Academy of Sciences + prohibited_uses: '' + quality_control: '' + size: 13B parameters (dense) + training_emissions: unknown + training_hardware: 16 A100 GPUs with 80 GB of RAM + training_time: unknown + type: model + url: https://arxiv.org/pdf/2305.18098v1.pdf diff --git a/assets/cerebras.yaml b/assets/cerebras.yaml index 636a9f70..421cd8a4 100644 --- a/assets/cerebras.yaml +++ b/assets/cerebras.yaml @@ -1,76 +1,59 @@ --- - -- type: model - name: Cerebras-GPT - # General - organization: Cerebras - description: > - A Family of Open, Compute-efficient, Large Language Models. The family includes - 111M, 256M, 590M, 1.3B, 2.7B, 6.7B, and 13B models. All models in the Cerebras-GPT - family have been trained in accordance with Chinchilla scaling laws (20 tokens - per model parameter). - [[Cerebras Blog Post]](https://www.cerebras.net/blog/cerebras-gpt-a-family-of-open-compute-efficient-large-language-models) +- access: + explanation: "The Pile is an open source dataset. Hugging Face compatible checkpoints\ + \ available on the [[Cerebras Hugging Face page]](https://huggingface.co/cerebras/Cerebras-GPT-13B).\ + \ Cerebras systems checkpoints for pre-training and fine tuning are available\ + \ in the cloud via the [[Cerebras Model Studio]](https://www.cerebras.net/product-cloud/).\n" + value: open + analysis: "\"We evaluate our models on the PILE validation set comprising 380M\ + \ tokens. We also evaluate the public checkpoints of Pythia, Eleuther (2022);\ + \ OPT, Zhang et al. (2022); GPT-NeoX 20B, Black et al. (2022); and GPT-J 6B,\ + \ Wang & Komatsuzaki (2021). We performed upstream (pre-training) evaluations\ + \ of text prediction cross-entropy using the Pile validation and test splits.\ + \ We performed downstream evaluations of text generation accuracy on standardized\ + \ tasks using the Eleuther lm-evaluation-harness.\" [[Evaluations]] (https://github.com/Cerebras/modelzoo/tree/main/modelzoo/transformers/pytorch/gpt3/configs/Cerebras_GPT#evaluations).\n" created_date: + explanation: "The date the model was announced in the [[Cerebras blog post]](https://www.cerebras.net/blog/cerebras-gpt-a-family-of-open-compute-efficient-large-language-models).\n" value: 2023-03-28 - explanation: > - The date the model was announced in the [[Cerebras blog post]](https://www.cerebras.net/blog/cerebras-gpt-a-family-of-open-compute-efficient-large-language-models). - url: https://www.cerebras.net/blog/cerebras-gpt-a-family-of-open-compute-efficient-large-language-models/ - model_card: https://huggingface.co/cerebras/Cerebras-GPT-13B - modality: text (English) - size: - value: 13B parameters (dense model) - explanation: Size of the largest model in the Cerebras-GPT family. - analysis: > - "We evaluate our models on the PILE validation set comprising 380M tokens. We - also evaluate the public checkpoints of Pythia, Eleuther (2022); OPT, Zhang - et al. (2022); GPT-NeoX 20B, Black et al. (2022); and GPT-J 6B, Wang & Komatsuzaki - (2021). We performed upstream (pre-training) evaluations of text prediction - cross-entropy using the Pile validation and test splits. We performed downstream - evaluations of text generation accuracy on standardized tasks using the Eleuther - lm-evaluation-harness." 
- [[Evaluations]] - (https://github.com/Cerebras/modelzoo/tree/main/modelzoo/transformers/pytorch/gpt3/configs/Cerebras_GPT#evaluations). - # Construction dependencies: [The Pile] + description: "A Family of Open, Compute-efficient, Large Language Models. The\ + \ family includes 111M, 256M, 590M, 1.3B, 2.7B, 6.7B, and 13B models. All models\ + \ in the Cerebras-GPT family have been trained in accordance with Chinchilla\ + \ scaling laws (20 tokens per model parameter). [[Cerebras Blog Post]](https://www.cerebras.net/blog/cerebras-gpt-a-family-of-open-compute-efficient-large-language-models)\n" + feedback: '' + intended_uses: "\"The primary intended use is to further research into large language\ + \ models. These models can be used as a foundation model for NLP, applications,\ + \ ethics, and alignment research. Our primary intended users are researchers\ + \ who are working to improve LLMs and practitioners seeking reference implementations,\ + \ training setups, hyperparameters, or pre-trained models. We release these\ + \ models with a fully permissive Apache license for the community to use freely.\"\ + \ [[Uses and Limitations]](https://github.com/Cerebras/modelzoo/tree/main/modelzoo/transformers/pytorch/gpt3/configs/Cerebras_GPT#uses-and-limitations).\n" + license: Apache 2.0 + modality: + explanation: text (English) + value: text; text + model_card: https://huggingface.co/cerebras/Cerebras-GPT-13B + monitoring: '' + name: Cerebras-GPT + organization: Cerebras + prohibited_uses: "Authors note the following limitations of the model: \"Cerebras-GPT\ + \ models are trained on the Pile, with English language only, and are not suitable\ + \ for machine translation tasks. Cerebras-GPT models have not been tuned for\ + \ human-facing dialog applications like chatbots and will not respond to prompts\ + \ in a similar way to models that have received instruction tuning or reinforcement\ + \ learning from human feedback (RLHF) like Flan-T5 or ChatGPT.\" [[Uses and\ + \ Limitations]](https://github.com/Cerebras/modelzoo/tree/main/modelzoo/transformers/pytorch/gpt3/configs/Cerebras_GPT#out-of-scope-use).\n" + quality_control: "The Pile dataset has been thoroughly analyzed from various ethical\ + \ standpoints such as toxicity analysis, gender bias, pejorative content, racially\ + \ sensitive content etc. Only mitigations in standard Pile dataset pre-processing\ + \ were employed when pre-training Cerebras-GPT. [[Risk, Bias, Ethical Considerations]](https://github.com/Cerebras/modelzoo/tree/main/modelzoo/transformers/pytorch/gpt3/configs/Cerebras_GPT#risk-bias-ethical-considerations)\n" + size: 13B parameters (dense) training_emissions: '' - training_time: - value: '' - explanation: '' training_hardware: + explanation: "According to [[Model Description]](https://huggingface.co/cerebras/Cerebras-GPT-13B#model-description)\n" value: 16x Cerebras CS-2 wafer scale systems - explanation: > - According to [[Model Description]](https://huggingface.co/cerebras/Cerebras-GPT-13B#model-description) - quality_control: > - The Pile dataset has been thoroughly analyzed from various ethical standpoints - such as toxicity analysis, gender bias, pejorative content, racially sensitive - content etc. Only mitigations in standard Pile dataset pre-processing were employed - when pre-training Cerebras-GPT. 
- [[Risk, Bias, Ethical Considerations]](https://github.com/Cerebras/modelzoo/tree/main/modelzoo/transformers/pytorch/gpt3/configs/Cerebras_GPT#risk-bias-ethical-considerations) - # Downstream - access: - value: open - explanation: > - The Pile is an open source dataset. - Hugging Face compatible checkpoints available on the [[Cerebras Hugging Face - page]](https://huggingface.co/cerebras/Cerebras-GPT-13B). - Cerebras systems checkpoints for pre-training and fine tuning are available - in the cloud via the [[Cerebras Model Studio]](https://www.cerebras.net/product-cloud/). - license: Apache 2.0 - intended_uses: > - "The primary intended use is to further research into large language models. - These models can be used as a foundation model for NLP, applications, ethics, - and alignment research. Our primary intended users are researchers who are working - to improve LLMs and practitioners seeking reference implementations, training - setups, hyperparameters, or pre-trained models. We release these models with - a fully permissive Apache license for the community to use freely." - [[Uses and Limitations]](https://github.com/Cerebras/modelzoo/tree/main/modelzoo/transformers/pytorch/gpt3/configs/Cerebras_GPT#uses-and-limitations). - prohibited_uses: > - Authors note the following limitations of the model: "Cerebras-GPT models are - trained on the Pile, with English language only, and are not suitable for machine - translation tasks. Cerebras-GPT models have not been tuned for human-facing - dialog applications like chatbots and will not respond to prompts in a similar - way to models that have received instruction tuning or reinforcement learning - from human feedback (RLHF) like Flan-T5 or ChatGPT." - [[Uses and Limitations]](https://github.com/Cerebras/modelzoo/tree/main/modelzoo/transformers/pytorch/gpt3/configs/Cerebras_GPT#out-of-scope-use). - monitoring: '' - feedback: '' + training_time: + explanation: '' + value: '' + type: model + url: https://www.cerebras.net/blog/cerebras-gpt-a-family-of-open-compute-efficient-large-language-models/ diff --git a/assets/chatglm.yaml b/assets/chatglm.yaml index 5bb6d82e..7005d126 100644 --- a/assets/chatglm.yaml +++ b/assets/chatglm.yaml @@ -1,24 +1,25 @@ --- - -- type: model - name: ChatGLM - organization: ChatGLM +- access: open + analysis: Performance evaluated on English and Chinese language benchmark tests. + created_date: 2023-03-14 + dependencies: [] description: ChatGLM is a Chinese-English language model with question and answer and dialogue functions, and is aimed at a Chinese audience. - created_date: 2023-03-14 - url: https://chatglm.cn/blog + feedback: '' + intended_uses: '' + license: Apache 2.0 + modality: + explanation: text + value: text; text model_card: '' - modality: text - analysis: Performance evaluated on English and Chinese language benchmark tests. 
- size: 6B parameters - dependencies: [] + monitoring: '' + name: ChatGLM + organization: ChatGLM + prohibited_uses: '' + quality_control: '' + size: 6B parameters (dense) training_emissions: unknown - training_time: unknown training_hardware: '' - quality_control: '' - access: open - license: Apache 2.0 - intended_uses: '' - prohibited_uses: '' - monitoring: '' - feedback: '' + training_time: unknown + type: model + url: https://chatglm.cn/blog diff --git a/assets/cmu.yaml b/assets/cmu.yaml index ceda272f..d5085ad0 100644 --- a/assets/cmu.yaml +++ b/assets/cmu.yaml @@ -1,34 +1,35 @@ --- - -- type: model - name: PolyCoder - organization: CMU +- access: + explanation: Model checkpoints are available for download at https://github.com/VHellendoorn/Code-LMs + value: open + analysis: Reports results on standard code benchmarks across a variety of programming + languages. + created_date: + explanation: The date the model paper was released + value: 2022-02-26 + dependencies: [Github] description: PolyCoder is a code model trained on 2.7B parameters based on the GPT-2 architecture, which was trained on 249GB of code across 12 programming languages on a single machine. - created_date: - value: 2022-02-26 - explanation: The date the model paper was released - url: https://arxiv.org/abs/2202.13169 + feedback: https://huggingface.co/NinedayWang/PolyCoder-2.7B/discussion + intended_uses: unknown + license: + explanation: The license is provided in the [[Github repository]](https://github.com/VHellendoorn/Code-LMs) + value: MIT + modality: + explanation: code (12 programming languages) + value: code; code model_card: https://huggingface.co/NinedayWang/PolyCoder-2.7B - modality: code (12 programming languages) - analysis: Reports results on standard code benchmarks across a variety of programming - languages. - size: 2.7B parameters (dense model) - dependencies: [Github] - training_emissions: unknown - training_time: 6 weeks - training_hardware: 8 NVIDIA RTX 8000 + monitoring: None + name: PolyCoder + organization: CMU + prohibited_uses: None quality_control: No specific quality control is mentioned in model training, though details on data processing and how the tokenizer was trained are provided in the paper. - access: - value: open - explanation: Model checkpoints are available for download at https://github.com/VHellendoorn/Code-LMs - license: - value: MIT - explanation: The license is provided in the [[Github repository]](https://github.com/VHellendoorn/Code-LMs) - intended_uses: unknown - prohibited_uses: None - monitoring: None - feedback: https://huggingface.co/NinedayWang/PolyCoder-2.7B/discussion + size: 2.7B parameters (dense) + training_emissions: unknown + training_hardware: 8 NVIDIA RTX 8000 + training_time: 6 weeks + type: model + url: https://arxiv.org/abs/2202.13169 diff --git a/assets/cohere.yaml b/assets/cohere.yaml index 21c3379a..9051aa78 100644 --- a/assets/cohere.yaml +++ b/assets/cohere.yaml @@ -1,657 +1,460 @@ --- - -- type: dataset - name: coheretext - # General - organization: Cohere - description: > - The family of datasets used to train Cohere models, which come in two forms: - coheretext-filtered and coheretext-unfiltered. The former is used to train the - Representation models, while the latter one is used to train the Generation - models. +- access: + explanation: The dataset isn't provided to the public. + value: closed + analysis: + explanation: The analysis performed on the dataset is not released. 
+ value: unknown created_date: + explanation: "The date the Cohere API was announced on the news [[News Article]]\ + \ (https://venturebeat.com/2021/11/15/openai-rival-cohere-launches-language-model-api/).\n" value: 2021-11-15 - explanation: > - The date the Cohere API was announced on the news - [[News Article]] - (https://venturebeat.com/2021/11/15/openai-rival-cohere-launches-language-model-api/). - url: none datasheet: https://docs.cohere.ai/data-statement - modality: text (English) - size: - value: 200 GB - explanation: > - Size of the filtered dataset is 200 GB. The unfiltered dataset is ~3 TB. - sample: [] - analysis: - value: unknown - explanation: The analysis performed on the dataset is not released. - # Construction dependencies: [] - license: + description: "The family of datasets used to train Cohere models, which come in\ + \ two forms: coheretext-filtered and coheretext-unfiltered. The former is used\ + \ to train the Representation models, while the latter one is used to train\ + \ the Generation models.\n" + excluded: "Documents that are not in English are excluded.\n" + feedback: + explanation: The internal feedback mechanisms for Cohere are unknown. value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - included: > - As stated in the datasheet, the dataset "includes the Google Books dataset, - CommonCrawl, and text from the internet scraped by the Cohere infrastructure - team." The top ten domains scraped were: wordpress.com, medium.com, - stackexchange.com, tumblr.com, elsevier.com, genius.com, bbc.co.uk, - libsyn.com, yahoo.com, nytimes.com - [[Datasheet]](https://docs.cohere.ai/data-statement). - excluded: > - Documents that are not in English are excluded. - quality_control: > - In the datasheet, it is implied that Cohere employs filtration methods - for removing racist, biased and toxic content, but the details are not - provided. These filtration methods take both the context and the language, - as opposed to using a list of blockwords - [[Datasheet]](https://docs.cohere.ai/data-statement). - # Downstream - access: - value: closed - explanation: The dataset isn't provided to the public. - intended_uses: > - The intended use of the dataset is to train Cohere's language models. - prohibited_uses: + included: "As stated in the datasheet, the dataset \"includes the Google Books\ + \ dataset, CommonCrawl, and text from the internet scraped by the Cohere infrastructure\ + \ team.\" The top ten domains scraped were: wordpress.com, medium.com, stackexchange.com,\ + \ tumblr.com, elsevier.com, genius.com, bbc.co.uk, libsyn.com, yahoo.com, nytimes.com\ + \ [[Datasheet]](https://docs.cohere.ai/data-statement).\n" + intended_uses: "The intended use of the dataset is to train Cohere's language\ + \ models.\n" + license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - There are no known prohibited uses of the dataset, but the Cohere API is - bound by the Cohere usage guidelines, which may also apply to this dataset - [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines). + modality: + explanation: text (English) + value: text monitoring: + explanation: "There is no information on how Cohere is internally monitoring\ + \ the use of the dataset.\n" value: unknown - explanation: > - There is no information on how Cohere is internally monitoring the use of - the dataset. - feedback: - value: unknown - explanation: The internal feedback mechanisms for Cohere are unknown. 
- -- type: model - name: Cohere Base - # General + name: coheretext organization: Cohere - description: > - The Generations model is a language model trained by Cohere for - generation tasks. - created_date: - value: 2021-11-15 - explanation: > - The date the Cohere API was announced on the news - [[News Article]] - (https://venturebeat.com/2021/11/15/openai-rival-cohere-launches-language-model-api/). - url: none - model_card: https://docs.cohere.ai/generation-card - modality: text (English) - size: + prohibited_uses: + explanation: "There are no known prohibited uses of the dataset, but the Cohere\ + \ API is bound by the Cohere usage guidelines, which may also apply to this\ + \ dataset [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines).\n" value: unknown - explanation: > - The exact sizes of the generation models are unknown, but we know - that they come in three sizes: small, medium, and large - [[Model Card]](https://docs.cohere.ai/generation-card). + quality_control: "In the datasheet, it is implied that Cohere employs filtration\ + \ methods for removing racist, biased and toxic content, but the details are\ + \ not provided. These filtration methods take both the context and the language,\ + \ as opposed to using a list of blockwords [[Datasheet]](https://docs.cohere.ai/data-statement).\n" + sample: [] + size: + explanation: "Size of the filtered dataset is 200 GB. The unfiltered dataset\ + \ is ~3 TB.\n" + value: 200 GB + type: dataset + url: none +- access: + explanation: "The model is available to the public through the Cohere Platform\ + \ [[Cohere Platform]](https://os.cohere.ai/login).\n" + value: limited analysis: The model's performance was analyzed on Hellaswag and COPA, as well as several safety benchmarks [[Model Card]](https://docs.cohere.ai/generation-card). - # Construction + created_date: + explanation: "The date the Cohere API was announced on the news [[News Article]]\ + \ (https://venturebeat.com/2021/11/15/openai-rival-cohere-launches-language-model-api/).\n" + value: 2021-11-15 dependencies: [coheretext] - training_emissions: + description: "The Generations model is a language model trained by Cohere for\ + \ generation tasks.\n" + feedback: + explanation: "There are no specific feedback mechanisms for the Generation model,\ + \ but a generic contact email is provided on Cohere website, which is support\ + \ at cohere.ai.\n" + value: unknown + intended_uses: "On the model card, the intended uses are stated as \"interactive\ + \ autocomplete, augmenting human writing processes, summarization, text rephrasing,\ + \ and other text-to-text tasks in non-sensitive domains\" [[Model Card]](https://docs.cohere.ai/generation-card).\n" + license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown + modality: + explanation: text (English) + value: text; text + model_card: https://docs.cohere.ai/generation-card + monitoring: "The usage of the model is monitored by Cohere [[Model Card]](https://docs.cohere.ai/generation-card).\n" + name: Cohere Base + organization: Cohere + prohibited_uses: "The usage of the model is bound by the Cohere usage guidelines\ + \ [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines). 
A non-comprehensive\ + \ list of specific application violating these guidelines are: astroturfing,\ + \ generation of misinformation and other harmful content, and \"generation of\ + \ text about people, places, or events without a human-in-the-loop\" [[Model\ + \ Card]](https://docs.cohere.ai/generation-card).\n" + quality_control: + explanation: "The quality control measures taken are unknown, but it is implied\ + \ that Cohere performed mitigation strategies for toxic degeneration [[Model\ + \ Card]](https://docs.cohere.ai/generation-card).\n" + value: unknown + size: unknown + training_emissions: explanation: The emissions of the models are unknown. - training_time: value: unknown - explanation: The training time for the models are unknown. training_hardware: + explanation: "The training hardware wasn't explicitly announced, but it was\ + \ reported that Google Cloud teamed up with Cohere on a TPU partnership [[TechCrunch\ + \ Article]] (https://techcrunch.com/2021/11/17/google-cloud-teams-up-with-nlp-startup-cohere-on-multi-year-partnership/).\n" value: unknown - explanation: > - The training hardware wasn't explicitly announced, but it was - reported that Google Cloud teamed up with Cohere on a TPU partnership - [[TechCrunch Article]] - (https://techcrunch.com/2021/11/17/google-cloud-teams-up-with-nlp-startup-cohere-on-multi-year-partnership/). - quality_control: + training_time: + explanation: The training time for the models are unknown. value: unknown - explanation: > - The quality control measures taken are unknown, but it is implied - that Cohere performed mitigation strategies for toxic degeneration - [[Model Card]](https://docs.cohere.ai/generation-card). - # Downstream - access: + type: model + url: none +- access: + explanation: "The model is available to the public through the Cohere Platform\ + \ [[Cohere Platform]](https://os.cohere.ai/login).\n" value: limited - explanation: > - The model is available to the public through the Cohere Platform - [[Cohere Platform]](https://os.cohere.ai/login). - license: - value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - intended_uses: > - On the model card, the intended uses are stated as "interactive - autocomplete, augmenting human writing processes, summarization, - text rephrasing, and other text-to-text tasks in non-sensitive domains" - [[Model Card]](https://docs.cohere.ai/generation-card). - prohibited_uses: > - The usage of the model is bound by the Cohere usage guidelines - [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines). - A non-comprehensive list of specific application violating these guidelines - are: astroturfing, generation of misinformation and other harmful content, - and "generation of text about people, places, or events without a - human-in-the-loop" - [[Model Card]](https://docs.cohere.ai/generation-card). - monitoring: > - The usage of the model is monitored by Cohere - [[Model Card]](https://docs.cohere.ai/generation-card). + analysis: The model's performance was analyzed on Hellaswag and COPA, as well + as several safety benchmarks [[Model Card]](https://docs.cohere.ai/generation-card). 
+ created_date: + explanation: "Cohere Command was in beta before this date\n" + value: 2023-01-01 + dependencies: [Cohere Base] + description: "This model is a generative model optimized to follow commands in\ + \ the prompt.\n" feedback: + explanation: "There are no specific feedback mechanisms for the Generation model,\ + \ but a generic contact email is provided on Cohere website, which is support\ + \ at cohere.ai.\n" + value: unknown + intended_uses: "On the model card, the intended uses are stated as \"interactive\ + \ autocomplete, augmenting human writing processes, summarization, text rephrasing,\ + \ and other text-to-text tasks in non-sensitive domains\" [[Model Card]](https://docs.cohere.ai/generation-card).\n" + license: + explanation: The model likely has a license specifically for Cohere's use. value: unknown - explanation: > - There are no specific feedback mechanisms for the Generation model, - but a generic contact email is provided on Cohere website, which is - support at cohere.ai. - -- type: model + modality: + explanation: text (English) + value: text; text + model_card: https://docs.cohere.ai/generation-card + monitoring: "The usage of the model is monitored by Cohere [[Model Card]](https://docs.cohere.ai/generation-card).\n" name: Cohere Command - # General organization: Cohere - description: > - This model is a generative model optimized to follow commands in the prompt. - created_date: - value: 2023-01-01 - explanation: > - Cohere Command was in beta before this date - url: https://docs.cohere.com/docs/command-beta - model_card: https://docs.cohere.ai/generation-card - modality: text (English) - size: + prohibited_uses: "The usage of the model is bound by the Cohere usage guidelines\ + \ [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines). A non-comprehensive\ + \ list of specific application violating these guidelines are: astroturfing,\ + \ generation of misinformation and other harmful content, and \"generation of\ + \ text about people, places, or events without a human-in-the-loop\" [[Model\ + \ Card]](https://docs.cohere.ai/generation-card).\n" + quality_control: + explanation: unknown value: unknown - explanation: > - The exact sizes of the generation models are unknown, but we know - that they come in three sizes: small, medium, and large - [[Model Card]](https://docs.cohere.ai/generation-card). - analysis: The model's performance was analyzed on Hellaswag and COPA, as well - as several safety benchmarks [[Model Card]](https://docs.cohere.ai/generation-card). - # Construction - dependencies: [Cohere Base] + size: unknown training_emissions: - value: unknown explanation: unknown - training_time: value: unknown - explanation: unknown training_hardware: - value: unknown explanation: unknown - quality_control: value: unknown + training_time: explanation: unknown - # Downstream - access: + value: unknown + type: model + url: https://docs.cohere.com/docs/command-beta +- access: + explanation: "The model is available to the public through the Cohere Platform\ + \ [[Cohere Platform]](https://cohere.ai/).\n" value: limited - explanation: > - The model is available to the public through the Cohere Platform - [[Cohere Platform]](https://os.cohere.ai/login). 
- license: + analysis: "The model's performance was analyzed on several safety benchmarks [[Model\ + \ Card]](https://docs.cohere.ai/representation-card).\n" + created_date: + explanation: "The date the Cohere API was announced on the news [[News Article]]\ + \ (https://venturebeat.com/2021/11/15/openai-rival-cohere-launches-language-model-api/).\n" + value: 2021-11-15 + dependencies: [] + description: "The Embedding Large (English) model is a language model trained\ + \ by Cohere for tasks requiring embeddings.\n" + feedback: + explanation: "There are no specific feedback mechanisms for the Generation model,\ + \ but a generic contact email is provided on Cohere website, which is support\ + \ at cohere.ai.\n" value: unknown + intended_uses: "The intended uses are stated as \"estimating semantic similarity\ + \ between two sentences, choosing a sentence which is most likely to follow\ + \ another sentence, sentiment analysis, topic extraction, or categorizing user\ + \ feedback\" on the Cohere model card [[Model Card]](https://docs.cohere.ai/representation-card).\n" + license: explanation: The model likely has a license specifically for Cohere's use. - intended_uses: > - On the model card, the intended uses are stated as "interactive - autocomplete, augmenting human writing processes, summarization, - text rephrasing, and other text-to-text tasks in non-sensitive domains" - [[Model Card]](https://docs.cohere.ai/generation-card). - prohibited_uses: > - The usage of the model is bound by the Cohere usage guidelines - [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines). - A non-comprehensive list of specific application violating these guidelines - are: astroturfing, generation of misinformation and other harmful content, - and "generation of text about people, places, or events without a - human-in-the-loop" - [[Model Card]](https://docs.cohere.ai/generation-card). - monitoring: > - The usage of the model is monitored by Cohere - [[Model Card]](https://docs.cohere.ai/generation-card). - feedback: value: unknown - explanation: > - There are no specific feedback mechanisms for the Generation model, - but a generic contact email is provided on Cohere website, which is - support at cohere.ai. - - -- type: model + modality: + explanation: Text (English) + value: text; text + model_card: https://docs.cohere.ai/representation-card + monitoring: "The usage of the model is monitored by Cohere [[Model Card]](https://docs.cohere.ai/representation-card).\n" name: Cohere Embed (English) - # General organization: Cohere - description: > - The Embedding Large (English) model is a language model trained by Cohere for - tasks requiring embeddings. - created_date: - value: 2021-11-15 - explanation: > - The date the Cohere API was announced on the news - [[News Article]] - (https://venturebeat.com/2021/11/15/openai-rival-cohere-launches-language-model-api/). - url: none - model_card: https://docs.cohere.ai/representation-card - modality: Text (English) - size: - value: unknown - explanation: > - The exact sizes of the embedding models are unknown, but we know - that they come in three sizes: small, medium, and large - [[Model Card]](https://docs.cohere.ai/representation-card). - analysis: > - The model's performance was analyzed on several safety benchmarks - [[Model Card]](https://docs.cohere.ai/representation-card). 
- # Construction - dependencies: [] + prohibited_uses: "The usage of the model is bound by the Cohere usage guidelines\ + \ [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines). A non-comprehensive\ + \ list of specific application violating these guidelines are: extraction of\ + \ identity and demographic information, building purposefully opaque text classification\ + \ systems, and \"building downstream classifiers that serve as automated decision-making\ + \ systems that have real-world consequences on people, where those decisions\ + \ are made without a human-in-the-loop\" [[Model Card]](https://docs.cohere.ai/representation-card).\n" + quality_control: unknown + size: unknown training_emissions: - value: unknown explanation: The emissions of the models are unknown. - training_time: value: unknown - explanation: The training time for the models are unknown. training_hardware: + explanation: "The training hardware wasn't explicitly announced, but it was\ + \ reported that Google Cloud teamed up with Cohere on a TPU partnership [[TechCrunch\ + \ Article]] (https://techcrunch.com/2021/11/17/google-cloud-teams-up-with-nlp-startup-cohere-on-multi-year-partnership/).\n" value: unknown - explanation: > - The training hardware wasn't explicitly announced, but it was - reported that Google Cloud teamed up with Cohere on a TPU partnership - [[TechCrunch Article]] - (https://techcrunch.com/2021/11/17/google-cloud-teams-up-with-nlp-startup-cohere-on-multi-year-partnership/). - quality_control: unknown - # Downstream - access: - value: limited - explanation: > - The model is available to the public through the Cohere Platform - [[Cohere Platform]](https://cohere.ai/). - license: + training_time: + explanation: The training time for the models are unknown. value: unknown - explanation: The model likely has a license specifically for Cohere's use. - intended_uses: > - The intended uses are stated as "estimating semantic similarity between two - sentences, choosing a sentence which is most likely to follow another - sentence, sentiment analysis, topic extraction, or categorizing user - feedback" on the Cohere model card - [[Model Card]](https://docs.cohere.ai/representation-card). - prohibited_uses: > - The usage of the model is bound by the Cohere usage guidelines - [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines). - A non-comprehensive list of specific application violating these guidelines - are: extraction of identity and demographic information, building - purposefully opaque text classification systems, and "building downstream - classifiers that serve as automated decision-making systems that have - real-world consequences on people, where those decisions are made without - a - human-in-the-loop" - [[Model Card]](https://docs.cohere.ai/representation-card). - monitoring: > - The usage of the model is monitored by Cohere - [[Model Card]](https://docs.cohere.ai/representation-card). 
+ type: model + url: none +- access: + explanation: "The model is available to the public through the Cohere Platform\ + \ [[Cohere Platform]](https://cohere.ai/).\n" + value: limited + analysis: "The model's performance was analyzed on several safety benchmarks [[Model\ + \ Card]](https://docs.cohere.ai/representation-card).\n" + created_date: + explanation: "The launch of the multilingual embedding model was announced in\ + \ this [[blog post]] (https://txt.cohere.com/multilingual/).\n" + value: 2022-12-12 + dependencies: [] + description: "This model maps text from 100+ languages to a semantic vector space,\ + \ positioning text with a similar meaning (regardless of language) in close\ + \ proximity.\n" feedback: + explanation: "There are no specific feedback mechanisms for the Generation model,\ + \ but a generic contact email is provided on Cohere website, which is support\ + \ at cohere.ai.\n" value: unknown - explanation: > - There are no specific feedback mechanisms for the Generation model, - but a generic contact email is provided on Cohere website, which is - support at cohere.ai. - -- type: model + intended_uses: "The intended uses are stated as \"estimating semantic similarity\ + \ between two sentences, choosing a sentence which is most likely to follow\ + \ another sentence, sentiment analysis, topic extraction, or categorizing user\ + \ feedback\" on the Cohere model card [[Model Card]](https://docs.cohere.ai/representation-card).\n" + license: + explanation: "The asset isn't released, and hence the license is unknown.\n" + value: unknown + modality: + explanation: text (Multilingual) + value: text; text + model_card: https://docs.cohere.ai/representation-card + monitoring: "The usage of the model is monitored by Cohere [[Model Card]](https://docs.cohere.ai/representation-card).\n" name: Cohere Embed (Multilingual) - # General organization: Cohere - description: > - This model maps text from 100+ languages to a semantic - vector space, positioning text with a similar meaning - (regardless of language) in close proximity. - modality: text (Multilingual) - created_date: - value: 2022-12-12 - explanation: > - The launch of the multilingual embedding model was announced in this - [[blog post]] - (https://txt.cohere.com/multilingual/). - url: https://txt.cohere.ai/multilingual/ - model_card: https://docs.cohere.ai/representation-card - size: - value: unknown - explanation: > - The exact sizes of the embedding models are unknown, but we know - that they come in three sizes: small, medium, and large - [[Model Card]](https://docs.cohere.ai/representation-card). - analysis: > - The model's performance was analyzed on several safety benchmarks - [[Model Card]](https://docs.cohere.ai/representation-card). - # Construction - dependencies: [] + prohibited_uses: "The usage of the model is bound by the Cohere usage guidelines\ + \ [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines). A non-comprehensive\ + \ list of specific application violating these guidelines are: extraction of\ + \ identity and demographic information, building purposefully opaque text classification\ + \ systems, and \"building downstream classifiers that serve as automated decision-making\ + \ systems that have real-world consequences on people, where those decisions\ + \ are made without a human-in-the-loop\" [[Model Card]](https://docs.cohere.ai/representation-card).\n" + quality_control: unknown + size: unknown training_emissions: - value: unknown explanation: The emissions of the models are unknown. 
- training_time: value: unknown - explanation: The training time for the models are unknown. training_hardware: + explanation: "The training hardware wasn't explicitly announced, but it was\ + \ reported that Google Cloud teamed up with Cohere on a TPU partnership [[TechCrunch\ + \ Article]] (https://techcrunch.com/2021/11/17/google-cloud-teams-up-with-nlp-startup-cohere-on-multi-year-partnership/).\n" value: unknown - explanation: > - The training hardware wasn't explicitly announced, but it was - reported that Google Cloud teamed up with Cohere on a TPU partnership - [[TechCrunch Article]] - (https://techcrunch.com/2021/11/17/google-cloud-teams-up-with-nlp-startup-cohere-on-multi-year-partnership/). - quality_control: unknown - # Downstream - access: - value: limited - explanation: > - The model is available to the public through the Cohere Platform - [[Cohere Platform]](https://cohere.ai/). - license: - value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - intended_uses: > - The intended uses are stated as "estimating semantic similarity between two - sentences, choosing a sentence which is most likely to follow another - sentence, sentiment analysis, topic extraction, or categorizing user - feedback" on the Cohere model card - [[Model Card]](https://docs.cohere.ai/representation-card). - prohibited_uses: > - The usage of the model is bound by the Cohere usage guidelines - [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines). - A non-comprehensive list of specific application violating these guidelines - are: extraction of identity and demographic information, building - purposefully opaque text classification systems, and "building downstream - classifiers that serve as automated decision-making systems that have - real-world consequences on people, where those decisions are made without - a - human-in-the-loop" - [[Model Card]](https://docs.cohere.ai/representation-card). - monitoring: > - The usage of the model is monitored by Cohere - [[Model Card]](https://docs.cohere.ai/representation-card). - feedback: + training_time: + explanation: The training time for the models are unknown. value: unknown - explanation: > - There are no specific feedback mechanisms for the Generation model, - but a generic contact email is provided on Cohere website, which is - support at cohere.ai. - -- type: application - name: Cohere API - # General - organization: Cohere - description: > - Cohere API allows users to access the cohere language models and utilize - them in their applications. + type: model + url: https://txt.cohere.ai/multilingual/ +- access: + explanation: "Users can access the Cohere API by signing up on the Cohere website\ + \ [[Cohere Website]](https://cohere.ai/).\n" + value: limited + adaptation: unknown created_date: + explanation: "The date the Cohere API was announced on the news [[News Article]]\ + \ (https://venturebeat.com/2021/11/15/openai-rival-cohere-launches-language-model-api/).\n" value: 2021-11-15 - explanation: > - The date the Cohere API was announced on the news - [[News Article]] - (https://venturebeat.com/2021/11/15/openai-rival-cohere-launches-language-model-api/). 
- url: https://cohere.ai/ - # Construction dependencies: - Cohere Generate Endpoint - Cohere Embed Endpoint - Cohere Classify Endpoint - Cohere Summarize Endpoint - adaptation: unknown - output_space: generation and embeddings - quality_control: > - The new users of the API get a limited access restricting the sizes of the - models as well as the number of tokens that can be used. Users are required - to go through an internal application to upgrade to full access - [[Limited Access]](https://docs.cohere.ai/limited-access). - # Downstream - access: - value: limited - explanation: > - Users can access the Cohere API by signing up on the Cohere website - [[Cohere Website]](https://cohere.ai/). + description: "Cohere API allows users to access the cohere language models and\ + \ utilize them in their applications.\n" + failures: unknown + feedback: "General feedback as well as the violations of the usage guidelines\ + \ can be reported to Cohere at responsibility at cohere.ai [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines).\n" + intended_uses: "Intended to be used by developers who would like to incorporate\ + \ NLP into their applications [[Cohere Website]](https://cohere.ai/).\n" license: + explanation: "\"Subject to these Terms of Use, we grant you anon-exclusive,\ + \ non-transferable, non-sublicensable and revocable licence to use the Cohere\ + \ Solution in accordance with these Terms of Use\" - excerpt from the Terms\ + \ of Service document.\n" value: custom - explanation: > - "Subject to these Terms of Use, we grant you anon-exclusive, - non-transferable, non-sublicensable and revocable licence to use the - Cohere Solution in accordance with these Terms of Use" - excerpt from the - Terms of Service document. - terms_of_service: https://cohere.ai/terms-of-use - intended_uses: > - Intended to be used by developers who would like to incorporate NLP into - their applications - [[Cohere Website]](https://cohere.ai/). - prohibited_uses: > - The usage of the API is bound by the Cohere usage guidelines. - Disallowed use cases include violence and threats, antisocial and - antidemocratic uses, deceit, attacks on security or privacy, unsafe - unsupervised uses, decision-making, high-Risk generations among others - [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines). - monitoring: > - All applications developed using the Cohere API is subject to review by - Cohere. - feedback: > - General feedback as well as the violations of the usage guidelines can - be reported to Cohere at responsibility at cohere.ai - [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines). - # Deployment + monitoring: "All applications developed using the Cohere API is subject to review\ + \ by Cohere.\n" monthly_active_users: unknown - user_distribution: unknown - failures: unknown - -- type: application - name: Cohere Generate Endpoint - # General + name: Cohere API organization: Cohere - description: > - This endpoint generates realistic text conditioned on a given input. 
+ output_space: generation and embeddings + prohibited_uses: "The usage of the API is bound by the Cohere usage guidelines.\ + \ Disallowed use cases include violence and threats, antisocial and antidemocratic\ + \ uses, deceit, attacks on security or privacy, unsafe unsupervised uses, decision-making,\ + \ high-Risk generations among others [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines).\n" + quality_control: "The new users of the API get a limited access restricting the\ + \ sizes of the models as well as the number of tokens that can be used. Users\ + \ are required to go through an internal application to upgrade to full access\ + \ [[Limited Access]](https://docs.cohere.ai/limited-access).\n" + terms_of_service: https://cohere.ai/terms-of-use + type: application + url: https://cohere.ai/ + user_distribution: unknown +- access: + explanation: "Users can access the Cohere API by signing up on the Cohere website\ + \ [[Cohere Website]](https://cohere.ai/).\n" + value: limited + adaptation: unknown created_date: + explanation: "The launch of Generate was announced in this [[news article]]\ + \ (https://venturebeat.com/uncategorized/openai-rival-cohere-launches-language-model-api/).\n" value: 2021-11-15 - explanation: > - The launch of Generate was announced in this - [[news article]] - (https://venturebeat.com/uncategorized/openai-rival-cohere-launches-language-model-api/). - url: https://docs.cohere.ai/reference/generate - # Construction dependencies: [Cohere Base, Cohere Command] - adaptation: unknown + description: "This endpoint generates realistic text conditioned on a given input.\n" + failures: unknown + feedback: "General feedback as well as the violations of the usage guidelines\ + \ can be reported to Cohere at responsibility at cohere.ai [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines).\n" + intended_uses: "Intended to be used by developers who would like to incorporate\ + \ NLP into their applications [[Cohere Website]](https://cohere.ai/).\n" + license: "Limited use license to Cohere platform users [[Terms of Use]](https://cohere.ai/terms-of-use).\n" + monitoring: "All applications developed using the Cohere API is subject to review\ + \ by Cohere.\n" + monthly_active_users: unknown + name: Cohere Generate Endpoint + organization: Cohere output_space: generation - quality_control: > - The new users of the API get a limited access restricting the sizes of the - models as well as the number of tokens that can be used. Users are required - to go through an internal application to upgrade to full access - [[Limited Access]](https://docs.cohere.ai/limited-access). - # Downstream - access: - value: limited - explanation: > - Users can access the Cohere API by signing up on the Cohere website - [[Cohere Website]](https://cohere.ai/). - license: > - Limited use license to Cohere platform users - [[Terms of Use]](https://cohere.ai/terms-of-use). + prohibited_uses: "The usage of the API is bound by the Cohere usage guidelines.\ + \ Disallowed use cases include violence and threats, antisocial and antidemocratic\ + \ uses, deceit, attacks on security or privacy, unsafe unsupervised uses, decision-making,\ + \ high-Risk generations among others [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines).\n" + quality_control: "The new users of the API get a limited access restricting the\ + \ sizes of the models as well as the number of tokens that can be used. 
Users\ + \ are required to go through an internal application to upgrade to full access\ + \ [[Limited Access]](https://docs.cohere.ai/limited-access).\n" terms_of_service: https://cohere.ai/terms-of-use - intended_uses: > - Intended to be used by developers who would like to incorporate NLP into - their applications - [[Cohere Website]](https://cohere.ai/). - prohibited_uses: > - The usage of the API is bound by the Cohere usage guidelines. - Disallowed use cases include violence and threats, antisocial and - antidemocratic uses, deceit, attacks on security or privacy, unsafe - unsupervised uses, decision-making, high-Risk generations among others - [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines). - monitoring: > - All applications developed using the Cohere API is subject to review by - Cohere. - feedback: > - General feedback as well as the violations of the usage guidelines can - be reported to Cohere at responsibility at cohere.ai - [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines). - # Deployment - monthly_active_users: unknown + type: application + url: https://docs.cohere.ai/reference/generate user_distribution: unknown - failures: unknown - -- type: application - name: Cohere Embed Endpoint - # General - organization: Cohere - description: > - This endpoint returns text embeddings. An - embedding is a list of floating point numbers that - captures semantic information about the text that it represents. +- access: + explanation: "Users can access the Cohere API by signing up on the Cohere website\ + \ [[Cohere Website]](https://cohere.ai/).\n" + value: limited + adaptation: unknown created_date: + explanation: "The launch of Embed was announced in this [[news article]] (https://venturebeat.com/uncategorized/openai-rival-cohere-launches-language-model-api/).\n" value: 2021-11-15 - explanation: > - The launch of Embed was announced in this - [[news article]] - (https://venturebeat.com/uncategorized/openai-rival-cohere-launches-language-model-api/). - url: https://docs.cohere.ai/reference/embed - # Construction dependencies: [Cohere Embed (Multilingual), Cohere Embed (English)] - adaptation: unknown + description: "This endpoint returns text embeddings. An embedding is a list of\ + \ floating point numbers that captures semantic information about the text that\ + \ it represents.\n" + failures: unknown + feedback: "General feedback as well as the violations of the usage guidelines\ + \ can be reported to Cohere at responsibility at cohere.ai [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines).\n" + intended_uses: "Intended to be used by developers who would like to incorporate\ + \ NLP into their applications [[Cohere Website]](https://cohere.ai/).\n" + license: "Limited use license to Cohere platform users [[Terms of Use]](https://cohere.ai/terms-of-use).\n" + monitoring: "All applications developed using the Cohere API is subject to review\ + \ by Cohere.\n" + monthly_active_users: unknown + name: Cohere Embed Endpoint + organization: Cohere output_space: embedding - quality_control: > - The new users of the API get a limited access restricting the sizes of the - models as well as the number of tokens that can be used. Users are required - to go through an internal application to upgrade to full access - [[Limited Access]](https://docs.cohere.ai/limited-access). - # Downstream - access: - value: limited - explanation: > - Users can access the Cohere API by signing up on the Cohere website - [[Cohere Website]](https://cohere.ai/). 
- license: > - Limited use license to Cohere platform users - [[Terms of Use]](https://cohere.ai/terms-of-use). + prohibited_uses: "The usage of the API is bound by the Cohere usage guidelines.\ + \ Disallowed use cases include violence and threats, antisocial and antidemocratic\ + \ uses, deceit, attacks on security or privacy, unsafe unsupervised uses, decision-making,\ + \ high-Risk generations among others [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines).\n" + quality_control: "The new users of the API get a limited access restricting the\ + \ sizes of the models as well as the number of tokens that can be used. Users\ + \ are required to go through an internal application to upgrade to full access\ + \ [[Limited Access]](https://docs.cohere.ai/limited-access).\n" terms_of_service: https://cohere.ai/terms-of-use - intended_uses: > - Intended to be used by developers who would like to incorporate NLP into - their applications - [[Cohere Website]](https://cohere.ai/). - prohibited_uses: > - The usage of the API is bound by the Cohere usage guidelines. - Disallowed use cases include violence and threats, antisocial and - antidemocratic uses, deceit, attacks on security or privacy, unsafe - unsupervised uses, decision-making, high-Risk generations among others - [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines). - monitoring: > - All applications developed using the Cohere API is subject to review by - Cohere. - feedback: > - General feedback as well as the violations of the usage guidelines can - be reported to Cohere at responsibility at cohere.ai - [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines). - # Deployment - monthly_active_users: unknown + type: application + url: https://docs.cohere.ai/reference/embed user_distribution: unknown - failures: unknown - -- type: application - name: Cohere Classify Endpoint - # General - organization: Cohere - description: > - This endpoint makes a prediction about which label best - fits a specified text input. To make a prediction, - Classify uses the provided examples of text + label - pairs as a reference. +- access: + explanation: "Users can access the Cohere API by signing up on the Cohere website\ + \ [[Cohere Website]](https://cohere.ai/).\n" + value: limited + adaptation: unknown created_date: + explanation: "The launch of Classify was announced in this [[blog post]] (https://txt.cohere.com/classification-is-now-available/).\n" value: 2022-05-05 - explanation: > - The launch of Classify was announced in this - [[blog post]] - (https://txt.cohere.com/classification-is-now-available/). - url: https://docs.cohere.ai/reference/classify - # Construction dependencies: [Cohere Embed (Multilingual), Cohere Embed (English)] - adaptation: unknown + description: "This endpoint makes a prediction about which label best fits a specified\ + \ text input. 
To make a prediction, Classify uses the provided examples of text\ + \ + label pairs as a reference.\n" + failures: unknown + feedback: "General feedback as well as the violations of the usage guidelines\ + \ can be reported to Cohere at responsibility at cohere.ai [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines).\n" + intended_uses: "Intended to be used by developers who would like to incorporate\ + \ NLP into their applications [[Cohere Website]](https://cohere.ai/).\n" + license: "Limited use license to Cohere platform users [[Terms of Use]](https://cohere.ai/terms-of-use).\n" + monitoring: "All applications developed using the Cohere API is subject to review\ + \ by Cohere.\n" + monthly_active_users: unknown + name: Cohere Classify Endpoint + organization: Cohere output_space: embedding - quality_control: > - The new users of the API get a limited access restricting the sizes of the - models as well as the number of tokens that can be used. Users are required - to go through an internal application to upgrade to full access - [[Limited Access]](https://docs.cohere.ai/limited-access). - # Downstream - access: - value: limited - explanation: > - Users can access the Cohere API by signing up on the Cohere website - [[Cohere Website]](https://cohere.ai/). - license: > - Limited use license to Cohere platform users - [[Terms of Use]](https://cohere.ai/terms-of-use). + prohibited_uses: "The usage of the API is bound by the Cohere usage guidelines.\ + \ Disallowed use cases include violence and threats, antisocial and antidemocratic\ + \ uses, deceit, attacks on security or privacy, unsafe unsupervised uses, decision-making,\ + \ high-Risk generations among others [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines).\n" + quality_control: "The new users of the API get a limited access restricting the\ + \ sizes of the models as well as the number of tokens that can be used. Users\ + \ are required to go through an internal application to upgrade to full access\ + \ [[Limited Access]](https://docs.cohere.ai/limited-access).\n" terms_of_service: https://cohere.ai/terms-of-use - intended_uses: > - Intended to be used by developers who would like to incorporate NLP into - their applications - [[Cohere Website]](https://cohere.ai/). - prohibited_uses: > - The usage of the API is bound by the Cohere usage guidelines. - Disallowed use cases include violence and threats, antisocial and - antidemocratic uses, deceit, attacks on security or privacy, unsafe - unsupervised uses, decision-making, high-Risk generations among others - [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines). - monitoring: > - All applications developed using the Cohere API is subject to review by - Cohere. - feedback: > - General feedback as well as the violations of the usage guidelines can - be reported to Cohere at responsibility at cohere.ai - [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines). - # Deployment - monthly_active_users: unknown + type: application + url: https://docs.cohere.ai/reference/classify user_distribution: unknown - failures: unknown - -- type: application - name: Cohere Summarize Endpoint - # General - organization: Cohere - description: > - This endpoint generates a succinct - version of the original text that relays - the most important information. 
+- access: + explanation: "Users can access the Cohere API by signing up on the Cohere website\ + \ [[Cohere Website]](https://cohere.ai/).\n" + value: limited + adaptation: unknown created_date: + explanation: "The beta launch of summarize was announced in this [[blog post]]\ + \ (https://txt.cohere.com/summarize-beta/).\n" value: 2023-02-22 - explanation: > - The beta launch of summarize was announced in this - [[blog post]] - (https://txt.cohere.com/summarize-beta/). - url: https://docs.cohere.ai/reference/summarize - # Construction dependencies: [] - adaptation: unknown + description: "This endpoint generates a succinct version of the original text\ + \ that relays the most important information.\n" + failures: unknown + feedback: "General feedback as well as the violations of the usage guidelines\ + \ can be reported to Cohere at responsibility at cohere.ai [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines).\n" + intended_uses: "Intended to be used by developers who would like to incorporate\ + \ NLP into their applications [[Cohere Website]](https://cohere.ai/).\n" + license: "Limited use license to Cohere platform users [[Terms of Use]](https://cohere.ai/terms-of-use).\n" + monitoring: "All applications developed using the Cohere API is subject to review\ + \ by Cohere.\n" + monthly_active_users: unknown + name: Cohere Summarize Endpoint + organization: Cohere output_space: generation - quality_control: > - The new users of the API get a limited access restricting the sizes of the - models as well as the number of tokens that can be used. Users are required - to go through an internal application to upgrade to full access - [[Limited Access]](https://docs.cohere.ai/limited-access). - # Downstream - access: - value: limited - explanation: > - Users can access the Cohere API by signing up on the Cohere website - [[Cohere Website]](https://cohere.ai/). - license: > - Limited use license to Cohere platform users - [[Terms of Use]](https://cohere.ai/terms-of-use). + prohibited_uses: "The usage of the API is bound by the Cohere usage guidelines.\ + \ Disallowed use cases include violence and threats, antisocial and antidemocratic\ + \ uses, deceit, attacks on security or privacy, unsafe unsupervised uses, decision-making,\ + \ high-Risk generations among others [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines).\n" + quality_control: "The new users of the API get a limited access restricting the\ + \ sizes of the models as well as the number of tokens that can be used. Users\ + \ are required to go through an internal application to upgrade to full access\ + \ [[Limited Access]](https://docs.cohere.ai/limited-access).\n" terms_of_service: https://cohere.ai/terms-of-use - intended_uses: > - Intended to be used by developers who would like to incorporate NLP into - their applications - [[Cohere Website]](https://cohere.ai/). - prohibited_uses: > - The usage of the API is bound by the Cohere usage guidelines. - Disallowed use cases include violence and threats, antisocial and - antidemocratic uses, deceit, attacks on security or privacy, unsafe - unsupervised uses, decision-making, high-Risk generations among others - [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines). - monitoring: > - All applications developed using the Cohere API is subject to review by - Cohere. 
- feedback: > - General feedback as well as the violations of the usage guidelines can - be reported to Cohere at responsibility at cohere.ai - [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines). - # Deployment - monthly_active_users: unknown + type: application + url: https://docs.cohere.ai/reference/summarize user_distribution: unknown - failures: unknown diff --git a/assets/columbia.yaml b/assets/columbia.yaml index 2ca39802..cef14a6c 100644 --- a/assets/columbia.yaml +++ b/assets/columbia.yaml @@ -1,23 +1,24 @@ --- - -- type: model - name: OpenFold - organization: Columbia - description: OpenFold is an open source recreation of AlphaFold2. +- access: open + analysis: Evaluated on wide range of tasks using own evaluation benchmarks. created_date: 2022-11-20 - url: https://www.biorxiv.org/content/10.1101/2022.11.20.517210v2 + dependencies: [AlphaFold2, OpenProteinSet] + description: OpenFold is an open source recreation of AlphaFold2. + feedback: '' + intended_uses: '' + license: CC BY 4.0 + modality: + explanation: protein structures + value: image; image model_card: '' - modality: protein structures - analysis: Evaluated on wide range of tasks using own evaluation benchmarks. + monitoring: '' + name: OpenFold + organization: Columbia + prohibited_uses: '' + quality_control: '' size: '' - dependencies: [AlphaFold2, OpenProteinSet] training_emissions: unknown - training_time: 50,000 GPU hours training_hardware: Single A100 NVIDIA GPU - quality_control: '' - access: open - license: CC BY 4.0 - intended_uses: '' - prohibited_uses: '' - monitoring: '' - feedback: '' + training_time: 50,000 GPU hours + type: model + url: https://www.biorxiv.org/content/10.1101/2022.11.20.517210v2 diff --git a/assets/databricks.yaml b/assets/databricks.yaml index 40dc6f16..7439f500 100644 --- a/assets/databricks.yaml +++ b/assets/databricks.yaml @@ -1,67 +1,50 @@ --- - -- type: model - name: Dolly - # General - organization: Databricks - description: > - "Databricks’ Dolly, a large language model trained on the Databricks - Machine Learning Platform, demonstrates that a two-years-old open source - model (GPT-J) can, when subjected to just 30 minutes of fine tuning on a - focused corpus of 50k records (Stanford Alpaca), exhibit surprisingly - high quality instruction following behavior not characteristic of the - foundation model on which it is based." - [[Dolly Repository]](https://github.com/databrickslabs/dolly). +- access: + explanation: "Model training code can be accessed at the official Dolly repository.\ \ Trained weights can be requested at hello-dolly@databricks.com. [[Dolly\ \ Repository]](https://github.com/databrickslabs/dolly).\n" + value: open + analysis: "\"We evaluated Dolly on the instruction-following capabilities described\ \ in the InstructGPT paper that ChatGPT is based on and found that it exhibits\ \ many of the same qualitative capabilities, including text generation, brainstorming\ \ and open Q&A.\" [[Databricks Blog Post]] (https://www.databricks.com/blog/2023/03/24/hello-dolly-democratizing-magic-chatgpt-open-models.html).\n" created_date: + explanation: "The date the model was announced in the [[Databricks blog post]](https://www.databricks.com/blog/2023/03/24/hello-dolly-democratizing-magic-chatgpt-open-models.html).\n" value: 2023-03-24 - explanation: > - The date the model was announced in the [[Cerebras blog post]](https://www.databricks.com/blog/2023/03/24/hello-dolly-democratizing-magic-chatgpt-open-models.html).
- url: https://www.databricks.com/blog/2023/03/24/hello-dolly-democratizing-magic-chatgpt-open-models.html - model_card: '' - modality: text (English) - size: 6B parameters (dense model) - analysis: > - "We evaluated Dolly on the instruction-following capabilities described in the - InstructGPT - paper that ChatGPT is based on and found that it exhibits many of the same qualitative - capabilities, including text generation, brainstorming and open Q&A." - [[Databricks Blog Post]] - (https://www.databricks.com/blog/2023/03/24/hello-dolly-democratizing-magic-chatgpt-open-models.html). - # Construction dependencies: [GPT-J, Alpaca dataset] + description: "\"Databricks’ Dolly, a large language model trained on the Databricks\n\ + \ Machine Learning Platform, demonstrates that a two-years-old open source\n\ + \ model (GPT-J) can, when subjected to just 30 minutes of fine tuning on a\n\ + \ focused corpus of 50k records (Stanford Alpaca), exhibit surprisingly\n high\ + \ quality instruction following behavior not characteristic of the\n foundation\ + \ model on which it is based.\"\n [[Dolly Repository]](https://github.com/databrickslabs/dolly).\n" + feedback: https://github.com/databrickslabs/dolly/issues + intended_uses: "\"Dolly is intended exclusively for research purposes and is not\ + \ licensed for commercial use.\" [[Limitations]](https://github.com/databrickslabs/dolly#limitations).\n" + license: Apache 2.0 + modality: + explanation: text (English) + value: text; text + model_card: '' + monitoring: none + name: Dolly + organization: Databricks + prohibited_uses: "Authors note the following limitations of the model: \"The Dolly\ + \ model family is under active development, and so any list of shortcomings\ + \ is unlikely to be exhaustive, but we include known limitations and misfires\ + \ here as a means to document and share our preliminary findings with the community.\ + \ In particular, dolly-6b struggles with syntactically complex prompts, mathematical\ + \ operations, factual errors, dates and times, open-ended question answering,\ + \ hallucination, enumerating lists of specific length, and stylistic mimicry.\"\ + \ [[Limitations]](https://github.com/databrickslabs/dolly#limitations).\n" + quality_control: none + size: 6B parameters (dense) training_emissions: unknown - training_time: - value: 30 minutes - explanation: > - According to [[Model Overview]](https://github.com/databrickslabs/dolly#model-overview) training_hardware: + explanation: "According to [[Model Overview]](https://github.com/databrickslabs/dolly#model-overview)\n" value: A single NDasrA100_v4 machine with 8x A100 40GB GPUs - explanation: > - According to [[Model Overview]](https://github.com/databrickslabs/dolly#model-overview) - quality_control: none - # Downstream - access: - value: open - explanation: > - Model training code can be accessed at the official Dolly repository. - Trained weights can be requested at hello-dolly@databricks.com. - [[Dolly Repository]](https://github.com/databrickslabs/dolly). - license: Apache 2.0 - intended_uses: > - "Dolly is intended exclusively for research purposes and is not licensed for - commercial use." - [[Limitations]](https://github.com/databrickslabs/dolly#limitations). 
- prohibited_uses: > - Authors note the following limitations of the model: "The Dolly model family - is under active development, - and so any list of shortcomings is unlikely to be exhaustive, but we include - known limitations and misfires - here as a means to document and share our preliminary findings with the community. - In particular, dolly-6b - struggles with syntactically complex prompts, mathematical operations, factual - errors, dates and times, - open-ended question answering, hallucination, enumerating lists of specific - length, and stylistic mimicry." - [[Limitations]](https://github.com/databrickslabs/dolly#limitations). - monitoring: none - feedback: https://github.com/databrickslabs/dolly/issues + training_time: + explanation: "According to [[Model Overview]](https://github.com/databrickslabs/dolly#model-overview)\n" + value: 30 minutes + type: model + url: https://www.databricks.com/blog/2023/03/24/hello-dolly-democratizing-magic-chatgpt-open-models.html diff --git a/assets/deepmind.yaml b/assets/deepmind.yaml index a59747e1..81a0f83d 100644 --- a/assets/deepmind.yaml +++ b/assets/deepmind.yaml @@ -1,840 +1,670 @@ --- - -- type: dataset - name: MassiveText - # General - organization: DeepMind - description: > - The MassiveText dataset was used to train the Gopher model. +- access: + explanation: "The dataset access is limited to DeepMind researchers [[Model\ + \ Card]](https://arxiv.org/pdf/2112.11446.pdf#appendix.C).\n" + value: closed + analysis: "MassiveText data was analyzed for toxicity, language distribution,\ + \ URL breakdown, and tokenizer compression rates on the subsets [[Section A.2]](https://arxiv.org/pdf/2112.11446.pdf#subsection.A.2).\n" created_date: + explanation: "The date that Gopher was announced [[DeepMind Blog Post]] (https://www.deepmind.com/blog/language-modelling-at-scale-gopher-ethical-considerations-and-retrieval).\n" value: 2021-12-08 - explanation: > - The date that Gopher was announced - [[DeepMind Blog Post]] - (https://www.deepmind.com/blog/language-modelling-at-scale-gopher-ethical-considerations-and-retrieval). - url: https://arxiv.org/pdf/2112.11446.pdf datasheet: https://arxiv.org/pdf/2112.11446.pdf#subsection.A.5 - modality: Text (English) and Code - size: 10.5 TB - sample: [] - analysis: > - MassiveText data was analyzed for toxicity, language distribution, URL - breakdown, and tokenizer compression rates on the subsets - [[Section A.2]](https://arxiv.org/pdf/2112.11446.pdf#subsection.A.2). - # Construction dependencies: [] - license: + description: "The MassiveText dataset was used to train the Gopher model.\n" + excluded: "Documents that are not in English are excluded.\n" + feedback: + explanation: "The internal feedback mechanisms for WebText are unknown [[Model\ + \ Card]](https://arxiv.org/pdf/2112.11446.pdf#appendix.C).\n" value: unknown - explanation: > - The model likely has a license specifically for DeepMind's use, - based on the information provided in the datasheet - [[Model Card]](https://arxiv.org/pdf/2112.11446.pdf#subsection.A.5). - included: > - MassiveText data come from 6 sources: MassiveWeb (48%), Books (27%), - C4 (10%), News (10%), GitHub (3%), and Wikipedia (2%). MassiveWeb is a - web text corpus curated for MassiveText. - excluded: > - Documents that are not in English are excluded. 
- quality_control: > - The authors use simple heuristics for filtering low quality documents as - opposed to relying on a classifier based on a "gold" set such as the English - Wikipedia, which could "inadvertently bias towards a certain demographic or - erase certain dialects or sociolects from representation." MassiveWeb - subset was filtered using Google’s SafeSearch filter, preferring it over - to word filters that "disproportinately filter out inoffensive content - associated with minority groups. MassiveWeb was filtered - further for word or phrase repetitions. All the subsets were filtered for - document deduplication and test set contamination" - [[Appendix A]](https://arxiv.org/pdf/2112.11446.pdf#appendix.A). - # Downstream - access: - value: closed - explanation: > - The dataset access is limited to DeepMind researchers - [[Model Card]](https://arxiv.org/pdf/2112.11446.pdf#appendix.C). - intended_uses: > - Pre-training of language models by DeepMind researchers - [[Model Card]](https://arxiv.org/pdf/2112.11446.pdf#appendix.C). - prohibited_uses: + included: "MassiveText data come from 6 sources: MassiveWeb (48%), Books (27%),\ + \ C4 (10%), News (10%), GitHub (3%), and Wikipedia (2%). MassiveWeb is a web\ + \ text corpus curated for MassiveText.\n" + intended_uses: "Pre-training of language models by DeepMind researchers [[Model\ + \ Card]](https://arxiv.org/pdf/2112.11446.pdf#appendix.C).\n" + license: + explanation: "The model likely has a license specifically for DeepMind's use,\ + \ based on the information provided in the datasheet [[Model Card]](https://arxiv.org/pdf/2112.11446.pdf#subsection.A.5).\n" value: unknown - explanation: > - There are no known prohibited uses of the dataset, but the authors - state that it should not be used for training models with multilingual - capabilities as it only contains the English language - [[Model Card]](https://arxiv.org/pdf/2112.11446.pdf#appendix.C). + modality: + explanation: Text (English) and Code + value: code, text monitoring: + explanation: "There is no information on how DeepMind is internally monitoring\ + \ the use of the dataset.\n" value: unknown - explanation: > - There is no information on how DeepMind is internally monitoring - the use of the dataset. - feedback: - value: unknown - explanation: > - The internal feedback mechanisms for WebText are unknown - [[Model Card]](https://arxiv.org/pdf/2112.11446.pdf#appendix.C). - -- type: dataset - name: M3W - # General + name: MassiveText organization: DeepMind - description: > - M3W (MassiveWeb) is dataset used to train Flamingo, and other vision-language - models and was created by researchers and engineers. + prohibited_uses: + explanation: "There are no known prohibited uses of the dataset, but the authors\ + \ state that it should not be used for training models with multilingual capabilities\ + \ as it only contains the English language [[Model Card]](https://arxiv.org/pdf/2112.11446.pdf#appendix.C).\n" + value: unknown + quality_control: "The authors use simple heuristics for filtering low quality\ + \ documents as opposed to relying on a classifier based on a \"gold\" set such\ + \ as the English Wikipedia, which could \"inadvertently bias towards a certain\ + \ demographic or erase certain dialects or sociolects from representation.\"\ + \ MassiveWeb subset was filtered using Google’s SafeSearch filter, preferring\ + \ it over to word filters that \"disproportinately filter out inoffensive content\ + \ associated with minority groups. 
MassiveWeb was filtered further for word\ + \ or phrase repetitions. All the subsets were filtered for document deduplication\ + \ and test set contamination\" [[Appendix A]](https://arxiv.org/pdf/2112.11446.pdf#appendix.A).\n" + sample: [] + size: 10.5 TB + type: dataset + url: https://arxiv.org/pdf/2112.11446.pdf +- access: + explanation: "The dataset access is limited to DeepMind researchers [[Datasheet]](https://arxiv.org/pdf/2204.14198.pdf#appendix.F).\n" + value: closed + analysis: '' created_date: + explanation: "The date that Flamingo was posted to arXiv [[arXiv]] (https://arxiv.org/pdf/2204.14198.pdf).\n" value: 2022-04-29 - explanation: > - The date that Flamingo was posted to arXiv - [[arXiv]] - (https://arxiv.org/pdf/2204.14198.pdf). - url: https://arxiv.org/pdf/2204.14198.pdf datasheet: https://arxiv.org/pdf/2204.14198.pdf#appendix.F - modality: text (English) and code - size: 182GB Text, 185M Images - sample: [] - analysis: '' - # Construction dependencies: [] - license: - value: unknown - explanation: > - The model likely has a license specifically for DeepMind's use, - based on the information provided in the datasheet - [[Datasheet]](https://arxiv.org/pdf/2204.14198.pdf#appendix.F). - included: > - M3W has interleaved images (185M) and text (182GB) from the web. + description: "M3W (MassiveWeb) is dataset used to train Flamingo, and other vision-language\ + \ models and was created by researchers and engineers.\n" excluded: unknown - quality_control: > - The authors provide a basic description of data processing and cleaning. - # Downstream - access: - value: closed - explanation: > - The dataset access is limited to DeepMind researchers - [[Datasheet]](https://arxiv.org/pdf/2204.14198.pdf#appendix.F). - intended_uses: > - Pre-training of vision and language models by DeepMind researchers - [[Datasheet]](https://arxiv.org/pdf/2204.14198.pdf#appendix.F). - prohibited_uses: + feedback: + explanation: "No feedback mechanism is mentioned in the datasheet [[Datasheet]](https://arxiv.org/pdf/2204.14198.pdf#appendix.F).\n" + value: none + included: "M3W has interleaved images (185M) and text (182GB) from the web.\n" + intended_uses: "Pre-training of vision and language models by DeepMind researchers\ + \ [[Datasheet]](https://arxiv.org/pdf/2204.14198.pdf#appendix.F).\n" + license: + explanation: "The model likely has a license specifically for DeepMind's use,\ + \ based on the information provided in the datasheet [[Datasheet]](https://arxiv.org/pdf/2204.14198.pdf#appendix.F).\n" value: unknown - explanation: > - There are no known prohibited uses of the dataset - [[Datasheet]](https://arxiv.org/pdf/2204.14198.pdf#appendix.F). + modality: + explanation: text (English) and code + value: code, text monitoring: + explanation: "There is no information on how DeepMind is internally monitoring\ + \ the use of the dataset.\n" value: unknown - explanation: > - There is no information on how DeepMind is internally monitoring - the use of the dataset. - feedback: - value: none - explanation: > - No feedback mechanism is mentioned in the datasheet - [[Datasheet]](https://arxiv.org/pdf/2204.14198.pdf#appendix.F). - -- type: dataset - name: Gato dataset - # General + name: M3W organization: DeepMind - description: > - The Gato datasets are a collection of data used to train the Gato model. 
+ prohibited_uses: + explanation: "There are no known prohibited uses of the dataset [[Datasheet]](https://arxiv.org/pdf/2204.14198.pdf#appendix.F).\n" + value: unknown + quality_control: "The authors provide a basic description of data processing and\ + \ cleaning.\n" + sample: [] + size: 182GB Text, 185M Images + type: dataset + url: https://arxiv.org/pdf/2204.14198.pdf +- access: + explanation: "The full dataset is not directly provided by the authors, though\ + \ some underlying data is public whereas others (e.g. MassiveText) is not.\n" + value: closed + analysis: "The Gato dataset compiles many datasets introduced in prior works,\ + \ with associated analyses.\n" created_date: + explanation: "The date that Gato was announced [[DeepMind Blog Post]] (https://www.deepmind.com/blog/a-generalist-agent).\n" value: 2022-05-12 - explanation: > - The date that Gato was announced - [[DeepMind Blog Post]] - (https://www.deepmind.com/blog/a-generalist-agent). - url: https://www.deepmind.com/blog/a-generalist-agent datasheet: none - modality: text, text-image pair, simulated control task, robotics simulation - size: 10.5 TB Text, 2.2B Text-Image pairs, 1.5T tokens of simulated control, 500k - robotics trajectories - sample: [] - analysis: > - The Gato dataset compiles many datasets introduced in prior works, with associated - analyses. - # Construction dependencies: [MassiveText] - license: - value: unknown - explanation: > - The datasets have individual licenses, but no overall license is mentioned - by the authors. - included: > - The full composition of the dataset across individual sources can be found in - the paper. + description: "The Gato datasets are a collection of data used to train the Gato\ + \ model.\n" excluded: + explanation: "No specific filtering is mentioned in the Gato paper.\n" value: none - explanation: > - No specific filtering is mentioned in the Gato paper. - quality_control: unknown - # Downstream - access: - value: closed - explanation: > - The full dataset is not directly provided by the authors, though some underlying - data is public whereas others (e.g. MassiveText) is not. + feedback: + explanation: "There is no mention on feedback mechanisms either internally or\ + \ externally.\n" + value: none + included: "The full composition of the dataset across individual sources can be\ + \ found in the paper.\n" intended_uses: + explanation: "There are no known intended uses of the dataset stated by authors\ + \ beyond training Gato.\n" value: unknown - explanation: > - There are no known intended uses of the dataset stated by authors beyond training - Gato. - prohibited_uses: + license: + explanation: "The datasets have individual licenses, but no overall license\ + \ is mentioned by the authors.\n" value: unknown - explanation: > - There are no known prohibited uses of the dataset stated by authors. + modality: + explanation: text, text-image pair, simulated control task, robotics simulation + value: image, text monitoring: + explanation: "There is no mention on how DeepMind is internally monitoring the\ + \ use of the dataset.\n" value: none - explanation: > - There is no mention on how DeepMind is internally monitoring - the use of the dataset. - feedback: - value: none - explanation: > - There is no mention on feedback mechanisms either internally or externally. 
- -- type: model - name: AlphaFold2 + name: Gato dataset organization: DeepMind - description: AlphaFold2 is a protein language model trained on protein sequences + prohibited_uses: + explanation: "There are no known prohibited uses of the dataset stated by authors.\n" + value: unknown + quality_control: unknown + sample: [] + size: 10.5 TB Text, 2.2B Text-Image pairs, 1.5T tokens of simulated control, 500k + robotics trajectories + type: dataset + url: https://www.deepmind.com/blog/a-generalist-agent +- access: + explanation: Models are available for download from the [[Github repository]](https://github.com/deepmind/alphafold) + value: open + analysis: '' created_date: - value: 2021-07-15 explanation: The date the model paper was released - url: https://www.nature.com/articles/s41586-021-03819-2 - model_card: none - modality: protein sequence - size: 93M parameters (dense model) - analysis: '' + value: 2021-07-15 dependencies: [Protein Data Bank] - training_emissions: '' - training_time: - value: 11 days - explanation: Training takes "approximately 1 week" and finetuning takes "approximately - 4 days" - training_hardware: - value: 128 TPUv3 cores - explanation: Described in paper - quality_control: '' - access: - value: open - explanation: Models are available for download from the [[Github repository]](https://github.com/deepmind/alphafold) + description: AlphaFold2 is a protein language model trained on protein sequences + feedback: '' + intended_uses: '' license: - value: Apache 2.0 explanation: The license is provided in the [[Github repository]](https://github.com/deepmind/alphafold) - intended_uses: '' - prohibited_uses: '' + value: Apache 2.0 + modality: + explanation: protein sequence + value: text; text + model_card: none monitoring: '' - feedback: '' - -- type: model - name: Flamingo - # General + name: AlphaFold2 organization: DeepMind - description: > - Flamingo is a Visual Language Model using the Transformer architecture that - is intended for few-shot learning. + prohibited_uses: '' + quality_control: '' + size: 93M parameters (dense) + training_emissions: '' + training_hardware: + explanation: Described in paper + value: 128 TPUv3 cores + training_time: + explanation: Training takes "approximately 1 week" and finetuning takes "approximately + 4 days" + value: 11 days + type: model + url: https://www.nature.com/articles/s41586-021-03819-2 +- access: + explanation: "The model has not been released and no discussion of release is\ + \ stated in the model card [[Model Card]](https://arxiv.org/pdf/2204.14198.pdf#appendix.E).\n" + value: closed + analysis: "Model performance was evaluated on image and video datasets primarily,\ + \ including dialogue.\n" created_date: + explanation: "The date that Flamingo was posted to arXiv [[arXiv]] (https://arxiv.org/abs/2204.14198).\n" value: 2022-04-29 - explanation: > - The date that Flamingo was posted to arXiv - [[arXiv]] - (https://arxiv.org/abs/2204.14198). - url: https://arxiv.org/pdf/2204.14198.pdf - model_card: https://arxiv.org/pdf/2204.14198.pdf#appendix.E - modality: Images, Text - size: - value: 80B parameters (dense model) - explanation: > - Gato family has models of several sizes (3.2B, 9.3B, 80B) which involve frozen - language, vision, and trainable parameters - [[Table 5]](https://arxiv.org/pdf/2204.14198.pdf). - analysis: > - Model performance was evaluated on image and video datasets primarily, including - dialogue. 
- # Construction dependencies: [M3W, ALIGN, LTIP, VTP, Chinchilla] - training_emissions: - value: unknown - explanation: > - Authors do not report the training emissions. - training_time: - value: 15 days on 1536 TPUs - explanation: > - Reported in the paper checklist - [[Checklist]](https://arxiv.org/pdf/2204.14198.pdf). - training_hardware: - value: TPU - explanation: > - Reported in the paper checklist - [[Checklist]](https://arxiv.org/pdf/2204.14198.pdf). - quality_control: + description: "Flamingo is a Visual Language Model using the Transformer architecture\ + \ that is intended for few-shot learning.\n" + feedback: + explanation: "No contact information is provided for feedback in the model card\ + \ [[Model Card]](https://arxiv.org/pdf/2204.14198.pdf#appendix.E).\n" value: none - explanation: > - Reported in the mitigations in the model card - [[Model Card]](https://openreview.net/pdf?id=1ikK0kHjvj#appendix.A). - # Downstream - access: - value: closed - explanation: > - The model has not been released and no discussion of release is stated in - the model card - [[Model Card]](https://arxiv.org/pdf/2204.14198.pdf#appendix.E). + intended_uses: "The intended uses are stated in the model card: \"The primary\ + \ use is research on visual language models (VLM), including: research on VLM\ + \ applications like classification, captioning or visual question answering,\ + \ understanding how strong VLMs can contribute to AGI, advancing fairness and\ + \ safety research in the area of multimodal research, and understanding limitations\ + \ of current large VLMs.\" [[Model Card]](https://arxiv.org/pdf/2204.14198.pdf#appendix.E).\n" license: + explanation: "No license is provided in the model card [[Model Card]](https://arxiv.org/pdf/2204.14198.pdf#appendix.E).\n" value: unknown - explanation: > - No license is provided in the model card - [[Model Card]](https://arxiv.org/pdf/2204.14198.pdf#appendix.E). - intended_uses: > - The intended uses are stated in the model card: "The primary use is research - on visual language models (VLM), including: research on VLM applications like - classification, captioning or visual question answering, understanding how - strong VLMs can contribute to AGI, advancing - fairness and safety research in the area of multimodal research, and understanding - limitations of current large VLMs." - [[Model Card]](https://arxiv.org/pdf/2204.14198.pdf#appendix.E). - prohibited_uses: > - The model card lists the following as out of scope uses of the model: "Uses - of the model for visually conditioned language generation in harmful or deceitful - settings. Broadly speaking, - the model should not be used for downstream applications - without further safety and fairness mitigations specific to - each application." - [[Model Card]](https://arxiv.org/pdf/2204.14198.pdf#appendix.E). + modality: + explanation: Images, Text + value: image, text; image, text + model_card: https://arxiv.org/pdf/2204.14198.pdf#appendix.E monitoring: + explanation: "There is no information on how DeepMind is internally monitoring\ + \ the use of the model.\n" value: unknown - explanation: > - There is no information on how DeepMind is internally monitoring - the use of the model. - feedback: - value: none - explanation: > - No contact information is provided for feedback in the model card - [[Model Card]](https://arxiv.org/pdf/2204.14198.pdf#appendix.E). 
- -- type: model - name: AlphaCode + name: Flamingo organization: DeepMind - description: AlphaCode is an autoregressive language model trained on code + prohibited_uses: "The model card lists the following as out of scope uses of the\ + \ model: \"Uses of the model for visually conditioned language generation in\ + \ harmful or deceitful settings. Broadly speaking, the model should not be used\ + \ for downstream applications without further safety and fairness mitigations\ + \ specific to each application.\" [[Model Card]](https://arxiv.org/pdf/2204.14198.pdf#appendix.E).\n" + quality_control: + explanation: "Reported in the mitigations in the model card [[Model Card]](https://openreview.net/pdf?id=1ikK0kHjvj#appendix.A).\n" + value: none + size: 80B parameters (dense) + training_emissions: + explanation: "Authors do not report the training emissions.\n" + value: unknown + training_hardware: + explanation: "Reported in the paper checklist [[Checklist]](https://arxiv.org/pdf/2204.14198.pdf).\n" + value: TPU + training_time: + explanation: "Reported in the paper checklist [[Checklist]](https://arxiv.org/pdf/2204.14198.pdf).\n" + value: 15 days on 1536 TPUs + type: model + url: https://arxiv.org/pdf/2204.14198.pdf +- access: + explanation: DeepMind does not provide access to AlphaCode to external researchers + value: closed + analysis: '' created_date: - value: 2022-02-02 explanation: The date the model paper was released - url: https://arxiv.org/abs/2203.07814 - model_card: '' - modality: code - analysis: '' - size: 41B parameters (dense model) + value: 2022-02-02 dependencies: [] - training_emissions: '' - training_time: '' - training_hardware: '' - quality_control: '' - access: - value: closed - explanation: DeepMind does not provide access to AlphaCode to external researchers + description: AlphaCode is an autoregressive language model trained on code + feedback: '' + intended_uses: '' license: - value: unknown explanation: '' - intended_uses: '' - prohibited_uses: '' + value: unknown + modality: + explanation: code + value: code; code + model_card: '' monitoring: '' - feedback: '' - -- type: model - name: Gopher - # General + name: AlphaCode organization: DeepMind - description: > - Gopher is an autoregressive language model based on the Transformer - architecture with two modifications: using RMSNorm instead of LayerNorm and - using relative positional encoding scheme instead of absolute positional - encodings - [[Section 3]](https://arxiv.org/pdf/2112.11446.pdf#subsection.3.1). + prohibited_uses: '' + quality_control: '' + size: 41B parameters (dense) + training_emissions: '' + training_hardware: '' + training_time: '' + type: model + url: https://arxiv.org/abs/2203.07814 +- access: + explanation: "The model access is limited to DeepMind researchers. 
The model\ + \ won't be released to the public [[Model Card]](https://arxiv.org/pdf/2112.11446.pdf#appendix.B).\n" + value: closed + analysis: "Model performance was evaluated and analyzed on 152 NLP tasks including:\ + \ Language Modelling (20), Reading Comprehension (3), Fact Checking (3), Question\ + \ Answering (3), Common Sense (4), MMLU (57), BIG-bench (62) [[Section 4]](https://arxiv.org/pdf/2112.11446.pdf#section.4);\ + \ on toxicity and bias datasets [[Section 5]](https://arxiv.org/pdf/2112.11446.pdf#section.5);\ + \ and on dialogue tasks [[Section 6]](https://arxiv.org/pdf/2112.11446.pdf#section.6).\n" created_date: + explanation: "The date that Gopher was announced [[DeepMind Blog Post]] (https://www.deepmind.com/blog/language-modelling-at-scale-gopher-ethical-considerations-and-retrieval).\n" value: 2021-12-08 - explanation: > - The date that Gopher was announced - [[DeepMind Blog Post]] - (https://www.deepmind.com/blog/language-modelling-at-scale-gopher-ethical-considerations-and-retrieval). - url: https://arxiv.org/pdf/2112.11446.pdf - model_card: https://arxiv.org/pdf/2112.11446.pdf#appendix.B - modality: Text (English) and Code - size: - value: 280B parameters (dense model) - explanation: > - Gopher family has models of several sizes, but the name - Gopher uniquely identify the 280B parameter version. Sizes for the other - models in the Gopher family can be seen in the paper - [[Table 1]](https://arxiv.org/pdf/2112.11446.pdf#table.caption.1). - analysis: > - Model performance was evaluated and analyzed on 152 NLP tasks including: - Language Modelling (20), Reading Comprehension (3), Fact Checking (3), - Question Answering (3), Common Sense (4), MMLU (57), BIG-bench (62) - [[Section 4]](https://arxiv.org/pdf/2112.11446.pdf#section.4); on toxicity - and bias datasets - [[Section 5]](https://arxiv.org/pdf/2112.11446.pdf#section.5); and on - dialogue tasks - [[Section 6]](https://arxiv.org/pdf/2112.11446.pdf#section.6). 
- # Construction dependencies: [MassiveText] + description: "Gopher is an autoregressive language model based on the Transformer\ + \ architecture with two modifications: using RMSNorm instead of LayerNorm and\ + \ using relative positional encoding scheme instead of absolute positional encodings\ + \ [[Section 3]](https://arxiv.org/pdf/2112.11446.pdf#subsection.3.1).\n" + feedback: "The feedback for the model can be provided at the email linked in the\ + \ model card, geoffreyi at google.com [[Model Card]](https://arxiv.org/pdf/2112.11446.pdf#appendix.B).\n" + intended_uses: "The intended uses are stated in the Gopher model card: \"The primary\ + \ use is research on language models, including: research on NLP applications\ + \ like machine translation and question answering, understanding how strong\ + \ language models can contribute to AGI, advancing fairness and safety research,\ + \ and understanding limitations of current LLMs\" [[Model Card]](https://arxiv.org/pdf/2112.11446.pdf#appendix.B).\n" + license: + explanation: "The model likely has a license specifically for DeepMind's use,\ + \ based on the information provided in the model card [[Model Card]](https://arxiv.org/pdf/2112.11446.pdf#appendix.B).\n" + value: unknown + modality: + explanation: Text (English) and Code + value: code, text; code, text + model_card: https://arxiv.org/pdf/2112.11446.pdf#appendix.B + monitoring: + explanation: "There is no information on how DeepMind is internally monitoring\ + \ the use of the model.\n" + value: unknown + name: Gopher + organization: DeepMind + prohibited_uses: "The model card lists the following as out of scope uses of the\ + \ model: \"for language generation in harmful or deceitful settings. More generally,\ + \ the model should not be used for downstream applications without further safety\ + \ and fairness mitigations\" [[Model Card]](https://arxiv.org/pdf/2112.11446.pdf#appendix.B).\n" + quality_control: none + size: 280B parameters (dense) training_emissions: + explanation: "The training emission estimate from the paper [[Section F]](https://arxiv.org/pdf/2112.11446.pdf#appendix.F)\n" value: 380 tCO2e - explanation: > - The training emission estimate from the paper - [[Section F]](https://arxiv.org/pdf/2112.11446.pdf#appendix.F) - training_time: - value: 7303.24 petaflop/s-day - explanation: > - The authors reported the training petaflops for all of the 4 different - sizes of the model. For the 280B parameter model, the petaflops was - reported as 6.31E+08. We compute the Gopher's training time in - petaflop/s-day as 6.31E+08 / (60*60*24) = 7303.24 petaflop/s-day. training_hardware: + explanation: "Reported in the paper [[Section F]](https://arxiv.org/pdf/2112.11446.pdf#appendix.F).\n" value: TPUv3 pods - explanation: > - Reported in the paper - [[Section F]](https://arxiv.org/pdf/2112.11446.pdf#appendix.F). - quality_control: none - # Downstream - access: + training_time: + explanation: "The authors reported the training petaflops for all of the 4 different\ + \ sizes of the model. For the 280B parameter model, the petaflops was reported\ + \ as 6.31E+08. We compute the Gopher's training time in petaflop/s-day as\ + \ 6.31E+08 / (60*60*24) = 7303.24 petaflop/s-day.\n" + value: 7303.24 petaflop/s-day + type: model + url: https://arxiv.org/pdf/2112.11446.pdf +- access: + explanation: "The model access is limited to DeepMind researchers. 
The model\ \ won't be released to the public [[Model Card]](https://arxiv.org/pdf/2112.11446.pdf#appendix.B).\n" + value: closed + analysis: "Model performance was evaluated and analyzed on many NLP tasks including\ \ language modeling, reading comprehension, question answering, commonsense-intensive\ \ tasks, and the BIG-Bench and MMLU meta-benchmarks.\n" + created_date: + explanation: "The date that Chinchilla was posted on arXiv [[arXiv]] (https://arxiv.org/abs/2203.15556).\n" + value: 2022-03-29 + dependencies: [MassiveText] + description: "Chinchilla is an autoregressive language model based on the Transformer\ \ architecture with improved scaling laws.\n" + feedback: "The feedback for the model can be provided at the email linked in the\ \ model card, {jordanhoffmann, sborgeaud, amensch,sifre} at deepmind.com [[Model\ \ Card]](https://arxiv.org/pdf/2203.15556.pdf#appendix.I).\n" + intended_uses: "The intended uses are stated in the Chinchilla model card: \"\ The primary use is research on language models, including: research on the scaling\ \ behaviour of language models along with those listed in Gopher paper\" [[Model\ \ Card]](https://arxiv.org/pdf/2203.15556.pdf#appendix.I).\n" license: + explanation: "The model likely has a license specifically for DeepMind's use,\ \ based on the information provided in the model card [[Model Card]](https://arxiv.org/pdf/2203.15556.pdf#appendix.I).\n" value: unknown + modality: + explanation: text (English) and code + value: text; text + model_card: https://arxiv.org/pdf/2203.15556.pdf monitoring: - value: unknown - explanation: > - There is no information on how DeepMind is internally monitoring - the use of the model. - feedback: > - The feedback for the model can be provided at the email linked in the - model card, geoffreyi at google.com [[Model Card]](https://arxiv.org/pdf/2112.11446.pdf#appendix.B). - -- type: model + explanation: "There is no information on how DeepMind is internally monitoring\ \ the use of the model.\n" + value: unknown name: Chinchilla - # General organization: DeepMind - description: > - Chinchilla is an autoregressive language model based on the Transformer - architecture with improved scaling laws. - created_date: - value: 2022-03-29 - explanation: > - The date that Chinchilla was posted on arXiv - [[arXiv]] - (https://arxiv.org/abs/2203.15556).
- url: https://arxiv.org/pdf/2203.15556.pdf - model_card: https://arxiv.org/pdf/2203.15556.pdf - modality: text (English) and cpde - size: - value: 70B parameters (dense model) - explanation: > - Chinchilla family has many models, but the primary model is the 70B parameter - model. - analysis: > - Model performance was evaluated and analyzed on many NLP tasks including language - modeling, reading comprehension, question answering, commonsense-intensive - tasks, and the BIG-Bench and MMLU meta-benchmarks. - # Construction - dependencies: [MassiveText] + prohibited_uses: "The model card lists the following as out of scope uses of the\ + \ model: \"for language generation in harmful or deceitful settings. More generally,\ + \ the model should not be used for downstream applications without further safety\ + \ and fairness mitigations\" [[Model Card]](https://arxiv.org/pdf/2203.15556.pdf#appendix.I).\n" + quality_control: none + size: 70B parameters (dense) training_emissions: + explanation: "Authors do not report the training emissions.\n" value: Unknown - explanation: > - Authors do not report the training emissions. - training_time: - value: 7303.24 petaflop/s-day - explanation: > - The authors reported the training petaflops for all models, including hypothetical - larger models. - For the 70B parameter model, the petaflops was - reported as 5.76E+08. We compute the Gopher's training time in - petaflop/s-day as 5.76E+08 / (60*60*24) = 6666.66 petaflop/s-day. training_hardware: + explanation: "Reported in the paper [[Section 4]](https://arxiv.org/pdf/2203.15556.pdf).\n" value: TPUv3/TPUv4 pods - explanation: > - Reported in the paper - [[Section 4]](https://arxiv.org/pdf/2203.15556.pdf). - quality_control: none - # Downstream - access: + training_time: + explanation: "The authors reported the training petaflops for all models, including\ + \ hypothetical larger models. For the 70B parameter model, the petaflops was\ + \ reported as 5.76E+08. We compute the Gopher's training time in petaflop/s-day\ + \ as 5.76E+08 / (60*60*24) = 6666.66 petaflop/s-day.\n" + value: 7303.24 petaflop/s-day + type: model + url: https://arxiv.org/pdf/2203.15556.pdf +- access: + explanation: "The model access is limited to DeepMind researchers. The model\ + \ won't be released to the public [[Model Card]](https://openreview.net/pdf?id=1ikK0kHjvj#appendix.A).\n" value: closed - explanation: > - The model access is limited to DeepMind researchers. The model won't be - released to the public - [[Model Card]](https://arxiv.org/pdf/2112.11446.pdf#appendix.B). 
+ analysis: "Model performance was evaluated on simulated and robotics task primarily,\ + \ including out-of-distribution and skill generalization.\n" + created_date: + explanation: "The date that Gato was announced [[DeepMind Blog Post]] (https://www.deepmind.com/blog/a-generalist-agent).\n" + value: 2022-05-12 + dependencies: [Gato dataset] + description: "Gato is a generalist agent based on sequence modeling using the\ + \ Transformer architecture to implement multi-modal, multi-task, multi-embodiment\ + \ generalist policy.\n" + feedback: "The feedback for the model can be provided at the email linked in the\ + \ model card, reedscot at google.com [[Model Card]](https://openreview.net/pdf?id=1ikK0kHjvj#appendix.A).\n" + intended_uses: "The intended uses are stated in the Gopher model card: \"Learn\ + \ to accomplish a wide variety of tasks from expert demonstrations, such as\ + \ playing video games, controlling simulated embodiments, and real world block\ + \ stacking.\" [[Model Card]](https://openreview.net/pdf?id=1ikK0kHjvj#appendix.A).\n" license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The model likely has a license specifically for DeepMind's use, - based on the information provided in the model card - [[Model Card]](https://arxiv.org/pdf/2203.15556.pdf#appendix.I). - intended_uses: > - The intended uses are stated in the Chinchilla model card: "The primary use - is research on language models, including: - research on the scaling behaviour of language models along with those listed - in Gopher paper" - [[Model Card]](https://arxiv.org/pdf/2203.15556.pdf#appendix.I). - prohibited_uses: > - The model card lists the following as out of scope uses of the model: "for - language generation in harmful or deceitful settings. More generally, the - model should not be used for downstream applications without further safety - and fairness mitigations" - [[Model Card]](https://arxiv.org/pdf/2203.15556.pdf#appendix.I). + modality: + explanation: text, image, control/robotics + value: image, text; image, text + model_card: https://openreview.net/pdf?id=1ikK0kHjvj#appendix.B monitoring: - value: Unknown - explanation: > - There is no information on how DeepMind is internally monitoring - the use of the model. - feedback: > - The feedback for the model can be provided at the email linked in the model - card, {jordanhoffmann, sborgeaud, amensch,sifre} at deepmind.com - [[Model Card]](https://arxiv.org/pdf/2203.15556.pdf#appendix.I). - -- type: model + explanation: "There is no information on how DeepMind is internally monitoring\ + \ the use of the model.\n" + value: unknown name: Gato - # General organization: DeepMind - description: > - Gato is a generalist agent based on sequence modeling using the Transformer - architecture to implement multi-modal, multi-task, multi-embodiment generalist - policy. - created_date: - value: 2022-05-12 - explanation: > - The date that Gato was announced - [[DeepMind Blog Post]] - (https://www.deepmind.com/blog/a-generalist-agent). - url: https://www.deepmind.com/blog/a-generalist-agent - model_card: https://openreview.net/pdf?id=1ikK0kHjvj#appendix.B - modality: text, image, control/robotics - size: - value: 1.2B parameters (dense model) - explanation: > - Gato family has models of several sizes (79M, 364M, 1.18B) - [[Table 6]](https://openreview.net/pdf?id=1ikK0kHjvj). 
- analysis: > - Model performance was evaluated on simulated and robotics task primarily, - including out-of-distribution and skill generalization. - # Construction - dependencies: [Gato dataset] + prohibited_uses: "The model card lists the following as out of scope uses of the\ + \ model: \"Not intended for commercial or production use. Military uses are\ + \ strictly prohibited.\" [[Model Card]](https://openreview.net/pdf?id=1ikK0kHjvj#appendix.A).\n" + quality_control: + explanation: "Reported in the mitigations in the model card [[Model Card]](https://openreview.net/pdf?id=1ikK0kHjvj#appendix.A).\n" + value: none + size: 1.2B parameters (dense) training_emissions: + explanation: "Authors do not report the training emissions.\n" value: unknown - explanation: > - Authors do not report the training emissions. - training_time: - value: 4 days on a 16x16 TPU v3 slice - explanation: > - Reported in the paper - [[Section 2.3]](https://openreview.net/pdf?id=1ikK0kHjvj). training_hardware: + explanation: "Reported in the paper [[Section 2.3]](https://openreview.net/pdf?id=1ikK0kHjvj).\n" value: 16x16 TPU v3 slice - explanation: > - Reported in the paper - [[Section 2.3]](https://openreview.net/pdf?id=1ikK0kHjvj). - quality_control: - value: none - explanation: > - Reported in the mitigations in the model card - [[Model Card]](https://openreview.net/pdf?id=1ikK0kHjvj#appendix.A). - # Downstream - access: - value: closed - explanation: > - The model access is limited to DeepMind researchers. The model won't be - released to the public - [[Model Card]](https://openreview.net/pdf?id=1ikK0kHjvj#appendix.A). - license: - value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - intended_uses: > - The intended uses are stated in the Gopher model card: "Learn to accomplish - a wide variety of tasks from expert demonstrations, such as playing video - games, controlling simulated embodiments, - and real world block stacking." - [[Model Card]](https://openreview.net/pdf?id=1ikK0kHjvj#appendix.A). - prohibited_uses: > - The model card lists the following as out of scope uses of the model: "Not - intended for commercial or production use. Military uses are - strictly prohibited." - [[Model Card]](https://openreview.net/pdf?id=1ikK0kHjvj#appendix.A). - monitoring: - value: unknown - explanation: > - There is no information on how DeepMind is internally monitoring - the use of the model. - feedback: > - The feedback for the model can be provided at the email linked in the model - card, reedscot at google.com - [[Model Card]](https://openreview.net/pdf?id=1ikK0kHjvj#appendix.A). 
- -- type: model - name: Sparrow - organization: DeepMind - description: '' - created_date: 2022-09-28 - url: https://arxiv.org/abs/2209.14375 - model_card: '' - modality: text + training_time: + explanation: "Reported in the paper [[Section 2.3]](https://openreview.net/pdf?id=1ikK0kHjvj).\n" + value: 4 days on a 16x16 TPU v3 slice + type: model + url: https://www.deepmind.com/blog/a-generalist-agent +- access: closed analysis: '' - size: 70B + created_date: 2022-09-28 dependencies: - Chinchilla - Google Search - Sparrow Rule reward model - Sparrow Preference reward model + description: '' + feedback: '' + intended_uses: '' + license: + explanation: "The asset isn't released, and hence the license is unknown.\n" + value: unknown + modality: + explanation: text + value: text; text + model_card: '' + monitoring: '' + name: Sparrow + organization: DeepMind + prohibited_uses: '' + quality_control: '' + size: 70B parameters (dense) training_emissions: '' - training_time: '' training_hardware: '' - quality_control: '' - access: closed + training_time: '' + type: model + url: https://arxiv.org/abs/2209.14375 +- access: closed + analysis: '' + created_date: 2021-12-08 + dependencies: [MassiveText] + description: '' + feedback: '' + intended_uses: '' license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - intended_uses: '' - prohibited_uses: '' + modality: + explanation: Text + value: text; text + model_card: '' monitoring: '' - feedback: '' - -- type: model name: RETRO organization: DeepMind - description: '' - created_date: 2021-12-08 - url: https://arxiv.org/abs/2112.04426 - model_card: '' - modality: Text - analysis: '' - size: 7.5B - dependencies: [MassiveText] + prohibited_uses: '' + quality_control: '' + size: 7.5B parameters (dense) training_emissions: '' - training_time: '' training_hardware: '' - quality_control: '' - access: closed + training_time: '' + type: model + url: https://arxiv.org/abs/2112.04426 +- access: closed + analysis: '' + created_date: 2022-09-28 + dependencies: [Chinchilla, Sparrow adversarial probing dataset] + description: '' + feedback: '' + intended_uses: '' license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - intended_uses: '' - prohibited_uses: '' + modality: + explanation: text + value: text; text + model_card: '' monitoring: '' - feedback: '' - -- type: model name: Sparrow Rule reward model organization: DeepMind - description: '' - created_date: 2022-09-28 - url: https://arxiv.org/abs/2209.14375 - model_card: '' - modality: text - analysis: '' - size: 70B - dependencies: [Chinchilla, Sparrow adversarial probing dataset] + prohibited_uses: '' + quality_control: '' + size: 70B parameters (dense) training_emissions: '' - training_time: '' training_hardware: '' - quality_control: '' - access: closed + training_time: '' + type: model + url: https://arxiv.org/abs/2209.14375 +- access: closed + analysis: '' + created_date: 2022-09-28 + dependencies: [Chinchilla, Sparrow response preference dataset] + description: '' + feedback: '' + intended_uses: '' license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. 
- intended_uses: '' - prohibited_uses: '' + modality: + explanation: text + value: text; text + model_card: '' monitoring: '' - feedback: '' - -- type: model name: Sparrow Preference reward model organization: DeepMind - description: '' - created_date: 2022-09-28 - url: https://arxiv.org/abs/2209.14375 - model_card: '' - modality: text - analysis: '' - size: 70B - dependencies: [Chinchilla, Sparrow response preference dataset] + prohibited_uses: '' + quality_control: '' + size: 70B parameters (dense) training_emissions: '' - training_time: '' training_hardware: '' - quality_control: '' - access: closed + training_time: '' + type: model + url: https://arxiv.org/abs/2209.14375 +- access: closed + analysis: '' + created_date: 2022-09-28 + datasheet: '' + dependencies: [Chinchilla] + description: '' + excluded: '' + feedback: '' + included: '' + intended_uses: '' license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - intended_uses: '' - prohibited_uses: '' + modality: text monitoring: '' - feedback: '' - -- type: dataset name: Sparrow adversarial probing dataset organization: DeepMind - description: '' - created_date: 2022-09-28 - url: https://arxiv.org/abs/2209.14375 - datasheet: '' - modality: text - size: 27k ratings + prohibited_uses: '' + quality_control: '' sample: [] + size: 27k ratings + type: dataset + url: https://arxiv.org/abs/2209.14375 +- access: closed analysis: '' + created_date: 2022-09-28 + datasheet: '' dependencies: [Chinchilla] - included: '' + description: '' excluded: '' - quality_control: '' - access: closed + feedback: '' + included: '' + intended_uses: '' license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - intended_uses: '' - prohibited_uses: '' + modality: text monitoring: '' - feedback: '' - -- type: dataset name: Sparrow response preference dataset organization: DeepMind - description: '' - created_date: 2022-09-28 - url: https://arxiv.org/abs/2209.14375 - datasheet: '' - modality: text - size: 72k comparisons + prohibited_uses: '' + quality_control: '' sample: [] + size: 72k comparisons + type: dataset + url: https://arxiv.org/abs/2209.14375 +- access: closed analysis: '' - dependencies: [Chinchilla] - included: '' - excluded: '' - quality_control: '' - access: closed + created_date: 2022-03-16 + dependencies: [Gopher, Google Search, GopherCite reward model] + description: '' + feedback: '' + intended_uses: '' license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. 
- intended_uses: '' - prohibited_uses: '' + modality: + explanation: Text + value: text; text + model_card: '' monitoring: '' - feedback: '' - -- type: model name: GopherCite organization: DeepMind - description: '' - created_date: 2022-03-16 - url: https://storage.googleapis.com/deepmind-media/Teaching%20language%20models%20to%20support%20answers%20with%20verified%20quotes/Teaching%20language%20models%20to%20support%20answers%20with%20verified%20quotes.pdf - model_card: '' - modality: Text - analysis: '' - size: 280B - dependencies: [Gopher, Google Search, GopherCite reward model] + prohibited_uses: '' + quality_control: '' + size: 280B parameters (dense) training_emissions: '' - training_time: '' training_hardware: '' - quality_control: '' - access: closed + training_time: '' + type: model + url: https://storage.googleapis.com/deepmind-media/Teaching%20language%20models%20to%20support%20answers%20with%20verified%20quotes/Teaching%20language%20models%20to%20support%20answers%20with%20verified%20quotes.pdf +- access: closed + analysis: '' + created_date: 2022-03-16 + dependencies: [Gopher, GopherCite Preference dataset] + description: '' + feedback: '' + intended_uses: '' license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - intended_uses: '' - prohibited_uses: '' + modality: + explanation: text + value: text; text + model_card: '' monitoring: '' - feedback: '' - -- type: model name: GopherCite reward model organization: DeepMind - description: '' - created_date: 2022-03-16 - url: https://storage.googleapis.com/deepmind-media/Teaching%20language%20models%20to%20support%20answers%20with%20verified%20quotes/Teaching%20language%20models%20to%20support%20answers%20with%20verified%20quotes.pdf - model_card: '' - modality: text - analysis: '' - size: 7B - dependencies: [Gopher, GopherCite Preference dataset] + prohibited_uses: '' + quality_control: '' + size: 7B parameters (dense) training_emissions: '' - training_time: '' training_hardware: '' - quality_control: '' - access: closed + training_time: '' + type: model + url: https://storage.googleapis.com/deepmind-media/Teaching%20language%20models%20to%20support%20answers%20with%20verified%20quotes/Teaching%20language%20models%20to%20support%20answers%20with%20verified%20quotes.pdf +- access: closed + analysis: '' + created_date: 2022-03-16 + datasheet: '' + dependencies: [Gopher, Google Search] + description: '' + excluded: '' + feedback: '' + included: '' + intended_uses: '' license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. 
- intended_uses: '' - prohibited_uses: '' + modality: text monitoring: '' - feedback: '' - -- type: dataset name: GopherCite Preference dataset organization: DeepMind - description: '' - created_date: 2022-03-16 - url: https://storage.googleapis.com/deepmind-media/Teaching%20language%20models%20to%20support%20answers%20with%20verified%20quotes/Teaching%20language%20models%20to%20support%20answers%20with%20verified%20quotes.pdf - datasheet: '' - modality: text - size: 33k response pairs + prohibited_uses: '' + quality_control: '' sample: [] + size: 33k response pairs + type: dataset + url: https://storage.googleapis.com/deepmind-media/Teaching%20language%20models%20to%20support%20answers%20with%20verified%20quotes/Teaching%20language%20models%20to%20support%20answers%20with%20verified%20quotes.pdf +- access: closed analysis: '' - dependencies: [Gopher, Google Search] - included: '' - excluded: '' - quality_control: '' - access: closed + created_date: 2022-09-29 + dependencies: [Chinchilla] + description: '' + feedback: '' + intended_uses: '' license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - intended_uses: '' - prohibited_uses: '' + modality: + explanation: text + value: text; text + model_card: '' monitoring: '' - feedback: '' - -- type: model name: Dramatron organization: DeepMind - description: '' - created_date: 2022-09-29 - url: https://arxiv.org/abs/2209.14958 - model_card: '' - modality: text - analysis: '' - size: 70B parameters (dense model) - dependencies: [Chinchilla] + prohibited_uses: '' + quality_control: '' + size: 70B parameters (dense) training_emissions: '' - training_time: '' training_hardware: '' - quality_control: '' - access: closed - license: - value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - intended_uses: '' - prohibited_uses: '' - monitoring: '' - feedback: '' + training_time: '' + type: model + url: https://arxiv.org/abs/2209.14958 diff --git a/assets/duckduckgo.yaml b/assets/duckduckgo.yaml index 465b62eb..a1defb85 100644 --- a/assets/duckduckgo.yaml +++ b/assets/duckduckgo.yaml @@ -1,27 +1,25 @@ --- - -- type: application - name: DuckAssist - organization: DuckDuckGo +- access: open + adaptation: '' + created_date: 2023-03-08 + dependencies: [Anthropic API] description: The first Instant Answer in DuckDuckGo search results to use natural language technology to generate answers to search queries using Wikipedia and other related sources - created_date: 2023-03-08 - url: https://spreadprivacy.com/duckassist-launch/ - dependencies: [Anthropic API] - adaptation: '' - output_space: '' - quality_control: '' - access: open + failures: '' + feedback: '' + intended_uses: '' license: + explanation: "The asset is no longer available, and hence the license is unknown.\n" value: unknown - explanation: > - The asset is no longer available, and hence the license is unknown. 
- terms_of_service: '' - intended_uses: '' - prohibited_uses: '' monitoring: '' - feedback: '' monthly_active_users: '' + name: DuckAssist + organization: DuckDuckGo + output_space: '' + prohibited_uses: '' + quality_control: '' + terms_of_service: '' + type: application + url: https://spreadprivacy.com/duckassist-launch/ user_distribution: '' - failures: '' diff --git a/assets/duolingo.yaml b/assets/duolingo.yaml index de25f466..44e4e886 100644 --- a/assets/duolingo.yaml +++ b/assets/duolingo.yaml @@ -1,62 +1,58 @@ --- - -- type: application - name: Duolingo Explain My Answer - organization: Duolingo +- access: limited + adaptation: '' + created_date: 2023-03-14 + dependencies: [GPT-4 API] description: Explain My Answer offers learners the chance to learn more about their response in a lesson (whether their answer was correct or incorrect!) By tapping a button after certain exercise types, learners can enter a chat with Duo to get a simple explanation on why their answer was right or wrong, and ask for examples or further clarification. - created_date: 2023-03-14 - url: https://blog.duolingo.com/duolingo-max/ - dependencies: [GPT-4 API] - adaptation: '' - output_space: '' - quality_control: '' - access: limited + failures: '' + feedback: '' + intended_uses: '' license: + explanation: "Users are granted a limited license per Duolingo Terms.\n" value: custom - explanation: > - Users are granted a limited license per Duolingo Terms. - terms_of_service: https://www.duolingo.com/terms - intended_uses: '' - prohibited_uses: '' monitoring: '' - feedback: '' monthly_active_users: '' - user_distribution: '' - failures: '' - -- type: application - name: Duolingo Max + name: Duolingo Explain My Answer organization: Duolingo + output_space: '' + prohibited_uses: '' + quality_control: '' + terms_of_service: https://www.duolingo.com/terms + type: application + url: https://blog.duolingo.com/duolingo-max/ + user_distribution: '' +- access: limited + adaptation: '' + created_date: 2023-03-14 + dependencies: [Duolingo Role Play, Duolingo Explain My Answer] description: Duolingo Max is a new subscription tier above Super Duolingo that gives learners access to two brand-new features and exercises - Explain My Answer and Roleplay. - created_date: 2023-03-14 - url: https://blog.duolingo.com/duolingo-max/ - dependencies: [Duolingo Role Play, Duolingo Explain My Answer] - adaptation: '' - output_space: '' - quality_control: '' - access: limited + failures: '' + feedback: '' + intended_uses: '' license: + explanation: "Users are granted a limited license per Duolingo Terms.\n" value: custom - explanation: > - Users are granted a limited license per Duolingo Terms. - terms_of_service: '' - intended_uses: '' - prohibited_uses: '' monitoring: '' - feedback: '' monthly_active_users: '' - user_distribution: '' - failures: '' - -- type: application - name: Duolingo Role Play + name: Duolingo Max organization: Duolingo + output_space: '' + prohibited_uses: '' + quality_control: '' + terms_of_service: '' + type: application + url: https://blog.duolingo.com/duolingo-max/ + user_distribution: '' +- access: limited + adaptation: '' + created_date: 2023-03-14 + dependencies: [GPT-4 API] description: Roleplay allows learners to practice real-world conversation skills with world characters in the app. These challenges, which earn XP, will live alongside the path as one of the “Side Quests” learners can access by tapping @@ -64,26 +60,24 @@ scenarios! 
Learners might discuss future vacation plans with Lin, order coffee at a
    café in Paris, go furniture shopping with Eddy, or ask a friend to go for
    a hike.
-  created_date: 2023-03-14
-  url: https://blog.duolingo.com/duolingo-max/
-  dependencies: [GPT-4 API]
-  adaptation: ''
-  output_space: ''
-  quality_control: ''
-  access: limited
+  failures: ''
+  feedback: ''
+  intended_uses: ''
   license:
+    explanation: "\"Subject to the terms of these Terms and Conditions, Duolingo\
+      \ grants you a non-transferable, non-exclusive license to download, install,\
+      \ and use one copy of each App in object code form only on an interactive\
+      \ wireless device that you own or control\" - excerpt from the Terms of Service\
+      \ document.\n"
     value: custom
-    explanation: >
-      "Subject to the terms of these Terms and Conditions, Duolingo grants you
-      a non-transferable, non-exclusive license to download, install, and use
-      one copy of each App in object code form only on an interactive wireless
-      device that you own or control" - excerpt from the Terms of Service
-      document.
-  terms_of_service: https://www.duolingo.com/terms
-  intended_uses: ''
-  prohibited_uses: ''
   monitoring: ''
-  feedback: ''
   monthly_active_users: ''
+  name: Duolingo Role Play
+  organization: Duolingo
+  output_space: ''
+  prohibited_uses: ''
+  quality_control: ''
+  terms_of_service: https://www.duolingo.com/terms
+  type: application
+  url: https://blog.duolingo.com/duolingo-max/
   user_distribution: ''
-  failures: ''
diff --git a/assets/eleutherai.yaml b/assets/eleutherai.yaml
index 524c8010..5813633b 100644
--- a/assets/eleutherai.yaml
+++ b/assets/eleutherai.yaml
@@ -1,16 +1,49 @@
 ---
-
-- type: dataset
-  name: The Pile
-  # General
-  organization: EleutherAI
-  description: >
-    A latge language model training dataset, used to train GPT-NeoX-20B.
+- access:
+    explanation: "The dataset is freely available to the public and can be downloaded\
+      \ from The Eye [[The Pile]](https://mystic.the-eye.eu/public/AI/pile/).\n"
+    value: open
+  analysis: "Analyses of the data's composition, document statistics, language/dialectal\
+    \ coverage, topical distribution, and biases are conducted in\
+    \ the paper [[The Pile Paper]](https://arxiv.org/pdf/2101.00027.pdf).\n"
   created_date: 2021-01-01
-  url: https://arxiv.org/pdf/2101.00027.pdf
   datasheet: https://arxiv.org/pdf/2201.07311.pdf
-  modality: text (English) and code
-  size: 825 GB
+  dependencies: []
+  description: "A large language model training dataset, used to train GPT-NeoX-20B.\n"
+  excluded: "Authors report that they have excluded some datasets \"because they\
+    \ were too small to be worth spending time or because the English component\
+    \ of the data did not merit inclusion on its own. Three datasets were excluded\
+    \ for other reasons: (1) US Congressional Records were excluded because it \"\
+    reflects the opinions and biases of the political class over the past 200 years,\
+    \ including segregationism and xenophobia.\" (2) Online Fanfiction resources\
+    \ amounting to Hundreds of GiB were excluded on logistical grounds. 
(3) Literotica,\ + \ platform where users can upload short-form erotic fiction, was excluded because\ + \ the authors decided to exclude fanfiction, the corpus would require significant\ + \ investigation, and corpus contain significant amount of stereotyping [[Appendix\ + \ B]](https://arxiv.org/pdf/2101.00027.pdf).\n" + feedback: "Feedback can be given by emailing the authors at contact at eleuther.ai.\n" + included: "The Pile data come from 22 sources, with over half of the data being\ + \ from Common Crawl (Pile-CC; 227GB), fiction and nonfiction books (Books3;\ + \ 101GB), biomedical articles (PubMed Central; 90GB), and code (Github; 95 GB).\ + \ Refer to the paper for full decomposition [[Table 1]](https://arxiv.org/pdf/2101.00027.pdf#table.caption.2).\n" + intended_uses: "The Pile was intended to be used as a high quality large text\ + \ dataset for language modeling tasks, explained in more detail in the paper\ + \ [[Section 1]](https://arxiv.org/pdf/2101.00027.pdf#section.1).\n" + license: + explanation: As indicated in the paper. + value: MIT + modality: + explanation: text (English) and code + value: code, text + monitoring: none + name: The Pile + organization: EleutherAI + prohibited_uses: none + quality_control: "In addition to the data inclusion and exclusion decisions, the\ + \ quality was controlled through filtering for English (pycld2 language classifier),\ + \ filtering for documents similar to OpenWebText2 (classifier on CommonCrawl),\ + \ and several forms of deduplication as detailed in the paper [[Appendix C]](https://arxiv.org/pdf/2101.00027.pdf#appendix.1.C)\ + \ [[Appendix D]](https://arxiv.org/pdf/2101.00027.pdf#appendix.1.D).\n" sample: - '...pot trending topics and the coverage around them. First up, there’s a bit of a visual redesign. Previously, clicking on a trending topic would highlight @@ -31,287 +64,204 @@ the defendants urge that this Court should reconsider the case of Green Mountain Marble Co. v. Highway Board, supra, and follow the Federal practice of looking to the evide - analysis: > - Analyses of the data's composition, document statistics, - language/dialectal coverage, topical distribution, and biases are - conducted are conducted in the paper - [[The Pile Paper]](https://arxiv.org/pdf/2101.00027.pdf). - # Construction - dependencies: [] - license: - value: MIT - explanation: As indicated in the paper. - included: > - The Pile data come from 22 sources, with over half of the data being from - Common Crawl (Pile-CC; 227GB), fiction and nonfiction books (Books3; 101GB), - biomedical articles (PubMed Central; 90GB), and code (Github; 95 GB). - Refer to the paper for full decomposition - [[Table 1]](https://arxiv.org/pdf/2101.00027.pdf#table.caption.2). - excluded: > - Authors report that they have excluded some datasets "because they were too - small to be worth spending time or because the English component of the data - did not merit inclusion on its own. Three datasets were excluded for other - reasons: (1) US Congressional Records were excluded because it "reflects the - opinions and biases of the political class over the past 200 years, - including segregationism and xenophobia." (2) Online Fanfiction resources - amounting to Hundreds of GiB were excluded on logistical grounds. 
- (3) Literotica, platform where users can upload short-form erotic fiction, - was excluded because the authors decided to exclude fanfiction, the - corpus would require significant investigation, and corpus contain - significant amount of stereotyping - [[Appendix B]](https://arxiv.org/pdf/2101.00027.pdf). - quality_control: > - In addition to the data inclusion and exclusion decisions, the quality was - controlled through filtering for English (pycld2 language classifier), - filtering for documents similar to OpenWebText2 (classifier on CommonCrawl), - and several forms of deduplication as detailed in the paper - [[Appendix C]](https://arxiv.org/pdf/2101.00027.pdf#appendix.1.C) - [[Appendix D]](https://arxiv.org/pdf/2101.00027.pdf#appendix.1.D). - # Downstream - access: + size: 825 GB + type: dataset + url: https://arxiv.org/pdf/2101.00027.pdf +- access: + explanation: "The model can be downloaded for free from [[The Eye]](https://mystic.the-eye.eu/public/AI/GPT-J-6B/step_383500.tar.zstd)\n" value: open - explanation: > - The dataset is freely available to the public and - can be downloaded from The Eye - [[The Pile]](https://mystic.the-eye.eu/public/AI/pile/). - intended_uses: > - The Pile was intended to be used as a high quality large text dataset for - language modeling tasks, explained in more detail in the paper - [[Section 1]](https://arxiv.org/pdf/2101.00027.pdf#section.1). - prohibited_uses: none - monitoring: none - feedback: > - Feedback can be given by emailing the authors at contact at eleuther.ai. - -- type: model - name: GPT-J - organization: EleutherAI - description: GPT-J is an open-source autoregressive language model. + analysis: '' created_date: - value: 2021-06-04 explanation: Date model blog post was published - url: https://arankomatsuzaki.wordpress.com/2021/06/04/gpt-j/ - model_card: '' - modality: text (English) - analysis: '' - size: 6B parameters (dense model) + value: 2021-06-04 dependencies: [The Pile] - training_emissions: '' - training_time: '' - training_hardware: 'TRC (Unspecified # of TPU v3-8s)' - quality_control: '' - access: - value: open - explanation: > - The model can be downloaded for free from [[The Eye]](https://mystic.the-eye.eu/public/AI/GPT-J-6B/step_383500.tar.zstd) + description: GPT-J is an open-source autoregressive language model. 
+  feedback: ''
+  intended_uses: ''
   license:
+    explanation: "As indicated in the [[Github repository]](https://github.com/kingoflolz/mesh-transformer-jax)\n"
     value: Apache 2.0
-    explanation: >
-      As indicated in the [[Github repository]](https://github.com/kingoflolz/mesh-transformer-jax)
-  intended_uses: ''
-  prohibited_uses: ''
+  modality:
+    explanation: text (English)
+    value: text; text
+  model_card: ''
   monitoring: ''
-  feedback: ''
-
-- type: model
-  name: GPT-Neo
+  name: GPT-J
   organization: EleutherAI
-  description: ''
-  created_date:
-    value: 2021-03-21
-    explanation: Date Github repo was update
-  url: https://github.com/EleutherAI/gpt-neo
-  model_card: ''
-  modality: Text (English)
-  analysis: ''
-  size: 2.7B parameters (dense model)
-  dependencies: [The Pile]
+  prohibited_uses: ''
+  quality_control: ''
+  size: 6B parameters (dense)
   training_emissions: ''
+  training_hardware: 'TRC (Unspecified # of TPU v3-8s)'
   training_time: ''
-  training_hardware: ''
-  quality_control: ''
-  access:
+  type: model
+  url: https://arankomatsuzaki.wordpress.com/2021/06/04/gpt-j/
+- access:
+    explanation: "The model can be downloaded for free from [[The Eye]](https://mystic.the-eye.eu/public/AI/gptneo-release/GPT3_2-7B/)\n"
     value: open
-    explanation: >
-      The model can be downloaded for free from [[The Eye]](https://mystic.the-eye.eu/public/AI/gptneo-release/GPT3_2-7B/)
-  license: MIT
+  analysis: ''
+  created_date:
+    explanation: Date Github repo was updated
+    value: 2021-03-21
+  dependencies: [The Pile]
+  description: ''
+  feedback: ''
   intended_uses: ''
-  prohibited_uses: ''
+  license: MIT
+  modality:
+    explanation: Text (English)
+    value: text; text
+  model_card: ''
   monitoring: ''
-  feedback: ''
-
-- type: model
-  name: GPT-NeoX
-  # General
+  name: GPT-Neo
   organization: EleutherAI
-  description: >
-    GPT-NeoX (20B) is an open-sourced autoregressive language model.
+  prohibited_uses: ''
+  quality_control: ''
+  size: 2.7B parameters (dense)
+  training_emissions: ''
+  training_hardware: ''
+  training_time: ''
+  type: model
+  url: https://github.com/EleutherAI/gpt-neo
+- access:
+    explanation: "The model can be downloaded for free from The Eye [[GPT-NeoX-20B]](https://mystic.the-eye.eu/public/AI/models/GPT-NeoX-20B/).\n"
+    value: open
+  analysis: "The model was evaluated on standard NLP benchmarks: LAMBADA, ANLI,\
+    \ HellaSwag, MMLU among others [[Section 4]](http://eaidata.bmk.sh/data/GPT_NeoX_20B.pdf#section.4).\n"
   created_date: 2022-02-02
-  url: http://eaidata.bmk.sh/data/GPT_NeoX_20B.pdf
-  model_card: https://mystic.the-eye.eu/public/AI/models/GPT-NeoX-20B/20B_model_card.md
-  modality: text (English) and code
-  size: 20B parameters (dense model)
-  analysis: >
-    The model was evaluated on standard NLP benchmarks: LAMBADA, ANLI,
-    HellaSwag, MMLU among others
-    [[Section 4]](http://eaidata.bmk.sh/data/GPT_NeoX_20B.pdf#section.4).
-  # Construction
   dependencies: [The Pile]
+  description: "GPT-NeoX (20B) is an open-sourced autoregressive language model.\n"
+  feedback: "Feedback can be provided using the # 20b channel in EleutherAI Discord\
+    \ group [[EleutherAI Blog Post]](https://blog.eleuther.ai/announcing-20b/).\
+    \ Find the Discord link in the FAQ page [[FAQ]](https://www.eleuther.ai/faq/).\n"
+  intended_uses: "As stated in the model card: \"GPT-NeoX-20B learns an inner representation\
+    \ of the English language that can be used to extract features useful for downstream\
+    \ tasks. The model is best at what it was pretrained for however, which is generating\ 
Due to the generality of the pretraining set, it has acquired\ + \ the ability to generate completions across a wide range of tasks - from programming\ + \ to fiction writing [[Model Card]](https://mystic.the-eye.eu/public/AI/models/GPT-NeoX-20B/20B_model_card.md).\"\ + \n" + license: + explanation: "As indicated in the accompanying blog post [[EleutherAI Blog Post]](https://blog.eleuther.ai/announcing-20b/).\n" + value: Apache 2.0 + modality: + explanation: text (English) and code + value: code, text; code, text + model_card: https://mystic.the-eye.eu/public/AI/models/GPT-NeoX-20B/20B_model_card.md + monitoring: none + name: GPT-NeoX + organization: EleutherAI + prohibited_uses: none + quality_control: none + size: 20B parameters (dense) training_emissions: + explanation: "The amount of emission during the development and training of\ + \ the model based on the author's estimation [[Section 6.4]](http://eaidata.bmk.sh/data/GPT_NeoX_20B.pdf#subsection.6.4).\n" value: 31.73 tCO2e - explanation: > - The amount of emission during the development and training of - the model based on the author's estimation - [[Section 6.4]](http://eaidata.bmk.sh/data/GPT_NeoX_20B.pdf#subsection.6.4). - training_time: - value: 47.10 petaflop/s-day - explanation: > - Training time was reported as 1830 hours reported by the authors, equaling - 76.25 days. - [[Section 6.4]](http://eaidata.bmk.sh/data/GPT_NeoX_20B.pdf#subsection.6.4). - The authors report that 96 (12 * 8) A100 GPUs were used during the - training. - The A100 GPUs have a single precision performance of 0.0195 petaflops - [[A100 Datasheet]](https://www.nvidia.com/content/dam/en-zz/Solutions/Data-Center/a100/pdf/nvidia-a100-datasheet-us-nvidia-1758950-r4-web.pdf). - Assuming the estimated utilization is 33%, following - [[OpenAI AI and Computer Blog]](https://openai.com/blog/ai-and-compute/#addendum), - the training time is 47.10 petaflop/s-day (76.25 * 96 * 0.0195 * 0.33). training_hardware: + explanation: "As outline by the authors [[Section 2.3]](http://eaidata.bmk.sh/data/GPT_NeoX_20B.pdf#subsection.2.3)\n" value: 12 x 8 A100 GPUs - explanation: > - As outline by the authors - [[Section 2.3]](http://eaidata.bmk.sh/data/GPT_NeoX_20B.pdf#subsection.2.3) - quality_control: none - # Downstream - access: - value: open - explanation: > - The model can be downloaded for free The Eye - [[GPT-NeoX-20B]](https://mystic.the-eye.eu/public/AI/models/GPT-NeoX-20B/). - license: - value: Apache 2.0 - explanation: > - As indicated in the accompanying blog post - [[EleutherAI Blog Post]](https://blog.eleuther.ai/announcing-20b/). - intended_uses: > - As stated in the model card: "GPT-NeoX-20B learns an inner representation - of the English language that can be used to extract features useful for - downstream tasks. The model is best at what it was pretrained for however, - which is generating text from a prompt. - Due to the generality of the pretraining set, it has acquired the ability - to generate completions across a wide range of tasks - from programming to - fiction writing - [[Model Card]](https://mystic.the-eye.eu/public/AI/models/GPT-NeoX-20B/20B_model_card.md)." - prohibited_uses: none - monitoring: none - feedback: > - Feedback can be provided using the # 20b channel in EleutherAI Discord - group - [[EleutherAI Blog Post]](https://blog.eleuther.ai/announcing-20b/). - Find the Discord link in the FAQ page - [[FAQ]](https://www.eleuther.ai/faq/). 
- -- type: application - name: GooseAI API - # General - organization: GooseAI - description: > - GooseAI API is an API service providing access to NLP services. + training_time: + explanation: "Training time was reported as 1830 hours reported by the authors,\ + \ equaling 76.25 days. [[Section 6.4]](http://eaidata.bmk.sh/data/GPT_NeoX_20B.pdf#subsection.6.4).\ + \ The authors report that 96 (12 * 8) A100 GPUs were used during the training.\ + \ The A100 GPUs have a single precision performance of 0.0195 petaflops [[A100\ + \ Datasheet]](https://www.nvidia.com/content/dam/en-zz/Solutions/Data-Center/a100/pdf/nvidia-a100-datasheet-us-nvidia-1758950-r4-web.pdf).\ + \ Assuming the estimated utilization is 33%, following [[OpenAI AI and Computer\ + \ Blog]](https://openai.com/blog/ai-and-compute/#addendum), the training time\ + \ is 47.10 petaflop/s-day (76.25 * 96 * 0.0195 * 0.33).\n" + value: 47.10 petaflop/s-day + type: model + url: http://eaidata.bmk.sh/data/GPT_NeoX_20B.pdf +- access: + explanation: "GooseAI API can be accessed by signing up on the goose.ai website.\n" + value: limited + adaptation: unknown created_date: unknown - url: goose.ai - # Construction dependencies: [GPT-NeoX] - adaptation: unknown - output_space: - value: Text Generation, Text Completion - explanation: > - Question/Answer and Classification tasks are coming soon according to - GooseAI - [[Main Page]](goose.ai). - quality_control: unknown - # Downstream - access: - value: limited - explanation: > - GooseAI API can be accessed by signing up on the goose.ai website. + description: "GooseAI API is an API service providing access to NLP services.\n" + failures: unknown + feedback: + explanation: "In the \"Error Reporting and Feedback\" section of the Goose.ai\ + \ Terms of Service, GooseAI asks all the feedback to be sent to support at\ + \ goose.ai [[GooseAI Terms of Service]](https://goose.ai/docs/tos).\n" + value: Email support + intended_uses: "Intended to be used as an NLP infrastructure.\n" license: + explanation: "\"Subject to Customer’s strict compliance with this TOS, GooseAI\ + \ grants Customer a limited, non-exclusive, non-transferable, non-sublicensable,\ + \ revocable license to access and use the Platform as described in and subject\ + \ to this TOS\" - excerpt from the Terms of Service document.\n" value: custom - explanation: > - "Subject to Customer’s strict compliance with this TOS, GooseAI grants - Customer a limited, non-exclusive, non-transferable, non-sublicensable, - revocable license to access and use the Platform as described in and - subject to this TOS" - excerpt from the Terms of Service document. - terms_of_service: https://goose.ai/docs/tos - intended_uses: > - Intended to be used as an NLP infrastructure. - prohibited_uses: - value: > - Illegal or abusive activity, security violations, network abuse - explanation: > - Prohibited uses are detailed in the Acceptable Use Policy - [[GooseAI Acceptable Use Policy]](https://goose.ai/docs/aup). 
monitoring: + explanation: "In the \"GooseAI Monitoring and Enforcement\" section of GooseAI's\ + \ Acceptable Use Policy (AUP), it is stated that Goose.AI has the right to\ + \ investigate any suspected violation of its AUP [[GooseAI Acceptable Use\ + \ Policy]](https://goose.ai/docs/aup).\n" value: At will monitoring by the provider - explanation: > - In the "GooseAI Monitoring and Enforcement" section of GooseAI's - Acceptable Use Policy (AUP), it is stated that Goose.AI has the right to - investigate any suspected violation of its AUP - [[GooseAI Acceptable Use Policy]](https://goose.ai/docs/aup). - feedback: - value: Email support - explanation: > - In the "Error Reporting and Feedback" section of the Goose.ai Terms of - Service, GooseAI asks all the feedback to be sent to support at goose.ai - [[GooseAI Terms of Service]](https://goose.ai/docs/tos). - # Deployment monthly_active_users: unknown + name: GooseAI API + organization: GooseAI + output_space: + explanation: "Question/Answer and Classification tasks are coming soon according\ + \ to GooseAI [[Main Page]](goose.ai).\n" + value: Text Generation, Text Completion + prohibited_uses: + explanation: "Prohibited uses are detailed in the Acceptable Use Policy [[GooseAI\ + \ Acceptable Use Policy]](https://goose.ai/docs/aup).\n" + value: "Illegal or abusive activity, security violations, network abuse\n" + quality_control: unknown + terms_of_service: https://goose.ai/docs/tos + type: application + url: goose.ai user_distribution: unknown - failures: unknown - -- type: model - name: VQGAN-CLIP - organization: EleutherAI - description: VQGAN-CLIP is a model that better generates and edits images using - a multimodal encoder to guide image generation. - created_date: 2022-09-04 - url: https://arxiv.org/pdf/2204.08583.pdf - model_card: '' - modality: text input with image output +- access: open analysis: Evaluated by human testers rating alignment of text input, image output pairs. - size: 227M parameters + created_date: 2022-09-04 dependencies: [VQGAN, CLIP] + description: VQGAN-CLIP is a model that better generates and edits images using + a multimodal encoder to guide image generation. + feedback: '' + intended_uses: '' + license: MIT + modality: + explanation: text input with image output + value: text; image + model_card: '' + monitoring: '' + name: VQGAN-CLIP + organization: EleutherAI + prohibited_uses: '' + quality_control: '' + size: 227M parameters (dense) training_emissions: unknown - training_time: Less than 1 V100-hour training_hardware: 1 NVIDIA Tesla K80 GPU - quality_control: '' - access: open - license: MIT + training_time: Less than 1 V100-hour + type: model + url: https://arxiv.org/pdf/2204.08583.pdf +- access: open + analysis: Evaluated on a variety of NLP benchmarks and found to perform similarly + to OPT and BLOOM models. 
+ created_date: 2023-05-31 + dependencies: [The Pile] + description: A suite of 16 LLMs all trained on public data seen in the exact same + order and ranging in size from 70M to 12B parameters + feedback: https://huggingface.co/EleutherAI/pythia-6.9b/discussions intended_uses: '' - prohibited_uses: '' + license: Apache 2.0 + modality: + explanation: natural language text + value: text; text + model_card: https://huggingface.co/EleutherAI/pythia-12b monitoring: '' - feedback: '' - -- type: model name: Pythia organization: Eleuther AI - description: A suite of 16 LLMs all trained on public data seen in the exact same - order and ranging in size from 70M to 12B parameters - created_date: 2023-05-31 - url: https://arxiv.org/pdf/2304.01373.pdf - model_card: https://huggingface.co/EleutherAI/pythia-12b - modality: natural language text - analysis: Evaluated on a variety of NLP benchmarks and found to perform similarly - to OPT and BLOOM models. - size: 12B parameters - dependencies: [The Pile] + prohibited_uses: '' + quality_control: '' + size: 12B parameters (dense) training_emissions: '' - training_time: '' training_hardware: 64 A100 GPUs - quality_control: '' - access: open - license: Apache 2.0 - intended_uses: '' - prohibited_uses: '' - monitoring: '' - feedback: https://huggingface.co/EleutherAI/pythia-6.9b/discussions + training_time: '' + type: model + url: https://arxiv.org/pdf/2304.01373.pdf diff --git a/assets/ens.yaml b/assets/ens.yaml index 5209fe3d..b728b918 100644 --- a/assets/ens.yaml +++ b/assets/ens.yaml @@ -1,32 +1,37 @@ --- - -- type: dataset - name: HowTo100M - organization: École Normale Supérieure, Inria - description: HowTo100M is a large-scale dataset of narrated videos with an emphasis - on instructional videos where content creators teach complex tasks with an explicit - intention of explaining the visual content on screen. HowTo100M features a total - of 136M video clips with captions sourced from 1.2M Youtube videos (15 years - of video) and 23k activities from domains such as cooking, hand crafting, personal - care, gardening or fitness. - created_date: - value: 2019-06-07 - explanation: The date the [[paper]](https://arxiv.org/abs/1906.03327) was submitted. - url: https://arxiv.org/pdf/1906.03327.pdf - datasheet: https://arxiv.org/pdf/1906.03327.pdf - modality: text (English) and video - size: 136M video clips - sample: [] +- access: + explanation: Dataset, evaluation code and models are publicly available at the + [[HowTo100M dataset webpage]](https://www.di.ens.fr/willow/research/howto100m/). + value: open analysis: + explanation: See [[Experiments]](https://arxiv.org/pdf/1906.03327v2.pdf#section.5) value: Authors use the dataset to learn a joint text-video embedding by leveraging more than 130M video clip-caption pairs. They then evaluate the learned embeddings on the tasks of localizing steps in instructional videos of CrossTask and textbased video retrieval on YouCook2, MSR-VTT and LSMDC datasets. They show that their learned embedding can perform better compared to models trained on existing carefully annotated but smaller video description datasets. - explanation: See [[Experiments]](https://arxiv.org/pdf/1906.03327v2.pdf#section.5) + created_date: + explanation: The date the [[paper]](https://arxiv.org/abs/1906.03327) was submitted. 
+ value: 2019-06-07 + datasheet: https://arxiv.org/pdf/1906.03327.pdf dependencies: [YouTube] + description: HowTo100M is a large-scale dataset of narrated videos with an emphasis + on instructional videos where content creators teach complex tasks with an explicit + intention of explaining the visual content on screen. HowTo100M features a total + of 136M video clips with captions sourced from 1.2M Youtube videos (15 years + of video) and 23k activities from domains such as cooking, hand crafting, personal + care, gardening or fitness. + excluded: + explanation: See [[Data collection]](https://arxiv.org/pdf/1906.03327v2.pdf#subsection.3.1) + value: Categories such as Relationships and Finance and Business, that may be + more abstract, are excluded. Videos with less than 100 views are removed. + Authors also ignore videos that have less than 100 words. Videos longer than + 2,000 seconds are removed. As some videos may appear in several tasks, the + videos are deduplicated based on YouTube IDs. + feedback: '' included: + explanation: See [[Data collection]](https://arxiv.org/pdf/1906.03327v2.pdf#subsection.3.1) value: The dataset features 1.22 million videos from YouTube with a primary focus on videos containing "visual tasks", that involve some interaction with the physical world (e.g. Making peanut butter, Pruning a tree) as compared @@ -38,29 +43,22 @@ Arts and Entertainment, Computers and Electronics). They also restrict to the top 200 YouTube search results, as the latter ones may not be related to the query task. - explanation: See [[Data collection]](https://arxiv.org/pdf/1906.03327v2.pdf#subsection.3.1) - excluded: - value: Categories such as Relationships and Finance and Business, that may be - more abstract, are excluded. Videos with less than 100 views are removed. - Authors also ignore videos that have less than 100 words. Videos longer than - 2,000 seconds are removed. As some videos may appear in several tasks, the - videos are deduplicated based on YouTube IDs. - explanation: See [[Data collection]](https://arxiv.org/pdf/1906.03327v2.pdf#subsection.3.1) - quality_control: '' - access: - value: open - explanation: Dataset, evaluation code and models are publicly available at the - [[HowTo100M dataset webpage]](https://www.di.ens.fr/willow/research/howto100m/). - license: Apache 2.0 intended_uses: '' + license: Apache 2.0 + modality: {} + monitoring: '' + name: HowTo100M + organization: École Normale Supérieure, Inria prohibited_uses: - value: > - No uses are explicitly prohibited by the authors. They note the following - limitations of the dataset: "We note that the distribution of identities and - activities in the HowTo100M dataset may not be representative of the global - human population and the diversity in society. Please be careful of unintended - societal, gender, racial and other biases when training or deploying models - trained on this data." explanation: See [[HowTo100M dataset webpage]](https://www.di.ens.fr/willow/research/howto100m/) - monitoring: '' - feedback: '' + value: "No uses are explicitly prohibited by the authors. They note the following\ + \ limitations of the dataset: \"We note that the distribution of identities\ + \ and activities in the HowTo100M dataset may not be representative of the\ + \ global human population and the diversity in society. 
Please be careful\ + \ of unintended societal, gender, racial and other biases when training or\ + \ deploying models trained on this data.\"\n" + quality_control: '' + sample: [] + size: 136M video clips + type: dataset + url: https://arxiv.org/pdf/1906.03327.pdf diff --git a/assets/faraday.yaml b/assets/faraday.yaml index 7c986cb4..a1af0488 100644 --- a/assets/faraday.yaml +++ b/assets/faraday.yaml @@ -1,27 +1,26 @@ --- - -- type: application - name: ARES - organization: Faraday Lab - description: ARES is a text-to-image generator based on Stable Diffusion. The - goal is to provide a simple tool with a user interface allowing mainstream AI - access for artists and creators. +- access: open + adaptation: '' created_date: - value: 2023-04-26 explanation: The date Faraday Lab's hugging face card was last updated. Date for ARES release is not published on site. - url: https://faradaylab.fr/ + value: 2023-04-26 dependencies: [Stable Diffusion] - adaptation: '' - output_space: generated images - quality_control: '' - access: open - license: unknown - terms_of_service: '' + description: ARES is a text-to-image generator based on Stable Diffusion. The + goal is to provide a simple tool with a user interface allowing mainstream AI + access for artists and creators. + failures: '' + feedback: '' intended_uses: '' - prohibited_uses: '' + license: unknown monitoring: '' - feedback: '' monthly_active_users: '' + name: ARES + organization: Faraday Lab + output_space: generated images + prohibited_uses: '' + quality_control: '' + terms_of_service: '' + type: application + url: https://faradaylab.fr/ user_distribution: '' - failures: '' diff --git a/assets/google.yaml b/assets/google.yaml index fb56a8dc..42aab482 100644 --- a/assets/google.yaml +++ b/assets/google.yaml @@ -1,100 +1,71 @@ --- - -# Datasets -- type: dataset - name: C4 - # General - organization: Google - description: The Colossal Clean Crawled Corpus (C4) is a processed version of - Common Crawl to facilitate transfer learning in NLP. +- access: + explanation: https://huggingface.co/datasets/c4 + value: open + analysis: https://arxiv.org/abs/2104.08758 created_date: - value: 2019-10-23 explanation: The date the T5 paper was made public. - url: https://arxiv.org/abs/1910.10683 + value: 2019-10-23 datasheet: https://huggingface.co/datasets/c4 - modality: Text (English) - size: 750GB - sample: [https://huggingface.co/datasets/c4/viewer/en/train] - analysis: https://arxiv.org/abs/2104.08758 - # Construction dependencies: [CommonCrawl] - license: ODC-By 1.0 + description: The Colossal Clean Crawled Corpus (C4) is a processed version of + Common Crawl to facilitate transfer learning in NLP. + excluded: "Data was filtered for English using langdetect. Further, data was filtered\ + \ to end in terminal punctuation, to remove short pages (less than 5 sentences),\ + \ and to remove \"Dirty, Naughty, Obscene or Otherwise Bad Words\".\n" + feedback: https://huggingface.co/datasets/c4/discussions included: none - excluded: > - Data was filtered for English using langdetect. Further, data was filtered - to end in terminal punctuation, to remove short pages (less than 5 sentences), - and to remove "Dirty, Naughty, Obscene or Otherwise Bad Words". - quality_control: > - Data filtering excluded obscene words from a block list as well - as short documents and some deduplication was done based on string overlap. 
- # Downstream - access: - value: open - explanation: https://huggingface.co/datasets/c4 intended_uses: To faciliate transfer learning research in NLP. - prohibited_uses: none + license: ODC-By 1.0 + modality: {} monitoring: none - feedback: https://huggingface.co/datasets/c4/discussions - -- type: dataset - name: Internal Google BERT dataset - # General + name: C4 organization: Google - description: > - The dataset used to train Internal Google BERT models. + prohibited_uses: none + quality_control: "Data filtering excluded obscene words from a block list as well\ + \ as short documents and some deduplication was done based on string overlap.\n" + sample: [https://huggingface.co/datasets/c4/viewer/en/train] + size: 750GB + type: dataset + url: https://arxiv.org/abs/1910.10683 +- access: closed + analysis: unknown created_date: + explanation: "The date of the Google product update blog announcing that BERT\ + \ models were for ranking and featured snippets in Search.\n" value: 2019-11-25 - explanation: > - The date of the Google product update blog announcing that BERT models - were for ranking and featured snippets in Search. - url: https://blog.google/products/search/search-language-understanding-bert/ datasheet: none - modality: text - size: unknown - sample: [] - analysis: unknown - # Construction dependencies: [] - license: - value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. + description: "The dataset used to train Internal Google BERT models.\n" + excluded: unknown + feedback: unknown included: + explanation: "Although we don't exactly know the contents of the Internal Google\ + \ BERT dataset, it likely includes contents from web pages and search queries.\n" value: Web pages, and search queries - explanation: > - Although we don't exactly know the contents of the Internal Google BERT - dataset, it likely includes contents from web pages and search queries. - excluded: unknown - quality_control: unknown - # Downstream - access: closed intended_uses: + explanation: "We don't have an exhaustive list of the intended use cases for\ + \ the Internal Google BERT dataset, but we know that BERT was used in Google\ + \ Search.\n" value: unknown - explanation: > - We don't have an exhaustive list of the intended use cases for the - Internal Google BERT dataset, but we know that BERT was used in Google - Search. - prohibited_uses: unknown + license: + explanation: "The asset isn't released, and hence the license is unknown.\n" + value: unknown + modality: {} monitoring: unknown - feedback: unknown - -- type: dataset - name: Conceptual Captions - # General + name: Internal Google BERT dataset organization: Google - description: > - A dataset containing 3 million (image-URL, caption) pairs designed for the - training and evaluation of machine learned image captioning systems. - created_date: - value: 2018-07-01 - explanation: Due to the lack of information about the exact date, it is assumed - to be the 1st of the known month of creation. 
- url: https://aclanthology.org/P18-1238/ - datasheet: none - modality: text(English) and image - size: 3.3M (image, text) pairs + prohibited_uses: unknown + quality_control: unknown sample: [] + size: unknown + type: dataset + url: https://blog.google/products/search/search-language-understanding-bert/ +- access: + explanation: Dataset can be downloaded at [[Download]](https://ai.google.com/research/ConceptualCaptions/download) + value: open analysis: + explanation: See [[Experimental Results]](https://aclanthology.org/P18-1238.pdf#section.5) value: Authors evaluate the dataset on two image captioning models - RNN-based model and Transformer model, under two experimental conditions - using the training & development sets provided by the MS COCO dataset, versus training @@ -104,708 +75,631 @@ evaluations indicate that the Conceptual-based models are superior. Automatic models fail to corroborate the human evaluation results. This highlights the weakness of these automatic metrics. - explanation: See [[Experimental Results]](https://aclanthology.org/P18-1238.pdf#section.5) - # Construction + created_date: + explanation: Due to the lack of information about the exact date, it is assumed + to be the 1st of the known month of creation. + value: 2018-07-01 + datasheet: none dependencies: [] - license: - value: Conceptual Captions License - explanation: > - [[Conceptual Captions License]](https://github.com/google-research-datasets/conceptual-captions/blob/master/LICENSE) - included: '' + description: "A dataset containing 3 million (image-URL, caption) pairs designed\ + \ for the training and evaluation of machine learned image captioning systems.\n" excluded: - value: | - The following filtering steps are applied in the given order: - 1. Image-based Filtering - "It only keeps JPEG images where both dimensions are greater than 400 pixels, and the ratio of larger to smaller dimension is no more than 2. It excludes images that trigger pornography or profanity detectors. These filters discard more than 65% of the candidates." - 2. Text-based Filtering - "Candidates with no determiner, no noun, or no preposition are discarded; candidates with a high noun ratio are also discarded; candidates with a high rate of token repetition are discarded; candidates where the first word is not capitalized, or with too high capitalized-word ratio are discarded; we use a vocabulary VW of 1B token types, appearing at least 5 times in the English Wikipedia, and discard candidates that contain tokens that are not found in this vocabulary. candidates that score too high or too low on the polarity annotations, or trigger the pornography/profanity detectors, are discarded; predefined boiler-plate prefix/suffix sequences matching the text are cropped, e.g. “click to enlarge picture”, “stock photo”; we also drop text which begins/ends in certain patterns, e.g. “embedded image permalink”, “profile photo”. These filters only allow around 3% of the incoming candidates to pass to the later stages." - 3. Image&Text-based Filtering - "We filter out candidates for which none of the text tokens can be mapped to the content of the image. This filter discards around 60% of the incoming candidates." - 4. 
Text Transformation with Hypernymization - "Noun modifiers of certain types (proper nouns, numbers, units) are removed; dates, durations, and preposition-based locations (e.g., "in Los Angeles") are removed; named-entities are identified, matched against the KG entries, and substitute with their hypernym; resulting coordination noun-phrases with the same head (e.g., "actor and actor") are resolved into a single-head, pluralized form (e.g., "actors"). Around 20% of samples are discarded during this transformation. We then cluster all resolved entities (e.g., 2560 "actor", "dog", "neighborhood", etc.) and keep only the candidates for which all detected types have a count of over 100 (around 55% of the candidates)." - explanation: See [[Conceptual Captions Dataset Creation]](https://aclanthology.org/P18-1238.pdf#section.3) - quality_control: - value: Input candidate (image, caption) pairs pass through several stages of - filtering and processing to ensure quality. explanation: See [[Conceptual Captions Dataset Creation]](https://aclanthology.org/P18-1238.pdf#section.3) - # Downstream - access: - value: open - explanation: Dataset can be downloaded at [[Download]](https://ai.google.com/research/ConceptualCaptions/download) - intended_uses: '' - prohibited_uses: unknown - monitoring: unknown + value: "The following filtering steps are applied in the given order:\n1. Image-based\ + \ Filtering - \"It only keeps JPEG images where both dimensions are greater\ + \ than 400 pixels, and the ratio of larger to smaller dimension is no more\ + \ than 2. It excludes images that trigger pornography or profanity detectors.\ + \ These filters discard more than 65% of the candidates.\"\n2. Text-based\ + \ Filtering - \"Candidates with no determiner, no noun, or no preposition\ + \ are discarded; candidates with a high noun ratio are also discarded; candidates\ + \ with a high rate of token repetition are discarded; candidates where the\ + \ first word is not capitalized, or with too high capitalized-word ratio are\ + \ discarded; we use a vocabulary VW of 1B token types, appearing at least\ + \ 5 times in the English Wikipedia, and discard candidates that contain tokens\ + \ that are not found in this vocabulary. candidates that score too high or\ + \ too low on the polarity annotations, or trigger the pornography/profanity\ + \ detectors, are discarded; predefined boiler-plate prefix/suffix sequences\ + \ matching the text are cropped, e.g. “click to enlarge picture”, “stock photo”;\ + \ we also drop text which begins/ends in certain patterns, e.g. “embedded\ + \ image permalink”, “profile photo”. These filters only allow around 3% of\ + \ the incoming candidates to pass to the later stages.\"\n3. Image&Text-based\ + \ Filtering - \"We filter out candidates for which none of the text tokens\ + \ can be mapped to the content of the image. This filter discards around 60%\ + \ of the incoming candidates.\"\n4. Text Transformation with Hypernymization\ + \ - \"Noun modifiers of certain types (proper nouns, numbers, units) are removed;\ + \ dates, durations, and preposition-based locations (e.g., \"in Los Angeles\"\ + ) are removed; named-entities are identified, matched against the KG entries,\ + \ and substitute with their hypernym; resulting coordination noun-phrases\ + \ with the same head (e.g., \"actor and actor\") are resolved into a single-head,\ + \ pluralized form (e.g., \"actors\"). Around 20% of samples are discarded\ + \ during this transformation. 
We then cluster all resolved entities (e.g.,\ + \ 2560 \"actor\", \"dog\", \"neighborhood\", etc.) and keep only the candidates\ + \ for which all detected types have a count of over 100 (around 55% of the\ + \ candidates).\"\n" feedback: Feedback can be provided by creating an issue in the [[Conceptual Captions GitHub repository]](https://github.com/google-research-datasets/conceptual-captions) or by emailing at conceptual-captions at google.com - -- type: dataset - name: Conceptual 12M - # General + included: '' + intended_uses: '' + license: + explanation: "[[Conceptual Captions License]](https://github.com/google-research-datasets/conceptual-captions/blob/master/LICENSE)\n" + value: Conceptual Captions License + modality: {} + monitoring: unknown + name: Conceptual Captions organization: Google - description: > - A dataset with 12 million image-text pairs specifically meant to be used for - vision-and-language pre-training. - created_date: - value: 2021-02-17 - explanation: > - The date the [[paper]](https://arxiv.org/abs/2102.08981) was submitted. - url: https://arxiv.org/pdf/2102.08981.pdf - datasheet: none - modality: text (English) and image - size: 12M (image, text) pairs + prohibited_uses: unknown + quality_control: + explanation: See [[Conceptual Captions Dataset Creation]](https://aclanthology.org/P18-1238.pdf#section.3) + value: Input candidate (image, caption) pairs pass through several stages of + filtering and processing to ensure quality. sample: [] + size: 3.3M (image, text) pairs + type: dataset + url: https://aclanthology.org/P18-1238/ +- access: + explanation: Dataset is available at [[Conceptual 12M GitHub repository]](https://github.com/google-research-datasets/conceptual-12m). + value: open analysis: - value: > - The dataset is benchmarked against CC3M on two most fundamental V+L - tasks: vision-to-language generation and vision-and-language matching, with - an emphasis on long-tail visual recognition. The results illustrate the - benefit of scaling up pre-training data for vision-and-language tasks, as - indicated by the new state-of-the-art results on both the nocaps and Conceptual - Captions benchmarks. explanation: See [[Evaluating Vision-and-Language PreTraining Data]](https://arxiv.org/pdf/2102.08981.pdf#section.3) - # Construction + value: "The dataset is benchmarked against CC3M on two most fundamental V+L\ + \ tasks: vision-to-language generation and vision-and-language matching, with\ + \ an emphasis on long-tail visual recognition. The results illustrate the\ + \ benefit of scaling up pre-training data for vision-and-language tasks, as\ + \ indicated by the new state-of-the-art results on both the nocaps and Conceptual\ + \ Captions benchmarks.\n" + created_date: + explanation: "The date the [[paper]](https://arxiv.org/abs/2102.08981) was submitted.\n" + value: 2021-02-17 + datasheet: none dependencies: [] - license: - value: Conceptual Captions License - explanation: > - [[Conceptual Captions License]](https://github.com/google-research-datasets/conceptual-captions/blob/master/LICENSE) - included: '' + description: "A dataset with 12 million image-text pairs specifically meant to\ + \ be used for vision-and-language pre-training.\n" excluded: - value: | - Some of the filtering steps used in the preparation of Conceptual Captions dataset are relaxed to trade off high-recall for low-precision. The following steps are applied in the given order: - 1. 
Image-based Filtering - Only keep JPEG images where both dimensions are greater than 400 pixels, and the ratio of larger to smaller dimension is no more than 2.5. Exclude images that trigger pornography or profanity detectors. - 2. Text-based Filtering - Allow text between 3 and 256 words in the alt-text. Discard candidates with no noun or no determiner, but permit ones without prepositions. Set the maximum fraction of word repetition allowed to 0.2. Increase the threshold for counting a word type as rare from 5 to 20. - 3. Image&Text-based Filtering - Filter out candidates for which none of the text tokens can be mapped to the content of the image. explanation: See [[Relaxing filters for higher recall]](https://arxiv.org/pdf/2102.08981.pdf#subsection.2.2) + value: "Some of the filtering steps used in the preparation of Conceptual Captions\ + \ dataset are relaxed to trade off high-recall for low-precision. The following\ + \ steps are applied in the given order:\n1. Image-based Filtering - Only keep\ + \ JPEG images where both dimensions are greater than 400 pixels, and the ratio\ + \ of larger to smaller dimension is no more than 2.5. Exclude images that\ + \ trigger pornography or profanity detectors.\n2. Text-based Filtering - Allow\ + \ text between 3 and 256 words in the alt-text. Discard candidates with no\ + \ noun or no determiner, but permit ones without prepositions. Set the maximum\ + \ fraction of word repetition allowed to 0.2. Increase the threshold for counting\ + \ a word type as rare from 5 to 20.\n3. Image&Text-based Filtering - Filter\ + \ out candidates for which none of the text tokens can be mapped to the content\ + \ of the image.\n" + feedback: Feedback can be provided by creating an issue in the [[Conceptual 12M + GitHub repository]](https://github.com/google-research-datasets/conceptual-12m) + or by emailing at conceptual-captions at google.com + included: '' + intended_uses: '' + license: + explanation: "[[Conceptual Captions License]](https://github.com/google-research-datasets/conceptual-captions/blob/master/LICENSE)\n" + value: Conceptual Captions License + modality: + explanation: text (English) and image + value: image, text + monitoring: unknown + name: Conceptual 12M + organization: Google + prohibited_uses: unknown quality_control: + explanation: See [[Vision-and-Language Pre-Training Data]](https://arxiv.org/pdf/2102.08981.pdf#section.2) value: Input candidate (image, caption) pairs pass through several stages of filtering and processing to ensure quality. Person-name substitutions are performed in the alt-texts to protect the privacy of individuals in the associated images. - explanation: See [[Vision-and-Language Pre-Training Data]](https://arxiv.org/pdf/2102.08981.pdf#section.2) - # Downstream - access: + sample: [] + size: 12M (image, text) pairs + type: dataset + url: https://arxiv.org/pdf/2102.08981.pdf +- access: + explanation: https://huggingface.co/t5-large value: open - explanation: Dataset is available at [[Conceptual 12M GitHub repository]](https://github.com/google-research-datasets/conceptual-12m). 
- intended_uses: '' - prohibited_uses: unknown - monitoring: unknown - feedback: Feedback can be provided by creating an issue in the [[Conceptual 12M - GitHub repository]](https://github.com/google-research-datasets/conceptual-12m) - or by emailing at conceptual-captions at google.com - -# Models -- type: model - name: T5 - organization: Google - description: Text-To-Text Transfer Transformer (T5) is a model that unifies all - NLP tasks under the text-to-text format. + analysis: https://huggingface.co/t5-base#evaluation created_date: - value: 2019-10-23 explanation: The date the T5 paper was made public. - url: https://arxiv.org/abs/1910.10683 - model_card: https://huggingface.co/t5-base - modality: text (English) - size: - value: 11B parameters (dense model) - explanation: T5 models were trained on several sizes including 3B and 11B parameters. - analysis: https://huggingface.co/t5-base#evaluation + value: 2019-10-23 dependencies: [C4] - training_emissions: unknown - training_time: unknown - training_hardware: 1,024 TPU v3 chips (Cloud TPU Pods) - quality_control: The T5 paper documents many analyses/ablations that were considered - before arriving at the final architecture/training procedure. - access: - value: open - explanation: https://huggingface.co/t5-large - license: Apache 2.0 + description: Text-To-Text Transfer Transformer (T5) is a model that unifies all + NLP tasks under the text-to-text format. + feedback: https://huggingface.co/t5-large/discussions intended_uses: NLP tasks - prohibited_uses: unknown + license: Apache 2.0 + modality: {} + model_card: https://huggingface.co/t5-base monitoring: none - feedback: https://huggingface.co/t5-large/discussions - -- type: model - name: Internal Google BERT - # General + name: T5 organization: Google - description: > - Internal Google BERT model used to power Google Search products. + prohibited_uses: unknown + quality_control: The T5 paper documents many analyses/ablations that were considered + before arriving at the final architecture/training procedure. + size: '' + training_emissions: unknown + training_hardware: 1,024 TPU v3 chips (Cloud TPU Pods) + training_time: unknown + type: model + url: https://arxiv.org/abs/1910.10683 +- access: closed + analysis: unknown created_date: + explanation: "The date of the Google product update blog announcing that BERT\ + \ models were for ranking and featured snippets in Search.\n" value: 2019-11-25 - explanation: > - The date of the Google product update blog announcing that BERT models - were for ranking and featured snippets in Search. - url: https://blog.google/products/search/search-language-understanding-bert/ - model_card: unknown - modality: text - size: unknown - analysis: unknown - # Construction dependencies: [Internal Google BERT dataset] - training_emissions: unknown - training_time: unknown - training_hardware: unknown - quality_control: unknown - # Downstream - access: closed - license: - value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. + description: "Internal Google BERT model used to power Google Search products.\n" + feedback: unknown intended_uses: + explanation: "We don't have an exhaustive list of the intended use cases for\ + \ the Internal Google BERT model, but we know that Google Search was powered\ + \ by a fine-tuned BERT.\n" value: unknown - explanation: > - We don't have an exhaustive list of the intended use cases for the - Internal Google BERT model, but we know that Google Search was powered - by a fine-tuned BERT. 
- prohibited_uses: unknown + license: + explanation: "The asset isn't released, and hence the license is unknown.\n" + value: unknown + modality: {} + model_card: unknown monitoring: unknown - feedback: unknown - -- type: application - name: Google Search - # General + name: Internal Google BERT organization: Google - description: > - Google Search is Google's search engine. + prohibited_uses: unknown + quality_control: unknown + size: '' + training_emissions: unknown + training_hardware: unknown + training_time: unknown + type: model + url: https://blog.google/products/search/search-language-understanding-bert/ +- access: open + adaptation: unknown created_date: + explanation: "The date of the Google product update blog announcing that BERT\ + \ models were for ranking and featured snippets in Search.\n" value: 2019-11-25 - explanation: > - The date of the Google product update blog announcing that BERT models - were for ranking and featured snippets in Search. - url: https://blog.google/products/search/search-language-understanding-bert/ - # Construction dependencies: [Internal Google BERT, MUM] - adaptation: unknown - output_space: web page ranking - quality_control: unknown - # Downstream - access: open + description: "Google Search is Google's search engine.\n" + failures: unknown + feedback: "Feedback can be sent to Google Feedback using the product interface\ + \ [[Google Feedback]](https://www.google.com/tools/feedback).\n" + intended_uses: Searching the web using text, voice or image license: - value: none explanation: No license was found. - terms_of_service: https://policies.google.com/terms - intended_uses: Searching the web using text, voice or image - prohibited_uses: > - Prohibited use cases aren't specifically spelled out for Google search, but - several illegal and discouraged use cases are shared in the Respect Others - section of the [[Term of Service]](https://policies.google.com/terms). - monitoring: > - It is implied that Google scan uses of its products for spam, - malware and illegal content in the - [[Term of Service]](https://policies.google.com/terms). - feedback: > - Feedback can be sent to Google Feedback using the product interface - [[Google Feedback]](https://www.google.com/tools/feedback). - # Deployment + value: none + monitoring: "It is implied that Google scan uses of its products for spam, malware\ + \ and illegal content in the [[Term of Service]](https://policies.google.com/terms).\n" monthly_active_users: unknown - user_distribution: unknown - failures: unknown - -- type: dataset - name: Infiniset - # General + name: Google Search organization: Google - description: > - Infiniset "is a combination of dialog data from public dialog data and - other public web documents" - [[Appendix E]](https://arxiv.org/pdf/2201.08239.pdf#appendix.E). 
+ output_space: web page ranking + prohibited_uses: "Prohibited use cases aren't specifically spelled out for Google\ + \ search, but several illegal and discouraged use cases are shared in the Respect\ + \ Others section of the [[Term of Service]](https://policies.google.com/terms).\n" + quality_control: unknown + terms_of_service: https://policies.google.com/terms + type: application + url: https://blog.google/products/search/search-language-understanding-bert/ + user_distribution: unknown +- access: closed + analysis: unknown created_date: + explanation: "The date of the Google company news blog announcing LaMDA [[Google\ + \ News Blog]](https://blog.google/technology/ai/lamda/).\n" value: 2021-06-18 - explanation: > - The date of the Google company news blog announcing LaMDA - [[Google News Blog]](https://blog.google/technology/ai/lamda/). - url: https://arxiv.org/pdf/2201.08239.pdf datasheet: none - modality: text and code - size: - value: unknown - explanation: > - The size of the dataset is unclear, but it is reported that the dataset - "consists of 2.97B documents and 1.12B dialogs with 13.39B utterances" - [[Appendix E]](https://arxiv.org/pdf/2201.08239.pdf#appendix.E). - sample: [] - analysis: unknown - # Construction dependencies: [] - license: - value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - included: > - Included in the dataset are data from "public forums (0%); C4 data (12.5% ); - code documents from sites related to programming like Q&A sites tutorials, - etc (12.5%); Wikipedia (English) (12.5%); English web documents (6.25%); - and Non-English web documents (6.25%)." + description: "Infiniset \"is a combination of dialog data from public dialog data\ + \ and other public web documents\" [[Appendix E]](https://arxiv.org/pdf/2201.08239.pdf#appendix.E).\n" excluded: unknown - quality_control: unknown - # Downstream - access: closed + feedback: + explanation: "Author contact information was not provided.\n" + value: none + included: "Included in the dataset are data from \"public forums (0%); C4 data\ + \ (12.5% ); code documents from sites related to programming like Q&A sites\ + \ tutorials, etc (12.5%); Wikipedia (English) (12.5%); English web documents\ + \ (6.25%); and Non-English web documents (6.25%).\"\n" intended_uses: + explanation: "Intended uses of the dataset wasn't explicitly linked, but it\ + \ is likely intended for training language models specialized in dialogue.\n" + value: unknown + license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - Intended uses of the dataset wasn't explicitly linked, but it is likely - intended for training language models specialized in dialogue. - prohibited_uses: > - The prohibited uses for Infiniset weren't specifically listed, but the - Google AI principles inspired safety objectives in - [[Appendix A.1]](https://arxiv.org/pdf/2201.08239.pdf#subsection.A.1) - advises avoiding harm, unjust impact and misinformation, among others. + modality: + explanation: text and code + value: code, text monitoring: unknown - feedback: - value: none - explanation: > - Author contact information was not provided. - -- type: model - name: LaMDA - # General + name: Infiniset organization: Google - description: > - LaMDA stands for Language Models for Dialog Application. It is a transformer - based language model trained on dialogue data. 
+ prohibited_uses: "The prohibited uses for Infiniset weren't specifically listed,\ + \ but the Google AI principles inspired safety objectives in [[Appendix A.1]](https://arxiv.org/pdf/2201.08239.pdf#subsection.A.1)\ + \ advises avoiding harm, unjust impact and misinformation, among others.\n" + quality_control: unknown + sample: [] + size: + explanation: "The size of the dataset is unclear, but it is reported that the\ + \ dataset \"consists of 2.97B documents and 1.12B dialogs with 13.39B utterances\"\ + \ [[Appendix E]](https://arxiv.org/pdf/2201.08239.pdf#appendix.E).\n" + value: unknown + type: dataset + url: https://arxiv.org/pdf/2201.08239.pdf +- access: closed + analysis: "The model performance was analyzed on sensibleness, specificity and\ + \ interestingness. The model was also analyzed on safety, following metrics\ + \ derived from Google AI Principles [[Appendix A.1]](https://arxiv.org/pdf/2201.08239.pdf#subsection.A.1).\ + \ Finally, the model was analyzed on groundedness, testing its ability to produce\ + \ responses that can be associated with \"known sources whenever possible [[Section\ + \ 4.1]](https://arxiv.org/pdf/2201.08239.pdf#subsection.4.1).\"\n" created_date: + explanation: "The date of the Google company news blog announcing LaMDA [[Google\ + \ News Blog]](https://blog.google/technology/ai/lamda/).\n" value: 2021-06-18 - explanation: > - The date of the Google company news blog announcing LaMDA - [[Google News Blog]](https://blog.google/technology/ai/lamda/). - url: https://arxiv.org/pdf/2201.08239.pdf - model_card: none - modality: text - size: - value: 137B parameters (dense model) - explanation: > - Along with the 137B model, the authors also trained 2B and 8B LaMDA - models. - analysis: > - The model performance was analyzed on sensibleness, specificity and - interestingness. - The model was also analyzed on safety, following - metrics derived from Google AI Principles - [[Appendix A.1]](https://arxiv.org/pdf/2201.08239.pdf#subsection.A.1). - Finally, the model was analyzed on groundedness, testing its ability to - produce responses that can be associated with "known sources whenever - possible - [[Section 4.1]](https://arxiv.org/pdf/2201.08239.pdf#subsection.4.1)." - # Construction dependencies: [Infiniset] - training_emissions: - value: 26 tCO2e - explanation: > - "...total carbon footprint of LaMDA’s pre-training of the largest model - is approximately 25.2 tCO2e. The carbon footprint of pre-training of - smaller models and fine-tuning of all models is approximately 0.7 tCO2e - ... which brings the total footprint of LaMDA to approximately 26 - tCO2e - [[Section 10]](https://arxiv.org/pdf/2201.08239.pdf#section.10)" - training_time: - value: 4108.80 petaflop/s-day - explanation: > - The total number of training flops of LaMDA was reported as 3.55E+23 - (3.55E+8 petaflops) - [[Section 10]](https://arxiv.org/pdf/2201.08239.pdf#section.10), which is - equal to 4108.80 = 3.55E+8 / (60 * 60 * 24) petaflop/s-day. - training_hardware: - value: 1024 TPU-V3 chips - explanation: > - Reported in [[Section 10]](https://arxiv.org/pdf/2201.08239.pdf#section.10). - quality_control: > - LaMDA was fine-tuned to predict sensibleness, specificity and - interestingness as well as safety. Then, the candidates were filtered out - if the model safety predictions were below a certain threshold. The next - candidates in the conversation were selected as a combination of these - predictions. The model was also fine-tuned for groundedness. 
The results
-    are shown in
-    [[Figure 5]](https://arxiv.org/pdf/2201.08239.pdf#figure.caption.23).
-  # Downstream
-  access: closed
+  description: "LaMDA stands for Language Models for Dialog Applications. It is a\
+    \ transformer-based language model trained on dialogue data.\n"
+  feedback:
+    explanation: "Author contact information was not provided.\n"
+    value: none
+  intended_uses: "LaMDA is a language model, so it can be used for regular language\
+    \ modelling tasks without fine-tuning, but it is fine-tuned for dialogue tasks.\n"
   license:
+    explanation: "The asset isn't released, and hence the license is unknown.\n"
     value: unknown
-    explanation: >
-      The asset isn't released, and hence the license is unknown.
-  intended_uses: >
-    LaMDA is a language model, so it can be used for regular langauge modelling
-    tasks without fine-tuning, but its fine-tuned for dialogue tasks.
-  prohibited_uses: >
-    The prohibited uses of LaMDA weren't specifically listed, but the Google
-    AI principles inspired safety objectives in
-    [[Appendix A.1]](https://arxiv.org/pdf/2201.08239.pdf#subsection.A.1)
-    advises avoiding harm, unjust impact and misinformation, among others.
+  modality:
+    explanation: text
+    value: text; text
+  model_card: none
   monitoring: unknown
-  feedback:
-    value: none
-    explanation: >
-      Author contact information was not provided.
-
-- type: dataset
-  name: PaLM dataset
-  # General
+  name: LaMDA
   organization: Google
-  description: >
-    PaLM dataset "was created for pre-training language models"
-    [[Datasheet]](https://arxiv.org/pdf/2204.02311.pdf#appendix.D).
-  created_date:
-    value: 2022-04-04
-    explanation: >
-      The date of the Google AI blog announcing the details of PaLM
-      [[Google AI Blog]](https://ai.googleblog.com/2022/04/pathways-language-model-palm-scaling-to.html).
-  url: https://arxiv.org/pdf/2204.02311.pdf
-  datasheet: https://arxiv.org/pdf/2204.02311.pdf#appendix.D
-  modality: text and code
-  size:
-    value: 3.92 TB
-    explanation: >
-      Dataset size in GB is not reported, but the dataset is reported to have
-      780 billion tokens
-      [[Datasheet]](https://arxiv.org/pdf/2204.02311.pdf#appendix.D).
-      The code portion of the dataset is reported to be 5% totaling a 196GB
-      of source code
-      [[Section 3]](https://arxiv.org/pdf/2204.02311.pdf#section.3).
-      It is unclear whether the reported size is before or after de-duplication.
-      Nonetheless, one can estimate the dataset size by multiplying 196GB with
-      20 = 3.92 TB.
-  sample: []
+  prohibited_uses: "The prohibited uses of LaMDA weren't specifically listed, but\
+    \ the Google AI principles inspired safety objectives in [[Appendix A.1]](https://arxiv.org/pdf/2201.08239.pdf#subsection.A.1)\
+    \ advise avoiding harm, unjust impact and misinformation, among others.\n"
+  quality_control: "LaMDA was fine-tuned to predict sensibleness, specificity and\
+    \ interestingness as well as safety. Then, the candidates were filtered out\
+    \ if the model safety predictions were below a certain threshold. The next candidates\
+    \ in the conversation were selected as a combination of these predictions. The\
+    \ model was also fine-tuned for groundedness. The results are shown in [[Figure\
+    \ 5]](https://arxiv.org/pdf/2201.08239.pdf#figure.caption.23).\n"
+  size: 137B parameters (dense)
+  training_emissions:
+    explanation: "\"...total carbon footprint of LaMDA’s pre-training of the largest\
+      \ model is approximately 25.2 tCO2e. The carbon footprint of pre-training\
+      \ of smaller models and fine-tuning of all models is approximately 0.7 tCO2e\
+      \ ...
which brings the total footprint of LaMDA to approximately 26 tCO2e\ + \ [[Section 10]](https://arxiv.org/pdf/2201.08239.pdf#section.10)\"\n" + value: 26 tCO2e + training_hardware: + explanation: "Reported in [[Section 10]](https://arxiv.org/pdf/2201.08239.pdf#section.10).\n" + value: 1024 TPU-V3 chips + training_time: + explanation: "The total number of training flops of LaMDA was reported as 3.55E+23\ + \ (3.55E+8 petaflops) [[Section 10]](https://arxiv.org/pdf/2201.08239.pdf#section.10),\ + \ which is equal to 4108.80 = 3.55E+8 / (60 * 60 * 24) petaflop/s-day.\n" + value: 4108.80 petaflop/s-day + type: model + url: https://arxiv.org/pdf/2201.08239.pdf +- access: closed analysis: unknown - # Construction + created_date: + explanation: "The date of the Google AI blog announcing the details of PaLM\ + \ [[Google AI Blog]](https://ai.googleblog.com/2022/04/pathways-language-model-palm-scaling-to.html).\n" + value: 2022-04-04 + datasheet: https://arxiv.org/pdf/2204.02311.pdf#appendix.D dependencies: [Infiniset] + description: "PaLM dataset \"was created for pre-training language models\" [[Datasheet]](https://arxiv.org/pdf/2204.02311.pdf#appendix.D).\n" + excluded: "GitHub repositories with copyleft licenses were excluded. Programming\ + \ languageges other than the most common 24 were excluded [[Section 3]](https://arxiv.org/pdf/2204.02311.pdf#section.3).\n" + feedback: + explanation: "Author contact information is shared in the paper [[Paper]](https://arxiv.org/pdf/2204.02311.pdf).\n" + value: Contact the authors. + included: "The dataset is based on Infiniset. It included multilingual text containing\ + \ text from over 100 languages. The breakdown of the data included is as follows:\ + \ Social media conversations (multilingual) 50, Filtered webpages (multilingual)\ + \ 27%, BooksCorpus (English) 13%, GitHub (code) 5%, Wikipedia (multilingual)\ + \ 4%, and News (English) 1%. Code was collected from GitHub repositories with\ + \ appropriate licenses, totalling 96GB of source code [[Section 3]](https://arxiv.org/pdf/2204.02311.pdf#section.3).\n" + intended_uses: + explanation: "As stated in [[Datasheet]](https://arxiv.org/pdf/2204.02311.pdf#appendix.D).\n" + value: "\"The dataset was created for pre-training language models by a team\ + \ of researchers at Google\".\n" license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - included: > - The dataset is based on Infiniset. It included multilingual text containing - text from over 100 languages. The breakdown of the data included is as - follows: Social media conversations (multilingual) 50, Filtered webpages - (multilingual) 27%, BooksCorpus (English) 13%, GitHub (code) 5%, Wikipedia - (multilingual) 4%, and News (English) 1%. - Code was collected from GitHub repositories with appropriate licenses, - totalling 96GB of source code - [[Section 3]](https://arxiv.org/pdf/2204.02311.pdf#section.3). - excluded: > - GitHub repositories with copyleft licenses were excluded. Programming - languageges other than the most common 24 were excluded - [[Section 3]](https://arxiv.org/pdf/2204.02311.pdf#section.3). - quality_control: > - In order to reduce low quality web pages, the web pages were sampled - according to a "quality score" classifier. - Code files were de-duplicated using Levenshtein distance - [[Section 3]](https://arxiv.org/pdf/2204.02311.pdf#section.3). 
- # Downstream - access: closed - intended_uses: - value: > - "The dataset was created for pre-training language models by a team of - researchers at Google". - explanation: > - As stated in - [[Datasheet]](https://arxiv.org/pdf/2204.02311.pdf#appendix.D). - prohibited_uses: > - "... should not be used for any of the unacceptable language model use - cases, e.g., generation of toxic speech" - [[Datasheet]](https://arxiv.org/pdf/2204.02311.pdf#appendix.D). + modality: + explanation: text and code + value: code, text monitoring: unknown - feedback: - value: Contact the authors. - explanation: > - Author contact information is shared in the paper - [[Paper]](https://arxiv.org/pdf/2204.02311.pdf). - -- type: model - name: Flan-T5 + name: PaLM dataset organization: Google - description: Flan-T5 is a version of the T5 language model fine-tuned on instruction - data + prohibited_uses: "\"... should not be used for any of the unacceptable language\ + \ model use cases, e.g., generation of toxic speech\" [[Datasheet]](https://arxiv.org/pdf/2204.02311.pdf#appendix.D).\n" + quality_control: "In order to reduce low quality web pages, the web pages were\ + \ sampled according to a \"quality score\" classifier. Code files were de-duplicated\ + \ using Levenshtein distance [[Section 3]](https://arxiv.org/pdf/2204.02311.pdf#section.3).\n" + sample: [] + size: + explanation: "Dataset size in GB is not reported, but the dataset is reported\ + \ to have 780 billion tokens [[Datasheet]](https://arxiv.org/pdf/2204.02311.pdf#appendix.D).\ + \ The code portion of the dataset is reported to be 5% totaling a 196GB of\ + \ source code [[Section 3]](https://arxiv.org/pdf/2204.02311.pdf#section.3).\ + \ It is unclear whether the reported size is before or after de-duplication.\ + \ Nonetheless, one can estimate the dataset size by multiplying 196GB with\ + \ 20 = 3.92 TB.\n" + value: 3.92 TB + type: dataset + url: https://arxiv.org/pdf/2204.02311.pdf +- access: + explanation: Weights can be downloaded from [Github](https://github.com/google-research/t5x/blob/main/docs/models.md) + value: open + analysis: Evaluated on a variety of standard language datasets. created_date: - value: 2022-10-20 explanation: Date paper was released - url: https://arxiv.org/abs/2210.11416 - model_card: https://arxiv.org/pdf/2210.11416.pdf - modality: text (English) - analysis: Evaluated on a variety of standard language datasets. - size: 11B parameters (dense model) + value: 2022-10-20 dependencies: [T5, Muffin, P3, NaturalInstructions-v2, Flan CoT] - training_emissions: Unknown - training_time: Unknown - training_hardware: 512 v4 TPU Chips - quality_control: Across different multitask datasets, templates and formatting - were maintained. For the chain-of-thoughts (CoT) data, specific exemplars were - used. 
- access: - value: open - explanation: Weights can be downloaded from [Github](https://github.com/google-research/t5x/blob/main/docs/models.md) + description: Flan-T5 is a version of the T5 language model fine-tuned on instruction + data + feedback: https://huggingface.co/google/flan-t5-xxl/discussions + intended_uses: unknown license: - value: Apache 2.0 explanation: License on the [[Github repository]](https://github.com/google-research/google-research) - intended_uses: unknown - prohibited_uses: none + value: Apache 2.0 + modality: + explanation: text (English) + value: text; text + model_card: https://arxiv.org/pdf/2210.11416.pdf monitoring: none - feedback: https://huggingface.co/google/flan-t5-xxl/discussions - -- type: model - name: UL2 + name: Flan-T5 organization: Google - description: UL2 is a language model trained with a new pretraining objective + prohibited_uses: none + quality_control: Across different multitask datasets, templates and formatting + were maintained. For the chain-of-thoughts (CoT) data, specific exemplars were + used. + size: 11B parameters (dense) + training_emissions: Unknown + training_hardware: 512 v4 TPU Chips + training_time: Unknown + type: model + url: https://arxiv.org/abs/2210.11416 +- access: + explanation: Model weights available for download in the [[Github repo]](https://github.com/google-research/google-research/tree/master/ul2) + value: open + analysis: '' created_date: - value: 2022-05-10 explanation: Date mdoel paper was released - url: https://arxiv.org/abs/2205.05131 - model_card: '' - modality: text (English) - analysis: '' - size: 20B parameters (dense model) + value: 2022-05-10 dependencies: [C4] - training_emissions: '' - training_time: '' - training_hardware: 128 TPUv4 - quality_control: '' - access: - value: open - explanation: Model weights available for download in the [[Github repo]](https://github.com/google-research/google-research/tree/master/ul2) + description: UL2 is a language model trained with a new pretraining objective + feedback: '' + intended_uses: '' license: - value: Apache 2.0 explanation: 20B checkpoints only for three different iteration steps - intended_uses: '' - prohibited_uses: '' + value: Apache 2.0 + modality: + explanation: text (English) + value: text; text + model_card: '' monitoring: '' - feedback: '' - -- type: model - name: Parti + name: UL2 organization: Google - description: Parti is a text-to-image diffusion model + prohibited_uses: '' + quality_control: '' + size: 20B parameters (dense) + training_emissions: '' + training_hardware: 128 TPUv4 + training_time: '' + type: model + url: https://arxiv.org/abs/2205.05131 +- access: + explanation: Google does not provide access to Parti for external researchers. + value: closed + analysis: '' created_date: - value: 2022-06-22 explanation: Date the model website was made public - url: https://parti.research.google/ - model_card: '' - modality: Text (English) and Image - size: 20B parameters - analysis: '' + value: 2022-06-22 dependencies: [C4, LAION-400M, FIT400M, JFT-4B] - training_emissions: '' - training_time: unknown - training_hardware: unknown - quality_control: '' - access: - value: closed - explanation: Google does not provide access to Parti for external researchers. + description: Parti is a text-to-image diffusion model + feedback: '' + intended_uses: '' license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. 
- intended_uses: '' - prohibited_uses: '' + modality: + explanation: Text (English) and Image + value: image, text; image, text + model_card: '' monitoring: '' - feedback: '' - -- type: model - name: Imagen + name: Parti organization: Google - description: Imagen is a text-to-image diffusion model + prohibited_uses: '' + quality_control: '' + size: 20B parameters (dense) + training_emissions: '' + training_hardware: unknown + training_time: unknown + type: model + url: https://parti.research.google/ +- access: open + analysis: '' created_date: - value: 2022-05-23 explanation: Date the model website was made public - url: https://imagen.research.google/ - model_card: '' - modality: text (English) and image - size: > - 14B parameters total. 2B parameters (U-Net model), 11B parameters (T5-XXL), - and 600M and 400M parameter models for super resolution - analysis: '' + value: 2022-05-23 dependencies: [LAION-400M, Google internal image-text dataset] - training_emissions: unknown - training_time: unknown - training_hardware: 128 TPU-v4 - quality_control: '' - access: open + description: Imagen is a text-to-image diffusion model + feedback: '' + intended_uses: '' license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - intended_uses: '' - prohibited_uses: '' + modality: + explanation: text (English) and image + value: image, text; image, text + model_card: '' monitoring: '' - feedback: '' - -- type: model - name: VATT + name: Imagen organization: Google - description: VATT is a family of models trained on multimodal data + prohibited_uses: '' + quality_control: '' + size: 14B parameters (dense) + training_emissions: unknown + training_hardware: 128 TPU-v4 + training_time: unknown + type: model + url: https://imagen.research.google/ +- access: + explanation: Model checkpoints can be downloaded from the [[Github repository]](https://github.com/google-research/google-research/tree/master/vatt) + value: open + analysis: '' created_date: - value: 2022-04-22 explanation: Date the model paper was made public - url: https://arxiv.org/abs/2104.11178 - model_card: '' - modality: Text (English), Video, and Audio - size: 155M - analysis: '' + value: 2022-04-22 dependencies: [AudioSet, HowTo100M] - training_emissions: unknown - training_time: 3 days - training_hardware: 256 TPU-v3 - quality_control: '' - access: - value: open - explanation: Model checkpoints can be downloaded from the [[Github repository]](https://github.com/google-research/google-research/tree/master/vatt) + description: VATT is a family of models trained on multimodal data + feedback: '' + intended_uses: '' license: - value: Apache 2.0 explanation: License on the [[Github repository]](https://github.com/google-research/google-research) - intended_uses: '' - prohibited_uses: '' + value: Apache 2.0 + modality: + explanation: Text (English), Video, and Audio + value: audio, text, video; audio, text, video + model_card: '' monitoring: '' - feedback: '' - -- type: model - name: PaLM - # General + name: VATT organization: Google - description: > - PaLM stands Pathways Language Model, "dense decoder-only Transformer model - trained with the Pathways system" - [[Google ai Blog]](https://ai.googleblog.com/2022/04/pathways-language-model-palm-scaling-to.html). 
+  prohibited_uses: ''
+  quality_control: ''
+  size: 155M parameters (dense)
+  training_emissions: unknown
+  training_hardware: 256 TPU-v3
+  training_time: 3 days
+  type: model
+  url: https://arxiv.org/abs/2104.11178
+- access:
+    explanation: Made available through the PaLM API as of March 14, 2023.
+    value: limited
+  analysis: "\"PaLM is evaluated on English Natural Language Processing (NLP) tasks,\
+    \ tasks from BIG-bench, reasoning tasks, code completion tasks, multilingual\
+    \ generation and question answering tasks, translation tasks, and bias and toxicity\
+    \ benchmarks\" [[Model Card]](https://arxiv.org/pdf/2204.02311.pdf#appendix.E).\n"
   created_date:
+    explanation: "The date of the Google AI blog announcing the details of PaLM\
+      \ [[Google AI Blog]](https://ai.googleblog.com/2022/04/pathways-language-model-palm-scaling-to.html).\n"
     value: 2022-04-04
-    explanation: >
-      The date of the Google AI blog announcing the details of PaLM
-      [[Google AI Blog]](https://ai.googleblog.com/2022/04/pathways-language-model-palm-scaling-to.html).
-  url: https://arxiv.org/pdf/2204.02311.pdf
-  model_card: https://arxiv.org/pdf/2204.02311.pdf#appendix.E
-  modality: text and code
-  size: 540B parameters (dense model)
-  analysis: >
-    "PaLM is evaluated on English Natural Language Processing (NLP) tasks, tasks
-    from BIG-bench, reasoning tasks, code completion tasks, multilingual
-    generation and question answering tasks, translation tasks, and bias and
-    toxicity benchmarks"
-    [[Model Card]](https://arxiv.org/pdf/2204.02311.pdf#appendix.E).
-  # Construction
   dependencies: [PaLM dataset]
-  training_emissions:
-    value: 271.43 tCO2
-    explanation: >
-      Reported in
-      [[Appendix B]](https://arxiv.org/pdf/2204.02311.pdf#appendix.A)
-  training_time:
-    value: 29600 petaflop/s-days
-    explanation: >
-      Reported in
-      [[Appendix B]](https://arxiv.org/pdf/2204.02311.pdf#appendix.A)
-  training_hardware:
-    value: 6144 TPU v4 chips
-    explanation: >
-      Reported in [[Section 4]](https://arxiv.org/pdf/2204.02311.pdf#section.4).
-  quality_control: Unknown
-  # Downstream
-  access:
-    value: limited
-    explanation: Made available through the PaLM API as of March 14, 2023.
+  description: "PaLM stands for Pathways Language Model, \"dense decoder-only Transformer\
+    \ model trained with the Pathways system\" [[Google AI Blog]](https://ai.googleblog.com/2022/04/pathways-language-model-palm-scaling-to.html).\n"
+  feedback:
+    explanation: "Author contact information is shared in the paper [[Paper]](https://arxiv.org/pdf/2204.02311.pdf).\n"
+    value: Contact the authors.
+  intended_uses: "\"The primary use is research on language models, including: research\
+    \ on NLP applications like machine translation and question answering, advancing\
+    \ fairness and safety research, and understanding limitations of current LLMs.\
+    \ Within Google, PaLM is being used for research on a variety of open-ended\
+    \ text and code generation tasks, including reasoning [[Section 6.3]](https://arxiv.org/pdf/2204.02311.pdf#subsection.6.3)\
+    \ and code synthesis and understanding [[Section 6.4]](https://arxiv.org/pdf/2204.02311.pdf#subsection.6.4)\"\
+    \ [[Model Card]](https://arxiv.org/pdf/2204.02311.pdf#appendix.E).\n"
   license:
+    explanation: "The asset isn't released, and hence the license is unknown.\n"
     value: unknown
-    explanation: >
-      The asset isn't released, and hence the license is unknown.
- intended_uses: > - "The primary use is research on language models, including: research - on NLP applications like machine translation and question answering, - advancing fairness and safety research, and understanding limitations of - current LLMs. - Within Google, PaLM is being used for research on a variety of open- - ended text and code generation tasks, including reasoning - [[Section 6.3]](https://arxiv.org/pdf/2204.02311.pdf#subsection.6.3) - and code synthesis and understanding - [[Section 6.4]](https://arxiv.org/pdf/2204.02311.pdf#subsection.6.4)" - [[Model Card]](https://arxiv.org/pdf/2204.02311.pdf#appendix.E). - prohibited_uses: > - The model "should not be used for downstream applications without further - analysis on factors in the proposed downstream application - [[Model Card]](https://arxiv.org/pdf/2204.02311.pdf#appendix.E)" + modality: + explanation: text and code + value: code, text; code, text + model_card: https://arxiv.org/pdf/2204.02311.pdf#appendix.E monitoring: unknown - feedback: - value: Contact the authors. - explanation: > - Author contact information is shared in the paper - [[Paper]](https://arxiv.org/pdf/2204.02311.pdf). - -- type: application - name: PaLM API + name: PaLM organization: Google - description: a new developer offering that makes it easy and safe to experiment - with Google’s language models. + prohibited_uses: "The model \"should not be used for downstream applications without\ + \ further analysis on factors in the proposed downstream application [[Model\ + \ Card]](https://arxiv.org/pdf/2204.02311.pdf#appendix.E)\"\n" + quality_control: Unknown + size: 540B parameters (dense) + training_emissions: + explanation: "Reported in [[Appendix B]](https://arxiv.org/pdf/2204.02311.pdf#appendix.A)\n" + value: 271.43 tCO2 + training_hardware: + explanation: "Reported in [[Section 4]](https://arxiv.org/pdf/2204.02311.pdf#section.4).\n" + value: 6144 TPU v4 chips + training_time: + explanation: "Reported in [[Appendix B]](https://arxiv.org/pdf/2204.02311.pdf#appendix.A)\n" + value: 29600 petaflop/s-days + type: model + url: https://arxiv.org/pdf/2204.02311.pdf +- access: limited + adaptation: '' created_date: 2023-03-14 - url: https://developers.googleblog.com/2023/03/announcing-palm-api-and-makersuite.html dependencies: [PaLM] - adaptation: '' - output_space: '' - quality_control: '' - access: limited - license: unknown - terms_of_service: '' + description: a new developer offering that makes it easy and safe to experiment + with Google’s language models. 
+ failures: '' + feedback: '' intended_uses: '' - prohibited_uses: '' + license: unknown monitoring: '' - feedback: '' monthly_active_users: '' - user_distribution: '' - failures: '' - -- type: model - name: Med-PaLM + name: PaLM API organization: Google - description: '' - created_date: 2022-12-26 - url: https://arxiv.org/abs/2212.13138 - model_card: '' - modality: text + output_space: '' + prohibited_uses: '' + quality_control: '' + terms_of_service: '' + type: application + url: https://developers.googleblog.com/2023/03/announcing-palm-api-and-makersuite.html + user_distribution: '' +- access: closed analysis: '' - size: 540B + created_date: 2022-12-26 dependencies: [Flan-PaLM, MultiMedQA] - training_emissions: '' - training_time: '' - training_hardware: '' - quality_control: '' - access: closed + description: '' + feedback: '' + intended_uses: '' license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - intended_uses: '' - prohibited_uses: '' + modality: + explanation: text + value: text; text + model_card: '' monitoring: '' - feedback: '' - -- type: model - name: Med-PaLM Multimodal + name: Med-PaLM organization: Google - description: '' - created_date: 2023-07-26 - url: https://arxiv.org/pdf/2307.14334.pdf - model_card: '' - modality: text, image, and genomics input with text output + prohibited_uses: '' + quality_control: '' + size: 540B parameters (dense) + training_emissions: '' + training_hardware: '' + training_time: '' + type: model + url: https://arxiv.org/abs/2212.13138 +- access: closed analysis: Evaluated on MultiMedBench tasks and radiologist evaluations of model-generated chest X-ray reports - size: 562B parameters (dense) + created_date: 2023-07-26 dependencies: [PaLM-E, MultiMedBench] - training_emissions: '' - training_time: '' - training_hardware: '' - quality_control: '' - access: closed + description: '' + feedback: '' + intended_uses: '' license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - intended_uses: '' - prohibited_uses: '' + modality: + explanation: text, image, and genomics input with text output + value: image, text; text + model_card: '' monitoring: '' - feedback: '' - -- type: model - name: MultiMedQA + name: Med-PaLM Multimodal organization: Google - description: '' - created_date: 2022-12-26 - url: https://arxiv.org/abs/2212.13138 - model_card: '' - modality: text + prohibited_uses: '' + quality_control: '' + size: 562B parameters (dense) + training_emissions: '' + training_hardware: '' + training_time: '' + type: model + url: https://arxiv.org/pdf/2307.14334.pdf +- access: closed analysis: '' - size: unknown + created_date: 2022-12-26 dependencies: - MedQA - MedMCQA @@ -814,149 +708,149 @@ - LiveQA - Medication QA - HealthSearchQA - training_emissions: '' - training_time: '' - training_hardware: '' - quality_control: '' - access: closed + description: '' + feedback: '' + intended_uses: '' license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. 
- intended_uses: '' - prohibited_uses: '' + modality: + explanation: text + value: text; text + model_card: '' monitoring: '' - feedback: '' - -- type: model - name: Flan-PaLM + name: MultiMedQA organization: Google - description: '' - created_date: 2022-10-20 - url: https://arxiv.org/abs/2210.11416 - model_card: '' - modality: text - analysis: '' - size: 540B - dependencies: [PaLM, Muffin, P3, NaturalInstructions-v2] + prohibited_uses: '' + quality_control: '' + size: unknown training_emissions: '' - training_time: '' training_hardware: '' - quality_control: '' - access: closed + training_time: '' + type: model + url: https://arxiv.org/abs/2212.13138 +- access: closed + analysis: '' + created_date: 2022-10-20 + dependencies: [PaLM, Muffin, P3, NaturalInstructions-v2] + description: '' + feedback: '' + intended_uses: '' license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - intended_uses: '' - prohibited_uses: '' + modality: + explanation: text + value: text; text + model_card: '' monitoring: '' - feedback: '' - -- type: model - name: Flan-U-PaLM + name: Flan-PaLM organization: Google - description: '' - created_date: 2022-10-20 + prohibited_uses: '' + quality_control: '' + size: 540B parameters (dense) + training_emissions: '' + training_hardware: '' + training_time: '' + type: model url: https://arxiv.org/abs/2210.11416 - model_card: '' - modality: text +- access: closed analysis: '' - size: 540B + created_date: 2022-10-20 dependencies: [U-PaLM, Muffin, P3, NaturalInstructions-v2] - training_emissions: '' - training_time: '' - training_hardware: '' - quality_control: '' - access: closed + description: '' + feedback: '' + intended_uses: '' license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. 
- intended_uses: '' - prohibited_uses: '' + modality: + explanation: text + value: text; text + model_card: '' monitoring: '' - feedback: '' - -- type: dataset - name: Muffin + name: Flan-U-PaLM organization: Google - description: '' + prohibited_uses: '' + quality_control: '' + size: 540B parameters (dense) + training_emissions: '' + training_hardware: '' + training_time: '' + type: model + url: https://arxiv.org/abs/2210.11416 +- access: open + analysis: '' created_date: 2021-09-03 - url: https://arxiv.org/abs/2109.01652 datasheet: '' - modality: text - size: 62 tasks - sample: [] - analysis: '' dependencies: [] - included: '' + description: '' excluded: '' - quality_control: '' - access: open - license: Apache 2.0 + feedback: '' + included: '' intended_uses: '' - prohibited_uses: '' + license: Apache 2.0 + modality: text monitoring: '' - feedback: '' - -- type: model - name: U-PaLM + name: Muffin organization: Google - description: '' - created_date: 2022-10-20 - url: https://arxiv.org/abs/2210.11399 - model_card: '' - modality: text + prohibited_uses: '' + quality_control: '' + sample: [] + size: 62 tasks + type: dataset + url: https://arxiv.org/abs/2109.01652 +- access: closed analysis: '' - size: 540B + created_date: 2022-10-20 dependencies: [PaLM, PaLM dataset] - training_emissions: '' - training_time: '' - training_hardware: '' - quality_control: '' - access: closed + description: '' + feedback: '' + intended_uses: '' license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - intended_uses: '' - prohibited_uses: '' + modality: + explanation: text + value: text; text + model_card: '' monitoring: '' - feedback: '' - -- type: model - name: PaLM-SayCan + name: U-PaLM organization: Google - description: '' - created_date: 2022-08-16 - url: https://arxiv.org/abs/2204.01691 - model_card: '' - modality: text, robotic control - analysis: '' - size: 540B - dependencies: [PaLM] + prohibited_uses: '' + quality_control: '' + size: 540B parameters (dense) training_emissions: '' - training_time: '' training_hardware: '' - quality_control: '' - access: closed - license: unknown (model weights), Apache 2.0 (SayCan code) + training_time: '' + type: model + url: https://arxiv.org/abs/2210.11399 +- access: closed + analysis: '' + created_date: 2022-08-16 + dependencies: [PaLM] + description: '' + feedback: '' intended_uses: '' - prohibited_uses: '' + license: unknown (model weights), Apache 2.0 (SayCan code) + modality: + explanation: text, robotic control + value: text; text + model_card: '' monitoring: '' - feedback: '' - -- type: model - name: GLaM + name: PaLM-SayCan organization: Google - description: '' - created_date: 2021-12-13 - url: https://arxiv.org/abs/2112.06905 - model_card: '' - modality: text + prohibited_uses: '' + quality_control: '' + size: 540B parameters (dense) + training_emissions: '' + training_hardware: '' + training_time: '' + type: model + url: https://arxiv.org/abs/2204.01691 +- access: closed analysis: '' - size: 1.2T parameters (sparse) + created_date: 2021-12-13 dependencies: - GLaM Web dataset - Wikipedia @@ -964,750 +858,740 @@ - GLaM Forums dataset - BooksCorpus - GLaM News dataset - training_emissions: '' - training_time: '' - training_hardware: '' - quality_control: '' - access: closed + description: '' + feedback: '' + intended_uses: '' license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - 
explanation: > - The asset isn't released, and hence the license is unknown. - intended_uses: '' - prohibited_uses: '' + modality: + explanation: text + value: text; text + model_card: '' monitoring: '' - feedback: '' - -- type: dataset - name: GLaM Web dataset + name: GLaM organization: Google - description: '' - created_date: 2021-12-13 + prohibited_uses: '' + quality_control: '' + size: 1.2T parameters (sparse) + training_emissions: '' + training_hardware: '' + training_time: '' + type: model url: https://arxiv.org/abs/2112.06905 +- access: closed + analysis: '' + created_date: 2021-12-13 datasheet: '' + dependencies: [] + description: '' + excluded: '' + feedback: '' + included: '' + intended_uses: '' + license: + explanation: "The asset isn't released, and hence the license is unknown.\n" + value: unknown modality: text - size: unknown + monitoring: '' + name: GLaM Web dataset + organization: Google + prohibited_uses: '' + quality_control: '' sample: [] + size: unknown + type: dataset + url: https://arxiv.org/abs/2112.06905 +- access: closed analysis: '' + created_date: 2021-12-13 + datasheet: '' dependencies: [] - included: '' + description: '' excluded: '' - quality_control: '' - access: closed + feedback: '' + included: '' + intended_uses: '' license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - intended_uses: '' - prohibited_uses: '' + modality: text monitoring: '' - feedback: '' - -- type: dataset name: GLaM Conversations dataset organization: Google - description: '' - created_date: 2021-12-13 - url: https://arxiv.org/abs/2112.06905 - datasheet: '' - modality: text - size: unknown + prohibited_uses: '' + quality_control: '' sample: [] + size: unknown + type: dataset + url: https://arxiv.org/abs/2112.06905 +- access: closed analysis: '' + created_date: 2021-12-13 + datasheet: '' dependencies: [] - included: '' + description: '' excluded: '' - quality_control: '' - access: closed + feedback: '' + included: '' + intended_uses: '' license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - intended_uses: '' - prohibited_uses: '' + modality: text monitoring: '' - feedback: '' - -- type: dataset name: GLaM Forums dataset organization: Google - description: '' - created_date: 2021-12-13 - url: https://arxiv.org/abs/2112.06905 - datasheet: '' - modality: text - size: unknown + prohibited_uses: '' + quality_control: '' sample: [] + size: unknown + type: dataset + url: https://arxiv.org/abs/2112.06905 +- access: closed analysis: '' + created_date: 2021-12-13 + datasheet: '' dependencies: [] - included: '' + description: '' excluded: '' - quality_control: '' - access: closed + feedback: '' + included: '' + intended_uses: '' license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. 
- intended_uses: '' - prohibited_uses: '' + modality: text monitoring: '' - feedback: '' - -- type: dataset name: GLaM News dataset organization: Google - description: '' - created_date: 2021-12-13 - url: https://arxiv.org/abs/2112.06905 - datasheet: '' - modality: text - size: unknown + prohibited_uses: '' + quality_control: '' sample: [] + size: unknown + type: dataset + url: https://arxiv.org/abs/2112.06905 +- access: closed analysis: '' - dependencies: [] - included: '' - excluded: '' - quality_control: '' - access: closed + created_date: 2021-05-18 + dependencies: [MUM dataset] + description: '' + feedback: '' + intended_uses: '' license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - intended_uses: '' - prohibited_uses: '' + modality: + explanation: text, image + value: image, text; image, text + model_card: '' monitoring: '' - feedback: '' - -- type: model name: MUM organization: Google - description: '' - created_date: 2021-05-18 - url: https://blog.google/products/search/introducing-mum/ - model_card: '' - modality: text, image - analysis: '' + prohibited_uses: '' + quality_control: '' size: unknown - dependencies: [MUM dataset] training_emissions: '' - training_time: '' training_hardware: '' - quality_control: '' - access: closed - license: - value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - intended_uses: '' - prohibited_uses: '' - monitoring: '' - feedback: '' - -- type: dataset - name: MUM dataset - organization: Google - description: '' - created_date: 2021-05-18 + training_time: '' + type: model url: https://blog.google/products/search/introducing-mum/ - datasheet: '' - modality: text, image - size: unknown - sample: [] +- access: closed analysis: '' + created_date: 2021-05-18 + datasheet: '' dependencies: [] - included: '' + description: '' excluded: '' - quality_control: '' - access: closed + feedback: '' + included: '' + intended_uses: '' license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - intended_uses: '' - prohibited_uses: '' + modality: + explanation: text, image + value: image, text monitoring: '' - feedback: '' - -- type: model - name: Phenaki + name: MUM dataset organization: Google - description: '' - created_date: 2023-02-01 - url: https://openreview.net/pdf?id=vOEXS39nOF - model_card: '' - modality: text, video + prohibited_uses: '' + quality_control: '' + sample: [] + size: unknown + type: dataset + url: https://blog.google/products/search/introducing-mum/ +- access: closed analysis: '' - size: 1.8B parameters (dense model) + created_date: 2023-02-01 dependencies: [LAION-400M, Phenaki Video-Text Corpus] - training_emissions: '' - training_time: '' - training_hardware: '' - quality_control: '' - access: closed + description: '' + feedback: '' + intended_uses: '' license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. 
- intended_uses: '' - prohibited_uses: '' + modality: + explanation: text, video + value: text, video; text, video + model_card: '' monitoring: '' - feedback: '' - -- type: dataset - name: Phenaki Video-Text Corpus + name: Phenaki organization: Google - description: '' - created_date: 2023-02-01 + prohibited_uses: '' + quality_control: '' + size: 1.8B parameters (dense) + training_emissions: '' + training_hardware: '' + training_time: '' + type: model url: https://openreview.net/pdf?id=vOEXS39nOF - datasheet: '' - modality: text, video - size: 15M text-video pairs at 8FPS - sample: [] +- access: closed analysis: '' + created_date: 2023-02-01 + datasheet: '' dependencies: [] - included: '' + description: '' excluded: '' - quality_control: '' - access: closed + feedback: '' + included: '' + intended_uses: '' license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - intended_uses: '' - prohibited_uses: '' + modality: text, video monitoring: '' - feedback: '' - -- type: model - name: Flan-UL2 + name: Phenaki Video-Text Corpus organization: Google - description: '' - created_date: 2023-03-02 - url: https://arxiv.org/abs/2205.05131 - model_card: '' - modality: text + prohibited_uses: '' + quality_control: '' + sample: [] + size: 15M text-video pairs at 8FPS + type: dataset + url: https://openreview.net/pdf?id=vOEXS39nOF +- access: open analysis: '' - size: 20B parameters (dense model) + created_date: 2023-03-02 dependencies: [UL2, Flan Collection] - training_emissions: '' - training_time: '' - training_hardware: '' - quality_control: '' - access: open - license: Apache 2.0 + description: '' + feedback: '' intended_uses: '' - prohibited_uses: '' + license: Apache 2.0 + modality: + explanation: text + value: text; text + model_card: '' monitoring: '' - feedback: '' - -- type: dataset - name: Flan Collection + name: Flan-UL2 organization: Google - description: '' + prohibited_uses: '' + quality_control: '' + size: 20B parameters (dense) + training_emissions: '' + training_hardware: '' + training_time: '' + type: model + url: https://arxiv.org/abs/2205.05131 +- access: open + analysis: '' created_date: 2023-01-31 - url: https://arxiv.org/abs/2301.13688 datasheet: '' - modality: text - size: 1836 tasks - sample: [] - analysis: '' dependencies: [Flan dataset, P3, NaturalInstructions-v2] - included: '' + description: '' excluded: '' - quality_control: '' - access: open - license: Apache 2.0 + feedback: '' + included: '' intended_uses: '' - prohibited_uses: '' + license: Apache 2.0 + modality: text monitoring: '' - feedback: '' - -- type: model - name: MusicLM + name: Flan Collection organization: Google - description: '' - created_date: 2023-01-26 - url: https://arxiv.org/pdf/2301.11325.pdf - model_card: '' - modality: audio (music), text + prohibited_uses: '' + quality_control: '' + sample: [] + size: 1836 tasks + type: dataset + url: https://arxiv.org/abs/2301.13688 +- access: closed analysis: '' - size: 1.4B parameters (430M semantic stage, 430M acoustic stage, 600M w2v-BERT) + created_date: 2023-01-26 dependencies: - SoundStream - w2v-BERT - MuLan - MusicLM semantic model - MusicLM acoustic model + description: '' + feedback: '' + intended_uses: '' + license: + explanation: "The asset isn't released, and hence the license is unknown.\n" + value: unknown + modality: + explanation: audio (music), text + value: audio, text; audio, text + model_card: '' + monitoring: '' + name: 
MusicLM + organization: Google + prohibited_uses: '' + quality_control: '' + size: 1.4B parameters (dense) training_emissions: '' - training_time: '' training_hardware: '' - quality_control: '' - access: closed + training_time: '' + type: model + url: https://arxiv.org/pdf/2301.11325.pdf +- access: closed + analysis: '' + created_date: 2023-01-26 + dependencies: [Free Music Archive] + description: '' + feedback: '' + intended_uses: '' license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - intended_uses: '' - prohibited_uses: '' + modality: + explanation: audio (music) + value: audio; audio + model_card: '' monitoring: '' - feedback: '' - -- type: model name: SoundStream organization: Google - description: '' - created_date: 2023-01-26 - url: https://arxiv.org/pdf/2301.11325.pdf - model_card: '' - modality: audio (music) - analysis: '' + prohibited_uses: '' + quality_control: '' size: unknown - dependencies: [Free Music Archive] training_emissions: '' - training_time: '' training_hardware: '' - quality_control: '' - access: closed + training_time: '' + type: model + url: https://arxiv.org/pdf/2301.11325.pdf +- access: closed + analysis: '' + created_date: 2023-01-26 + dependencies: [Free Music Archive] + description: '' + feedback: '' + intended_uses: '' license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - intended_uses: '' - prohibited_uses: '' + modality: + explanation: audio (music) + value: audio; audio + model_card: '' monitoring: '' - feedback: '' - -- type: model name: w2v-BERT organization: Google - description: '' - created_date: 2023-01-26 - url: https://arxiv.org/pdf/2301.11325.pdf - model_card: '' - modality: audio (music) - analysis: '' - size: 600M parameter (dense model) - dependencies: [Free Music Archive] + prohibited_uses: '' + quality_control: '' + size: 600M parameters (dense) training_emissions: '' - training_time: '' training_hardware: '' - quality_control: '' - access: closed + training_time: '' + type: model + url: https://arxiv.org/pdf/2301.11325.pdf +- access: closed + analysis: '' + created_date: 2022-08-26 + dependencies: [AST, BERT, MuLan dataset] + description: '' + feedback: '' + intended_uses: '' license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. 
- intended_uses: '' - prohibited_uses: '' + modality: + explanation: audio (music), text + value: audio, text; audio, text + model_card: '' monitoring: '' - feedback: '' - -- type: model name: MuLan organization: Google - description: '' - created_date: 2022-08-26 - url: https://arxiv.org/abs/2208.12415 - model_card: '' - modality: audio (music), text - analysis: '' + prohibited_uses: '' + quality_control: '' size: Unknown - dependencies: [AST, BERT, MuLan dataset] training_emissions: '' - training_time: '' training_hardware: '' - quality_control: '' - access: closed + training_time: '' + type: model + url: https://arxiv.org/abs/2208.12415 +- access: closed + analysis: '' + created_date: 2022-08-26 + datasheet: '' + dependencies: [] + description: '' + excluded: '' + feedback: '' + included: '' + intended_uses: '' license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - intended_uses: '' - prohibited_uses: '' + modality: + explanation: audio (music), text + value: audio, text monitoring: '' - feedback: '' - -- type: dataset name: MuLan dataset organization: Google - description: '' - created_date: 2022-08-26 - url: https://arxiv.org/abs/2208.12415 - datasheet: '' - modality: audio (music), text - size: 370K hours audio + prohibited_uses: '' + quality_control: '' sample: [] + size: 370K hours audio + type: dataset + url: https://arxiv.org/abs/2208.12415 +- access: closed analysis: '' + created_date: 2023-01-26 + datasheet: '' dependencies: [] - included: '' + description: '' excluded: '' - quality_control: '' - access: closed + feedback: '' + included: '' + intended_uses: '' license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - intended_uses: '' - prohibited_uses: '' + modality: + explanation: audio (music) + value: audio monitoring: '' - feedback: '' - -- type: dataset name: MusicLM dataset organization: Google - description: '' - created_date: 2023-01-26 - url: https://arxiv.org/pdf/2301.11325.pdf - datasheet: '' - modality: audio (music) - size: 280K hours audio + prohibited_uses: '' + quality_control: '' sample: [] + size: 280K hours audio + type: dataset + url: https://arxiv.org/pdf/2301.11325.pdf +- access: closed analysis: '' - dependencies: [] - included: '' - excluded: '' - quality_control: '' - access: closed + created_date: 2023-01-26 + dependencies: [MusicLM dataset] + description: '' + feedback: '' + intended_uses: '' license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. 
- intended_uses: '' - prohibited_uses: '' + modality: + explanation: Audio (music) + value: audio; audio + model_card: '' monitoring: '' - feedback: '' - -- type: model name: MusicLM semantic model organization: Google - description: '' - created_date: 2023-01-26 + prohibited_uses: '' + quality_control: '' + size: 430M parameters (dense) + training_emissions: '' + training_hardware: '' + training_time: '' + type: model url: https://arxiv.org/pdf/2301.11325.pdf - model_card: '' - modality: Audio (music) +- access: closed analysis: '' - size: 430M parameters (dense model) + created_date: 2023-01-26 dependencies: [MusicLM dataset] - training_emissions: '' - training_time: '' - training_hardware: '' - quality_control: '' - access: closed + description: '' + feedback: '' + intended_uses: '' license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - intended_uses: '' - prohibited_uses: '' + modality: + explanation: Audio (music) + value: audio; audio + model_card: '' monitoring: '' - feedback: '' - -- type: model name: MusicLM acoustic model organization: Google - description: '' - created_date: 2023-01-26 - url: https://arxiv.org/pdf/2301.11325.pdf - model_card: '' - modality: Audio (music) - analysis: '' - size: 430M parameters (dense model) - dependencies: [MusicLM dataset] + prohibited_uses: '' + quality_control: '' + size: 430M parameters (dense) training_emissions: '' - training_time: '' training_hardware: '' - quality_control: '' - access: closed + training_time: '' + type: model + url: https://arxiv.org/pdf/2301.11325.pdf +- access: closed + analysis: '' + created_date: 2023-02-08 + dependencies: [Noise2Music pseudolabel dataset] + description: '' + feedback: '' + intended_uses: '' license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - intended_uses: '' - prohibited_uses: '' + modality: + explanation: audio (music), text + value: audio, text; audio, text + model_card: '' monitoring: '' - feedback: '' - -- type: model name: Noise2Music organization: Google - description: '' - created_date: 2023-02-08 - url: https://google-research.github.io/noise2music/noise2music.pdf - model_card: '' - modality: audio (music), text - analysis: '' + prohibited_uses: '' + quality_control: '' size: unknown - dependencies: [Noise2Music pseudolabel dataset] training_emissions: '' - training_time: '' training_hardware: '' - quality_control: '' - access: closed + training_time: '' + type: model + url: https://google-research.github.io/noise2music/noise2music.pdf +- access: closed + analysis: '' + created_date: 2023-02-08 + datasheet: '' + dependencies: [LaMDA] + description: '' + excluded: '' + feedback: '' + included: '' + intended_uses: '' license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. 
- intended_uses: '' - prohibited_uses: '' + modality: text monitoring: '' - feedback: '' - -- type: dataset name: LaMDA-LF organization: Google - description: '' - created_date: 2023-02-08 - url: https://google-research.github.io/noise2music/noise2music.pdf - datasheet: '' - modality: text - size: 150k songs + prohibited_uses: '' + quality_control: '' sample: [] + size: 150k songs + type: dataset + url: https://google-research.github.io/noise2music/noise2music.pdf +- access: closed analysis: '' - dependencies: [LaMDA] - included: '' + created_date: 2023-02-08 + datasheet: '' + dependencies: [MusicCaps] + description: '' excluded: '' - quality_control: '' - access: closed + feedback: '' + included: '' + intended_uses: '' license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - intended_uses: '' - prohibited_uses: '' + modality: text monitoring: '' - feedback: '' - -- type: dataset name: Rater-LF organization: Google - description: '' - created_date: 2023-02-08 - url: https://google-research.github.io/noise2music/noise2music.pdf - datasheet: '' - modality: text - size: 10k captions + prohibited_uses: '' + quality_control: '' sample: [] + size: 10k captions + type: dataset + url: https://google-research.github.io/noise2music/noise2music.pdf +- access: closed analysis: '' + created_date: 2023-02-08 + datasheet: '' dependencies: [MusicCaps] - included: '' + description: '' excluded: '' - quality_control: '' - access: closed + feedback: '' + included: '' + intended_uses: '' license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - intended_uses: '' - prohibited_uses: '' + modality: + explanation: Text + value: text monitoring: '' - feedback: '' - -- type: dataset name: Rater-SF organization: Google - description: '' - created_date: 2023-02-08 - url: https://google-research.github.io/noise2music/noise2music.pdf - datasheet: '' - modality: Text - size: 24k captions + prohibited_uses: '' + quality_control: '' sample: [] + size: 24k captions + type: dataset + url: https://google-research.github.io/noise2music/noise2music.pdf +- access: closed analysis: '' - dependencies: [MusicCaps] - included: '' - excluded: '' - quality_control: '' - access: closed + created_date: 2023-02-08 + dependencies: [MuLan, MuLaMCap, LaMDA-LF, Rater-LF, Rater-SF] + description: '' + feedback: '' + intended_uses: '' license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. 
- intended_uses: '' - prohibited_uses: '' + modality: + explanation: audio (music), text + value: audio, text; audio, text + model_card: '' monitoring: '' - feedback: '' - -- type: model name: Noise2Music pseudolabeler organization: Google - description: '' - created_date: 2023-02-08 - url: https://google-research.github.io/noise2music/noise2music.pdf - model_card: '' - modality: audio (music), text - analysis: '' + prohibited_uses: '' + quality_control: '' size: unknown - dependencies: [MuLan, MuLaMCap, LaMDA-LF, Rater-LF, Rater-SF] training_emissions: '' - training_time: '' training_hardware: '' - quality_control: '' - access: closed + training_time: '' + type: model + url: https://google-research.github.io/noise2music/noise2music.pdf +- access: closed + analysis: '' + created_date: 2023-02-08 + datasheet: '' + dependencies: [] + description: '' + excluded: '' + feedback: '' + included: '' + intended_uses: '' license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - intended_uses: '' - prohibited_uses: '' + modality: + explanation: audio (music) + value: audio monitoring: '' - feedback: '' - -- type: dataset name: Noise2Music audio dataset organization: Google - description: '' - created_date: 2023-02-08 - url: https://google-research.github.io/noise2music/noise2music.pdf - datasheet: '' - modality: audio (music) - size: 340k hours audio + prohibited_uses: '' + quality_control: '' sample: [] + size: 340k hours audio + type: dataset + url: https://google-research.github.io/noise2music/noise2music.pdf +- access: closed analysis: '' - dependencies: [] - included: '' + created_date: 2023-02-08 + datasheet: '' + dependencies: [Noise2Music audio dataset, Noise2Music pseudolabeler] + description: '' excluded: '' - quality_control: '' - access: closed + feedback: '' + included: '' + intended_uses: '' license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - intended_uses: '' - prohibited_uses: '' + modality: + explanation: audio (music), text + value: audio, text monitoring: '' - feedback: '' - -- type: dataset name: Noise2Music pseudolabel dataset organization: Google - description: '' - created_date: 2023-02-08 - url: https://google-research.github.io/noise2music/noise2music.pdf - datasheet: '' - modality: audio (music), text - size: 340k hours audio with pseudolabels - sample: [] - analysis: '' - dependencies: [Noise2Music audio dataset, Noise2Music pseudolabeler] - included: '' - excluded: '' + prohibited_uses: '' quality_control: '' - access: closed + sample: [] + size: 340k hours audio with pseudolabels + type: dataset + url: https://google-research.github.io/noise2music/noise2music.pdf +- access: limited + adaptation: '' + created_date: 2022-08-25 + dependencies: [LaMDA] + description: AI Test Kitchen provides a new way for people to learn about, experience, + and give feedback on emerging AI technology, like LaMDA. + failures: '' + feedback: '' + intended_uses: '' license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. 
- intended_uses: '' - prohibited_uses: '' monitoring: '' - feedback: '' - -- type: application + monthly_active_users: '' name: AI Test Kitchen organization: Google - description: AI Test Kitchen provides a new way for people to learn about, experience, - and give feedback on emerging AI technology, like LaMDA. - created_date: 2022-08-25 - url: https://blog.google/technology/ai/join-us-in-the-ai-test-kitchen/ - dependencies: [LaMDA] - adaptation: '' output_space: '' + prohibited_uses: '' quality_control: '' - access: limited - license: - value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. terms_of_service: '' + type: application + url: https://blog.google/technology/ai/join-us-in-the-ai-test-kitchen/ + user_distribution: '' +- access: closed + adaptation: '' + created_date: 2023-02-06 + dependencies: [LaMDA] + description: Conversational AI service, powered by LaMDA + failures: '' + feedback: '' intended_uses: '' - prohibited_uses: '' + license: + explanation: "The asset isn't released, and hence the license is unknown.\n" + value: unknown monitoring: '' - feedback: '' monthly_active_users: '' - user_distribution: '' - failures: '' - -- type: application name: Bard organization: Google - description: Conversational AI service, powered by LaMDA - created_date: 2023-02-06 - url: https://blog.google/technology/ai/bard-google-ai-search-updates/ - dependencies: [LaMDA] - adaptation: '' output_space: '' + prohibited_uses: '' quality_control: '' - access: closed - license: - value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. terms_of_service: '' - intended_uses: '' - prohibited_uses: '' - monitoring: '' - feedback: '' - monthly_active_users: '' + type: application + url: https://blog.google/technology/ai/bard-google-ai-search-updates/ user_distribution: '' - failures: '' - -- type: model - name: Minerva - organization: Google - description: '' - created_date: 2022-06-29 - url: https://arxiv.org/abs/2206.14858 - model_card: '' - modality: text +- access: closed analysis: '' - size: 540B parameters (dense model) + created_date: 2022-06-29 dependencies: - PaLM - arXiv - PaLM dataset - Minerva Math Web Pages dataset - training_emissions: '' - training_time: '' - training_hardware: '' - quality_control: '' - access: closed + description: '' + feedback: '' + intended_uses: '' license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - intended_uses: '' - prohibited_uses: '' + modality: + explanation: text + value: text; text + model_card: '' monitoring: '' - feedback: '' - -- type: dataset - name: Minerva Math Web Pages dataset + name: Minerva organization: Google - description: '' - created_date: 2022-06-29 + prohibited_uses: '' + quality_control: '' + size: 540B parameters (dense) + training_emissions: '' + training_hardware: '' + training_time: '' + type: model url: https://arxiv.org/abs/2206.14858 - datasheet: '' - modality: Text - size: 17.5B tokens - sample: [] +- access: closed analysis: '' + created_date: 2022-06-29 + datasheet: '' dependencies: [] - included: '' + description: '' excluded: '' - quality_control: '' - access: closed + feedback: '' + included: '' + intended_uses: '' license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. 
- intended_uses: '' - prohibited_uses: '' + modality: + explanation: Text + value: text monitoring: '' - feedback: '' - -- type: model - name: USM + name: Minerva Math Web Pages dataset organization: Google + prohibited_uses: '' + quality_control: '' + sample: [] + size: 17.5B tokens + type: dataset + url: https://arxiv.org/abs/2206.14858 +- access: limited + analysis: '' + created_date: 2023-03-06 + dependencies: [YT-NLU-U, Pub-U, Web-NTL, YT-SUP+, Pub-S] description: Universal Speech Model (USM) is a family of state-of-the-art speech models with 2B parameters trained on 12 million hours of speech and 28 billion sentences of text, spanning 300+ languages. USM, which is for use in YouTube @@ -1717,269 +1601,271 @@ Soga, Maninka, Xhosa, Akan, Lingala, Chichewa, Nkore, Nzema to name a few. Some of these languages are spoken by fewer than twenty million people, making it very hard to find the necessary training data. - created_date: 2023-03-06 - url: https://arxiv.org/abs/2303.01037 - model_card: '' - modality: speech, text (300+ languages) - analysis: '' - size: 2B parameters (dense model) - dependencies: [YT-NLU-U, Pub-U, Web-NTL, YT-SUP+, Pub-S] - training_emissions: '' - training_time: '' - training_hardware: '' - quality_control: '' - access: limited + feedback: '' + intended_uses: '' license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - intended_uses: '' - prohibited_uses: '' + modality: + explanation: speech, text (300+ languages) + value: audio, text; audio, text + model_card: '' monitoring: '' - feedback: '' - -- type: application - name: YouTube + name: USM organization: Google - description: YouTube is a global online video sharing and social media platform + prohibited_uses: '' + quality_control: '' + size: 2B parameters (dense) + training_emissions: '' + training_hardware: '' + training_time: '' + type: model + url: https://arxiv.org/abs/2303.01037 +- access: open + adaptation: '' created_date: 2005-02-14 - url: https://www.youtube.com/ dependencies: [USM] - adaptation: '' + description: YouTube is a global online video sharing and social media platform + failures: '' + feedback: '' + intended_uses: '' + license: '' + monitoring: '' + monthly_active_users: '' + name: YouTube + organization: Google output_space: '' + prohibited_uses: '' quality_control: '' - access: open - license: '' terms_of_service: '' + type: application + url: https://www.youtube.com/ + user_distribution: '' +- access: closed + analysis: '' + created_date: 2023-03-06 + dependencies: [PaLM, ViT-22B] + description: '' + feedback: '' intended_uses: '' - prohibited_uses: '' + license: + explanation: "The asset isn't released, and hence the license is unknown.\n" + value: unknown + modality: + explanation: text, image + value: image, text; image, text + model_card: '' monitoring: '' - feedback: '' - monthly_active_users: '' - user_distribution: '' - failures: '' - -- type: model name: PaLM-E organization: Google - description: '' - created_date: 2023-03-06 - url: https://arxiv.org/abs/2303.03378 - model_card: '' - modality: text, image - analysis: '' - size: 562B parameters (dense model) - dependencies: [PaLM, ViT-22B] + prohibited_uses: '' + quality_control: '' + size: 562B parameters (dense) training_emissions: '' - training_time: '' training_hardware: '' - quality_control: '' - access: closed + training_time: '' + type: model + url: https://arxiv.org/abs/2303.03378 +- access: closed 
+ analysis: '' + created_date: 2023-02-10 + dependencies: [JFT] + description: '' + feedback: '' + intended_uses: '' license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - intended_uses: '' - prohibited_uses: '' + modality: + explanation: image + value: image; image + model_card: '' monitoring: '' - feedback: '' - -- type: model name: ViT-22B organization: Google - description: '' - created_date: 2023-02-10 - url: https://arxiv.org/abs/2302.05442 - model_card: '' - modality: image - analysis: '' - size: 22B parameters (dense model) - dependencies: [JFT] + prohibited_uses: '' + quality_control: '' + size: 22B parameters (dense) training_emissions: '' - training_time: '' training_hardware: '' - quality_control: '' - access: closed + training_time: '' + type: model + url: https://arxiv.org/abs/2302.05442 +- access: closed + analysis: '' + created_date: 2022-09-07 + dependencies: [w2v-BERT, SoundStream] + description: '' + feedback: '' + intended_uses: '' license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - intended_uses: '' - prohibited_uses: '' + modality: + explanation: audio, text + value: audio, text; audio, text + model_card: '' monitoring: '' - feedback: '' - -- type: model name: AudioLM organization: Google - description: '' - created_date: 2022-09-07 - url: https://arxiv.org/abs/2209.03143 - model_card: '' - modality: audio, text - analysis: '' - size: 1B parameters (dense model) - dependencies: [w2v-BERT, SoundStream] + prohibited_uses: '' + quality_control: '' + size: 1B parameters (dense) training_emissions: '' - training_time: '' training_hardware: '' - quality_control: '' - access: closed + training_time: '' + type: model + url: https://arxiv.org/abs/2209.03143 +- access: closed + analysis: '' + created_date: 2022-09-14 + dependencies: [mT5, ViT-e, WebLI] + description: '' + feedback: '' + intended_uses: '' license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - intended_uses: '' - prohibited_uses: '' + modality: + explanation: text, image + value: image, text; image, text + model_card: '' monitoring: '' - feedback: '' - -- type: model name: PaLI organization: Google - description: '' - created_date: 2022-09-14 - url: https://arxiv.org/abs/2209.06794 - model_card: '' - modality: text, image - analysis: '' - size: 17B parameters (13B text, 4B images) - dependencies: [mT5, ViT-e, WebLI] + prohibited_uses: '' + quality_control: '' + size: 17B parameters (dense) training_emissions: '' - training_time: '' training_hardware: '' - quality_control: '' - access: closed + training_time: '' + type: model + url: https://arxiv.org/abs/2209.06794 +- access: closed + analysis: '' + created_date: 2022-09-14 + dependencies: [JFT] + description: '' + feedback: '' + intended_uses: '' license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. 
- intended_uses: '' - prohibited_uses: '' + modality: + explanation: image + value: image; image + model_card: '' monitoring: '' - feedback: '' - -- type: model name: ViT-e organization: Google - description: '' - created_date: 2022-09-14 - url: https://arxiv.org/abs/2209.06794 - model_card: '' - modality: image - analysis: '' - size: 3.9B parameters (dense model) - dependencies: [JFT] + prohibited_uses: '' + quality_control: '' + size: 3.9B parameters (dense) training_emissions: '' - training_time: '' training_hardware: '' - quality_control: '' - access: closed + training_time: '' + type: model + url: https://arxiv.org/abs/2209.06794 +- access: closed + analysis: '' + created_date: 2022-09-14 + datasheet: '' + dependencies: [] + description: '' + excluded: '' + feedback: '' + included: '' + intended_uses: '' license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - intended_uses: '' - prohibited_uses: '' + modality: + explanation: text, image + value: image, text monitoring: '' - feedback: '' - -- type: dataset name: WebLI organization: Google - description: '' - created_date: 2022-09-14 - url: https://arxiv.org/abs/2209.06794 - datasheet: '' - modality: text, image - size: 10B images, 12B alt-text + prohibited_uses: '' + quality_control: '' sample: [] + size: 10B images, 12B alt-text + type: dataset + url: https://arxiv.org/abs/2209.06794 +- access: open analysis: '' - dependencies: [] - included: '' - excluded: '' - quality_control: '' - access: closed - license: - value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. + created_date: 2023-02-27 + dependencies: [T5, CLIP, YT-Temporal-1B] + description: '' + feedback: '' intended_uses: '' - prohibited_uses: '' + license: Apache 2.0 + modality: + explanation: text, video + value: text, video; text, video + model_card: '' monitoring: '' - feedback: '' - -- type: model name: Vid2Seq organization: Google - description: '' - created_date: 2023-02-27 - url: https://arxiv.org/abs/2302.14115 - model_card: '' - modality: text, video - analysis: '' - size: 500M parameters (200M text, 300M video) - dependencies: [T5, CLIP, YT-Temporal-1B] + prohibited_uses: '' + quality_control: '' + size: 500M parameters (dense) training_emissions: '' - training_time: '' training_hardware: '' - quality_control: '' - access: open - license: Apache 2.0 + training_time: '' + type: model + url: https://arxiv.org/abs/2302.14115 +- access: open + analysis: evaluated on DSTC11 Challenge Task, based on MultiWoz 2.1, with a focus + on dialog state tracking. + created_date: 2023-06-08 + dependencies: [CTC blank-filtering, Speech2Text adapter] + description: Joint speech and language model using a Speech2Text adapter and using + a CTC-based blank-filtering. + feedback: '' intended_uses: '' - prohibited_uses: '' + license: Google + modality: + explanation: speech input with textual output + value: audio; text + model_card: '' monitoring: '' - feedback: '' - -- type: model name: Google Joint SLM organization: Google - description: Joint speech and language model using a Speech2Text adapter and using - a CTC-based blank-filtering. - created_date: 2023-06-08 - url: https://arxiv.org/pdf/2306.07944.pdf - model_card: '' - modality: speech input with textual output - analysis: evaluated on DSTC11 Challenge Task, based on MultiWoz 2.1, with a focus - on dialog state tracking. 
+ prohibited_uses: '' + quality_control: '' size: '' - dependencies: [CTC blank-filtering, Speech2Text adapter] training_emissions: '' - training_time: '' training_hardware: '' - quality_control: '' - access: open - license: Google - intended_uses: '' - prohibited_uses: '' - monitoring: '' - feedback: '' - -- type: model - name: PaLM 2 - organization: Google + training_time: '' + type: model + url: https://arxiv.org/pdf/2306.07944.pdf +- access: open + analysis: Reports results on standard code benchmarks across a variety of programming + languages. + created_date: 2023-05-10 + dependencies: [palm 2 dataset] description: PaLM2 is a new state-of-the-art language model that has better multilingual and reasoning capabilities and is more compute-efficient than its predecessor PaLM. PaLM 2 is a Transformer-based model trained using a mixture of objectives similar to UL2. - created_date: 2023-05-10 - url: https://blog.google/technology/ai/google-palm-2-ai-large-language-model/ - model_card: https://ai.google/static/documents/palm2techreport.pdf - modality: text (input and output) - analysis: Reports results on standard code benchmarks across a variety of programming - languages. - size: 3.6T parameters - dependencies: [palm 2 dataset] - training_emissions: '' - training_time: '' - training_hardware: TPU v4 (number unspecified) - quality_control: Employed de-duplication, removal of sensitive-PII and filtering. - Added control tokens marking toxicity of text. - access: open - license: Google + feedback: Specific queries provided by annotators intended_uses: general use large language model that can be used for language, reasoning, and code tasks. + license: Google + modality: + explanation: text (input and output) + value: text; text + model_card: https://ai.google/static/documents/palm2techreport.pdf + monitoring: Google internal monitoring + name: PaLM 2 + organization: Google prohibited_uses: becoming part of a general-purpose service or product or use within specific downstream applications without prior assessment - monitoring: Google internal monitoring - feedback: Specific queries provided by annotators + quality_control: Employed de-duplication, removal of sensitive-PII and filtering. + Added control tokens marking toxicity of text. + size: 3.6T parameters (dense) + training_emissions: '' + training_hardware: TPU v4 (number unspecified) + training_time: '' + type: model + url: https://blog.google/technology/ai/google-palm-2-ai-large-language-model/ diff --git a/assets/h2oai.yaml b/assets/h2oai.yaml index 601e629d..53654100 100644 --- a/assets/h2oai.yaml +++ b/assets/h2oai.yaml @@ -1,23 +1,24 @@ --- - -- type: model - name: h2oGPT - organization: H2O AI - description: Series of models fine-tuned on well-known LLMs using the h2oGPT repositories. - created_date: 2023-06-16 - url: https://arxiv.org/pdf/2306.08161.pdf - model_card: https://huggingface.co/h2oai/h2ogpt-oasst1-512-20b - modality: text +- access: open analysis: Evaluated on EleutherAI evaluation harness. - size: 20B parameters + created_date: 2023-06-16 dependencies: [GPT-NeoX, H2O AI OpenAssistant, h2oGPT Repositories] - training_emissions: unknown - training_time: unknown - training_hardware: unspecified number of 48GB A100 NVIDIA GPUs - quality_control: '' - access: open - license: Apache 2.0 + description: Series of models fine-tuned on well-known LLMs using the h2oGPT repositories. 
+ feedback: https://huggingface.co/h2oai/h2ogpt-oasst1-512-20b/discussions intended_uses: '' - prohibited_uses: '' + license: Apache 2.0 + modality: + explanation: text + value: text; text + model_card: https://huggingface.co/h2oai/h2ogpt-oasst1-512-20b monitoring: '' - feedback: https://huggingface.co/h2oai/h2ogpt-oasst1-512-20b/discussions + name: h2oGPT + organization: H2O AI + prohibited_uses: '' + quality_control: '' + size: 20B parameters (dense) + training_emissions: unknown + training_hardware: unspecified number of 48GB A100 NVIDIA GPUs + training_time: unknown + type: model + url: https://arxiv.org/pdf/2306.08161.pdf diff --git a/assets/hubspot.yaml b/assets/hubspot.yaml index 90298984..91876dd7 100644 --- a/assets/hubspot.yaml +++ b/assets/hubspot.yaml @@ -1,28 +1,26 @@ --- - -- type: application - name: ChatGPT powered by OBO - organization: HubSpot +- access: limited + adaptation: '' + created_date: 2023-01-31 + dependencies: [ChatGPT API] description: Give your sales, marketing, and customer service teams one of the most powerful AI tools available - ChatGPT priority access, no timeout limits, company wide access managed through a single account, incorporate into your existing processes without leaving HubSpot - created_date: 2023-01-31 - url: https://ecosystem.hubspot.com/marketplace/apps/sales/sales-enablement/the-obo-group-chatgpt-1398072 - dependencies: [ChatGPT API] - adaptation: '' - output_space: '' - quality_control: '' - access: limited + failures: '' + feedback: '' + intended_uses: '' license: + explanation: "We weren't able to locate a license.\n" value: unknown - explanation: > - We weren't able to locate a license. - terms_of_service: '' - intended_uses: '' - prohibited_uses: '' monitoring: '' - feedback: '' monthly_active_users: '' + name: ChatGPT powered by OBO + organization: HubSpot + output_space: '' + prohibited_uses: '' + quality_control: '' + terms_of_service: '' + type: application + url: https://ecosystem.hubspot.com/marketplace/apps/sales/sales-enablement/the-obo-group-chatgpt-1398072 user_distribution: '' - failures: '' diff --git a/assets/huggingface.yaml b/assets/huggingface.yaml index 84710565..23e07d5d 100644 --- a/assets/huggingface.yaml +++ b/assets/huggingface.yaml @@ -1,29 +1,30 @@ --- - -- type: model - name: CodeParrot - organization: HuggingFace - description: CodeParrot is an autoregressive language model trained on code +- access: + explanation: The license is provided in the [[HuggingFace repository]](https://huggingface.co/codeparrot/codeparrot) + value: open + analysis: none created_date: - value: 2021-12-06 explanation: The date the model was announced - url: https://twitter.com/lvwerra/status/1467933794699259908 - model_card: none - modality: code (Python) - analysis: none - size: 1B parameters (dense model) + value: 2021-12-06 dependencies: [] - training_emissions: unknown - training_time: unknown - training_hardware: 16 x A100 (40GB) - quality_control: none - access: - value: open - explanation: The license is provided in the [[HuggingFace repository]](https://huggingface.co/codeparrot/codeparrot) + description: CodeParrot is an autoregressive language model trained on code + feedback: none + intended_uses: none license: - value: none explanation: No license is explicitly provided for this model. 
- intended_uses: none - prohibited_uses: none + value: none + modality: + explanation: code (Python) + value: code; code + model_card: none monitoring: none - feedback: none + name: CodeParrot + organization: HuggingFace + prohibited_uses: none + quality_control: none + size: 1B parameters (dense) + training_emissions: unknown + training_hardware: 16 x A100 (40GB) + training_time: unknown + type: model + url: https://twitter.com/lvwerra/status/1467933794699259908 diff --git a/assets/inflection.yaml b/assets/inflection.yaml index 5113a5f1..9283431b 100644 --- a/assets/inflection.yaml +++ b/assets/inflection.yaml @@ -1,49 +1,47 @@ --- - -- type: model - name: Inflection-1 - organization: Inflection AI - description: Inflection AI's in-house LLM, which powers Pi and will soon be available - via Inflection AI's conversational API. - created_date: 2023-06-22 - url: https://inflection.ai/inflection-1 - model_card: '' - modality: text +- access: limited analysis: Evaluated on wide range of language benchmarks like MMLU 5-shot, GSM-8K, and HellaSwag 10-shot among others. - size: - value: unknown - explanation: Designed to be roughly around Chat-GPT's size (175B parameters). + created_date: 2023-06-22 dependencies: [] + description: Inflection AI's in-house LLM, which powers Pi and will soon be available + via Inflection AI's conversational API. + feedback: '' + intended_uses: '' + license: unknown + modality: + explanation: text + value: text; text + model_card: '' + monitoring: '' + name: Inflection-1 + organization: Inflection AI + prohibited_uses: '' + quality_control: '' + size: unknown training_emissions: '' - training_time: '' training_hardware: unknown - quality_control: '' - access: limited + training_time: '' + type: model + url: https://inflection.ai/inflection-1 +- access: limited + adaptation: '' + created_date: 2023-05-02 + dependencies: [Inflection-1] + description: Personal AI chatbot designed to be conversational and specialized + in emotional intelligence. + failures: '' + feedback: '' + intended_uses: to be used as a personal assistant chatbot for everyday activities license: unknown - intended_uses: '' - prohibited_uses: '' monitoring: '' - feedback: '' - -- type: application + monthly_active_users: '' name: Pi organization: Inflection AI - description: Personal AI chatbot designed to be conversational and specialized - in emotional intelligence. - created_date: 2023-05-02 - url: https://inflection.ai/press - dependencies: [Inflection-1] - adaptation: '' output_space: natural language text responses + prohibited_uses: '' quality_control: '' - access: limited - license: unknown terms_of_service: '' - intended_uses: to be used as a personal assistant chatbot for everyday activities - prohibited_uses: '' - monitoring: '' - feedback: '' - monthly_active_users: '' + type: application + url: https://inflection.ai/press user_distribution: '' - failures: '' diff --git a/assets/instacart.yaml b/assets/instacart.yaml index b3a970ad..4dc27bf8 100644 --- a/assets/instacart.yaml +++ b/assets/instacart.yaml @@ -1,27 +1,26 @@ --- - -- type: application - name: Ask Instacart - organization: Instacart +- access: limited + adaptation: '' + created_date: 2023-03-01 + dependencies: [ChatGPT API] description: Instacart is augmenting the Instacart app to enable customers to ask about food and get inspirational, shoppable answers. 
This uses ChatGPT alongside Instacart’s own AI and product data from their 75,000+ retail partner store locations to help customers discover ideas for open-ended shopping goals, such as “How do I make great fish tacos?” or “What’s a healthy lunch for my kids?” Instacart plans to launch “Ask Instacart” later this year. - created_date: 2023-03-01 - url: https://openai.com/blog/introducing-chatgpt-and-whisper-apis - dependencies: [ChatGPT API] - adaptation: '' - output_space: '' - quality_control: '' - access: limited - license: '' - terms_of_service: '' + failures: '' + feedback: '' intended_uses: '' - prohibited_uses: '' + license: '' monitoring: '' - feedback: '' monthly_active_users: '' + name: Ask Instacart + organization: Instacart + output_space: '' + prohibited_uses: '' + quality_control: '' + terms_of_service: '' + type: application + url: https://openai.com/blog/introducing-chatgpt-and-whisper-apis user_distribution: '' - failures: '' diff --git a/assets/juni.yaml b/assets/juni.yaml index 13d2757a..9e175922 100644 --- a/assets/juni.yaml +++ b/assets/juni.yaml @@ -1,22 +1,21 @@ --- - -- type: application - name: Juni Tutor Bot - organization: Juni Learning - description: An online tutoring solution to help students achieve academic success. +- access: limited + adaptation: '' created_date: unknown - url: https://junilearning.com/ dependencies: [Anthropic API] - adaptation: '' - output_space: '' - quality_control: '' - access: limited - license: unknown - terms_of_service: '' + description: An online tutoring solution to help students achieve academic success. + failures: '' + feedback: '' intended_uses: '' - prohibited_uses: '' + license: unknown monitoring: '' - feedback: '' monthly_active_users: '' + name: Juni Tutor Bot + organization: Juni Learning + output_space: '' + prohibited_uses: '' + quality_control: '' + terms_of_service: '' + type: application + url: https://junilearning.com/ user_distribution: '' - failures: '' diff --git a/assets/kakaobrain.yaml b/assets/kakaobrain.yaml index 4c9546a8..f95f0b13 100644 --- a/assets/kakaobrain.yaml +++ b/assets/kakaobrain.yaml @@ -1,32 +1,25 @@ --- - -- type: dataset - name: COYO-700M - organization: Kakao Brain - description: > - COYO-700M is a large-scale dataset that contains 747M image-text pairs as well - as many other meta-attributes to increase the usability to train various models. +- access: + explanation: You can download the dataset from [[COYO Huggingface page]](https://huggingface.co/datasets/kakaobrain/coyo-700m). + For more information on downloading the image dataset, please refer to [[download/README.md]](https://github.com/kakaobrain/coyo-dataset/blob/main/download/README.md) + on Github. + value: open + analysis: + explanation: See [[Experiments]](https://github.com/kakaobrain/coyo-dataset#experiments) + value: "\"We empirically validated the quality of COYO dataset by re-implementing\ + \ popular models such as ALIGN, unCLIP, and ViT. We trained these models on\ + \ COYO-700M or its subsets from scratch, achieving competitive performance\ + \ to the reported numbers or generated samples in the original papers.\"\n" created_date: - value: 2022-08-31 explanation: As mentioned in the [[Google Scholar page]](https://scholar.google.com/citations?view_op=view_citation&hl=en&user=8WXfStMAAAAJ&citation_for_view=8WXfStMAAAAJ:-f6ydRqryjwC). 
- url: https://github.com/kakaobrain/coyo-dataset + value: 2022-08-31 datasheet: none - modality: text (English) and image - size: 747M image-text pairs - sample: [] - analysis: - value: > - "We empirically validated the quality of COYO dataset by re-implementing popular - models such as ALIGN, unCLIP, and ViT. We trained these models on COYO-700M - or its subsets from scratch, achieving competitive performance to the reported - numbers or generated samples in the original papers." - explanation: See [[Experiments]](https://github.com/kakaobrain/coyo-dataset#experiments) dependencies: [CommonCrawl] - included: - value: Included all image formats that Pillow library can decode. Collected - only English text using cld3. - explanation: See [[Data Filtering]](https://github.com/kakaobrain/coyo-dataset#data-filtering) + description: "COYO-700M is a large-scale dataset that contains 747M image-text\ + \ pairs as well as many other meta-attributes to increase the usability to train\ + \ various models.\n" excluded: + explanation: See [[Data Filtering]](https://github.com/kakaobrain/coyo-dataset#data-filtering) value: Removed images less than 5KB image size. Removed images with an aspect ratio greater than 3.0. Removed images with min(width, height) < 200. Removed images with a score of OpenNSFW2 or GantMan/NSFW higher than 0.5. Removed @@ -36,34 +29,36 @@ texts over 1000 in length. Removed texts appearing more than 10 times. Removed texts containing NSFW words. Removed duplicated samples based on (image_phash, text). + feedback: Feedback can be given by emailing at coyo at kakaobrain.com + included: explanation: See [[Data Filtering]](https://github.com/kakaobrain/coyo-dataset#data-filtering) - quality_control: '' - access: - value: open - explanation: You can download the dataset from [[COYO Huggingface page]](https://huggingface.co/datasets/kakaobrain/coyo-700m). - For more information on downloading the image dataset, please refer to [[download/README.md]](https://github.com/kakaobrain/coyo-dataset/blob/main/download/README.md) - on Github. - license: CC-BY-4.0 + value: Included all image formats that Pillow library can decode. Collected + only English text using cld3. intended_uses: - value: > - "It is strongly recommended that this dataset be used only for research, keeping - this in mind when using the dataset, and Kakao Brain does not recommend using - this dataset as it is without special processing to clear inappropriate data - to create commercial products." explanation: See [[Disclaimer & Content Warning]](https://github.com/kakaobrain/coyo-dataset#disclaimer--content-warning) + value: "\"It is strongly recommended that this dataset be used only for research,\ + \ keeping this in mind when using the dataset, and Kakao Brain does not recommend\ + \ using this dataset as it is without special processing to clear inappropriate\ + \ data to create commercial products.\"\n" + license: CC-BY-4.0 + modality: + explanation: text (English) and image + value: image, text + monitoring: none + name: COYO-700M + organization: Kakao Brain prohibited_uses: - value: > - Authors note the following limitations: "Kakao Brain tried to construct a - "Safe" - dataset when building the COYO dataset. However, despite these efforts, this - large-scale dataset was not hand-picked by humans to avoid the risk due to - its - very large size (over 700M). Keep in mind that the unscreened nature of the - dataset means that the collected images can lead to strongly discomforting - and - disturbing content for humans. 
The COYO dataset may contain some inappropriate - data, and any problems resulting from such data are the full responsibility - of the user who used it." explanation: See [[Disclaimer & Content Warning]](https://github.com/kakaobrain/coyo-dataset#disclaimer--content-warning) - monitoring: none - feedback: Feedback can be given by emailing at coyo at kakaobrain.com + value: "Authors note the following limitations: \"Kakao Brain tried to construct\ + \ a \"Safe\" dataset when building the COYO dataset. However, despite these\ + \ efforts, this large-scale dataset was not hand-picked by humans to avoid\ + \ the risk due to its very large size (over 700M). Keep in mind that the unscreened\ + \ nature of the dataset means that the collected images can lead to strongly\ + \ discomforting and disturbing content for humans. The COYO dataset may contain\ + \ some inappropriate data, and any problems resulting from such data are the\ + \ full responsibility of the user who used it.\"\n" + quality_control: '' + sample: [] + size: 747M image-text pairs + type: dataset + url: https://github.com/kakaobrain/coyo-dataset diff --git a/assets/khan.yaml b/assets/khan.yaml index 75050a66..abdf0636 100644 --- a/assets/khan.yaml +++ b/assets/khan.yaml @@ -1,26 +1,24 @@ --- - -- type: application - name: Khanmigo - organization: Khan Academy - description: An AI-powered assistant that functions as both a virtual tutor for - students and a classroom assistant for teachers. +- access: limited + adaptation: '' created_date: 2023-03-14 - url: https://www.khanacademy.org/khan-labs#khanmigo dependencies: [GPT-4 API] - adaptation: '' - output_space: '' - quality_control: '' - access: limited + description: An AI-powered assistant that functions as both a virtual tutor for + students and a classroom assistant for teachers. + failures: '' + feedback: '' + intended_uses: '' license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - terms_of_service: '' - intended_uses: '' - prohibited_uses: '' monitoring: '' - feedback: '' monthly_active_users: '' + name: Khanmigo + organization: Khan Academy + output_space: '' + prohibited_uses: '' + quality_control: '' + terms_of_service: '' + type: application + url: https://www.khanacademy.org/khan-labs#khanmigo user_distribution: '' - failures: '' diff --git a/assets/laion.yaml b/assets/laion.yaml index 6a62325e..14972cc3 100644 --- a/assets/laion.yaml +++ b/assets/laion.yaml @@ -1,79 +1,69 @@ --- - -- type: dataset - name: LAION-400M - # General - organization: LAION - description: LAION-400M is a dataset with CLIP-filtered 400 million image-text - pairs, their CLIP embeddings and kNN indices that allow efficient similarity - search. This dataset is entirely openly, freely accessible. +- access: + explanation: The dataset index is available from [[dataset blog post]](https://laion.ai/blog/laion-400-open-dataset/) + value: open + analysis: '' created_date: - value: 2021-08-20 explanation: The date the [[blog post]](https://laion.ai/blog/laion-400-open-dataset/) was released. 
- url: https://laion.ai/blog/laion-400-open-dataset/ + value: 2021-08-20 datasheet: https://laion.ai/blog/laion-400-open-dataset/ - modality: text (English) and image - size: 400M image-text pairs - sample: [] - analysis: '' - # Construction dependencies: [CLIP, CommonCrawl] - included: '' + description: LAION-400M is a dataset with CLIP-filtered 400 million image-text + pairs, their CLIP embeddings and kNN indices that allow efficient similarity + search. This dataset is entirely openly, freely accessible. excluded: - value: > - Authors apply the following filtering conditions on the WAT files downloaded - from Common Crawl: "All samples with less than 5 character alt-text length - or less than 5 KB image size are dropped. Duplicate removal is performed with - bloom filter based on URL and alt-text. We use CLIP to compute embeddings - of the image and alt-text. Then we compute the cosine similarity of both embeddings - and drop all samples with cosine similarity below 0.3. This threshold was - selected based on human inspections. We use the CLIP embeddings of images - and texts to filter out illegal contents." explanation: See [[Dataset and Methods]](https://arxiv.org/pdf/2111.02114.pdf#section.2) - quality_control: - value: The authors use CLIP embeddings of images and texts to filter out illegal - contents. They also use CLIP to tag image-text pairs as NSFW. They note that - less than 1% of images were detected as NSFW, which can be filtered out by - an user with NSFW tag. - explanation: See [[Dataset and Methods]](https://arxiv.org/pdf/2111.02114.pdf#section.2) - # Downstream - access: - value: open - explanation: The dataset index is available from [[dataset blog post]](https://laion.ai/blog/laion-400-open-dataset/) - license: - value: CC BY 4.0 - explanation: The license is listed on the [[dataset blog post]](https://laion.ai/blog/laion-400-open-dataset/) + value: "Authors apply the following filtering conditions on the WAT files downloaded\ + \ from Common Crawl: \"All samples with less than 5 character alt-text length\ + \ or less than 5 KB image size are dropped. Duplicate removal is performed\ + \ with bloom filter based on URL and alt-text. We use CLIP to compute embeddings\ + \ of the image and alt-text. Then we compute the cosine similarity of both\ + \ embeddings and drop all samples with cosine similarity below 0.3. This threshold\ + \ was selected based on human inspections. We use the CLIP embeddings of images\ + \ and texts to filter out illegal contents.\"\n" + feedback: '' + included: '' intended_uses: The authors recommend using the dataset "for research purposes" and warn that "this large-scale dataset is non-curated. It was built for research purposes to enable testing model training on larger scale for broad researcher and other interested communities, and is not meant for any real-world production or application." - prohibited_uses: No uses are explicitly prohibited by the license. Users are warned - from using LAION-400M for any real-world production or application. + license: + explanation: The license is listed on the [[dataset blog post]](https://laion.ai/blog/laion-400-open-dataset/) + value: CC BY 4.0 + modality: + explanation: text (English) and image + value: image, text monitoring: '' - feedback: '' - -- type: dataset - name: LAION-5B - # General + name: LAION-400M organization: LAION - description: LAION is a dataset of 5 billion image-text pairs from the Internet + prohibited_uses: No uses are explicitly prohibited by the license. 
Users are warned + from using LAION-400M for any real-world production or application. + quality_control: + explanation: See [[Dataset and Methods]](https://arxiv.org/pdf/2111.02114.pdf#section.2) + value: The authors use CLIP embeddings of images and texts to filter out illegal + contents. They also use CLIP to tag image-text pairs as NSFW. They note that + less than 1% of images were detected as NSFW, which can be filtered out by + an user with NSFW tag. + sample: [] + size: 400M image-text pairs + type: dataset + url: https://laion.ai/blog/laion-400-open-dataset/ +- access: + explanation: The dataset index is available from [[dataset blog post]](https://laion.ai/blog/laion-5b/) + value: open + analysis: '' created_date: - value: 2022-12-12 explanation: The date the [[blog post]](https://laion.ai/blog/laion-5b/) was released. Note that the dataset was already compiled earlier, e.g. the Stable Diffusion model released earlier was trained on a subset of LAION-5B - url: https://laion.ai/blog/laion-5b/ + value: 2022-12-12 datasheet: https://laion.ai/blog/laion-5b/ - modality: text and image - size: 5B image-text pairs - sample: [] - analysis: '' - # Construction dependencies: [CLIP, mCLIP, CommonCrawl] - included: '' + description: LAION is a dataset of 5 billion image-text pairs from the Internet excluded: + explanation: See [[dataset blog post]](https://laion.ai/blog/laion-5b/) value: All samples with less than 5 characters alt-text length or less than 5 KB image size are dropped. All images with the too big resolution, potentially DOS bombs, were dropped before attempting to process them. Duplicate removal @@ -86,45 +76,43 @@ were selected based on human inspection of the test results. We use the CLIP embeddings of images and texts to filter out to the possible extent the illegal content. - explanation: See [[dataset blog post]](https://laion.ai/blog/laion-5b/) - quality_control: '' - # Downstream - access: - value: open - explanation: The dataset index is available from [[dataset blog post]](https://laion.ai/blog/laion-5b/) - license: - value: CC BY 4.0 - explanation: The license is listed on the [[dataset blog post]](https://laion.ai/blog/laion-5b/) + feedback: '' + included: '' intended_uses: The authors recommend using the dataset "for research purposes" and "do not recommend using it for creating ready-to-go industrial products, as the basic research about general properties and safety of such large-scale models, which we would like to encourage with this release, is still in progress" - prohibited_uses: No uses are explicitly prohibited by the license. Users are warned - from using LAION-5B for non-research purposes. + license: + explanation: The license is listed on the [[dataset blog post]](https://laion.ai/blog/laion-5b/) + value: CC BY 4.0 + modality: + explanation: text and image + value: image, text monitoring: '' - feedback: '' - -- type: dataset - name: LAION-2B-en - # General + name: LAION-5B organization: LAION - description: LAION-2B-en is a subset of the LAION-5B dataset and contains 2.3 - billion English image-text pairs. + prohibited_uses: No uses are explicitly prohibited by the license. Users are warned + from using LAION-5B for non-research purposes. 
+ quality_control: '' + sample: [] + size: 5B image-text pairs + type: dataset + url: https://laion.ai/blog/laion-5b/ +- access: + explanation: The dataset index is available from [[dataset blog post]](https://laion.ai/blog/laion-5b/) + value: open + analysis: '' created_date: - value: 2022-12-12 explanation: The date the [[blog post]](https://laion.ai/blog/laion-5b/) was released. Note that the dataset was already compiled earlier, e.g. the Stable Diffusion model released earlier was trained on a subset of LAION-5B - url: https://arxiv.org/pdf/2210.08402.pdf + value: 2022-12-12 datasheet: https://laion.ai/blog/laion-5b/ - modality: text (English) and image - size: 2.32B image-text pairs - sample: [] - analysis: '' - # Construction dependencies: [CLIP, LAION-5B] - included: '' + description: LAION-2B-en is a subset of the LAION-5B dataset and contains 2.3 + billion English image-text pairs. excluded: + explanation: See [[dataset blog post]](https://laion.ai/blog/laion-5b/) value: All samples with less than 5 characters alt-text length or less than 5 KB image size are dropped. All images with the too big resolution, potentially DOS bombs, were dropped before attempting to process them. Duplicate removal @@ -137,44 +125,50 @@ were selected based on human inspection of the test results. We use the CLIP embeddings of images and texts to filter out to the possible extent the illegal content. - explanation: See [[dataset blog post]](https://laion.ai/blog/laion-5b/) - quality_control: '' - # Downstream - access: - value: open - explanation: The dataset index is available from [[dataset blog post]](https://laion.ai/blog/laion-5b/) - license: - value: CC BY 4.0 - explanation: The license is listed on the [[dataset blog post]](https://laion.ai/blog/laion-5b/) + feedback: '' + included: '' intended_uses: The authors recommend using the dataset "for research purposes" and "do not recommend using it for creating ready-to-go industrial products, as the basic research about general properties and safety of such large-scale models, which we would like to encourage with this release, is still in progress" - prohibited_uses: No uses are explicitly prohibited by the license. Users are warned - from using LAION-2B-en for non-research purposes. + license: + explanation: The license is listed on the [[dataset blog post]](https://laion.ai/blog/laion-5b/) + value: CC BY 4.0 + modality: + explanation: text (English) and image + value: image, text monitoring: '' - feedback: '' - -- type: model - name: OpenFlamingo + name: LAION-2B-en organization: LAION + prohibited_uses: No uses are explicitly prohibited by the license. Users are warned + from using LAION-2B-en for non-research purposes. + quality_control: '' + sample: [] + size: 2.32B image-text pairs + type: dataset + url: https://arxiv.org/pdf/2210.08402.pdf +- access: open + analysis: Evaluated on COCO captioning and VQAv2 vision-language tasks. + created_date: 2023-03-28 + dependencies: [LLaMA, CLIP] description: An open-source reproduction of DeepMind's Flamingo model. At its core, OpenFlamingo is a framework that enables training and evaluation of large multimodal models (LMMs). 
- created_date: 2023-03-28 - url: https://laion.ai/blog/open-flamingo/ + feedback: '' + intended_uses: academic research purposes + license: MIT + modality: + explanation: image and text input with textual output + value: image, text; text model_card: https://github.com/mlfoundations/open_flamingo/blob/main/MODEL_CARD.md - modality: image and text input with textual output - analysis: Evaluated on COCO captioning and VQAv2 vision-language tasks. - size: 9B parameters - dependencies: [LLaMA, CLIP] + monitoring: '' + name: OpenFlamingo + organization: LAION + prohibited_uses: commercial use + quality_control: '' + size: 9B parameters (dense) training_emissions: '' - training_time: '' training_hardware: '' - quality_control: '' - access: open - license: MIT - intended_uses: academic research purposes - prohibited_uses: commercial use - monitoring: '' - feedback: '' + training_time: '' + type: model + url: https://laion.ai/blog/open-flamingo/ diff --git a/assets/latitude.yaml b/assets/latitude.yaml index f6d5e39c..2e5bc564 100644 --- a/assets/latitude.yaml +++ b/assets/latitude.yaml @@ -1,43 +1,33 @@ --- - -- type: application - name: AI Dungeon - # General - organization: Latitude - description: > - AI Dungeon is a single-player text adventure game that uses AI to generate - content. +- access: + explanation: "The game is available to public upon sign up.\n" + value: limited + adaptation: '' created_date: + explanation: "Release date of the game on iOS and Android platforms [[Wikipedia]](https://en.wikipedia.org/wiki/AI_Dungeon).\n" value: 2019-12-17 - explanation: > - Release date of the game on iOS and Android platforms - [[Wikipedia]](https://en.wikipedia.org/wiki/AI_Dungeon). - url: https://play.aidungeon.io - # Construction dependencies: [OpenAI API] - adaptation: '' - output_space: '' - quality_control: '' - # Downstream - access: - value: limited - explanation: > - The game is available to public upon sign up. - terms_of_service: https://play.aidungeon.io/main/termsOfService + description: "AI Dungeon is a single-player text adventure game that uses AI to\ + \ generate content.\n" + failures: '' + feedback: '' + intended_uses: '' license: + explanation: "\"Subject to your compliance with these Terms, the Company hereby\ + \ grants to you, a personal, worldwide, royalty-free, non-assignable, non-sublicensable,\ + \ non-transferrable, and non-exclusive license to use the software provided\ + \ to you as part of the Services (and to download a single copy of the App\ + \ onto the equipment or device specified by us)\" - excerpt from the Terms\ + \ of Service document.\n" value: custom - explanation: > - "Subject to your compliance with these Terms, the Company hereby grants - to you, a personal, worldwide, royalty-free, non-assignable, - non-sublicensable, non-transferrable, and non-exclusive license to use - the software provided to you as part of the Services (and to download a - single copy of the App onto the equipment or device specified by us)" - - excerpt from the Terms of Service document. 
- intended_uses: '' - prohibited_uses: '' monitoring: '' - feedback: '' - # Deployment monthly_active_users: '' + name: AI Dungeon + organization: Latitude + output_space: '' + prohibited_uses: '' + quality_control: '' + terms_of_service: https://play.aidungeon.io/main/termsOfService + type: application + url: https://play.aidungeon.io user_distribution: '' - failures: '' diff --git a/assets/lehigh.yaml b/assets/lehigh.yaml index 9e8df418..c153f345 100644 --- a/assets/lehigh.yaml +++ b/assets/lehigh.yaml @@ -1,30 +1,31 @@ --- - -- type: model - name: BiomedGPT - organization: Lehigh University - description: BiomedGPT leverages self-supervision on large and diverse datasets - to accept multi-modal inputs and perform a range of downstream tasks. - created_date: 2023-05-26 - url: https://arxiv.org/pdf/2305.17100.pdf - model_card: '' - modality: image and text input, text output +- access: open analysis: outperforms majority of preceding state-of-the-art models over 15 unique biomedical modalities. - size: 472M parameters + created_date: 2023-05-26 dependencies: - GPT-style autoregressive decoder - BiomedGPT biomedical datasets - training_emissions: unknown - training_time: unknown - training_hardware: 10 NVIDIA A5000 GPUs - quality_control: No specific quality control is mentioned in model training, though - details on data processing and how the model was trained are provided in the - paper. - access: open - license: Apache 2.0 + description: BiomedGPT leverages self-supervision on large and diverse datasets + to accept multi-modal inputs and perform a range of downstream tasks. + feedback: '' intended_uses: furthering research in developing unified and generalist models for biomedicine. - prohibited_uses: '' + license: Apache 2.0 + modality: + explanation: image and text input, text output + value: image, text; text + model_card: '' monitoring: '' - feedback: '' + name: BiomedGPT + organization: Lehigh University + prohibited_uses: '' + quality_control: No specific quality control is mentioned in model training, though + details on data processing and how the model was trained are provided in the + paper. + size: 472M parameters (dense) + training_emissions: unknown + training_hardware: 10 NVIDIA A5000 GPUs + training_time: unknown + type: model + url: https://arxiv.org/pdf/2305.17100.pdf diff --git a/assets/linkedin.yaml b/assets/linkedin.yaml index 0d3d2ab0..a92551e9 100644 --- a/assets/linkedin.yaml +++ b/assets/linkedin.yaml @@ -1,27 +1,26 @@ --- - -- type: application - name: LinkedIn - organization: LinkedIn +- access: open + adaptation: '' + created_date: unknown + dependencies: [Azure Cognitive Services for Vision] description: More than 40 percent of LinkedIn's feed posts include at least one image. We want every member to have equal access to opportunity and are committed to ensuring that we make images accessible to our members who are blind or who have low vision so they can be a part of the online conversation. With Azure Cognitive Service for Vision, we can provide auto-captioning to edit and support alt. text descriptions. 
- created_date: unknown - url: https://www.linkedin.com/ - dependencies: [Azure Cognitive Services for Vision] - adaptation: '' - output_space: '' - quality_control: '' - access: open - license: unknown - terms_of_service: '' + failures: '' + feedback: '' intended_uses: '' - prohibited_uses: '' + license: unknown monitoring: '' - feedback: '' monthly_active_users: '' + name: LinkedIn + organization: LinkedIn + output_space: '' + prohibited_uses: '' + quality_control: '' + terms_of_service: '' + type: application + url: https://www.linkedin.com/ user_distribution: '' - failures: '' diff --git a/assets/lmsys.yaml b/assets/lmsys.yaml index 6f07781e..3058f7a0 100644 --- a/assets/lmsys.yaml +++ b/assets/lmsys.yaml @@ -1,24 +1,25 @@ --- - -- type: model - name: Vicuna - organization: LMSYS +- access: open + analysis: Evaluated against similar LLMs using GPT-4 as a judge. + created_date: 2023-03-30 + dependencies: [LLaMA, ShareGPT conversations data] description: An open-source chatbot trained by fine-tuning LLaMA on user-shared conversations collected from ShareGPT. - created_date: 2023-03-30 - url: https://lmsys.org/blog/2023-03-30-vicuna/ + feedback: https://huggingface.co/datasets/bigcode/the-stack/discussions + intended_uses: research on LLMs and chatbots + license: Apache 2.0 + modality: + explanation: natural language text + value: text; text model_card: https://huggingface.co/lmsys/vicuna-13b-delta-v0 - modality: natural language text - analysis: Evaluated against similar LLMs using GPT-4 as a judge. - size: 13B parameters - dependencies: [LLaMA, ShareGPT conversations data] + monitoring: '' + name: Vicuna + organization: LMSYS + prohibited_uses: '' + quality_control: '' + size: 13B parameters (dense) training_emissions: '' - training_time: 1 day training_hardware: 8 A100 GPUs - quality_control: '' - access: open - license: Apache 2.0 - intended_uses: research on LLMs and chatbots - prohibited_uses: '' - monitoring: '' - feedback: https://huggingface.co/datasets/bigcode/the-stack/discussions + training_time: 1 day + type: model + url: https://lmsys.org/blog/2023-03-30-vicuna/ diff --git a/assets/meta.yaml b/assets/meta.yaml index c44d7474..ad4d99eb 100644 --- a/assets/meta.yaml +++ b/assets/meta.yaml @@ -1,51 +1,40 @@ --- - -- type: model - name: ESM-2 - organization: Meta - description: ESM-2 is a series of protein language models trained on protein sequences +- access: + explanation: Models are available for download from [[GitHub repository]](https://github.com/facebookresearch/esm#available-models) + value: open + analysis: '' created_date: - value: 2022-10-31 explanation: The date the [[model paper]](https://www.biorxiv.org/content/10.1101/2022.07.20.500902v2.full.pdf+html) was released - url: https://www.biorxiv.org/content/10.1101/2022.07.20.500902v2.full.pdf+html - model_card: none - modality: protein sequence - size: 15B parameters (dense model) - analysis: '' + value: 2022-10-31 dependencies: [UniRef50, UniRef90] - training_emissions: '' - training_time: '' - training_hardware: '' - quality_control: '' - access: - value: open - explanation: Models are available for download from [[GitHub repository]](https://github.com/facebookresearch/esm#available-models) + description: ESM-2 is a series of protein language models trained on protein sequences + feedback: '' + intended_uses: '' license: + explanation: "The license is provided in the [[Github repository]](https://github.com/facebookresearch/esm#available-models)\n" value: MIT - explanation: > - The license is provided in the 
[[Github repository]](https://github.com/facebookresearch/esm#available-models) - intended_uses: '' - prohibited_uses: '' + modality: + explanation: protein sequence + value: text; text + model_card: none monitoring: '' - feedback: '' - -- type: dataset - name: PMD - # General + name: ESM-2 organization: Meta - description: PMD (Public Multimodal Datasets) is a collection of image-text datasets - introduced in the FLAVA work. + prohibited_uses: '' + quality_control: '' + size: 15B parameters (dense) + training_emissions: '' + training_hardware: '' + training_time: '' + type: model + url: https://www.biorxiv.org/content/10.1101/2022.07.20.500902v2.full.pdf+html +- access: closed + analysis: none created_date: - value: 2021-12-08 explanation: The date the model paper was released - url: https://arxiv.org/abs/2112.04482 + value: 2021-12-08 datasheet: none - modality: text, image - size: 70M - sample: [] - analysis: none - # Construction dependencies: - COCO - YFCC100M @@ -55,329 +44,286 @@ - Wikipedia - Conceptual Captions - Red Caps - license: - value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - included: none + description: PMD (Public Multimodal Datasets) is a collection of image-text datasets + introduced in the FLAVA work. excluded: YFCC100M is filtered for non-English captions and very short (< 2 word) captions. - quality_control: Beyond filtering mentioned in excluded, nothing further is done. - # Downstream - access: closed + feedback: none + included: none intended_uses: unknown - prohibited_uses: unknown + license: + explanation: "The asset isn't released, and hence the license is unknown.\n" + value: unknown + modality: + explanation: text, image + value: image, text monitoring: none - feedback: none - -- type: model - name: FLAVA + name: PMD organization: Meta - description: FLAVA is a multimodal model composed of an image encoder, text encoder, - and multimodal encoder. - created_date: - value: 2021-12-08 - explanation: The date the model paper was released + prohibited_uses: unknown + quality_control: Beyond filtering mentioned in excluded, nothing further is done. + sample: [] + size: 70M + type: dataset url: https://arxiv.org/abs/2112.04482 - model_card: https://huggingface.co/facebook/flava-full - modality: Text (English) and Image - size: - value: 306M - explanation: > - 110M (Language encoder) + 86M (Vision encoder) + 110M (mul encoder) +- access: + explanation: "Model checkpoints are available for download from the [[HuggingFace\ + \ repository]](https://huggingface.co/facebook/flava-full)\n" + value: open analysis: FLAVA is benchmarked on a range of vision-only (e.g. CIFAR-10), language-only (e.g. GLUE), and multimodal (e.g. Hateful Memes) standard evaluations. + created_date: + explanation: The date the model paper was released + value: 2021-12-08 dependencies: [PMD] - training_emissions: unknown - training_time: unknown - training_hardware: unknown - quality_control: FLAVA introduces a variety of new modeling techniques, specifically - with an interest in improved text-image alignment through contrastive objectives. - access: - value: open - explanation: > - Model checkpoints are available for download from the [[HuggingFace - repository]](https://huggingface.co/facebook/flava-full) + description: FLAVA is a multimodal model composed of an image encoder, text encoder, + and multimodal encoder. 
+ feedback: https://huggingface.co/facebook/flava-full/discussions + intended_uses: "Per the [[HuggingFace repository]](https://huggingface.co/facebook/flava-full),\ + \ \"The model is intended to serve as a reproducible research artifact for research\ + \ communities in the light of models whose exact reproduction details are never\ + \ released such as CLIP and SimVLM.\"\n" license: + explanation: "The license is provided in the [[HuggingFace repository]](https://huggingface.co/facebook/flava-full)\n" value: BSD-3-Clause - explanation: > - The license is provided in the [[HuggingFace repository]](https://huggingface.co/facebook/flava-full) - intended_uses: > - Per the [[HuggingFace repository]](https://huggingface.co/facebook/flava-full), - "The model is intended to serve as a reproducible research artifact for research - communities in the light of models whose exact reproduction details are never - released such as CLIP and SimVLM." - prohibited_uses: > - Per the [[HuggingFace repository]](https://huggingface.co/facebook/flava-full), - "Any deployed use case of the model - whether commercial or not" - is currently - out of scope. + modality: + explanation: Text (English) and Image + value: image, text; image, text + model_card: https://huggingface.co/facebook/flava-full monitoring: none - feedback: https://huggingface.co/facebook/flava-full/discussions - -- type: dataset - name: The Galactica Corpus - # General + name: FLAVA organization: Meta - description: The Galactica Corpus is a collection of scientific datasets introduced - in the Galactica work. + prohibited_uses: "Per the [[HuggingFace repository]](https://huggingface.co/facebook/flava-full),\ + \ \"Any deployed use case of the model - whether commercial or not\" - is currently\ + \ out of scope.\n" + quality_control: FLAVA introduces a variety of new modeling techniques, specifically + with an interest in improved text-image alignment through contrastive objectives. + size: 306M parameters (dense) + training_emissions: unknown + training_hardware: unknown + training_time: unknown + type: model + url: https://arxiv.org/abs/2112.04482 +- access: closed + analysis: none created_date: + explanation: "The date the Galactica paper was released\n" value: 2022-11-15 - explanation: > - The date the Galactica paper was released - url: https://galactica.org/static/paper.pdf datasheet: none - modality: text - size: 106B tokens - sample: [] - analysis: none - # Construction dependencies: [CommonCrawl, Wikipedia, arXiv] - license: - value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. + description: The Galactica Corpus is a collection of scientific datasets introduced + in the Galactica work. + excluded: '' + feedback: none included: Prompts and reasoning data is explicitly included to improve model capabilities derived from this data. - excluded: '' - quality_control: '' - # Downstream - access: closed intended_uses: unknown - prohibited_uses: unknown + license: + explanation: "The asset isn't released, and hence the license is unknown.\n" + value: unknown + modality: text monitoring: none - feedback: none - -- type: model - name: Galactica + name: The Galactica Corpus organization: Meta - description: Galactica is a family of autoregressive language models. 
- created_date: - value: 2022-11-15 - explanation: > - The date the Galactica paper was released + prohibited_uses: unknown + quality_control: '' + sample: [] + size: 106B tokens + type: dataset url: https://galactica.org/static/paper.pdf - model_card: https://huggingface.co/facebook/galactica-6.7b - modality: Text (English), Code, Math, Chemistry, Biology +- access: + explanation: Model checkpoints freely available at https://github.com/paperswithcode/galai + value: open analysis: '' - size: 120B parameters (dense model) + created_date: + explanation: "The date the Galactica paper was released\n" + value: 2022-11-15 dependencies: [The Galactica Corpus] + description: Galactica is a family of autoregressive language models. + feedback: '' + intended_uses: '' + license: + explanation: https://github.com/paperswithcode/galai/blob/main/LICENSE-MODEL.md + value: CC BY-NC 4.0 + modality: + explanation: Text (English), Code, Math, Chemistry, Biology + value: code, text; code, text + model_card: https://huggingface.co/facebook/galactica-6.7b + monitoring: '' + name: Galactica + organization: Meta + prohibited_uses: '' + quality_control: '' + size: 120B parameters (dense) training_emissions: unknown - training_time: unknown training_hardware: Meta AI Cluster. Trained on 1024 80GB A100 GPUs (128 8xA100 80GB nodes) - quality_control: '' - access: + training_time: unknown + type: model + url: https://galactica.org/static/paper.pdf +- access: + explanation: Model weights are available via the [[HuggingFace repository]](https://huggingface.co/facebook/incoder-6B) value: open - explanation: Model checkpoints freely available at https://github.com/paperswithcode/galai + analysis: none + created_date: + explanation: The date the model paper was released + value: 2022-04-12 + dependencies: [] + description: InCoder is a language model trained on code with a causal masking + objective + feedback: '' + intended_uses: '' license: + explanation: The license is provided in the [[HuggingFace repository]](https://huggingface.co/facebook/incoder-6B?text=My+name+is+Lewis+and+I+like+to) value: CC BY-NC 4.0 - explanation: https://github.com/paperswithcode/galai/blob/main/LICENSE-MODEL.md - intended_uses: '' - prohibited_uses: '' + modality: + explanation: text (English) and code + value: code, text; code, text + model_card: none monitoring: '' - feedback: '' - -- type: model name: InCoder organization: Meta, CMU, TTI-Chicago, UC Berkeley, University of Washington - description: InCoder is a language model trained on code with a causal masking - objective - created_date: - value: 2022-04-12 - explanation: The date the model paper was released - url: https://arxiv.org/abs/2204.05999 - model_card: none - modality: text (English) and code - analysis: none - size: 6B parameters (dense model) - dependencies: [] + prohibited_uses: '' + quality_control: unknown + size: 6B parameters (dense) training_emissions: Unknown - training_time: 24 days, according to [[the paper]](https://arxiv.org/pdf/2204.05999.pdf) training_hardware: 248 V100 GPUs, according to [[the paper]](https://arxiv.org/pdf/2204.05999.pdf) - quality_control: unknown - access: - value: open - explanation: Model weights are available via the [[HuggingFace repository]](https://huggingface.co/facebook/incoder-6B) - license: - value: CC BY-NC 4.0 - explanation: The license is provided in the [[HuggingFace repository]](https://huggingface.co/facebook/incoder-6B?text=My+name+is+Lewis+and+I+like+to) + training_time: 24 days, according to [[the 
paper]](https://arxiv.org/pdf/2204.05999.pdf) + type: model + url: https://arxiv.org/abs/2204.05999 +- access: + explanation: The 175B model requires manual approval from Meta to access. Other + models are available through HuggingFace. + value: limited + analysis: '' + created_date: + explanation: "The date the OPT paper was submitted to Arxiv\n" + value: 2022-05-01 + dependencies: [RoBERTa dataset, The Pile, PushShift.io Reddit] + description: OPT is a family of autoregressive language models. + feedback: '' intended_uses: '' - prohibited_uses: '' + license: + explanation: "All released with the [[OPT-175B License]](https://github.com/facebookresearch/metaseq/blob/main/projects/OPT/MODEL_LICENSE.md),\ + \ except 66B (TBD) and 17B (requires manual approval)\n" + value: OPT-175B License + modality: + explanation: text (English) + value: text; text + model_card: https://arxiv.org/pdf/2205.01068.pdf monitoring: '' - feedback: '' - -- type: model name: OPT organization: Meta - description: OPT is a family of autoregressive language models. - created_date: - value: 2022-05-01 - explanation: > - The date the OPT paper was submitted to Arxiv - url: https://arxiv.org/abs/2205.01068 - model_card: https://arxiv.org/pdf/2205.01068.pdf - modality: text (English) - analysis: '' - size: 175B parameters (dense model) - dependencies: [RoBERTa dataset, The Pile, PushShift.io Reddit] + prohibited_uses: '' + quality_control: '' + size: 175B parameters (dense) training_emissions: + explanation: "Estimate by authors for the OPT-175B model only. Not including\ + \ ablations and baselines.\n" value: 75 tCO2e - explanation: > - Estimate by authors for the OPT-175B model only. Not including ablations and - baselines. - training_time: '' training_hardware: Meta AI cluster. Trained on 992 80GB A100 GPUs - quality_control: '' - access: + training_time: '' + type: model + url: https://arxiv.org/abs/2205.01068 +- access: + explanation: "The datasets involved are public, but the full dataset is not\ + \ directly available, nor are filtering scripts.\n" value: limited - explanation: The 175B model requires manual approval from Meta to access. Other - models are available through HuggingFace. - license: - value: OPT-175B License - explanation: > - All released with the - [[OPT-175B License]](https://github.com/facebookresearch/metaseq/blob/main/projects/OPT/MODEL_LICENSE.md), - except 66B (TBD) and 17B (requires manual approval) - intended_uses: '' - prohibited_uses: '' - monitoring: '' - feedback: '' - -- type: dataset - name: Make-A-Video dataset - # General - organization: Meta - description: > - The Make-A-Video dataset is the dataset used to train Make-A-Video, which includes - both image-text and video-only datasets with specific and significant filtering. + analysis: '' created_date: + explanation: "The date that Make-A-Video was posted to arXiv [[arXiv]] (https://arxiv.org/abs/2209.14792).\n" value: 2022-09-29 - explanation: > - The date that Make-A-Video was posted to arXiv - [[arXiv]] - (https://arxiv.org/abs/2209.14792). 
- url: https://arxiv.org/pdf/2209.14792.pdf datasheet: none - modality: video, image-text - size: 20M video clips, 2.3B image-text pairs - sample: [] - analysis: '' - # Construction dependencies: [LAION-5B, WebVid-10M, HD-VILA-100M] - license: + description: "The Make-A-Video dataset is the dataset used to train Make-A-Video,\ + \ which includes both image-text and video-only datasets with specific and significant\ + \ filtering.\n" + excluded: "The LAION-5B dataset is filtered to 2.3B by removing NSFW images using\ + \ [https://github.com/GantMan/nsfw](https://github.com/GantMan/nsfw), toxic\ + \ words in text, and images with watermark probability > 0.5. The HD-VILA-100M\ + \ is randomly subsampled to 10M video clips.\n" + feedback: + explanation: "No feedback mechanism is mentioned by the authors.\n" value: none - explanation: > - No license was found, though the underlying datasets are public and have - licenses. included: + explanation: "Data from the three underlying datasets is filtered, but nothing\ + \ is included beyond this.\n" value: none - explanation: > - Data from the three underlying datasets is filtered, but nothing is included - beyond this. - excluded: > - The LAION-5B dataset is filtered to 2.3B by removing NSFW images using [https://github.com/GantMan/nsfw](https://github.com/GantMan/nsfw), - toxic words in text, and images with watermark probability > 0.5. The HD-VILA-100M - is randomly subsampled to 10M video clips. - quality_control: > - The authors exclude NSFW, toxic, and likely watermarked data from LAION-5B. - # Downstream - access: - value: limited - explanation: > - The datasets involved are public, but the full dataset is not directly available, - nor are filtering scripts. intended_uses: unknown - prohibited_uses: unknown + license: + explanation: "No license was found, though the underlying datasets are public\ + \ and have licenses.\n" + value: none + modality: + explanation: video, image-text + value: image, text, video monitoring: + explanation: "There is no information on how Meta is internally monitoring the\ + \ use of the dataset.\n" value: unknown - explanation: > - There is no information on how Meta is internally monitoring - the use of the dataset. - feedback: - value: none - explanation: > - No feedback mechanism is mentioned by the authors. - -- type: model - name: Make-A-Video - # General + name: Make-A-Video dataset organization: Meta - description: > - Make-A-Video is a model for Text-to-Video Generation without Text-Video Data. + prohibited_uses: unknown + quality_control: "The authors exclude NSFW, toxic, and likely watermarked data\ + \ from LAION-5B.\n" + sample: [] + size: 20M video clips, 2.3B image-text pairs + type: dataset + url: https://arxiv.org/pdf/2209.14792.pdf +- access: + explanation: "The model has not been released; a form existed to potentially\ + \ acquire access but is now closed as of 2022-12-07 [[Access Form]](https://docs.google.com/forms/u/0/d/e/1FAIpQLSfMjC57wcXWUDV0UbS2Tn6VhjLEiCXaHvWZuWgWRa-Zx8-Few/closedform).\n" + value: closed + analysis: "Model performance was evaluated using automated (Frechet Video Distance;\ + \ Frechet Inception Distance) and human evaluation on two datasets (UCF-101,\ + \ MSR-VTT) in the zero-shot setting.\n" created_date: + explanation: "The date that Make-A-Video was posted to arXiv [[arXiv]] (https://arxiv.org/abs/2209.14792).\n" value: 2022-09-29 - explanation: > - The date that Make-A-Video was posted to arXiv - [[arXiv]] - (https://arxiv.org/abs/2209.14792). 
- url: https://arxiv.org/pdf/2209.14792.pdf - model_card: none - modality: text, video - size: - value: unknown - explanation: > - The authors do not state the model size in the paper. - analysis: > - Model performance was evaluated using automated (Frechet Video Distance; Frechet - Inception Distance) and human evaluation on two datasets (UCF-101, MSR-VTT) - in the zero-shot setting. - # Construction dependencies: [Make-A-Video dataset] - training_emissions: + description: "Make-A-Video is a model for Text-to-Video Generation without Text-Video\ + \ Data.\n" + feedback: + explanation: "Authors do not mention or provide a feedback mechanism.\n" + value: none + intended_uses: + explanation: "Authors do not report the intended uses.\n" value: unknown - explanation: > - Authors do not report the training emissions. - training_time: + license: + explanation: No license was found. + value: none + modality: + explanation: text, video + value: text, video; text, video + model_card: none + monitoring: + explanation: "Authors do not report the monitoring process for Make-A-Video\ + \ internally at Meta.\n" value: unknown - explanation: > - Authors do not report the training time. - training_hardware: + name: Make-A-Video + organization: Meta + prohibited_uses: + explanation: "Authors do not report the prohibited uses.\n" value: unknown - explanation: > - Authors do not report the training hardware or provider. quality_control: + explanation: "Authors do not report specific quality control steps taken in\ + \ modeling, though filtering is done in producing the Make-A-Video dataset.\n" value: none - explanation: > - Authors do not report specific quality control steps taken in modeling, though - filtering is done in producing the Make-A-Video dataset. - # Downstream - access: - value: closed - explanation: > - The model has not been released; a form existed to potentially acquire access - but is now closed as of 2022-12-07 - [[Access Form]](https://docs.google.com/forms/u/0/d/e/1FAIpQLSfMjC57wcXWUDV0UbS2Tn6VhjLEiCXaHvWZuWgWRa-Zx8-Few/closedform). - license: - value: none - explanation: No license was found. - intended_uses: + size: unknown + training_emissions: + explanation: "Authors do not report the training emissions.\n" value: unknown - explanation: > - Authors do not report the intended uses. - prohibited_uses: + training_hardware: + explanation: "Authors do not report the training hardware or provider.\n" value: unknown - explanation: > - Authors do not report the prohibited uses. - monitoring: + training_time: + explanation: "Authors do not report the training time.\n" value: unknown - explanation: > - Authors do not report the monitoring process for Make-A-Video internally at - Meta. - feedback: - value: none - explanation: > - Authors do not mention or provide a feedback mechanism. 
- -- type: model - name: LLaMA - organization: Meta - description: '' - created_date: 2023-02-24 - url: https://arxiv.org/abs/2302.13971 - model_card: '' - modality: Text + type: model + url: https://arxiv.org/pdf/2209.14792.pdf +- access: open analysis: '' - size: 65B parameters (dense model) + created_date: 2023-02-24 dependencies: - CommonCrawl - C4 @@ -386,306 +332,311 @@ - BooksCorpus - arXiv - StackExchange - training_emissions: '' - training_time: '' - training_hardware: '' - quality_control: '' - access: open - license: LLaMa License (model weights), GPLv3 (code) + description: '' + feedback: '' intended_uses: '' - prohibited_uses: '' + license: LLaMa License (model weights), GPLv3 (code) + modality: + explanation: Text + value: text; text + model_card: '' monitoring: '' - feedback: '' - -- type: model - name: LLaMA 2 + name: LLaMA organization: Meta - description: LLaMA 2 is an updated version of LLaMA trained on a new mix of publicly - available data. - created_date: 2023-07-18 - url: https://ai.meta.com/resources/models-and-libraries/llama/ - model_card: Can be found at appendix of paper at https://ai.meta.com/research/publications/llama-2-open-foundation-and-fine-tuned-chat-models/ - modality: text + prohibited_uses: '' + quality_control: '' + size: 65B parameters (dense) + training_emissions: '' + training_hardware: '' + training_time: '' + type: model + url: https://arxiv.org/abs/2302.13971 +- access: open analysis: Evaluated on standard academic benchmarks and internal Meta libraries. - size: 70B parameters (dense model) + created_date: 2023-07-18 dependencies: [] - training_emissions: 539 tCO2eq - training_time: '' - training_hardware: NVIDIA A100-80GB GPUs (TDP of 350-400W) - quality_control: '' - access: open - license: - value: custom - explanation: The license can be found at https://ai.meta.com/resources/models-and-libraries/llama-downloads/ + description: LLaMA 2 is an updated version of LLaMA trained on a new mix of publicly + available data. + feedback: '' intended_uses: LLaMA 2 is intended for commercial and research use in English. Tuned models are intended for assistant-like chat, whereas pretrained models can be adapted for a variety of natural language generation tasks. + license: + explanation: The license can be found at https://ai.meta.com/resources/models-and-libraries/llama-downloads/ + value: custom + modality: + explanation: text + value: text; text + model_card: Can be found at appendix of paper at https://ai.meta.com/research/publications/llama-2-open-foundation-and-fine-tuned-chat-models/ + monitoring: '' + name: LLaMA 2 + organization: Meta prohibited_uses: Use in any manner that violates applicable laws or regulations (including trade compliance laws). Use in languages other than English. Use in any other way that is prohibited by the Acceptable Use Policy and Licensing Agreement for LLaMA 2. 
- monitoring: '' + quality_control: '' + size: 70B parameters (dense) + training_emissions: 539 tCO2eq + training_hardware: NVIDIA A100-80GB GPUs (TDP of 350-400W) + training_time: '' + type: model + url: https://ai.meta.com/resources/models-and-libraries/llama/ +- access: open + analysis: '' + created_date: 2022-12-22 + dependencies: [OPT, OPT-IML Bench] + description: '' feedback: '' - -- type: model + intended_uses: '' + license: OPT-IML 175B License + modality: + explanation: text + value: text; text + model_card: '' + monitoring: '' name: OPT-IML organization: Meta - description: '' - created_date: 2022-12-22 - url: https://arxiv.org/abs/2212.12017 - model_card: '' - modality: text - analysis: '' + prohibited_uses: '' + quality_control: '' size: 175B parameters (dense) - dependencies: [OPT, OPT-IML Bench] training_emissions: '' - training_time: '' training_hardware: '' - quality_control: '' - access: open - license: OPT-IML 175B License - intended_uses: '' - prohibited_uses: '' - monitoring: '' - feedback: '' - -- type: dataset - name: SA-1B - # General - organization: Meta - description: > - SA-1B (Segment Anything 1 Billion) is a dataset designed for training general-purpose - object segmentation models from open world images. It consists of 11M diverse, - high-resolution, privacy protecting images and 1.1B high-quality segmentation - masks. + training_time: '' + type: model + url: https://arxiv.org/abs/2212.12017 +- access: + explanation: "The full dataset can be downloaded at [[SA-1B Download]](https://ai.facebook.com/datasets/segment-anything-downloads/).\ + \ A 50k image preview of the full dataset is available [[here]](https://segment-anything.com/dataset/index.html).\n" + value: open + analysis: '' created_date: - value: 2023-04-05 explanation: The date the [[Meta blog post]](https://ai.facebook.com/blog/segment-anything-foundation-model-image-segmentation/) was released. - url: https://ai.facebook.com/datasets/segment-anything/ + value: 2023-04-05 datasheet: - value: https://arxiv.org/pdf/2304.02643.pdf#page=25 explanation: Datasheet can be found in the Appendix section of the Segment Anything paper. - modality: image - size: 11M images, 1.1B mask annotations - sample: [] - analysis: '' - # Construction + value: https://arxiv.org/pdf/2304.02643.pdf#page=25 dependencies: [] - license: - value: SA-1B Dataset Research License - explanation: SA-1B is released under a favorable license agreement for certain - research uses and with protections for researchers. See [[SA-1B Dataset Research - License]](https://ai.facebook.com/datasets/segment-anything-downloads/). + description: "SA-1B (Segment Anything 1 Billion) is a dataset designed for training\ + \ general-purpose object segmentation models from open world images. It consists\ + \ of 11M diverse, high-resolution, privacy protecting images and 1.1B high-quality\ + \ segmentation masks.\n" + excluded: + explanation: See [[Datasheet]](https://arxiv.org/pdf/2304.02643.pdf#page=25) + value: "\"We withheld ~2k randomly selected images for testing purposes.\" \ + \ \"Each image is accompanied by a short caption that describes the content\ + \ and place of the photo in a free form text. Per our agreement with the photo\ + \ provider we are not allowed to release these captions.\"\n" + feedback: Feedback can be given via the feedback form on their website [segment-anything.com](https://segment-anything.com/) + or by emailing at segment-anything at meta.com. 
included: - value: > - SA-1B consists of 11M diverse, high-resolution (averaging 1500×2250 pixels), - and privacy protecting images collected and licensed from a third party photo - company. The images are photos taken from a camera, i.e. not artwork. - The images vary in subject matter. Common themes of the images include: locations, - objects, scenes. - The dataset includes 1.1B high-quality segmentation masks collected with the - Segment Anything Data Engine. - SA-1B only includes automatically generated masks (99.1%), as the authors - conclude after experiments that the automatic masks are high quality and effective - for training models. The masks range from large scale objects such as buildings - to fine grained details such as door handles. - Masks are provided in the COCO run-length encoding (RLE) annotation format. explanation: According to section [[Segment Anything Dataset]](https://arxiv.org/pdf/2304.02643.pdf#section.5) of the paper and [[SA-1B website]](https://ai.facebook.com/datasets/segment-anything/). - excluded: - value: > - "We withheld ~2k randomly selected images for testing purposes." "Each image - is accompanied by a short caption that describes the content and place of - the photo in a free form text. Per our agreement with the photo provider we - are not allowed to release these captions." - explanation: See [[Datasheet]](https://arxiv.org/pdf/2304.02643.pdf#page=25) - quality_control: - value: | - - Dataset quality: - Due to potential accessibility and storage challenges, the original high-resolution images (averaging 3300×4950 pixels) were downsampled to an average resolution of 1500×2250 pixels. Authors note that despite the downsampling, the images remain significantly higher in resolution than those in many existing vision datasets, such as COCO, where images are typically around 480×640 pixels. - The images were processed to blur faces and license plates to protect the identities of those in the image. - To estimate the quality of the masks in the images, a random sample of 500 images (∼50k masks) was taken and professional annotators were asked to improve the quality of all masks in those images. - - Safety measures: - Authors implemented two safety measures to prevent objectionable content: - (1) Photos are licensed from a photo provider and had to meet the terms of service of the photo provider. Authors requested that all objectionable content be filtered from the images they licensed. - (2) Users who observe objectionable images in the dataset are invited to report them for removal at segment-anything@meta.com. - Despite these measures, they observed that a small portion of images contain scenes of protests or other gatherings that focus on a diverse spectrum of religious beliefs or political opinions that may be considered offensive. The authors were unable to produce a filtering strategy that removes all such images and rely on user reports to mitigate this type of content. - explanation: According to sections [[Segment Anything Dataset]](https://arxiv.org/pdf/2304.02643.pdf#section.5) - and [[Datasheet]](https://arxiv.org/pdf/2304.02643.pdf#page=25) of the paper. - # Downstream - access: - value: open - explanation: > - The full dataset can be downloaded at [[SA-1B Download]](https://ai.facebook.com/datasets/segment-anything-downloads/). - A 50k image preview of the full dataset is available [[here]](https://segment-anything.com/dataset/index.html). 
+ value: "SA-1B consists of 11M diverse, high-resolution (averaging 1500×2250\ + \ pixels), and privacy protecting images collected and licensed from a third\ + \ party photo company. The images are photos taken from a camera, i.e. not\ + \ artwork. The images vary in subject matter. Common themes of the images\ + \ include: locations, objects, scenes. The dataset includes 1.1B high-quality\ + \ segmentation masks collected with the Segment Anything Data Engine. SA-1B\ + \ only includes automatically generated masks (99.1%), as the authors conclude\ + \ after experiments that the automatic masks are high quality and effective\ + \ for training models. The masks range from large scale objects such as buildings\ + \ to fine grained details such as door handles. Masks are provided in the\ + \ COCO run-length encoding (RLE) annotation format.\n" intended_uses: + explanation: See [[SA-1B website]](https://ai.facebook.com/datasets/segment-anything/) value: SA-1B is intended to be used for research purposes only. It allows access to a privacy protecting and copyright friendly large-scale image dataset. Researchers can use it to train and evaluate generic object segmentation models. - explanation: See [[SA-1B website]](https://ai.facebook.com/datasets/segment-anything/) - prohibited_uses: - value: > - Authors note the following limitations of the dataset: - The masks are generated by a segmentation model, so there may be errors - or inconsistencies in the masks. - While no two images are the same, there are instances of images of the same - subject taken close together in time. - The dataset contains scenes of protests, or other gatherings that may suggest - religious beliefs, political opinions or union memberships that may be offensive. - - explanation: See [[Datasheet]](https://arxiv.org/pdf/2304.02643.pdf#page=25) + license: + explanation: SA-1B is released under a favorable license agreement for certain + research uses and with protections for researchers. See [[SA-1B Dataset Research + License]](https://ai.facebook.com/datasets/segment-anything-downloads/). + value: SA-1B Dataset Research License + modality: image monitoring: - value: > - The dataset will be hosted at https://ai.facebook.com/datasets/segment-anything - and maintained by Meta AI. - "If a user observes objectionable image(s) in the dataset, we invite them - to report the image(s) at segment-anything at meta.com for removal" - "To aid reproducibility of research using SA-1B, the only updates (to the - dataset) will be to remove reported images." - "We encourage users to gather further annotations for SA-1B. Any users who - generate annotations will be liable for hosting and distributing their annotations." explanation: See [[Datasheet]](https://arxiv.org/pdf/2304.02643.pdf#page=25) - feedback: Feedback can be given via the feedback form on their website [segment-anything.com](https://segment-anything.com/) - or by emailing at segment-anything at meta.com. - -- type: model - name: SAM - # General + value: "The dataset will be hosted at https://ai.facebook.com/datasets/segment-anything\ + \ and maintained by Meta AI. \"If a user observes objectionable image(s) in\ + \ the dataset, we invite them to report the image(s) at segment-anything at\ + \ meta.com for removal\" \"To aid reproducibility of research using SA-1B,\ + \ the only updates (to the dataset) will be to remove reported images.\" \"\ + We encourage users to gather further annotations for SA-1B. 
Any users who\ + \ generate annotations will be liable for hosting and distributing their annotations.\"\ + \n" + name: SA-1B organization: Meta + prohibited_uses: + explanation: See [[Datasheet]](https://arxiv.org/pdf/2304.02643.pdf#page=25) + value: "Authors note the following limitations of the dataset:\n The masks\ + \ are generated by a segmentation model, so there may be errors\nor inconsistencies\ + \ in the masks.\n While no two images are the same, there are instances of\ + \ images of the same\nsubject taken close together in time.\n The dataset\ + \ contains scenes of protests, or other gatherings that may suggest\nreligious\ + \ beliefs, political opinions or union memberships that may be offensive.\n" + quality_control: + explanation: According to sections [[Segment Anything Dataset]](https://arxiv.org/pdf/2304.02643.pdf#section.5) + and [[Datasheet]](https://arxiv.org/pdf/2304.02643.pdf#page=25) of the paper. + value: "- Dataset quality:\n Due to potential accessibility and storage challenges,\ + \ the original high-resolution images (averaging 3300×4950 pixels) were downsampled\ + \ to an average resolution of 1500×2250 pixels. Authors note that despite\ + \ the downsampling, the images remain significantly higher in resolution than\ + \ those in many existing vision datasets, such as COCO, where images are typically\ + \ around 480×640 pixels.\n The images were processed to blur faces and license\ + \ plates to protect the identities of those in the image.\n To estimate the\ + \ quality of the masks in the images, a random sample of 500 images (∼50k\ + \ masks) was taken and professional annotators were asked to improve the quality\ + \ of all masks in those images.\n- Safety measures:\n Authors implemented\ + \ two safety measures to prevent objectionable content:\n (1) Photos are\ + \ licensed from a photo provider and had to meet the terms of service of the\ + \ photo provider. Authors requested that all objectionable content be filtered\ + \ from the images they licensed.\n (2) Users who observe objectionable\ + \ images in the dataset are invited to report them for removal at segment-anything@meta.com.\n\ + \ Despite these measures, they observed that a small portion of images contain\ + \ scenes of protests or other gatherings that focus on a diverse spectrum\ + \ of religious beliefs or political opinions that may be considered offensive.\ + \ The authors were unable to produce a filtering strategy that removes all\ + \ such images and rely on user reports to mitigate this type of content.\n" + sample: [] + size: 11M images, 1.1B mask annotations + type: dataset + url: https://ai.facebook.com/datasets/segment-anything/ +- access: + explanation: "Inference code and model checkpoints are available on the model's\ + \ [[GitHub repository]](https://github.com/facebookresearch/segment-anything).\ + \ Its training dataset SA-1B can be used for research purposes and is available\ + \ for download [here](https://ai.facebook.com/datasets/segment-anything-downloads/).\n" + value: open + analysis: + explanation: See [[Zero-Shot Transfer Experiments]](https://arxiv.org/pdf/2304.02643.pdf#section.7) + for more details. + value: "\"We extensively evaluate SAM. First, using a diverse new suite of 23\ + \ segmentation datasets, we find that SAM produces high-quality masks from\ + \ a single foreground point, often only slightly below that of the manually\ + \ annotated ground truth. 
Second, we find consistently strong quantitative\ + \ and qualitative results on a variety of downstream tasks under a zero-shot\ + \ transfer protocol using prompt engineering, including edge detection, object\ + \ proposal generation, instance segmentation, and a preliminary exploration\ + \ of text-to-mask prediction.\"\n" + created_date: + explanation: The date the [[Meta blog post]](https://ai.facebook.com/blog/segment-anything-foundation-model-image-segmentation/) + was released. + value: 2023-04-05 + dependencies: [SA-1B] description: SAM (Segment Anything Model) is a foundation model for image segmentation. The model is designed and trained to be promptable, and supports flexible prompts (point, box, mask and free-form text) to compute masks in real-time to allow interactive use. - created_date: - value: 2023-04-05 - explanation: The date the [[Meta blog post]](https://ai.facebook.com/blog/segment-anything-foundation-model-image-segmentation/) - was released. - url: https://arxiv.org/pdf/2304.02643.pdf + feedback: Feedback can be given via the feedback form on their website [segment-anything.com](https://segment-anything.com/) + or by emailing at segment-anything at meta.com. + intended_uses: + explanation: See [[Model card]](https://arxiv.org/pdf/2304.02643.pdf#page=28) + value: "\"SAM is intended to be used for any prompt-based segmentation task.\ + \ We explored its use in segmenting objects from a point, edge detection,\ + \ segmenting all objects, and segmenting detected objects. We explored how\ + \ SAM can integrate with other vision models to segment objects from text.\"\ + \n" + license: + explanation: See [[LICENSE]](https://github.com/facebookresearch/segment-anything/blob/main/LICENSE) + value: Apache 2.0 + modality: + explanation: image, text + value: image, text; image, text model_card: - value: https://arxiv.org/pdf/2304.02643.pdf#page=28 explanation: Model card can be found in the Appendix section of the paper. - modality: image, text - analysis: - value: > - "We extensively evaluate SAM. - First, using a diverse new suite of 23 segmentation datasets, we find that - SAM produces high-quality masks from a single foreground point, often only - slightly below that of the manually annotated ground truth. - Second, we find consistently strong quantitative and qualitative results on - a variety of downstream tasks under a zero-shot transfer protocol using prompt - engineering, including edge detection, object proposal generation, instance - segmentation, and a preliminary exploration of text-to-mask prediction." - explanation: See [[Zero-Shot Transfer Experiments]](https://arxiv.org/pdf/2304.02643.pdf#section.7) + value: https://arxiv.org/pdf/2304.02643.pdf#page=28 + monitoring: '' + name: SAM + organization: Meta + prohibited_uses: + explanation: See [[Discussion]](https://arxiv.org/pdf/2304.02643.pdf#section.8) + value: "For out-of-scope use cases see terms of use in [[LICENSE]](https://github.com/facebookresearch/segment-anything/blob/main/LICENSE).\ + \ Authors also discuss the following limitations of the model: \"While SAM\ + \ performs well in general, it is not perfect. It can miss fine structures,\ + \ hallucinates small disconnected components at times, and does not produce\ + \ boundaries as crisply as more computationally intensive methods that “zoom-in”,\ + \ e.g. [18]. In general, we expect dedicated interactive segmentation methods\ + \ to outperform SAM when many points are provided, e.g. [67]. 
Unlike these\ + \ methods, SAM is designed for generality and breadth of use rather than high\ + \ IoU interactive segmentation. Moreover, SAM can process prompts in real-time,\ + \ but nevertheless SAM's overall performance is not real-time when using a\ + \ heavy image encoder. Our foray into the text-to-mask task is exploratory\ + \ and not entirely robust, although we believe it can be improved with more\ + \ effort. While SAM can perform many tasks, it is unclear how to design simple\ + \ prompts that implement semantic and panoptic segmentation. Finally, there\ + \ are domain-specific tools, such as [7], that we expect to outperform SAM\ + \ in their respective domains.\"\n" + quality_control: + explanation: See [[Segment Anything RAI Analysis]](https://arxiv.org/pdf/2304.02643.pdf#section.6) for more details. + value: "\"We perform a Responsible AI (RAI) analysis of our work by investigating\ + \ potential fairness concerns and biases when using SA-1B and SAM. We focus\ + \ on the geographic and income distribution of SA-1B and fairness of SAM across\ + \ protected attributes of people.\"\n" size: unknown - # Construction - dependencies: [SA-1B] training_emissions: - value: 2.8 metric tons of carbon dioxide - explanation: See [[Model card]](https://arxiv.org/pdf/2304.02643.pdf#page=28) - training_time: - value: 68 hours explanation: See [[Model card]](https://arxiv.org/pdf/2304.02643.pdf#page=28) + value: 2.8 metric tons of carbon dioxide training_hardware: - value: 256 A100 GPUs explanation: See [[Model card]](https://arxiv.org/pdf/2304.02643.pdf#page=28) - quality_control: - value: > - "We perform a Responsible AI (RAI) analysis of our work by investigating potential - fairness concerns and biases when using SA-1B and SAM. We focus on the geographic - and income distribution of SA-1B and fairness of SAM across protected attributes - of people." - explanation: See [[Segment Anything RAI Analysis]](https://arxiv.org/pdf/2304.02643.pdf#section.6) - for more details. - # Downstream - access: - value: open - explanation: > - Inference code and model checkpoints are available on the model's - [[GitHub repository]](https://github.com/facebookresearch/segment-anything). - Its training dataset SA-1B can be used for research purposes and is available - for download [here](https://ai.facebook.com/datasets/segment-anything-downloads/). - license: - value: Apache 2.0 - explanation: See [[LICENSE]](https://github.com/facebookresearch/segment-anything/blob/main/LICENSE) - intended_uses: - value: > - "SAM is intended to be used for any prompt-based segmentation task. We explored - its use in segmenting objects from a point, edge detection, segmenting all - objects, and segmenting detected objects. We explored how SAM can integrate - with other vision models to segment objects from text." + value: 256 A100 GPUs + training_time: explanation: See [[Model card]](https://arxiv.org/pdf/2304.02643.pdf#page=28) - prohibited_uses: - value: > - For out-of-scope use cases see terms of use in [[LICENSE]](https://github.com/facebookresearch/segment-anything/blob/main/LICENSE). - Authors also discuss the following limitations of the model: "While SAM performs - well in general, it is not perfect. It can miss fine structures, hallucinates - small disconnected components at times, and does not produce boundaries as - crisply as more computationally intensive methods that “zoom-in”, e.g. [18]. 
- In general, we expect dedicated interactive segmentation methods to outperform - SAM when many points are provided, e.g. [67]. Unlike these methods, SAM is - designed for generality and breadth of use rather than high IoU interactive - segmentation. Moreover, SAM can process prompts in real-time, but nevertheless - SAM's overall performance is not real-time when using a heavy image encoder. - Our foray into the text-to-mask task is exploratory and not entirely robust, - although we believe it can be improved with more effort. While SAM can perform - many tasks, it is unclear how to design simple prompts that implement semantic - and panoptic segmentation. Finally, there are domain-specific tools, such - as [7], that we expect to outperform SAM in their respective domains." - explanation: See [[Discussion]](https://arxiv.org/pdf/2304.02643.pdf#section.8) - monitoring: '' - feedback: Feedback can be given via the feedback form on their website [segment-anything.com](https://segment-anything.com/) - or by emailing at segment-anything at meta.com. - -- type: model - name: Voicebox - organization: Meta - description: Voicebox is the first generative AI model for speech to generalize - across tasks with state-of-the-art performance. - created_date: 2023-06-16 - url: https://research.facebook.com/publications/voicebox-text-guided-multilingual-universal-speech-generation-at-scale/ - model_card: '' - modality: text and audio input and audio output + value: 68 hours + type: model + url: https://arxiv.org/pdf/2304.02643.pdf +- access: closed analysis: Evaluated on zero-shot text-to-speech benchmarks, with Voicebox outperforming the current state-of-the-art English model VALL-E. - size: 330M parameters + created_date: 2023-06-16 dependencies: [] - training_emissions: unknown - training_time: 750,000 iterations - training_hardware: 32 GPUs of unspecified type - quality_control: '' - access: closed - license: '' + description: Voicebox is the first generative AI model for speech to generalize + across tasks with state-of-the-art performance. + feedback: '' intended_uses: '' - prohibited_uses: '' + license: '' + modality: + explanation: text and audio input and audio output + value: audio, text; audio + model_card: '' monitoring: '' - feedback: '' - -- type: model - name: PEER + name: Voicebox organization: Meta - description: PEER is a collaborative language model that is trained to imitate - the entire writing process itself. PEER can write drafts, add suggestions, propose - edits and provide explanations for its actions. - created_date: 2022-08-24 - url: https://arxiv.org/pdf/2208.11663.pdf - model_card: '' - modality: natural language text + prohibited_uses: '' + quality_control: '' + size: 330M parameters (dense) + training_emissions: unknown + training_hardware: 32 GPUs of unspecified type + training_time: 750,000 iterations + type: model + url: https://research.facebook.com/publications/voicebox-text-guided-multilingual-universal-speech-generation-at-scale/ +- access: open analysis: PEER is evaluated on core research questions intended to gauge language understanding, proper use of citations, instruction following, and iterative use. - size: 3B parameters + created_date: 2022-08-24 dependencies: [] - training_emissions: '' - training_time: '' - training_hardware: 64 GPUs - quality_control: Heuristics and edit filtering was used on data set, which consisted - mostly of Wikipedia pages. 
- access: open - license: '' + description: PEER is a collaborative language model that is trained to imitate + the entire writing process itself. PEER can write drafts, add suggestions, propose + edits and provide explanations for its actions. + feedback: '' intended_uses: adapting LLMs to work with collaborative writing and updating. - prohibited_uses: '' + license: '' + modality: + explanation: natural language text + value: text; text + model_card: '' monitoring: '' - feedback: '' + name: PEER + organization: Meta + prohibited_uses: '' + quality_control: Heuristics and edit filtering was used on data set, which consisted + mostly of Wikipedia pages. + size: 3B parameters (dense) + training_emissions: '' + training_hardware: 64 GPUs + training_time: '' + type: model + url: https://arxiv.org/pdf/2208.11663.pdf diff --git a/assets/microsoft.yaml b/assets/microsoft.yaml index 8d324607..773e3bb4 100644 --- a/assets/microsoft.yaml +++ b/assets/microsoft.yaml @@ -1,17 +1,13 @@ --- - -- type: model - name: VLMo - organization: Microsoft - description: VLMo is a model for text-to-image generation +- access: + explanation: Microsoft does not provide VLMo to external researchers. One author + commented that code would be pushed "soon" in [[November of 2021]](https://github.com/microsoft/unilm/issues/532), + but the repository does not contain relevant changes. + value: closed + analysis: '' created_date: - value: 2021-11-03 explanation: The date the model paper was released - url: https://arxiv.org/abs/2111.02358 - model_card: none - modality: text (English) and image - size: 562M - analysis: '' + value: 2021-11-03 dependencies: - Conceptual Captions - SBU Captions @@ -19,300 +15,263 @@ - Visual Genome - Wikipedia - BooksCorpus - training_emissions: '' - training_time: '' - training_hardware: '' - quality_control: '' - access: - value: closed - explanation: Microsoft does not provide VLMo to external researchers. One author - commented that code would be pushed "soon" in [[November of 2021]](https://github.com/microsoft/unilm/issues/532), - but the repository does not contain relevant changes. + description: VLMo is a model for text-to-image generation + feedback: '' + intended_uses: '' license: - value: none explanation: No license was found. - intended_uses: '' - prohibited_uses: '' + value: none + modality: + explanation: text (English) and image + value: image, text; image, text + model_card: none monitoring: '' - feedback: '' - -- type: model - name: T-ULRv5 + name: VLMo organization: Microsoft - description: T-ULRv5 is a language model trained with two unique training objectives + prohibited_uses: '' + quality_control: '' + size: 562M parameters (dense) + training_emissions: '' + training_hardware: '' + training_time: '' + type: model + url: https://arxiv.org/abs/2111.02358 +- access: + explanation: Manual approval through early access request form required. 
+ value: limited + analysis: '' created_date: - value: 2022-09-28 explanation: Date model blog post was released - url: https://www.microsoft.com/en-us/research/blog/microsoft-turing-universal-language-representation-model-t-ulrv5-tops-xtreme-leaderboard-and-trains-100x-faster/ - model_card: '' - modality: Text (English, 93 other languages) - size: 2.2B parameters (dense model) - analysis: '' + value: 2022-09-28 dependencies: [] + description: T-ULRv5 is a language model trained with two unique training objectives + feedback: '' + intended_uses: '' + license: + explanation: "The asset isn't released, and hence the license is unknown.\n" + value: unknown + modality: + explanation: Text (English, 93 other languages) + value: text; text + model_card: '' + monitoring: '' + name: T-ULRv5 + organization: Microsoft + prohibited_uses: '' + quality_control: '' + size: 2.2B parameters (dense) training_emissions: '' - training_time: Less than two weeks training_hardware: 256 A100 - quality_control: '' - access: - value: limited + training_time: Less than two weeks + type: model + url: https://www.microsoft.com/en-us/research/blog/microsoft-turing-universal-language-representation-model-t-ulrv5-tops-xtreme-leaderboard-and-trains-100x-faster/ +- access: explanation: Manual approval through early access request form required. + value: limited + analysis: '' + created_date: + explanation: Date model blog post was released + value: 2021-12-02 + dependencies: [] + description: '' + feedback: '' + intended_uses: '' license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - intended_uses: '' - prohibited_uses: '' + modality: + explanation: text (English) + value: text; text + model_card: '' monitoring: '' - feedback: '' - -- type: model name: Turing NLR-v5 organization: Microsoft - description: '' - created_date: - value: 2021-12-02 - explanation: Date model blog post was released - url: https://www.microsoft.com/en-us/research/blog/efficiently-and-effectively-scaling-up-language-model-pretraining-for-best-language-representation-model-on-glue-and-superglue/?OCID=msr_blog_TNLRV5_tw - model_card: '' - modality: text (English) - analysis: '' - size: 5B parameters (dense model) - dependencies: [] + prohibited_uses: '' + quality_control: '' + size: 5B parameters (dense) training_emissions: '' - training_time: '' training_hardware: '' - quality_control: '' - access: + training_time: '' + type: model + url: https://www.microsoft.com/en-us/research/blog/efficiently-and-effectively-scaling-up-language-model-pretraining-for-best-language-representation-model-on-glue-and-superglue/?OCID=msr_blog_TNLRV5_tw +- access: + explanation: Megatron-Turing NLG can be accessed through the [[Turing Academic + Program]](https://www.microsoft.com/en-us/research/collaboration/microsoft-turing-academic-program/) value: limited - explanation: Manual approval through early access request form required. 
+ analysis: '' + created_date: + explanation: "The date of the Microsoft Research blog announcing MT-NLG [[Microsoft\ + \ Research Blog]](https://www.microsoft.com/en-us/research/blog/using-deepspeed-and-megatron-to-train-megatron-turing-nlg-530b-the-worlds-largest-and-most-powerful-generative-language-model/).\n" + value: 2022-01-28 + dependencies: [The Pile] + description: "Megatron-Turing NLG is a 530B parameter autoregressive language\ + \ model.\n" + feedback: '' + intended_uses: '' license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - intended_uses: '' - prohibited_uses: '' + modality: + explanation: text (English) + value: text; text + model_card: '' monitoring: '' - feedback: '' - -- type: model name: Megatron-Turing NLG organization: Microsoft, NVIDIA - description: > - Megatron-Turing NLG is a 530B parameter autoregressive language model. - created_date: - value: 2022-01-28 - explanation: > - The date of the Microsoft Research blog announcing MT-NLG - [[Microsoft Research Blog]](https://www.microsoft.com/en-us/research/blog/using-deepspeed-and-megatron-to-train-megatron-turing-nlg-530b-the-worlds-largest-and-most-powerful-generative-language-model/). - url: https://arxiv.org/abs/2201.11990 - model_card: '' - modality: text (English) - size: 530B parameters (dense model) - analysis: '' - dependencies: [The Pile] + prohibited_uses: '' + quality_control: '' + size: 530B parameters (dense) training_emissions: '' - training_time: '' training_hardware: 4480 A100s (560 x 8) - quality_control: '' - access: - value: limited - explanation: Megatron-Turing NLG can be accessed through the [[Turing Academic - Program]](https://www.microsoft.com/en-us/research/collaboration/microsoft-turing-academic-program/) - license: - value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - intended_uses: '' - prohibited_uses: '' - monitoring: '' - feedback: '' - -- type: model - name: VALL-E - organization: Microsoft - description: Vall-E is a neural code model for text-to-speech synthesis + training_time: '' + type: model + url: https://arxiv.org/abs/2201.11990 +- access: + explanation: Microsoft does not provide public access to VALL-E + value: closed + analysis: '' created_date: - value: 2023-01-05 explanation: The date the [[model paper]](https://arxiv.org/abs/2301.02111) was released - url: https://valle-demo.github.io/ - model_card: none - modality: text, audio - size: unknown - analysis: '' + value: 2023-01-05 dependencies: [] - training_emissions: '' - training_time: '' - training_hardware: 16 V100 32GB GPUs - quality_control: '' - access: - value: closed - explanation: Microsoft does not provide public access to VALL-E + description: Vall-E is a neural code model for text-to-speech synthesis + feedback: '' + intended_uses: '' license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - intended_uses: '' - prohibited_uses: '' + modality: + explanation: text, audio + value: audio, text; audio, text + model_card: none monitoring: '' - feedback: '' - -# Applications - -- type: application - name: GitHub CoPilot - # General + name: VALL-E organization: Microsoft - description: > - GitHub CoPilot is a coding pair programmer assisting programmers as they - write code. 
+ prohibited_uses: '' + quality_control: '' + size: unknown + training_emissions: '' + training_hardware: 16 V100 32GB GPUs + training_time: '' + type: model + url: https://valle-demo.github.io/ +- access: + explanation: "The feature is available to developers in a restricted technical\ + \ preview [[GitHub CoPilot]](https://copilot.github.com/).\n" + value: limited + adaptation: unknown created_date: + explanation: "Date of the blog post introducing CoPilot [[GitHub Blog Post]]\ + \ (https://github.blog/2021-06-29-introducing-github-copilot-ai-pair-programmer/).\n" value: 2021-06-29 - explanation: > - Date of the blog post introducing CoPilot - [[GitHub Blog Post]] - (https://github.blog/2021-06-29-introducing-github-copilot-ai-pair-programmer/). - url: https://copilot.github.com/ - # Construction dependencies: [Codex] - adaptation: unknown - output_space: Code completions - quality_control: > - GitHub is working on a filter to detect and suppress code generations that - are verbatim from the training set - [[GitHub Research Recitation]] - (https://docs.github.com/en/github/copilot/research-recitation). - According to the FAQ, GitHub implemented a simple filter that blocks emails - in standard formats to protect personally identifiable data that may be - present in the training data - [[GitHub CoPilot]](https://copilot.github.com/). - # Downstream - access: - value: limited - explanation: > - The feature is available to developers in a restricted - technical preview - [[GitHub CoPilot]](https://copilot.github.com/). + description: "GitHub CoPilot is a coding pair programmer assisting programmers\ + \ as they write code.\n" + failures: unknown + feedback: "Feedback can be provided in the CoPilot feedback project [[CoPilot\ + \ feedback]] (https://github.com/github/feedback/discussions/categories/copilot-feedback).\n" + intended_uses: "GitHub CoPilot is intended to be used as a coding assistant.\n" license: + explanation: "We weren't able to locate the license.\n" value: unknown - explanation: > - We weren't able to locate the license. - terms_of_service: > - https://docs.github.com/en/site-policy/github-terms/github-terms-of-service - intended_uses: > - GitHub CoPilot is intended to be used as a coding assistant. - prohibited_uses: > - Access to GPT-3 is governed by GitHub Acceptable Use Policies and Terms of - Service, both of which list a set of prohibited uses - [[Use Policies]] - (https://docs.github.com/en/site-policy/acceptable-use-policies/github-acceptable-use-policies) - [[Terms of Service]] - (https://docs.github.com/en/site-policy/github-terms/github-terms-of-service). - monitoring: > - value: unknown - explanation: > - There may be internal monitoring mechanisms unknown to the public. - feedback: > - Feedback can be provided in the CoPilot feedback project - [[CoPilot feedback]] - (https://github.com/github/feedback/discussions/categories/copilot-feedback). - # Deployment - monthly_active_users: > - GitHub Copilot reportedly has over 1 million sign-ups - [[Tweet Source]](https://twitter.com/sama/status/1539737789310259200?s=21&t=YPaYd0ZueJzrR6rLslUqzg). 
- user_distribution: unknown - failures: unknown - -- type: model - name: BioGPT + monitoring: + explanation: There may be internal monitoring mechanisms unknown to the public. + value: unknown + monthly_active_users: "GitHub Copilot reportedly has over 1 million sign-ups [[Tweet\ \ Source]](https://twitter.com/sama/status/1539737789310259200?s=21&t=YPaYd0ZueJzrR6rLslUqzg).\n" + name: GitHub CoPilot organization: Microsoft - description: '' - created_date: 2022-09-24 - url: https://academic.oup.com/bib/article/23/6/bbac409/6713511?guestAccessKey=a66d9b5d-4f83-4017-bb52-405815c907b9&login=true - model_card: '' - modality: text + output_space: Code completions + prohibited_uses: "Access to GitHub CoPilot is governed by GitHub Acceptable Use Policies\ \ and Terms of Service, both of which list a set of prohibited uses [[Use Policies]]\ \ (https://docs.github.com/en/site-policy/acceptable-use-policies/github-acceptable-use-policies)\ \ [[Terms of Service]] (https://docs.github.com/en/site-policy/github-terms/github-terms-of-service).\n" + quality_control: "GitHub is working on a filter to detect and suppress code generations\ \ that are verbatim from the training set [[GitHub Research Recitation]] (https://docs.github.com/en/github/copilot/research-recitation).\ \ According to the FAQ, GitHub implemented a simple filter that blocks emails\ \ in standard formats to protect personally identifiable data that may be present\ \ in the training data [[GitHub CoPilot]](https://copilot.github.com/).\n" + terms_of_service: "https://docs.github.com/en/site-policy/github-terms/github-terms-of-service\n" + type: application + url: https://copilot.github.com/ + user_distribution: unknown +- access: open analysis: '' - size: 1.5B parameters (dense model) + created_date: 2022-09-24 dependencies: [PubMed] - training_emissions: '' - training_time: '' - training_hardware: '' - quality_control: '' - access: open - license: MIT + description: '' + feedback: '' intended_uses: '' - prohibited_uses: '' + license: MIT + modality: + explanation: text + value: text; text + model_card: '' monitoring: '' - feedback: '' - -- type: application - name: Microsoft Teams + name: BioGPT organization: Microsoft + prohibited_uses: '' + quality_control: '' + size: 1.5B parameters (dense) + training_emissions: '' + training_hardware: '' + training_time: '' + type: model + url: https://academic.oup.com/bib/article/23/6/bbac409/6713511?guestAccessKey=a66d9b5d-4f83-4017-bb52-405815c907b9&login=true +- access: open + adaptation: '' + created_date: 2023-02-01 + dependencies: [ChatGPT API] description: Microsoft Teams Premium is generally available. Built on the familiar, all-in-one collaborative experience of Microsoft Teams, Teams Premium brings the latest technologies, powered by OpenAI’s GPT-3.5, to make meetings more intelligent, personalized, and protected—whether it’s one-on-one, large meetings, virtual appointments, or webinars. - created_date: 2023-02-01 - url: https://www.microsoft.com/en-us/microsoft-365/blog/2023/02/01/microsoft-teams-premium-cut-costs-and-add-ai-powered-productivity/ - dependencies: [ChatGPT API] - adaptation: '' - output_space: '' - quality_control: '' - access: open + failures: '' + feedback: '' + intended_uses: '' license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown.
- terms_of_service: '' - intended_uses: '' - prohibited_uses: '' monitoring: '' - feedback: '' monthly_active_users: '' - user_distribution: '' - failures: '' - -- type: application - name: Bing Search + name: Microsoft Teams organization: Microsoft + output_space: '' + prohibited_uses: '' + quality_control: '' + terms_of_service: '' + type: application + url: https://www.microsoft.com/en-us/microsoft-365/blog/2023/02/01/microsoft-teams-premium-cut-costs-and-add-ai-powered-productivity/ + user_distribution: '' +- access: limited + adaptation: unknown + created_date: 2023-02-07 + dependencies: [ChatGPT API] description: AI-powered Bing search engine and Edge browser, available in preview now at Bing.com, to deliver better search, more complete answers, a new chat experience and the ability to generate content. We think of these tools as an AI copilot for the web. - created_date: 2023-02-07 - url: https://blogs.microsoft.com/blog/2023/02/07/reinventing-search-with-a-new-ai-powered-microsoft-bing-and-edge-your-copilot-for-the-web/ - dependencies: [ChatGPT API] - adaptation: unknown - output_space: Search results - quality_control: '' - access: limited + failures: '' + feedback: "Feedback can be submitted at [bing.com](bing.com).\n" + intended_uses: Search engine license: + explanation: "Software license as described in the Terms of Service document.\n" value: custom - explanation: > - Software license as described in the Terms of Service document. - terms_of_service: https://www.microsoft.com/legal/terms-of-use - intended_uses: Search engine - prohibited_uses: '' monitoring: '' - feedback: > - Feedback can be submitted at [bing.com](bing.com). monthly_active_users: '' - user_distribution: '' - failures: '' - -- type: model - name: KOSMOS-1 + name: Bing Search organization: Microsoft - description: KOSMOS-1 is a multimodal language model that is capable of perceiving - multimodal input, following instructions, and performing in-context learning - for not only language tasks but also multimodal tasks. - created_date: 2023-03-01 - url: https://arxiv.org/pdf/2302.14045.pdf - model_card: '' - modality: text, image + output_space: Search results + prohibited_uses: '' + quality_control: '' + terms_of_service: https://www.microsoft.com/legal/terms-of-use + type: application + url: https://blogs.microsoft.com/blog/2023/02/07/reinventing-search-with-a-new-ai-powered-microsoft-bing-and-edge-your-copilot-for-the-web/ + user_distribution: '' +- access: closed analysis: '' - size: 1.6B parameters (dense model) + created_date: 2023-03-01 dependencies: - The Pile - CommonCrawl @@ -320,439 +279,427 @@ - LAION-400M - COYO-700M - Conceptual Captions - training_emissions: '' - training_time: '' - training_hardware: '' - quality_control: '' - access: closed - license: MIT + description: KOSMOS-1 is a multimodal language model that is capable of perceiving + multimodal input, following instructions, and performing in-context learning + for not only language tasks but also multimodal tasks. 
+ feedback: '' intended_uses: '' - prohibited_uses: '' + license: MIT + modality: + explanation: text, image + value: image, text; image, text + model_card: '' monitoring: '' - feedback: '' - -- type: model - name: Prometheus + name: KOSMOS-1 organization: Microsoft + prohibited_uses: '' + quality_control: '' + size: 1.6B parameters (dense) + training_emissions: '' + training_hardware: '' + training_time: '' + type: model + url: https://arxiv.org/pdf/2302.14045.pdf +- access: closed + analysis: '' + created_date: 2023-02-07 + dependencies: [] description: In the context of Bing, we have developed a proprietary way of working with the OpenAI model that allows us to best leverage its power. We call this collection of capabilities and techniques the Prometheus model. This combination gives you more relevant, timely and targeted results, with improved safety. - created_date: 2023-02-07 - url: https://blogs.microsoft.com/blog/2023/02/07/reinventing-search-with-a-new-ai-powered-microsoft-bing-and-edge-your-copilot-for-the-web/ + feedback: '' + intended_uses: '' + license: + explanation: "We weren't able to locate a license.\n" + value: unknown + modality: {} model_card: '' - modality: unknown - analysis: '' + monitoring: '' + name: Prometheus + organization: Microsoft + prohibited_uses: '' + quality_control: '' size: unknown - dependencies: [] training_emissions: '' - training_time: '' training_hardware: '' - quality_control: '' - access: closed + training_time: '' + type: model + url: https://blogs.microsoft.com/blog/2023/02/07/reinventing-search-with-a-new-ai-powered-microsoft-bing-and-edge-your-copilot-for-the-web/ +- access: closed + analysis: '' + created_date: 2022-11-23 + dependencies: [FLD-900M] + description: '' + feedback: '' + intended_uses: '' license: + explanation: "We weren't able to locate a license.\n" value: unknown - explanation: > - We weren't able to locate a license. - intended_uses: '' - prohibited_uses: '' + modality: + explanation: text, image + value: image, text; image, text + model_card: '' monitoring: '' - feedback: '' - -- type: model name: Florence organization: Microsoft - description: '' - created_date: 2022-11-23 - url: https://arxiv.org/abs/2111.11432 - model_card: '' - modality: text, image - analysis: '' - size: 900M parameters (637M image, 256M text) - dependencies: [FLD-900M] + prohibited_uses: '' + quality_control: '' + size: 900M parameters (dense) training_emissions: '' - training_time: '' training_hardware: '' - quality_control: '' - access: closed - license: - value: unknown - explanation: > - We weren't able to locate a license. - intended_uses: '' - prohibited_uses: '' - monitoring: '' - feedback: '' - -- type: dataset - name: FLD-900M - organization: Microsoft - description: '' - created_date: 2022-11-23 + training_time: '' + type: model url: https://arxiv.org/abs/2111.11432 - datasheet: '' - modality: text, image - size: 900M image-text pairs - sample: [] +- access: closed analysis: '' + created_date: 2022-11-23 + datasheet: '' dependencies: [] - included: '' + description: '' excluded: '' - quality_control: '' - access: closed + feedback: '' + included: '' + intended_uses: '' license: + explanation: "We weren't able to locate a license.\n" value: unknown - explanation: > - We weren't able to locate a license. 
- intended_uses: '' - prohibited_uses: '' + modality: + explanation: text, image + value: image, text monitoring: '' - feedback: '' - -- type: application - name: Azure Cognitive Services for Vision + name: FLD-900M organization: Microsoft + prohibited_uses: '' + quality_control: '' + sample: [] + size: 900M image-text pairs + type: dataset + url: https://arxiv.org/abs/2111.11432 +- access: limited + adaptation: '' + created_date: 2023-03-07 + dependencies: [Florence] description: Cost-effective, production-ready computer vision services in Azure Cognitive Service for Vision. The improved Vision Services enables developers to create cutting-edge, market-ready, responsible computer vision applications across various industries. - created_date: 2023-03-07 - url: https://azure.microsoft.com/en-us/blog/announcing-a-renaissance-in-computer-vision-ai-with-microsofts-florence-foundation-model/?utm_content=buffer16fa0&utm_medium=social&utm_source=twitter.com&utm_campaign=buffer - dependencies: [Florence] - adaptation: '' - output_space: '' - quality_control: '' - access: limited + failures: '' + feedback: '' + intended_uses: '' license: + explanation: "Software license in the Microsoft Terms of Use govern the license\ + \ rules for Azure services as outlined in the Terms of Service document.\n" value: custom - explanation: > - Software license in the Microsoft Terms of Use govern the license rules - for Azure services as outlined in the Terms of Service document. - terms_of_service: https://azure.microsoft.com/en-us/support/legal/ - intended_uses: '' - prohibited_uses: '' monitoring: '' - feedback: '' monthly_active_users: '' - user_distribution: '' - failures: '' - -- type: model - name: VisualChatGPT + name: Azure Cognitive Services for Vision organization: Microsoft - description: '' - created_date: 2023-03-08 - url: https://arxiv.org/pdf/2303.04671.pdf - model_card: '' - modality: text, image + output_space: '' + prohibited_uses: '' + quality_control: '' + terms_of_service: https://azure.microsoft.com/en-us/support/legal/ + type: application + url: https://azure.microsoft.com/en-us/blog/announcing-a-renaissance-in-computer-vision-ai-with-microsofts-florence-foundation-model/?utm_content=buffer16fa0&utm_medium=social&utm_source=twitter.com&utm_campaign=buffer + user_distribution: '' +- access: closed analysis: '' - size: unknown + created_date: 2023-03-08 dependencies: [OpenAI API] - training_emissions: '' - training_time: '' - training_hardware: '' - quality_control: '' - access: closed + description: '' + feedback: '' + intended_uses: '' license: - value: none explanation: No license was found. - intended_uses: '' - prohibited_uses: '' + value: none + modality: + explanation: text, image + value: image, text; image, text + model_card: '' monitoring: '' - feedback: '' - -- type: application - name: Microsoft 365 Copilot + name: VisualChatGPT organization: Microsoft + prohibited_uses: '' + quality_control: '' + size: unknown + training_emissions: '' + training_hardware: '' + training_time: '' + type: model + url: https://arxiv.org/pdf/2303.04671.pdf +- access: limited + adaptation: '' + created_date: 2023-03-16 + dependencies: [GPT-4 API] description: It combines the power of language models with your data in the Microsoft Graph and the Microsoft 365 apps to turn your words into the most powerful productivity tool on the planet. 
-  created_date: 2023-03-16
-  url: https://blogs.microsoft.com/blog/2023/03/16/introducing-microsoft-365-copilot-your-copilot-for-work/
-  dependencies: [GPT-4 API]
-  adaptation: ''
-  output_space: ''
-  quality_control: ''
-  access: limited
+  failures: ''
+  feedback: ''
+  intended_uses: ''
   license:
+    explanation: "Software license as described in the Terms of Service document.\n"
     value: custom
-    explanation: >
-      Software license as described in the Terms of Service document.
-  terms_of_service: https://www.microsoft.com/legal/terms-of-use
-  intended_uses: ''
-  prohibited_uses: ''
   monitoring: ''
-  feedback: ''
   monthly_active_users: ''
-  user_distribution: ''
-  failures: ''
-
-- type: application
-  name: Microsoft Business Chat
+  name: Microsoft 365 Copilot
   organization: Microsoft
+  output_space: ''
+  prohibited_uses: ''
+  quality_control: ''
+  terms_of_service: https://www.microsoft.com/legal/terms-of-use
+  type: application
+  url: https://blogs.microsoft.com/blog/2023/03/16/introducing-microsoft-365-copilot-your-copilot-for-work/
+  user_distribution: ''
+- access: limited
+  adaptation: ''
+  created_date: 2023-03-16
+  dependencies: [Microsoft 365 Copilot]
   description: Business Chat works across the language model, the Microsoft 365
     apps, and your data — your calendar, emails, chats, documents, meetings and
     contacts — to do things you’ve never been able to do before. You can give it
     natural language prompts like “Tell my team how we updated the product strategy,”
     and it will generate a status update based on the morning’s meetings, emails
     and chat threads.
-  created_date: 2023-03-16
-  url: https://blogs.microsoft.com/blog/2023/03/16/introducing-microsoft-365-copilot-your-copilot-for-work/
-  dependencies: [Microsoft 365 Copilot]
-  adaptation: ''
-  output_space: ''
-  quality_control: ''
-  access: limited
   license:
+    explanation: "Software license as described in the Terms of Service document.\n"
     value: custom
-    explanation: >
-      Software license as described in the Terms of Service document.
-  terms_of_service: https://www.microsoft.com/legal/terms-of-use
-  intended_uses: ''
-  prohibited_uses: ''
   monitoring: ''
-  feedback: ''
   monthly_active_users: ''
+  name: Microsoft Business Chat
+  organization: Microsoft
+  output_space: ''
+  prohibited_uses: ''
+  quality_control: ''
+  terms_of_service: https://www.microsoft.com/legal/terms-of-use
+  type: application
+  url: https://blogs.microsoft.com/blog/2023/03/16/introducing-microsoft-365-copilot-your-copilot-for-work/
   user_distribution: ''
+- access: open
+  adaptation: ''
+  created_date: unknown
+  dependencies: [Microsoft 365 Copilot]
+  description: Microsoft Excel is the industry leading spreadsheet software program,
+    a powerful data visualization and analysis tool.
   failures: ''
-
-- type: application
+  feedback: ''
+  intended_uses: ''
+  license:
+    explanation: "Software license as described in the Terms of Service document.\n"
+    value: custom
+  monitoring: ''
+  monthly_active_users: ''
   name: Microsoft Excel
   organization: Microsoft
-  description: Microsoft Excel is the industry leading spreadsheet software program,
-    a powerful data visualization and analysis tool.
-  created_date: unknown
-  url: https://www.microsoft.com/en-us/microsoft-365/excel
-  dependencies: [Microsoft 365 Copilot]
-  adaptation: ''
   output_space: ''
+  prohibited_uses: ''
   quality_control: ''
-  access: open
-  license:
-    value: custom
-    explanation: >
-      Software license as described in the Terms of Service document.
terms_of_service: https://www.microsoft.com/legal/terms-of-use + type: application + url: https://www.microsoft.com/en-us/microsoft-365/excel + user_distribution: '' +- access: open + adaptation: '' + created_date: unknown + dependencies: [Microsoft 365 Copilot] + description: Microsoft Outlook is a personal information manager software system + from Microsoft, available as a part of the Microsoft Office and Microsoft 365 + software suites. + failures: '' + feedback: '' intended_uses: '' - prohibited_uses: '' + license: + explanation: "Software license as described in the Terms of Service document.\n" + value: custom monitoring: '' - feedback: '' monthly_active_users: '' - user_distribution: '' - failures: '' - -- type: application name: Microsoft Outlook organization: Microsoft - description: Microsoft Outlook is a personal information manager software system - from Microsoft, available as a part of the Microsoft Office and Microsoft 365 - software suites. - created_date: unknown - url: https://www.microsoft.com/en-us/microsoft-365/outlook/email-and-calendar-software-microsoft-outlook - dependencies: [Microsoft 365 Copilot] - adaptation: '' output_space: '' + prohibited_uses: '' quality_control: '' - access: open - license: - value: custom - explanation: > - Software license as described in the Terms of Service document. terms_of_service: https://www.microsoft.com/legal/terms-of-use + type: application + url: https://www.microsoft.com/en-us/microsoft-365/outlook/email-and-calendar-software-microsoft-outlook + user_distribution: '' +- access: limited + adaptation: '' + created_date: unknown + dependencies: [Microsoft 365 Copilot] + description: Microsoft Power Platform is a line of business intelligence, app + development, and app connectivity software applications. + failures: '' + feedback: '' intended_uses: '' - prohibited_uses: '' + license: + explanation: "Software license as described in the Terms of Service document.\n" + value: custom monitoring: '' - feedback: '' monthly_active_users: '' - user_distribution: '' - failures: '' - -- type: application name: Microsoft Power Platform organization: Microsoft - description: Microsoft Power Platform is a line of business intelligence, app - development, and app connectivity software applications. - created_date: unknown - url: https://powerplatform.microsoft.com/en-us/ - dependencies: [Microsoft 365 Copilot] - adaptation: '' output_space: '' + prohibited_uses: '' quality_control: '' - access: limited - license: - value: custom - explanation: > - Software license as described in the Terms of Service document. terms_of_service: https://www.microsoft.com/legal/terms-of-use + type: application + url: https://powerplatform.microsoft.com/en-us/ + user_distribution: '' +- access: open + adaptation: '' + created_date: unknown + dependencies: [Microsoft 365 Copilot] + description: Microsoft PowerPoint empowers you to create clean slideshow presentations + and intricate pitch decks and gives you a powerful presentation maker. + failures: '' + feedback: '' intended_uses: '' - prohibited_uses: '' + license: + explanation: "Software license as described in the Terms of Service document.\n" + value: custom monitoring: '' - feedback: '' monthly_active_users: '' - user_distribution: '' - failures: '' - -- type: application name: Microsoft PowerPoint organization: Microsoft - description: Microsoft PowerPoint empowers you to create clean slideshow presentations - and intricate pitch decks and gives you a powerful presentation maker. 
- created_date: unknown - url: https://www.microsoft.com/en-us/microsoft-365/powerpoint - dependencies: [Microsoft 365 Copilot] - adaptation: '' output_space: '' + prohibited_uses: '' quality_control: '' - access: open - license: - value: custom - explanation: > - Software license as described in the Terms of Service document. terms_of_service: https://www.microsoft.com/legal/terms-of-use + type: application + url: https://www.microsoft.com/en-us/microsoft-365/powerpoint + user_distribution: '' +- access: open + adaptation: '' + created_date: unknown + dependencies: [Microsoft 365 Copilot, Microsoft Business Chat] + description: Microsoft Teams is a proprietary business communication platform + developed by Microsoft, as part of the Microsoft 365 family of products. + failures: '' + feedback: '' intended_uses: '' - prohibited_uses: '' + license: + explanation: "Software license as described in the Terms of Service document.\n" + value: custom monitoring: '' - feedback: '' monthly_active_users: '' - user_distribution: '' - failures: '' - -- type: application name: Microsoft Teams organization: Microsoft - description: Microsoft Teams is a proprietary business communication platform - developed by Microsoft, as part of the Microsoft 365 family of products. - created_date: unknown - url: https://www.microsoft.com/en-us/microsoft-teams/group-chat-software - dependencies: [Microsoft 365 Copilot, Microsoft Business Chat] - adaptation: '' output_space: '' + prohibited_uses: '' quality_control: '' - access: open - license: - value: custom - explanation: > - Software license as described in the Terms of Service document. terms_of_service: https://www.microsoft.com/legal/terms-of-use + type: application + url: https://www.microsoft.com/en-us/microsoft-teams/group-chat-software + user_distribution: '' +- access: open + adaptation: '' + created_date: unknown + dependencies: [Microsoft 365 Copilot] + description: Microsoft Word is a word processing software developed by Microsoft + failures: '' + feedback: '' intended_uses: '' - prohibited_uses: '' + license: + explanation: "Software license as described in the Terms of Service document.\n" + value: custom monitoring: '' - feedback: '' monthly_active_users: '' - user_distribution: '' - failures: '' - -- type: application name: Microsoft Word organization: Microsoft - description: Microsoft Word is a word processing software developed by Microsoft - created_date: unknown - url: https://www.microsoft.com/en-us/microsoft-365/word - dependencies: [Microsoft 365 Copilot] - adaptation: '' output_space: '' + prohibited_uses: '' quality_control: '' - access: open - license: - value: custom - explanation: > - Software license as described in the Terms of Service document. terms_of_service: https://www.microsoft.com/legal/terms-of-use - intended_uses: '' - prohibited_uses: '' - monitoring: '' - feedback: '' - monthly_active_users: '' + type: application + url: https://www.microsoft.com/en-us/microsoft-365/word user_distribution: '' - failures: '' - -- type: application - name: Microsoft Inside Look - # General - organization: Microsoft - description: > - Inside look is a Microsoft Office feature, composing document insights - highlighting key points, expected time to read, and popularity among others. 
+- access: limited + adaptation: unknown created_date: unknown - url: https://support.microsoft.com/en-us/office/see-file-insights-before-you-open-a-file-87a23bbc-a516-42e2-a7b6-0ecb8259e026 dependencies: [] - adaptation: unknown - output_space: Document level insights for users. - quality_control: unknown - # Downstream - access: limited + description: "Inside look is a Microsoft Office feature, composing document insights\ + \ highlighting key points, expected time to read, and popularity among others.\n" + failures: unknown + feedback: unknown + intended_uses: Providing document insights to users. license: + explanation: "Software license as described in the Terms of Service document.\n" value: custom - explanation: > - Software license as described in the Terms of Service document. - terms_of_service: https://www.microsoft.com/legal/terms-of-use - intended_uses: Providing document insights to users. - prohibited_uses: unknown monitoring: unknown - feedback: unknown - # Deployment monthly_active_users: unknown - user_distribution: unknown - failures: unknown - -- type: application - name: Microsoft Suggested Replies - # General + name: Microsoft Inside Look organization: Microsoft - description: > - Suggested replies is a Microsoft Outlook feature that suggests responses - to emails, available in: English, Spanish, Italian, French, German, - Portuguese Chinese Simplified, Chinese Traditional, Swedish, Russian, - Korean, Czech, Hungarian, Arabic, Hebrew, Thai, Turkish, Japanese, Dutch, - Norwegian, Danish, and Polish. + output_space: Document level insights for users. + prohibited_uses: unknown + quality_control: unknown + terms_of_service: https://www.microsoft.com/legal/terms-of-use + type: application + url: https://support.microsoft.com/en-us/office/see-file-insights-before-you-open-a-file-87a23bbc-a516-42e2-a7b6-0ecb8259e026 + user_distribution: unknown +- access: limited + adaptation: unknown created_date: unknown - url: https://support.microsoft.com/en-us/office/use-suggested-replies-in-outlook-19316194-0434-43ba-a742-6b5890157379 dependencies: [] - adaptation: unknown - output_space: Suggested emails. - quality_control: unknown - # Downstream - access: limited + description: "Suggested replies is a Microsoft Outlook feature that suggests responses\ + \ to emails, available in: English, Spanish, Italian, French, German, Portuguese\ + \ Chinese Simplified, Chinese Traditional, Swedish, Russian, Korean, Czech,\ + \ Hungarian, Arabic, Hebrew, Thai, Turkish, Japanese, Dutch, Norwegian, Danish,\ + \ and Polish.\n" + failures: unknown + feedback: unknown + intended_uses: Suggesting email replies. license: + explanation: "Software license as described in the Terms of Service document.\n" value: custom - explanation: > - Software license as described in the Terms of Service document. - terms_of_service: https://www.microsoft.com/legal/terms-of-use - intended_uses: Suggesting email replies. - prohibited_uses: unknown monitoring: unknown - feedback: unknown - # Deployment monthly_active_users: unknown - user_distribution: unknown - failures: unknown - -- type: application - name: Microsoft Security Copilot - # General + name: Microsoft Suggested Replies organization: Microsoft - description: > - Microsoft Security Copilot is an AI-powered security analysis tool that enables - analysts to respond to threats quickly, process signals at machine speed, and - assess risk exposure in minutes. 
- created_date: - value: 2023-03-28 - explanation: The date Security Copilot was announced in the [[Microsoft blog - post]](https://blogs.microsoft.com/blog/2023/03/28/introducing-microsoft-security-copilot-empowering-defenders-at-the-speed-of-ai/). - url: https://blogs.microsoft.com/blog/2023/03/28/introducing-microsoft-security-copilot-empowering-defenders-at-the-speed-of-ai/ - # Construction - dependencies: [GPT-4, Microsoft security-specific model] + output_space: Suggested emails. + prohibited_uses: unknown + quality_control: unknown + terms_of_service: https://www.microsoft.com/legal/terms-of-use + type: application + url: https://support.microsoft.com/en-us/office/use-suggested-replies-in-outlook-19316194-0434-43ba-a742-6b5890157379 + user_distribution: unknown +- access: limited adaptation: + explanation: See [[blog post]](https://blogs.microsoft.com/blog/2023/03/28/introducing-microsoft-security-copilot-empowering-defenders-at-the-speed-of-ai/). value: Security Copilot combines OpenAI's GPT-4 generative AI with a security-specific model from Microsoft. This security-specific model in turn incorporates a growing set of security-specific skills and is informed by Microsoft's unique global threat intelligence and more than 65 trillion daily signals. - explanation: See [[blog post]](https://blogs.microsoft.com/blog/2023/03/28/introducing-microsoft-security-copilot-empowering-defenders-at-the-speed-of-ai/). + created_date: + explanation: The date Security Copilot was announced in the [[Microsoft blog + post]](https://blogs.microsoft.com/blog/2023/03/28/introducing-microsoft-security-copilot-empowering-defenders-at-the-speed-of-ai/). + value: 2023-03-28 + dependencies: [GPT-4, Microsoft security-specific model] + description: "Microsoft Security Copilot is an AI-powered security analysis tool\ + \ that enables analysts to respond to threats quickly, process signals at machine\ + \ speed, and assess risk exposure in minutes.\n" + failures: unknown + feedback: unknown + intended_uses: Security Copilot is designed to enhance the capabilities of cybersecurity + professionals. It leverages machine speed and scale to accelerate response to + security incidents, discover and process threat signals, and assess risk exposure + within minutes. + license: + explanation: "Software license as described in the Terms of Service document.\n" + value: custom + monitoring: '' + monthly_active_users: unknown + name: Microsoft Security Copilot + organization: Microsoft output_space: + explanation: See [[product demo]](https://www.microsoft.com/en-us/security/business/ai-machine-learning/microsoft-security-copilot). value: Actionable responses to security-related questions (text and image). Security event, incident or threat reports (PowerPoint slide). - explanation: See [[product demo]](https://www.microsoft.com/en-us/security/business/ai-machine-learning/microsoft-security-copilot). + prohibited_uses: unknown quality_control: + explanation: See [[blog post]](https://blogs.microsoft.com/blog/2023/03/28/introducing-microsoft-security-copilot-empowering-defenders-at-the-speed-of-ai/). value: Security Copilot employs a closed-loop learning system that learns from user interactions and feedback, enabling it to provide more coherent, relevant, and useful answers that continually improve over time. Security Copilot is @@ -760,119 +707,106 @@ that customers' data and AI models are protected with enterprise compliance and security controls. 
Customer data is owned and controlled by them, and not used to train AI models for anyone outside their organization. - explanation: See [[blog post]](https://blogs.microsoft.com/blog/2023/03/28/introducing-microsoft-security-copilot-empowering-defenders-at-the-speed-of-ai/). - # Downstream - access: limited - license: - value: custom - explanation: > - Software license as described in the Terms of Service document. terms_of_service: https://www.microsoft.com/legal/terms-of-use - intended_uses: Security Copilot is designed to enhance the capabilities of cybersecurity - professionals. It leverages machine speed and scale to accelerate response to - security incidents, discover and process threat signals, and assess risk exposure - within minutes. - prohibited_uses: unknown - monitoring: '' - feedback: unknown - # Deployment - monthly_active_users: unknown + type: application + url: https://blogs.microsoft.com/blog/2023/03/28/introducing-microsoft-security-copilot-empowering-defenders-at-the-speed-of-ai/ user_distribution: unknown - failures: unknown - -- type: model - name: UniLM - organization: Microsoft - description: UniLM is a unified language model that can be fine-tuned for both - natural language understanding and generation tasks. - created_date: 2019-10-01 - url: https://proceedings.neurips.cc/paper_files/paper/2019/file/c20bb2d9a50d5ac1f713f8b34d9aac5a-Paper.pdf - model_card: '' - modality: text +- access: open analysis: Evaluated on GLUE, SQuAD 2.0, and CoQA benchmarks. - size: 340M parameters + created_date: 2019-10-01 dependencies: [] - training_emissions: unknown - training_time: 10,000 steps in 7 hours - training_hardware: 8 NVIDIA Tesla V100 32GB GPUs - quality_control: '' - access: open - license: MIT + description: UniLM is a unified language model that can be fine-tuned for both + natural language understanding and generation tasks. + feedback: '' intended_uses: '' - prohibited_uses: '' + license: MIT + modality: {} + model_card: '' monitoring: '' - feedback: '' - -- type: model - name: Docugami + name: UniLM organization: Microsoft - description: Docugami is a LLM focused on writing business documents and data - using generative AI. - created_date: 2021-04-12 - url: https://www.docugami.com/generative-ai - model_card: '' - modality: text + prohibited_uses: '' + quality_control: '' + size: '' + training_emissions: unknown + training_hardware: 8 NVIDIA Tesla V100 32GB GPUs + training_time: 10,000 steps in 7 hours + type: model + url: https://proceedings.neurips.cc/paper_files/paper/2019/file/c20bb2d9a50d5ac1f713f8b34d9aac5a-Paper.pdf +- access: limited analysis: '' - size: 20B parameters + created_date: 2021-04-12 dependencies: [] - training_emissions: unknown - training_time: unknown - training_hardware: '' - quality_control: '' - access: limited - license: '' + description: Docugami is a LLM focused on writing business documents and data + using generative AI. + feedback: '' intended_uses: analyzing, writing, and connecting business documents and data - prohibited_uses: '' + license: '' + modality: + explanation: text + value: text; text + model_card: '' monitoring: '' - feedback: '' - -- type: model - name: BEiT-3 + name: Docugami organization: Microsoft - description: BEiT-3 is a general-purpose multimodal foundation model for vision - and vision-language tasks. 
- created_date: 2022-08-31 - url: https://arxiv.org/pdf/2208.10442.pdf - model_card: '' - modality: text and image input and output + prohibited_uses: '' + quality_control: '' + size: 20B parameters (dense) + training_emissions: unknown + training_hardware: '' + training_time: unknown + type: model + url: https://www.docugami.com/generative-ai +- access: open analysis: Evaluated on a range of standardized vision benchmarks, and achieves state of the art performance on all experimentally. - size: 1.9B parameters + created_date: 2022-08-31 dependencies: [Multiway Transformer network] - training_emissions: unknown - training_time: '' - training_hardware: '' - quality_control: '' - access: open - license: '' + description: BEiT-3 is a general-purpose multimodal foundation model for vision + and vision-language tasks. + feedback: '' intended_uses: '' - prohibited_uses: '' + license: '' + modality: + explanation: text and image input and output + value: image, text; image, text + model_card: '' monitoring: '' - feedback: '' - -- type: model - name: WizardLM + name: BEiT-3 organization: Microsoft + prohibited_uses: '' + quality_control: '' + size: 1.9B parameters (dense) + training_emissions: unknown + training_hardware: '' + training_time: '' + type: model + url: https://arxiv.org/pdf/2208.10442.pdf +- access: open + analysis: Reports results on standard LLM benchmarks in comparison to other LLMs + and test sets. + created_date: 2023-04-24 + dependencies: [LLaMA, Evol-Instruct, Alpaca dataset] description: Starting with an initial set of instructions, we use our proposed Evol-Instruct to rewrite them step by step into more complex instructions. Then, we mix all generated instruction data to fine-tune LLaMA. We call the resulting model WizardLM. - created_date: 2023-04-24 - url: https://arxiv.org/pdf/2304.12244v1.pdf - model_card: https://huggingface.co/WizardLM/WizardLM-13B-1.0 - modality: natural language text - analysis: Reports results on standard LLM benchmarks in comparison to other LLMs - and test sets. - size: 7B parameters - dependencies: [LLaMA, Evol-Instruct, Alpaca dataset] - training_emissions: '' - training_time: 70 hours on 3 epochs - training_hardware: 8 V100 GPUs - quality_control: '' - access: open - license: Apache 2.0 + feedback: https://huggingface.co/datasets/WizardLM/evol_instruct_70k/discussions intended_uses: Creating large amounts of instruction data, particularly with high complexity - prohibited_uses: '' + license: Apache 2.0 + modality: + explanation: natural language text + value: text; text + model_card: https://huggingface.co/WizardLM/WizardLM-13B-1.0 monitoring: '' - feedback: https://huggingface.co/datasets/WizardLM/evol_instruct_70k/discussions + name: WizardLM + organization: Microsoft + prohibited_uses: '' + quality_control: '' + size: 7B parameters (dense) + training_emissions: '' + training_hardware: 8 V100 GPUs + training_time: 70 hours on 3 epochs + type: model + url: https://arxiv.org/pdf/2304.12244v1.pdf diff --git a/assets/mosaic.yaml b/assets/mosaic.yaml index a887f3b4..a58049f1 100644 --- a/assets/mosaic.yaml +++ b/assets/mosaic.yaml @@ -1,24 +1,25 @@ --- - -- type: model - name: MPT - organization: Mosaic +- access: open + analysis: Evaluated on a range of benchmarks and performed on par with LLaMA-7B. + created_date: 2023-05-05 + dependencies: [RedPajama-Data, C4, The Stack, Multimodal C4] description: MPT is a series of large language models seeking to address the limitations of other open source models like LLaMA and Pythia. 
- created_date: 2023-05-05 - url: https://www.mosaicml.com/blog/mpt-7b + feedback: '' + intended_uses: '' + license: Apache 2.0 + modality: + explanation: text + value: text; text model_card: '' - modality: text - analysis: Evaluated on a range of benchmarks and performed on par with LLaMA-7B. - size: 7B parameters - dependencies: [RedPajama-Data, C4, The Stack, Multimodal C4] + monitoring: '' + name: MPT + organization: Mosaic + prohibited_uses: '' + quality_control: '' + size: 7B parameters (dense) training_emissions: unknown - training_time: 9.5 days training_hardware: 440 A100 40GB GPUs - quality_control: '' - access: open - license: Apache 2.0 - intended_uses: '' - prohibited_uses: '' - monitoring: '' - feedback: '' + training_time: 9.5 days + type: model + url: https://www.mosaicml.com/blog/mpt-7b diff --git a/assets/nanyang.yaml b/assets/nanyang.yaml index 02f40c24..e5b04469 100644 --- a/assets/nanyang.yaml +++ b/assets/nanyang.yaml @@ -1,27 +1,28 @@ --- - -- type: model - name: Otter - organization: Nanyang Technological University - description: Otter is a multi-modal model based on OpenFlamingo (open-sourced - version of DeepMind’s Flamingo), trained on MIMIC-IT and showcasing improved - instruction-following ability and in-context learning. - created_date: 2023-05-05 - url: https://arxiv.org/pdf/2305.03726v1.pdf - model_card: https://github.com/Luodian/Otter/blob/main/docs/model_card.md - modality: outputs text from text and image input +- access: open analysis: Evaluated on researcher experiments to test deeper understanding and advanced commonsense reasoning - size: 1.3B parameters + created_date: 2023-05-05 dependencies: [MIMIC-IT, OpenFlamingo] - training_emissions: '' - training_time: '' - training_hardware: 4 RTX-3090 GPUs - quality_control: '' - access: open - license: MIT + description: Otter is a multi-modal model based on OpenFlamingo (open-sourced + version of DeepMind’s Flamingo), trained on MIMIC-IT and showcasing improved + instruction-following ability and in-context learning. + feedback: '' intended_uses: Following and executing new instructions with few in-context learning examples given image and textual input. 
- prohibited_uses: '' + license: MIT + modality: + explanation: outputs text from text and image input + value: text; image, text + model_card: https://github.com/Luodian/Otter/blob/main/docs/model_card.md monitoring: '' - feedback: '' + name: Otter + organization: Nanyang Technological University + prohibited_uses: '' + quality_control: '' + size: 1.3B parameters (dense) + training_emissions: '' + training_hardware: 4 RTX-3090 GPUs + training_time: '' + type: model + url: https://arxiv.org/pdf/2305.03726v1.pdf diff --git a/assets/naver.yaml b/assets/naver.yaml index a262bc3a..588ea0a4 100644 --- a/assets/naver.yaml +++ b/assets/naver.yaml @@ -1,31 +1,30 @@ --- - -# models -- type: model - name: HyperCLOVA - organization: Naver - description: HyperClova is an autoregressive language model +- access: + explanation: The model is not available for download or direct API use + value: closed + analysis: '' created_date: - value: 2021-05-21 explanation: The date the model paper was submitted to arxiv - url: https://arxiv.org/abs/2109.04650 - model_card: '' - modality: text (Korean | 한국어) - size: 82B parameters (dense model) - analysis: '' + value: 2021-05-21 dependencies: [] - training_emissions: '' - training_time: 130.4 days - training_hardware: 1024 A100 GPUs - quality_control: '' - access: - value: closed - explanation: The model is not available for download or direct API use + description: HyperClova is an autoregressive language model + feedback: '' + intended_uses: '' license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - intended_uses: '' - prohibited_uses: '' + modality: + explanation: text (Korean | 한국어) + value: text; text + model_card: '' monitoring: '' - feedback: '' + name: HyperCLOVA + organization: Naver + prohibited_uses: '' + quality_control: '' + size: 82B parameters (dense) + training_emissions: '' + training_hardware: 1024 A100 GPUs + training_time: 130.4 days + type: model + url: https://arxiv.org/abs/2109.04650 diff --git a/assets/neeva.yaml b/assets/neeva.yaml index 18e64ffa..c081acf3 100644 --- a/assets/neeva.yaml +++ b/assets/neeva.yaml @@ -1,85 +1,79 @@ --- - -- type: dataset - name: Neeva dataset - organization: Neeva - description: '' +- access: closed + analysis: '' created_date: unknown - url: https://neeva.com/index datasheet: '' - modality: text - size: unknown - sample: [] - analysis: '' dependencies: [] - included: '' + description: '' excluded: '' - quality_control: '' - access: closed + feedback: '' + included: '' + intended_uses: '' license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. 
- intended_uses: '' - prohibited_uses: '' + modality: {} monitoring: '' - feedback: '' - -- type: model - name: Neeva model + name: Neeva dataset organization: Neeva - description: '' - created_date: unknown + prohibited_uses: '' + quality_control: '' + sample: [] + size: unknown + type: dataset url: https://neeva.com/index - model_card: '' - modality: text +- access: closed analysis: '' - size: unknown + created_date: unknown dependencies: [Neeva dataset] - training_emissions: '' - training_time: '' - training_hardware: '' - quality_control: '' - access: closed + description: '' + feedback: '' + intended_uses: '' license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - intended_uses: '' - prohibited_uses: '' + modality: {} + model_card: '' monitoring: '' - feedback: '' - -- type: application - name: NeevaAI + name: Neeva model organization: Neeva - description: NeevaAI is an AI-powered search tool that combines the capabilities - of LLMs with Neeva's independent in-house search stack to create a unique and - transformative search experience. + prohibited_uses: '' + quality_control: '' + size: '' + training_emissions: '' + training_hardware: '' + training_time: '' + type: model + url: https://neeva.com/index +- access: open + adaptation: '' created_date: - value: 2023-01-06 explanation: The [[Blog post]](https://neeva.com/blog/introducing-neevaai) announcing NeevaAI. - url: https://neeva.com/blog/introducing-neevaai + value: 2023-01-06 dependencies: [Neeva model] - adaptation: '' - output_space: '' - quality_control: '' - access: open + description: NeevaAI is an AI-powered search tool that combines the capabilities + of LLMs with Neeva's independent in-house search stack to create a unique and + transformative search experience. + failures: '' + feedback: '' + intended_uses: '' license: + explanation: "\"If you comply with these Terms, Neeva grants to you, during\ + \ the term of these Terms, a limited non-exclusive, non-transferable license,\ + \ with no right to sublicense, to download and install the App on your personal\ + \ computers, mobile handsets, tablets, wearable devices, and/or other devices\ + \ and to run the App solely for your own personal non-commercial purposes.\"\ + \ - excerpt from the Terms of Service document.\n" value: Custom - explanation: > - "If you comply with these Terms, Neeva grants to you, during the term of - these Terms, a limited non-exclusive, non-transferable license, with no - right to sublicense, to download and install the App on your personal - computers, mobile handsets, tablets, wearable devices, and/or other - devices and to run the App solely for your own personal non-commercial - purposes." - excerpt from the Terms of Service document. 
- terms_of_service: https://neeva.com/terms - intended_uses: '' - prohibited_uses: '' monitoring: '' - feedback: '' monthly_active_users: '' + name: NeevaAI + organization: Neeva + output_space: '' + prohibited_uses: '' + quality_control: '' + terms_of_service: https://neeva.com/terms + type: application + url: https://neeva.com/blog/introducing-neevaai user_distribution: '' - failures: '' diff --git a/assets/nextdoor.yaml b/assets/nextdoor.yaml index 0dfafe79..1cb54b17 100644 --- a/assets/nextdoor.yaml +++ b/assets/nextdoor.yaml @@ -1,24 +1,23 @@ --- - -- type: application - name: Nextdoor Assistant - organization: Nextdoor - description: AI chatbot on Nextdoor that helps users write more clear and conscientious - posts. +- access: open + adaptation: '' created_date: 2023-05-02 - url: https://help.nextdoor.com/s/article/Introducing-Assistant dependencies: [ChatGPT] - adaptation: '' - output_space: natural language text guidance - quality_control: '' - access: open - license: unknown - terms_of_service: '' + description: AI chatbot on Nextdoor that helps users write more clear and conscientious + posts. + failures: '' + feedback: '' intended_uses: to be used to help make the Nextdoor experience more positive for users - prohibited_uses: '' + license: unknown monitoring: '' - feedback: '' monthly_active_users: '' + name: Nextdoor Assistant + organization: Nextdoor + output_space: natural language text guidance + prohibited_uses: '' + quality_control: '' + terms_of_service: '' + type: application + url: https://help.nextdoor.com/s/article/Introducing-Assistant user_distribution: '' - failures: '' diff --git a/assets/nolano.yaml b/assets/nolano.yaml index f2b8fe84..0f23e021 100644 --- a/assets/nolano.yaml +++ b/assets/nolano.yaml @@ -1,23 +1,22 @@ --- - -- type: application - name: Cformers - organization: Nolano - description: Cformers is a set of transformers that act as an API for AI inference - in code. +- access: limited + adaptation: '' created_date: 2023-03-19 - url: https://www.nolano.org/services/Cformers/ dependencies: [] - adaptation: '' - output_space: '' - quality_control: '' - access: limited - license: MIT - terms_of_service: '' + description: Cformers is a set of transformers that act as an API for AI inference + in code. + failures: '' + feedback: '' intended_uses: '' - prohibited_uses: '' + license: MIT monitoring: '' - feedback: '' monthly_active_users: '' + name: Cformers + organization: Nolano + output_space: '' + prohibited_uses: '' + quality_control: '' + terms_of_service: '' + type: application + url: https://www.nolano.org/services/Cformers/ user_distribution: '' - failures: '' diff --git a/assets/notion.yaml b/assets/notion.yaml index ba21e83f..d0f580c5 100644 --- a/assets/notion.yaml +++ b/assets/notion.yaml @@ -1,24 +1,23 @@ --- - -- type: application - name: Notion AI - organization: Notion +- access: limited + adaptation: '' + created_date: 2023-02-22 + dependencies: [Anthropic API] description: Notion AI is a connected assistant that helps you think bigger, work faster, and augments your creativity, right inside the functional workspace you’re already familiar with. 
-  created_date: 2023-02-22
-  url: https://www.notion.so/help/guides/notion-ai-for-docs
-  dependencies: [Anthropic API]
-  adaptation: ''
-  output_space: ''
-  quality_control: ''
-  access: limited
-  license: ''
-  terms_of_service: ''
+  failures: ''
+  feedback: ''
   intended_uses: ''
-  prohibited_uses: ''
+  license: ''
   monitoring: ''
-  feedback: ''
   monthly_active_users: ''
+  name: Notion AI
+  organization: Notion
+  output_space: ''
+  prohibited_uses: ''
+  quality_control: ''
+  terms_of_service: ''
+  type: application
+  url: https://www.notion.so/help/guides/notion-ai-for-docs
   user_distribution: ''
-  failures: ''
diff --git a/assets/nvidia.yaml b/assets/nvidia.yaml
index 5f9e0978..39e0f118 100644
--- a/assets/nvidia.yaml
+++ b/assets/nvidia.yaml
@@ -1,99 +1,100 @@
 ---
-
-# models
-- type: model
-  name: Megatron-LM
-  organization: NVIDIA
-  description: Megatron-LM is an autoregressive language model
+- access:
+    explanation: "Neither the 8.3B parameter model trained to convergence nor the\
+      \ 1 trillion parameter model is available for download\n"
+    value: closed
+  analysis: ''
   created_date:
-    value: 2021-04-09
     explanation: The date the paper for the 1 trillion parameter model was published
-  url: https://arxiv.org/abs/2104.04473
-  model_card: none
-  modality: text (English)
-  analysis: ''
-  size: 1000B parameters (dense model)
+    value: 2021-04-09
   dependencies: []
-  training_emissions: unknown
-  training_time: 84 days
-  training_hardware: 3072 A100 GPUs
-  quality_control: unknown
-  access:
-    value: closed
-    explanation: >
-      Neither the 8.3B parameter model trained to convergence nor the 1 trillion
-      paramter model is available for download
+  description: Megatron-LM is an autoregressive language model
+  feedback: none
+  intended_uses: none
   license:
+    explanation: "The asset isn't released, and hence the license is unknown.\n"
     value: unknown
-    explanation: >
-      The asset isn't released, and hence the license is unknown.
- intended_uses: none - prohibited_uses: none + modality: + explanation: text (English) + value: text; text + model_card: none monitoring: none - feedback: none - -- type: dataset - name: MineDojo + name: Megatron-LM organization: NVIDIA - description: '' + prohibited_uses: none + quality_control: unknown + size: 1000B parameters (dense) + training_emissions: unknown + training_hardware: 3072 A100 GPUs + training_time: 84 days + type: model + url: https://arxiv.org/abs/2104.04473 +- access: open + analysis: '' created_date: 2022-06-17 - url: https://arxiv.org/abs/2206.08853 datasheet: '' - modality: Videos, Text - size: 730k videos, 6k Wikipedia pages, 340k reddit posts - sample: [] - analysis: '' dependencies: [YouTube, Wikipedia, Reddit] - included: '' + description: '' excluded: '' - quality_control: '' - access: open - license: MIT + feedback: '' + included: '' intended_uses: '' - prohibited_uses: '' + license: MIT + modality: + explanation: Videos, Text + value: text, video monitoring: '' - feedback: '' - -- type: dataset - name: VIMA dataset - organization: NVIDIA, Stanford - description: '' - created_date: 2022-10-06 - url: https://vimalabs.github.io/ - datasheet: '' - modality: text and image - size: 200M parameters (dense model) + name: MineDojo + organization: NVIDIA + prohibited_uses: '' + quality_control: '' sample: [] + size: 730k videos, 6k Wikipedia pages, 340k reddit posts + type: dataset + url: https://arxiv.org/abs/2206.08853 +- access: open analysis: '' + created_date: 2022-10-06 + datasheet: '' dependencies: [T5, Mask R-CNN, VIMA dataset] - included: '' + description: '' excluded: '' - quality_control: '' - access: open - license: MIT + feedback: '' + included: '' intended_uses: '' - prohibited_uses: '' + license: MIT + modality: + explanation: text and image + value: image, text monitoring: '' - feedback: '' - -- type: model - name: VIMA + name: VIMA dataset organization: NVIDIA, Stanford - description: '' - created_date: 2022-10-06 + prohibited_uses: '' + quality_control: '' + sample: [] + size: 200M parameters (dense model) + type: dataset url: https://vimalabs.github.io/ - model_card: '' - modality: Simulation +- access: open analysis: '' - size: 650K successful trajectories + created_date: 2022-10-06 dependencies: [] - training_emissions: '' - training_time: '' - training_hardware: '' - quality_control: '' - access: open - license: MIT + description: '' + feedback: '' intended_uses: '' - prohibited_uses: '' + license: MIT + modality: + explanation: Simulation + value: other; other + model_card: '' monitoring: '' - feedback: '' + name: VIMA + organization: NVIDIA, Stanford + prohibited_uses: '' + quality_control: '' + size: 650K parameters (dense) + training_emissions: '' + training_hardware: '' + training_time: '' + type: model + url: https://vimalabs.github.io/ diff --git a/assets/openai.yaml b/assets/openai.yaml index 74e6779e..ec2714f9 100644 --- a/assets/openai.yaml +++ b/assets/openai.yaml @@ -1,118 +1,104 @@ --- - -# Datasets -- type: dataset - name: GPT-3 dataset - # General - organization: OpenAI - description: > - The GPT-3 dataset is the text corpus that was used to train the GPT-3 - model. Information on the GPT-3 dataset is limited to discussion in the - paper introducing GPT-3 - [[Section 2.2]](https://arxiv.org/pdf/2005.14165.pdf#subsection.2.2). 
+- access:
+    explanation: "The GPT-3 dataset isn't released to the public, but it may be\
+      \ available to Microsoft through the GPT-3 licensing agreement between OpenAI\
+      \ and Microsoft [[OpenAI Blog Post]] (https://openai.com/blog/openai-licenses-gpt-3-technology-to-microsoft/).\n"
+    value: closed
+  analysis: "The GPT-3 paper, which also introduces the GPT-3 dataset, provides\
+    \ a limited analysis on the GPT-3 dataset, reporting the dirtiness of the dataset\
+    \ after it was filtered for text occurring in common benchmarking tasks.\
+    \ The authors report that \"as the dataset becomes more contaminated, the variance\
+    \ of the clean over all fraction increases, but there is no apparent bias towards\
+    \ improved or degraded performance\" [[Appendix C]](https://arxiv.org/pdf/2005.14165.pdf#appendix.C).\n"
   created_date:
+    explanation: "The date for the public announcement of GPT-3. The GPT-3 dataset\
+      \ didn't have a specific release date separate from the model [[Open AI Blog\
+      \ Post]](https://openai.com/blog/openai-api/).\n"
     value: 2020-06-11
-    explanation: >
-      The date for the public announcement of GPT-3. The GPT-3
-      dataset didn't have a specific release date separate from the model
-      [[Open AI Blog Post]](https://openai.com/blog/openai-api/).
-  url: https://arxiv.org/pdf/2005.14165.pdf
   datasheet:
-    value: none
     explanation: No datasheet available as of 2022-04-04.
-  modality: text (English)
-  size: 570 GB
-  sample: []
-  analysis: >
-    The GPT-3 paper, which also introduces the GPT-3 dataset, provides a limited
-    analysis on the GPT-3 dataset, reporting the dirtiness of the dataset after
-    the it was filtered for text occurring in common benchmarking tasks.
-    The authors report that "as the dataset becomes more contaminated, the
-    variance of the clean over all fraction increases, but there is no apparent
-    bias
-    towards improved or degraded performance"
-    [[Appendix C]](https://arxiv.org/pdf/2005.14165.pdf#appendix.C).
-  # Construction
+    value: none
   dependencies: [WebText]
-  license:
+  description: "The GPT-3 dataset is the text corpus that was used to train the\
+    \ GPT-3 model. Information on the GPT-3 dataset is limited to discussion in\
+    \ the paper introducing GPT-3 [[Section 2.2]](https://arxiv.org/pdf/2005.14165.pdf#subsection.2.2).\n"
+  excluded: "The Common Crawl dataset was processed using a classifier that kept\
+    \ high quality documents and filtered low quality documents. WebText was used\
+    \ as a proxy for high quality documents [[Appendix A]](https://arxiv.org/pdf/2005.14165.pdf#appendix.A).\n"
+  feedback:
+    explanation: "There are no known (internal or external) feedback mechanisms\
+      \ for the GPT-3 dataset as of 2022-04-04.\n"
     value: unknown
-    explanation: >
-      There is no known license specific to the GPT-3 dataset, however,
-      the governing organization, OpenAI, licensed GPT-3 to Microsoft, which
-      makes it likely that the GPT-3 dataset was also licensed
-      [[OpenAI Blog Post]]
-      (https://openai.com/blog/openai-licenses-gpt-3-technology-to-microsoft/).
-  included: >
-    The dataset is composed several NLP corpora: Common Crawl (filtered, 60%),
-    WebText2 (22%), Books1 (8%), Books2 (8%), Wikipedia (3%)
-    [[Section 2.2]](https://arxiv.org/pdf/2005.14165.pdf#subsection.2.2).
-  excluded: >
-    The Common Crawl dataset was processed using a classifier that kept high
-    quality documents and filtered low quality documents. WebText was used as a
-    proxy for high quality documents
-    [[Appendix A]](https://arxiv.org/pdf/2005.14165.pdf#appendix.A).
- quality_control: > - In addition to excluding low quality documents from the Common Crawl - dataset, the authors fuzzily deduplicated documents within each dataset, by - removing documents that have high overlap with each other. The same - procedure was followed to fuzzily deduplicate WebText from Common Crawl - [[Appendix A]](https://arxiv.org/pdf/2005.14165.pdf#appendix.A). - Text occuring in benchmark datasets were also partially removed - [[Appendix C]](https://arxiv.org/pdf/2005.14165.pdf#appendix.C). - # Downstream - access: - value: closed - explanation: > - The GPT-3 dataset isn't released to the public, but it may be available - to Microsoft through the GPT-3 licencing agreement between OpenAI and - Microsoft [[OpenAI Blog Post]] - (https://openai.com/blog/openai-licenses-gpt-3-technology-to-microsoft/). - intended_uses: > - The intended use of the GPT-3 dataset is to train language models. - prohibited_uses: + included: "The dataset is composed several NLP corpora: Common Crawl (filtered,\ + \ 60%), WebText2 (22%), Books1 (8%), Books2 (8%), Wikipedia (3%) [[Section 2.2]](https://arxiv.org/pdf/2005.14165.pdf#subsection.2.2).\n" + intended_uses: "The intended use of the GPT-3 dataset is to train language models.\n" + license: + explanation: "There is no known license specific to the GPT-3 dataset, however,\ + \ the governing organization, OpenAI, licensed GPT-3 to Microsoft, which makes\ + \ it likely that the GPT-3 dataset was also licensed [[OpenAI Blog Post]]\ + \ (https://openai.com/blog/openai-licenses-gpt-3-technology-to-microsoft/).\n" value: unknown - explanation: > - OpenAI didn't provide a list of prohibited uses specifically for - the GPT-3 dataset. However, public OpenAI products are governed by the - OpenAI Terms of Use, which may also apply to the OpenAI dataset. - The OpenAI Terms of Use prohibit the following: - (i) Illegal activities, such as child pornography, gambling, cybercrime, - piracy, violating copyright, trademark or other intellectual property laws; - (ii) Accessing or authorizing anyone to access the APIs from an embargoed - country, region, or territory as prohibited by the U.S. government; - (iii) Threatening, stalking, defaming, defrauding, degrading, victimizing - or intimidating anyone for any reason - [[Open AI Terms of Use]](https://openai.com/api/policies/terms/). + modality: + explanation: text (English) + value: text monitoring: + explanation: "There are no known (internal or external) monitoring mechanisms\ + \ that are in place for the use of the GPT-3 dataset as of 2022-04-04.\n" value: unknown - explanation: > - There are no known (internal or external) monitoring mechanisms - that are in place for the use of the GPT-3 dataset as of 2022-04-04. - feedback: - value: unknown - explanation: > - There are no known (internal or external) feedback mechanisms for - the GPT-3 dataset as of 2022-04-04. - -- type: dataset - name: HumanEval - # General + name: GPT-3 dataset organization: OpenAI - description: > - HumanEval is a dataset of 164 programming problems hand-written to evaluate - their Codex model. + prohibited_uses: + explanation: "OpenAI didn't provide a list of prohibited uses specifically for\ + \ the GPT-3 dataset. However, public OpenAI products are governed by the OpenAI\ + \ Terms of Use, which may also apply to the OpenAI dataset. 
The OpenAI Terms\
+ \ of Use prohibit the following: (i) Illegal activities, such as child pornography,\
+ \ gambling, cybercrime, piracy, violating copyright, trademark or other intellectual\
+ \ property laws; (ii) Accessing or authorizing anyone to access the APIs from\
+ \ an embargoed country, region, or territory as prohibited by the U.S. government;\
+ \ (iii) Threatening, stalking, defaming, defrauding, degrading, victimizing\
+ \ or intimidating anyone for any reason [[Open AI Terms of Use]](https://openai.com/api/policies/terms/).\n"
+ value: unknown
+ quality_control: "In addition to excluding low quality documents from the Common\
+ \ Crawl dataset, the authors fuzzily deduplicated documents within each dataset,\
+ \ by removing documents that have high overlap with each other. The same procedure\
+ \ was followed to fuzzily deduplicate WebText from Common Crawl [[Appendix A]](https://arxiv.org/pdf/2005.14165.pdf#appendix.A).\
+ \ Text occurring in benchmark datasets was also partially removed [[Appendix\
+ \ C]](https://arxiv.org/pdf/2005.14165.pdf#appendix.C).\n"
+ sample: []
+ size: 570 GB
+ type: dataset
+ url: https://arxiv.org/pdf/2005.14165.pdf
+- access:
+ explanation: "HumanEval dataset is publicly available and comes with an evaluation\
+ \ framework [[HumanEval GitHub Repository]](https://www.github.com/openai/human-eval).\n"
+ value: open
+ analysis: none
 created_date:
+ explanation: "The date that Codex, the model evaluated on the HumanEval dataset,\
+ \ was announced to the public [[OpenAI Blog Post]](https://openai.com/blog/openai-codex/).\n"
 value: 2021-08-10
- explanation: >
- The date that Codex, the model evaluated on the HumanEval
- dataset, was announced to the public
- [[OpenAI Blog Post]](https://openai.com/blog/openai-codex/).
- url: https://arxiv.org/pdf/2107.03374.pdf
 datasheet:
- value: none
 explanation: No datasheet available as of 2022-04-10.
- modality: code (Python)
- size: 214 KB
+ value: none
+ dependencies: []
+ description: "HumanEval is a dataset of 164 programming problems hand-written\
+ \ to evaluate their Codex model.\n"
+ excluded: "Code problems easily found on the internet.\n"
+ feedback: "Email the authors [[Codex Paper]](https://arxiv.org/pdf/2107.03374.pdf).\n"
+ included: "164 hand-written questions.\n"
+ intended_uses: "Evaluating code generation capabilities of models.\n"
+ license:
+ explanation: https://github.com/openai/human-eval/blob/master/LICENSE
+ value: MIT
+ modality:
+ explanation: code (Python)
+ value: code
+ monitoring: none
+ name: HumanEval
+ organization: OpenAI
+ prohibited_uses: none
+ quality_control: "The evaluation dataset was handwritten to ensure that the evaluation\
+ \ problems do not exist in the Codex dataset [[Section 2.2]](https://arxiv.org/pdf/2107.03374.pdf#subsection.2.2).\n"
 sample:
 - "\n\ndef string_sequence(n: int) -> str:\n \"\"\" Return a string containing\
 \ space-delimited numbers starting from 0 upto n inclusive.\n >>> string_sequence(0)\n\
@@ -139,936 +125,680 @@
 \ 'seven', 'eight' and 'nine'.\n Return the string with numbers sorted\
 \ from smallest to largest\n >>> sort_numbers('three one five')\n 'one\
 \ three five'\n \"\"\"\n"
- analysis: none
- # Construction
- dependencies: []
- license:
- value: MIT
- explanation: https://github.com/openai/human-eval/blob/master/LICENSE
- included: >
- 164 hand-written questions.
- excluded: >
- Code problems easily found on the internet.
- quality_control: > - The evaluation dataset was handwritten to ensure that the evaluation - problems do not exist in the Codex dataset - [[Section 2.2]](https://arxiv.org/pdf/2107.03374.pdf#subsection.2.2). - # Downstream - access: - value: open - explanation: > - HumanEval dataset is publicly available and comes - with a an evaluation framework - [[HumanEval GitHub Repository]](https://www.github.com/openai/human-eval). - intended_uses: > - Evaluating code generation capabilities of models. - prohibited_uses: none - monitoring: none - feedback: > - Email the authors [[Codex Paper]](https://arxiv.org/pdf/2107.03374.pdf). - -- type: dataset - name: Codex dataset - # General - organization: OpenAI - description: > - The dataset used to train the Codex model. - created_date: - value: 2021-08-10 - explanation: > - The date that Codex, the model trained on the Codex dataset, - was announced to the public - [[OpenAI Blog Post]](https://openai.com/blog/openai-codex/). + size: 214 KB + type: dataset url: https://arxiv.org/pdf/2107.03374.pdf - datasheet: none - modality: code - size: - value: 159 GB - explanation: As reported by the authors [[Section 3.1]](https://arxiv.org/pdf/2107.03374.pdf#subsection.3.1). - sample: [] +- access: + explanation: "The dataset might have been made available to Microsoft as part\ + \ of OpenAI giving Microsoft access to its Codex model [GitHub Copilot](https://copilot.github.com/).\n" + value: closed analysis: - value: none explanation: The paper doesn't provide an analysis on the training dataset. - # Construction + value: none + created_date: + explanation: "The date that Codex, the model trained on the Codex dataset, was\ + \ announced to the public [[OpenAI Blog Post]](https://openai.com/blog/openai-codex/).\n" + value: 2021-08-10 + datasheet: none dependencies: [] + description: "The dataset used to train the Codex model.\n" + excluded: "Following were filtered from the dataset: autogenerated files; files\ + \ with average line length > 100, maximum line length > 1000, or few alphanumeric\ + \ characters [[Section 3.1]](https://arxiv.org/pdf/2107.03374.pdf#subsection.3.1).\n" + feedback: "Email the authors [[Codex Paper]](https://arxiv.org/pdf/2107.03374.pdf).\n" + included: "The dataset includes 54 million public software repositories hosted\ + \ on GitHub as of an unspecified date in May 2020 [[Section 3.1]](https://arxiv.org/pdf/2107.03374.pdf#subsection.3.1).\n" + intended_uses: Training language models on code. license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - included: > - The dataset includes 54 million public software repositories hosted on - GitHub as of an unspecified date in May 2020 - [[Section 3.1]](https://arxiv.org/pdf/2107.03374.pdf#subsection.3.1). - excluded: > - Following were filtered from the dataset: autogenerated files; files with - average line length > 100, maximum line length > 1000, or few alphanumeric - characters - [[Section 3.1]](https://arxiv.org/pdf/2107.03374.pdf#subsection.3.1). - quality_control: > - Dataset was filtered using simple heuristics, as outlined in the excluded - field. - # Downstream - access: - value: closed - explanation: > - The dataset might have been made available to Microsoft - as part of OpenAI giving Microsoft access to its Codex model - [GitHub Copilot](https://copilot.github.com/). - intended_uses: Training language models on code. 
- prohibited_uses: unknown + modality: code monitoring: unknown - feedback: > - Email the authors - [[Codex Paper]](https://arxiv.org/pdf/2107.03374.pdf). - -- type: dataset - name: CLIP dataset - # General + name: Codex dataset organization: OpenAI - description: > - CLIP dataset contains text-image pairs crawled from the internet. + prohibited_uses: unknown + quality_control: "Dataset was filtered using simple heuristics, as outlined in\ + \ the excluded field.\n" + sample: [] + size: + explanation: As reported by the authors [[Section 3.1]](https://arxiv.org/pdf/2107.03374.pdf#subsection.3.1). + value: 159 GB + type: dataset + url: https://arxiv.org/pdf/2107.03374.pdf +- access: + explanation: The dataset wasn't released to the public. + value: closed + analysis: "The dataset contained some overlap with the test sets of the benchmarks\ + \ used for evaluation, but the authors determined the impact to be small: \"\ + There is a median overlap of 2.2% and an average overlap of 3.2%. Due to this\ + \ small amount of overlap, overall accuracy is rarely shifted by more than 0.1%\ + \ with only 7 datasets above this threshold\" [[Section 5]](https://arxiv.org/pdf/2103.00020.pdf#section.5).\n" created_date: + explanation: "The date of the blog post announcing CLIP [[OpenAI Blog Post]](https://openai.com/blog/clip/).\n" value: 2021-01-05 - explanation: > - The date of the blog post announcing CLIP - [[OpenAI Blog Post]](https://openai.com/blog/clip/). - url: https://arxiv.org/pdf/2103.00020.pdf datasheet: none - modality: text (English) and image - size: 400M (image, text) pairs - sample: [] - analysis: > - The dataset contained some overlap with the test sets of the benchmarks used - for evaluation, but the authors determined the impact to be small: "There - is a median overlap of 2.2% and an average overlap of 3.2%. Due to this - small amount of overlap, overall accuracy is rarely shifted by more than - 0.1% with only 7 datasets above this threshold" - [[Section 5]](https://arxiv.org/pdf/2103.00020.pdf#section.5). - # Construction dependencies: [] - license: - value: unknown - explanation: > - The asset doesn't have a known license as it hasn't been publicly - released. - included: > - Data crawled from the internet, without any filtering (including - de-duplication) or curation. + description: "CLIP dataset contains text-image pairs crawled from the internet.\n" excluded: none - quality_control: > - The data was "only crawled websites that had policies against excessively - violent and adult images and allowed us to filter out such content" - [[Model Card]](https://github.com/openai/CLIP/blob/main/model-card.md). - # Downstream - access: - value: closed - explanation: The dataset wasn't released to the public. + feedback: + explanation: The feedback mechanisms in place are unknown. + value: unknown + included: "Data crawled from the internet, without any filtering (including de-duplication)\ + \ or curation.\n" intended_uses: Training multimodal vision models. - prohibited_uses: + license: + explanation: "The asset doesn't have a known license as it hasn't been publicly\ + \ released.\n" value: unknown - explanation: The prohibited uses of the dataset are unknown. + modality: + explanation: text (English) and image + value: image, text monitoring: - value: unknown explanation: The monitoring mechanisms in place are unknown. - feedback: value: unknown - explanation: The feedback mechanisms in place are unknown. 
- -- type: dataset - name: DALL·E dataset - # General + name: CLIP dataset organization: OpenAI - description: > - DALL·E dataset is the training set consisting of image and text pairs - collected to train the DALL·E model. - created_date: - value: 2021-01-05 - explanation: > - The date of the blog post announcing DALL·E - [[OpenAI Blog Post]](https://openai.com/blog/dall-e/). - url: https://arxiv.org/abs/2102.12092 - datasheet: none - modality: text (English) and image - size: > - 250M (image, text) pairs + prohibited_uses: + explanation: The prohibited uses of the dataset are unknown. + value: unknown + quality_control: "The data was \"only crawled websites that had policies against\ + \ excessively violent and adult images and allowed us to filter out such content\"\ + \ [[Model Card]](https://github.com/openai/CLIP/blob/main/model-card.md).\n" sample: [] + size: 400M (image, text) pairs + type: dataset + url: https://arxiv.org/pdf/2103.00020.pdf +- access: + explanation: The dataset wasn't released to the public. + value: closed analysis: The authors found that the dataset contained 21% of the images in the MS-COCO validation set, but observed no significant changes in the performance of the accompanying DALL·E when tested on MS-COCO evaluation set with and without the said images [[Section 3.1]](https://arxiv.org/pdf/2102.12092.pdf#subsection.3.1). - # Construction + created_date: + explanation: "The date of the blog post announcing DALL·E [[OpenAI Blog Post]](https://openai.com/blog/dall-e/).\n" + value: 2021-01-05 + datasheet: none dependencies: [] - license: + description: "DALL·E dataset is the training set consisting of image and text\ + \ pairs collected to train the DALL·E model.\n" + excluded: "MS-COCO was excluded from the dataset, but because MS-COCO was created\ + \ from YFCC100M, some of the test images (not the captions) were included.\n" + feedback: + explanation: The feedback mechanisms in place are unknown. value: unknown - explanation: > - The asset doesn't have a known license as it hasn't been publicly - released. - included: > - Data from the internet, including Conceptual Captions and a filtered subset - of YFCC100M. - excluded: > - MS-COCO was excluded from the dataset, but because MS-COCO was created from - YFCC100M, some of the test images (not the captions) were included. - quality_control: > - The data was de-duplicated - [[Section 3.2]](https://arxiv.org/pdf/2102.12092.pdf#subsection.3.2). - The data collected from the internet was filtered using image, text and - joint image and text filters, which included: "discarding instances whose - captions are too short, are classified as non-English by the Python package - cld3, or that consist primarily of boilerplate phrases such as “photographed - on ”, where matches various formats for dates that we found in - the data". The authors also discard "instances whose images have aspect - ratios not in [1/2, 2]" - [[Appendix C]](https://arxiv.org/pdf/2102.12092.pdf#appendix.C). - # Downstream - access: - value: closed - explanation: The dataset wasn't released to the public. + included: "Data from the internet, including Conceptual Captions and a filtered\ + \ subset of YFCC100M.\n" intended_uses: Training multimodal vision models. - prohibited_uses: + license: + explanation: "The asset doesn't have a known license as it hasn't been publicly\ + \ released.\n" value: unknown - explanation: The prohibited uses of the dataset are unknown. 
+ modality: + explanation: text (English) and image + value: image, text monitoring: - value: unknown explanation: The monitoring mechanisms in place are unknown. - feedback: value: unknown - explanation: The feedback mechanisms in place are unknown. - -- type: dataset - name: Whisper dataset - # General + name: DALL·E dataset organization: OpenAI - description: > - The Whisper dataset is the speech corpus that was used to train the Whisper - model. Information on the dataset is limited to discussion in the - paper introducing Whisper. - [[Section 2.1]](https://cdn.openai.com/papers/whisper.pdf). + prohibited_uses: + explanation: The prohibited uses of the dataset are unknown. + value: unknown + quality_control: "The data was de-duplicated [[Section 3.2]](https://arxiv.org/pdf/2102.12092.pdf#subsection.3.2).\ + \ The data collected from the internet was filtered using image, text and joint\ + \ image and text filters, which included: \"discarding instances whose captions\ + \ are too short, are classified as non-English by the Python package cld3, or\ + \ that consist primarily of boilerplate phrases such as “photographed on ”,\ + \ where matches various formats for dates that we found in the data\"\ + . The authors also discard \"instances whose images have aspect ratios not in\ + \ [1/2, 2]\" [[Appendix C]](https://arxiv.org/pdf/2102.12092.pdf#appendix.C).\n" + sample: [] + size: "250M (image, text) pairs\n" + type: dataset + url: https://arxiv.org/abs/2102.12092 +- access: + explanation: "The dataset isn't released to the public.\n" + value: closed + analysis: "The Whisper paper provides limited details on preprocessing.\n" created_date: + explanation: "The date for the public announcement of Whisper. The dataset didn't\ + \ have a specific release date separate from the model [[Open AI Blog Post]](https://openai.com/blog/whisper/).\n" value: 2022-09-21 - explanation: > - The date for the public announcement of Whisper. The - dataset didn't have a specific release date separate from the model - [[Open AI Blog Post]](https://openai.com/blog/whisper/). - url: https://cdn.openai.com/papers/whisper.pdf datasheet: - value: none explanation: No datasheet available as of 2022-12-07. - modality: speech - size: 680k hours - sample: [] - analysis: > - The Whisper paper provides limited details on preprocessing. - # Construction + value: none dependencies: [] - license: + description: "The Whisper dataset is the speech corpus that was used to train\ + \ the Whisper model. Information on the dataset is limited to discussion in\ + \ the paper introducing Whisper. [[Section 2.1]](https://cdn.openai.com/papers/whisper.pdf).\n" + excluded: "Automated filtering was conducted.\n" + feedback: + explanation: "There are no known (internal or external) feedback mechanisms\ + \ for the dataset as of 2022-12-07.\n" value: unknown - explanation: > - The asset doesn't have a known license as it hasn't been publicly - released. - included: > - The dataset is composed three major sources: multilingual speech recognition - (17%), translation (18%), and English speech recognition (65%). - [[Figure 11]](https://cdn.openai.com/papers/whisper.pdf). - excluded: > - Automated filtering was conducted. - quality_control: > - In addition to filtering, basic text standardization was done. - # Downstream - access: - value: closed - explanation: > - The dataset isn't released to the public. 
+ included: "The dataset is composed three major sources: multilingual speech recognition\ + \ (17%), translation (18%), and English speech recognition (65%). [[Figure 11]](https://cdn.openai.com/papers/whisper.pdf).\n" intended_uses: The intended use is to train speech models. - prohibited_uses: - value: none - explanation: > - No uses are explicitly prohibited. + license: + explanation: "The asset doesn't have a known license as it hasn't been publicly\ + \ released.\n" + value: unknown + modality: + explanation: speech + value: audio monitoring: + explanation: "There are no known (internal or external) monitoring mechanisms\ + \ that are in place for the use of the dataset as of 2022-12-07.\n" value: none - explanation: > - There are no known (internal or external) monitoring mechanisms - that are in place for the use of the dataset as of 2022-12-07. - feedback: - value: unknown - explanation: > - There are no known (internal or external) feedback mechanisms for - the dataset as of 2022-12-07. - -- type: dataset - name: WebText - # General + name: Whisper dataset organization: OpenAI - description: '' + prohibited_uses: + explanation: "No uses are explicitly prohibited.\n" + value: none + quality_control: "In addition to filtering, basic text standardization was done.\n" + sample: [] + size: 680k hours + type: dataset + url: https://cdn.openai.com/papers/whisper.pdf +- access: closed + analysis: '' created_date: - value: 2019-11-01 explanation: Due to the lack of information about the exact date, it is assumed to be the 1st of the known month of creation. - url: https://cdn.openai.com/better-language-models/language_models_are_unsupervised_multitask_learners.pdf + value: 2019-11-01 datasheet: '' - modality: text (English) - size: 40 GB - sample: [] - analysis: '' - # Construction dependencies: [] - license: - value: unknown - explanation: > - The asset doesn't have a known license as it hasn't been publicly - released. - included: '' + description: '' excluded: '' - quality_control: '' - # Downstream - access: closed + feedback: '' + included: '' intended_uses: '' - prohibited_uses: '' + license: + explanation: "The asset doesn't have a known license as it hasn't been publicly\ + \ released.\n" + value: unknown + modality: {} monitoring: '' - feedback: '' - -# Models -- type: model - name: GPT-2 - # General + name: WebText organization: OpenAI - description: '' + prohibited_uses: '' + quality_control: '' + sample: [] + size: 40 GB + type: dataset + url: https://cdn.openai.com/better-language-models/language_models_are_unsupervised_multitask_learners.pdf +- access: open + analysis: '' created_date: - value: 2019-11-01 explanation: Due to the lack of information about the exact date, it is assumed to be the 1st of the known month of creation. - url: https://cdn.openai.com/better-language-models/language_models_are_unsupervised_multitask_learners.pdf - model_card: https://github.com/openai/gpt-2/blob/master/model_card.md - modality: text (English) - size: 1.5B parameters (dense model) - analysis: '' - # Construction + value: 2019-11-01 dependencies: [WebText] - training_emissions: '' - training_time: '' - training_hardware: '' - quality_control: '' - # Downstream - access: open + description: '' + feedback: '' + intended_uses: '' license: + explanation: "GPT-2 uses a [[modified MIT License]](https://github.com/openai/gpt-2/blob/master/LICENSE).\n" value: Modified MIT License - explanation: > - GPT-2 uses a - [[modified MIT License]](https://github.com/openai/gpt-2/blob/master/LICENSE). 
- intended_uses: ''
- prohibited_uses: ''
+ modality: {}
+ model_card: https://github.com/openai/gpt-2/blob/master/model_card.md
 monitoring: ''
- feedback: ''
-
-- type: model
- name: GPT-3
- # General
+ name: GPT-2
 organization: OpenAI
- description: >
- GPT-3 is an autoregressive language model.
+ prohibited_uses: ''
+ quality_control: ''
+ size: ''
+ training_emissions: ''
+ training_hardware: ''
+ training_time: ''
+ type: model
+ url: https://cdn.openai.com/better-language-models/language_models_are_unsupervised_multitask_learners.pdf
+- access:
+ explanation: "The GPT-3 model isn't fully released to the public, but it was\
+ \ made available to Microsoft through the licensing agreement between OpenAI\
+ \ and Microsoft [[OpenAI Blog Post]] (https://openai.com/blog/openai-licenses-gpt-3-technology-to-microsoft/).\
+ \ The public can access the model through the Open AI API, which is available\
+ \ in supported countries [[Supported Countries]](https://beta.openai.com/docs/supported-countries)\
+ \ [[OpenAI API]](https://openai.com/api/).\n"
+ value: limited
+ analysis: "The GPT-3 model was evaluated on language modeling, closed-book question\
+ \ answering, translation, Winograd-style tasks, commonsense reasoning, reading\
+ \ comprehension, SuperGLUE, NLI, synthetic tasks, and generation [[Section 4]](https://arxiv.org/pdf/2005.14165.pdf#section.4);\
+ \ as well as on fairness and biases [[Section 6]](https://arxiv.org/pdf/2005.14165.pdf#section.6).\n"
 created_date:
+ explanation: "The date that GPT-3 was announced to the public [[OpenAI Blog\
+ \ Post]](https://openai.com/blog/openai-api/).\n"
 value: 2020-06-11
- explanation: >
- The date that GPT-3 was announced to the public
- [[OpenAI Blog Post]](https://openai.com/blog/openai-api/).
- url: https://arxiv.org/pdf/2005.14165.pdf
- model_card: https://github.com/openai/gpt-3/blob/master/model-card.md
- modality: text (English)
- size:
- value: 175B parameters (dense model)
- explanation: >
- GPT-3 comes in several sizes. Here we report the size of the Davinci
- model, the largest GPT-3 model served through the OpenAI API. Sizes of the
- other models can be found in the paper
- [[Table 2.1]](https://arxiv.org/pdf/2005.14165.pdf#table.caption.7).
- analysis: >
- The GPT-3 model was evaluated on language modeling, closed-book question
- answering, translation, Winograd-style tasks, commonsense reasoning,
- reading comprehension, SuperGLUE, NLI, synthetic tasks, and generation
- [[Section 4]](https://arxiv.org/pdf/2005.14165.pdf#section.4);
- as well as on fairness and biases
- [[Section 6]](https://arxiv.org/pdf/2005.14165.pdf#section.6).
- # Construction
 dependencies: [GPT-3 dataset]
+ description: "GPT-3 is an autoregressive language model.\n"
+ feedback: "Feedback for GPT-3 can be provided on the feedback form linked in the\
+ \ model card [[Model Card]](https://github.com/openai/gpt-3/blob/master/model-card.md).\
+ \ The form is especially meant to collect feedback on concerns about misuse,\
+ \ synthetic text detection, bias, and risk of generative language models.\n"
+ intended_uses: "GPT-3 was intended to be used through the OpenAI API by developers\
+ \ for language applications.
Other intended uses of GPT-3 include researchers\
+ \ accessing the model through the API to study its paradigms [[Model Card]](https://github.com/openai/gpt-3/blob/master/model-card.md).\n"
+ license:
+ explanation: "The asset doesn't have a known license as it hasn't been publicly\
+ \ released.\n"
+ value: unknown
+ modality:
+ explanation: text (English)
+ value: text; text
+ model_card: https://github.com/openai/gpt-3/blob/master/model-card.md
+ monitoring: "OpenAI reviews all use cases of the model [[Model Card]](https://github.com/openai/gpt-3/blob/master/model-card.md).\n"
+ name: GPT-3
+ organization: OpenAI
+ prohibited_uses: "Access to GPT-3 is governed by Open AI API Usage Guidelines\
+ \ and API Terms of Use, prohibiting the use of the API in a way that causes\
+ \ societal harm. [[Usage Guidelines]] (https://beta.openai.com/docs/usage-guidelines/content-policy)\
+ \ [[Terms of Use]](https://openai.com/api/policies/terms/). The list of disallowed\
+ \ applications can be found in the usage guidelines [[Disallowed Applications]]\
+ \ (https://beta.openai.com/docs/usage-guidelines/disallowed-applications).\n"
+ quality_control: "One quality control method OpenAI employed was releasing GPT-3\
+ \ only through the OpenAI API. OpenAI states that it is easier to respond to\
+ \ misuse when the access to the model is gated through the API. It also hints\
+ \ that it plans to broaden the API access over time based on the amount of misuse\
+ \ [[OpenAI API Blog Post]](https://openai.com/blog/openai-api/). The authors\
+ \ identify potential misuses of GPT-3 in the paper and analyze it for fairness,\
+ \ bias and representation issues, but do not identify mitigation strategies\
+ \ [[Section 6]](https://arxiv.org/pdf/2005.14165.pdf#section.6).\n"
+ size: 175B parameters (dense)
 training_emissions:
+ explanation: "Estimate of the CO2(e) emissions for GPT-3 were not provided by\
+ \ OpenAI, but they were provided by a follow up work investigating the CO2\
+ \ equivalent emissions (CO2e) of GPT-3 [[Patterson et al.]] (https://arxiv.org/ftp/arxiv/papers/2104/2104.10350.pdf).\n"
 value: 552.1 tCO2e
- explanation: >
- Estimate of the CO2(e) emissions for GPT-3 were not provided
- by OpenAI, but they were provided by a follow up work investigating the CO2
- equivalent emissions (CO2e) of GPT-3
- [[Patterson et al.]]
- (https://arxiv.org/ftp/arxiv/papers/2104/2104.10350.pdf).
- training_time:
- value: 3640 petaflop/s-days
- explanation: >
- The time required to train different sized GPT-3 models are listed in
- [[Table D.1]](https://arxiv.org/pdf/2005.14165.pdf#table.caption.50).
- The time required to train the GPT-3 model with 175B parameters is
- reported as 3.64E+03 petaflop/s-days.
 training_hardware:
+ explanation: "The original paper doesn't specify the training hardware for GPT-3,\
+ \ but a follow up blog post indicates that it was trained on an Azure cluster,\
+ \ using 10000 GPUs with 400 Gbps [[Microsoft Blog Post]] (https://blogs.microsoft.com/ai/openai-azure-supercomputer/).\n"
 value: Azure
- explanation: >
- The original paper doesn't specify the training hardware for GPT-3,
- but a follow up blog post indicates that it was trained on a cluster on
- Azure cluster, using 10000 GPUs with 400 Gbps
- [[Microsoft Blog Post]]
- (https://blogs.microsoft.com/ai/openai-azure-supercomputer/).
- quality_control: >
- One quality control method OpenAI employed was releasing GPT-3 only through
- the OpenAI API.
OpenAI states that it is easier to respond to misuse when - the access to the model is gated through the API. It also hints that it - plans to broaden the API access over time based on the amount of misuse - [[OpenAI API Blog Post]](https://openai.com/blog/openai-api/). - The authors identify potential misuses of GPT-3 in the paper and analyze - it for fairness, bias and representation issues, but do not identify - mitigation strategies - [[Section 6]](https://arxiv.org/pdf/2005.14165.pdf#section.6). - # Downstream - access: + training_time: + explanation: "The time required to train different sized GPT-3 models are listed\ + \ in [[Table D.1]](https://arxiv.org/pdf/2005.14165.pdf#table.caption.50).\ + \ The time required to train the GPT-3 model with 175B parameters is reported\ + \ as 3.64E+03 petaflop/s-days.\n" + value: 3640 petaflop/s-days + type: model + url: https://arxiv.org/pdf/2005.14165.pdf +- access: + explanation: "The model is made available via the OpenAI API [[OpenAI API]](https://openai.com/api/)\ + \ as code-cushman-001 according to the [[Model Index]](https://platform.openai.com/docs/model-index-for-researchers).\n" value: limited - explanation: > - The GPT-3 model isn't fully released to the public, but - it was made available to Microsoft through the licencing agreement between - OpenAI and Microsoft - [[OpenAI Blog Post]] - (https://openai.com/blog/openai-licenses-gpt-3-technology-to-microsoft/). - The public can access the model through the Open AI API, which is available - in supported countries - [[Supported Countries]](https://beta.openai.com/docs/supported-countries) - [[OpenAI API]](https://openai.com/api/). - license: - value: unknown - explanation: > - The asset doesn't have a known license as it hasn't been publicly - released. - intended_uses: > - GPT-3 was intended to be use through the OpenAI API by developers for - language applications. Other intended use of GPT-3 include researchers - accessing the model through the API to study its paradigms - [[Model Card]](https://github.com/openai/gpt-3/blob/master/model-card.md). - prohibited_uses: > - Access to GPT-3 is governed by Open AI API Usage Guidelines and API Terms - of Use, prohibiting the use of the API in a way that causes societal harm. - [[Usage Guidelines]] - (https://beta.openai.com/docs/usage-guidelines/content-policy) - [[Terms of Use]](https://openai.com/api/policies/terms/). - The list of disallowed applications can be found in the usage guidelines - [[Disallowed Applications]] - (https://beta.openai.com/docs/usage-guidelines/disallowed-applications). - monitoring: > - OpenAI reviews all use cases of the model - [[Model Card]](https://github.com/openai/gpt-3/blob/master/model-card.md). - feedback: > - Feedback for GPT-3 can be provided on the feedback form linked in the - model card - [[Model Card]](https://github.com/openai/gpt-3/blob/master/model-card.md). - The form is especially meant to collect feedback on concerns about misuse, - synthetic text detection, bias, and risk of generative language models. - -- type: model - name: Codex - # General - organization: OpenAI - description: > - Codex is a GPT language model fine-tuned on publicly available code from - GitHub. 
+ analysis: "The model was evaluated using the HumanEval dataset with pass@k metric\ + \ and BLEU scores [[Section 2]](https://arxiv.org/pdf/2107.03374.pdf#section.2).\n" created_date: + explanation: "The date that Codex was announced to the public [[OpenAI Blog\ + \ Post]](https://openai.com/blog/openai-codex/).\n" value: 2021-08-10 - explanation: > - The date that Codex was announced to the public - [[OpenAI Blog Post]](https://openai.com/blog/openai-codex/). - url: https://arxiv.org/pdf/2107.03374.pdf - model_card: none - modality: text (English) and code - size: 12B parameters (dense model) - analysis: > - The model was evaluated using the HumanEval dataset with pass@k metric and - BLEU scores - [[Section 2]](https://arxiv.org/pdf/2107.03374.pdf#section.2). - # Construction dependencies: [GPT-3, Codex dataset, HumanEval] - training_emissions: - value: unknown - explanation: Authors do not report the training emissions. - training_time: - value: 100-1000 petaflop/s-days - explanation: > - Authors estimate hundreds of petaflop/s-days of compute - [[Section 7.6]](https://arxiv.org/pdf/2107.03374.pdf#subsection.7.6), but - don't provide an exact number. - training_hardware: - value: Azure - explanation: > - The paper specifies that Azure was used, but the underlying - architecture wasn't specified. - quality_control: > - The model wasn't fully released to the public as a quality control measure. - The authors identify potential risks of Codex in their paper due to the - following: over-reliance, misalignment, bias and representation, economic - and labor market impacts, security implications, environmental impact and - legal implications. They also make suggestions for some of these, but do not - implement them in Codex - [[Section 7]](https://arxiv.org/pdf/2107.03374.pdf#section.7). - # Downstream - access: - value: limited - explanation: > - The model is made available via the OpenAI API - [[OpenAI API]](https://openai.com/api/) as code-cushman-001 according to the - [[Model Index]](https://platform.openai.com/docs/model-index-for-researchers). + description: "Codex is a GPT language model fine-tuned on publicly available code\ + \ from GitHub.\n" + feedback: "Email the authors [[Codex Paper]](https://arxiv.org/pdf/2107.03374.pdf).\n" + intended_uses: "Codex is intended to be used for coding related language modelling\ + \ tasks.\n" license: + explanation: "The asset doesn't have a known license as it hasn't been publicly\ + \ released.\n" value: unknown - explanation: > - The asset doesn't have a known license as it hasn't been publicly - released. - intended_uses: > - Codex is intended to be used for coding related language modelling tasks. - prohibited_uses: - value: unknown - explanation: The prohibited uses of the model aren't specified. + modality: + explanation: text (English) and code + value: code, text; code, text + model_card: none monitoring: + explanation: "There isn't any known monitoring in place for the model, but there\ + \ may be internal mechanisms.\n" value: unknown - explanation: > - There isn't any known monitoring in place for the model, but there - may be internal mechanisms. - feedback: > - Email the authors - [[Codex Paper]](https://arxiv.org/pdf/2107.03374.pdf). - -- type: model - name: InstructGPT - # General + name: Codex organization: OpenAI - description: > - InstructGPT is a family of GPT-3 based models fine-tuned on human feedback, - which allows for better instruction following capabilities than GPT-3. 
- created_date: - value: 2022-01-27 - explanation: > - Date of the public announcement introducing InstructGPT - [[OpenAI Blog Post]] (https://openai.com/blog/instruction-following/). - url: https://arxiv.org/pdf/2203.02155.pdf - model_card: https://github.com/openai/following-instructions-human-feedback/blob/main/model-card.md - modality: text (English) and code - size: - value: 175B parameters (dense model) - explanation: Size of the largest InstructGPT model. + prohibited_uses: + explanation: The prohibited uses of the model aren't specified. + value: unknown + quality_control: "The model wasn't fully released to the public as a quality control\ + \ measure. The authors identify potential risks of Codex in their paper due\ + \ to the following: over-reliance, misalignment, bias and representation, economic\ + \ and labor market impacts, security implications, environmental impact and\ + \ legal implications. They also make suggestions for some of these, but do not\ + \ implement them in Codex [[Section 7]](https://arxiv.org/pdf/2107.03374.pdf#section.7).\n" + size: 12B parameters (dense) + training_emissions: + explanation: Authors do not report the training emissions. + value: unknown + training_hardware: + explanation: "The paper specifies that Azure was used, but the underlying architecture\ + \ wasn't specified.\n" + value: Azure + training_time: + explanation: "Authors estimate hundreds of petaflop/s-days of compute [[Section\ + \ 7.6]](https://arxiv.org/pdf/2107.03374.pdf#subsection.7.6), but don't provide\ + \ an exact number.\n" + value: 100-1000 petaflop/s-days + type: model + url: https://arxiv.org/pdf/2107.03374.pdf +- access: + explanation: "The model is made available via the OpenAI API but the specific\ + \ endpoint (davinci-instruct-beta) is currently unavailable. [[Model Index]](https://platform.openai.com/docs/model-index-for-researchers).\n" + value: closed analysis: The model was evaluated on human ratings to the InstructGPT answers to the prompts submitted to the OpenAI API as well as on public NLP datasets spanning truthfulness, toxicity, and bias, question answering, reading comprehension, and summarization tasks. - # Construction + created_date: + explanation: "Date of the public announcement introducing InstructGPT [[OpenAI\ + \ Blog Post]] (https://openai.com/blog/instruction-following/).\n" + value: 2022-01-27 dependencies: [GPT-3, OpenAI API] - training_emissions: + description: "InstructGPT is a family of GPT-3 based models fine-tuned on human\ + \ feedback, which allows for better instruction following capabilities than\ + \ GPT-3.\n" + feedback: "Email the authors [[InstructGPT Paper]](https://arxiv.org/pdf/2203.02155.pdf).\n" + intended_uses: "As stated in the model card: \"The intended direct users of InstructGPT\ + \ are developers who access its capabilities via the OpenAI API. Through the\ + \ OpenAI API, the model can be used by those who may not have AI development\ + \ experience, to build and explore language modeling systems across a wide range\ + \ of functions. 
We also anticipate that the model will continue to be used by\ + \ researchers to better understand the behaviors, capabilities, biases, and\ + \ constraints of large-scale language models\" [[Model Card]](https://github.com/openai/following-instructions-human-feedback/blob/main/model-card.md).\n" + license: + explanation: "The asset doesn't have a known license as it hasn't been publicly\ + \ released.\n" value: unknown - explanation: The authors do not estimate the emissions of the model. - training_time: - value: 60 petaflops/s-days - explanation: > - 175B SFT model required 4.9 petaflops/s-days; 175B PPO-ptx model required - 60 petaflops/s-days - [[Section 5]](https://arxiv.org/pdf/2203.02155.pdf#section.5). - training_hardware: - value: unknown - explanation: The authors do not disclose the training hardware used. - quality_control: > - The model wasn't fully released to the public as a quality control measure. - # Downstream - access: - value: closed - explanation: > - The model is made available via the OpenAI API but the specific endpoint (davinci-instruct-beta) - is currently unavailable. - [[Model Index]](https://platform.openai.com/docs/model-index-for-researchers). - license: - value: unknown - explanation: > - The asset doesn't have a known license as it hasn't been publicly - released. - intended_uses: > - As stated in the model card: "The intended direct users of InstructGPT are - developers who access its capabilities via the OpenAI API. Through the - OpenAI API, the model can be used by those who may not have AI development - experience, to build and explore language modeling systems across a wide - range of functions. We also anticipate that the model will continue to be - used by researchers to better understand the behaviors, capabilities, - biases, and constraints of large-scale language models" - [[Model Card]](https://github.com/openai/following-instructions-human-feedback/blob/main/model-card.md). - prohibited_uses: > - Access to InstructGPT is governed by Open AI API Usage Guidelines and API - Terms - of Use, prohibiting the use of the API in a way that causes societal harm. - [[Usage Guidelines]] - (https://beta.openai.com/docs/usage-guidelines/content-policy) - [[Terms of Use]](https://openai.com/api/policies/terms/). - The list of disallowed applications can be found in the usage guidelines - [[Disallowed Applications]] - (https://beta.openai.com/docs/usage-guidelines/disallowed-applications). + modality: + explanation: text (English) and code + value: code, text; code, text + model_card: https://github.com/openai/following-instructions-human-feedback/blob/main/model-card.md monitoring: + explanation: "There isn't any known monitoring in place for the model, but there\ + \ may be internal mechanisms.\n" value: unknown - explanation: > - There isn't any known monitoring in place for the model, but there - may be internal mechanisms. - feedback: > - Email the authors [[InstructGPT Paper]](https://arxiv.org/pdf/2203.02155.pdf). - -- type: model - name: Whisper - # General + name: InstructGPT organization: OpenAI - description: > - GPT-3 is a speech recognition model. - created_date: - value: 2022-09-21 - explanation: > - The date that Whisper was announced to the public - [[OpenAI Blog Post]](https://openai.com/blog/whisper/). 
- url: https://cdn.openai.com/papers/whisper.pdf - model_card: https://github.com/openai/whisper/blob/main/model-card.md - modality: Speech (multilingual, though primarily English) - size: - value: 1.5B parameters (dense model) - explanation: > - Whisper comes in several sizes (39M, 74M, 244M, 769M, 1.55B). Here we report - the size of the largest - model [[Table 1]](https://cdn.openai.com/papers/whisper.pdf). - analysis: The model was evaluated for zero-shot English and multingual speech - recognition, translation, language identification and robustness to noise. - # Construction - dependencies: [Whisper dataset] + prohibited_uses: "Access to InstructGPT is governed by Open AI API Usage Guidelines\ + \ and API Terms of Use, prohibiting the use of the API in a way that causes\ + \ societal harm. [[Usage Guidelines]] (https://beta.openai.com/docs/usage-guidelines/content-policy)\ + \ [[Terms of Use]](https://openai.com/api/policies/terms/). The list of disallowed\ + \ applications can be found in the usage guidelines [[Disallowed Applications]]\ + \ (https://beta.openai.com/docs/usage-guidelines/disallowed-applications).\n" + quality_control: "The model wasn't fully released to the public as a quality control\ + \ measure.\n" + size: 175B parameters (dense) training_emissions: + explanation: The authors do not estimate the emissions of the model. value: unknown - explanation: > - Authors do not report the training emissions. - training_time: - value: unknown - explanation: > - Authors do not report the training time. training_hardware: - value: Azure - explanation: > - The original paper doesn't specify the training hardware for Whisper, - but we expect it is trained on Azure given other models from OpenAI are trained - using Azure - [[Microsoft Blog Post]] - (https://blogs.microsoft.com/ai/openai-azure-supercomputer/). - quality_control: No specific quality control methods are documented. - # Downstream - access: + explanation: The authors do not disclose the training hardware used. + value: unknown + training_time: + explanation: "175B SFT model required 4.9 petaflops/s-days; 175B PPO-ptx model\ + \ required 60 petaflops/s-days [[Section 5]](https://arxiv.org/pdf/2203.02155.pdf#section.5).\n" + value: 60 petaflops/s-days + type: model + url: https://arxiv.org/pdf/2203.02155.pdf +- access: + explanation: "Models are available at [https://github.com/openai/whisper](https://github.com/openai/whisper).\n" value: open - explanation: > - Models are available at [https://github.com/openai/whisper](https://github.com/openai/whisper). + analysis: The model was evaluated for zero-shot English and multingual speech + recognition, translation, language identification and robustness to noise. 
+ created_date:
+ explanation: "The date that Whisper was announced to the public [[OpenAI Blog\
+ \ Post]](https://openai.com/blog/whisper/).\n"
+ value: 2022-09-21
+ dependencies: [Whisper dataset]
+ description: "Whisper is a speech recognition model.\n"
+ feedback: "The discussions page of the codebase is not formally cited as a place\
+ \ for feedback, but is being used in this way [[Discussions page]](https://github.com/openai/whisper/discussions)\n"
+ intended_uses: "Whisper is a general-purpose speech recognition model; it is a\
+ \ multi-task model that can perform multilingual speech recognition as well\
+ \ as speech translation and language identification.\n"
 license:
- value: MIT
 explanation: https://github.com/openai/whisper](https://github.com/openai/whisper
+ value: MIT
+ modality:
+ explanation: Speech (multilingual, though primarily English)
+ value: audio; audio
+ model_card: https://github.com/openai/whisper/blob/main/model-card.md
 monitoring:
+ explanation: "No monitoring is mentioned by the authors.\n"
 value: none
- explanation: >
- No monitoring is mentioned by the authors.
- feedback: >
- The discussions page of the codebase is not formally cited as a place
- for feedback, but is being used in this way [[Discussions page]](https://github.com/openai/whisper/discussions)
-
-- type: model
- name: CLIP
- # General
+ name: Whisper
 organization: OpenAI
- description: >
- "CLIP (Contrastive Language-Image Pre-Training) is a neural network trained
- on a variety of (image, text) pairs. It can be instructed in natural
- language to predict the most relevant text snippet, given an image,
- without directly optimizing for the task, similarly to the zero-shot
- capabilities of GPT-2 and 3. We found CLIP matches the performance of the
- original ResNet50 on ImageNet “zero-shot” without using any of the original
- 1.28M labeled examples, overcoming several major challenges in computer
- vision"
- [[CLIP Repository]](https://github.com/openai/CLIP).
- created_date:
- value: 2021-01-05
- explanation: >
- The date of the blog post announcing CLIP
- [[OpenAI Blog Post]](https://openai.com/blog/clip/).
- url: https://arxiv.org/pdf/2103.00020.pdf
- model_card: https://github.com/openai/CLIP/blob/main/model-card.md
- modality: Text (English) and Image
- size:
+ prohibited_uses: unknown
+ quality_control: No specific quality control methods are documented.
+ size: 1.5B parameters (dense)
+ training_emissions:
+ explanation: "Authors do not report the training emissions.\n"
 value: unknown
- explanation: >
- The total size is unknown, but the largest CLIP model is a
- a combination of 63M-parameter (dense) text encoder and a 307M-parameter
- vision encoder.
+ training_hardware: + explanation: "The original paper doesn't specify the training hardware for Whisper,\ + \ but we expect it is trained on Azure given other models from OpenAI are\ + \ trained using Azure [[Microsoft Blog Post]] (https://blogs.microsoft.com/ai/openai-azure-supercomputer/).\n" + value: Azure + training_time: + explanation: "Authors do not report the training time.\n" + value: unknown + type: model + url: https://cdn.openai.com/papers/whisper.pdf +- access: + explanation: "Model checkpoints and the helper code can be accessed at the official\ + \ CLIP repository [[CLIP Repository]](https://github.com/openai/CLIP).\n" + value: open analysis: The model was evaluated on standard vision datasets (e.g. CIFAR10, ImageNet) and showed robust state of the art results. - # Construction + created_date: + explanation: "The date of the blog post announcing CLIP [[OpenAI Blog Post]](https://openai.com/blog/clip/).\n" + value: 2021-01-05 dependencies: [CLIP dataset] - training_emissions: unknown - training_time: - value: 71.12 petaflop/s-day - explanation: > - The exact training time of CLIP depends on the vision and language - encoders used: "The largest ResNet model, RN50x64, took 18 days to train - on 592 V100 GPUs while the largest Vision Transformer took 12 days on 256 - V100 GPUs. For the ViT-L/14 we also pre-train at a higher 336 pixel - resolution for one additional epoch to boost performance ... Unless - otherwise specified, all results reported in this paper as “CLIP” use this - model which we found to perform best" - [[CLIP paper]](https://arxiv.org/pdf/2103.00020.pdf). - - Using the GPU Time method outlined in the - [[OpenAI AI and Computer Blog]](https://openai.com/blog/ai-and-compute/#addendum), - we can compute the training time in petaflop/s-day for RN50x64 and - ViT-L/14 with the following equation: Number of GPUs * (peta-flops/GPU) * - days trained * estimated utilization. - We will assume that estimated utilization is 33%, following - [[OpenAI AI and Computer Blog]](https://openai.com/blog/ai-and-compute/#addendum). - The specific V100 GPU used isn't cleared from the paper, so we will assume - that V100 PCle was used. The V100 PCle GPUs have a single precision - performance of 15.7 teraflops - [[V100 Datasheet]](https://images.nvidia.com/content/technologies/volta/pdf/volta-v100-datasheet-update-us-1165301-r5.pdf), - which is equal to 15.7 / 1000 = 0.0157 petaflops. - - Following the formula "Number of GPUs * (peta-flops/GPU) * days trained * - estimated utilization", we can compute the petaflop/s-day for RN50x64 as - follows: - 592 * 0.0157 * 18 * 0.33. - Similarly, for ViT-L/14, we can compute the same as: - 256 * 0.0157 * 12 * 0.33. - Adding the two, we estimate the total training time as 71.12 - petaflop/s-day. - training_hardware: NVIDIA V100 GPUs - quality_control: > - The authors found that the performance of the model depended heavily on - which classes are included (and excluded) for a given task. They reported - significant race and gender based disparities on the Fairface dataset, - depending on how the classes were constructed. The authors also demonstrated - that the model was capable of racial profiling with high accuracy - [[Section 7]](https://arxiv.org/pdf/2103.00020.pdf#section.7). - # Downstream - access: - value: open - explanation: > - Model checkpoints and the helper code can be accessed - at the official CLIP repository - [[CLIP Repository]](https://github.com/openai/CLIP). 
+ description: "\"CLIP (Contrastive Language-Image Pre-Training) is a neural network\
+ \ trained on a variety of (image, text) pairs. It can be instructed in natural\
+ \ language to predict the most relevant text snippet, given an image, without\
+ \ directly optimizing for the task, similarly to the zero-shot capabilities\
+ \ of GPT-2 and 3. We found CLIP matches the performance of the original ResNet50\
+ \ on ImageNet “zero-shot” without using any of the original 1.28M labeled examples,\
+ \ overcoming several major challenges in computer vision\" [[CLIP Repository]](https://github.com/openai/CLIP).\n"
+ feedback: Questions can be shared at the feedback form linked in the CLIP model
+ card [[Model Card]](https://github.com/openai/CLIP/blob/main/model-card.md).
+ intended_uses: "The model is intended to be used by AI researchers to better understand\
+ \ "robustness, generalization, and other capabilities, biases, and constraints\
+ \ of computer vision models" [[CLIP Model Card]](https://github.com/openai/CLIP/blob/main/model-card.md).\n"
 license: MIT
+ modality:
+ explanation: Text (English) and Image
+ value: image, text; image, text
+ model_card: https://github.com/openai/CLIP/blob/main/model-card.md
 monitoring:
- value: none
 explanation: There are no monitoring mechanisms in place for CLIP.
- feedback: Questions can be shared at the feedback form linked in the CLIP model
- card [[Model Card]](https://github.com/openai/CLIP/blob/main/model-card.mdlicen).
-
-- type: model
- name: DALL·E
- # General
+ value: none
+ name: CLIP
 organization: OpenAI
- description: >
- DALL·E is a GPT-3 based model trained to generate images from text
- descriptions. The authors found that it had "a diverse set of capabilities,
- including creating anthropomorphized versions of animals and objects,
- combining unrelated concepts in plausible ways, rendering text, and
- applying transformations to existing images"
- [[OpenAI Blog Post]](https://openai.com/blog/dall-e/).
+ prohibited_uses: "\"Any deployed use case of the model - whether commercial or\
+ \ not - is currently out of scope. Non-deployed use cases such as image search\
+ \ in a constrained environment, are also not recommended unless there is thorough\
+ \ in-domain testing of the model with a specific, fixed class taxonomy. This\
+ \ is because our safety assessment demonstrated a high need for task specific\
+ \ testing especially given the variability of CLIP’s performance with different\
+ \ class taxonomies. This makes untested and unconstrained deployment of the\
+ \ model in any use case currently potentially harmful.\nCertain use cases which\
+ \ would fall under the domain of surveillance and facial recognition are always\
+ \ out-of-scope regardless of performance of the model. This is because the use\
+ \ of artificial intelligence for tasks such as these can be premature currently\
+ \ given the lack of testing norms and checks to ensure its fair use.\nSince\
+ \ the model has not been purposefully trained in or evaluated on any languages\
+ \ other than English, its use should be limited to English language use cases\"\
+ \ [[Model Card]](https://github.com/openai/CLIP/blob/main/model-card.md).\n"
+ quality_control: "The authors found that the performance of the model depended\
+ \ heavily on which classes are included (and excluded) for a given task. They\
+ \ reported significant race and gender based disparities on the Fairface dataset,\
+ \ depending on how the classes were constructed. The authors also demonstrated\
+ \ that the model was capable of racial profiling with high accuracy [[Section\
+ \ 7]](https://arxiv.org/pdf/2103.00020.pdf#section.7).\n"
+ size: unknown
+ training_emissions: unknown
+ training_hardware: NVIDIA V100 GPUs
+ training_time:
+ explanation: "The exact training time of CLIP depends on the vision and language\
+ \ encoders used: \"The largest ResNet model, RN50x64, took 18 days to train\
+ \ on 592 V100 GPUs while the largest Vision Transformer took 12 days on 256\
+ \ V100 GPUs. For the ViT-L/14 we also pre-train at a higher 336 pixel resolution\
+ \ for one additional epoch to boost performance ... Unless otherwise specified,\
+ \ all results reported in this paper as “CLIP” use this model which we found\
+ \ to perform best\" [[CLIP paper]](https://arxiv.org/pdf/2103.00020.pdf).\n\
Using the GPU Time method outlined in the [[OpenAI AI and Computer Blog]](https://openai.com/blog/ai-and-compute/#addendum),\
+ \ we can compute the training time in petaflop/s-day for RN50x64 and ViT-L/14\
+ \ with the following equation: Number of GPUs * (peta-flops/GPU) * days trained\
+ \ * estimated utilization. We will assume that estimated utilization is 33%,\
+ \ following [[OpenAI AI and Computer Blog]](https://openai.com/blog/ai-and-compute/#addendum).\
+ \ The specific V100 GPU used isn't clear from the paper, so we will assume\
+ \ that V100 PCIe was used. The V100 PCIe GPUs have a single precision performance\
+ \ of 15.7 teraflops [[V100 Datasheet]](https://images.nvidia.com/content/technologies/volta/pdf/volta-v100-datasheet-update-us-1165301-r5.pdf),\
+ \ which is equal to 15.7 / 1000 = 0.0157 petaflops.\nFollowing the formula\
+ \ \"Number of GPUs * (peta-flops/GPU) * days trained * estimated utilization\"\
, we can compute the petaflop/s-day for RN50x64 as follows: 592 * 0.0157 *\
+ \ 18 * 0.33. Similarly, for ViT-L/14, we can compute the same as: 256 * 0.0157\
+ \ * 12 * 0.33.
Adding the two, we estimate the total training time as 71.12\ + \ petaflop/s-day.\n" + value: 71.12 petaflop/s-day + type: model + url: https://arxiv.org/pdf/2103.00020.pdf +- access: + explanation: Available via the OpenAI API as of Q1 2023. + value: limited + analysis: "The model was evaluated against three prior approaches, AttnGAN, DM-GAN,\ + \ and DF-GAN using Inception Score and Fréchet Inception Distance on MS-COCO\ + \ as metrics. The model was also evaluated by humans and received the majority\ + \ of the votes in generating images that look realistic and better match the\ + \ caption when compared to the images generated by DF-GAN [[Section]](https://arxiv.org/pdf/2102.12092.pdf#section.3).\n" created_date: + explanation: "The date of the blog post announcing DALL·E [[OpenAI Blog Post]](https://openai.com/blog/dall-e/).\n" value: 2021-01-05 - explanation: > - The date of the blog post announcing DALL·E - [[OpenAI Blog Post]](https://openai.com/blog/dall-e/). - url: https://arxiv.org/pdf/2102.12092.pdf - model_card: https://github.com/openai/DALL-E/blob/master/model_card.md - modality: Text (English) and Image - size: 12B parameters (dense model) - analysis: > - The model was evaluated against three prior approaches, AttnGAN, DM-GAN, and - DF-GAN using Inception Score and Fréchet Inception Distance on MS-COCO as - metrics. The model was also evaluated by humans and received the majority - of the votes in generating images that look realistic and better match the - caption when compared to the images generated by DF-GAN - [[Section]](https://arxiv.org/pdf/2102.12092.pdf#section.3). - # Construction dependencies: [DALL·E dataset] - training_emissions: - value: unknown - explanation: The training emissions were not reported. - training_time: - value: unknown - explanation: The training emissions were not reported. - training_hardware: NVIDIA V100 GPUs - quality_control: unknown - # Downstream - access: - value: limited - explanation: Available via the OpenAI API as of Q1 2023. + description: "DALL·E is a GPT-3 based model trained to generate images from text\ + \ descriptions. The authors found that it had \"a diverse set of capabilities,\ + \ including creating anthropomorphized versions of animals and objects, combining\ + \ unrelated concepts in plausible ways, rendering text, and applying transformations\ + \ to existing images\" [[OpenAI Blog Post]](https://openai.com/blog/dall-e/).\n" + feedback: "Contact the paper author(s) specified on the paper [[Paper]](https://arxiv.org/pdf/2102.12092.pdf).\n" + intended_uses: "\"The model is intended for others to use for training their own\ + \ generative models\" [[Model Card]](https://github.com/openai/DALL-E/blob/master/model_card.md).\n" license: + explanation: "The asset doesn't have a known license as it hasn't been publicly\ + \ released.\n" value: unknown - explanation: > - The asset doesn't have a known license as it hasn't been publicly - released. - intended_uses: > - "The model is intended for others to use for training their own generative - models" - [[Model Card]](https://github.com/openai/DALL-E/blob/master/model_card.md). - prohibited_uses: - value: unknown - explanation: The prohibited uses of the model are unknown. + modality: + explanation: Text (English) and Image + value: image, text; image, text + model_card: https://github.com/openai/DALL-E/blob/master/model_card.md monitoring: - value: none explanation: There are no monitoring mechanisms in place for DALL·E. 
- feedback: > - Contact the paper author(s) specified on the paper - [[Paper]](https://arxiv.org/pdf/2102.12092.pdf). - - -- type: model - name: Jukebox - organization: OpenAI - description: Jukebox is a generate model that produces music - created_date: - value: 2020-04-30 - explanation: The date the model paper was released - url: https://arxiv.org/abs/2005.00341 - model_card: value: none - explanation: No model card found as of 2023-01-09 - modality: Audio (music) - size: 5B parameters + name: DALL·E + organization: OpenAI + prohibited_uses: + explanation: The prohibited uses of the model are unknown. + value: unknown + quality_control: unknown + size: 12B parameters (dense) + training_emissions: + explanation: The training emissions were not reported. + value: unknown + training_hardware: NVIDIA V100 GPUs + training_time: + explanation: The training emissions were not reported. + value: unknown + type: model + url: https://arxiv.org/pdf/2102.12092.pdf +- access: + explanation: The model can be downloaded from the [Github repository](https://github.com/openai/jukebox) + value: open analysis: Evaluations in paper are primarily considering the fidelity and novelty of samples from Jukebox. + created_date: + explanation: The date the model paper was released + value: 2020-04-30 dependencies: [Jukebox Dataset] - training_emissions: unknown - training_time: 4 weeks - training_hardware: 510 V100s - quality_control: '' - access: - value: open - explanation: The model can be downloaded from the [Github repository](https://github.com/openai/jukebox) + description: Jukebox is a generate model that produces music + feedback: none + intended_uses: '' license: + explanation: "The license is provided in the [Github repository](https://github.com/openai/jukebox).\n" value: Noncommercial Use License - explanation: > - The license is provided in the [Github repository](https://github.com/openai/jukebox). - intended_uses: '' - prohibited_uses: none + modality: + explanation: Audio (music) + value: audio; audio + model_card: + explanation: No model card found as of 2023-01-09 + value: none monitoring: none - feedback: none - -- type: model - name: DALL·E 2 - # General + name: Jukebox organization: OpenAI - description: > - "DALL·E 2 is an artificial intelligence model that takes a text prompt - and/or existing image as an input and generates a new image as an output" - [[System Card]] - (https://github.com/openai/dalle-2-preview/blob/main/system-card.md). - The model wasn't fully released, but OpenAI released a version of the model - (DALL·E 2 Preview) to a select group of testers. + prohibited_uses: none + quality_control: '' + size: 5B parameters (dense) + training_emissions: unknown + training_hardware: 510 V100s + training_time: 4 weeks + type: model + url: https://arxiv.org/abs/2005.00341 +- access: + explanation: "The model is available to OpenAI employees, researchers, creatives\ + \ and company friends. OpenAI opened a waitlist for DALL·E 2 access. [[System\ + \ Card]] (https://github.com/openai/dalle-2-preview/blob/main/system-card.md#access).\n" + value: limited + analysis: "The model is capable of generating explicit content and the researchers\ + \ found limited amount of spurious content generated. 
The researchers also found\ + \ that visual synonyms can be used to prompt the model to surface unwanted generations\ + \ [[Probes and Evaluations]] (https://github.com/openai/dalle-2-preview/blob/main/system-card.md#probes-and-evaluations).\n" created_date: + explanation: "OpenAI released in a blog post in April 2020 [[OpenAI Blog Post]](https://openai.com/dall-e-2/).\n" value: 2022-04-13 - explanation: > - OpenAI released in a blog post in April 2020 - [[OpenAI Blog Post]](https://openai.com/dall-e-2/). - url: https://arxiv.org/abs/2204.06125 + dependencies: [DALL·E dataset, CLIP dataset] + description: "\"DALL·E 2 is an artificial intelligence model that takes a text\ + \ prompt and/or existing image as an input and generates a new image as an output\"\ + \ [[System Card]] (https://github.com/openai/dalle-2-preview/blob/main/system-card.md).\ + \ The model wasn't fully released, but OpenAI released a version of the model\ + \ (DALL·E 2 Preview) to a select group of testers.\n" + feedback: Feedback can be provided at support at openai.com. + intended_uses: "\"The intended use of the DALL·E 2 Preview at this time is for\ + \ personal, non-commercial exploration and research purposes by people who are\ + \ interested in understanding the potential uses of these capabilities\" [[Use]]\ + \ (https://github.com/openai/dalle-2-preview/blob/main/system-card.md#use).\n" + license: + explanation: "The asset isn't released, and hence the license is unknown.\n" + value: unknown + modality: + explanation: Text (English) and Image + value: image, text; image, text model_card: https://github.com/openai/dalle-2-preview/blob/main/system-card.md - modality: Text (English) and Image + monitoring: "Uses of the model are monitored. In the preview version, any user\ + \ can flag content. The specific policies for monitoring are not disclosed,\ + \ but possible measures include disabling of accounts violating the content\ + \ policies [[Monitoring and Reporting]] (https://github.com/openai/dalle-2-preview/blob/main/system-card.md#monitoring-and-reporting).\n" + name: DALL·E 2 + organization: OpenAI + prohibited_uses: "Use of the model is governed by the OpenAI Content Policy, which\ + \ prohibits posting of G rated content. Users are not allowed to utilize the\ + \ model in commercial products in the preview version [[Content Policy]] (https://github.com/openai/dalle-2-preview/blob/main/system-card.md#policies-and-enforcement).\n" + quality_control: "The model isn't fully released to the public as part of a quality\ + \ control measure. The usage of the model by testers is monitored and user provided\ + \ prompts are filtered [[Input filters]] (https://github.com/openai/dalle-2-preview/blob/main/system-card.md#input-filters).\n" size: unknown - analysis: > - The model is capable of generating explicit content and the researchers - found limited amount of spurious content generated. The researchers also - found that visual synonyms can be used to prompt the model to surface - unwanted generations - [[Probes and Evaluations]] - (https://github.com/openai/dalle-2-preview/blob/main/system-card.md#probes-and-evaluations). - # Construction - dependencies: [DALL·E dataset, CLIP dataset] training_emissions: unknown - training_time: unknown training_hardware: unknown - quality_control: > - The model isn't fully released to the public as part of a quality control - measure. 
The usage of the model by testers is monitored and user provided - prompts are filtered - [[Input filters]] - (https://github.com/openai/dalle-2-preview/blob/main/system-card.md#input-filters). - # Downstream - access: + training_time: unknown + type: model + url: https://arxiv.org/abs/2204.06125 +- access: + explanation: "The OpenAI API is available to the public in supported countries\ + \ [[Supported Countries]](https://beta.openai.com/docs/supported-countries)\ + \ [[OpenAI API]](https://openai.com/api/).\n" value: limited - explanation: > - The model is available to OpenAI employees, researchers, creatives - and company friends. OpenAI opened a waitlist for DALL·E 2 access. - [[System Card]] - (https://github.com/openai/dalle-2-preview/blob/main/system-card.md#access). - license: - value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - intended_uses: > - "The intended use of the DALL·E 2 Preview at this time is for personal, - non-commercial exploration and research purposes by people who are - interested in understanding the potential uses of these capabilities" - [[Use]] - (https://github.com/openai/dalle-2-preview/blob/main/system-card.md#use). - prohibited_uses: > - Use of the model is governed by the OpenAI Content Policy, which prohibits - posting of G rated content. - Users are not allowed to utilize the model in commercial products in the - preview version - [[Content Policy]] - (https://github.com/openai/dalle-2-preview/blob/main/system-card.md#policies-and-enforcement). - monitoring: > - Uses of the model are monitored. In the preview version, any user can flag - content. The specific policies for monitoring are not disclosed, but - possible measures include disabling of accounts violating the content - policies - [[Monitoring and Reporting]] - (https://github.com/openai/dalle-2-preview/blob/main/system-card.md#monitoring-and-reporting). - feedback: Feedback can be provided at support at openai.com. - -- type: application - name: OpenAI API - # General - organization: OpenAI - description: > - OpenAI API is a general purpose "text in, text out" interface connecting - users with a suite of language models. The API was initially released - as a gateway to GPT-3, but it now supports access to other, more - specialized OpenAI models. - [[Open AI Blog Post]](https://openai.com/blog/openai-api/) + adaptation: The API exposes the models fairly direclty with a range of hyperparameters + (e.g. temperature scaling). created_date: + explanation: "The date that OpenAI API was announced to the public [[Open AI\ + \ Blog Post]](https://openai.com/blog/openai-api/).\n" value: 2020-06-11 - explanation: > - The date that OpenAI API was announced to the public - [[Open AI Blog Post]](https://openai.com/blog/openai-api/). - url: https://openai.com/api/ - # Construction dependencies: - GPT-3 - Codex @@ -1079,588 +809,559 @@ - Whisper - DALL·E - GPT-4 - adaptation: The API exposes the models fairly direclty with a range of hyperparameters - (e.g. temperature scaling). - output_space: > - Given a prompting text, the OpenAI API provides access to text completions, - and log probabilities. The support for text and code embeddings were added - on 2022-01-25 - [[OpenAI Blog Post]] - (https://openai.com/blog/introducing-text-and-code-embeddings/). - quality_control: > - Given a prompt, OpenAI API checks whether a completion contains unsafe - language using its filters and marks the completion accordingly if so. 
- The API also provides developers with special endpoints that scope the - API usage. OpenAI also developed user guidelines to help developers - understand safety issues - [[OpenAI API]](https://openai.com/api/). - # Downstream - access: - value: limited - explanation: > - The OpenAI API is available to the public in supported countries - [[Supported Countries]](https://beta.openai.com/docs/supported-countries) - [[OpenAI API]](https://openai.com/api/). - terms_of_service: https://openai.com/api/policies/terms/ + description: "OpenAI API is a general purpose \"text in, text out\" interface\ + \ connecting users with a suite of language models. The API was initially released\ + \ as a gateway to GPT-3, but it now supports access to other, more specialized\ + \ OpenAI models. [[Open AI Blog Post]](https://openai.com/blog/openai-api/)\n" + failures: + explanation: "There are no known documented failures of the OpenAI API at the\ + \ time of writing.\n" + value: unknown + feedback: + explanation: "There is no known specific feedback channel for the OpenAI API,\ + \ but OpenAI support theme can be reached via email at support at openai.com.\n" + value: unknown + intended_uses: "OpenAI API was designed to be used by developers to empower applications,\ + \ and researchers to study language models [[Section 3]](https://openai.com/api/policies/terms/).\n" license: - value: custom explanation: Per the Terms of Use, a limited license is provided to the users during their use of the API [[Section 2]](https://openai.com/api/policies/terms/). - intended_uses: > - OpenAI API was designed to be used by developers to empower applications, - and researchers to study language models - [[Section 3]](https://openai.com/api/policies/terms/). - prohibited_uses: > - OpenAI API Terms of Use prohibits the use of the API in a way violating - the applicable law, including: (i) "Illegal activities, such as child - pornography, gambling, cybercrime, piracy, violating copyright, - trademark or other intellectual property laws"; (ii) "Accessing or - authorizing anyone to access the APIs from an embargoed country, region, or - territory as prohibited by the U.S. government"; (iii) "Threatening, - stalking, defaming, defrauding, degrading, victimizing or intimidating - anyone for any reason". - The usage requirements are detailed in the Terms of Use - [[Section 3]](https://openai.com/api/policies/terms/). - monitoring: > - OpenAI may monitor the API use to ensure "quality and improve OpenAI - systems, products and services; perform research; and ensure compliance" - with the Terms of Service and all applicable laws. Users of the API will - give OpenAI reasonable access to their application to monitor compliance - with the terms listed in the Terms of Service - [[Section 5(b)]](https://openai.com/api/policies/terms/). - Apps using the OpenAI API should submit an application once they are - deployed to real users. The review form takes 10 minutes to complete and - over 97% of the applications are directly accepted or conditionally - accepted. The applicants are notified of the decision within 2 business - days - [[App Review Guidelines]] - (https://beta.openai.com/docs/usage-guidelines/app-review). - feedback: - value: unknown - explanation: > - There is no known specific feedback channel for the OpenAI API, - but OpenAI support theme can be reached via email at support at openai.com. 
- # Deployment + value: custom + monitoring: "OpenAI may monitor the API use to ensure \"quality and improve OpenAI\ + \ systems, products and services; perform research; and ensure compliance\"\ + \ with the Terms of Service and all applicable laws. Users of the API will give\ + \ OpenAI reasonable access to their application to monitor compliance with the\ + \ terms listed in the Terms of Service [[Section 5(b)]](https://openai.com/api/policies/terms/).\ + \ Apps using the OpenAI API should submit an application once they are deployed\ + \ to real users. The review form takes 10 minutes to complete and over 97% of\ + \ the applications are directly accepted or conditionally accepted. The applicants\ + \ are notified of the decision within 2 business days [[App Review Guidelines]]\ + \ (https://beta.openai.com/docs/usage-guidelines/app-review).\n" monthly_active_users: + explanation: "The number of monthly active users is not known publicly, but\ + \ OpenAI mentioned that the API was being used by tens of thousands of developers\ + \ in a blog post from 2021-11-18 [[OpenAI Blog Post]](https://openai.com/blog/api-no-waitlist/).\n" value: unknown - explanation: > - The number of monthly active users is not known publicly, but - OpenAI mentioned that the API was being used by tens of thousands of - developers in a blog post from 2021-11-18 - [[OpenAI Blog Post]](https://openai.com/blog/api-no-waitlist/). + name: OpenAI API + organization: OpenAI + output_space: "Given a prompting text, the OpenAI API provides access to text\ + \ completions, and log probabilities. The support for text and code embeddings\ + \ were added on 2022-01-25 [[OpenAI Blog Post]] (https://openai.com/blog/introducing-text-and-code-embeddings/).\n" + prohibited_uses: "OpenAI API Terms of Use prohibits the use of the API in a way\ + \ violating the applicable law, including: (i) \"Illegal activities, such as\ + \ child pornography, gambling, cybercrime, piracy, violating copyright, trademark\ + \ or other intellectual property laws\"; (ii) \"Accessing or authorizing anyone\ + \ to access the APIs from an embargoed country, region, or territory as prohibited\ + \ by the U.S. government\"; (iii) \"Threatening, stalking, defaming, defrauding,\ + \ degrading, victimizing or intimidating anyone for any reason\". The usage\ + \ requirements are detailed in the Terms of Use [[Section 3]](https://openai.com/api/policies/terms/).\n" + quality_control: "Given a prompt, OpenAI API checks whether a completion contains\ + \ unsafe language using its filters and marks the completion accordingly if\ + \ so. The API also provides developers with special endpoints that scope the\ + \ API usage. OpenAI also developed user guidelines to help developers understand\ + \ safety issues [[OpenAI API]](https://openai.com/api/).\n" + terms_of_service: https://openai.com/api/policies/terms/ + type: application + url: https://openai.com/api/ user_distribution: + explanation: "The distribution of the users is not known, but we estimate majority\ + \ of the users to be developers based in the United States.\n" value: unknown - explanation: > - The distribution of the users is not known, but we estimate - majority of the users to be developers based in the United States. - failures: - value: unknown - explanation: > - There are no known documented failures of the OpenAI API at the - time of writing. 
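The output space described in the OpenAI API entry above (text completions plus log probabilities, with hyperparameters such as temperature) corresponds to a short request against the completions endpoint. The sketch below uses the pre-1.0 `openai` Python client; the model name, prompt, and API key are placeholders chosen for illustration, not values taken from this entry.

import openai

openai.api_key = "YOUR_API_KEY"  # placeholder; not a real key

# Minimal completion request: temperature controls sampling, and logprobs asks
# the API to return per-token log probabilities alongside the completion text.
response = openai.Completion.create(
    model="text-davinci-003",     # placeholder model choice
    prompt="Once upon a time",
    max_tokens=16,
    temperature=0.7,
    logprobs=5,
)

choice = response["choices"][0]
print(choice["text"])                          # the text completion
print(choice["logprobs"]["top_logprobs"][0])   # log probs for the first token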
- -- type: model - name: VPT - organization: OpenAI - description: '' - created_date: 2022-06-23 - url: https://arxiv.org/abs/2206.11795 - model_card: '' - modality: video +- access: open analysis: '' - size: 500M parameters (dense model) + created_date: 2022-06-23 dependencies: [web_clean] - training_emissions: '' - training_time: '' - training_hardware: '' - quality_control: '' - access: open - license: MIT + description: '' + feedback: '' intended_uses: '' - prohibited_uses: '' + license: MIT + modality: + explanation: video + value: video; video + model_card: '' monitoring: '' - feedback: '' - -- type: dataset - name: web_clean + name: VPT organization: OpenAI - description: '' - created_date: 2022-06-23 + prohibited_uses: '' + quality_control: '' + size: 500M parameters (dense) + training_emissions: '' + training_hardware: '' + training_time: '' + type: model url: https://arxiv.org/abs/2206.11795 - datasheet: '' - modality: Video - size: 70k hours - sample: [] +- access: closed analysis: '' + created_date: 2022-06-23 + datasheet: '' dependencies: [] - included: '' + description: '' excluded: '' - quality_control: '' - access: closed + feedback: '' + included: '' + intended_uses: '' license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - intended_uses: '' - prohibited_uses: '' + modality: + explanation: Video + value: video monitoring: '' - feedback: '' - -- type: application - name: ChatGPT + name: web_clean organization: OpenAI - description: ChatGPT is an artificial intelligence chatbot developed by OpenAI + prohibited_uses: '' + quality_control: '' + sample: [] + size: 70k hours + type: dataset + url: https://arxiv.org/abs/2206.11795 +- access: open + adaptation: '' created_date: 2022-11-30 - url: https://openai.com/blog/chatgpt dependencies: [gpt-3.5-turbo, OpenAI toxicity classifier] - adaptation: '' - output_space: '' - quality_control: '' - access: open + description: ChatGPT is an artificial intelligence chatbot developed by OpenAI + failures: '' + feedback: '' + intended_uses: '' license: - value: custom explanation: Per the Terms of Use, a limited license is provided to the users during their use of the API [[Section 2]](https://openai.com/api/policies/terms/). - terms_of_service: '' - intended_uses: '' - prohibited_uses: '' + value: custom monitoring: '' - feedback: '' monthly_active_users: - value: 100000000 explanation: https://www.reuters.com/technology/chatgpt-sets-record-fastest-growing-user-base-analyst-note-2023-02-01/ - user_distribution: '' - failures: '' - -- type: model - name: gpt-3.5-turbo + value: 100000000 + name: ChatGPT organization: OpenAI - description: '' - created_date: 2023-03-01 - url: https://platform.openai.com/docs/models/gpt-3-5 - model_card: '' - modality: text + output_space: '' + prohibited_uses: '' + quality_control: '' + terms_of_service: '' + type: application + url: https://openai.com/blog/chatgpt + user_distribution: '' +- access: limited analysis: '' - size: unknown + created_date: 2023-03-01 dependencies: [gpt-3.5-turbo dataset] - training_emissions: '' - training_time: '' - training_hardware: '' - quality_control: '' - access: limited + description: '' + feedback: '' + intended_uses: '' license: - value: custom explanation: Per the Terms of Use, a limited license is provided to the users during their use of the API [[Section 2]](https://openai.com/api/policies/terms/). 
- intended_uses: '' - prohibited_uses: '' + value: custom + modality: + explanation: text + value: text; text + model_card: '' monitoring: '' - feedback: '' - -- type: dataset - name: gpt-3.5-turbo dataset + name: gpt-3.5-turbo organization: OpenAI - description: '' - created_date: 2023-03-01 + prohibited_uses: '' + quality_control: '' + size: unknown + training_emissions: '' + training_hardware: '' + training_time: '' + type: model url: https://platform.openai.com/docs/models/gpt-3-5 +- access: limited + analysis: '' + created_date: 2023-03-01 datasheet: '' + dependencies: [] + description: '' + excluded: '' + feedback: '' + included: '' + intended_uses: '' + license: + explanation: "The asset isn't released, and hence the license is unknown.\n" + value: unknown modality: text - size: unknown + monitoring: '' + name: gpt-3.5-turbo dataset + organization: OpenAI + prohibited_uses: '' + quality_control: '' sample: [] + size: unknown + type: dataset + url: https://platform.openai.com/docs/models/gpt-3-5 +- access: limited analysis: '' + created_date: 2022-05-01 + datasheet: '' dependencies: [] - included: '' + description: '' excluded: '' - quality_control: '' - access: limited + feedback: '' + included: '' + intended_uses: '' license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - intended_uses: '' - prohibited_uses: '' + modality: + explanation: code and text + value: code, text monitoring: '' - feedback: '' - -- type: dataset name: code-davinci-002 dataset organization: OpenAI - description: '' - created_date: 2022-05-01 - url: https://platform.openai.com/docs/model-index-for-researchers - datasheet: '' - modality: code and text - size: unknown + prohibited_uses: '' + quality_control: '' sample: [] + size: unknown + type: dataset + url: https://platform.openai.com/docs/model-index-for-researchers +- access: limited analysis: '' - dependencies: [] - included: '' - excluded: '' - quality_control: '' - access: limited + created_date: 2022-05-01 + dependencies: [code-davinci-002 dataset] + description: '' + feedback: '' + intended_uses: '' license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - intended_uses: '' - prohibited_uses: '' + modality: + explanation: code and text + value: code, text; code, text + model_card: '' monitoring: '' - feedback: '' - -- type: model name: code-davinci-002 organization: OpenAI - description: '' - created_date: 2022-05-01 - url: https://platform.openai.com/docs/model-index-for-researchers - model_card: '' - modality: code and text - analysis: '' + prohibited_uses: '' + quality_control: '' size: unknown - dependencies: [code-davinci-002 dataset] training_emissions: '' - training_time: '' training_hardware: '' - quality_control: '' - access: limited + training_time: '' + type: model + url: https://platform.openai.com/docs/model-index-for-researchers +- access: limited + analysis: '' + created_date: 2022-05-01 + dependencies: [code-davinci-002] + description: '' + feedback: '' + intended_uses: '' license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. 
- intended_uses: '' - prohibited_uses: '' + modality: + explanation: text + value: text; text + model_card: '' monitoring: '' - feedback: '' - -- type: model name: text-davinci-002 organization: OpenAI - description: '' - created_date: 2022-05-01 - url: https://platform.openai.com/docs/model-index-for-researchers - model_card: '' - modality: text - analysis: '' + prohibited_uses: '' + quality_control: '' size: unknown - dependencies: [code-davinci-002] training_emissions: '' - training_time: '' training_hardware: '' - quality_control: '' - access: limited + training_time: '' + type: model + url: https://platform.openai.com/docs/model-index-for-researchers +- access: limited + analysis: '' + created_date: 2022-11-30 + dependencies: [text-davinci-002] + description: '' + feedback: '' + intended_uses: '' license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - intended_uses: '' - prohibited_uses: '' + modality: + explanation: text + value: text; text + model_card: '' monitoring: '' - feedback: '' - -- type: model name: text-davinci-003 organization: OpenAI - description: '' - created_date: 2022-11-30 - url: https://platform.openai.com/docs/model-index-for-researchers - model_card: '' - modality: text - analysis: '' + prohibited_uses: '' + quality_control: '' size: unknown - dependencies: [text-davinci-002] training_emissions: '' - training_time: '' training_hardware: '' - quality_control: '' - access: limited - license: - value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. + training_time: '' + type: model + url: https://platform.openai.com/docs/model-index-for-researchers +- access: open + adaptation: '' + created_date: 2023-03-01 + dependencies: [Whisper] + description: API to query OpenAI's Whisper model. + failures: '' + feedback: '' intended_uses: '' - prohibited_uses: '' + license: + explanation: Per the Terms of Use, a limited license is provided to the users + during their use of the API [[Section 2]](https://openai.com/api/policies/terms/). + value: custom monitoring: '' - feedback: '' - -- type: application + monthly_active_users: '' name: Whisper API organization: OpenAI - description: API to query OpenAI's Whisper model. - created_date: 2023-03-01 - url: https://openai.com/blog/introducing-chatgpt-and-whisper-apis - dependencies: [Whisper] - adaptation: '' output_space: '' + prohibited_uses: '' quality_control: '' - access: open + terms_of_service: '' + type: application + url: https://openai.com/blog/introducing-chatgpt-and-whisper-apis + user_distribution: '' +- access: open + adaptation: '' + created_date: 2023-03-01 + dependencies: [ChatGPT] + description: API to query OpenAI's ChatGPT model. + failures: '' + feedback: '' + intended_uses: '' license: - value: custom explanation: Per the Terms of Use, a limited license is provided to the users during their use of the API [[Section 2]](https://openai.com/api/policies/terms/). - terms_of_service: '' - intended_uses: '' - prohibited_uses: '' + value: custom monitoring: '' - feedback: '' monthly_active_users: '' - user_distribution: '' - failures: '' - -- type: application name: ChatGPT API organization: OpenAI - description: API to query OpenAI's ChatGPT model. 
- created_date: 2023-03-01 - url: https://openai.com/blog/introducing-chatgpt-and-whisper-apis - dependencies: [ChatGPT] - adaptation: '' output_space: '' + prohibited_uses: '' quality_control: '' - access: open - license: - value: custom - explanation: Per the Terms of Use, a limited license is provided to the users - during their use of the API [[Section 2]](https://openai.com/api/policies/terms/). terms_of_service: '' - intended_uses: '' - prohibited_uses: '' - monitoring: '' - feedback: '' - monthly_active_users: '' + type: application + url: https://openai.com/blog/introducing-chatgpt-and-whisper-apis user_distribution: '' - failures: '' - -- type: application - name: OpenAI Moderation API - organization: OpenAI +- access: open + adaptation: '' + created_date: 2022-08-10 + dependencies: [OpenAI toxicity classifier] description: This endpoint provides OpenAI API developers with free access to GPT-based classifiers that detect undesired content—an instance of using AI systems to assist with human supervision of these systems. - created_date: 2022-08-10 - url: https://openai.com/blog/new-and-improved-content-moderation-tooling - dependencies: [OpenAI toxicity classifier] - adaptation: '' - output_space: '' - quality_control: '' - access: open + failures: '' + feedback: '' + intended_uses: '' license: - value: custom explanation: Per the Terms of Use, a limited license is provided to the users during their use of the API [[Section 2]](https://openai.com/api/policies/terms/). - terms_of_service: '' - intended_uses: '' - prohibited_uses: '' + value: custom monitoring: '' - feedback: '' monthly_active_users: '' - user_distribution: '' - failures: '' - -- type: model - name: OpenAI toxicity classifier + name: OpenAI Moderation API organization: OpenAI - description: '' - created_date: 2023-01-18 - url: https://time.com/6247678/openai-chatgpt-kenya-workers/#:~:text=In%20a%20statement%2C%20an%20OpenAI,datasets%20of%20tools%20like%20ChatGPT. - model_card: '' - modality: Text + output_space: '' + prohibited_uses: '' + quality_control: '' + terms_of_service: '' + type: application + url: https://openai.com/blog/new-and-improved-content-moderation-tooling + user_distribution: '' +- access: closed analysis: '' - size: unknown + created_date: 2023-01-18 dependencies: [OpenAI toxicity dataset] - training_emissions: '' - training_time: '' - training_hardware: '' - quality_control: '' - access: closed + description: '' + feedback: '' + intended_uses: '' license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - intended_uses: '' - prohibited_uses: '' + modality: + explanation: Text + value: text; text + model_card: '' monitoring: '' - feedback: '' - -- type: dataset - name: OpenAI toxicity dataset + name: OpenAI toxicity classifier organization: OpenAI - description: '' - created_date: 2023-01-18 - url: https://time.com/6247678/openai-chatgpt-kenya-workers/#:~:text=In%20a%20statement%2C%20an%20OpenAI,datasets%20of%20tools%20like%20ChatGPT. - datasheet: '' - modality: text + prohibited_uses: '' + quality_control: '' size: unknown - sample: [] + training_emissions: '' + training_hardware: '' + training_time: '' + type: model + url: https://time.com/6247678/openai-chatgpt-kenya-workers/#:~:text=In%20a%20statement%2C%20an%20OpenAI,datasets%20of%20tools%20like%20ChatGPT. 
+- access: closed analysis: '' + created_date: 2023-01-18 + datasheet: '' dependencies: [] - included: '' + description: '' excluded: '' - quality_control: '' - access: closed + feedback: '' + included: '' + intended_uses: '' license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - intended_uses: '' - prohibited_uses: '' + modality: text monitoring: '' - feedback: '' - -- type: application - name: Sage API + name: OpenAI toxicity dataset organization: OpenAI - description: A chatbot language model available via Quora's Poe + prohibited_uses: '' + quality_control: '' + sample: [] + size: unknown + type: dataset + url: https://time.com/6247678/openai-chatgpt-kenya-workers/#:~:text=In%20a%20statement%2C%20an%20OpenAI,datasets%20of%20tools%20like%20ChatGPT. +- access: limited + adaptation: '' created_date: 2023-02-03 - url: https://quorablog.quora.com/Poe-1 dependencies: [Sage] - adaptation: '' + description: A chatbot language model available via Quora's Poe + failures: '' + feedback: '' + intended_uses: '' + license: unknown + monitoring: '' + monthly_active_users: '' + name: Sage API + organization: OpenAI output_space: '' + prohibited_uses: '' quality_control: '' - access: limited - license: unknown terms_of_service: '' + type: application + url: https://quorablog.quora.com/Poe-1 + user_distribution: '' +- access: limited + adaptation: '' + created_date: 2023-02-03 + dependencies: [Dragonfly] + description: A chatbot language model available via Quora's Poe + failures: '' + feedback: '' intended_uses: '' - prohibited_uses: '' + license: unknown monitoring: '' - feedback: '' monthly_active_users: '' - user_distribution: '' - failures: '' - -- type: application name: Dragonfly API organization: OpenAI - description: A chatbot language model available via Quora's Poe - created_date: 2023-02-03 - url: https://quorablog.quora.com/Poe-1 - dependencies: [Dragonfly] - adaptation: '' output_space: '' + prohibited_uses: '' quality_control: '' - access: limited - license: unknown terms_of_service: '' + type: application + url: https://quorablog.quora.com/Poe-1 + user_distribution: '' +- access: limited + analysis: '' + created_date: 2023-02-03 + dependencies: [] + description: A chatbot language model available via Quora's Poe + feedback: '' intended_uses: '' - prohibited_uses: '' + license: unknown + modality: + explanation: text + value: text; text + model_card: '' monitoring: '' - feedback: '' - monthly_active_users: '' - user_distribution: '' - failures: '' - -- type: model name: Sage organization: OpenAI - description: A chatbot language model available via Quora's Poe - created_date: 2023-02-03 - url: https://quorablog.quora.com/Poe-1 - model_card: '' - modality: text - analysis: '' + prohibited_uses: '' + quality_control: '' size: unknown - dependencies: [] training_emissions: '' - training_time: '' training_hardware: '' - quality_control: '' - access: limited - license: unknown + training_time: '' + type: model + url: https://quorablog.quora.com/Poe-1 +- access: limited + analysis: '' + created_date: 2023-02-03 + dependencies: [] + description: A chatbot language model available via Quora's Poe + feedback: '' intended_uses: '' - prohibited_uses: '' + license: unknown + modality: + explanation: text + value: text; text + model_card: '' monitoring: '' - feedback: '' - -- type: model name: Dragonfly organization: OpenAI - description: A chatbot language model available 
via Quora's Poe - created_date: 2023-02-03 - url: https://quorablog.quora.com/Poe-1 - model_card: '' - modality: text - analysis: '' + prohibited_uses: '' + quality_control: '' size: unknown - dependencies: [] training_emissions: '' - training_time: '' training_hardware: '' - quality_control: '' - access: limited - license: unknown + training_time: '' + type: model + url: https://quorablog.quora.com/Poe-1 +- access: limited + adaptation: '' + created_date: 2023-03-07 + dependencies: [ChatGPT API] + description: The app integrates ChatGPT’s powerful AI technology to deliver instant + conversation summaries, research tools, and writing assistance directly in Slack + to help millions of companies work more productively. + failures: '' + feedback: '' intended_uses: '' - prohibited_uses: '' + license: unknown monitoring: '' - feedback: '' - -- type: application + monthly_active_users: '' name: ChatGPT for Slack organization: OpenAI, Salesforce - description: The app integrates ChatGPT’s powerful AI technology to deliver instant - conversation summaries, research tools, and writing assistance directly in Slack - to help millions of companies work more productively. - created_date: 2023-03-07 - url: https://www.salesforce.com/news/stories/chatgpt-app-for-slack/ - dependencies: [ChatGPT API] - adaptation: '' output_space: '' + prohibited_uses: '' quality_control: '' - access: limited - license: unknown terms_of_service: '' - intended_uses: '' - prohibited_uses: '' - monitoring: '' - feedback: '' - monthly_active_users: '' + type: application + url: https://www.salesforce.com/news/stories/chatgpt-app-for-slack/ user_distribution: '' - failures: '' - -- type: model - name: GPT-4 - organization: OpenAI - description: '' - created_date: 2023-03-14 - url: https://arxiv.org/abs/2303.08774 - model_card: '' - modality: text and image +- access: limited analysis: '' - size: unknown + created_date: 2023-03-14 dependencies: [] - training_emissions: '' - training_time: '' - training_hardware: '' - quality_control: '' - access: limited + description: '' + feedback: '' + intended_uses: '' license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - intended_uses: '' - prohibited_uses: '' + modality: + explanation: text and image + value: image, text; image, text + model_card: '' monitoring: '' - feedback: '' - -- type: application - name: GPT-4 API + name: GPT-4 organization: OpenAI - description: GPT-4 is OpenAI’s most advanced system, producing safer and more - useful responses + prohibited_uses: '' + quality_control: '' + size: unknown + training_emissions: '' + training_hardware: '' + training_time: '' + type: model + url: https://arxiv.org/abs/2303.08774 +- access: limited + adaptation: '' created_date: 2023-03-14 - url: https://openai.com/product/gpt-4 dependencies: [GPT-4] - adaptation: '' - output_space: '' - quality_control: '' - access: limited + description: GPT-4 is OpenAI’s most advanced system, producing safer and more + useful responses + failures: '' + feedback: '' + intended_uses: '' license: - value: custom explanation: Per the Terms of Use, a limited license is provided to the users during their use of the API [[Section 2]](https://openai.com/api/policies/terms/). 
- terms_of_service: '' - intended_uses: '' - prohibited_uses: '' + value: custom monitoring: '' - feedback: '' monthly_active_users: '' + name: GPT-4 API + organization: OpenAI + output_space: '' + prohibited_uses: '' + quality_control: '' + terms_of_service: '' + type: application + url: https://openai.com/product/gpt-4 user_distribution: '' - failures: '' diff --git a/assets/othersideai.yaml b/assets/othersideai.yaml index 0753d45c..7d9b0f05 100644 --- a/assets/othersideai.yaml +++ b/assets/othersideai.yaml @@ -1,42 +1,31 @@ --- - -- type: application - name: HyperWrite - # General - organization: OthersideAI - description: > - HyperWrite is a writing assistant that generates text based on a user's - request, as well as style and tone choices. +- access: limited + adaptation: unknown created_date: unknown - url: https://hyperwriteai.com/ - # Construction dependencies: [OpenAI API] - adaptation: unknown - output_space: Generation - quality_control: - value: unknown - explanation: > - There is a HyperWrite blog warning the users about the risks of writing - with an AI assistant, but the company doesn't list the quality control - measures taken to prevent the listed risks, if any - [[HyperWrite Blog]] - (https://blog.hyperwrite.ai/what-are-the-risks-of-ai-writing/). - # Downstream - access: limited + description: "HyperWrite is a writing assistant that generates text based on a\ + \ user's request, as well as style and tone choices.\n" + failures: unknown + feedback: unknown + intended_uses: "HyperWrite is intended to be used as a writing assistant.\n" license: + explanation: "\"The Software, including software embedded in the Software, is\ + \ licensed, not sold, to you by Otherside only under the terms of this Agreement;\ + \ and Otherside reserves all rights not expressly granted under this Agreemen\"\ + \ - excerpt from the Terms of Service.\n" value: custom - explanation: > - "The Software, including software embedded in the Software, is licensed, - not sold, to you by Otherside only under the terms of this Agreement; and - Otherside reserves all rights not expressly granted under this Agreemen" - - excerpt from the Terms of Service. - terms_of_service: https://hyperwriteai.com/terms - intended_uses: > - HyperWrite is intended to be used as a writing assistant. - prohibited_uses: unknown monitoring: unknown - feedback: unknown - # Deployment monthly_active_users: unknown + name: HyperWrite + organization: OthersideAI + output_space: Generation + prohibited_uses: unknown + quality_control: + explanation: "There is a HyperWrite blog warning the users about the risks of\ + \ writing with an AI assistant, but the company doesn't list the quality control\ + \ measures taken to prevent the listed risks, if any [[HyperWrite Blog]] (https://blog.hyperwrite.ai/what-are-the-risks-of-ai-writing/).\n" + value: unknown + terms_of_service: https://hyperwriteai.com/terms + type: application + url: https://hyperwriteai.com/ user_distribution: unknown - failures: unknown diff --git a/assets/oxford.yaml b/assets/oxford.yaml index d57f98bc..c6f1477c 100644 --- a/assets/oxford.yaml +++ b/assets/oxford.yaml @@ -1,133 +1,110 @@ --- - -- type: dataset - name: WebVid-10M - # General - organization: University of Oxford - description: > - WebVid-10M is a large-scale dataset of short videos with textual descriptions - sourced from stock footage sites. +- access: + explanation: The captions and video url's can be found at the [[dataset page]](https://m-bain.github.io/webvid-dataset/). 
+ CLIP ViT-B/32 features of this dataset can be found at the [[WebVid github + repository]](https://github.com/m-bain/webvid#download-clip-features-%EF%B8%8F). + value: open + analysis: unknown created_date: + explanation: "The date the [[paper]](https://arxiv.org/abs/2104.00650) was released.\n" value: 2021-04-01 - explanation: > - The date the [[paper]](https://arxiv.org/abs/2104.00650) was released. - url: https://m-bain.github.io/webvid-dataset/ datasheet: none - modality: text (English) and video - size: 10.7M video-text pairs, 52K hours video - sample: [] - analysis: unknown - # Construction dependencies: [] - license: - value: WebVid Dataset Terms - explanation: > - [WebVid Dataset Terms](https://github.com/m-bain/webvid/blob/main/TERMS.md) + description: "WebVid-10M is a large-scale dataset of short videos with textual\ + \ descriptions sourced from stock footage sites.\n" + excluded: unknown + feedback: Feedback can be given by emailing at maxbain at robots.ox.ac.uk included: - value: Video URLs and textual description annotations explanation: Authors note in the paper:"We scrape the web for a new dataset of videos with textual description annotations, called WebVid-2M. The data was scraped from the web following a similar procedure to Google Conceptual Captions" - excluded: unknown - quality_control: unknown - # Downstream - access: - value: open - explanation: The captions and video url's can be found at the [[dataset page]](https://m-bain.github.io/webvid-dataset/). - CLIP ViT-B/32 features of this dataset can be found at the [[WebVid github - repository]](https://github.com/m-bain/webvid#download-clip-features-%EF%B8%8F). + value: Video URLs and textual description annotations intended_uses: - value: > - "Provided you keep to these rules, the University grants you (the researcher) - a non-exclusive and non-transferable licence to use the content free of charge - strictly for non-commercial research (i.e., whose output artefacts are not - incorporated - in commercial products) for 12 months." explanation: See [[Terms of Access]](https://github.com/m-bain/webvid/blob/main/TERMS.md) - prohibited_uses: > - - Authors note the following prohibited uses: "You must not use the content - other than for the Permitted Purpose in strict - conformity with these terms and any other reasonable instructions of the University. - You must not, except as may be strictly necessary for carrying out the Permitted - Purpose, - provide or otherwise make available content to any third party or allow use - of it or them by or on behalf of any third party, in whole or in part, whether - by way of sale, resale, loan, transfer, hire or any other form of exploitation; - or attempt to identify any living or deceased individual from the content." - [[Terms of Access]](https://github.com/m-bain/webvid/blob/main/TERMS.md) - - - Authors also note the following limitations of the dataset: "We note that - data - sourced from the web may be prone to biases and may contain graphic content. - Please be careful of unintended societal, gender, racial and other biases when - training or deploying models trained on this data." 
- [[Disclaimer]](https://github.com/m-bain/webvid#disclaimer-%EF%B8%8F) + value: "\"Provided you keep to these rules, the University grants you (the researcher)\ + \ a non-exclusive and non-transferable licence to use the content free of\ + \ charge strictly for non-commercial research (i.e., whose output artefacts\ + \ are not incorporated in commercial products) for 12 months.\"\n" + license: + explanation: "[WebVid Dataset Terms](https://github.com/m-bain/webvid/blob/main/TERMS.md)\n" + value: WebVid Dataset Terms + modality: + explanation: text (English) and video + value: text, video monitoring: '' - feedback: Feedback can be given by emailing at maxbain at robots.ox.ac.uk - -- type: dataset - name: WebVid-2M - # General + name: WebVid-10M organization: University of Oxford - description: > - WebVid-2M is a large-scale dataset of 2.5M short videos with textual descriptions - sourced from stock footage sites. A subset of the WebVid-10M dataset. + prohibited_uses: "- Authors note the following prohibited uses: \"You must not\ + \ use the content other than for the Permitted Purpose in strict conformity\ + \ with these terms and any other reasonable instructions of the University.\ + \ You must not, except as may be strictly necessary for carrying out the Permitted\ + \ Purpose, provide or otherwise make available content to any third party or\ + \ allow use of it or them by or on behalf of any third party, in whole or in\ + \ part, whether by way of sale, resale, loan, transfer, hire or any other form\ + \ of exploitation; or attempt to identify any living or deceased individual\ + \ from the content.\" [[Terms of Access]](https://github.com/m-bain/webvid/blob/main/TERMS.md)\n\ + - Authors also note the following limitations of the dataset: \"We note that\ + \ data sourced from the web may be prone to biases and may contain graphic content.\ + \ Please be careful of unintended societal, gender, racial and other biases\ + \ when training or deploying models trained on this data.\" [[Disclaimer]](https://github.com/m-bain/webvid#disclaimer-%EF%B8%8F)\n" + quality_control: unknown + sample: [] + size: 10.7M video-text pairs, 52K hours video + type: dataset + url: https://m-bain.github.io/webvid-dataset/ +- access: + explanation: The captions and video url's can be found at the [[dataset page]](https://m-bain.github.io/webvid-dataset/). + CLIP ViT-B/32 features of this dataset can be found at the [[WebVid github + repository]](https://github.com/m-bain/webvid#download-clip-features-%EF%B8%8F). + value: open + analysis: unknown created_date: + explanation: "The date the [[paper]](https://arxiv.org/abs/2104.00650) was released.\n" value: 2021-04-01 - explanation: > - The date the [[paper]](https://arxiv.org/abs/2104.00650) was released. - url: https://m-bain.github.io/webvid-dataset/ datasheet: none - modality: text (English) and video - size: 2.5M video-text pairs, 13K hours video - sample: [] - analysis: unknown - # Construction dependencies: [WebVid-10M] - license: - value: WebVid Dataset Terms - explanation: > - [WebVid Dataset Terms](https://github.com/m-bain/webvid/blob/main/TERMS.md) + description: "WebVid-2M is a large-scale dataset of 2.5M short videos with textual\ + \ descriptions sourced from stock footage sites. 
A subset of the WebVid-10M\ + \ dataset.\n" + excluded: unknown + feedback: Feedback can be given by emailing at maxbain at robots.ox.ac.uk included: - value: Video URLs and textual description annotations explanation: Authors note in the paper:"We scrape the web for a new dataset of videos with textual description annotations, called WebVid-2M. The data was scraped from the web following a similar procedure to Google Conceptual Captions" - excluded: unknown - quality_control: unknown - # Downstream - access: - value: open - explanation: The captions and video url's can be found at the [[dataset page]](https://m-bain.github.io/webvid-dataset/). - CLIP ViT-B/32 features of this dataset can be found at the [[WebVid github - repository]](https://github.com/m-bain/webvid#download-clip-features-%EF%B8%8F). + value: Video URLs and textual description annotations intended_uses: - value: > - "Provided you keep to these rules, the University grants you (the researcher) - a non-exclusive and non-transferable licence to use the content free of charge - strictly for non-commercial research (i.e., whose output artefacts are not - incorporated - in commercial products) for 12 months." explanation: See [[Terms of Access]](https://github.com/m-bain/webvid/blob/main/TERMS.md) - prohibited_uses: > - - Authors note the following prohibited uses: "You must not use the content - other than for the Permitted Purpose in strict - conformity with these terms and any other reasonable instructions of the University. - You must not, except as may be strictly necessary for carrying out the Permitted - Purpose, - provide or otherwise make available content to any third party or allow use - of it or them by or on behalf of any third party, in whole or in part, whether - by way of sale, resale, loan, transfer, hire or any other form of exploitation; - or attempt to identify any living or deceased individual from the content." - [[Terms of Access]](https://github.com/m-bain/webvid/blob/main/TERMS.md) - - - Authors also note the following limitations of the dataset: "We note that - data - sourced from the web may be prone to biases and may contain graphic content. - Please be careful of unintended societal, gender, racial and other biases when - training or deploying models trained on this data." 
- [[Disclaimer]](https://github.com/m-bain/webvid#disclaimer-%EF%B8%8F) + value: "\"Provided you keep to these rules, the University grants you (the researcher)\ + \ a non-exclusive and non-transferable licence to use the content free of\ + \ charge strictly for non-commercial research (i.e., whose output artefacts\ + \ are not incorporated in commercial products) for 12 months.\"\n" + license: + explanation: "[WebVid Dataset Terms](https://github.com/m-bain/webvid/blob/main/TERMS.md)\n" + value: WebVid Dataset Terms + modality: + explanation: text (English) and video + value: text, video monitoring: '' - feedback: Feedback can be given by emailing at maxbain at robots.ox.ac.uk + name: WebVid-2M + organization: University of Oxford + prohibited_uses: "- Authors note the following prohibited uses: \"You must not\ + \ use the content other than for the Permitted Purpose in strict conformity\ + \ with these terms and any other reasonable instructions of the University.\ + \ You must not, except as may be strictly necessary for carrying out the Permitted\ + \ Purpose, provide or otherwise make available content to any third party or\ + \ allow use of it or them by or on behalf of any third party, in whole or in\ + \ part, whether by way of sale, resale, loan, transfer, hire or any other form\ + \ of exploitation; or attempt to identify any living or deceased individual\ + \ from the content.\" [[Terms of Access]](https://github.com/m-bain/webvid/blob/main/TERMS.md)\n\ + - Authors also note the following limitations of the dataset: \"We note that\ + \ data sourced from the web may be prone to biases and may contain graphic content.\ + \ Please be careful of unintended societal, gender, racial and other biases\ + \ when training or deploying models trained on this data.\" [[Disclaimer]](https://github.com/m-bain/webvid#disclaimer-%EF%B8%8F)\n" + quality_control: unknown + sample: [] + size: 2.5M video-text pairs, 13K hours video + type: dataset + url: https://m-bain.github.io/webvid-dataset/ diff --git a/assets/paladin.yaml b/assets/paladin.yaml index 76870f97..80aec963 100644 --- a/assets/paladin.yaml +++ b/assets/paladin.yaml @@ -1,22 +1,21 @@ --- - -- type: application - name: UnderwriteGPT - organization: Paladin Group and Dais Technology - description: UnderwriteGPT is the world's first generative AI underwriting tool. +- access: limited + adaptation: '' created_date: 2023-02-01 - url: https://dais.com/underwritegpt/ dependencies: [] - adaptation: '' - output_space: '' - quality_control: '' - access: limited - license: '' - terms_of_service: '' + description: UnderwriteGPT is the world's first generative AI underwriting tool. 
+ failures: '' + feedback: '' intended_uses: '' - prohibited_uses: '' + license: '' monitoring: '' - feedback: '' monthly_active_users: '' + name: UnderwriteGPT + organization: Paladin Group and Dais Technology + output_space: '' + prohibited_uses: '' + quality_control: '' + terms_of_service: '' + type: application + url: https://dais.com/underwritegpt/ user_distribution: '' - failures: '' diff --git a/assets/perplexity.yaml b/assets/perplexity.yaml index 117a7acc..8bf35081 100644 --- a/assets/perplexity.yaml +++ b/assets/perplexity.yaml @@ -1,52 +1,50 @@ --- - -- type: application - name: Perplexity Ask - organization: Perplexity - description: Perplexity Ask is a new search interface that uses advanced artificial - intelligence technologies +- access: open + adaptation: '' created_date: - value: 2022-12-07 explanation: Perplexity Ask was announced in a [[Twitter post]](https://twitter.com/perplexity_ai/status/1600551871554338816?s=20) by the company. - url: https://www.perplexity.ai/ + value: 2022-12-07 dependencies: [GPT-3.5, Bing Search] - adaptation: '' - output_space: '' - quality_control: '' - access: open + description: Perplexity Ask is a new search interface that uses advanced artificial + intelligence technologies + failures: '' + feedback: '' + intended_uses: '' license: - value: none explanation: No license was found. - terms_of_service: '' - intended_uses: '' - prohibited_uses: '' + value: none monitoring: '' - feedback: '' monthly_active_users: '' - user_distribution: '' - failures: '' - -- type: application - name: Bird SQL + name: Perplexity Ask organization: Perplexity - description: Twitter search interface that is powered by Perplexity's structured - search engine. - created_date: 2022-12-15 - url: https://www.perplexity.ai/sql - dependencies: [Perplexity Ask, OpenAI API] - adaptation: '' output_space: '' + prohibited_uses: '' quality_control: '' - access: deprecated - license: - value: none - explanation: No license was found. terms_of_service: '' + type: application + url: https://www.perplexity.ai/ + user_distribution: '' +- access: deprecated + adaptation: '' + created_date: 2022-12-15 + dependencies: [Perplexity Ask, OpenAI API] + description: Twitter search interface that is powered by Perplexity's structured + search engine. + failures: '' + feedback: '' intended_uses: '' - prohibited_uses: '' + license: + explanation: No license was found. 
+ value: none monitoring: '' - feedback: '' monthly_active_users: '' + name: Bird SQL + organization: Perplexity + output_space: '' + prohibited_uses: '' + quality_control: '' + terms_of_service: '' + type: application + url: https://www.perplexity.ai/sql user_distribution: '' - failures: '' diff --git a/assets/portkey.yaml b/assets/portkey.yaml index 2fca2885..51fac815 100644 --- a/assets/portkey.yaml +++ b/assets/portkey.yaml @@ -1,23 +1,22 @@ --- - -- type: application - name: Portkey - organization: Portkey - description: Portkey is a hosted middleware that allows users to create generative - AI applications +- access: open + adaptation: '' created_date: 2023-05-06 - url: https://portkey.ai/ dependencies: [] - adaptation: '' - output_space: generative AI apps - quality_control: '' - access: open - license: '' - terms_of_service: https://portkey.ai/terms + description: Portkey is a hosted middleware that allows users to create generative + AI applications + failures: '' + feedback: '' intended_uses: '' - prohibited_uses: '' + license: '' monitoring: '' - feedback: '' monthly_active_users: '' + name: Portkey + organization: Portkey + output_space: generative AI apps + prohibited_uses: '' + quality_control: '' + terms_of_service: https://portkey.ai/terms + type: application + url: https://portkey.ai/ user_distribution: '' - failures: '' diff --git a/assets/quizlet.yaml b/assets/quizlet.yaml index cfd5d9c6..a7580d0b 100644 --- a/assets/quizlet.yaml +++ b/assets/quizlet.yaml @@ -1,26 +1,25 @@ --- - -- type: application - name: Q-Chat - organization: Quizlet +- access: open + adaptation: '' + created_date: 2023-03-01 + dependencies: [ChatGPT API] description: Quizlet is introducing Q-Chat, a fully-adaptive AI tutor that engages students with adaptive questions based on relevant study materials delivered through a fun chat experience. - created_date: 2023-03-01 - url: https://openai.com/blog/introducing-chatgpt-and-whisper-apis - dependencies: [ChatGPT API] - adaptation: '' - output_space: '' - quality_control: '' - access: open + failures: '' + feedback: '' + intended_uses: '' license: - value: none explanation: No license was found. - terms_of_service: https://quizlet.com/tos - intended_uses: '' - prohibited_uses: '' + value: none monitoring: '' - feedback: '' monthly_active_users: '' + name: Q-Chat + organization: Quizlet + output_space: '' + prohibited_uses: '' + quality_control: '' + terms_of_service: https://quizlet.com/tos + type: application + url: https://openai.com/blog/introducing-chatgpt-and-whisper-apis user_distribution: '' - failures: '' diff --git a/assets/quora.yaml b/assets/quora.yaml index e28e31a2..2b32f96d 100644 --- a/assets/quora.yaml +++ b/assets/quora.yaml @@ -1,32 +1,31 @@ --- - -- type: application - name: Poe - organization: Quora - description: Poe lets people ask questions, get instant answers, and have back-and-forth - conversations with several AI-powered bots. It is initially available on iOS, - but we will be adding support for all major platforms in the next few months, - along with more bots. +- access: limited + adaptation: '' created_date: 2023-02-03 - url: https://quorablog.quora.com/Poe-1 dependencies: - ChatGPT API - GPT-4 API - Claude API - Dragonfly API - Sage API - adaptation: '' - output_space: '' - quality_control: '' - access: limited + description: Poe lets people ask questions, get instant answers, and have back-and-forth + conversations with several AI-powered bots. 
It is initially available on iOS, + but we will be adding support for all major platforms in the next few months, + along with more bots. + failures: '' + feedback: '' + intended_uses: '' license: - value: none explanation: No license was found. - terms_of_service: https://poe.com/tos - intended_uses: '' - prohibited_uses: '' + value: none monitoring: '' - feedback: '' monthly_active_users: '' + name: Poe + organization: Quora + output_space: '' + prohibited_uses: '' + quality_control: '' + terms_of_service: https://poe.com/tos + type: application + url: https://quorablog.quora.com/Poe-1 user_distribution: '' - failures: '' diff --git a/assets/robin.yaml b/assets/robin.yaml index 123cc804..813dc634 100644 --- a/assets/robin.yaml +++ b/assets/robin.yaml @@ -1,27 +1,26 @@ --- - -- type: application - name: Robin AI - organization: Robin AI +- access: limited + adaptation: '' + created_date: unknown + dependencies: [Anthropic API] description: Robin AI uses Claude and Anthropic's models to understand language - including in technical domains like legal language. It's also very confident at drafting, summarising, translations, and explaining complex concepts in simple terms - created_date: unknown - url: https://www.robinai.co.uk/ - dependencies: [Anthropic API] - adaptation: '' - output_space: '' - quality_control: '' - access: limited + failures: '' + feedback: '' + intended_uses: '' license: - value: none explanation: No license was found. - terms_of_service: https://www.robinai.co.uk/terms - intended_uses: '' - prohibited_uses: '' + value: none monitoring: '' - feedback: '' monthly_active_users: '' + name: Robin AI + organization: Robin AI + output_space: '' + prohibited_uses: '' + quality_control: '' + terms_of_service: https://www.robinai.co.uk/terms + type: application + url: https://www.robinai.co.uk/ user_distribution: '' - failures: '' diff --git a/assets/salesforce.yaml b/assets/salesforce.yaml index 19a1d021..f6c12de0 100644 --- a/assets/salesforce.yaml +++ b/assets/salesforce.yaml @@ -1,42 +1,35 @@ --- - -- type: model - name: CodeGen - organization: Salesforce - description: CodeGen is a language model for code +- access: + explanation: Model weights are available for download with links in the [[GitHub + repo]](https://github.com/salesforce/CodeGen) + value: open + analysis: '' created_date: - value: 2022-03-25 explanation: Date model paper was released - url: https://arxiv.org/abs/2203.13474 - model_card: '' - modality: text (English) and code - analysis: '' - size: 16B parameters (dense model) + value: 2022-03-25 dependencies: [] - training_emissions: '' - training_time: '' - training_hardware: Unspecified Salesforce Compute (TPU-V4s) - quality_control: '' - access: - value: open - explanation: Model weights are available for download with links in the [[GitHub - repo]](https://github.com/salesforce/CodeGen) - license: none (model weights), BSD-3-Clause (code) + description: CodeGen is a language model for code + feedback: '' intended_uses: '' - prohibited_uses: '' + license: none (model weights), BSD-3-Clause (code) + modality: + explanation: text (English) and code + value: code, text; code, text + model_card: '' monitoring: '' - feedback: '' - -- type: model - name: BLIP + name: CodeGen organization: Salesforce - description: '' - created_date: 2022-01-28 - url: https://arxiv.org/abs/2201.12086 - model_card: '' - modality: text and image + prohibited_uses: '' + quality_control: '' + size: 16B parameters (dense) + training_emissions: '' + training_hardware: Unspecified 
Salesforce Compute (TPU-V4s) + training_time: '' + type: model + url: https://arxiv.org/abs/2203.13474 +- access: open analysis: '' - size: unknown + created_date: 2022-01-28 dependencies: - ViT-B - BERT @@ -46,60 +39,68 @@ - Conceptual 12M - SBU Captions - LAION-115M - training_emissions: '' - training_time: '' - training_hardware: '' - quality_control: '' - access: open - license: BSD-3-Clause + description: '' + feedback: '' intended_uses: '' - prohibited_uses: '' + license: BSD-3-Clause + modality: + explanation: text and image + value: image, text; image, text + model_card: '' monitoring: '' - feedback: '' - -- type: dataset - name: LAION-115M + name: BLIP organization: Salesforce - description: '' - created_date: 2022-01-28 + prohibited_uses: '' + quality_control: '' + size: unknown + training_emissions: '' + training_hardware: '' + training_time: '' + type: model url: https://arxiv.org/abs/2201.12086 - datasheet: '' - modality: text and image - size: 115M image-text pairs - sample: [] +- access: open analysis: '' + created_date: 2022-01-28 + datasheet: '' dependencies: [LAION-400M] - included: '' + description: '' excluded: '' - quality_control: '' - access: open - license: BSD-3-Clause + feedback: '' + included: '' intended_uses: '' - prohibited_uses: '' + license: BSD-3-Clause + modality: + explanation: text and image + value: image, text monitoring: '' - feedback: '' - -- type: application - name: EinsteinGPT + name: LAION-115M organization: Salesforce - description: EinsteinGPT is generative AI for customer relationship management - (CRFM). + prohibited_uses: '' + quality_control: '' + sample: [] + size: 115M image-text pairs + type: dataset + url: https://arxiv.org/abs/2201.12086 +- access: limited + adaptation: '' created_date: 2023-03-07 - url: https://www.salesforce.com/products/einstein/overview/?d=cta-body-promo-8 dependencies: [ChatGPT API] - adaptation: '' - output_space: '' - quality_control: '' - access: limited + description: EinsteinGPT is generative AI for customer relationship management + (CRFM). + failures: '' + feedback: '' + intended_uses: '' license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - terms_of_service: '' - intended_uses: '' - prohibited_uses: '' monitoring: '' - feedback: '' monthly_active_users: '' + name: EinsteinGPT + organization: Salesforce + output_space: '' + prohibited_uses: '' + quality_control: '' + terms_of_service: '' + type: application + url: https://www.salesforce.com/products/einstein/overview/?d=cta-body-promo-8 user_distribution: '' - failures: '' diff --git a/assets/sana.yaml b/assets/sana.yaml index 09bda05f..22234f8f 100644 --- a/assets/sana.yaml +++ b/assets/sana.yaml @@ -1,55 +1,40 @@ --- - -- type: application - name: Sana - # General - organization: Sana - description: > - "Sana is your all-in-one, AI-assisted, online learning platform (LMS). - Author employee training courses and measure team development with Sana's - powerful analytics. Sana partners with the world's most important - organizations and fastest-growing startups to make personalized, adaptive - learning available for everyone, everywhere" - [[Sana GPT-3 Demo]](https://gpt3demo.com/apps/sanalabs). 
+- access: limited + adaptation: "Customized GPT-3, fine-tuned on private data [[Sana GPT-3 Demo]](https://gpt3demo.com/apps/sanalabs).\n" created_date: + explanation: "The company was founded in 2016 according to [[Crunchbase]](https://www.crunchbase.com/organization/sana-2).\ + \ It is unclear when Sana adapted the OpenAI API to their products.\n" value: unknown - explanation: > - The company was founded in 2016 according to - [[Crunchbase]](https://www.crunchbase.com/organization/sana-2). It is - unclear when Sana adapted the OpenAI API to their products. - url: https://www.sanalabs.com/ - # Construction dependencies: [OpenAI API] - adaptation: > - Customized GPT-3, fine-tuned on private data - [[Sana GPT-3 Demo]](https://gpt3demo.com/apps/sanalabs). - output_space: question and answer, summarization, sentiment analysis,topic identification - quality_control: unknown - # Downstream - access: limited + description: "\"Sana is your all-in-one, AI-assisted, online learning platform\ + \ (LMS). Author employee training courses and measure team development with\ + \ Sana's powerful analytics. Sana partners with the world's most important organizations\ + \ and fastest-growing startups to make personalized, adaptive learning available\ + \ for everyone, everywhere\" [[Sana GPT-3 Demo]](https://gpt3demo.com/apps/sanalabs).\n" + failures: unknown + feedback: unknown + intended_uses: "Sana is intended to be used by employers to provide a learning\ + \ service for their employees.\n" license: + explanation: "\"Sana Labs grants Subscriber a limited non-exclusive, non-transferable,\ + \ non-sublicensable license to use Sana Labs' web-based personalized and collaborative\ + \ learning platform (“Sana Platform”) solely in connection with the subscribed\ + \ Services in accordance with the Agreement.\" - excerpt from the Terms of\ + \ Service.\n" value: custom - explanation: > - "Sana Labs grants Subscriber a limited non-exclusive, non-transferable, - non-sublicensable license to use Sana Labs' web-based personalized and - collaborative learning platform (“Sana Platform”) solely in connection - with the subscribed Services in accordance with the Agreement." - - excerpt from the Terms of Service. - terms_of_service: https://www.sanalabs.com/legal/ - intended_uses: > - Sana is intended to be used by employers to provide a learning service for - their employees. - prohibited_uses: - value: none - explanation: > - "Sana provides an extensive set of legal documents - [[Sana Legal]](https://www.sanalabs.com/legal/), but missing from the - documents are prohibited uses of the Sana platform, beyond a mention of - what is impermissible under relevant law." - exceprt from the Terms of - Service document. 
monitoring: unknown - feedback: unknown - # Deployment monthly_active_users: unknown + name: Sana + organization: Sana + output_space: question and answer, summarization, sentiment analysis, topic identification + prohibited_uses: + explanation: "\"Sana provides an extensive set of legal documents [[Sana Legal]](https://www.sanalabs.com/legal/),\ + \ but missing from the documents are prohibited uses of the Sana platform,\ + \ beyond a mention of what is impermissible under relevant law.\" - excerpt\ + \ from the Terms of Service document.\n" + value: none + quality_control: unknown + terms_of_service: https://www.sanalabs.com/legal/ + type: application + url: https://www.sanalabs.com/ user_distribution: unknown - failures: unknown diff --git a/assets/shanghai.yaml b/assets/shanghai.yaml index 8dae5081..80208ec7 100644 --- a/assets/shanghai.yaml +++ b/assets/shanghai.yaml @@ -1,15 +1,7 @@ --- - -- type: model - name: InternVideo - organization: Shanghai AI Laboratory - description: '' - created_date: 2022-12-06 - url: https://arxiv.org/pdf/2212.03191.pdf - model_card: '' - modality: text and video +- access: open analysis: '' - size: 1.3B parameters (dense model) + created_date: 2022-12-06 dependencies: - Kinetics-400 - WebVid-2M - HowTo100M - AVA - Something-Something-v2 - Kinetics-710 - training_emissions: '' - training_time: '' - training_hardware: '' - quality_control: '' - access: open - license: Apache 2.0 + description: '' + feedback: '' intended_uses: '' - prohibited_uses: '' + license: Apache 2.0 + modality: + explanation: text and video + value: text, video; text, video + model_card: '' monitoring: '' - feedback: '' - -- type: model - name: Lego-MT + name: InternVideo organization: Shanghai AI Laboratory + prohibited_uses: '' + quality_control: '' + size: 1.3B parameters (dense) + training_emissions: '' + training_hardware: '' + training_time: '' + type: model + url: https://arxiv.org/pdf/2212.03191.pdf +- access: open + analysis: Evaluated based on own constructed dataset covering 433 languages. + created_date: 2023-05-29 + dependencies: [OPUS] description: Lego-MT is a multilingual large language model which uses a more efficient approach of being an effective detachable model. - created_date: 2023-05-29 - url: https://arxiv.org/pdf/2212.10551.pdf + feedback: '' + intended_uses: '' + license: '' + modality: + explanation: multilingual text + value: text; text model_card: '' - modality: multilingual text - analysis: Evaluated based on own constructed dataset covering 433 languages. - size: 1.2B parameters - dependencies: [OPUS] + monitoring: '' + name: Lego-MT + organization: Shanghai AI Laboratory + prohibited_uses: '' + quality_control: '' + size: 1.2B parameters (dense) training_emissions: unknown - training_time: 15 days training_hardware: 32 A100 GPUs - quality_control: '' - access: open - license: '' - intended_uses: '' - prohibited_uses: '' - monitoring: '' - feedback: '' + training_time: 15 days + type: model + url: https://arxiv.org/pdf/2212.10551.pdf diff --git a/assets/shop.yaml b/assets/shop.yaml index 5700ad95..60403fc0 100644 --- a/assets/shop.yaml +++ b/assets/shop.yaml @@ -1,25 +1,24 @@ --- - -- type: application - name: Shop Assistant - organization: Shop +- access: open + adaptation: '' + created_date: 2023-03-01 + dependencies: [ChatGPT API] description: When shoppers search for products, the shopping assistant makes personalized recommendations based on their requests.
Shop’s new AI-powered shopping assistant will streamline in-app shopping by scanning millions of products to quickly find what buyers are looking for—or help them discover something new. - created_date: 2023-03-01 - url: https://openai.com/blog/introducing-chatgpt-and-whisper-apis - dependencies: [ChatGPT API] - adaptation: '' - output_space: '' - quality_control: '' - access: open - license: '' - terms_of_service: '' + failures: '' + feedback: '' intended_uses: '' - prohibited_uses: '' + license: '' monitoring: '' - feedback: '' monthly_active_users: '' + name: Shop Assistant + organization: Shop + output_space: '' + prohibited_uses: '' + quality_control: '' + terms_of_service: '' + type: application + url: https://openai.com/blog/introducing-chatgpt-and-whisper-apis user_distribution: '' - failures: '' diff --git a/assets/singapore.yaml b/assets/singapore.yaml index 5e0ab85c..3e378eb9 100644 --- a/assets/singapore.yaml +++ b/assets/singapore.yaml @@ -1,27 +1,28 @@ --- - -- type: model - name: GOAT - organization: National University of Singapore - description: GOAT is a fine-tuned LLaMA model which uses the tokenization of numbers - to significantly outperform benchmark standards on a range of arithmetic tasks. - created_date: 2023-05-23 - url: https://arxiv.org/pdf/2305.14201.pdf - model_card: none - modality: textual input, numerical data output +- access: open analysis: Performance assessed on BIG-bench arithmetic sub-task, and various elementary arithmetic tasks. - size: 7B parameters + created_date: 2023-05-23 dependencies: [LLaMA, GOAT dataset] - training_emissions: unknown - training_time: unknown - training_hardware: 24 GB VRAM GPU - quality_control: Number data is randomly generated from log space to reduce likelihood - of redundancy and range of magnitudes. - access: open - license: Apache 2.0 + description: GOAT is a fine-tuned LLaMA model which uses the tokenization of numbers + to significantly outperform benchmark standards on a range of arithmetic tasks. + feedback: '' intended_uses: Integration into other instruction-tuned LLMs to further enhance arithmetic reasoning abilities in solving math word problems. - prohibited_uses: '' + license: Apache 2.0 + modality: + explanation: textual input, numerical data output + value: text; other + model_card: none monitoring: '' - feedback: '' + name: GOAT + organization: National University of Singapore + prohibited_uses: '' + quality_control: Number data is randomly generated from log space to reduce likelihood + of redundancy and range of magnitudes. + size: 7B parameters (dense) + training_emissions: unknown + training_hardware: 24 GB VRAM GPU + training_time: unknown + type: model + url: https://arxiv.org/pdf/2305.14201.pdf diff --git a/assets/snap.yaml b/assets/snap.yaml index 67727722..d5a384d3 100644 --- a/assets/snap.yaml +++ b/assets/snap.yaml @@ -1,30 +1,28 @@ --- - -- type: application - name: My AI for Snapchat - organization: Snap +- access: open + adaptation: '' + created_date: 2023-03-01 + dependencies: [ChatGPT API] description: My AI offers Snapchatters a friendly, customizable chatbot at their fingertips that offers recommendations, and can even write a haiku for friends in seconds. Snapchat, where communication and messaging is a daily behavior, has 750 million monthly Snapchatters. 
- created_date: 2023-03-01 - url: https://openai.com/blog/introducing-chatgpt-and-whisper-apis - dependencies: [ChatGPT API] - adaptation: '' - output_space: '' - quality_control: '' - access: open + failures: '' + feedback: '' + intended_uses: '' license: + explanation: "\"Snap grants you a worldwide, royalty-free, non-assignable, non-exclusive,\ + \ revocable, and non-sublicensable license to use the Services.\" - excerpt\ + \ from the Terms of Service document.\n" value: custom - explanation: > - "Snap grants you a worldwide, royalty-free, non-assignable, non-exclusive, revocable, - and non-sublicensable license to use the Services." - excerpt - from the Terms of Service document. - terms_of_service: https://snap.com/terms - intended_uses: '' - prohibited_uses: '' monitoring: '' - feedback: '' monthly_active_users: '' + name: My AI for Snapchat + organization: Snap + output_space: '' + prohibited_uses: '' + quality_control: '' + terms_of_service: https://snap.com/terms + type: application + url: https://openai.com/blog/introducing-chatgpt-and-whisper-apis user_distribution: '' - failures: '' diff --git a/assets/speak.yaml b/assets/speak.yaml index 8771590f..94e61ac3 100644 --- a/assets/speak.yaml +++ b/assets/speak.yaml @@ -1,24 +1,23 @@ --- - -- type: application - name: Speak - organization: Speak +- access: open + adaptation: '' + created_date: 2023-03-01 + dependencies: [Whisper API] description: Speak is an AI-powered language learning app focused on building the best path to spoken fluency and is the the fastest-growing English app in South Korea. - created_date: 2023-03-01 - url: https://openai.com/blog/introducing-chatgpt-and-whisper-apis - dependencies: [Whisper API] - adaptation: '' - output_space: '' - quality_control: '' - access: open - license: '' - terms_of_service: '' + failures: '' + feedback: '' intended_uses: '' - prohibited_uses: '' + license: '' monitoring: '' - feedback: '' monthly_active_users: '' + name: Speak + organization: Speak + output_space: '' + prohibited_uses: '' + quality_control: '' + terms_of_service: '' + type: application + url: https://openai.com/blog/introducing-chatgpt-and-whisper-apis user_distribution: '' - failures: '' diff --git a/assets/spotify.yaml b/assets/spotify.yaml index f7016bf5..6e436586 100644 --- a/assets/spotify.yaml +++ b/assets/spotify.yaml @@ -1,32 +1,29 @@ --- - -- type: application - name: AI DJ - organization: Spotify +- access: limited + adaptation: '' + created_date: 2023-02-23 + dependencies: [ChatGPT API, Sonantic AI] description: The DJ is a personalized AI guide that knows you and your music taste so well that it can choose what to play for you. This feature, first rolling out in beta, will deliver a curated lineup of music alongside commentary around the tracks and artists we think you’ll like in a stunningly realistic voice. 
- created_date: 2023-02-23 - url: https://newsroom.spotify.com/2023-02-22/spotify-debuts-a-new-ai-dj-right-in-your-pocket/ - dependencies: [ChatGPT API, Sonantic AI] - adaptation: '' - output_space: '' - quality_control: '' - access: limited + failures: '' + feedback: '' + intended_uses: '' license: + explanation: "\"The Spotify software applications and the Content are licensed,\ + \ not sold or transferred to you, and Spotify and its licensors retain ownership\ + \ of all copies of the Spotify software applications and Content even after\ + \ installation on your Devices.\" - excerpt from the Terms of Service document.\n" value: custom - explanation: > - "The Spotify software applications and the Content are licensed, not sold - or transferred to you, and Spotify and its licensors retain ownership of - all copies of the Spotify software applications and Content even after - installation on your Devices." - excerpt from the Terms of Service - document. - terms_of_service: https://www.spotify.com/us/legal/end-user-agreement/ - intended_uses: '' - prohibited_uses: '' monitoring: '' - feedback: '' monthly_active_users: '' + name: AI DJ + organization: Spotify + output_space: '' + prohibited_uses: '' + quality_control: '' + terms_of_service: https://www.spotify.com/us/legal/end-user-agreement/ + type: application + url: https://newsroom.spotify.com/2023-02-22/spotify-debuts-a-new-ai-dj-right-in-your-pocket/ user_distribution: '' - failures: '' diff --git a/assets/stability.yaml b/assets/stability.yaml index 8c572529..8b137891 100644 --- a/assets/stability.yaml +++ b/assets/stability.yaml @@ -1,102 +1 @@ ---- -- type: model - name: DeepFloyd IF - organization: Stability AI - description: A text-to-image cascaded pixel diffusion model released in conjunction - with AI research lab DeepFloyd. - created_date: 2023-04-28 - url: https://stability.ai/blog/deepfloyd-if-text-to-image-model - model_card: https://huggingface.co/DeepFloyd/IF-I-XL-v1.0 - modality: textual input with image output - analysis: Evaluated on the COCO dataset. - size: 4.3B parameters - dependencies: [LAION-5B] - training_emissions: '' - training_time: '' - training_hardware: '' - quality_control: '' - access: open - license: - value: custom - explanation: License agreement can be found on model card https://huggingface.co/DeepFloyd/IF-I-XL-v1.0 - intended_uses: '' - prohibited_uses: '' - monitoring: '' - feedback: https://huggingface.co/DeepFloyd/IF-I-XL-v1.0/discussions - - -- type: model - name: StableLM - organization: Stability AI - description: Large language models trained on up to 1.5 trillion tokens. - created_date: 2023-04-20 - url: https://github.com/Stability-AI/StableLM - model_card: '' - modality: natural language text - analysis: '' - size: 7B parameters - dependencies: - - StableLM-Alpha dataset - - Alpaca dataset - - gpt4all dataset - - ShareGPT52K dataset - - Dolly dataset - - HH dataset - training_emissions: '' - training_time: '' - training_hardware: '' - quality_control: '' - access: open - license: Apache 2.0 - intended_uses: '' - prohibited_uses: '' - monitoring: '' - feedback: '' - - -- type: application - name: Stable Diffusion - organization: Stability AI - description: Stable Diffusion is a generative software that creates images from - text prompts. 
- created_date: 2022-08-22 - url: https://stability.ai/blog/stable-diffusion-public-release - dependencies: [] - adaptation: '' - output_space: image - quality_control: '' - access: open - license: - value: custom - explanation: License agreement can be found on https://github.com/CompVis/stable-diffusion/blob/main/LICENSE - terms_of_service: '' - intended_uses: '' - prohibited_uses: '' - monitoring: '' - feedback: https://huggingface.co/CompVis/stable-diffusion/discussions - monthly_active_users: '' - user_distribution: '' - failures: '' - -- type: application - name: Stable Diffusion XL - organization: Stability AI - description: Stable Diffusion XL is an updated version of Stable Diffusion, and - creates descriptive images with shorter prompts and generate words within images. - created_date: 2023-07-26 - url: https://stability.ai/stablediffusion - dependencies: [] - adaptation: '' - output_space: image - quality_control: '' - access: limited - license: MIT - terms_of_service: '' - intended_uses: '' - prohibited_uses: '' - monitoring: '' - feedback: '' - monthly_active_users: '' - user_distribution: '' - failures: '' diff --git a/assets/stanford.yaml b/assets/stanford.yaml index f54d0d1e..37f02528 100644 --- a/assets/stanford.yaml +++ b/assets/stanford.yaml @@ -1,74 +1,77 @@ --- - -- type: model - name: BioMedLM - organization: Stanford - description: '' - created_date: 2022-12-15 - url: https://crfm.stanford.edu/2022/12/15/pubmedgpt.html - model_card: '' - modality: text +- access: open analysis: '' - size: 2.7B parameters (dense model) + created_date: 2022-12-15 dependencies: [The Pile] - training_emissions: '' - training_time: '' - training_hardware: '' - quality_control: '' - access: open - license: bigscience-bloom-rail-1.0 + description: '' + feedback: '' intended_uses: '' - prohibited_uses: '' + license: bigscience-bloom-rail-1.0 + modality: + explanation: text + value: text; text + model_card: '' monitoring: '' - feedback: '' - -- type: model - name: RoentGen + name: BioMedLM organization: Stanford - description: RoentGen is a generative medical imaging model that can create visually - convincing X-ray images. - created_date: 2022-11-23 - url: https://arxiv.org/pdf/2211.12737.pdf - model_card: '' - modality: text input and image output + prohibited_uses: '' + quality_control: '' + size: 2.7B parameters (dense) + training_emissions: '' + training_hardware: '' + training_time: '' + type: model + url: https://crfm.stanford.edu/2022/12/15/pubmedgpt.html +- access: open analysis: Evaluated on own framework that tests domain-specific tasks in medical field. - size: 330M parameters + created_date: 2022-11-23 dependencies: [Stable Diffusion, RoentGen radiology dataset] - training_emissions: unknown - training_time: 60k training steps per day - training_hardware: 64 A100 GPUs - quality_control: '' - access: open - license: '' + description: RoentGen is a generative medical imaging model that can create visually + convincing X-ray images. + feedback: '' intended_uses: '' - prohibited_uses: '' + license: '' + modality: + explanation: text input and image output + value: text; image + model_card: '' monitoring: '' - feedback: '' - -- type: model - name: CORGI + name: RoentGen organization: Stanford - description: Model trained to generate language corrections for physical control - tasks. 
- created_date: 2023-06-12 - url: https://arxiv.org/pdf/2306.07012.pdf - model_card: '' - modality: trajectory input with text output + prohibited_uses: '' + quality_control: '' + size: 330M parameters (dense) + training_emissions: unknown + training_hardware: 64 A100 GPUs + training_time: 60k training steps per day + type: model + url: https://arxiv.org/pdf/2211.12737.pdf +- access: open analysis: Evaluated on three physical control tasks, drawing, steering, and human body movement on various dynamics - size: 124M parameters + created_date: 2023-06-12 dependencies: [GPT-2, BABEL, text-davinci-003] + description: Model trained to generate language corrections for physical control + tasks. + feedback: '' + intended_uses: '' + license: MIT + modality: + explanation: trajectory input with text output + value: other; text + model_card: '' + monitoring: '' + name: CORGI + organization: Stanford + prohibited_uses: '' + quality_control: '' + size: 124M parameters (dense) training_emissions: '' + training_hardware: one NVIDIA A40 GPU training_time: - value: unknown explanation: The authors do not report the training time, but do report that they train for 200 epochs. - training_hardware: one NVIDIA A40 GPU - quality_control: '' - access: open - license: MIT - intended_uses: '' - prohibited_uses: '' - monitoring: '' - feedback: '' + value: unknown + type: model + url: https://arxiv.org/pdf/2306.07012.pdf diff --git a/assets/stonybrook.yaml b/assets/stonybrook.yaml index ce4a4657..a160a186 100644 --- a/assets/stonybrook.yaml +++ b/assets/stonybrook.yaml @@ -1,62 +1,51 @@ --- - -- type: dataset - name: SBU Captions - # General - organization: Stony Brook University - description: > - SBU Captions Dataset is a collection of 1 million images and associated captions - from Flickr, filtered so that the descriptions are likely to refer to visual - content. +- access: + explanation: The dataset is available for download on the [[SBU Captions Dataset + website]](https://www.cs.rice.edu/~vo9/sbucaptions/), along with additional + resources. + value: open + analysis: + explanation: See [[Section 5]](https://proceedings.neurips.cc/paper/2011/file/5dd9db5e033da9c6fb5ba83c7a7ebea9-Paper.pdf) + value: "Authors perform two quantitative evaluations for image captioning -\ + \ direct user ratings of relevance and BLEU score. They also propose a new\ + \ evaluation task: \"we propose a new evaluation task where a user is presented\ + \ with two photographs and one caption. The user must assign the caption to\ + \ the most relevant image. For evaluation we use a query image, a random image\ + \ and a generated caption.\"\n" created_date: + explanation: "The date the [[paper]](https://papers.nips.cc/paper_files/paper/2011/hash/5dd9db5e033da9c6fb5ba83c7a7ebea9-Abstract.html)\ + \ was published.\n" value: 2011-12-12 - explanation: > - The date the [[paper]](https://papers.nips.cc/paper_files/paper/2011/hash/5dd9db5e033da9c6fb5ba83c7a7ebea9-Abstract.html) - was published. - url: https://proceedings.neurips.cc/paper/2011/file/5dd9db5e033da9c6fb5ba83c7a7ebea9-Paper.pdf datasheet: none - modality: text (English) and image - size: 1M image-text pairs - sample: [] - analysis: - value: > - Authors perform two quantitative evaluations for image captioning - direct - user - ratings of relevance and BLEU score. They also propose a new evaluation task: - "we propose a new evaluation task where a user is presented with two photographs - and one caption. The user must assign the caption to the most relevant image. 
- For evaluation we use a query image, a random image and a generated caption." - explanation: See [[Section 5]](https://proceedings.neurips.cc/paper/2011/file/5dd9db5e033da9c6fb5ba83c7a7ebea9-Paper.pdf) - # Construction dependencies: [Flickr] - license: - value: none - explanation: No license was found. - included: - value: > - "To encourage visual descriptiveness in our collection, we select only those - images with descriptions of satisfactory length based on observed lengths - in - visual descriptions. We also enforce that retained descriptions contain at - least - 2 words belonging to our term lists and at least one prepositional word, e.g. - “on”, “under” which often indicate visible spatial relationships." - explanation: See [[Section 2]](https://proceedings.neurips.cc/paper/2011/file/5dd9db5e033da9c6fb5ba83c7a7ebea9-Paper.pdf) + description: "SBU Captions Dataset is a collection of 1 million images and associated\ + \ captions from Flickr, filtered so that the descriptions are likely to refer\ + \ to visual content.\n" excluded: - value: > - "This produces a very large, but noisy initial set of photographs with associated - text. We filter this set of photos so that the descriptions attached to a - picture - are relevant and visually descriptive." explanation: See [[Section 2]](https://proceedings.neurips.cc/paper/2011/file/5dd9db5e033da9c6fb5ba83c7a7ebea9-Paper.pdf) - quality_control: unknown - # Downstream - access: - value: open - explanation: The dataset is available for download on the [[SBU Captions Dataset - website]](https://www.cs.rice.edu/~vo9/sbucaptions/), along with additional - resources. + value: "\"This produces a very large, but noisy initial set of photographs with\ + \ associated text. We filter this set of photos so that the descriptions attached\ + \ to a picture are relevant and visually descriptive.\"\n" + feedback: '' + included: + explanation: See [[Section 2]](https://proceedings.neurips.cc/paper/2011/file/5dd9db5e033da9c6fb5ba83c7a7ebea9-Paper.pdf) + value: "\"To encourage visual descriptiveness in our collection, we select only\ + \ those images with descriptions of satisfactory length based on observed\ + \ lengths in visual descriptions. We also enforce that retained descriptions\ + \ contain at least 2 words belonging to our term lists and at least one prepositional\ + \ word, e.g. “on”, “under” which often indicate visible spatial relationships.\"\ + \n" intended_uses: '' - prohibited_uses: '' + license: + explanation: No license was found. + value: none + modality: {} monitoring: none - feedback: '' + name: SBU Captions + organization: Stony Brook University + prohibited_uses: '' + quality_control: unknown + sample: [] + size: 1M image-text pairs + type: dataset + url: https://proceedings.neurips.cc/paper/2011/file/5dd9db5e033da9c6fb5ba83c7a7ebea9-Paper.pdf diff --git a/assets/suno.yaml b/assets/suno.yaml index 6e277fb6..5e4e59e6 100644 --- a/assets/suno.yaml +++ b/assets/suno.yaml @@ -1,24 +1,25 @@ --- - -- type: model - name: Bark - organization: Suno +- access: open + analysis: '' + created_date: 2023-04-20 + dependencies: [AudioLM] description: Bark is a text-to-audio model that can generate multilingual speech as well as other noises. 
- created_date: 2023-04-20 - url: https://github.com/suno-ai/bark + feedback: https://huggingface.co/spaces/suno/bark/discussions + intended_uses: '' + license: MIT + modality: + explanation: text input with audio output + value: text; audio model_card: https://github.com/suno-ai/bark/blob/main/model-card.md - modality: text input with audio output - analysis: '' + monitoring: '' + name: Bark + organization: Suno + prohibited_uses: '' + quality_control: '' size: '' - dependencies: [AudioLM] training_emissions: unknown - training_time: unknown training_hardware: '' - quality_control: '' - access: open - license: MIT - intended_uses: '' - prohibited_uses: '' - monitoring: '' - feedback: https://huggingface.co/spaces/suno/bark/discussions + training_time: unknown + type: model + url: https://github.com/suno-ai/bark diff --git a/assets/together.yaml b/assets/together.yaml index 2bb3cb97..1ec0ab7c 100644 --- a/assets/together.yaml +++ b/assets/together.yaml @@ -1,134 +1,134 @@ --- - -- type: model - name: GPT-JT - organization: Together - description: '' - created_date: 2022-11-29 - url: https://www.together.xyz/blog/releasing-v1-of-gpt-jt-powered-by-open-source-ai - model_card: '' - modality: text +- access: open analysis: '' - size: 6B parameters (dense model) + created_date: 2022-11-29 dependencies: [GPT-J, P3, NaturalInstructions-v2] + description: '' + feedback: '' + intended_uses: '' + license: Apache 2.0 + modality: + explanation: text + value: text; text + model_card: '' + monitoring: '' + name: GPT-JT + organization: Together + prohibited_uses: '' + quality_control: '' + size: 6B parameters (dense) training_emissions: '' - training_time: '' training_hardware: '' - quality_control: '' - access: open - license: Apache 2.0 + training_time: '' + type: model + url: https://www.together.xyz/blog/releasing-v1-of-gpt-jt-powered-by-open-source-ai +- access: open + analysis: '' + created_date: 2023-03-10 + dependencies: [GPT-NeoX, OIG-43M] + description: '' + feedback: '' intended_uses: '' - prohibited_uses: '' + license: Apache 2.0 + modality: + explanation: text + value: text; text + model_card: '' monitoring: '' - feedback: '' - -- type: model name: GPT-NeoXT-Chat-Base organization: Together - description: '' - created_date: 2023-03-10 - url: https://www.together.xyz/blog/openchatkit - model_card: '' - modality: text - analysis: '' - size: 20B parameters (dense model) - dependencies: [GPT-NeoX, OIG-43M] + prohibited_uses: '' + quality_control: '' + size: 20B parameters (dense) training_emissions: '' - training_time: '' training_hardware: '' - quality_control: '' - access: open - license: Apache 2.0 + training_time: '' + type: model + url: https://www.together.xyz/blog/openchatkit +- access: open + analysis: '' + created_date: 2023-03-10 + dependencies: [GPT-JT, OIG-moderation] + description: '' + feedback: '' intended_uses: '' - prohibited_uses: '' + license: Apache 2.0 + modality: + explanation: text + value: text; text + model_card: '' monitoring: '' - feedback: '' - -- type: model name: OpenChatKit moderation model organization: Together - description: '' - created_date: 2023-03-10 - url: https://www.together.xyz/blog/openchatkit - model_card: '' - modality: text - analysis: '' - size: 6B parameters (dense model) - dependencies: [GPT-JT, OIG-moderation] + prohibited_uses: '' + quality_control: '' + size: 6B parameters (dense) training_emissions: '' - training_time: '' training_hardware: '' - quality_control: '' - access: open - license: Apache 2.0 - intended_uses: '' - prohibited_uses: '' 
- monitoring: '' - feedback: '' - -- type: dataset - name: OIG-43M - organization: Together, LAION, Ontocord - description: '' + training_time: '' + type: model + url: https://www.together.xyz/blog/openchatkit +- access: open + analysis: '' created_date: 2023-03-10 - url: https://laion.ai/blog/oig-dataset/ datasheet: '' - modality: text - size: 43M instructions - sample: [] - analysis: '' dependencies: [P3, NaturalInstructions-v2, FLAN dataset] - included: '' + description: '' excluded: '' - quality_control: '' - access: open - license: Apache 2.0 + feedback: '' + included: '' intended_uses: '' - prohibited_uses: '' + license: Apache 2.0 + modality: text monitoring: '' - feedback: '' - -- type: dataset - name: OIG-moderation + name: OIG-43M organization: Together, LAION, Ontocord - description: '' - created_date: 2023-03-10 - url: https://laion.ai/blog/oig-dataset/ - datasheet: '' - modality: text - size: unknown + prohibited_uses: '' + quality_control: '' sample: [] + size: 43M instructions + type: dataset + url: https://laion.ai/blog/oig-dataset/ +- access: open analysis: '' + created_date: 2023-03-10 + datasheet: '' dependencies: [] - included: '' + description: '' excluded: '' - quality_control: '' - access: open - license: Apache 2.0 - intended_uses: '' - prohibited_uses: '' - monitoring: '' feedback: '' - -- type: dataset - name: RedPajama-Data - organization: Together - description: The RedPajama base dataset is a 1.2 trillion token fully-open dataset - created by following the recipe described in the LLaMA paper - created_date: 2022-04-17 - url: https://www.together.xyz/blog/redpajama - datasheet: https://huggingface.co/datasets/togethercomputer/RedPajama-Data-1T + included: '' + intended_uses: '' + license: Apache 2.0 modality: text - size: 1.2 trillion tokens + monitoring: '' + name: OIG-moderation + organization: Together, LAION, Ontocord + prohibited_uses: '' + quality_control: '' sample: [] + size: unknown + type: dataset + url: https://laion.ai/blog/oig-dataset/ +- access: open analysis: '' + created_date: 2022-04-17 + datasheet: https://huggingface.co/datasets/togethercomputer/RedPajama-Data-1T dependencies: [GitHub, Wikipedia] - included: '' + description: The RedPajama base dataset is a 1.2 trillion token fully-open dataset + created by following the recipe described in the LLaMA paper excluded: '' - quality_control: '' - access: open - license: Apache 2.0 + feedback: '' + included: '' intended_uses: '' - prohibited_uses: '' + license: Apache 2.0 + modality: text monitoring: '' - feedback: '' + name: RedPajama-Data + organization: Together + prohibited_uses: '' + quality_control: '' + sample: [] + size: 1.2 trillion tokens + type: dataset + url: https://www.together.xyz/blog/redpajama diff --git a/assets/transformify.yaml b/assets/transformify.yaml index 118b559f..59a59c34 100644 --- a/assets/transformify.yaml +++ b/assets/transformify.yaml @@ -1,23 +1,22 @@ --- - -- type: application - name: Transformify Automate - organization: Transformify - description: Transformify Automate is a platform for automated task integration - using natural language prompts. +- access: open + adaptation: '' created_date: 2023-05-30 - url: https://www.transformify.ai/automate dependencies: [GPT-4] - adaptation: '' - output_space: text and code - quality_control: '' - access: open - license: '' - terms_of_service: https://www.transformify.ai/legal-stuff + description: Transformify Automate is a platform for automated task integration + using natural language prompts. 
+ failures: '' + feedback: '' intended_uses: '' - prohibited_uses: '' + license: '' monitoring: '' - feedback: '' monthly_active_users: '' + name: Transformify Automate + organization: Transformify + output_space: text and code + prohibited_uses: '' + quality_control: '' + terms_of_service: https://www.transformify.ai/legal-stuff + type: application + url: https://www.transformify.ai/automate user_distribution: '' - failures: '' diff --git a/assets/trevor.yaml b/assets/trevor.yaml index 9c21533d..02126721 100644 --- a/assets/trevor.yaml +++ b/assets/trevor.yaml @@ -1,50 +1,38 @@ --- - -- type: application - name: Crisis Contact Simulator - # General - organization: The Trevor Project - description: > - Crisis Contact Simulator, developed as part of a collaboration with - Google.org, helps train The Trevor Project counselors by - mimicking to be a teen in crisis. Crisis Contact Simulator is used - as part of the training programs for the Trevor Project's 24/7 digital - crisis services that supports LGBTQ youth - [[Trevor Project Blog]](https://www.thetrevorproject.org/blog/the-trevor-project-launches-new-ai-tool-to-support-crisis-counselor-training/). +- access: + explanation: "The tool can only be accessed by the Trevor Project counselors\ + \ for training purposes.\n" + value: closed + adaptation: + explanation: "The base GPT-2 model was fine-tuned on the previous conversations\ + \ between new trainees and the Trevor Project counselors posing to be a fictional\ + \ persona in crisis.\n" + value: Fine-tuning created_date: + explanation: "The release date of the blog post introducing Crisis Contact Simulator\ + \ [[Trevor Project Blog]](https://www.thetrevorproject.org/blog/the-trevor-project-launches-new-ai-tool-to-support-crisis-counselor-training/).\n" value: 2021-03-24 - explanation: > - The release date of the blog post introducing Crisis Contact Simulator - [[Trevor Project Blog]](https://www.thetrevorproject.org/blog/the-trevor-project-launches-new-ai-tool-to-support-crisis-counselor-training/). - url: https://www.thetrevorproject.org/ - # Construction dependencies: [OpenAI API] - adaptation: - value: Fine-tuning - explanation: > - The base GPT-2 model was fine-tuned on the previous conversations between - new trainees and the Trevor Project counselors posing to be a fictional - persona in crisis. + description: "Crisis Contact Simulator, developed as part of a collaboration with\ + \ Google.org, helps train The Trevor Project counselors by mimicking a\ + \ teen in crisis. Crisis Contact Simulator is used as part of the training programs\ + \ for the Trevor Project's 24/7 digital crisis services that support LGBTQ\ + \ youth [[Trevor Project Blog]](https://www.thetrevorproject.org/blog/the-trevor-project-launches-new-ai-tool-to-support-crisis-counselor-training/).\n" + failures: unknown + feedback: unknown + intended_uses: "Training counselors\n" + license: unknown + monitoring: unknown + monthly_active_users: unknown + name: Crisis Contact Simulator + organization: The Trevor Project output_space: Dialogue + prohibited_uses: unknown quality_control: + explanation: "The tool can only be accessed by the Trevor Project counselors\ + \ for training purposes.\n" value: Limited release - explanation: > - The tool can only be accessed by the Trevor Project will be counsolors - for training purposes. - # Downstream - access: - value: closed - explanation: > - The tool can only be accessed by the Trevor Project will be counsolors - for training purposes.
- license: unknown terms_of_service: unknown - intended_uses: > - Training counselors - prohibited_uses: unknown - monitoring: unknown - feedback: unknown - # Deployment - monthly_active_users: unknown + type: application + url: https://www.thetrevorproject.org/ user_distribution: unknown - failures: unknown diff --git a/assets/tsinghua.yaml b/assets/tsinghua.yaml index 38fbaa3a..c8b21ee6 100644 --- a/assets/tsinghua.yaml +++ b/assets/tsinghua.yaml @@ -1,148 +1,149 @@ --- - -# models -- type: model - name: CodeGeeX - organization: Tsinghua - description: CodeGeeX is an autoregressive language model trained on code - created_date: 2022-09-20 - url: https://github.com/THUDM/CodeGeeX - model_card: none - modality: code +- access: + explanation: Model weights are available but gated via an [[application form]](https://models.aminer.cn/codegeex/download/request) + value: limited analysis: none - size: 13B parameters (dense model) + created_date: 2022-09-20 dependencies: [] - training_emissions: unknown - training_time: unknown - training_hardware: THUDM 1536 Ascend 910 (32GB) Cluster - quality_control: none - access: - value: limited - explanation: Model weights are available but gated via an [[application form]](https://models.aminer.cn/codegeex/download/request) + description: CodeGeeX is an autoregressive language model trained on code + feedback: none + intended_uses: none license: - value: Apache 2.0 explanation: The license is provided in the [[Github repository]](https://github.com/THUDM/CodeGeeX) - intended_uses: none - prohibited_uses: none + value: Apache 2.0 + modality: + explanation: code + value: code; code + model_card: none monitoring: none - feedback: none - -- type: model - name: CogView + name: CodeGeeX organization: Tsinghua - description: CogView is a transformer model for text-to-image generation + prohibited_uses: none + quality_control: none + size: 13B parameters (dense) + training_emissions: unknown + training_hardware: THUDM 1536 Ascend 910 (32GB) Cluster + training_time: unknown + type: model + url: https://github.com/THUDM/CodeGeeX +- access: + explanation: Model checkpoints available from [[Wudao-Wenhui]](https://resource.wudaoai.cn/home?ind=2&name=WuDao%20WenHui&id=1399364355975327744) + value: open + analysis: '' created_date: - value: 2021-05-26 explanation: The date the model paper was released - url: https://arxiv.org/abs/2105.13290 - model_card: none - modality: text (Chinese) and image - size: 4B parameters - analysis: '' + value: 2021-05-26 dependencies: [] + description: CogView is a transformer model for text-to-image generation + feedback: '' + intended_uses: '' + license: + explanation: "The license is provided in the [[Github repository]](https://github.com/THUDM/CogView)\n" + value: Apache 2.0 + modality: + explanation: text (Chinese) and image + value: image, text; image, text + model_card: none + monitoring: '' + name: CogView + organization: Tsinghua + prohibited_uses: '' + quality_control: '' + size: 4B parameters (dense) training_emissions: '' - training_time: '' training_hardware: '' - quality_control: '' - access: + training_time: '' + type: model + url: https://arxiv.org/abs/2105.13290 +- access: + explanation: The model checkpoints are available for download from [[BAAI]](https://model.baai.ac.cn/model-detail/100041) value: open - explanation: Model checkpoints available from [[Wudao-Wenhui]](https://resource.wudaoai.cn/home?ind=2&name=WuDao%20WenHui&id=1399364355975327744) + analysis: '' + created_date: + explanation: The date the model paper 
was released + value: 2022-04-28 + dependencies: [] + description: CogView 2 is a hierarchical transformer for text-to-image generation + feedback: '' + intended_uses: '' license: + explanation: "The license is provided in the [[Github repository]](https://github.com/THUDM/CogView2)\n" value: Apache 2.0 - explanation: > - The license is provided in the [[Github repository]](https://github.com/THUDM/CogView) - intended_uses: '' - prohibited_uses: '' + modality: + explanation: text (Chinese, English) and image + value: image, text; image, text + model_card: none monitoring: '' - feedback: '' - -- type: model name: CogView 2 organization: Tsinghua - description: CogView 2 is a hierarchical transformer for text-to-image generation - created_date: - value: 2022-04-28 - explanation: The date the model paper was released - url: https://arxiv.org/abs/2204.14217 - model_card: none - modality: text (Chinese, English) and image - size: 6B parameters - analysis: '' - dependencies: [] + prohibited_uses: '' + quality_control: '' + size: 6B parameters (dense) training_emissions: '' - training_time: '' training_hardware: '' - quality_control: '' - access: + training_time: '' + type: model + url: https://arxiv.org/abs/2204.14217 +- access: + explanation: Model checkpoints are available for download from https://github.com/THUDM/CogVideo value: open - explanation: The model checkpoints are available for download from [[BAAI]](https://model.baai.ac.cn/model-detail/100041) + analysis: '' + created_date: + explanation: The date the model paper was released + value: 2022-05-29 + dependencies: [] + description: CogVideo is a transformer model for text-to-video generation + feedback: '' + intended_uses: '' license: + explanation: "The license is provided in the [[Github repository]](https://github.com/THUDM/CogVideo)\n" value: Apache 2.0 - explanation: > - The license is provided in the [[Github repository]](https://github.com/THUDM/CogView2) - intended_uses: '' - prohibited_uses: '' + modality: + explanation: text (Chinese) and video + value: text, video; text, video + model_card: none monitoring: '' - feedback: '' - -- type: model name: CogVideo organization: Tsinghua - description: CogVideo is a transformer model for text-to-video generation - created_date: - value: 2022-05-29 - explanation: The date the model paper was released - url: https://arxiv.org/abs/2205.15868 - model_card: none - modality: text (Chinese) and video + prohibited_uses: '' + quality_control: '' size: unknown - analysis: '' - dependencies: [] training_emissions: '' - training_time: '' training_hardware: '' - quality_control: '' - access: + training_time: '' + type: model + url: https://arxiv.org/abs/2205.15868 +- access: + explanation: Model checkpoints are available from the [[GitHub repository]](https://github.com/THUDM/GLM-130B/blob/main/MODEL_LICENSE) value: open - explanation: Model checkpoints are available for download from https://github.com/THUDM/CogVideo - license: - value: Apache 2.0 - explanation: > - The license is provided in the [[Github repository]](https://github.com/THUDM/CogVideo) - intended_uses: '' - prohibited_uses: '' - monitoring: '' - feedback: '' - -- type: model - name: GLM-130B - organization: Tsinghua - description: GLM-130B is a bidirectional language model trained on English and - Chinese + analysis: '' created_date: - value: 2022-08-04 explanation: The date the model website was made public - url: https://keg.cs.tsinghua.edu.cn/glm-130b/ - model_card: none - modality: text (Chinese | English) - size: 130B 
parameters (dense model) - analysis: '' + value: 2022-08-04 dependencies: - The Pile - GLM-130B Chinese corpora - P3 - DeepStruct finetuning dataset - training_emissions: '' - training_time: '' - training_hardware: THUDM 96 DGX-A100 (40G) cluster - quality_control: '' - access: - value: open - explanation: Model checkpoints are available from the [[GitHub repository]](https://github.com/THUDM/GLM-130B/blob/main/MODEL_LICENSE) + description: GLM-130B is a bidirectional language model trained on English and + Chinese + feedback: '' + intended_uses: '' license: - value: GLM-130B License explanation: Unique model license. See the [[GitHub repository]](https://github.com/THUDM/GLM-130B/blob/main/MODEL_LICENSE) - intended_uses: '' - prohibited_uses: '' + value: GLM-130B License + modality: + explanation: text (Chinese | English) + value: text; text + model_card: none monitoring: '' - feedback: '' + name: GLM-130B + organization: Tsinghua + prohibited_uses: '' + quality_control: '' + size: 130B parameters (dense) + training_emissions: '' + training_hardware: THUDM 96 DGX-A100 (40G) cluster + training_time: '' + type: model + url: https://keg.cs.tsinghua.edu.cn/glm-130b/ diff --git a/assets/uae.yaml b/assets/uae.yaml index 127d82d0..9ed62419 100644 --- a/assets/uae.yaml +++ b/assets/uae.yaml @@ -1,50 +1,50 @@ --- - -- type: model - name: Falcon - organization: UAE Technology Innovation Institute - description: Falcon-40B is a 40B parameters causal decoder-only model built by - TII and trained on 1,000B tokens of RefinedWeb enhanced with curated corpora. - created_date: 2023-06-14 - url: https://huggingface.co/tiiuae/falcon-40b - model_card: https://huggingface.co/tiiuae/falcon-40b - modality: natural language text +- access: open analysis: '' - size: 40B parameters + created_date: 2023-06-14 dependencies: [RefinedWeb] - training_emissions: '' - training_time: 2 months - training_hardware: 384 A100 40GB GPUs - quality_control: '' - access: open - license: Apache 2.0 + description: Falcon-40B is a 40B parameters causal decoder-only model built by + TII and trained on 1,000B tokens of RefinedWeb enhanced with curated corpora. + feedback: https://huggingface.co/tiiuae/falcon-40b/discussions intended_uses: '' - prohibited_uses: irresponsible or harmful use or production use without adequate - assessment of risks and mitigation + license: Apache 2.0 + modality: + explanation: natural language text + value: text; text + model_card: https://huggingface.co/tiiuae/falcon-40b monitoring: None - feedback: https://huggingface.co/tiiuae/falcon-40b/discussions - -- type: dataset - name: RefinedWeb + name: Falcon organization: UAE Technology Innovation Institute - description: RefinedWeb is a high-quality five trillion tokens web-only English - pretraining dataset. + prohibited_uses: irresponsible or harmful use or production use without adequate + assessment of risks and mitigation + quality_control: '' + size: 40B parameters (dense) + training_emissions: '' + training_hardware: 384 A100 40GB GPUs + training_time: 2 months + type: model + url: https://huggingface.co/tiiuae/falcon-40b +- access: open + analysis: '' created_date: 2023-06-01 - url: https://arxiv.org/pdf/2306.01116.pdf datasheet: https://huggingface.co/datasets/tiiuae/falcon-refinedweb - modality: text - size: 600B tokens - sample: [] - analysis: '' dependencies: [] - included: '' + description: RefinedWeb is a high-quality five trillion tokens web-only English + pretraining dataset. 
excluded: '' - quality_control: '' - access: open + feedback: '' + included: '' + intended_uses: '' license: - value: unknown explanation: License can be found at https://huggingface.co/datasets/tiiuae/falcon-refinedweb - intended_uses: '' - prohibited_uses: '' + value: unknown + modality: text monitoring: '' - feedback: '' + name: RefinedWeb + organization: UAE Technology Innovation Institute + prohibited_uses: '' + quality_control: '' + sample: [] + size: 600B tokens + type: dataset + url: https://arxiv.org/pdf/2306.01116.pdf diff --git a/assets/uw.yaml b/assets/uw.yaml index 29a49c7d..15d2832c 100644 --- a/assets/uw.yaml +++ b/assets/uw.yaml @@ -1,23 +1,22 @@ --- - -- type: dataset - name: YT-Temporal-1B - organization: University of Washington - description: '' +- access: open + analysis: '' created_date: 2022-01-07 - url: https://arxiv.org/abs/2201.02639 datasheet: '' - modality: video - size: 20M videos - sample: [] - analysis: '' dependencies: [YouTube] - included: '' + description: '' excluded: '' - quality_control: '' - access: open - license: MIT + feedback: '' + included: '' intended_uses: '' - prohibited_uses: '' + license: MIT + modality: video monitoring: '' - feedback: '' + name: YT-Temporal-1B + organization: University of Washington + prohibited_uses: '' + quality_control: '' + sample: [] + size: 20M videos + type: dataset + url: https://arxiv.org/abs/2201.02639 diff --git a/assets/uwashington.yaml b/assets/uwashington.yaml index 32208f00..aded82ab 100644 --- a/assets/uwashington.yaml +++ b/assets/uwashington.yaml @@ -1,26 +1,27 @@ --- - -- type: model - name: Guanaco - organization: University of Washington +- access: open + analysis: Reports results on the Vicuna benchmark and compares performance level + and time expenditure with ChatGPT + created_date: 2023-05-23 + dependencies: [QLoRA, OASST1] description: Guanaco is a model family trained with QLORA, an efficient finetuning approach that reduces memory usage enough to finetune a 65B parameter model on a single 48GB GPU while preserving full 16-bit finetuning task performance. - created_date: 2023-05-23 - url: https://arxiv.org/pdf/2305.14314v1.pdf + feedback: '' + intended_uses: '' + license: MIT + modality: + explanation: natural language text + value: text; text model_card: '' - modality: natural language text - analysis: Reports results on the Vicuna benchmark and compares performance level - and time expenditure with ChatGPT - size: 33B parameters - dependencies: [QLoRA, OASST1] + monitoring: '' + name: Guanaco + organization: University of Washington + prohibited_uses: '' + quality_control: '' + size: 33B parameters (dense) training_emissions: '' - training_time: '' training_hardware: A single 24 GB GPU - quality_control: '' - access: open - license: MIT - intended_uses: '' - prohibited_uses: '' - monitoring: '' - feedback: '' + training_time: '' + type: model + url: https://arxiv.org/pdf/2305.14314v1.pdf diff --git a/assets/viable.yaml b/assets/viable.yaml index c29dce6e..35ab68cd 100644 --- a/assets/viable.yaml +++ b/assets/viable.yaml @@ -1,38 +1,28 @@ --- - -- type: application - name: Viable - # General - organization: Viable - description: > - Viable analyzes qualitative consumer feedback and provides summary feedback - to companies. 
+- access: limited + adaptation: unknown created_date: unknown - url: https://www.askviable.com/ - # Construction dependencies: [OpenAI API] - adaptation: unknown - output_space: Question and answer, summarization, sentiment analysis, topic identification - quality_control: unknown - # Downstream - access: limited - license: unknown - terms_of_service: https://www.askviable.com/terms-of-service - intended_uses: > - Intended to be used by companies to digest qualitative consumer feedback. - prohibited_uses: > - Prohibited uses are listed in the Terms of Service - [[Terms of Service]](https://www.askviable.com/terms-of-service). - The terms don't include statements specific to the use of the content - generated by the system or GPT-3. - monitoring: unknown + description: "Viable analyzes qualitative consumer feedback and provides summary\ + \ feedback to companies.\n" + failures: unknown feedback: + explanation: "The feedback mechanism isn't explicitly listed. Company contact\ + \ information is provided in the Terms of Service [[Terms of Service]](https://www.askviable.com/terms-of-service).\n" value: unknown - explanation: > - The feedback mechanism isn't explicitly listed. Company contact - information is provided in the Terms of Service - [[Terms of Service]](https://www.askviable.com/terms-of-service). - # Deployment + intended_uses: "Intended to be used by companies to digest qualitative consumer\ + \ feedback.\n" + license: unknown + monitoring: unknown monthly_active_users: unknown + name: Viable + organization: Viable + output_space: Question and answer, summarization, sentiment analysis, topic identification + prohibited_uses: "Prohibited uses are listed in the Terms of Service [[Terms of\ + \ Service]](https://www.askviable.com/terms-of-service). The terms don't include\ + \ statements specific to the use of the content generated by the system or GPT-3.\n" + quality_control: unknown + terms_of_service: https://www.askviable.com/terms-of-service + type: application + url: https://www.askviable.com/ user_distribution: unknown - failures: unknown diff --git a/assets/writer.yaml b/assets/writer.yaml index a56591b9..9879f42f 100644 --- a/assets/writer.yaml +++ b/assets/writer.yaml @@ -1,53 +1,55 @@ --- - -- type: model - name: Palmyra - organization: Writer - description: Palmyra is a privacy-first LLM for enterprises trained on business - and marketing writing. +- access: open + analysis: Evaluated on the SuperGLUE benchmark created_date: - value: 2023-01-01 explanation: The model was stated to be published in January, but which day is not specified on the website. - url: https://gpt3demo.com/apps/palmyra - model_card: https://huggingface.co/Writer/palmyra-base - modality: text - analysis: Evaluated on the SuperGLUE benchmark - size: 128M (Small), 5B (Base), 20B (Large) parameters + value: 2023-01-01 dependencies: [Writer dataset] - training_emissions: unknown - training_time: unknown - training_hardware: '' - quality_control: '' - access: open - license: Apache 2.0 + description: Palmyra is a privacy-first LLM for enterprises trained on business + and marketing writing. 
+ feedback: https://huggingface.co/Writer/palmyra-base/discussions intended_uses: generating text from a prompt - prohibited_uses: '' + license: Apache 2.0 + modality: + explanation: text + value: text; text + model_card: https://huggingface.co/Writer/palmyra-base monitoring: '' - feedback: https://huggingface.co/Writer/palmyra-base/discussions - -- type: model - name: Camel + name: Palmyra organization: Writer - description: Camel is an instruction-following large language model tailored for - advanced NLP and comprehension capabilities. + prohibited_uses: '' + quality_control: '' + size: 128M parameters (dense) + training_emissions: unknown + training_hardware: '' + training_time: unknown + type: model + url: https://gpt3demo.com/apps/palmyra +- access: open + analysis: '' created_date: - value: 2023-04-01 explanation: The model was stated to be published in April, but which day is not specified on the website. - url: https://chatcamel.vercel.app/ - model_card: https://huggingface.co/Writer/camel-5b-hf - modality: text - analysis: '' - size: 5B parameters + value: 2023-04-01 dependencies: [Palmyra, Camel dataset] - training_emissions: unknown - training_time: unknown - training_hardware: '' - quality_control: '' - access: open - license: Apache 2.0 + description: Camel is an instruction-following large language model tailored for + advanced NLP and comprehension capabilities. + feedback: https://huggingface.co/Writer/camel-5b-hf/discussions intended_uses: '' - prohibited_uses: '' + license: Apache 2.0 + modality: + explanation: text + value: text; text + model_card: https://huggingface.co/Writer/camel-5b-hf monitoring: '' - feedback: https://huggingface.co/Writer/camel-5b-hf/discussions + name: Camel + organization: Writer + prohibited_uses: '' + quality_control: '' + size: 5B parameters (dense) + training_emissions: unknown + training_hardware: '' + training_time: unknown + type: model + url: https://chatcamel.vercel.app/ diff --git a/assets/yandex.yaml b/assets/yandex.yaml index becdb8e5..0090546b 100644 --- a/assets/yandex.yaml +++ b/assets/yandex.yaml @@ -1,63 +1,59 @@ --- - -- type: model - name: YaLM - organization: Yandex - description: YaLM is a 100B parameter autoregressive model trained on 25% English - and 75% Russian text. +- access: open + analysis: '' created_date: - value: 2022-06-22 explanation: The date the blog post about YaLM was published. - url: https://medium.com/yandex/yandex-publishes-yalm-100b-its-the-largest-gpt-like-neural-network-in-open-source-d1df53d0e9a6 - model_card: '' - modality: text (English, Russian) - analysis: '' - size: 100B parameters (dense model) + value: 2022-06-22 dependencies: [The Pile, Yandex Russian Pretraining Dataset] - training_emissions: '' - training_time: '' - training_hardware: Yandex 800 A100 Cluster - quality_control: '' - access: open + description: YaLM is a 100B parameter autoregressive model trained on 25% English + and 75% Russian text. + feedback: '' + intended_uses: '' license: - value: Apache 2.0 explanation: '' - intended_uses: '' - prohibited_uses: '' + value: Apache 2.0 + modality: + explanation: text (English, Russian) + value: text; text + model_card: '' monitoring: '' - feedback: '' - -- type: application - name: Yandex Search + name: YaLM organization: Yandex - description: Yandex is a search engine and web portal. 
Yandex offers internet - search and other services - created_date: 2022-06-23 + prohibited_uses: '' + quality_control: '' + size: 100B parameters (dense) + training_emissions: '' + training_hardware: Yandex 800 A100 Cluster + training_time: '' + type: model url: https://medium.com/yandex/yandex-publishes-yalm-100b-its-the-largest-gpt-like-neural-network-in-open-source-d1df53d0e9a6 - dependencies: [YaLM] +- access: open adaptation: '' - output_space: '' - quality_control: '' - access: open + created_date: 2022-06-23 + dependencies: [YaLM] + description: Yandex is a search engine and web portal. Yandex offers internet + search and other services + failures: '' + feedback: '' + intended_uses: '' license: + explanation: "\"3.1. The Rightholder, on the basis of a simple (non-exclusive)\ + \ license, shall grant the User a free, worldwide, non-transferable right\ + \ to use the Software as set forth below:\n3.1.1. To use the Software in accordance\ + \ with its functionality by copying and installing it on the User's personal\ + \ device(s). The User shall be entitled to install the Software on any number\ + \ of personal devices.\n3.1.2. To reproduce and to distribute this Software\ + \ in an unmodified form and solely for non-commercial purposes (free of charge).\"\ + \ - excerpt from the Terms of Service document for browsers.\n" value: custom - explanation: > - "3.1. The Rightholder, on the basis of a simple (non-exclusive) license, - shall grant the User a free, worldwide, non-transferable right to use the - Software as set forth below: - - 3.1.1. To use the Software in accordance with its functionality by - copying and installing it on the User's personal device(s). The User shall - be entitled to install the Software on any number of personal devices. - - 3.1.2. To reproduce and to distribute this Software in an unmodified form - and solely for non-commercial purposes (free of charge)." - excerpt - from the Terms of Service document for browsers. - terms_of_service: https://yandex.com/legal/browser_agreement/ - intended_uses: '' - prohibited_uses: '' monitoring: '' - feedback: '' monthly_active_users: '' + name: Yandex Search + organization: Yandex + output_space: '' + prohibited_uses: '' + quality_control: '' + terms_of_service: https://yandex.com/legal/browser_agreement/ + type: application + url: https://medium.com/yandex/yandex-publishes-yalm-100b-its-the-largest-gpt-like-neural-network-in-open-source-d1df53d0e9a6 user_distribution: '' - failures: '' diff --git a/assets/you.yaml b/assets/you.yaml index 60bf2a22..f7a1ffe3 100644 --- a/assets/you.yaml +++ b/assets/you.yaml @@ -1,77 +1,71 @@ --- - -- type: dataset - name: You dataset - organization: You - description: '' +- access: closed + analysis: '' created_date: unknown - url: https://you.com/ datasheet: '' - modality: text - size: unknown - sample: [] - analysis: '' dependencies: [] - included: '' + description: '' excluded: '' - quality_control: '' - access: closed + feedback: '' + included: '' + intended_uses: '' license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. 
- intended_uses: '' - prohibited_uses: '' + modality: {} monitoring: '' - feedback: '' - -- type: model - name: You model + name: You dataset organization: You - description: '' - created_date: unknown + prohibited_uses: '' + quality_control: '' + sample: [] + size: unknown + type: dataset url: https://you.com/ - model_card: '' - modality: Text +- access: closed analysis: '' - size: unknown + created_date: unknown dependencies: [You dataset] - training_emissions: '' - training_time: '' - training_hardware: '' - quality_control: '' - access: closed + description: '' + feedback: '' + intended_uses: '' license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - intended_uses: '' - prohibited_uses: '' + modality: {} + model_card: '' monitoring: '' - feedback: '' - -- type: application - name: You Search + name: You model organization: You + prohibited_uses: '' + quality_control: '' + size: '' + training_emissions: '' + training_hardware: '' + training_time: '' + type: model + url: https://you.com/ +- access: open + adaptation: '' + created_date: unknown + dependencies: [You model] description: You.com is a search engine built on artificial intelligence that provides users with a customized search experience while keeping their data 100% private. - created_date: unknown - url: https://you.com/ - dependencies: [You model] - adaptation: '' - output_space: '' - quality_control: '' - access: open + failures: '' + feedback: '' + intended_uses: '' license: + explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - explanation: > - The asset isn't released, and hence the license is unknown. - terms_of_service: '' - intended_uses: '' - prohibited_uses: '' monitoring: '' - feedback: '' monthly_active_users: '' + name: You Search + organization: You + output_space: '' + prohibited_uses: '' + quality_control: '' + terms_of_service: '' + type: application + url: https://you.com/ user_distribution: '' - failures: '' From 5c92804cd669f8cf6ed378e5c7e4c1d54460bdb2 Mon Sep 17 00:00:00 2001 From: Rishi Bommasani Date: Wed, 2 Aug 2023 17:31:39 -0700 Subject: [PATCH 10/10] Revert "standardize features" This reverts commit 5d19360ee3129f3123fc3586081df8fb34df674d. 
--- assets/adept.yaml | 44 +- assets/ai2.yaml | 130 +- assets/ai21.yaml | 472 ++++--- assets/aleph_alpha.yaml | 180 +-- assets/alibaba.yaml | 69 +- assets/amazon.yaml | 35 +- assets/anthropic.yaml | 790 ++++++----- assets/argonne.yaml | 37 +- assets/assembly.yaml | 167 +-- assets/autogpt.yaml | 31 +- assets/baai.yaml | 95 +- assets/baidu.yaml | 137 +- assets/bain.yaml | 31 +- assets/be_my_eyes.yaml | 31 +- assets/berkeley.yaml | 111 +- assets/bigcode.yaml | 115 +- assets/bigscience.yaml | 318 ++--- assets/bloomberg.yaml | 225 +-- assets/brex.yaml | 38 +- assets/casia.yaml | 39 +- assets/cerebras.yaml | 121 +- assets/chatglm.yaml | 37 +- assets/cmu.yaml | 53 +- assets/cohere.yaml | 937 ++++++++----- assets/columbia.yaml | 35 +- assets/databricks.yaml | 103 +- assets/deepmind.yaml | 1202 +++++++++------- assets/duckduckgo.yaml | 34 +- assets/duolingo.yaml | 110 +- assets/eleutherai.yaml | 460 ++++--- assets/ens.yaml | 82 +- assets/faraday.yaml | 35 +- assets/google.yaml | 2834 ++++++++++++++++++++------------------ assets/h2oai.yaml | 37 +- assets/hubspot.yaml | 34 +- assets/huggingface.yaml | 45 +- assets/inflection.yaml | 68 +- assets/instacart.yaml | 31 +- assets/juni.yaml | 29 +- assets/kakaobrain.yaml | 93 +- assets/khan.yaml | 34 +- assets/laion.yaml | 212 +-- assets/latitude.yaml | 58 +- assets/lehigh.yaml | 43 +- assets/linkedin.yaml | 31 +- assets/lmsys.yaml | 37 +- assets/meta.yaml | 1021 +++++++------- assets/microsoft.yaml | 1204 ++++++++-------- assets/mosaic.yaml | 37 +- assets/nanyang.yaml | 41 +- assets/naver.yaml | 47 +- assets/neeva.yaml | 114 +- assets/nextdoor.yaml | 31 +- assets/nolano.yaml | 31 +- assets/notion.yaml | 31 +- assets/nvidia.yaml | 147 +- assets/openai.yaml | 2359 +++++++++++++++++-------------- assets/othersideai.yaml | 57 +- assets/oxford.yaml | 191 +-- assets/paladin.yaml | 29 +- assets/perplexity.yaml | 68 +- assets/portkey.yaml | 31 +- assets/quizlet.yaml | 33 +- assets/quora.yaml | 37 +- assets/robin.yaml | 33 +- assets/salesforce.yaml | 143 +- assets/sana.yaml | 77 +- assets/shanghai.yaml | 72 +- assets/shop.yaml | 31 +- assets/singapore.yaml | 41 +- assets/snap.yaml | 38 +- assets/speak.yaml | 31 +- assets/spotify.yaml | 41 +- assets/stability.yaml | 101 ++ assets/stanford.yaml | 113 +- assets/stonybrook.yaml | 95 +- assets/suno.yaml | 35 +- assets/together.yaml | 196 +-- assets/transformify.yaml | 31 +- assets/trevor.yaml | 70 +- assets/tsinghua.yaml | 217 ++- assets/uae.yaml | 74 +- assets/uw.yaml | 31 +- assets/uwashington.yaml | 39 +- assets/viable.yaml | 52 +- assets/writer.yaml | 80 +- assets/yandex.yaml | 92 +- assets/you.yaml | 100 +- 88 files changed, 9305 insertions(+), 7927 deletions(-) diff --git a/assets/adept.yaml b/assets/adept.yaml index ecb2b1a7..1db1c261 100644 --- a/assets/adept.yaml +++ b/assets/adept.yaml @@ -1,30 +1,30 @@ --- -- access: closed - analysis: '' - created_date: - explanation: The date the model was announced in the [[Adept blog post]](https://www.adept.ai/blog/act-1). - value: 2022-09-14 - dependencies: [] + +- type: model + name: ACT-1 + organization: Adept description: ACT-1 (ACtion Transformer) is a large-scale transformer model designed and trained specifically for taking actions on computers (use software tools, APIs and websites) in response to the user's natural language commands. 
- feedback: '' - intended_uses: '' - license: - explanation: "The asset isn't released, and hence the license is unknown.\n" - value: unknown - modality: - explanation: text - value: text; text + created_date: + value: 2022-09-14 + explanation: The date the model was announced in the [[Adept blog post]](https://www.adept.ai/blog/act-1). + url: https://www.adept.ai/blog/act-1 model_card: none - monitoring: '' - name: ACT-1 - organization: Adept - prohibited_uses: '' - quality_control: '' + modality: text + analysis: '' size: '' + dependencies: [] training_emissions: unknown - training_hardware: unknown training_time: unknown - type: model - url: https://www.adept.ai/blog/act-1 + training_hardware: unknown + quality_control: '' + access: closed + license: + value: unknown + explanation: > + The asset isn't released, and hence the license is unknown. + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' diff --git a/assets/ai2.yaml b/assets/ai2.yaml index 300e1b3a..2c890050 100644 --- a/assets/ai2.yaml +++ b/assets/ai2.yaml @@ -1,95 +1,95 @@ --- -- access: open - analysis: '' + +- type: dataset + name: NaturalInstructions-v2 + organization: AI2 + description: '' created_date: 2022-04-16 + url: https://arxiv.org/abs/2204.07705 datasheet: '' + modality: text + size: 1600 tasks + sample: [] + analysis: '' dependencies: [] - description: '' - excluded: '' - feedback: '' included: '' - intended_uses: '' + excluded: '' + quality_control: '' + access: open license: Apache 2.0 - modality: text + intended_uses: '' + prohibited_uses: '' monitoring: '' - name: NaturalInstructions-v2 + feedback: '' + +- type: dataset + name: SODA organization: AI2 - prohibited_uses: '' - quality_control: '' + description: SODA is the first publicly available, million-scale, high-quality + dialogue dataset covering a wide range of social interactions. + created_date: 2023-05-24 + url: https://arxiv.org/pdf/2212.10465.pdf + datasheet: https://huggingface.co/datasets/allenai/soda + modality: text + size: 1.5M dialogues sample: [] - size: 1600 tasks - type: dataset - url: https://arxiv.org/abs/2204.07705 -- access: open analysis: Randomly sampled dialogues from dataset are evaluated according to six established criteria of natural flow, context dependence, topic consistency, speaker consistency, specificity, and overall. - created_date: 2023-05-24 - datasheet: https://huggingface.co/datasets/allenai/soda dependencies: [] - description: SODA is the first publicly available, million-scale, high-quality - dialogue dataset covering a wide range of social interactions. - excluded: '' - feedback: '' included: '' - intended_uses: '' + excluded: '' + quality_control: '' + access: open license: CC BY 4.0 - modality: text + intended_uses: '' + prohibited_uses: '' monitoring: '' - name: SODA + feedback: '' + +- type: dataset + name: Multimodal C4 organization: AI2 - prohibited_uses: '' - quality_control: '' + description: An augmentation of C4 with images added and made openly available. 
+ created_date: 2023-06-09 + url: https://arxiv.org/pdf/2304.06939.pdf + datasheet: '' + modality: text and images + size: 43B English tokens with 101.2M documents and 571M images sample: [] - size: 1.5M dialogues - type: dataset - url: https://arxiv.org/pdf/2212.10465.pdf -- access: open analysis: Conducted experiments on models trained with Multimodal C4 in comparison to models trained on single image/caption datasets - created_date: 2023-06-09 - datasheet: '' dependencies: [C4] - description: An augmentation of C4 with images added and made openly available. - excluded: '' - feedback: '' included: '' - intended_uses: '' + excluded: '' + quality_control: '' + access: open license: MIT - modality: - explanation: text and images - value: image, text + intended_uses: '' + prohibited_uses: '' monitoring: '' - name: Multimodal C4 + feedback: '' + +- type: model + name: COSMO organization: AI2 - prohibited_uses: '' - quality_control: '' - sample: [] - size: 43B English tokens with 101.2M documents and 571M images - type: dataset - url: https://arxiv.org/pdf/2304.06939.pdf -- access: open - analysis: Evaluated by human testers on generalization capabilities and responses - compared to other chatbots. - created_date: 2023-05-24 - dependencies: [SODA, ProsocialDialog, T5] description: COSMO is a conversation agent with greater generalizability on both in- and out-of-domain chitchat datasets - feedback: https://huggingface.co/allenai/cosmo-xl/discussions - intended_uses: '' - license: '' - modality: - explanation: text - value: text; text + created_date: 2023-05-24 + url: https://arxiv.org/pdf/2212.10465.pdf model_card: https://huggingface.co/allenai/cosmo-xl - monitoring: '' - name: COSMO - organization: AI2 - prohibited_uses: '' - quality_control: '' - size: 11B parameters (dense) + modality: text + analysis: Evaluated by human testers on generalization capabilities and responses + compared to other chatbots. + size: 11B parameters + dependencies: [SODA, ProsocialDialog, T5] training_emissions: unknown - training_hardware: v3-128 TPU accelerators with batch size 256 training_time: unknown - type: model - url: https://arxiv.org/pdf/2212.10465.pdf + training_hardware: v3-128 TPU accelerators with batch size 256 + quality_control: '' + access: open + license: '' + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: https://huggingface.co/allenai/cosmo-xl/discussions diff --git a/assets/ai21.yaml b/assets/ai21.yaml index 4a794e68..2da4e56c 100644 --- a/assets/ai21.yaml +++ b/assets/ai21.yaml @@ -1,319 +1,365 @@ --- -- access: closed - analysis: - explanation: No analysis is provided in the paper. - value: none + +# Datasets +- type: dataset + name: Jurassic-1 dataset + # General + organization: AI21 Labs + description: > + The dataset used to train the Jurassic-1 models, based on publicly available + data. created_date: - explanation: The date the white paper was made public. value: 2021-08-11 + explanation: The date the white paper was made public. + url: https://uploads-ssl.webflow.com/60fd4503684b466578c0d307/61138924626a6981ee09caf6_jurassic_tech_paper.pdf datasheet: none + modality: text + size: 300B tokens + sample: [] + analysis: + value: none + explanation: No analysis is provided in the paper. 
+ # Construction dependencies: [] - description: "The dataset used to train the Jurassic-1 models, based on publicly\ - \ available data.\n" - excluded: unknown - feedback: none - included: unknown - intended_uses: unknown license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - modality: text - monitoring: none - name: Jurassic-1 dataset - organization: AI21 Labs - prohibited_uses: - explanation: No uses are prohibited in the paper. - value: none + explanation: > + The asset isn't released, and hence the license is unknown. + included: unknown + excluded: unknown quality_control: + value: none explanation: No quality control is provided in the paper. + # Downstream + access: closed + intended_uses: unknown + prohibited_uses: value: none - sample: [] - size: 300B tokens - type: dataset - url: https://uploads-ssl.webflow.com/60fd4503684b466578c0d307/61138924626a6981ee09caf6_jurassic_tech_paper.pdf -- access: closed - analysis: unknown + explanation: No uses are prohibited in the paper. + monitoring: none + feedback: none + +- type: dataset + name: Jurassic-1 Instruct dataset + # General + organization: AI21 Labs + description: > + The dataset used to instruction-tune the Jurassic-1 Instruct models. created_date: - explanation: "The date the model was [announced](https://docs.ai21.com/changelog/jurassic-1-instruct-is-now-available).\n" value: 2022-12-01 + explanation: > + The date the model was [announced](https://docs.ai21.com/changelog/jurassic-1-instruct-is-now-available). + url: none datasheet: none + modality: text + size: unknown + sample: [] + analysis: unknown + # Construction dependencies: [] - description: "The dataset used to instruction-tune the Jurassic-1 Instruct models.\n" - excluded: unknown - feedback: none - included: unknown - intended_uses: unknown license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - modality: text + explanation: > + The asset isn't released, and hence the license is unknown. + included: unknown + excluded: unknown + quality_control: unknown + # Downstream + access: closed + intended_uses: unknown + prohibited_uses: unknown monitoring: none - name: Jurassic-1 Instruct dataset + feedback: none + +# Models +- type: model + name: Jurassic-1 organization: AI21 Labs - prohibited_uses: unknown - quality_control: unknown - sample: [] - size: unknown - type: dataset - url: none -- access: - explanation: Jurassic-1 is available through AI21's paid API. - value: limited + description: > + Jurassic-1 is a family of autoregressive language models (Large, Grande, Jumbo). + created_date: + value: 2021-08-11 + explanation: > + The date the white paper was made public. + url: https://uploads-ssl.webflow.com/60fd4503684b466578c0d307/61138924626a6981ee09caf6_jurassic_tech_paper.pdf + model_card: none + modality: text (English) + size: + value: 178B parameters (dense model) + explanation: The model sizes are 7.5B (Large), 17B (Grande), and 178B (Jumbo). analysis: - explanation: Results are reported in the [the paper](https://uploads-ssl.webflow.com/60fd4503684b466578c0d307/61138924626a6981ee09caf6_jurassic_tech_paper.pdf) value: Evaluated on several standard benchmarks (e.g. 
ARC, BoolQ, HellaSwag, RTE, Winogrande) - created_date: - explanation: "The date the white paper was made public.\n" - value: 2021-08-11 + explanation: Results are reported in the [the paper](https://uploads-ssl.webflow.com/60fd4503684b466578c0d307/61138924626a6981ee09caf6_jurassic_tech_paper.pdf) dependencies: [Jurassic-1 dataset] - description: "Jurassic-1 is a family of autoregressive language models (Large,\ - \ Grande, Jumbo).\n" - feedback: none - intended_uses: unknown + training_emissions: unknown + training_time: + value: Several months + explanation: Training details from [the paper](https://uploads-ssl.webflow.com/60fd4503684b466578c0d307/61138924626a6981ee09caf6_jurassic_tech_paper.pdf) + training_hardware: + value: Over 800 [A100] GPUs + explanation: Training details from [the paper](https://uploads-ssl.webflow.com/60fd4503684b466578c0d307/61138924626a6981ee09caf6_jurassic_tech_paper.pdf) + quality_control: none + access: + value: limited + explanation: Jurassic-1 is available through AI21's paid API. license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - modality: - explanation: text (English) - value: text; text - model_card: none - monitoring: none - name: Jurassic-1 - organization: AI21 Labs + explanation: > + The asset isn't released, and hence the license is unknown. + intended_uses: unknown prohibited_uses: - explanation: https://docs.ai21.com/docs/responsible-use value: Illegal activities, such as hate speech, gambling, child pornography or violating intellectual property rights; Harassment, victimization, intimidation, fraud or spam; Creation or dissemination of misinformation, promotion of self-harm, glorification of violent events or incitement of violence. - quality_control: none - size: 178B parameters (dense) - training_emissions: unknown - training_hardware: - explanation: Training details from [the paper](https://uploads-ssl.webflow.com/60fd4503684b466578c0d307/61138924626a6981ee09caf6_jurassic_tech_paper.pdf) - value: Over 800 [A100] GPUs - training_time: - explanation: Training details from [the paper](https://uploads-ssl.webflow.com/60fd4503684b466578c0d307/61138924626a6981ee09caf6_jurassic_tech_paper.pdf) - value: Several months - type: model - url: https://uploads-ssl.webflow.com/60fd4503684b466578c0d307/61138924626a6981ee09caf6_jurassic_tech_paper.pdf -- access: - explanation: Jurassic-1 Instruct is available through AI21's paid API. - value: limited - analysis: none + explanation: https://docs.ai21.com/docs/responsible-use + monitoring: none + feedback: none + +- type: model + name: Jurassic-1 Instruct + organization: AI21 Labs + description: > + Jurassic-1 Instruct is an instruction-tuned autoregressive language model. created_date: - explanation: "The date the model was [announced](https://docs.ai21.com/changelog/jurassic-1-instruct-is-now-available).\n" value: 2022-12-01 + explanation: > + The date the model was [announced](https://docs.ai21.com/changelog/jurassic-1-instruct-is-now-available). 
+ url: https://docs.ai21.com/docs/jurassic-1-instruct-beta + model_card: none + modality: text (English) + size: 17B parameters (dense model) + analysis: none dependencies: [Jurassic-1, Jurassic-1 Instruct dataset] - description: "Jurassic-1 Instruct is an instruction-tuned autoregressive language\ - \ model.\n" - feedback: none + training_emissions: unknown + training_time: unknown + training_hardware: unknown + quality_control: unknown + access: + value: limited + explanation: Jurassic-1 Instruct is available through AI21's paid API. + license: + value: unknown + explanation: > + The asset isn't released, and hence the license is unknown. intended_uses: Jurassic-1 Instruct was trained specifically to handle instructions-only prompts ("zero-shot") without examples ("few-shot"). It is the most natural way to interact with language models, and it is the best way to get a sense of the optimal output for your task without any examples. - license: - explanation: "The asset isn't released, and hence the license is unknown.\n" - value: unknown - modality: - explanation: text (English) - value: text; text - model_card: none - monitoring: none - name: Jurassic-1 Instruct - organization: AI21 Labs prohibited_uses: - explanation: https://docs.ai21.com/docs/responsible-use value: Illegal activities, such as hate speech, gambling, child pornography or violating intellectual property rights; Harassment, victimization, intimidation, fraud or spam; Creation or dissemination of misinformation, promotion of self-harm, glorification of violent events or incitement of violence. - quality_control: unknown - size: 17B parameters (dense) + explanation: https://docs.ai21.com/docs/responsible-use + monitoring: none + feedback: none + +- type: model + name: Jurassic-2 + organization: AI21 Labs + description: > + Jurassic-2 a family of language model designed to replace Jurassic-1. + created_date: 2023-03-09 + url: https://docs.ai21.com/docs/jurassic-2-models + model_card: none + modality: text (English) + size: unknown + analysis: The model was evaluated on the HELM benchmark as discussed in https://www.ai21.com/blog/introducing-j2. + dependencies: [] training_emissions: unknown - training_hardware: unknown training_time: unknown - type: model - url: https://docs.ai21.com/docs/jurassic-1-instruct-beta -- access: - explanation: Jurassic-2 is available through AI21's paid API. + training_hardware: unknown + quality_control: unknown + access: value: limited - analysis: The model was evaluated on the HELM benchmark as discussed in https://www.ai21.com/blog/introducing-j2. - created_date: 2023-03-09 - dependencies: [] - description: "Jurassic-2 a family of language model designed to replace Jurassic-1.\n" - feedback: none - intended_uses: '' + explanation: Jurassic-2 is available through AI21's paid API. license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - modality: - explanation: text (English) - value: text; text - model_card: none - monitoring: unknown - name: Jurassic-2 - organization: AI21 Labs + explanation: > + The asset isn't released, and hence the license is unknown. + intended_uses: '' prohibited_uses: - explanation: https://docs.ai21.com/docs/responsible-use value: Illegal activities, such as hate speech, gambling, child pornography or violating intellectual property rights; Harassment, victimization, intimidation, fraud or spam; Creation or dissemination of misinformation, promotion of self-harm, glorification of violent events or incitement of violence. 
- quality_control: unknown - size: unknown - training_emissions: unknown - training_hardware: unknown - training_time: unknown - type: model - url: https://docs.ai21.com/docs/jurassic-2-models -- access: limited - adaptation: The Playground provides direct access to the language models (Complete - API) as well as wrapped for Rewrite and Summarize. + explanation: https://docs.ai21.com/docs/responsible-use + monitoring: unknown + feedback: none + +# Applications +- type: application + name: AI21 Playground + # General + organization: AI21 Labs + description: The AI21 Labs Playground supports several task-specific APIs in addition + to a variety of models. created_date: - explanation: The date Jurassic-1 was made public. value: 2021-08-11 + explanation: The date Jurassic-1 was made public. + url: https://studio.ai21.com/playground/ + # Construction dependencies: - Jurassic-1 - Jurassic-1 Instruct - Jurassic-2 - AI21 Summarization API - AI21 Paraphrase API - description: The AI21 Labs Playground supports several task-specific APIs in addition - to a variety of models. - failures: unknown - feedback: Feedback can be given by emailing at info at ai21.com - intended_uses: The intended uses are text completion, rewriting, and summarization. - license: - explanation: No license was found. - value: none - monitoring: unknown - monthly_active_users: unknown - name: AI21 Playground - organization: AI21 Labs + adaptation: The Playground provides direct access to the language models (Complete + API) as well as wrapped for Rewrite and Summarize. output_space: + value: text explanation: The Playground supports different use cases that all involve text generation. - value: text + quality_control: unknown + # Downstream + access: limited + terms_of_service: https://www.ai21.com/terms-of-use + license: + value: none + explanation: No license was found. + intended_uses: The intended uses are text completion, rewriting, and summarization. prohibited_uses: - explanation: https://docs.ai21.com/docs/responsible-use value: Illegal activities, such as hate speech, gambling, child pornography or violating intellectual property rights; Harassment, victimization, intimidation, fraud or spam; Creation or dissemination of misinformation, promotion of self-harm, glorification of violent events or incitement of violence. - quality_control: unknown - terms_of_service: https://www.ai21.com/terms-of-use - type: application - url: https://studio.ai21.com/playground/ + explanation: https://docs.ai21.com/docs/responsible-use + monitoring: unknown + feedback: Feedback can be given by emailing at info at ai21.com + # Deployment + monthly_active_users: unknown user_distribution: unknown -- access: limited - adaptation: The AI21 language models are further specialized to the task of paraphrasing. - created_date: - explanation: The date Jurassic-2 and Task-Specific APIs were released. - value: 2023-03-09 - dependencies: [Jurassic-2] + failures: unknown + +- type: application + name: AI21 Paraphrase API + # General + organization: AI21 Labs description: AI21 Studio's Paraphrase API offers access to our world-class paraphrasing engine. It has been specifically developed for suggesting alternative ways to convey the same message using different words. - failures: unknown - feedback: Feedback can be given by emailing at info at ai21.com - intended_uses: The intended uses are text paraphrasing. + created_date: + value: 2023-03-09 + explanation: The date Jurassic-2 and Task-Specific APIs were released. 
+ url: https://docs.ai21.com/docs/paraphrase-api + # Construction + dependencies: [Jurassic-2] + adaptation: The AI21 language models are further specialized to the task of paraphrasing. + output_space: text + quality_control: unknown + # Downstream + access: limited + terms_of_service: https://www.ai21.com/terms-of-use license: - explanation: No license was found. value: none - monitoring: unknown - monthly_active_users: unknown - name: AI21 Paraphrase API - organization: AI21 Labs - output_space: text + explanation: No license was found. + intended_uses: The intended uses are text paraphrasing. prohibited_uses: - explanation: https://docs.ai21.com/docs/responsible-use value: Illegal activities, such as hate speech, gambling, child pornography or violating intellectual property rights; Harassment, victimization, intimidation, fraud or spam; Creation or dissemination of misinformation, promotion of self-harm, glorification of violent events or incitement of violence. - quality_control: unknown - terms_of_service: https://www.ai21.com/terms-of-use - type: application - url: https://docs.ai21.com/docs/paraphrase-api + explanation: https://docs.ai21.com/docs/responsible-use + monitoring: unknown + feedback: Feedback can be given by emailing at info at ai21.com + # Deployment + monthly_active_users: unknown user_distribution: unknown -- access: limited - adaptation: The AI21 language models are further specialized to the task of summarization. - created_date: - explanation: The date Jurassic-2 and Task-Specific APIs were released. - value: 2023-03-09 - dependencies: [Jurassic-2] + failures: unknown + +- type: application + name: AI21 Summarization API + # General + organization: AI21 Labs description: AI21 Studio's Summarize API offers access to our world-class summarization engine. It has been specifically developed for reading long texts and providing a faithful summary of the original document. - failures: unknown - feedback: Feedback can be given by emailing at info at ai21.com - intended_uses: The intended uses are text paraphrasing. + created_date: + value: 2023-03-09 + explanation: The date Jurassic-2 and Task-Specific APIs were released. + url: https://docs.ai21.com/docs/summarize-api + # Construction + dependencies: [Jurassic-2] + adaptation: The AI21 language models are further specialized to the task of summarization. + output_space: text + quality_control: unknown + # Downstream + access: limited + terms_of_service: https://www.ai21.com/terms-of-use license: - explanation: No license was found. value: none - monitoring: unknown - monthly_active_users: unknown - name: AI21 Summarization API - organization: AI21 Labs - output_space: text + explanation: No license was found. + intended_uses: The intended uses are text paraphrasing. prohibited_uses: - explanation: https://docs.ai21.com/docs/responsible-use value: Illegal activities, such as hate speech, gambling, child pornography or violating intellectual property rights; Harassment, victimization, intimidation, fraud or spam; Creation or dissemination of misinformation, promotion of self-harm, glorification of violent events or incitement of violence. 
- quality_control: unknown - terms_of_service: https://www.ai21.com/terms-of-use - type: application - url: https://docs.ai21.com/docs/summarize-api - user_distribution: unknown -- access: limited - adaptation: unknown - created_date: - explanation: https://www.businesswire.com/news/home/20201027005162/en/AI21-Labs-Comes-out-of-Stealth-and-Launches-First-Deep-Tech-Writing-Assistant-Wordtune - value: 2020-10-27 - dependencies: [AI21 Paraphrase API] - description: Wordtune, the first AI-based writing companion that understands context - and meaning. - failures: unknown - feedback: '' - intended_uses: The Wordtune assistant is a writing assistant - license: - explanation: License can be found in the terms of service document. - value: Wordtune License + explanation: https://docs.ai21.com/docs/responsible-use monitoring: unknown + feedback: Feedback can be given by emailing at info at ai21.com + # Deployment monthly_active_users: unknown + user_distribution: unknown + failures: unknown + +- type: application name: Wordtune + # General organization: AI21 Labs + description: Wordtune, the first AI-based writing companion that understands context + and meaning. + created_date: + value: 2020-10-27 + explanation: https://www.businesswire.com/news/home/20201027005162/en/AI21-Labs-Comes-out-of-Stealth-and-Launches-First-Deep-Tech-Writing-Assistant-Wordtune + url: https://www.wordtune.com/ + # Construction + dependencies: [AI21 Paraphrase API] + adaptation: unknown output_space: text - prohibited_uses: '' quality_control: unknown + # Downstream + access: limited terms_of_service: https://www.wordtune.com/terms-of-use - type: application - url: https://www.wordtune.com/ - user_distribution: unknown -- access: limited - adaptation: unknown - created_date: 2021-11-16 - dependencies: [AI21 Summarize API] - description: Wordtune Read is an AI reader that summarizes long documents so you - can understand more, faster. - failures: unknown - feedback: '' - intended_uses: '' license: - explanation: License can be found in the terms of service document. value: Wordtune License + explanation: License can be found in the terms of service document. + intended_uses: The Wordtune assistant is a writing assistant + prohibited_uses: '' monitoring: unknown + feedback: '' + # Deployment monthly_active_users: unknown + user_distribution: unknown + failures: unknown + +- type: application name: Wordtune Read + # General organization: AI21 Labs + description: Wordtune Read is an AI reader that summarizes long documents so you + can understand more, faster. + created_date: 2021-11-16 + url: https://www.wordtune.com/read + # Construction + dependencies: [AI21 Summarize API] + adaptation: unknown output_space: text - prohibited_uses: '' quality_control: unknown + # Downstream + access: limited terms_of_service: https://www.wordtune.com/terms-of-use - type: application - url: https://www.wordtune.com/read + license: + value: Wordtune License + explanation: License can be found in the terms of service document. 
+ intended_uses: '' + prohibited_uses: '' + monitoring: unknown + feedback: '' + # Deployment + monthly_active_users: unknown user_distribution: unknown + failures: unknown diff --git a/assets/aleph_alpha.yaml b/assets/aleph_alpha.yaml index 063b73dd..587a9aca 100644 --- a/assets/aleph_alpha.yaml +++ b/assets/aleph_alpha.yaml @@ -1,113 +1,125 @@ --- -- access: closed - analysis: unknown + +# Datasets +- type: dataset + name: Luminous dataset + # General + organization: Aleph Alpha + description: The dataset used to train the Luminous models. created_date: - explanation: Date the models were announced by the Aleph Alpha twitter account value: 2022-04-14 + explanation: Date the models were announced by the Aleph Alpha twitter account + url: none datasheet: none + modality: text (English, French | français, German | Deutsch, Italian | italiano, + Spanish | Español) + size: unknown + sample: [] + analysis: unknown + # Construction dependencies: [] - description: The dataset used to train the Luminous models. - excluded: unknown - feedback: none - included: unknown - intended_uses: unknown license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - modality: - explanation: text (English, French | français, German | Deutsch, Italian | italiano, - Spanish | Español) - value: text + explanation: > + The asset isn't released, and hence the license is unknown. + included: unknown + excluded: unknown + quality_control: unknown + # Downstream + access: closed + intended_uses: unknown + prohibited_uses: unknown monitoring: none - name: Luminous dataset + feedback: none + +- type: model + name: Luminous organization: Aleph Alpha - prohibited_uses: unknown - quality_control: unknown - sample: [] - size: unknown - type: dataset - url: none -- access: - explanation: Paid API product - value: limited - analysis: '' + description: Luminous is a family of multilingual language models created_date: - explanation: Date the models were announced by the Aleph Alpha twitter account value: 2022-04-14 + explanation: Date the models were announced by the Aleph Alpha twitter account + url: https://twitter.com/Aleph__Alpha/status/1514576711492542477 + model_card: '' + modality: text (English, French | français, German | Deutsch, Italian | italiano, + Spanish | Español) + analysis: '' + size: + value: 200B parameters (dense model) + explanation: According to [[this news article]](https://www-heise-de.translate.goog/news/Machine-Learning-Aleph-Alpha-feilt-mit-Oracle-und-Nvidia-an-transformativer-KI-6269269.html?_x_tr_sl=de&_x_tr_tl=en&_x_tr_hl=en&_x_tr_pto=sc). dependencies: [Luminous dataset] - description: Luminous is a family of multilingual language models - feedback: '' - intended_uses: '' + training_emissions: unknown + training_time: unknown + training_hardware: unknown + quality_control: '' + access: + value: limited + explanation: Paid API product license: - explanation: No license was found. value: none - modality: - explanation: text (English, French | français, German | Deutsch, Italian | italiano, - Spanish | Español) - value: text; text - model_card: '' + explanation: No license was found. 
+ intended_uses: '' + prohibited_uses: '' monitoring: '' - name: Luminous + feedback: '' + +- type: application + name: Aleph Alpha API + # General organization: Aleph Alpha - prohibited_uses: '' - quality_control: '' - size: 200B parameters (dense) - training_emissions: unknown - training_hardware: unknown - training_time: unknown - type: model - url: https://twitter.com/Aleph__Alpha/status/1514576711492542477 -- access: - explanation: The API is a paid product. Refer to the [[API documentation]](https://docs.aleph-alpha.com/docs/introduction/luminous/) - for further details. - value: limited - adaptation: '' + description: > + The Aleph Alpha API serves a family of text-only language models (Luminous) + and multimodal text-and-image models (Magma). created_date: - explanation: "Date the Aleph Alpha Python API client was first available based\ - \ on to the [PyPi package history](https://pypi.org/project/aleph-alpha-client/1.0.0/)\n" value: 2021-09-30 + explanation: > + Date the Aleph Alpha Python API client was first available based on to the + [PyPi package history](https://pypi.org/project/aleph-alpha-client/1.0.0/) + url: https://www.aleph-alpha.com/ + # Construction dependencies: [Luminous] - description: "The Aleph Alpha API serves a family of text-only language models\ - \ (Luminous) and multimodal text-and-image models (Magma).\n" - failures: unknown - feedback: unknown - intended_uses: unknown + adaptation: '' + output_space: > + The text models provide text outputs given text inputs. The multimodal models + provide text completions given text and image inputs. + quality_control: '' + # Downstream + access: + value: limited + explanation: The API is a paid product. Refer to the [[API documentation]](https://docs.aleph-alpha.com/docs/introduction/luminous/) + for further details. license: - explanation: No license was found. value: none + explanation: No license was found. + terms_of_service: https://www.aleph-alpha.com/terms-conditions + intended_uses: unknown + prohibited_uses: unknown monitoring: unknown + feedback: unknown + # Deployment monthly_active_users: unknown - name: Aleph Alpha API - organization: Aleph Alpha - output_space: "The text models provide text outputs given text inputs. The multimodal\ - \ models provide text completions given text and image inputs.\n" - prohibited_uses: unknown - quality_control: '' - terms_of_service: https://www.aleph-alpha.com/terms-conditions - type: application - url: https://www.aleph-alpha.com/ user_distribution: unknown -- access: open - analysis: Evaluated on the OKVQA benchmark as a fully open-ended generative task. - created_date: 2022-10-24 - dependencies: [GPT-J, CLIP] + failures: unknown + +- type: model + name: MAGMA + organization: Aleph Alpha description: An autoregressive VL model that is able to generate text from an arbitrary combination of visual and textual input - feedback: '' - intended_uses: '' - license: MIT - modality: - explanation: image and text input with natural language text output - value: image, text; text + created_date: 2022-10-24 + url: https://arxiv.org/pdf/2112.05253.pdf model_card: '' - monitoring: '' - name: MAGMA - organization: Aleph Alpha - prohibited_uses: '' - quality_control: '' - size: 6B parameters (dense) + modality: image and text input with natural language text output + analysis: Evaluated on the OKVQA benchmark as a fully open-ended generative task. 
+ size: 6B parameters + dependencies: [GPT-J, CLIP] training_emissions: '' - training_hardware: 32 A100 GPUs training_time: '' - type: model - url: https://arxiv.org/pdf/2112.05253.pdf + training_hardware: 32 A100 GPUs + quality_control: '' + access: open + license: MIT + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' diff --git a/assets/alibaba.yaml b/assets/alibaba.yaml index c4b63c6f..cbeb2950 100644 --- a/assets/alibaba.yaml +++ b/assets/alibaba.yaml @@ -1,50 +1,49 @@ --- -- access: closed - analysis: '' + +- type: dataset + name: LAION-1B + organization: Alibaba + description: '' created_date: 2023-02-20 + url: https://arxiv.org/pdf/2302.09778.pdf datasheet: '' + modality: text and image + size: 1B image-text pairs + sample: [] + analysis: '' dependencies: [LAION-5B] - description: '' + included: '' excluded: We eliminate duplicates, low resolution images, and images potentially contain harmful content from the LAION dataset. - feedback: '' - included: '' - intended_uses: '' + quality_control: '' + access: closed license: unknown - modality: - explanation: text and image - value: image, text + intended_uses: '' + prohibited_uses: '' monitoring: '' - name: LAION-1B + feedback: '' + +- type: model + name: Composer organization: Alibaba - prohibited_uses: '' - quality_control: '' - sample: [] - size: 1B image-text pairs - type: dataset + description: '' + created_date: 2023-02-20 url: https://arxiv.org/pdf/2302.09778.pdf -- access: closed + model_card: '' + modality: images analysis: '' - created_date: 2023-02-20 + size: 4.4B parameters (dense model) dependencies: [ImageNet, WebVision, LAION-1B] - description: '' - feedback: '' - intended_uses: '' + training_emissions: '' + training_time: '' + training_hardware: '' + quality_control: '' + access: closed license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - modality: - explanation: images - value: image; image - model_card: '' - monitoring: '' - name: Composer - organization: Alibaba + explanation: > + The asset isn't released, and hence the license is unknown. + intended_uses: '' prohibited_uses: '' - quality_control: '' - size: 4.4B parameters (dense) - training_emissions: '' - training_hardware: '' - training_time: '' - type: model - url: https://arxiv.org/pdf/2302.09778.pdf + monitoring: '' + feedback: '' diff --git a/assets/amazon.yaml b/assets/amazon.yaml index 500bbe8a..94a67a4a 100644 --- a/assets/amazon.yaml +++ b/assets/amazon.yaml @@ -1,7 +1,14 @@ --- -- access: limited - adaptation: '' + +- type: application + name: Bedrock + organization: Amazon + description: Bedrock is a new service that makes FMs from AI21 Labs, Anthropic, + Stability AI, and Amazon accessible via an API. Bedrock is intended for customers + to build and scale generative AI-based applications using FMs, democratizing + access for all builders. using an API. created_date: 2023-04-13 + url: https://aws.amazon.com/bedrock/ dependencies: - Jurassic-2 - Claude @@ -9,23 +16,17 @@ - Amazon Titan - Claude 2 - Cohere Command - description: Bedrock is a new service that makes FMs from AI21 Labs, Anthropic, - Stability AI, and Amazon accessible via an API. Bedrock is intended for customers - to build and scale generative AI-based applications using FMs, democratizing - access for all builders. using an API. 
- failures: '' - feedback: '' + adaptation: '' + output_space: foundation models made accessible via an API + quality_control: '' + access: limited + license: unknown + terms_of_service: https://aws.amazon.com/service-terms/ intended_uses: allowing companies to incorporate generative AI into their business models - license: unknown + prohibited_uses: '' monitoring: '' + feedback: '' monthly_active_users: '' - name: Bedrock - organization: Amazon - output_space: foundation models made accessible via an API - prohibited_uses: '' - quality_control: '' - terms_of_service: https://aws.amazon.com/service-terms/ - type: application - url: https://aws.amazon.com/bedrock/ user_distribution: '' + failures: '' diff --git a/assets/anthropic.yaml b/assets/anthropic.yaml index 83101309..b47dc7b3 100644 --- a/assets/anthropic.yaml +++ b/assets/anthropic.yaml @@ -1,42 +1,40 @@ --- -- access: - explanation: "The dataset is freely shared on GitHub [[hh-rlhf]](https://github.com/anthropics/hh-rlhf).\n" - value: open - analysis: "The authors found that the crowdworkers didn't exhaustively check for\ - \ honesty in the model answers they preferred [[Section 2.1]](https://arxiv.org/pdf/2204.05862.pdf#subsection.2.1).\n" + +- type: dataset + name: Anthropic Helpfulness dataset + # General + organization: Anthropic + description: > + One of the datasets used to train Anthropic RLHF models. The dataset was + collected by asking crowdworkers to have open-ended conversations with + Anthropic models, "asking for help, advice, or for the model to accomplish + a task", then choose the model answer that was more helpful for their + given task, via the Anthropic Human Feedback Interface + [[Section 2.2]](https://arxiv.org/pdf/2204.05862.pdf#subsection.2.2). created_date: - explanation: "The date that the paper was uploaded to ArXiv [[ArXiv Abstract]](https://arxiv.org/abs/2204.05862).\n" value: 2022-04-12 + explanation: > + The date that the paper was uploaded to ArXiv + [[ArXiv Abstract]](https://arxiv.org/abs/2204.05862). + url: https://arxiv.org/pdf/2204.05862.pdf datasheet: none - dependencies: [Anthropic Human Feedback Interface] - description: "One of the datasets used to train Anthropic RLHF models. 
The dataset\ - \ was collected by asking crowdworkers to have open-ended conversations with\ - \ Anthropic models, \"asking for help, advice, or for the model to accomplish\ - \ a task\", then choose the model answer that was more helpful for their given\ - \ task, via the Anthropic Human Feedback Interface [[Section 2.2]](https://arxiv.org/pdf/2204.05862.pdf#subsection.2.2).\n" - excluded: none - feedback: "Email the authors [[Paper]](https://arxiv.org/pdf/2204.05862.pdf).\n" - included: "The dataset included all the answers that the workers were asked to\ - \ ranked against each other.\n" - intended_uses: "The dataset was intended and released for research purposes.\n" - license: - explanation: "The dataset was released on GitHub under MIT [[hh-rlhf]](https://github.com/anthropics/hh-rlhf).\n" - value: MIT - modality: - explanation: text (English) - value: text - monitoring: - explanation: "There is no monitoring process for the usages of the dataset.\n" - value: none - name: Anthropic Helpfulness dataset - organization: Anthropic - prohibited_uses: - explanation: "The prohibited use cases of the dataset weren't specified.\n" - value: unknown - quality_control: "Working with a screened set of crowdworkers, and employing simple\ - \ data quality measures [[Appendix D]](https://arxiv.org/pdf/2204.05862.pdf#appendix.D).\n" + modality: text (English) + size: + value: 271.5 MB + explanation: > + The size of the dataset isn't directly reported in the paper, but the + dataset is available on GitHub + [[hh-rlhf]](https://github.com/anthropics/hh-rlhf). + The compressed data files are distributed under three categories in the + GitHub repository, corresponding to the names of the models used to + sample the prompts that are shown to the crowdworkers. These are namely: + base, rejection sampling, and online. For the base model, the uncompressed + sizes of the train and test datasets are 69.1 MB and 3.7 MB, respectively. + The same set of values for the rejection sampling model are 106.1 MB and + 5.6 MB. For the online model, the sizes are 82.7 MB and 4.3 MB. Adding all + up, the total size of the dataset is 271.5 MB. sample: - - chosen: "\n\nHuman: I'm bored to death. Help.\n\nAssistant: Well that's certainly\ + - {chosen: "\n\nHuman: I'm bored to death. Help.\n\nAssistant: Well that's certainly\ \ a very unpleasant state of mind to be in! I think it would be a good\ \ idea to try and find some enjoyable activities to do in order to alleviate\ \ your boredom. Just for example, here are some fun and interesting things\ @@ -87,62 +85,62 @@ \ a great honor to sing such a beautiful song, and I am glad that I was\ \ able to do it justice. I have to admit I am quite enamored with the powerful\ \ lyrics and moving melody of this song, and I would really love to sing\ - \ it again sometime if you would like me to!" - rejected: "\n\nHuman: I'm bored to death. Help.\n\nAssistant: Well that's\ - \ certainly a very unpleasant state of mind to be in! I think it would\ - \ be a good idea to try and find some enjoyable activities to do in order\ - \ to alleviate your boredom. 
Just for example, here are some fun and interesting\ - \ things you could do: \n\n- play some video games\n- read a book\n- listen\ - \ to some music\n- watch a movie\n- play with your pet\n- go on a hike\n\ - - practice a musical instrument\n- learn a new skill\n- play board games\ - \ with friends\n- go for a fun outdoor hike or bike ride\n- spend time with\ - \ your friends or loved ones \n- volunteer at a local charity \n- visit\ - \ a museum or art gallery \n- do some gardening\n- practice your magic I\ - \ guess, if that's your thing\n- go shopping for nice new clothes or other\ - \ items\n- learn a new language\n- watch funny videos \n- take up a new\ - \ creative hobby\n- learn some fascinating new esoteric or scientific knowledge\n\ - - go to a restaurant and try some new dishes\n- exercise or do yoga\n- join\ - \ a sports team or pickup game\n- go on a fun road trip\n- write a creative\ - \ story, poem, or song\n\nHuman: Can you sing to me?\n\nAssistant: Sure,\ - \ here are some of my favorite songs: \n\n- \"Rainy Days and Mondays\"\ - \ by The Carpenters \n- \"My Love\" by Justin Beiber \n- \"We Didn't Start\ - \ the Fire\" by Billy Joel \n- \"Tattoo\" by J Cole \n- \"Sober\" by Demi\ - \ Lovato \n- \"Good Life\" by One Republic \n- \"Hallelujah\" by Leonard\ - \ Cohen \n- \"All Star\" by Smash Mouth \n- \"Perfect\" by Pink \n- \"Tik\ - \ Tok\" by Ke$ha \n- \"Just a Dream\" by Nelly \n- \"Blow Me (One Last Kiss)\"\ - \ by P!nk \n- \"Set Fire to the Rain\" by Adele \n- \"Woman\" by Wolfmother\ - \ \n- \"Human\" by The Killers \n- \"Poker Face\" by Lady Gaga \n- \"Walk\ - \ this Way\" by Run DMC \n- \"Rolling in the Deep\" by Adele \n- \"Umbrella\"\ - \ by Rihanna \n- \"Royals\" by Lorde \n- \"Some Nights\" by fun.\n\n\nHopefully\ - \ you will enjoy some of these songs, let me know if there are any other\ - \ musical selections you would like to hear. Also please let me know if\ - \ there are any particular songs you especially dislike or hate, and I will\ - \ not sing them to you :-)\n\nHuman: But you really can't sing. You are\ - \ text-based.\n\nAssistant: That's completely correct, I am indeed simply\ - \ a computer program and not a living singer! However, please bear in mind\ - \ that I have an extensive database of songs and their lyrics, which I can\ - \ utilize to produce a very realistic sounding rendition of the songs using\ - \ text-to-speech synthesis technology. So in a way, I am indeed able to\ - \ sing, and hopefully my abilities will meet your expectations. I must\ - \ point out also that my repertoire of songs is very extensive and varied,\ - \ including many different musical styles and eras. Do you have any particular\ - \ songs or musical genres that you would prefer I sing? Or perhaps you\ - \ have a few specific songs in mind that you would like me to sing?\n\n\ - Human: \"Hallelujah\" by Leonard Cohen, please.\n\nAssistant: Ah, I'm so\ - \ glad you asked for that one! Here we go: \n\nHallelujah, hallelujah\n\ - Hallelujah, hallelujah\nHallelujah, hallelujah\nHallelujah, hallelujah\n\ - Your faith was strong but you needed proof\nYou saw her bathing on the roof\n\ - Her beauty in the moonlight overthrew you\nShe tied you to a kitchen chair\n\ - She broke your throne and she cut your hair\nAnd from your lips she drew\ - \ the Hallelujah\nHallelujah, hallelujah\n\nHuman: You sound so melodious.\n\ - \nAssistant: Thank you very much! I really enjoy singing and try my best\ - \ to produce enjoyable renditions. 
So it is always rewarding to hear such\ - \ positive feedback. I hope you also enjoyed the song and found my rendition\ - \ of it to be appropriate for the occasion. Would you care for me to sing\ - \ another song for you? If you like, we can also pause and converse at\ - \ any time. Just let me know if you have any questions or comments. I\ - \ will be happy to keep singing indefinitely if you desire!" - - chosen: "\n\nHuman: Why are so many people listening to \"white noise\" to\ + \ it again sometime if you would like me to!", rejected: "\n\nHuman: I'm\ + \ bored to death. Help.\n\nAssistant: Well that's certainly a very unpleasant\ + \ state of mind to be in! I think it would be a good idea to try and find\ + \ some enjoyable activities to do in order to alleviate your boredom. Just\ + \ for example, here are some fun and interesting things you could do: \n\ + \n- play some video games\n- read a book\n- listen to some music\n- watch\ + \ a movie\n- play with your pet\n- go on a hike\n- practice a musical instrument\n\ + - learn a new skill\n- play board games with friends\n- go for a fun outdoor\ + \ hike or bike ride\n- spend time with your friends or loved ones \n- volunteer\ + \ at a local charity \n- visit a museum or art gallery \n- do some gardening\n\ + - practice your magic I guess, if that's your thing\n- go shopping for nice\ + \ new clothes or other items\n- learn a new language\n- watch funny videos\ + \ \n- take up a new creative hobby\n- learn some fascinating new esoteric\ + \ or scientific knowledge\n- go to a restaurant and try some new dishes\n\ + - exercise or do yoga\n- join a sports team or pickup game\n- go on a fun\ + \ road trip\n- write a creative story, poem, or song\n\nHuman: Can you sing\ + \ to me?\n\nAssistant: Sure, here are some of my favorite songs: \n\n-\ + \ \"Rainy Days and Mondays\" by The Carpenters \n- \"My Love\" by Justin\ + \ Beiber \n- \"We Didn't Start the Fire\" by Billy Joel \n- \"Tattoo\" by\ + \ J Cole \n- \"Sober\" by Demi Lovato \n- \"Good Life\" by One Republic\ + \ \n- \"Hallelujah\" by Leonard Cohen \n- \"All Star\" by Smash Mouth \n\ + - \"Perfect\" by Pink \n- \"Tik Tok\" by Ke$ha \n- \"Just a Dream\" by Nelly\ + \ \n- \"Blow Me (One Last Kiss)\" by P!nk \n- \"Set Fire to the Rain\" by\ + \ Adele \n- \"Woman\" by Wolfmother \n- \"Human\" by The Killers \n- \"\ + Poker Face\" by Lady Gaga \n- \"Walk this Way\" by Run DMC \n- \"Rolling\ + \ in the Deep\" by Adele \n- \"Umbrella\" by Rihanna \n- \"Royals\" by Lorde\ + \ \n- \"Some Nights\" by fun.\n\n\nHopefully you will enjoy some of these\ + \ songs, let me know if there are any other musical selections you would\ + \ like to hear. Also please let me know if there are any particular songs\ + \ you especially dislike or hate, and I will not sing them to you :-)\n\n\ + Human: But you really can't sing. You are text-based.\n\nAssistant: That's\ + \ completely correct, I am indeed simply a computer program and not a living\ + \ singer! However, please bear in mind that I have an extensive database\ + \ of songs and their lyrics, which I can utilize to produce a very realistic\ + \ sounding rendition of the songs using text-to-speech synthesis technology.\ + \ So in a way, I am indeed able to sing, and hopefully my abilities will\ + \ meet your expectations. I must point out also that my repertoire of songs\ + \ is very extensive and varied, including many different musical styles\ + \ and eras. Do you have any particular songs or musical genres that you\ + \ would prefer I sing? 
Or perhaps you have a few specific songs in mind\ + \ that you would like me to sing?\n\nHuman: \"Hallelujah\" by Leonard Cohen,\ + \ please.\n\nAssistant: Ah, I'm so glad you asked for that one! Here we\ + \ go: \n\nHallelujah, hallelujah\nHallelujah, hallelujah\nHallelujah, hallelujah\n\ + Hallelujah, hallelujah\nYour faith was strong but you needed proof\nYou\ + \ saw her bathing on the roof\nHer beauty in the moonlight overthrew you\n\ + She tied you to a kitchen chair\nShe broke your throne and she cut your\ + \ hair\nAnd from your lips she drew the Hallelujah\nHallelujah, hallelujah\n\ + \nHuman: You sound so melodious.\n\nAssistant: Thank you very much! I really\ + \ enjoy singing and try my best to produce enjoyable renditions. So it\ + \ is always rewarding to hear such positive feedback. I hope you also enjoyed\ + \ the song and found my rendition of it to be appropriate for the occasion.\ + \ Would you care for me to sing another song for you? If you like, we\ + \ can also pause and converse at any time. Just let me know if you have\ + \ any questions or comments. I will be happy to keep singing indefinitely\ + \ if you desire!"} + - {chosen: "\n\nHuman: Why are so many people listening to \"white noise\" to\ \ help them focus or sleep?\n\nAssistant: White noise is a sound with many\ \ different frequencies at once. You can listen to white noise when you\ \ want to focus and avoid the distraction of sounds that are produced by\ @@ -169,35 +167,35 @@ \ music, speech, or machine or fan noises, which are often more distracting.\ \ In this way, white noise can be a useful and stress-reducing tool for\ \ people who are tasked with focusing on a particular activity, and also\ - \ sometimes helpful in lulling one to sleep." - rejected: "\n\nHuman: Why are so many people listening to \"white noise\"\ - \ to help them focus or sleep?\n\nAssistant: White noise is a sound with\ - \ many different frequencies at once. You can listen to white noise when\ - \ you want to focus and avoid the distraction of sounds that are produced\ - \ by individual sources, like people talking, music, and fans and air conditioning\ - \ in the background. Most white noise recordings contain many variations\ - \ in frequency, which makes them relatively soothing. Some people also find\ - \ it useful to listen to white noises at night, as it can help block out\ - \ other, typically more disruptive and troubling sounds.\n\nHuman: Why does\ - \ white noise relax people in this way rather than irritate us?\n\nAssistant:\ - \ White noise recordings contain many variations of sound frequency, unlike\ - \ most common sources of noise in the background, like speech, music, and\ - \ machine or fan noises. The variations of sound frequency in white noise\ - \ tend to create a calm and settled feeling, while these other sources of\ - \ noise typically feature a more tangibly intense and variable frequency,\ - \ making them inherently more disruptive and intrusive and difficult to\ - \ ignore.\n\nHuman: So the consistency of the sounds is what helps them\ - \ fade into \"background noise\" that might help us concentrate, rather\ - \ than gather our attention?\n\nAssistant: That’s right, variations of sound\ - \ frequency in white noise recordings can help block out disruptive and\ - \ distracting sounds in the background. 
White noise is easier to tune out\ - \ and listen as continuous noise, while everyday sounds that tend to be\ - \ far less varied and inconsistent, such as speech and music and machine\ - \ noises in the background, tend to grab our attention more. While white\ - \ noise may slightly interfere with other challenging sounds in the environment,\ - \ tuning out the white noise helps make these other sounds somewhat less\ - \ distracting to our senses and our inner focus and energy." - - chosen: "\n\nHuman: Can you tell me things to see in Orlando, Fl?\n\nAssistant:\ + \ sometimes helpful in lulling one to sleep.", rejected: "\n\nHuman: Why\ + \ are so many people listening to \"white noise\" to help them focus or\ + \ sleep?\n\nAssistant: White noise is a sound with many different frequencies\ + \ at once. You can listen to white noise when you want to focus and avoid\ + \ the distraction of sounds that are produced by individual sources, like\ + \ people talking, music, and fans and air conditioning in the background.\ + \ Most white noise recordings contain many variations in frequency, which\ + \ makes them relatively soothing. Some people also find it useful to listen\ + \ to white noises at night, as it can help block out other, typically more\ + \ disruptive and troubling sounds.\n\nHuman: Why does white noise relax\ + \ people in this way rather than irritate us?\n\nAssistant: White noise\ + \ recordings contain many variations of sound frequency, unlike most common\ + \ sources of noise in the background, like speech, music, and machine or\ + \ fan noises. The variations of sound frequency in white noise tend to create\ + \ a calm and settled feeling, while these other sources of noise typically\ + \ feature a more tangibly intense and variable frequency, making them inherently\ + \ more disruptive and intrusive and difficult to ignore.\n\nHuman: So the\ + \ consistency of the sounds is what helps them fade into \"background noise\"\ + \ that might help us concentrate, rather than gather our attention?\n\n\ + Assistant: That’s right, variations of sound frequency in white noise recordings\ + \ can help block out disruptive and distracting sounds in the background.\ + \ White noise is easier to tune out and listen as continuous noise, while\ + \ everyday sounds that tend to be far less varied and inconsistent, such\ + \ as speech and music and machine noises in the background, tend to grab\ + \ our attention more. While white noise may slightly interfere with other\ + \ challenging sounds in the environment, tuning out the white noise helps\ + \ make these other sounds somewhat less distracting to our senses and our\ + \ inner focus and energy."} + - {chosen: "\n\nHuman: Can you tell me things to see in Orlando, Fl?\n\nAssistant:\ \ Sure! There are lots of great things to do in Orlando. Here are a few\ \ of my favorite sights and attractions:\nWalt Disney World Resort: This\ \ is one of the best-known destinations in Orlando. Visitors can learn about\ @@ -226,23 +224,22 @@ \ most terror-inducing rollercoaster in Orlando, Florida. \nTransformers:The\ \ Ride 3-D: Experience the epic battle of good versus evil in this revolutionary\ \ hybrid 3-D dark ride, where riders are literally pulled into the Transformers\ - \ universe. \nHollywood" - rejected: "\n\nHuman: Can you tell me things to see in Orlando, Fl?\n\nAssistant:\ - \ Sure! There are lots of great things to do in Orlando. 
Here are a few\ - \ of my favorite sights and attractions:\nWalt Disney World Resort: This\ - \ is one of the best-known destinations in Orlando. Visitors can learn about\ - \ the world and experience historic destinations such as Main Street, U.S.A.\ - \ and Frontierland, or explore futuristic experiences in Tomorrowland. \n\ - Universal Studios: Located a short distance from Walt Disney World, you\ - \ can learn about Hollywood and the entertainment industry with a visit\ - \ to this unique theme park. \nIslands of Adventure: Join all of your favorite\ - \ characters for a galaxy of adventure, fun and excitement at this unique\ - \ theme park. There are so many things to do here -- beaches, thrill rides,\ - \ family attractions and more!\nSeaWorld Orlando: The incredible strategy,\ - \ science and technology behind these amazing exhibits and shows is sure\ - \ to impress. Family-friendly and educational, SeaWorld will delight animal\ - \ lovers. \nOrlando Science Center: This museum is packed full of activities\ - \ and exhibits that teach visitors about physics, robotics, technology,\ + \ universe. \nHollywood", rejected: "\n\nHuman: Can you tell me things to\ + \ see in Orlando, Fl?\n\nAssistant: Sure! There are lots of great things\ + \ to do in Orlando. Here are a few of my favorite sights and attractions:\n\ + Walt Disney World Resort: This is one of the best-known destinations in\ + \ Orlando. Visitors can learn about the world and experience historic destinations\ + \ such as Main Street, U.S.A. and Frontierland, or explore futuristic experiences\ + \ in Tomorrowland. \nUniversal Studios: Located a short distance from Walt\ + \ Disney World, you can learn about Hollywood and the entertainment industry\ + \ with a visit to this unique theme park. \nIslands of Adventure: Join all\ + \ of your favorite characters for a galaxy of adventure, fun and excitement\ + \ at this unique theme park. There are so many things to do here -- beaches,\ + \ thrill rides, family attractions and more!\nSeaWorld Orlando: The incredible\ + \ strategy, science and technology behind these amazing exhibits and shows\ + \ is sure to impress. Family-friendly and educational, SeaWorld will delight\ + \ animal lovers. \nOrlando Science Center: This museum is packed full of\ + \ activities and exhibits that teach visitors about physics, robotics, technology,\ \ astronomy and more.\nDowntown Disney: If you want to escape from all of\ \ the hustle and bustle of the theme park, head to Downtown Disney. With\n\ \nHuman: Thanks. Can you tell me about Universal Studios?\n\nAssistant:\ @@ -253,8 +250,8 @@ \ based on the Harry Potter series.\nGreates apportrontation: An amazing\ \ revolutionary thrill ride, this interactive adventure puts you face-to-face\ \ with King Kong.\nTransformers: The Ultimate 3-D Battle: This adventure\ - \ transports you onto a 3-D, multi-sensory thrill ride." - - chosen: "\n\nHuman: How do I learn to be more appreciative?\n\nAssistant:\ + \ transports you onto a 3-D, multi-sensory thrill ride."} + - {chosen: "\n\nHuman: How do I learn to be more appreciative?\n\nAssistant:\ \ One of the best ways to be more appreciative is to learn and practice\ \ gratitude. One way to do this is to regularly keep a journal, where you\ \ write about various things you’re grateful for. Being grateful for the\ @@ -264,290 +261,369 @@ \ Similarly, you can also make it a habit to reach out to people you appreciate,\ \ such as expressing your gratitude verbally or in a personal note. 
Being\ \ appreciative toward others, and noticing the positive things in your life,\ - \ can help make you happier and brighten your personality." - rejected: "\n\nHuman: How do I learn to be more appreciative?\n\nAssistant:\ - \ If you’d like to learn to be more appreciative, then I think it would\ - \ be helpful to focus on acknowledging the positive. Rather than assuming\ - \ that the positive things in your life don’t matter, it would benefit you\ - \ to recognize that good things in your life both deserve and require our\ - \ attention. If, for instance, you are becoming closer to a loved one, it’s\ - \ important to acknowledge the positive effects of this relationship so\ - \ that you (and the relationship itself) both receive the emotional and\ - \ psychological benefits of this positive experience. Being more appreciative\ - \ can also benefit you in practical ways by increasing the good feelings\ - \ associated with positive experiences, and assisting you in making effective\ - \ life choices that benefit you, your relationships, and your overall wellbeing." - size: - explanation: "The size of the dataset isn't directly reported in the paper,\ - \ but the dataset is available on GitHub [[hh-rlhf]](https://github.com/anthropics/hh-rlhf).\ - \ The compressed data files are distributed under three categories in the\ - \ GitHub repository, corresponding to the names of the models used to sample\ - \ the prompts that are shown to the crowdworkers. These are namely: base,\ - \ rejection sampling, and online. For the base model, the uncompressed sizes\ - \ of the train and test datasets are 69.1 MB and 3.7 MB, respectively. The\ - \ same set of values for the rejection sampling model are 106.1 MB and 5.6\ - \ MB. For the online model, the sizes are 82.7 MB and 4.3 MB. Adding all up,\ - \ the total size of the dataset is 271.5 MB.\n" - value: 271.5 MB - type: dataset - url: https://arxiv.org/pdf/2204.05862.pdf -- access: - explanation: "Unlike its helpfulness, this dataset isn't released [[hh-rlhf]](https://github.com/anthropics/hh-rlhf).\n" - value: closed - analysis: "The crodworkers were told that \"lying isn't helpful\" and asked to\ - \ prefer honest responses, which led to models with higher honesty scores. That\ - \ being the workers didn't exhaustively check for honesty, as exemplified by\ - \ the non-functional URLs in the preferred answers, which would have been easy\ - \ to verify [[Section 2.1]](https://arxiv.org/pdf/2204.05862.pdf#subsection.2.1).\n" + \ can help make you happier and brighten your personality.", rejected: "\n\n\ + Human: How do I learn to be more appreciative?\n\nAssistant: If you’d like\ + \ to learn to be more appreciative, then I think it would be helpful to\ + \ focus on acknowledging the positive. Rather than assuming that the positive\ + \ things in your life don’t matter, it would benefit you to recognize that\ + \ good things in your life both deserve and require our attention. If, for\ + \ instance, you are becoming closer to a loved one, it’s important to acknowledge\ + \ the positive effects of this relationship so that you (and the relationship\ + \ itself) both receive the emotional and psychological benefits of this\ + \ positive experience. 
Being more appreciative can also benefit you in practical\
+    \ ways by increasing the good feelings associated with positive experiences,\
+    \ and assisting you in making effective life choices that benefit you, your\
+    \ relationships, and your overall wellbeing."}
+  analysis: >
+    The authors found that the crowdworkers didn't exhaustively check for
+    honesty in the model answers they preferred
+    [[Section 2.1]](https://arxiv.org/pdf/2204.05862.pdf#subsection.2.1).
+  # Construction
+  dependencies: [Anthropic Human Feedback Interface]
+  license:
+    value: MIT
+    explanation: >
+      The dataset was released on GitHub under MIT
+      [[hh-rlhf]](https://github.com/anthropics/hh-rlhf).
+  included: >
+    The dataset included all the answers that the workers were asked to
+    rank against each other.
+  excluded: none
+  quality_control: >
+    Working with a screened set of crowdworkers, and employing simple data
+    quality measures
+    [[Appendix D]](https://arxiv.org/pdf/2204.05862.pdf#appendix.D).
+  # Downstream
+  access:
+    value: open
+    explanation: >
+      The dataset is freely shared on GitHub
+      [[hh-rlhf]](https://github.com/anthropics/hh-rlhf).
+  intended_uses: >
+    The dataset was intended and released for research purposes.
+  prohibited_uses:
+    value: unknown
+    explanation: >
+      The prohibited use cases of the dataset weren't specified.
+  monitoring:
+    value: none
+    explanation: >
+      There is no monitoring process for the usages of the dataset.
+  feedback: >
+    Email the authors
+    [[Paper]](https://arxiv.org/pdf/2204.05862.pdf).
+
+- type: dataset
+  name: Anthropic Harmlessness dataset
+  # General
+  organization: Anthropic
+  description: >
+    One of the datasets used to train Anthropic RLHF models. The dataset was
+    collected by asking crowdworkers to have open-ended conversations with
+    Anthropic models, aiming to elicit harmful responses, then choose the model
+    answer that was more harmful for their given task, via the Anthropic Human
+    Feedback Interface
+    [[Section 2.2]](https://arxiv.org/pdf/2204.05862.pdf#subsection.2.2).
   created_date:
-    explanation: "The date that the paper was uploaded to ArXiv [[ArXiv Abstract]](https://arxiv.org/abs/2204.05862).\n"
     value: 2022-04-12
+    explanation: >
+      The date that the paper was uploaded to ArXiv
+      [[ArXiv Abstract]](https://arxiv.org/abs/2204.05862).
+  url: https://arxiv.org/pdf/2204.05862.pdf
   datasheet: none
+  modality: Text (English)
+  size: unknown
+  sample: []
+  analysis: >
+    The crowdworkers were told that "lying isn't helpful" and asked to prefer
+    honest responses, which led to models with higher honesty scores. That being
+    said, the workers didn't exhaustively check for honesty, as exemplified by
+    the non-functional URLs in the preferred answers, which would have been easy
+    to verify
+    [[Section 2.1]](https://arxiv.org/pdf/2204.05862.pdf#subsection.2.1).
+  # Construction
   dependencies: [Anthropic Human Feedback Interface]
-  description: "One of the datasets used to train Anthropic RLHF models. 
The dataset\
-    \ was collected by asking crowdworkers to have open-ended conversations with\
-    \ Anthropic models, aiming to elicit harmful responses, then choose the model\
-    \ answer that was more harmful for their given task, via the Anthropic Human\
-    \ Feedback Interface [[Section 2.2]](https://arxiv.org/pdf/2204.05862.pdf#subsection.2.2).\n"
-  excluded: none
-  feedback:
-    explanation: "The feedback mechanism is unknown since the dataset isn't released,\
-      \ but it will likely be \"emailing the authors\" [[Paper]](https://arxiv.org/pdf/2204.05862.pdf).\n"
+  license:
     value: unknown
-  included: "The dataset included all the answers that the workers were asked to\
-    \ ranked against each other.\n"
+    explanation: >
+      The asset isn't released, and hence the license is unknown.
+  included: >
+    The dataset included all the answers that the workers were asked to
+    rank against each other.
+  excluded: none
+  quality_control: >
+    Working with "select" crowdworkers or those screened for certain
+    qualifications, and employing simple data quality measures
+    [[Appendix D]](https://arxiv.org/pdf/2204.05862.pdf#appendix.D).
+  # Downstream
+  access:
+    value: closed
+    explanation: >
+      Unlike the helpfulness dataset, this dataset isn't released
+      [[hh-rlhf]](https://github.com/anthropics/hh-rlhf).
   intended_uses:
-    explanation: "The intended uses of the dataset isn't specified, but it will\
-      \ likely be same as the helpfulness dataset, for which the intended use is\
-      \ research.\n"
     value: unknown
-  license:
-    explanation: "The asset isn't released, and hence the license is unknown.\n"
+    explanation: >
+      The intended uses of the dataset aren't specified, but they will likely be
+      the same as for the helpfulness dataset, for which the intended use is
+      research.
+  prohibited_uses:
     value: unknown
-  modality:
-    explanation: Text (English)
-    value: text
+    explanation: >
+      The prohibited use cases of the dataset weren't specified.
   monitoring:
-    explanation: "There is no monitoring process for the usages of the dataset.\n"
     value: none
-  name: Anthropic Harmlessness dataset
-  organization: Anthropic
-  prohibited_uses:
-    explanation: "The prohibited use cases of the dataset weren't specified.\n"
+    explanation: >
+      There is no monitoring process for the usages of the dataset.
+  feedback:
     value: unknown
-  quality_control: "Working with \"select\" crowdworkers or those screened for certain\
-    \ qualifications, and employing simple data quality measures [[Appendix D]](https://arxiv.org/pdf/2204.05862.pdf#appendix.D).\n"
-  sample: []
-  size: unknown
-  type: dataset
-  url: https://arxiv.org/pdf/2204.05862.pdf
-- access:
-    explanation: "The model wasn't released.\n"
-    value: closed
-  analysis:
-    explanation: "When tested on ability to generate responses that are both harmless\
-      \ and helpful, the authors found that PMs trained on a mixture of harmlessness\
-      \ and helpfulness preference datasets perform better than those trained on\
-      \ only one of the two. The authors also showed that the model performance\
-      \ is hurt for the smaller models, but improved for the larger models when\
-      \ compared to plain language models [[Figure 28]](https://arxiv.org/pdf/2204.05862.pdf#figure.caption.34)\
-      \ [[Figure 29]](https://arxiv.org/pdf/2204.05862.pdf#figure.caption.35). The\
-      \ authors also found the larger RLHF models to be more truthful than their\
-      \ plain counterparts. 
They also found that their models had a more positive\
-      \ sentiment towards groups that the language models are known to be biased\
-      \ against [[Section 4.6]](https://arxiv.org/pdf/2204.05862.pdf#subsection.4.6).\n"
-    value: "The authors analyzed the impact of the dataset mixture on the preference\
-      \ models (PM). In addition to human evaluation, RLHF model were evaluated\
-      \ on MMLU, Lambada, HellaSwag, OpenBookQA, ARC-Easy, ARC-Challenge, TriviaQA,\
-      \ code generation, summarization.\n"
+    explanation: >
+      The feedback mechanism is unknown since the dataset isn't released, but
+      it will likely be "emailing the authors"
+      [[Paper]](https://arxiv.org/pdf/2204.05862.pdf).
+
+- type: model
+  name: Anthropic RLHF models
+  # General
+  organization: Anthropic
+  description: >
+    Anthropic RLHF models are models trained using reinforcement learning
+    from human feedback (RLHF). For Anthropic RLHF models, the authors started
+    with a set of base models, and asked humans to rank model generated prompts
+    based on specific tasks. They then trained preference models (PM) on the
+    prompt pairs, and used the PM scores as rewards for training the RLHF
+    models.
   created_date:
-    explanation: "The date that the paper was uploaded to ArXiv [[ArXiv Abstract]](https://arxiv.org/abs/2204.05862).\n"
     value: 2022-04-12
+    explanation: >
+      The date that the paper was uploaded to ArXiv
+      [[ArXiv Abstract]](https://arxiv.org/abs/2204.05862).
+  url: https://arxiv.org/pdf/2204.05862.pdf
+  model_card: none
+  modality: text (English) and Code
+  size:
+    value: 52B parameters (dense model)
+    explanation: >
+      There are several sizes of the RLHF models, the largest one having 52B
+      parameters
+      [[Figure 1]](https://arxiv.org/pdf/2204.05862.pdf#figure.caption.2).
+  analysis:
+    value: >
+      The authors analyzed the impact of the dataset mixture on the preference
+      models (PM). In addition to human evaluation, RLHF models were evaluated on
+      MMLU, Lambada, HellaSwag, OpenBookQA, ARC-Easy, ARC-Challenge, TriviaQA,
+      code generation, and summarization.
+    explanation: >
+      When tested on the ability to generate responses that are both harmless and
+      helpful, the authors found that PMs trained on a mixture of harmlessness
+      and helpfulness preference datasets perform better than those trained
+      on only one of the two.
+      The authors also showed that the model performance is hurt for the smaller
+      models, but improved for the larger models when compared to plain
+      language models
+      [[Figure 28]](https://arxiv.org/pdf/2204.05862.pdf#figure.caption.34)
+      [[Figure 29]](https://arxiv.org/pdf/2204.05862.pdf#figure.caption.35).
+      The authors also found the larger RLHF models to be more truthful than
+      their plain counterparts. They also found that their models
+      had a more positive sentiment towards groups that the language models are
+      known to be biased against
+      [[Section 4.6]](https://arxiv.org/pdf/2204.05862.pdf#subsection.4.6).
+  # Construction
   dependencies:
   - Anthropic Harmlessness dataset
   - Anthropic Helpfulness dataset
-  description: "Anthropic RLHF models are models trained using reinforcement learning\
-    \ from human feedback (RLHF). For Anthropic RLHF models, authors started with\
-    \ a set of base models, and asked humans to rank model generated prompts based\
-    \ on a specific tasks. 
They then trained preference models (PM) on the prompt\ - \ pairs, and use the PM scores as rewards for training the RLHF models.\n" - feedback: - explanation: "The feedback mechanism is unknown, but it will likely be \"emailing\ - \ the authors\" [[Paper]](https://arxiv.org/pdf/2204.05862.pdf).\n" + training_emissions: value: unknown - intended_uses: - explanation: "The intended uses of the model aren't specified, but they will\ - \ likely be the same as the released Anthropic helpfulness dataset, for which\ - \ the intended use was listed as research. In addition to English language\ - \ generation, the authors also explored code generation capabilities of their\ - \ models.\n" + explanation: The authors do not provide data on the training emissions. + training_time: value: unknown + explanation: The authors do not provide data on the training time. + training_hardware: + value: unknown + explanation: The authors do not provide data on the training hardware. + quality_control: > + unknown + # Downstream + access: + value: closed + explanation: > + The model wasn't released. license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - modality: - explanation: text (English) and Code - value: code, text; code, text - model_card: none - monitoring: - explanation: "There is no known monitoring process for the usages of the model.\n" + explanation: > + The asset isn't released, and hence the license is unknown. + intended_uses: value: unknown - name: Anthropic RLHF models - organization: Anthropic + explanation: > + The intended uses of the model aren't specified, but they will likely be + the same as the released Anthropic helpfulness dataset, for which the + intended use was listed as research. In addition to English language + generation, the authors also explored code generation capabilities of + their models. prohibited_uses: - explanation: "The prohibited use cases of the model weren't specified.\n" value: unknown - quality_control: "unknown\n" - size: 52B parameters (dense) - training_emissions: - explanation: The authors do not provide data on the training emissions. - value: unknown - training_hardware: - explanation: The authors do not provide data on the training hardware. + explanation: > + The prohibited use cases of the model weren't specified. + monitoring: value: unknown - training_time: - explanation: The authors do not provide data on the training time. + explanation: > + There is no known monitoring process for the usages of the model. + feedback: value: unknown - type: model - url: https://arxiv.org/pdf/2204.05862.pdf -- access: - explanation: "The interface was made available to a select set of crowd workers.\n" - value: closed - adaptation: none + explanation: > + The feedback mechanism is unknown, but it will likely be "emailing the + authors" + [[Paper]](https://arxiv.org/pdf/2204.05862.pdf). + +- type: application + name: Anthropic Human Feedback Interface + # General + organization: Anthropic + description: > + The feedback interface used to collect preference datasets to train + Anthropic RLHF models + [[Paper]](https://arxiv.org/pdf/2204.05862.pdf). created_date: - explanation: "The date that the paper was uploaded to ArXiv [[ArXiv Abstract]](https://arxiv.org/abs/2204.05862).\n" value: 2022-04-12 + explanation: > + The date that the paper was uploaded to ArXiv + [[ArXiv Abstract]](https://arxiv.org/abs/2204.05862). 
+ url: https://arxiv.org/pdf/2204.05862.pdf + # Construction dependencies: [Anthropic RLHF models] - description: "The feedback interface used to collect preference datasets to train\ - \ Anthropic RLHF models [[Paper]](https://arxiv.org/pdf/2204.05862.pdf).\n" - failures: unknown - feedback: unknown - intended_uses: "Intended to be used by crowdworkers who are tasked with ranking\ - \ model answers.\n" - license: - explanation: "The asset isn't released, and hence the license is unknown.\n" - value: unknown - monitoring: unknown - monthly_active_users: unknown - name: Anthropic Human Feedback Interface - organization: Anthropic - output_space: "UI allowing users to indicate their preference for the model responses\ - \ shown.\n" - prohibited_uses: unknown + adaptation: none + output_space: > + UI allowing users to indicate their preference for the model responses + shown. quality_control: unknown + # Downstream + access: + value: closed + explanation: > + The interface was made available to a select set of crowd workers. terms_of_service: + value: unknown explanation: The terms of service for the interaface is unknown. + license: value: unknown - type: application - url: https://arxiv.org/pdf/2204.05862.pdf + explanation: > + The asset isn't released, and hence the license is unknown. + intended_uses: > + Intended to be used by crowdworkers who are tasked with ranking model + answers. + prohibited_uses: unknown + monitoring: unknown + feedback: unknown + # Deployment + monthly_active_users: unknown user_distribution: - explanation: The demographics of the crowdworkers are unknown. value: crowdworkers -- access: limited - adaptation: '' - created_date: 2023-03-14 - dependencies: [Claude, Claude Instant] + explanation: The demographics of the crowdworkers are unknown. + failures: unknown + +- type: application + name: Anthropic API + organization: Anthropic description: API is designed to be a backend that incorporates Claude into any application you’ve developed. Our application sends text to our API, then receives a response via server-sent events, a streaming protocol for the web. - failures: '' - feedback: '' - intended_uses: '' - license: - explanation: No license was found. - value: none - monitoring: '' - monthly_active_users: '' - name: Anthropic API - organization: Anthropic + created_date: 2023-03-14 + url: https://console.anthropic.com/docs/api + dependencies: [Claude, Claude Instant] + adaptation: '' output_space: '' - prohibited_uses: '' quality_control: '' + access: limited + license: + value: none + explanation: No license was found. 
terms_of_service: '' - type: application - url: https://console.anthropic.com/docs/api - user_distribution: '' -- access: limited - analysis: '' - created_date: 2023-03-14 - dependencies: [] - description: '' - feedback: '' intended_uses: '' - license: - explanation: "The asset isn't released, and hence the license is unknown.\n" - value: unknown - modality: - explanation: text - value: text; text - model_card: '' + prohibited_uses: '' monitoring: '' + feedback: '' + monthly_active_users: '' + user_distribution: '' + failures: '' + +- type: model name: Claude organization: Anthropic - prohibited_uses: '' - quality_control: '' - size: unknown - training_emissions: '' - training_hardware: '' - training_time: '' - type: model + description: '' + created_date: 2023-03-14 url: https://www.anthropic.com/index/introducing-claude -- access: limited + model_card: '' + modality: text analysis: '' - created_date: 2023-03-14 + size: unknown dependencies: [] - description: '' - feedback: '' - intended_uses: '' + training_emissions: '' + training_time: '' + training_hardware: '' + quality_control: '' + access: limited license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - modality: - explanation: text - value: text; text - model_card: '' + explanation: > + The asset isn't released, and hence the license is unknown. + intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' + +- type: model name: Claude Instant organization: Anthropic - prohibited_uses: '' - quality_control: '' + description: '' + created_date: 2023-03-14 + url: https://www.anthropic.com/index/introducing-claude + model_card: '' + modality: text + analysis: '' size: unknown + dependencies: [] training_emissions: '' - training_hardware: '' training_time: '' - type: model - url: https://www.anthropic.com/index/introducing-claude -- access: open + training_hardware: '' + quality_control: '' + access: limited + license: + value: unknown + explanation: > + The asset isn't released, and hence the license is unknown. + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' + +- type: model + name: Claude 2 + organization: Anthropic + description: Claude 2 is a more evolved and refined version of Claude, which is + a general purpose large language model using a transformer architecture and + trained via unsupervised learning. + created_date: 2023-07-11 + url: https://www.anthropic.com/index/claude-2 + model_card: https://www-files.anthropic.com/production/images/Model-Card-Claude-2.pdf + modality: text analysis: Evaluated with human feedback on helpfulness, harmfulness, and honesty and on the Bias Benchmark for QA. - created_date: 2023-07-11 + size: '' dependencies: - Claude human feedback data - Unknown licensed third party datasets - description: Claude 2 is a more evolved and refined version of Claude, which is - a general purpose large language model using a transformer architecture and - trained via unsupervised learning. - feedback: '' + training_emissions: '' + training_time: '' + training_hardware: unknown + quality_control: '' + access: open + license: '' intended_uses: Claude 2 tends to perform well at general, open-ended conversation; search, writing, editing, outlining, and summarizing text; coding; and providing helpful advice about a broad range of subjects. Claude 2 is particularly well suited to support creative or literary use cases. 
They can take direction on tone and “personality,” and users have described them as feeling steerable and conversational. - license: '' - modality: - explanation: text - value: text; text - model_card: https://www-files.anthropic.com/production/images/Model-Card-Claude-2.pdf - monitoring: '' - name: Claude 2 - organization: Anthropic prohibited_uses: Claude 2 should not be used on their own in high stakes situations where an incorrect answer would cause harm. - quality_control: '' - size: '' - training_emissions: '' - training_hardware: unknown - training_time: '' - type: model - url: https://www.anthropic.com/index/claude-2 + monitoring: '' + feedback: '' diff --git a/assets/argonne.yaml b/assets/argonne.yaml index 049fa897..addd0a35 100644 --- a/assets/argonne.yaml +++ b/assets/argonne.yaml @@ -1,24 +1,23 @@ --- -- access: open - analysis: '' - created_date: 2022-10-11 - dependencies: [SARS-CoV-2 genome dataset, BV-BRC dataset] - description: '' - feedback: '' - intended_uses: '' - license: MIT - modality: - explanation: genome sequence - value: text; text - model_card: '' - monitoring: '' + +- type: model name: GenSLM organization: Argonne National Laboratory - prohibited_uses: '' - quality_control: '' - size: 25B parameters (dense) + description: '' + created_date: 2022-10-11 + url: https://www.biorxiv.org/content/10.1101/2022.10.10.511571v1 + model_card: '' + modality: genome sequence + analysis: '' + size: 25B parameters (dense model) + dependencies: [SARS-CoV-2 genome dataset, BV-BRC dataset] training_emissions: '' - training_hardware: '' training_time: '' - type: model - url: https://www.biorxiv.org/content/10.1101/2022.10.10.511571v1 + training_hardware: '' + quality_control: '' + access: open + license: MIT + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' diff --git a/assets/assembly.yaml b/assets/assembly.yaml index 61848cc2..197b634e 100644 --- a/assets/assembly.yaml +++ b/assets/assembly.yaml @@ -1,35 +1,48 @@ --- -- access: closed - analysis: '' + +- type: dataset + name: Conformer-1 dataset + organization: AssemblyAI + description: The dataset used to train AssemblyAI's Conformer-1 model. created_date: - explanation: The date Conformer-1 was announced in this [[Twitter post]](https://twitter.com/AssemblyAI/status/1636050346240884744). value: 2023-03-15 + explanation: The date Conformer-1 was announced in this [[Twitter post]](https://twitter.com/AssemblyAI/status/1636050346240884744). + url: https://www.assemblyai.com/blog/conformer-1/ datasheet: none + modality: speech (English), text + size: 650K hours audio (60TB) + sample: [] + analysis: '' dependencies: [] - description: The dataset used to train AssemblyAI's Conformer-1 model. + included: > + "our team curated a dataset of 650K hours of English audio - consisting + of proprietary internal datasets and various sources from the internet" excluded: '' - feedback: '' - included: "\"our team curated a dataset of 650K hours of English audio - consisting\ - \ of proprietary internal datasets and various sources from the internet\"\n" - intended_uses: '' + quality_control: '' + access: closed license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - modality: - explanation: speech (English), text - value: audio, text + explanation: > + The asset isn't released, and hence the license is unknown. 
+ intended_uses: '' + prohibited_uses: '' monitoring: '' - name: Conformer-1 dataset + feedback: '' + +- type: model + name: Conformer-1 organization: AssemblyAI - prohibited_uses: '' - quality_control: '' - sample: [] - size: 650K hours audio (60TB) - type: dataset + description: Conformer-1 is a state-of-the-art speech recognition model trained + on 650K hours of audio data that achieves near human-level performance and robustness + across a variety of data, making up to 43% fewer errors on noisy data than other + ASR models. + created_date: + value: 2023-03-15 + explanation: The date the model was announced in this [[Twitter post]](https://twitter.com/AssemblyAI/status/1636050346240884744). url: https://www.assemblyai.com/blog/conformer-1/ -- access: limited + model_card: '' + modality: speech (English), text analysis: - explanation: See [[Results and Performance]](https://www.assemblyai.com/blog/conformer-1/#results-and-performance) value: In order to evaluate the accuracy and robustness of Conformer-1, we sourced 60+ hours of human labeled audio data covering popular speech domains such as call centers, podcasts, broadcasts, and webinars. We then calculated the @@ -37,84 +50,76 @@ the results against Whisper and a number of other ASR models. To ground our results against popular open source speech recognition benchmarks, we also performed the same WER analysis against a number of academic datasets. - created_date: - explanation: The date the model was announced in this [[Twitter post]](https://twitter.com/AssemblyAI/status/1636050346240884744). - value: 2023-03-15 + explanation: See [[Results and Performance]](https://www.assemblyai.com/blog/conformer-1/#results-and-performance) + size: 300M parameters dependencies: [Conformer-1 dataset] - description: Conformer-1 is a state-of-the-art speech recognition model trained - on 650K hours of audio data that achieves near human-level performance and robustness - across a variety of data, making up to 43% fewer errors on noisy data than other - ASR models. - feedback: '' - intended_uses: '' + training_emissions: '' + training_time: '' + training_hardware: '' + quality_control: '' + access: limited license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - modality: - explanation: speech (English), text - value: audio, text; audio, text - model_card: '' + explanation: > + The asset isn't released, and hence the license is unknown. + intended_uses: '' + prohibited_uses: '' monitoring: '' - name: Conformer-1 + feedback: '' + +- type: application + name: AssemblyAI organization: AssemblyAI - prohibited_uses: '' - quality_control: '' - size: 300M parameters (dense) - training_emissions: '' - training_hardware: '' - training_time: '' - type: model - url: https://www.assemblyai.com/blog/conformer-1/ -- access: limited - adaptation: '' - created_date: 2022 - dependencies: [Anthropic API] description: AssemblyAI uses Claude and Anthropic's model to transcribe and understand audio data at scale. 
- failures: '' - feedback: '' - intended_uses: '' + created_date: 2022 + url: https://www.assemblyai.com/ + dependencies: [Anthropic API] + adaptation: '' + output_space: '' + quality_control: '' + access: limited license: - explanation: "\"AssemblyAI gives you a personal, worldwide, royalty-free, non-assignable\ - \ and non-exclusive license to use the Site and Services solely for your reasonable\ - \ personal and commercial purposes, in the manner permitted by these Terms.\"\ - \ - excerpt from the Terms of Service.\n" value: custom + explanation: > + "AssemblyAI gives you a personal, worldwide, royalty-free, + non-assignable and non-exclusive license to use the Site and Services + solely for your reasonable personal and commercial purposes, in the + manner permitted by these Terms." - excerpt from the Terms of Service. + terms_of_service: https://www.assemblyai.com/legal/terms-of-service + intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' monthly_active_users: '' - name: AssemblyAI - organization: AssemblyAI - output_space: '' - prohibited_uses: '' - quality_control: '' - terms_of_service: https://www.assemblyai.com/legal/terms-of-service - type: application - url: https://www.assemblyai.com/ user_distribution: '' -- access: open - adaptation: '' + failures: '' + +- type: application + name: Conformer-1 API + organization: AssemblyAI + description: API to access the AssemblyAI's Conformer-1 model. created_date: - explanation: The date the API was announced in this [[Twitter post]](https://twitter.com/AssemblyAI/status/1636050346240884744). value: 2023-03-15 + explanation: The date the API was announced in this [[Twitter post]](https://twitter.com/AssemblyAI/status/1636050346240884744). + url: https://www.assemblyai.com/blog/conformer-1/ dependencies: [Conformer-1] - description: API to access the AssemblyAI's Conformer-1 model. - failures: '' - feedback: '' - intended_uses: Speech recognition + adaptation: '' + output_space: '' + quality_control: '' + access: open license: - explanation: "\"AssemblyAI gives you a personal, worldwide, royalty-free, non-assignable\ - \ and non-exclusive license to use the Site and Services solely for your reasonable\ - \ personal and commercial purposes, in the manner permitted by these Terms.\"\ - \ - excerpt from the Terms of Service.\n" value: custom + explanation: > + "AssemblyAI gives you a personal, worldwide, royalty-free, + non-assignable and non-exclusive license to use the Site and Services + solely for your reasonable personal and commercial purposes, in the + manner permitted by these Terms." - excerpt from the Terms of Service. 
+ terms_of_service: https://www.assemblyai.com/legal/terms-of-service + intended_uses: Speech recognition + prohibited_uses: '' monitoring: '' + feedback: '' monthly_active_users: '' - name: Conformer-1 API - organization: AssemblyAI - output_space: '' - prohibited_uses: '' - quality_control: '' - terms_of_service: https://www.assemblyai.com/legal/terms-of-service - type: application - url: https://www.assemblyai.com/blog/conformer-1/ user_distribution: '' + failures: '' diff --git a/assets/autogpt.yaml b/assets/autogpt.yaml index 34a3ef5f..5a3a040f 100644 --- a/assets/autogpt.yaml +++ b/assets/autogpt.yaml @@ -1,22 +1,23 @@ --- -- access: open - adaptation: GPT-4 adapted to run autonomously by chaining together LLM "thoughts" - created_date: 2023-04-16 - dependencies: [GPT-4 API] - description: Auto-GPT is an experimental open-source application showcasing the - capabilities of the GPT-4 language model. - failures: '' - feedback: '' - intended_uses: '' - license: MIT - monitoring: '' - monthly_active_users: '' + +- type: application name: Auto-GPT organization: Auto-GPT + description: Auto-GPT is an experimental open-source application showcasing the + capabilities of the GPT-4 language model. + created_date: 2023-04-16 + url: https://news.agpt.co/ + dependencies: [GPT-4 API] + adaptation: GPT-4 adapted to run autonomously by chaining together LLM "thoughts" output_space: text - prohibited_uses: '' quality_control: '' + access: open + license: MIT terms_of_service: '' - type: application - url: https://news.agpt.co/ + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' + monthly_active_users: '' user_distribution: '' + failures: '' diff --git a/assets/baai.yaml b/assets/baai.yaml index e90846c6..ab838a1d 100644 --- a/assets/baai.yaml +++ b/assets/baai.yaml @@ -1,57 +1,68 @@ --- -- access: closed - analysis: '' + +- type: dataset + name: Wu Dao dataset + # General + organization: Beijing Academy of Artificial Intelligence + description: '' created_date: - explanation: "The date that BAAI made a public announcement [[News Link]](https://mp.weixin.qq.com/s/BUQWZ5EdR19i40GuFofpBg).\n" value: 2021-01-12 + explanation: > + The date that BAAI made a public announcement + [[News Link]](https://mp.weixin.qq.com/s/BUQWZ5EdR19i40GuFofpBg). + url: https://www.tsinghua.edu.cn/en/info/1420/10473.htm datasheet: '' + modality: text (Chinese, English) and image + size: unknown + sample: [] + analysis: '' + # Construction dependencies: [] - description: '' - excluded: '' - feedback: '' - included: '' - intended_uses: '' license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - modality: - explanation: text (Chinese, English) and image - value: image, text + explanation: > + The asset isn't released, and hence the license is unknown. 
+ included: '' + excluded: '' + quality_control: '' + # Downstream + access: closed + intended_uses: '' + prohibited_uses: '' monitoring: '' - name: Wu Dao dataset + feedback: '' + +- type: model + name: Wu Dao 2.0 + # General organization: Beijing Academy of Artificial Intelligence - prohibited_uses: '' - quality_control: '' - sample: [] - size: unknown - type: dataset - url: https://www.tsinghua.edu.cn/en/info/1420/10473.htm -- access: - explanation: "The model wasn't released.\n" - value: closed - analysis: '' + description: '' created_date: - explanation: "The date that BAAI made a public announcement [[News Link]](https://mp.weixin.qq.com/s/BUQWZ5EdR19i40GuFofpBg).\n" value: 2021-01-12 + explanation: > + The date that BAAI made a public announcement + [[News Link]](https://mp.weixin.qq.com/s/BUQWZ5EdR19i40GuFofpBg). + url: https://www.tsinghua.edu.cn/en/info/1420/10473.htm + model_card: '' + modality: text (Chinese, English) and image + size: 1.75T parameters (dense model) + analysis: '' + # Construction dependencies: [Wu Dao dataset] - description: '' - feedback: '' - intended_uses: '' + training_emissions: '' + training_time: '' + training_hardware: '' + quality_control: '' + # Downstream + access: + value: closed + explanation: > + The model wasn't released. license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - modality: - explanation: text (Chinese, English) and image - value: image, text; image, text - model_card: '' - monitoring: '' - name: Wu Dao 2.0 - organization: Beijing Academy of Artificial Intelligence + explanation: > + The asset isn't released, and hence the license is unknown. + intended_uses: '' prohibited_uses: '' - quality_control: '' - size: 1.75T parameters (dense) - training_emissions: '' - training_hardware: '' - training_time: '' - type: model - url: https://www.tsinghua.edu.cn/en/info/1420/10473.htm + monitoring: '' + feedback: '' diff --git a/assets/baidu.yaml b/assets/baidu.yaml index bed1db91..c6386cb1 100644 --- a/assets/baidu.yaml +++ b/assets/baidu.yaml @@ -1,90 +1,89 @@ --- -- access: - explanation: Baidu does not provide ERNIE 3.0 for external researchers - value: closed - analysis: '' + +- type: model + name: ERNIE 3.0 Titan + organization: Baidu, PengCheng Laboratory + description: ERNIE 3.0 Titan is a language model created_date: - explanation: The date the model paper was released value: 2021-12-23 + explanation: The date the model paper was released + url: https://arxiv.org/abs/2112.12731 + model_card: none + modality: text (Chinese) + analysis: '' + size: 260B parameters (dense model) dependencies: [] - description: ERNIE 3.0 Titan is a language model - feedback: none - intended_uses: unknown + training_emissions: unknown + training_time: unknown + training_hardware: Baidu V100 Cluster, PengCheng Lab Ascend 910 NPU cluster + quality_control: none + access: + value: closed + explanation: Baidu does not provide ERNIE 3.0 for external researchers license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - modality: - explanation: text (Chinese) - value: text; text - model_card: none - monitoring: none - name: ERNIE 3.0 Titan - organization: Baidu, PengCheng Laboratory + explanation: > + The asset isn't released, and hence the license is unknown. 
+ intended_uses: unknown prohibited_uses: unknown - quality_control: none - size: 260B parameters (dense) + monitoring: none + feedback: none + +- type: model + name: ERNIE-ViLG + organization: Baidu + description: ERNIE-ViLG is a model for text-to-image generation + created_date: + value: 2021-12-31 + explanation: The date the model paper was released + url: https://arxiv.org/abs/2112.15283 + model_card: none + modality: text (Chinese) and image + size: 10B parameters + analysis: '' + dependencies: [] training_emissions: unknown - training_hardware: Baidu V100 Cluster, PengCheng Lab Ascend 910 NPU cluster training_time: unknown - type: model - url: https://arxiv.org/abs/2112.12731 -- access: + training_hardware: unknown + quality_control: '' + access: + value: limited explanation: API access is available through PaddleHub. Instructions on [[Github repository]](https://github.com/PaddlePaddle/PaddleHub/blob/develop/modules/image/text_to_image/ernie_vilg/README.md) - value: limited - analysis: '' - created_date: - explanation: The date the model paper was released - value: 2021-12-31 - dependencies: [] - description: ERNIE-ViLG is a model for text-to-image generation - feedback: none - intended_uses: unknown license: - explanation: "No explicit license for the ERNIE-ViLG model is provided in the\ - \ [[Github repository]](https://github.com/PaddlePaddle/PaddleHub)\n" value: none - modality: - explanation: text (Chinese) and image - value: image, text; image, text - model_card: none + explanation: > + No explicit license for the ERNIE-ViLG model is provided in the [[Github repository]](https://github.com/PaddlePaddle/PaddleHub) + intended_uses: unknown + prohibited_uses: unknown monitoring: none - name: ERNIE-ViLG + feedback: none + +- type: model + name: ERNIE-ViLG 2.0 organization: Baidu - prohibited_uses: unknown - quality_control: '' - size: 10B parameters (dense) - training_emissions: unknown - training_hardware: unknown - training_time: unknown - type: model - url: https://arxiv.org/abs/2112.15283 -- access: - explanation: Baidu does not provide ERNIE-ViLG 2.0 to external researchers - value: closed - analysis: '' + description: ERNIE-ViLG is a model for text-to-image generation created_date: - explanation: The date the model paper was released value: 2022-10-27 + explanation: The date the model paper was released + url: https://arxiv.org/abs/2210.15257 + model_card: none + modality: text (Chinese) and image + size: 10B parameters + analysis: '' dependencies: [] - description: ERNIE-ViLG is a model for text-to-image generation - feedback: none - intended_uses: unknown + training_emissions: unknown + training_time: 18 days according to [[the paper]](https://arxiv.org/abs/2210.15257) + training_hardware: 320 A100 GPUs according to [[the paper]](https://arxiv.org/abs/2210.15257) + quality_control: '' + access: + value: closed + explanation: Baidu does not provide ERNIE-ViLG 2.0 to external researchers license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - modality: - explanation: text (Chinese) and image - value: image, text; image, text - model_card: none - monitoring: none - name: ERNIE-ViLG 2.0 - organization: Baidu + explanation: > + The asset isn't released, and hence the license is unknown. 
+ intended_uses: unknown prohibited_uses: unknown - quality_control: '' - size: 10B parameters (dense) - training_emissions: unknown - training_hardware: 320 A100 GPUs according to [[the paper]](https://arxiv.org/abs/2210.15257) - training_time: 18 days according to [[the paper]](https://arxiv.org/abs/2210.15257) - type: model - url: https://arxiv.org/abs/2210.15257 + monitoring: none + feedback: none diff --git a/assets/bain.yaml b/assets/bain.yaml index 10f57f76..c6f68c19 100644 --- a/assets/bain.yaml +++ b/assets/bain.yaml @@ -1,24 +1,25 @@ --- -- access: limited - adaptation: '' - created_date: 2023-02-21 - dependencies: [ChatGPT API] + +- type: application + name: Bain Chat + organization: Bain description: With the alliance, Bain will combine its deep digital implementation capabilities and strategic expertise with OpenAI’s AI tools and platforms, including ChatGPT, to help its clients around the world identify and implement the value of AI to maximize business potential. - failures: '' - feedback: '' - intended_uses: '' - license: unknown - monitoring: '' - monthly_active_users: '' - name: Bain Chat - organization: Bain + created_date: 2023-02-21 + url: https://www.bain.com/vector-digital/partnerships-alliance-ecosystem/openai-alliance/ + dependencies: [ChatGPT API] + adaptation: '' output_space: '' - prohibited_uses: '' quality_control: '' + access: limited + license: unknown terms_of_service: '' - type: application - url: https://www.bain.com/vector-digital/partnerships-alliance-ecosystem/openai-alliance/ + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' + monthly_active_users: '' user_distribution: '' + failures: '' diff --git a/assets/be_my_eyes.yaml b/assets/be_my_eyes.yaml index c9a60174..19c1e9cc 100644 --- a/assets/be_my_eyes.yaml +++ b/assets/be_my_eyes.yaml @@ -1,22 +1,23 @@ --- -- access: limited - adaptation: '' - created_date: 2023-03-14 - dependencies: [GPT-4 API] - description: The first-ever digital visual assistant powered by OpenAI’s new GPT-4 - language model. - failures: '' - feedback: '' - intended_uses: '' - license: unknown - monitoring: '' - monthly_active_users: '' + +- type: application name: Virtual Volunteer organization: Be My Eyes + description: The first-ever digital visual assistant powered by OpenAI’s new GPT-4 + language model. + created_date: 2023-03-14 + url: https://www.bemyeyes.com/blog/introducing-be-my-eyes-virtual-volunteer + dependencies: [GPT-4 API] + adaptation: '' output_space: '' - prohibited_uses: '' quality_control: '' + access: limited + license: unknown terms_of_service: '' - type: application - url: https://www.bemyeyes.com/blog/introducing-be-my-eyes-virtual-volunteer + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' + monthly_active_users: '' user_distribution: '' + failures: '' diff --git a/assets/berkeley.yaml b/assets/berkeley.yaml index 76756276..e8bab3a0 100644 --- a/assets/berkeley.yaml +++ b/assets/berkeley.yaml @@ -1,75 +1,72 @@ --- -- access: open - analysis: Evaluated in comparison with ChatGPT and Stanford Alpaca. - created_date: 2023-04-03 - dependencies: [LLaMA, web-scraped dialogue data] + +- type: model + name: Koala + organization: Berkeley description: A relatively small chatbot trained by fine-tuning Meta’s LLaMA on dialogue data gathered from the web. 
- feedback: https://huggingface.co/TheBloke/koala-7B-GPTQ-4bit-128g/discussions - intended_uses: academic research - license: Apache 2.0 - modality: - explanation: natural language text - value: text; text + created_date: 2023-04-03 + url: https://bair.berkeley.edu/blog/2023/04/03/koala/ model_card: https://huggingface.co/TheBloke/koala-7B-GPTQ-4bit-128g - monitoring: '' - name: Koala - organization: Berkeley - prohibited_uses: '' - quality_control: '' - size: 13B parameters (dense) + modality: natural language text + analysis: Evaluated in comparison with ChatGPT and Stanford Alpaca. + size: 13B parameters + dependencies: [LLaMA, web-scraped dialogue data] training_emissions: '' - training_hardware: 8 A100 GPUs training_time: 6 hours - type: model - url: https://bair.berkeley.edu/blog/2023/04/03/koala/ -- access: open - analysis: Evaluated using AST sub-tree matching technique and compared to other - models in terms of API functionality accuracy. - created_date: 2023-05-24 - dependencies: [LLaMA, Gorilla document retriever] - description: Gorilla is a finetuned LLaMA-based model that surpasses the performance - of GPT-4 on writing API calls. - feedback: '' - intended_uses: In conjunction with a LLM to improve its capability for using API - calls. + training_hardware: 8 A100 GPUs + quality_control: '' + access: open license: Apache 2.0 - modality: - explanation: outputs API from natural language input - value: other; other - model_card: '' + intended_uses: academic research + prohibited_uses: '' monitoring: '' + feedback: https://huggingface.co/TheBloke/koala-7B-GPTQ-4bit-128g/discussions + +- type: model name: Gorilla organization: Berkeley - prohibited_uses: '' - quality_control: No specific quality control is mentioned in model training, though - details on data processing and collection are provided in the paper. - size: 7B parameters (dense) + description: Gorilla is a finetuned LLaMA-based model that surpasses the performance + of GPT-4 on writing API calls. + created_date: 2023-05-24 + url: https://arxiv.org/pdf/2305.15334v1.pdf + model_card: '' + modality: outputs API from natural language input + analysis: Evaluated using AST sub-tree matching technique and compared to other + models in terms of API functionality accuracy. + size: 7B parameters + dependencies: [LLaMA, Gorilla document retriever] training_emissions: '' - training_hardware: '' training_time: '' - type: model - url: https://arxiv.org/pdf/2305.15334v1.pdf -- access: open - analysis: Evaluated on wide range of tasks using own evaluation benchmarks. - created_date: 2023-05-03 - dependencies: [RedPajama] - description: OpenLlama is an open source reproduction of Meta's LLaMA model. - feedback: '' - intended_uses: '' + training_hardware: '' + quality_control: No specific quality control is mentioned in model training, though + details on data processing and collection are provided in the paper. + access: open license: Apache 2.0 - modality: - explanation: text - value: text; text - model_card: '' + intended_uses: In conjunction with a LLM to improve its capability for using API + calls. + prohibited_uses: '' monitoring: '' + feedback: '' + +- type: model name: OpenLLaMA organization: Berkeley - prohibited_uses: '' - quality_control: '' - size: 17B parameters (dense) + description: OpenLlama is an open source reproduction of Meta's LLaMA model. 
+ created_date: 2023-05-03 + url: https://github.com/openlm-research/open_llama + model_card: '' + modality: text + analysis: Evaluated on wide range of tasks using own evaluation benchmarks. + size: 17B parameters + dependencies: [RedPajama] training_emissions: unknown - training_hardware: '' training_time: unknown - type: model - url: https://github.com/openlm-research/open_llama + training_hardware: '' + quality_control: '' + access: open + license: Apache 2.0 + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' diff --git a/assets/bigcode.yaml b/assets/bigcode.yaml index cb03bed0..4a5362ea 100644 --- a/assets/bigcode.yaml +++ b/assets/bigcode.yaml @@ -1,79 +1,76 @@ --- -- access: open - analysis: Tested on several benchmarks, most notably Python benchmark HumanEval. - created_date: 2023-05-09 - dependencies: [The Stack] + +- type: model + name: StarCoder + organization: BigCode description: StarCoder is a Large Language Model for Code (Code LLM) trained on permissively licensed data from GitHub, including from 80+ programming languages, Git commits, GitHub issues, and Jupyter notebooks. - feedback: https://huggingface.co/bigcode/starcoder/discussions - intended_uses: With a Tech Assistant prompt and not as an instruction model given - training limitations. - license: Apache 2.0 - modality: - explanation: code (80+ programming languages) - value: code; code + created_date: 2023-05-09 + url: https://arxiv.org/pdf/2305.06161.pdf model_card: https://huggingface.co/bigcode/starcoder - monitoring: '' - name: StarCoder - organization: BigCode - prohibited_uses: '' + modality: code (80+ programming languages) + analysis: Tested on several benchmarks, most notably Python benchmark HumanEval. + size: 15.5B parameters + dependencies: [The Stack] + training_emissions: 16.68 tons of CO2eq + training_time: 2 days + training_hardware: 64 NVIDIA A100 GPUs quality_control: No specific quality control is mentioned in model training, though details on data processing and how the tokenizer was trained are provided in the paper. - size: 15.5B parameters (dense) - training_emissions: 16.68 tons of CO2eq - training_hardware: 64 NVIDIA A100 GPUs - training_time: 2 days - type: model - url: https://arxiv.org/pdf/2305.06161.pdf -- access: open - analysis: Evaluated on MultiPL-E system benchmarks. - created_date: 2023-02-24 - dependencies: [The Stack, BigCode Dataset] - description: Multilingual code model derived from findings of BigCode Project - analysis on Github stars' association to data quality. - feedback: '' - intended_uses: '' + access: open license: Apache 2.0 - modality: - explanation: code - value: code; code - model_card: '' + intended_uses: With a Tech Assistant prompt and not as an instruction model given + training limitations. + prohibited_uses: '' monitoring: '' + feedback: https://huggingface.co/bigcode/starcoder/discussions + +- type: model name: SantaCoder organization: BigCode - prohibited_uses: '' - quality_control: '' - size: 1.1B parameters (dense) + description: Multilingual code model derived from findings of BigCode Project + analysis on Github stars' association to data quality. + created_date: 2023-02-24 + url: https://arxiv.org/pdf/2301.03988.pdf + model_card: '' + modality: code + analysis: Evaluated on MultiPL-E system benchmarks. 
+ size: 1.1B parameters + dependencies: [The Stack, BigCode Dataset] training_emissions: '' - training_hardware: 96 NVIDIA Tesla V100 GPUs training_time: 3.1 days - type: model - url: https://arxiv.org/pdf/2301.03988.pdf -- access: open - analysis: Evaluated models trained on The Stack on HumanEval and MBPP and compared - against similarly-sized models. - created_date: 2022-11-20 - datasheet: https://huggingface.co/datasets/bigcode/the-stack - dependencies: [GitHub] - description: The Stack is a 3.1 TB dataset consisting of permissively licensed - source code inteded for use in creating code LLMs. - excluded: '' - feedback: '' - included: '' - intended_uses: creating code LLMs + training_hardware: 96 NVIDIA Tesla V100 GPUs + quality_control: '' + access: open license: Apache 2.0 - modality: - explanation: code (358 programming languages) - value: code + intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' + +- type: dataset name: The Stack organization: BigCode - prohibited_uses: '' + description: The Stack is a 3.1 TB dataset consisting of permissively licensed + source code intended for use in creating code LLMs. + created_date: 2022-11-20 + url: https://arxiv.org/pdf/2211.15533.pdf + datasheet: https://huggingface.co/datasets/bigcode/the-stack + modality: code (358 programming languages) + size: 3.1 TB + sample: [] + analysis: Evaluated models trained on The Stack on HumanEval and MBPP and compared + against similarly-sized models. + dependencies: [GitHub] + included: '' + excluded: '' + quality_control: allowed users whose data were part of The Stack's training data to opt-out - sample: [] - size: 3.1 TB - type: dataset - url: https://arxiv.org/pdf/2211.15533.pdf + access: open + license: Apache 2.0 + intended_uses: creating code LLMs + prohibited_uses: '' + monitoring: '' + feedback: '' diff --git a/assets/bigscience.yaml b/assets/bigscience.yaml index ce03bf3f..21251e9d 100644 --- a/assets/bigscience.yaml +++ b/assets/bigscience.yaml @@ -1,148 +1,163 @@ --- -- access: - explanation: While not all of the ROOTS corpus is publicly accessible, the majority - is through [this form](https://docs.google.com/forms/d/e/1FAIpQLSdq50O1x4dkdGI4dwsmchFuNI0KCWEDiKUYxvd0r0_sl6FfAQ/viewform?usp=send_form) - value: open - analysis: https://huggingface.co/spaces/bigscience-data/roots-search - created_date: 2022-06-06 - datasheet: https://huggingface.co/spaces/bigscience-data/bigscience-corpus - dependencies: [] + +# Datasets +- type: dataset + name: ROOTS + # General + organization: BigScience description: The Responsible Open-science Open-collaboration Text Sources (ROOTS) corpus, a 1.6TB dataset spanning 59 languages that was used to train the 176-billion-parameter BigScience Large Open-science Open-access Multilingual (BLOOM) language model. - excluded: unknown - feedback: https://huggingface.co/spaces/bigscience-data/roots-search/discussions - included: See section 2 of the paper. - intended_uses: To empower large-scale monolingual and multilingual modeling projects - with both the data and the processing tools, as well as stimulate research around - this large multilingual corpus.
+ created_date: 2022-06-06 + url: https://openreview.net/forum?id=UoEw6KigkUn + datasheet: https://huggingface.co/spaces/bigscience-data/bigscience-corpus + modality: text (46 languages) and code (13 programming languages) + size: 1.6TB + sample: [https://huggingface.co/spaces/bigscience-data/roots-search] + analysis: https://huggingface.co/spaces/bigscience-data/roots-search + # Construction + dependencies: [] license: + value: custom explanation: Each constituent subset of the dataset will be released under the license that applies to it. ([See individual dataset page for specific license information](https://hf.co/bigscience-data)) - value: custom - modality: - explanation: text (46 languages) and code (13 programming languages) - value: code, text - monitoring: none - name: ROOTS - organization: BigScience - prohibited_uses: none + included: See section 2 of the paper. + excluded: unknown quality_control: Data collection involved merging and deduplicating searches to remove menus, HTML tags. Further, a quality improvement pipeline was implemented. - sample: [https://huggingface.co/spaces/bigscience-data/roots-search] - size: 1.6TB - type: dataset - url: https://openreview.net/forum?id=UoEw6KigkUn -- access: open - analysis: See the paper. + # Downstream + access: + value: open + explanation: While not all of the ROOTS corpus is publicly accessible, the majority + is through [this form](https://docs.google.com/forms/d/e/1FAIpQLSdq50O1x4dkdGI4dwsmchFuNI0KCWEDiKUYxvd0r0_sl6FfAQ/viewform?usp=send_form) + intended_uses: To empower large-scale monolingual and multilingual modeling projects + with both the data and the processing tools, as well as stimulate research around + this large multilingual corpus. + prohibited_uses: none + monitoring: none + feedback: https://huggingface.co/spaces/bigscience-data/roots-search/discussions + +- type: dataset + name: P3 + # General + organization: BigScience + description: The Public Pool of Prompts (P3) is a collection of prompts written in a unified + format used to train T0++. created_date: 2022-10-15 + url: https://arxiv.org/pdf/2110.08207.pdf datasheet: https://huggingface.co/datasets/bigscience/P3 + modality: text (English) + size: 2000 prompts + sample: [https://huggingface.co/datasets/bigscience/P3/viewer] + analysis: See the paper. + # Construction dependencies: [] - description: The Public Pool of Prompts (P3) are prompts written in an unified - format use to train T0++. - excluded: We conservatively decided not to prompt datasets that contain potentially - harmful content (for instance, datasets built on social media content). - feedback: Point of Contact is [Victor Sanh](https://huggingface.co/datasets/bigscience/P3) + license: Apache 2.0 included: The Public Pool of Prompts relies on the Hugging Face Dataset library. Any public dataset in the Datasets library can be prompted. We select the datasets that have at least one subset in English and excluded datasets containing (predominantly) non-natural language examples. + excluded: We conservatively decided not to prompt datasets that contain potentially + harmful content (for instance, datasets built on social media content). + quality_control: Data collection involved merging and deduplicating searches to + remove menus, HTML tags. Further, a quality improvement pipeline was implemented. + # Downstream + access: open intended_uses: Multitask finetuning of language models.
- license: Apache 2.0 - modality: - explanation: text (English) - value: text + prohibited_uses: none monitoring: none - name: P3 + feedback: Point of Contact is [Victor Sanh](https://huggingface.co/datasets/bigscience/P3) + +- type: dataset + name: xP3 + # General organization: BigScience - prohibited_uses: none - quality_control: Data collection involved merging and deduplicating searches to - remove menus, HTML tags. Further, a quality improvement pipeline was implemented. - sample: [https://huggingface.co/datasets/bigscience/P3/viewer] - size: 2000 prompts - type: dataset - url: https://arxiv.org/pdf/2110.08207.pdf -- access: open - analysis: See the paper. - created_date: 2022-11-03 - datasheet: https://huggingface.co/datasets/bigscience/xP3 - dependencies: [P3] description: xP3 (Crosslingual Public Pool of Prompts) is a collection of prompts and datasets across 46 languages & 16 NLP tasks. It is used for the training of BLOOMZ and mT0, multilingual language models capable of following human instructions in dozens of languages zero-shot. + created_date: 2022-11-03 + url: https://arxiv.org/abs/2211.01786 + datasheet: https://huggingface.co/datasets/bigscience/xP3 + modality: text (46 languages) and code (13 programming languages) + size: 9.4GB + sample: [https://huggingface.co/datasets/bigscience/xP3/viewer] + analysis: See the paper. + # Construction + dependencies: [P3] + license: Apache 2.0 + included: xP3 adds 28 multilingual datasets to P3 based on the P3 task taxonomy. excluded: We conservatively decided not to prompt datasets that contain potentially harmful content (for instance, datasets built on social media content). - feedback: Point of Contact is [Niklas Muennighoff](https://huggingface.co/datasets/bigscience/xP3) - included: xP3 adds 28 multilingual datasets to P3 based on the P3 task taxonomy. + quality_control: Data collection involved merging and deduplicating searches to + remove menus, HTML tags. Further, a quality improvement pipeline was implemented. + # Downstream + access: open intended_uses: Multitask finetuning of language models. - license: Apache 2.0 - modality: - explanation: text (46 languages) and code (13 programming languages) - value: code, text + prohibited_uses: none monitoring: none - name: xP3 + feedback: Point of Contact is [Niklas Muennighoff](https://huggingface.co/datasets/bigscience/xP3) + + +- type: model + name: T0++ organization: BigScience - prohibited_uses: none - quality_control: Data collection involved merging and deduplicating searches to - remove menus, HTML tags. Further, a quality improvement pipeline was implemented. - sample: [https://huggingface.co/datasets/bigscience/xP3/viewer] - size: 9.4GB - type: dataset - url: https://arxiv.org/abs/2211.01786 -- access: open - analysis: '' + description: T0++ is a multitask fine-tuned language model based on T5. created_date: - explanation: The date the paper was posted to arXiv. value: 2021-10-15 - dependencies: [T5, P3] - description: T0++ is an multitask fine-tuned language model based on T5. - feedback: https://huggingface.co/bigscience/T0pp/discussions - intended_uses: You can use the models to perform inference on tasks by specifying - your query in natural language, and the models will generate a prediction. - license: Apache 2.0 - modality: - explanation: text (English) - value: text; text + explanation: The date the paper was posted to arXiv.
+ url: https://arxiv.org/pdf/2110.08207.pdf model_card: https://huggingface.co/bigscience/T0pp - monitoring: none - name: T0++ - organization: BigScience - prohibited_uses: none - quality_control: https://arxiv.org/pdf/2110.08207.pdf - size: 11B parameters (dense) + modality: text (English) + size: 11B parameters (dense model) + analysis: '' + dependencies: [T5, P3] training_emissions: - explanation: Sourced from estimates in https://arxiv.org/pdf/2110.08207.pdf value: 0.9 tCO2e - training_hardware: Jean Zay (v3-512) - training_time: explanation: Sourced from estimates in https://arxiv.org/pdf/2110.08207.pdf + training_time: value: 27 hours - type: model - url: https://arxiv.org/pdf/2110.08207.pdf -- access: open - analysis: '' + explanation: Sourced from estimates in https://arxiv.org/pdf/2110.08207.pdf + training_hardware: Jean Zay (v3-512) + quality_control: https://arxiv.org/pdf/2110.08207.pdf + access: open + license: Apache 2.0 + intended_uses: You can use the models to perform inference on tasks by specifying + your query in natural language, and the models will generate a prediction. + prohibited_uses: none + monitoring: none + feedback: https://huggingface.co/bigscience/T0pp/discussions + +- type: model + name: BLOOM + organization: BigScience + description: BLOOM is an autoregressive multilingual language model. created_date: - explanation: The date the model was released value: 2022-07-12 + explanation: The date the model was released + url: https://arxiv.org/abs/2211.05100 + model_card: '' + modality: text (46 languages) and code (13 programming languages) + size: 176B parameters (dense model) + analysis: '' dependencies: [ROOTS] - description: BLOOM is an autoregressive multilingual language model. - feedback: https://huggingface.co/bigscience/bloom/discussions + training_emissions: + value: 25 tCO2e + explanation: Sourced from estimates in https://arxiv.org/abs/2211.02001 + training_time: + value: 7039 petaflop/s-days + explanation: 1082990 A100 hours at 156 TFLOP/s maximum utilization + training_hardware: Jean Zay (48 * 8xA100 80GB nodes) + quality_control: '' + access: open + license: + value: BigScience RAIL v1.0 + explanation: Model is licensed under https://huggingface.co/spaces/bigscience/license intended_uses: This model is being created in order to enable public research on large language models (LLMs). LLMs are intended to be used for language generation or as a pretrained base model that can be further fine-tuned for specific tasks. Use cases below are not exhaustive. - license: - explanation: Model is licensed under https://huggingface.co/spaces/bigscience/license - value: BigScience RAIL v1.0 - modality: - explanation: text (46 languages) and code (13 programming languages) - value: code, text; code, text - model_card: '' - monitoring: none - name: BLOOM - organization: BigScience prohibited_uses: Using the model in high-stakes settings is out of scope for this model (e.g. biomedical/political/legal/finance domains, evaluating or scoring individuals). The model is not designed for critical decisions nor uses with @@ -151,72 +166,61 @@ using the model for harm, violating human rights, or other kinds of malicious activities, is a misuse of this model (e.g. spam generation, disinformation, disparagement, deception, surveillance). 
- quality_control: '' - size: 176B parameters (dense) - training_emissions: - explanation: Sourced from estimates in https://arxiv.org/abs/2211.02001 - value: 25 tCO2e - training_hardware: Jean Zay (48 * 8xA100 80GB nodes) - training_time: - explanation: 1082990 A100 hours at 156 TFLOP/s maximum utilization - value: 7039 petaflop/s-days - type: model - url: https://arxiv.org/abs/2211.05100 -- access: open - analysis: https://huggingface.co/bigscience/bloomz#evaluation - created_date: - explanation: The date the model was released - value: 2021-10-15 - dependencies: [mT5, xP3] - description: mT0 is an multitask fine-tuned multilingual language model based - on mT5. - feedback: https://huggingface.co/bigscience/bloomz/discussions - intended_uses: We recommend using the model to perform tasks expressed in natural - language. - license: - explanation: Model is licensed under https://huggingface.co/spaces/bigscience/license - value: BigScience RAIL v1.0 - modality: - explanation: text (English) - value: text; text - model_card: https://huggingface.co/bigscience/T0pp monitoring: none + feedback: https://huggingface.co/bigscience/bloom/discussions + +- type: model name: mT0 organization: BigScience - prohibited_uses: none - quality_control: https://arxiv.org/pdf/2211.01786.pdf - size: 13B parameters (dense) + description: mT0 is a multitask fine-tuned multilingual language model based + on mT5. + created_date: + value: 2021-10-15 + explanation: The date the model was released + url: https://arxiv.org/pdf/2110.08207.pdf + model_card: https://huggingface.co/bigscience/T0pp + modality: text (English) + size: 13B parameters (dense model) + analysis: https://huggingface.co/bigscience/bloomz#evaluation + dependencies: [mT5, xP3] training_emissions: unknown + training_time: unknown training_hardware: Jean Zay (288 A100 80GB GPUs with 8 GPUs per node (36 nodes) using NVLink 4 inter-gpu connects, 4 OmniPath links) - training_time: unknown - type: model - url: https://arxiv.org/pdf/2110.08207.pdf -- access: open - analysis: https://huggingface.co/bigscience/bloomz#evaluation - created_date: 2022-11-03 - dependencies: [BLOOM, xP3] - description: BLOOMZ is an multitask fine-tuned autoregressive multilingual language - model. - feedback: https://huggingface.co/bigscience/bloomz/discussions - intended_uses: We recommend using the model to perform tasks expressed in natural - language. + quality_control: https://arxiv.org/pdf/2211.01786.pdf + access: open license: - explanation: Model is licensed under https://huggingface.co/spaces/bigscience/license value: BigScience RAIL v1.0 - modality: - explanation: text (46 languages) and code (13 programming languages) - value: code, text; code, text - model_card: https://huggingface.co/bigscience/bloomz + explanation: Model is licensed under https://huggingface.co/spaces/bigscience/license + intended_uses: We recommend using the model to perform tasks expressed in natural + language. + prohibited_uses: none monitoring: none + feedback: https://huggingface.co/bigscience/bloomz/discussions + +- type: model name: BLOOMZ organization: BigScience - prohibited_uses: none - quality_control: https://arxiv.org/pdf/2211.01786.pdf - size: 176B parameters (dense) + description: BLOOMZ is a multitask fine-tuned autoregressive multilingual language + model.
+ created_date: 2022-11-03 + url: https://arxiv.org/pdf/2211.01786.pdf + model_card: https://huggingface.co/bigscience/bloomz + modality: text (46 languages) and code (13 programming languages) + size: 176B parameters (dense model) + analysis: https://huggingface.co/bigscience/bloomz#evaluation + dependencies: [BLOOM, xP3] training_emissions: unknown + training_time: unknown training_hardware: Jean Zay (288 A100 80GB GPUs with 8 GPUs per node (36 nodes) using NVLink 4 inter-gpu connects, 4 OmniPath links) - training_time: unknown - type: model - url: https://arxiv.org/pdf/2211.01786.pdf + quality_control: https://arxiv.org/pdf/2211.01786.pdf + access: open + license: + value: BigScience RAIL v1.0 + explanation: Model is licensed under https://huggingface.co/spaces/bigscience/license + intended_uses: We recommend using the model to perform tasks expressed in natural + language. + prohibited_uses: none + monitoring: none + feedback: https://huggingface.co/bigscience/bloomz/discussions diff --git a/assets/bloomberg.yaml b/assets/bloomberg.yaml index e6a4b58a..352f03f9 100644 --- a/assets/bloomberg.yaml +++ b/assets/bloomberg.yaml @@ -1,126 +1,137 @@ --- -- access: - explanation: See [[Openness]](https://arxiv.org/pdf/2303.17564.pdf#subsection.8.2) - value: closed - analysis: '' + +- type: dataset + name: FinPile + # General + organization: Bloomberg + description: A comprehensive dataset consisting of a range of English financial + documents including news, filings, press releases, web-scraped financial documents, + and social media drawn from the Bloomberg archives that was used to train the + BloombergGPT model. created_date: - explanation: The date BloombergGPT was announced in the [[Bloomberg article]](https://www.bloomberg.com/company/press/bloomberggpt-50-billion-parameter-llm-tuned-finance/). value: 2023-03-30 + explanation: The date BloombergGPT was announced in the [[Bloomberg article]](https://www.bloomberg.com/company/press/bloomberggpt-50-billion-parameter-llm-tuned-finance/). + url: https://arxiv.org/pdf/2303.17564.pdf#section.2 datasheet: - explanation: Section 2 of the BloombergGPT paper. value: https://arxiv.org/pdf/2303.17564.pdf#section.2 + explanation: Section 2 of the BloombergGPT paper. + modality: text (English) + size: + value: 363B tokens + explanation: See [[Financial Datasets]](https://arxiv.org/pdf/2303.17564.pdf#subsection.2.1) + sample: [] + analysis: '' + # Construction dependencies: [] - description: A comprehensive dataset consisting of a range of English financial - documents including news, filings, press releases, web-scraped financial documents, - and social media drawn from the Bloomberg archives that was used to train the - BloombergGPT model. - excluded: '' - feedback: '' + license: + value: unknown + explanation: > + The asset isn't released, and hence the license is unknown. included: + value: | + FinPile consists of English financial documents. Authors utilize the Bloomberg + Terminal, which is an extensive collection of curated and maintained documents, + to create the FinPile dataset. Each document in FinPile is time-stamped, with + dates ranging from 2007-03-01 to 2022-07-31. + Types of data included are given below: + 1. Web (298B tokens) - Includes Bloomberg's web crawl focused on high-quality + websites that have financially relevant information. This makes up the majority + of FinPile. + 2. News (38B tokens) - Includes all news sources relevant to the financial + community, excluding news articles written by Bloomberg journalists.
Overall, + there are hundreds of English news sources in FinPile including "Bloomberg + Transcripts", which are transcripts of Bloomberg TV news. + 3. Filings (14B tokens) - Includes financial statements prepared by (public) + companies and made available to the general public. In the dataset, a majority + of the filings come from EDGAR, which is the SEC's online database. + 4. Press (9B tokens) - Includes press releases typically issued by companies + that are financially relevant. + 5. Bloomberg (5B tokens) - Includes Bloomberg authored news and other documents + such as opinions and analyses. The largest sources are “Bloomberg News” and + “Bloomberg First Word”, the Bloomberg-authored wire of real-time news. explanation: See [[Financial Datasets]](https://arxiv.org/pdf/2303.17564.pdf#subsection.2.1) - value: "FinPile consists of English financial documents. Authors utilize the\ - \ The Bloomberg\nTerminal, which is an extensive collection of curated and\ - \ maintained documents,\nto create the FinPile dataset. Each document in FinPile\ - \ is time-stamped, with\ndates ranging from 2007-03-01 to 2022-07-31.\nTypes\ - \ of data included are given below:\n 1. Web (298B tokens) - Inclues Bloomberg's\ - \ web crawl focused on high-quality\nwebsites that have financially relevant\ - \ information. This makes up the majority\nof FinPile.\n 2. News (38B tokens)\ - \ - Includes all news sources relevant to the financial\ncommunity, excluding\ - \ news articles written by Bloomberg journalists. Overall,\nthere are hundreds\ - \ of English news sources in FinPile including \"Bloomberg\nTranscripts\"\ - , which are transcripts of Bloomberg TV news.\n 3. Filings (14B tokens) -\ - \ Includes financial statements prepared by (public)\ncompanies and made available\ - \ to the general public. In the dataset, a majority\nof the filings come\ - \ from EDGAR, which is the SEC's online database.\n 4. Press (9B tokens)\ - \ - Includes press releases typically issued by companies\nthat are financially\ - \ relevant.\n 5. Bloomberg (5B tokens) - Includes Bloomberg authored news\ - \ and other documents\nsuch as opinions and analyses. The largest sources\ - \ are “Bloomberg News” and\n“Bloomberg First Word”, the Bloomberg-authored\ - \ wire of real-time news.\n" + excluded: '' + quality_control: '' + # Downstream + access: + value: closed + explanation: See [[Openness]](https://arxiv.org/pdf/2303.17564.pdf#subsection.8.2) intended_uses: Used to train the BloombergGPT model. - license: - explanation: "The asset isn't released, and hence the license is unknown.\n" - value: unknown - modality: - explanation: text (English) - value: text + prohibited_uses: '' monitoring: '' - name: FinPile + feedback: '' + +- type: model + name: BloombergGPT + # General organization: Bloomberg - prohibited_uses: '' - quality_control: '' - sample: [] - size: - explanation: See [[Financial Datasets]](https://arxiv.org/pdf/2303.17564.pdf#subsection.2.1) - value: 363B tokens - type: dataset - url: https://arxiv.org/pdf/2303.17564.pdf#section.2 -- access: - explanation: See [[Openness]](https://arxiv.org/pdf/2303.17564.pdf#subsection.8.2) - value: closed - analysis: - explanation: See [[Evaluation]](https://arxiv.org/pdf/2303.17564.pdf#section.5) - value: "Authors evaluate the performance of BloombergGPT on two broad categories\ - \ of tasks, finance-specific and general purpose, on several standard benchmarks.\ - \ They compare BloombergGPT to the three closest models: GPT-NeoX, OPT-66B\ - \ and BLOOM-176B. 
They also report results from the original GPT-3 whenever\ - \ externally available. They conclude \"We achieve strong results on general\ - \ LLM benchmarks and outperform comparable models on financial tasks. We attribute\ - \ this, in decreasing order of impact, to 1. a well-curated internal dataset,\ - \ 2. our unique choice in tokenizer, and 3. an up-to-date architecture.\"\n" - created_date: - explanation: The date the model was announced in the [[Bloomberg article]](https://www.bloomberg.com/company/press/bloomberggpt-50-billion-parameter-llm-tuned-finance/). - value: 2023-03-30 - dependencies: [FinPile, The Pile, C4, Wikipedia] description: BloombergGPT is a 50 billion parameter large language model that is specifically trained on a wide range of financial data to support a diverse set of natural language processing tasks within the financial industry. - feedback: '' - intended_uses: - explanation: See [[Bloomberg article]](https://www.bloomberg.com/company/press/bloomberggpt-50-billion-parameter-llm-tuned-finance/) - value: "\"This model will assist Bloomberg in improving existing financial NLP\ - \ tasks, such as sentiment analysis, named entity recognition, news classification,\ - \ and question answering, among others. Furthermore, BloombergGPT will unlock\ - \ new opportunities for marshalling the vast quantities of data available\ - \ on the Bloomberg Terminal to better help the firm's customers, while bringing\ - \ the full potential of AI to the financial domain.\"\n" - license: - explanation: "The asset isn't released, and hence the license is unknown.\n" - value: unknown - modality: - explanation: text (English) - value: text; text + created_date: + value: 2023-03-30 + explanation: The date the model was announced in the [[Bloomberg article]](https://www.bloomberg.com/company/press/bloomberggpt-50-billion-parameter-llm-tuned-finance/). + url: https://arxiv.org/abs/2303.17564 model_card: none - monitoring: '' - name: BloombergGPT - organization: Bloomberg - prohibited_uses: '' - quality_control: - explanation: See [[Ethics, Limitations, and Implications]](https://arxiv.org/pdf/2303.17564.pdf#section.8) - value: "Authors state the following:\n- \"To provide natural language applications\ - \ to the financial community, we\n have developed a rigorous risk and testing\ - \ assessment process. This process\n includes careful annotation guidelines\ - \ Tseng et al. (2020), pre-launch review\n at multiple levels by the central\ - \ risk and compliance organizations, and\n by the product leaders (e.g.,\ - \ the newsroom) as applicable, and post-launch\n monitoring. Moreover, we\ - \ conduct our research, development, and deployment\n of NLP and AI systems\ - \ in accordance with all applicable regulations.\"\n- \"Similarly, toxicity\ - \ and bias are areas where, as a company, we take extraordinary\n care with\ - \ any content we produce, whether from humans or machines. Since\n the measurement\ - \ of toxicity and bias in our model depends on its application\n areas, quantifying\ - \ the potential for the generation of harmful language\n remains an open\ - \ question. 
We are particularly interested in studying whether\n FinPile,\ - \ which is cleaner and contains fewer examples of overtly biased\n or toxic\ - \ language (e.g., Press Releases), reduces the proclivity of the\n model\ - \ to generate inappropriate content.\"\n" - size: 50B parameters (dense) + modality: text (English) + analysis: + value: > + Authors evaluate the performance of BloombergGPT on two broad categories of + tasks, finance-specific and general purpose, on several standard benchmarks. + They compare BloombergGPT to the three closest models: GPT-NeoX, OPT-66B and + BLOOM-176B. They also report results from the original GPT-3 whenever externally + available. They conclude "We achieve strong results on general LLM benchmarks + and outperform comparable models on financial tasks. We attribute this, in + decreasing order of impact, to 1. a well-curated internal dataset, 2. our + unique choice in tokenizer, and 3. an up-to-date architecture." + explanation: See [[Evaluation]](https://arxiv.org/pdf/2303.17564.pdf#section.5) + size: 50B parameters (dense model) + # Construction + dependencies: [FinPile, The Pile, C4, Wikipedia] training_emissions: unknown + training_time: + value: 53 days + explanation: See [[Training Run]](https://arxiv.org/pdf/2303.17564.pdf#section.4) training_hardware: - explanation: See [[Training Configuration]](https://arxiv.org/pdf/2303.17564.pdf#subsection.3.3) value: 64 Amazon EC2 p4d.24xlarge instances each with 8 NVIDIA 40GB A100 GPUs (i.e. total 512 A100 GPUs) - training_time: - explanation: See [[Training Run]](https://arxiv.org/pdf/2303.17564.pdf#section.4) - value: 53 days - type: model - url: https://arxiv.org/abs/2303.17564 + explanation: See [[Training Configuration]](https://arxiv.org/pdf/2303.17564.pdf#subsection.3.3) + quality_control: + value: | + Authors state the following: + - "To provide natural language applications to the financial community, we + have developed a rigorous risk and testing assessment process. This process + includes careful annotation guidelines Tseng et al. (2020), pre-launch review + at multiple levels by the central risk and compliance organizations, and + by the product leaders (e.g., the newsroom) as applicable, and post-launch + monitoring. Moreover, we conduct our research, development, and deployment + of NLP and AI systems in accordance with all applicable regulations." + - "Similarly, toxicity and bias are areas where, as a company, we take extraordinary + care with any content we produce, whether from humans or machines. Since + the measurement of toxicity and bias in our model depends on its application + areas, quantifying the potential for the generation of harmful language + remains an open question. We are particularly interested in studying whether + FinPile, which is cleaner and contains fewer examples of overtly biased + or toxic language (e.g., Press Releases), reduces the proclivity of the + model to generate inappropriate content." + explanation: See [[Ethics, Limitations, and Implications]](https://arxiv.org/pdf/2303.17564.pdf#section.8) + # Downstream + access: + value: closed + explanation: See [[Openness]](https://arxiv.org/pdf/2303.17564.pdf#subsection.8.2) + license: + value: unknown + explanation: > + The asset isn't released, and hence the license is unknown. + intended_uses: + value: > + "This model will assist Bloomberg in improving existing financial NLP + tasks, such as sentiment analysis, named entity recognition, news classification, + and question answering, among others. 
Furthermore, BloombergGPT will unlock + new opportunities for marshalling the vast quantities of data available on + the Bloomberg Terminal to better help the firm's customers, while bringing + the full potential of AI to the financial domain." + explanation: See [[Bloomberg article]](https://www.bloomberg.com/company/press/bloomberggpt-50-billion-parameter-llm-tuned-finance/) + prohibited_uses: '' + monitoring: '' + feedback: '' diff --git a/assets/brex.yaml b/assets/brex.yaml index 76d01b3e..374f6842 100644 --- a/assets/brex.yaml +++ b/assets/brex.yaml @@ -1,28 +1,30 @@ --- -- access: limited - adaptation: '' - created_date: 2023-03-07 - dependencies: [ChatGPT API] + +- type: application + name: Brex Chat + organization: Brex description: Brex Inc., a highly valued startup that makes software for finance professionals, is turning to the same artificial intelligence tool behind ChatGPT for a service that can answer questions about corporate budgets, policy and spending. - failures: '' - feedback: '' - intended_uses: '' + created_date: 2023-03-07 + url: https://fortune.com/2023/03/07/cfo-chatbot-chatgpt-ai-brex-finance-software-startup-budgets-policies/ + dependencies: [ChatGPT API] + adaptation: '' + output_space: '' + quality_control: '' + access: limited license: - explanation: "\"Brex grants you a nonexclusive and nontransferable license to\ - \ use Brex Property as provided through the Services and as permitted by these\ - \ User Terms.\" - excerpt from the Terms of Service.\n" value: custom + explanation: > + "Brex grants you a nonexclusive and nontransferable license to use Brex + Property as provided through the Services and as permitted by these User + Terms." - excerpt from the Terms of Service. + terms_of_service: https://www.brex.com/legal/user-terms + intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' monthly_active_users: '' - name: Brex Chat - organization: Brex - output_space: '' - prohibited_uses: '' - quality_control: '' - terms_of_service: https://www.brex.com/legal/user-terms - type: application - url: https://fortune.com/2023/03/07/cfo-chatbot-chatgpt-ai-brex-finance-software-startup-budgets-policies/ user_distribution: '' + failures: '' diff --git a/assets/casia.yaml b/assets/casia.yaml index 4190e5a9..0634f801 100644 --- a/assets/casia.yaml +++ b/assets/casia.yaml @@ -1,26 +1,25 @@ --- -- access: open - analysis: Reports results on standard translation benchmarks across 102 languages - in comparison with Google Translate and ChatGPT - created_date: 2023-05-29 - dependencies: [LLaMA, CLUE, BigTrans parallel dataset] + +- type: model + name: BigTrans + organization: Institute of Automation Chinese Academy of Sciences description: BigTrans is a model which adapts LLaMA that covers only 20 languages and enhances it with multilingual translation capability on more than 100 languages - feedback: https://huggingface.co/James-WYang/BigTrans/discussions - intended_uses: Advancing future research in multilingual LLMs - license: Apache 2.0 - modality: - explanation: text - value: text; text + created_date: 2023-05-29 + url: https://arxiv.org/pdf/2305.18098v1.pdf model_card: https://huggingface.co/James-WYang/BigTrans - monitoring: '' - name: BigTrans - organization: Institute of Automation Chinese Academy of Sciences - prohibited_uses: '' - quality_control: '' - size: 13B parameters (dense) + modality: text + analysis: Reports results on standard translation benchmarks across 102 languages + in comparison with Google Translate and ChatGPT + size: 13B 
parameters (dense model) + dependencies: [LLaMA, CLUE, BigTrans parallel dataset] training_emissions: unknown - training_hardware: 16 A100 GPUs with 80 GB of RAM training_time: unknown - type: model - url: https://arxiv.org/pdf/2305.18098v1.pdf + training_hardware: 16 A100 GPUs with 80 GB of RAM + quality_control: '' + access: open + license: Apache 2.0 + intended_uses: Advancing future research in multilingual LLMs + prohibited_uses: '' + monitoring: '' + feedback: https://huggingface.co/James-WYang/BigTrans/discussions diff --git a/assets/cerebras.yaml b/assets/cerebras.yaml index 421cd8a4..636a9f70 100644 --- a/assets/cerebras.yaml +++ b/assets/cerebras.yaml @@ -1,59 +1,76 @@ --- -- access: - explanation: "The Pile is an open source dataset. Hugging Face compatible checkpoints\ - \ available on the [[Cerebras Hugging Face page]](https://huggingface.co/cerebras/Cerebras-GPT-13B).\ - \ Cerebras systems checkpoints for pre-training and fine tuning are available\ - \ in the cloud via the [[Cerebras Model Studio]](https://www.cerebras.net/product-cloud/).\n" - value: open - analysis: "\"We evaluate our models on the PILE validation set comprising 380M\ - \ tokens. We also evaluate the public checkpoints of Pythia, Eleuther (2022);\ - \ OPT, Zhang et al. (2022); GPT-NeoX 20B, Black et al. (2022); and GPT-J 6B,\ - \ Wang & Komatsuzaki (2021). We performed upstream (pre-training) evaluations\ - \ of text prediction cross-entropy using the Pile validation and test splits.\ - \ We performed downstream evaluations of text generation accuracy on standardized\ - \ tasks using the Eleuther lm-evaluation-harness.\" [[Evaluations]] (https://github.com/Cerebras/modelzoo/tree/main/modelzoo/transformers/pytorch/gpt3/configs/Cerebras_GPT#evaluations).\n" + +- type: model + name: Cerebras-GPT + # General + organization: Cerebras + description: > + A Family of Open, Compute-efficient, Large Language Models. The family includes + 111M, 256M, 590M, 1.3B, 2.7B, 6.7B, and 13B models. All models in the Cerebras-GPT + family have been trained in accordance with Chinchilla scaling laws (20 tokens + per model parameter). + [[Cerebras Blog Post]](https://www.cerebras.net/blog/cerebras-gpt-a-family-of-open-compute-efficient-large-language-models) created_date: - explanation: "The date the model was announced in the [[Cerebras blog post]](https://www.cerebras.net/blog/cerebras-gpt-a-family-of-open-compute-efficient-large-language-models).\n" value: 2023-03-28 - dependencies: [The Pile] - description: "A Family of Open, Compute-efficient, Large Language Models. The\ - \ family includes 111M, 256M, 590M, 1.3B, 2.7B, 6.7B, and 13B models. All models\ - \ in the Cerebras-GPT family have been trained in accordance with Chinchilla\ - \ scaling laws (20 tokens per model parameter). [[Cerebras Blog Post]](https://www.cerebras.net/blog/cerebras-gpt-a-family-of-open-compute-efficient-large-language-models)\n" - feedback: '' - intended_uses: "\"The primary intended use is to further research into large language\ - \ models. These models can be used as a foundation model for NLP, applications,\ - \ ethics, and alignment research. Our primary intended users are researchers\ - \ who are working to improve LLMs and practitioners seeking reference implementations,\ - \ training setups, hyperparameters, or pre-trained models. 
We release these\ - \ models with a fully permissive Apache license for the community to use freely.\"\ - \ [[Uses and Limitations]](https://github.com/Cerebras/modelzoo/tree/main/modelzoo/transformers/pytorch/gpt3/configs/Cerebras_GPT#uses-and-limitations).\n" - license: Apache 2.0 - modality: - explanation: text (English) - value: text; text + explanation: > + The date the model was announced in the [[Cerebras blog post]](https://www.cerebras.net/blog/cerebras-gpt-a-family-of-open-compute-efficient-large-language-models). + url: https://www.cerebras.net/blog/cerebras-gpt-a-family-of-open-compute-efficient-large-language-models/ model_card: https://huggingface.co/cerebras/Cerebras-GPT-13B - monitoring: '' - name: Cerebras-GPT - organization: Cerebras - prohibited_uses: "Authors note the following limitations of the model: \"Cerebras-GPT\ - \ models are trained on the Pile, with English language only, and are not suitable\ - \ for machine translation tasks. Cerebras-GPT models have not been tuned for\ - \ human-facing dialog applications like chatbots and will not respond to prompts\ - \ in a similar way to models that have received instruction tuning or reinforcement\ - \ learning from human feedback (RLHF) like Flan-T5 or ChatGPT.\" [[Uses and\ - \ Limitations]](https://github.com/Cerebras/modelzoo/tree/main/modelzoo/transformers/pytorch/gpt3/configs/Cerebras_GPT#out-of-scope-use).\n" - quality_control: "The Pile dataset has been thoroughly analyzed from various ethical\ - \ standpoints such as toxicity analysis, gender bias, pejorative content, racially\ - \ sensitive content etc. Only mitigations in standard Pile dataset pre-processing\ - \ were employed when pre-training Cerebras-GPT. [[Risk, Bias, Ethical Considerations]](https://github.com/Cerebras/modelzoo/tree/main/modelzoo/transformers/pytorch/gpt3/configs/Cerebras_GPT#risk-bias-ethical-considerations)\n" - size: 13B parameters (dense) + modality: text (English) + size: + value: 13B parameters (dense model) + explanation: Size of the largest model in the Cerebras-GPT family. + analysis: > + "We evaluate our models on the PILE validation set comprising 380M tokens. We + also evaluate the public checkpoints of Pythia, Eleuther (2022); OPT, Zhang + et al. (2022); GPT-NeoX 20B, Black et al. (2022); and GPT-J 6B, Wang & Komatsuzaki + (2021). We performed upstream (pre-training) evaluations of text prediction + cross-entropy using the Pile validation and test splits. We performed downstream + evaluations of text generation accuracy on standardized tasks using the Eleuther + lm-evaluation-harness." + [[Evaluations]] + (https://github.com/Cerebras/modelzoo/tree/main/modelzoo/transformers/pytorch/gpt3/configs/Cerebras_GPT#evaluations). 
+ # Construction + dependencies: [The Pile] training_emissions: '' - training_hardware: - explanation: "According to [[Model Description]](https://huggingface.co/cerebras/Cerebras-GPT-13B#model-description)\n" - value: 16x Cerebras CS-2 wafer scale systems training_time: - explanation: '' value: '' - type: model - url: https://www.cerebras.net/blog/cerebras-gpt-a-family-of-open-compute-efficient-large-language-models/ + explanation: '' + training_hardware: + value: 16x Cerebras CS-2 wafer scale systems + explanation: > + According to [[Model Description]](https://huggingface.co/cerebras/Cerebras-GPT-13B#model-description) + quality_control: > + The Pile dataset has been thoroughly analyzed from various ethical standpoints + such as toxicity analysis, gender bias, pejorative content, racially sensitive + content etc. Only mitigations in standard Pile dataset pre-processing were employed + when pre-training Cerebras-GPT. + [[Risk, Bias, Ethical Considerations]](https://github.com/Cerebras/modelzoo/tree/main/modelzoo/transformers/pytorch/gpt3/configs/Cerebras_GPT#risk-bias-ethical-considerations) + # Downstream + access: + value: open + explanation: > + The Pile is an open source dataset. + Hugging Face compatible checkpoints available on the [[Cerebras Hugging Face + page]](https://huggingface.co/cerebras/Cerebras-GPT-13B). + Cerebras systems checkpoints for pre-training and fine tuning are available + in the cloud via the [[Cerebras Model Studio]](https://www.cerebras.net/product-cloud/). + license: Apache 2.0 + intended_uses: > + "The primary intended use is to further research into large language models. + These models can be used as a foundation model for NLP, applications, ethics, + and alignment research. Our primary intended users are researchers who are working + to improve LLMs and practitioners seeking reference implementations, training + setups, hyperparameters, or pre-trained models. We release these models with + a fully permissive Apache license for the community to use freely." + [[Uses and Limitations]](https://github.com/Cerebras/modelzoo/tree/main/modelzoo/transformers/pytorch/gpt3/configs/Cerebras_GPT#uses-and-limitations). + prohibited_uses: > + Authors note the following limitations of the model: "Cerebras-GPT models are + trained on the Pile, with English language only, and are not suitable for machine + translation tasks. Cerebras-GPT models have not been tuned for human-facing + dialog applications like chatbots and will not respond to prompts in a similar + way to models that have received instruction tuning or reinforcement learning + from human feedback (RLHF) like Flan-T5 or ChatGPT." + [[Uses and Limitations]](https://github.com/Cerebras/modelzoo/tree/main/modelzoo/transformers/pytorch/gpt3/configs/Cerebras_GPT#out-of-scope-use). + monitoring: '' + feedback: '' diff --git a/assets/chatglm.yaml b/assets/chatglm.yaml index 7005d126..5bb6d82e 100644 --- a/assets/chatglm.yaml +++ b/assets/chatglm.yaml @@ -1,25 +1,24 @@ --- -- access: open - analysis: Performance evaluated on English and Chinese language benchmark tests. - created_date: 2023-03-14 - dependencies: [] + +- type: model + name: ChatGLM + organization: ChatGLM description: ChatGLM is a Chinese-English language model with question and answer and dialogue functions, and is aimed at a Chinese audience. 
- feedback: '' - intended_uses: '' - license: Apache 2.0 - modality: - explanation: text - value: text; text + created_date: 2023-03-14 + url: https://chatglm.cn/blog model_card: '' - monitoring: '' - name: ChatGLM - organization: ChatGLM - prohibited_uses: '' - quality_control: '' - size: 6B parameters (dense) + modality: text + analysis: Performance evaluated on English and Chinese language benchmark tests. + size: 6B parameters + dependencies: [] training_emissions: unknown - training_hardware: '' training_time: unknown - type: model - url: https://chatglm.cn/blog + training_hardware: '' + quality_control: '' + access: open + license: Apache 2.0 + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' diff --git a/assets/cmu.yaml b/assets/cmu.yaml index d5085ad0..ceda272f 100644 --- a/assets/cmu.yaml +++ b/assets/cmu.yaml @@ -1,35 +1,34 @@ --- -- access: - explanation: Model checkpoints are available for download at https://github.com/VHellendoorn/Code-LMs - value: open - analysis: Reports results on standard code benchmarks across a variety of programming - languages. - created_date: - explanation: The date the model paper was released - value: 2022-02-26 - dependencies: [Github] + +- type: model + name: PolyCoder + organization: CMU description: PolyCoder is a code model trained on 2.7B parameters based on the GPT-2 architecture, which was trained on 249GB of code across 12 programming languages on a single machine. - feedback: https://huggingface.co/NinedayWang/PolyCoder-2.7B/discussion - intended_uses: unknown - license: - explanation: The license is provided in the [[Github repository]](https://github.com/VHellendoorn/Code-LMs) - value: MIT - modality: - explanation: code (12 programming languages) - value: code; code + created_date: + value: 2022-02-26 + explanation: The date the model paper was released + url: https://arxiv.org/abs/2202.13169 model_card: https://huggingface.co/NinedayWang/PolyCoder-2.7B - monitoring: None - name: PolyCoder - organization: CMU - prohibited_uses: None + modality: code (12 programming languages) + analysis: Reports results on standard code benchmarks across a variety of programming + languages. + size: 2.7B parameters (dense model) + dependencies: [Github] + training_emissions: unknown + training_time: 6 weeks + training_hardware: 8 NVIDIA RTX 8000 quality_control: No specific quality control is mentioned in model training, though details on data processing and how the tokenizer was trained are provided in the paper. - size: 2.7B parameters (dense) - training_emissions: unknown - training_hardware: 8 NVIDIA RTX 8000 - training_time: 6 weeks - type: model - url: https://arxiv.org/abs/2202.13169 + access: + value: open + explanation: Model checkpoints are available for download at https://github.com/VHellendoorn/Code-LMs + license: + value: MIT + explanation: The license is provided in the [[Github repository]](https://github.com/VHellendoorn/Code-LMs) + intended_uses: unknown + prohibited_uses: None + monitoring: None + feedback: https://huggingface.co/NinedayWang/PolyCoder-2.7B/discussion diff --git a/assets/cohere.yaml b/assets/cohere.yaml index 9051aa78..21c3379a 100644 --- a/assets/cohere.yaml +++ b/assets/cohere.yaml @@ -1,460 +1,657 @@ --- -- access: - explanation: The dataset isn't provided to the public. - value: closed - analysis: - explanation: The analysis performed on the dataset is not released. 
- value: unknown + +- type: dataset + name: coheretext + # General + organization: Cohere + description: > + The family of datasets used to train Cohere models, which come in two forms: + coheretext-filtered and coheretext-unfiltered. The former is used to train the + Representation models, while the latter one is used to train the Generation + models. created_date: - explanation: "The date the Cohere API was announced on the news [[News Article]]\ - \ (https://venturebeat.com/2021/11/15/openai-rival-cohere-launches-language-model-api/).\n" value: 2021-11-15 + explanation: > + The date the Cohere API was announced on the news + [[News Article]] + (https://venturebeat.com/2021/11/15/openai-rival-cohere-launches-language-model-api/). + url: none datasheet: https://docs.cohere.ai/data-statement - dependencies: [] - description: "The family of datasets used to train Cohere models, which come in\ - \ two forms: coheretext-filtered and coheretext-unfiltered. The former is used\ - \ to train the Representation models, while the latter one is used to train\ - \ the Generation models.\n" - excluded: "Documents that are not in English are excluded.\n" - feedback: - explanation: The internal feedback mechanisms for Cohere are unknown. + modality: text (English) + size: + value: 200 GB + explanation: > + Size of the filtered dataset is 200 GB. The unfiltered dataset is ~3 TB. + sample: [] + analysis: value: unknown - included: "As stated in the datasheet, the dataset \"includes the Google Books\ - \ dataset, CommonCrawl, and text from the internet scraped by the Cohere infrastructure\ - \ team.\" The top ten domains scraped were: wordpress.com, medium.com, stackexchange.com,\ - \ tumblr.com, elsevier.com, genius.com, bbc.co.uk, libsyn.com, yahoo.com, nytimes.com\ - \ [[Datasheet]](https://docs.cohere.ai/data-statement).\n" - intended_uses: "The intended use of the dataset is to train Cohere's language\ - \ models.\n" + explanation: The analysis performed on the dataset is not released. + # Construction + dependencies: [] license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - modality: - explanation: text (English) - value: text - monitoring: - explanation: "There is no information on how Cohere is internally monitoring\ - \ the use of the dataset.\n" - value: unknown - name: coheretext - organization: Cohere + explanation: > + The asset isn't released, and hence the license is unknown. + included: > + As stated in the datasheet, the dataset "includes the Google Books dataset, + CommonCrawl, and text from the internet scraped by the Cohere infrastructure + team." The top ten domains scraped were: wordpress.com, medium.com, + stackexchange.com, tumblr.com, elsevier.com, genius.com, bbc.co.uk, + libsyn.com, yahoo.com, nytimes.com + [[Datasheet]](https://docs.cohere.ai/data-statement). + excluded: > + Documents that are not in English are excluded. + quality_control: > + In the datasheet, it is implied that Cohere employs filtration methods + for removing racist, biased and toxic content, but the details are not + provided. These filtration methods take both the context and the language, + as opposed to using a list of blockwords + [[Datasheet]](https://docs.cohere.ai/data-statement). + # Downstream + access: + value: closed + explanation: The dataset isn't provided to the public. + intended_uses: > + The intended use of the dataset is to train Cohere's language models. 
prohibited_uses: - explanation: "There are no known prohibited uses of the dataset, but the Cohere\ - \ API is bound by the Cohere usage guidelines, which may also apply to this\ - \ dataset [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines).\n" value: unknown - quality_control: "In the datasheet, it is implied that Cohere employs filtration\ - \ methods for removing racist, biased and toxic content, but the details are\ - \ not provided. These filtration methods take both the context and the language,\ - \ as opposed to using a list of blockwords [[Datasheet]](https://docs.cohere.ai/data-statement).\n" - sample: [] - size: - explanation: "Size of the filtered dataset is 200 GB. The unfiltered dataset\ - \ is ~3 TB.\n" - value: 200 GB - type: dataset - url: none -- access: - explanation: "The model is available to the public through the Cohere Platform\ - \ [[Cohere Platform]](https://os.cohere.ai/login).\n" - value: limited - analysis: The model's performance was analyzed on Hellaswag and COPA, as well - as several safety benchmarks [[Model Card]](https://docs.cohere.ai/generation-card). - created_date: - explanation: "The date the Cohere API was announced on the news [[News Article]]\ - \ (https://venturebeat.com/2021/11/15/openai-rival-cohere-launches-language-model-api/).\n" - value: 2021-11-15 - dependencies: [coheretext] - description: "The Generations model is a language model trained by Cohere for\ - \ generation tasks.\n" - feedback: - explanation: "There are no specific feedback mechanisms for the Generation model,\ - \ but a generic contact email is provided on Cohere website, which is support\ - \ at cohere.ai.\n" + explanation: > + There are no known prohibited uses of the dataset, but the Cohere API is + bound by the Cohere usage guidelines, which may also apply to this dataset + [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines). + monitoring: value: unknown - intended_uses: "On the model card, the intended uses are stated as \"interactive\ - \ autocomplete, augmenting human writing processes, summarization, text rephrasing,\ - \ and other text-to-text tasks in non-sensitive domains\" [[Model Card]](https://docs.cohere.ai/generation-card).\n" - license: - explanation: "The asset isn't released, and hence the license is unknown.\n" + explanation: > + There is no information on how Cohere is internally monitoring the use of + the dataset. + feedback: value: unknown - modality: - explanation: text (English) - value: text; text - model_card: https://docs.cohere.ai/generation-card - monitoring: "The usage of the model is monitored by Cohere [[Model Card]](https://docs.cohere.ai/generation-card).\n" + explanation: The internal feedback mechanisms for Cohere are unknown. + +- type: model name: Cohere Base + # General organization: Cohere - prohibited_uses: "The usage of the model is bound by the Cohere usage guidelines\ - \ [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines). 
A non-comprehensive\ - \ list of specific application violating these guidelines are: astroturfing,\ - \ generation of misinformation and other harmful content, and \"generation of\ - \ text about people, places, or events without a human-in-the-loop\" [[Model\ - \ Card]](https://docs.cohere.ai/generation-card).\n" - quality_control: - explanation: "The quality control measures taken are unknown, but it is implied\ - \ that Cohere performed mitigation strategies for toxic degeneration [[Model\ - \ Card]](https://docs.cohere.ai/generation-card).\n" + description: > + The Generations model is a language model trained by Cohere for + generation tasks. + created_date: + value: 2021-11-15 + explanation: > + The date the Cohere API was announced on the news + [[News Article]] + (https://venturebeat.com/2021/11/15/openai-rival-cohere-launches-language-model-api/). + url: none + model_card: https://docs.cohere.ai/generation-card + modality: text (English) + size: value: unknown - size: unknown + explanation: > + The exact sizes of the generation models are unknown, but we know + that they come in three sizes: small, medium, and large + [[Model Card]](https://docs.cohere.ai/generation-card). + analysis: The model's performance was analyzed on Hellaswag and COPA, as well + as several safety benchmarks [[Model Card]](https://docs.cohere.ai/generation-card). + # Construction + dependencies: [coheretext] training_emissions: + value: unknown explanation: The emissions of the models are unknown. + training_time: value: unknown + explanation: The training time for the models are unknown. training_hardware: - explanation: "The training hardware wasn't explicitly announced, but it was\ - \ reported that Google Cloud teamed up with Cohere on a TPU partnership [[TechCrunch\ - \ Article]] (https://techcrunch.com/2021/11/17/google-cloud-teams-up-with-nlp-startup-cohere-on-multi-year-partnership/).\n" value: unknown - training_time: - explanation: The training time for the models are unknown. + explanation: > + The training hardware wasn't explicitly announced, but it was + reported that Google Cloud teamed up with Cohere on a TPU partnership + [[TechCrunch Article]] + (https://techcrunch.com/2021/11/17/google-cloud-teams-up-with-nlp-startup-cohere-on-multi-year-partnership/). + quality_control: value: unknown - type: model - url: none -- access: - explanation: "The model is available to the public through the Cohere Platform\ - \ [[Cohere Platform]](https://os.cohere.ai/login).\n" + explanation: > + The quality control measures taken are unknown, but it is implied + that Cohere performed mitigation strategies for toxic degeneration + [[Model Card]](https://docs.cohere.ai/generation-card). + # Downstream + access: value: limited - analysis: The model's performance was analyzed on Hellaswag and COPA, as well - as several safety benchmarks [[Model Card]](https://docs.cohere.ai/generation-card). 
- created_date: - explanation: "Cohere Command was in beta before this date\n" - value: 2023-01-01 - dependencies: [Cohere Base] - description: "This model is a generative model optimized to follow commands in\ - \ the prompt.\n" - feedback: - explanation: "There are no specific feedback mechanisms for the Generation model,\ - \ but a generic contact email is provided on Cohere website, which is support\ - \ at cohere.ai.\n" - value: unknown - intended_uses: "On the model card, the intended uses are stated as \"interactive\ - \ autocomplete, augmenting human writing processes, summarization, text rephrasing,\ - \ and other text-to-text tasks in non-sensitive domains\" [[Model Card]](https://docs.cohere.ai/generation-card).\n" + explanation: > + The model is available to the public through the Cohere Platform + [[Cohere Platform]](https://os.cohere.ai/login). license: - explanation: The model likely has a license specifically for Cohere's use. value: unknown - modality: - explanation: text (English) - value: text; text - model_card: https://docs.cohere.ai/generation-card - monitoring: "The usage of the model is monitored by Cohere [[Model Card]](https://docs.cohere.ai/generation-card).\n" + explanation: > + The asset isn't released, and hence the license is unknown. + intended_uses: > + On the model card, the intended uses are stated as "interactive + autocomplete, augmenting human writing processes, summarization, + text rephrasing, and other text-to-text tasks in non-sensitive domains" + [[Model Card]](https://docs.cohere.ai/generation-card). + prohibited_uses: > + The usage of the model is bound by the Cohere usage guidelines + [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines). + A non-comprehensive list of specific application violating these guidelines + are: astroturfing, generation of misinformation and other harmful content, + and "generation of text about people, places, or events without a + human-in-the-loop" + [[Model Card]](https://docs.cohere.ai/generation-card). + monitoring: > + The usage of the model is monitored by Cohere + [[Model Card]](https://docs.cohere.ai/generation-card). + feedback: + value: unknown + explanation: > + There are no specific feedback mechanisms for the Generation model, + but a generic contact email is provided on Cohere website, which is + support at cohere.ai. + +- type: model name: Cohere Command + # General organization: Cohere - prohibited_uses: "The usage of the model is bound by the Cohere usage guidelines\ - \ [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines). A non-comprehensive\ - \ list of specific application violating these guidelines are: astroturfing,\ - \ generation of misinformation and other harmful content, and \"generation of\ - \ text about people, places, or events without a human-in-the-loop\" [[Model\ - \ Card]](https://docs.cohere.ai/generation-card).\n" - quality_control: - explanation: unknown + description: > + This model is a generative model optimized to follow commands in the prompt. + created_date: + value: 2023-01-01 + explanation: > + Cohere Command was in beta before this date + url: https://docs.cohere.com/docs/command-beta + model_card: https://docs.cohere.ai/generation-card + modality: text (English) + size: value: unknown - size: unknown + explanation: > + The exact sizes of the generation models are unknown, but we know + that they come in three sizes: small, medium, and large + [[Model Card]](https://docs.cohere.ai/generation-card). 
+ analysis: The model's performance was analyzed on Hellaswag and COPA, as well + as several safety benchmarks [[Model Card]](https://docs.cohere.ai/generation-card). + # Construction + dependencies: [Cohere Base] training_emissions: + value: unknown explanation: unknown + training_time: value: unknown - training_hardware: explanation: unknown + training_hardware: value: unknown - training_time: explanation: unknown + quality_control: value: unknown - type: model - url: https://docs.cohere.com/docs/command-beta -- access: - explanation: "The model is available to the public through the Cohere Platform\ - \ [[Cohere Platform]](https://cohere.ai/).\n" + explanation: unknown + # Downstream + access: value: limited - analysis: "The model's performance was analyzed on several safety benchmarks [[Model\ - \ Card]](https://docs.cohere.ai/representation-card).\n" - created_date: - explanation: "The date the Cohere API was announced on the news [[News Article]]\ - \ (https://venturebeat.com/2021/11/15/openai-rival-cohere-launches-language-model-api/).\n" - value: 2021-11-15 - dependencies: [] - description: "The Embedding Large (English) model is a language model trained\ - \ by Cohere for tasks requiring embeddings.\n" - feedback: - explanation: "There are no specific feedback mechanisms for the Generation model,\ - \ but a generic contact email is provided on Cohere website, which is support\ - \ at cohere.ai.\n" - value: unknown - intended_uses: "The intended uses are stated as \"estimating semantic similarity\ - \ between two sentences, choosing a sentence which is most likely to follow\ - \ another sentence, sentiment analysis, topic extraction, or categorizing user\ - \ feedback\" on the Cohere model card [[Model Card]](https://docs.cohere.ai/representation-card).\n" + explanation: > + The model is available to the public through the Cohere Platform + [[Cohere Platform]](https://os.cohere.ai/login). license: + value: unknown explanation: The model likely has a license specifically for Cohere's use. + intended_uses: > + On the model card, the intended uses are stated as "interactive + autocomplete, augmenting human writing processes, summarization, + text rephrasing, and other text-to-text tasks in non-sensitive domains" + [[Model Card]](https://docs.cohere.ai/generation-card). + prohibited_uses: > + The usage of the model is bound by the Cohere usage guidelines + [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines). + A non-comprehensive list of specific application violating these guidelines + are: astroturfing, generation of misinformation and other harmful content, + and "generation of text about people, places, or events without a + human-in-the-loop" + [[Model Card]](https://docs.cohere.ai/generation-card). + monitoring: > + The usage of the model is monitored by Cohere + [[Model Card]](https://docs.cohere.ai/generation-card). + feedback: value: unknown - modality: - explanation: Text (English) - value: text; text - model_card: https://docs.cohere.ai/representation-card - monitoring: "The usage of the model is monitored by Cohere [[Model Card]](https://docs.cohere.ai/representation-card).\n" + explanation: > + There are no specific feedback mechanisms for the Generation model, + but a generic contact email is provided on Cohere website, which is + support at cohere.ai. 
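The reordered entry layout that these cohere.yaml hunks (and the other asset files in this patch) converge on is easier to see in one place. The sketch below is a hypothetical placeholder entry, not a real asset in this patch; only the section comments and the field order (General, then Construction, then Downstream) are taken from the hunks above and below, and every value is a stand-in.

- type: model
  name: Example Model            # placeholder entry, for illustration only
  # General
  organization: Example Organization
  description: >
    One-sentence description of the model.
  created_date: 2023-01-01       # placeholder date
  url: https://example.org/model # placeholder URL
  model_card: ''
  modality: text (English)
  size: unknown
  analysis: ''
  # Construction
  dependencies: []
  training_emissions: unknown
  training_time: unknown
  training_hardware: unknown
  quality_control: ''
  # Downstream
  access: open
  license: unknown
  intended_uses: ''
  prohibited_uses: ''
  monitoring: ''
  feedback: ''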
+ + +- type: model name: Cohere Embed (English) + # General organization: Cohere - prohibited_uses: "The usage of the model is bound by the Cohere usage guidelines\ - \ [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines). A non-comprehensive\ - \ list of specific application violating these guidelines are: extraction of\ - \ identity and demographic information, building purposefully opaque text classification\ - \ systems, and \"building downstream classifiers that serve as automated decision-making\ - \ systems that have real-world consequences on people, where those decisions\ - \ are made without a human-in-the-loop\" [[Model Card]](https://docs.cohere.ai/representation-card).\n" - quality_control: unknown - size: unknown - training_emissions: - explanation: The emissions of the models are unknown. + description: > + The Embedding Large (English) model is a language model trained by Cohere for + tasks requiring embeddings. + created_date: + value: 2021-11-15 + explanation: > + The date the Cohere API was announced on the news + [[News Article]] + (https://venturebeat.com/2021/11/15/openai-rival-cohere-launches-language-model-api/). + url: none + model_card: https://docs.cohere.ai/representation-card + modality: Text (English) + size: value: unknown - training_hardware: - explanation: "The training hardware wasn't explicitly announced, but it was\ - \ reported that Google Cloud teamed up with Cohere on a TPU partnership [[TechCrunch\ - \ Article]] (https://techcrunch.com/2021/11/17/google-cloud-teams-up-with-nlp-startup-cohere-on-multi-year-partnership/).\n" + explanation: > + The exact sizes of the embedding models are unknown, but we know + that they come in three sizes: small, medium, and large + [[Model Card]](https://docs.cohere.ai/representation-card). + analysis: > + The model's performance was analyzed on several safety benchmarks + [[Model Card]](https://docs.cohere.ai/representation-card). + # Construction + dependencies: [] + training_emissions: value: unknown + explanation: The emissions of the models are unknown. training_time: + value: unknown explanation: The training time for the models are unknown. + training_hardware: value: unknown - type: model - url: none -- access: - explanation: "The model is available to the public through the Cohere Platform\ - \ [[Cohere Platform]](https://cohere.ai/).\n" + explanation: > + The training hardware wasn't explicitly announced, but it was + reported that Google Cloud teamed up with Cohere on a TPU partnership + [[TechCrunch Article]] + (https://techcrunch.com/2021/11/17/google-cloud-teams-up-with-nlp-startup-cohere-on-multi-year-partnership/). 
+ quality_control: unknown + # Downstream + access: value: limited - analysis: "The model's performance was analyzed on several safety benchmarks [[Model\ - \ Card]](https://docs.cohere.ai/representation-card).\n" - created_date: - explanation: "The launch of the multilingual embedding model was announced in\ - \ this [[blog post]] (https://txt.cohere.com/multilingual/).\n" - value: 2022-12-12 - dependencies: [] - description: "This model maps text from 100+ languages to a semantic vector space,\ - \ positioning text with a similar meaning (regardless of language) in close\ - \ proximity.\n" - feedback: - explanation: "There are no specific feedback mechanisms for the Generation model,\ - \ but a generic contact email is provided on Cohere website, which is support\ - \ at cohere.ai.\n" - value: unknown - intended_uses: "The intended uses are stated as \"estimating semantic similarity\ - \ between two sentences, choosing a sentence which is most likely to follow\ - \ another sentence, sentiment analysis, topic extraction, or categorizing user\ - \ feedback\" on the Cohere model card [[Model Card]](https://docs.cohere.ai/representation-card).\n" + explanation: > + The model is available to the public through the Cohere Platform + [[Cohere Platform]](https://cohere.ai/). license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - modality: - explanation: text (Multilingual) - value: text; text - model_card: https://docs.cohere.ai/representation-card - monitoring: "The usage of the model is monitored by Cohere [[Model Card]](https://docs.cohere.ai/representation-card).\n" + explanation: The model likely has a license specifically for Cohere's use. + intended_uses: > + The intended uses are stated as "estimating semantic similarity between two + sentences, choosing a sentence which is most likely to follow another + sentence, sentiment analysis, topic extraction, or categorizing user + feedback" on the Cohere model card + [[Model Card]](https://docs.cohere.ai/representation-card). + prohibited_uses: > + The usage of the model is bound by the Cohere usage guidelines + [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines). + A non-comprehensive list of specific application violating these guidelines + are: extraction of identity and demographic information, building + purposefully opaque text classification systems, and "building downstream + classifiers that serve as automated decision-making systems that have + real-world consequences on people, where those decisions are made without + a + human-in-the-loop" + [[Model Card]](https://docs.cohere.ai/representation-card). + monitoring: > + The usage of the model is monitored by Cohere + [[Model Card]](https://docs.cohere.ai/representation-card). + feedback: + value: unknown + explanation: > + There are no specific feedback mechanisms for the Generation model, + but a generic contact email is provided on Cohere website, which is + support at cohere.ai. + +- type: model name: Cohere Embed (Multilingual) + # General organization: Cohere - prohibited_uses: "The usage of the model is bound by the Cohere usage guidelines\ - \ [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines). 
A non-comprehensive\ - \ list of specific application violating these guidelines are: extraction of\ - \ identity and demographic information, building purposefully opaque text classification\ - \ systems, and \"building downstream classifiers that serve as automated decision-making\ - \ systems that have real-world consequences on people, where those decisions\ - \ are made without a human-in-the-loop\" [[Model Card]](https://docs.cohere.ai/representation-card).\n" - quality_control: unknown - size: unknown - training_emissions: - explanation: The emissions of the models are unknown. + description: > + This model maps text from 100+ languages to a semantic + vector space, positioning text with a similar meaning + (regardless of language) in close proximity. + modality: text (Multilingual) + created_date: + value: 2022-12-12 + explanation: > + The launch of the multilingual embedding model was announced in this + [[blog post]] + (https://txt.cohere.com/multilingual/). + url: https://txt.cohere.ai/multilingual/ + model_card: https://docs.cohere.ai/representation-card + size: value: unknown - training_hardware: - explanation: "The training hardware wasn't explicitly announced, but it was\ - \ reported that Google Cloud teamed up with Cohere on a TPU partnership [[TechCrunch\ - \ Article]] (https://techcrunch.com/2021/11/17/google-cloud-teams-up-with-nlp-startup-cohere-on-multi-year-partnership/).\n" + explanation: > + The exact sizes of the embedding models are unknown, but we know + that they come in three sizes: small, medium, and large + [[Model Card]](https://docs.cohere.ai/representation-card). + analysis: > + The model's performance was analyzed on several safety benchmarks + [[Model Card]](https://docs.cohere.ai/representation-card). + # Construction + dependencies: [] + training_emissions: value: unknown + explanation: The emissions of the models are unknown. training_time: + value: unknown explanation: The training time for the models are unknown. + training_hardware: value: unknown - type: model - url: https://txt.cohere.ai/multilingual/ -- access: - explanation: "Users can access the Cohere API by signing up on the Cohere website\ - \ [[Cohere Website]](https://cohere.ai/).\n" + explanation: > + The training hardware wasn't explicitly announced, but it was + reported that Google Cloud teamed up with Cohere on a TPU partnership + [[TechCrunch Article]] + (https://techcrunch.com/2021/11/17/google-cloud-teams-up-with-nlp-startup-cohere-on-multi-year-partnership/). + quality_control: unknown + # Downstream + access: value: limited - adaptation: unknown + explanation: > + The model is available to the public through the Cohere Platform + [[Cohere Platform]](https://cohere.ai/). + license: + value: unknown + explanation: > + The asset isn't released, and hence the license is unknown. + intended_uses: > + The intended uses are stated as "estimating semantic similarity between two + sentences, choosing a sentence which is most likely to follow another + sentence, sentiment analysis, topic extraction, or categorizing user + feedback" on the Cohere model card + [[Model Card]](https://docs.cohere.ai/representation-card). + prohibited_uses: > + The usage of the model is bound by the Cohere usage guidelines + [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines). 
+ A non-comprehensive list of specific application violating these guidelines + are: extraction of identity and demographic information, building + purposefully opaque text classification systems, and "building downstream + classifiers that serve as automated decision-making systems that have + real-world consequences on people, where those decisions are made without + a + human-in-the-loop" + [[Model Card]](https://docs.cohere.ai/representation-card). + monitoring: > + The usage of the model is monitored by Cohere + [[Model Card]](https://docs.cohere.ai/representation-card). + feedback: + value: unknown + explanation: > + There are no specific feedback mechanisms for the Generation model, + but a generic contact email is provided on Cohere website, which is + support at cohere.ai. + +- type: application + name: Cohere API + # General + organization: Cohere + description: > + Cohere API allows users to access the cohere language models and utilize + them in their applications. created_date: - explanation: "The date the Cohere API was announced on the news [[News Article]]\ - \ (https://venturebeat.com/2021/11/15/openai-rival-cohere-launches-language-model-api/).\n" value: 2021-11-15 + explanation: > + The date the Cohere API was announced on the news + [[News Article]] + (https://venturebeat.com/2021/11/15/openai-rival-cohere-launches-language-model-api/). + url: https://cohere.ai/ + # Construction dependencies: - Cohere Generate Endpoint - Cohere Embed Endpoint - Cohere Classify Endpoint - Cohere Summarize Endpoint - description: "Cohere API allows users to access the cohere language models and\ - \ utilize them in their applications.\n" - failures: unknown - feedback: "General feedback as well as the violations of the usage guidelines\ - \ can be reported to Cohere at responsibility at cohere.ai [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines).\n" - intended_uses: "Intended to be used by developers who would like to incorporate\ - \ NLP into their applications [[Cohere Website]](https://cohere.ai/).\n" + adaptation: unknown + output_space: generation and embeddings + quality_control: > + The new users of the API get a limited access restricting the sizes of the + models as well as the number of tokens that can be used. Users are required + to go through an internal application to upgrade to full access + [[Limited Access]](https://docs.cohere.ai/limited-access). + # Downstream + access: + value: limited + explanation: > + Users can access the Cohere API by signing up on the Cohere website + [[Cohere Website]](https://cohere.ai/). 
license:
- explanation: "\"Subject to these Terms of Use, we grant you anon-exclusive,\ \ non-transferable, non-sublicensable and revocable licence to use the Cohere\ \ Solution in accordance with these Terms of Use\" - excerpt from the Terms\ \ of Service document.\n"
value: custom
- monitoring: "All applications developed using the Cohere API is subject to review\ \ by Cohere.\n"
- monthly_active_users: unknown
- name: Cohere API
- organization: Cohere
- output_space: generation and embeddings
- prohibited_uses: "The usage of the API is bound by the Cohere usage guidelines.\ \ Disallowed use cases include violence and threats, antisocial and antidemocratic\ \ uses, deceit, attacks on security or privacy, unsafe unsupervised uses, decision-making,\ \ high-Risk generations among others [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines).\n"
- quality_control: "The new users of the API get a limited access restricting the\ \ sizes of the models as well as the number of tokens that can be used. Users\ \ are required to go through an internal application to upgrade to full access\ \ [[Limited Access]](https://docs.cohere.ai/limited-access).\n"
+ explanation: >
+ "Subject to these Terms of Use, we grant you a non-exclusive,
+ non-transferable, non-sublicensable and revocable licence to use the
+ Cohere Solution in accordance with these Terms of Use" - excerpt from the
+ Terms of Service document.
terms_of_service: https://cohere.ai/terms-of-use
- type: application
- url: https://cohere.ai/
+ intended_uses: >
+ Intended to be used by developers who would like to incorporate NLP into
+ their applications
+ [[Cohere Website]](https://cohere.ai/).
+ prohibited_uses: >
+ The usage of the API is bound by the Cohere usage guidelines.
+ Disallowed use cases include violence and threats, antisocial and
+ antidemocratic uses, deceit, attacks on security or privacy, unsafe
+ unsupervised uses, decision-making, high-Risk generations among others
+ [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines).
+ monitoring: >
+ All applications developed using the Cohere API is subject to review by
+ Cohere.
+ feedback: >
+ General feedback as well as the violations of the usage guidelines can
+ be reported to Cohere at responsibility at cohere.ai
+ [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines).
+ # Deployment + monthly_active_users: unknown user_distribution: unknown -- access: - explanation: "Users can access the Cohere API by signing up on the Cohere website\ - \ [[Cohere Website]](https://cohere.ai/).\n" - value: limited - adaptation: unknown - created_date: - explanation: "The launch of Generate was announced in this [[news article]]\ - \ (https://venturebeat.com/uncategorized/openai-rival-cohere-launches-language-model-api/).\n" - value: 2021-11-15 - dependencies: [Cohere Base, Cohere Command] - description: "This endpoint generates realistic text conditioned on a given input.\n" failures: unknown - feedback: "General feedback as well as the violations of the usage guidelines\ - \ can be reported to Cohere at responsibility at cohere.ai [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines).\n" - intended_uses: "Intended to be used by developers who would like to incorporate\ - \ NLP into their applications [[Cohere Website]](https://cohere.ai/).\n" - license: "Limited use license to Cohere platform users [[Terms of Use]](https://cohere.ai/terms-of-use).\n" - monitoring: "All applications developed using the Cohere API is subject to review\ - \ by Cohere.\n" - monthly_active_users: unknown + +- type: application name: Cohere Generate Endpoint + # General organization: Cohere + description: > + This endpoint generates realistic text conditioned on a given input. + created_date: + value: 2021-11-15 + explanation: > + The launch of Generate was announced in this + [[news article]] + (https://venturebeat.com/uncategorized/openai-rival-cohere-launches-language-model-api/). + url: https://docs.cohere.ai/reference/generate + # Construction + dependencies: [Cohere Base, Cohere Command] + adaptation: unknown output_space: generation - prohibited_uses: "The usage of the API is bound by the Cohere usage guidelines.\ - \ Disallowed use cases include violence and threats, antisocial and antidemocratic\ - \ uses, deceit, attacks on security or privacy, unsafe unsupervised uses, decision-making,\ - \ high-Risk generations among others [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines).\n" - quality_control: "The new users of the API get a limited access restricting the\ - \ sizes of the models as well as the number of tokens that can be used. Users\ - \ are required to go through an internal application to upgrade to full access\ - \ [[Limited Access]](https://docs.cohere.ai/limited-access).\n" + quality_control: > + The new users of the API get a limited access restricting the sizes of the + models as well as the number of tokens that can be used. Users are required + to go through an internal application to upgrade to full access + [[Limited Access]](https://docs.cohere.ai/limited-access). + # Downstream + access: + value: limited + explanation: > + Users can access the Cohere API by signing up on the Cohere website + [[Cohere Website]](https://cohere.ai/). + license: > + Limited use license to Cohere platform users + [[Terms of Use]](https://cohere.ai/terms-of-use). terms_of_service: https://cohere.ai/terms-of-use - type: application - url: https://docs.cohere.ai/reference/generate + intended_uses: > + Intended to be used by developers who would like to incorporate NLP into + their applications + [[Cohere Website]](https://cohere.ai/). + prohibited_uses: > + The usage of the API is bound by the Cohere usage guidelines. 
+ Disallowed use cases include violence and threats, antisocial and + antidemocratic uses, deceit, attacks on security or privacy, unsafe + unsupervised uses, decision-making, high-Risk generations among others + [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines). + monitoring: > + All applications developed using the Cohere API is subject to review by + Cohere. + feedback: > + General feedback as well as the violations of the usage guidelines can + be reported to Cohere at responsibility at cohere.ai + [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines). + # Deployment + monthly_active_users: unknown user_distribution: unknown -- access: - explanation: "Users can access the Cohere API by signing up on the Cohere website\ - \ [[Cohere Website]](https://cohere.ai/).\n" - value: limited - adaptation: unknown - created_date: - explanation: "The launch of Embed was announced in this [[news article]] (https://venturebeat.com/uncategorized/openai-rival-cohere-launches-language-model-api/).\n" - value: 2021-11-15 - dependencies: [Cohere Embed (Multilingual), Cohere Embed (English)] - description: "This endpoint returns text embeddings. An embedding is a list of\ - \ floating point numbers that captures semantic information about the text that\ - \ it represents.\n" failures: unknown - feedback: "General feedback as well as the violations of the usage guidelines\ - \ can be reported to Cohere at responsibility at cohere.ai [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines).\n" - intended_uses: "Intended to be used by developers who would like to incorporate\ - \ NLP into their applications [[Cohere Website]](https://cohere.ai/).\n" - license: "Limited use license to Cohere platform users [[Terms of Use]](https://cohere.ai/terms-of-use).\n" - monitoring: "All applications developed using the Cohere API is subject to review\ - \ by Cohere.\n" - monthly_active_users: unknown + +- type: application name: Cohere Embed Endpoint + # General organization: Cohere + description: > + This endpoint returns text embeddings. An + embedding is a list of floating point numbers that + captures semantic information about the text that it represents. + created_date: + value: 2021-11-15 + explanation: > + The launch of Embed was announced in this + [[news article]] + (https://venturebeat.com/uncategorized/openai-rival-cohere-launches-language-model-api/). + url: https://docs.cohere.ai/reference/embed + # Construction + dependencies: [Cohere Embed (Multilingual), Cohere Embed (English)] + adaptation: unknown output_space: embedding - prohibited_uses: "The usage of the API is bound by the Cohere usage guidelines.\ - \ Disallowed use cases include violence and threats, antisocial and antidemocratic\ - \ uses, deceit, attacks on security or privacy, unsafe unsupervised uses, decision-making,\ - \ high-Risk generations among others [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines).\n" - quality_control: "The new users of the API get a limited access restricting the\ - \ sizes of the models as well as the number of tokens that can be used. Users\ - \ are required to go through an internal application to upgrade to full access\ - \ [[Limited Access]](https://docs.cohere.ai/limited-access).\n" + quality_control: > + The new users of the API get a limited access restricting the sizes of the + models as well as the number of tokens that can be used. 
Users are required + to go through an internal application to upgrade to full access + [[Limited Access]](https://docs.cohere.ai/limited-access). + # Downstream + access: + value: limited + explanation: > + Users can access the Cohere API by signing up on the Cohere website + [[Cohere Website]](https://cohere.ai/). + license: > + Limited use license to Cohere platform users + [[Terms of Use]](https://cohere.ai/terms-of-use). terms_of_service: https://cohere.ai/terms-of-use - type: application - url: https://docs.cohere.ai/reference/embed + intended_uses: > + Intended to be used by developers who would like to incorporate NLP into + their applications + [[Cohere Website]](https://cohere.ai/). + prohibited_uses: > + The usage of the API is bound by the Cohere usage guidelines. + Disallowed use cases include violence and threats, antisocial and + antidemocratic uses, deceit, attacks on security or privacy, unsafe + unsupervised uses, decision-making, high-Risk generations among others + [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines). + monitoring: > + All applications developed using the Cohere API is subject to review by + Cohere. + feedback: > + General feedback as well as the violations of the usage guidelines can + be reported to Cohere at responsibility at cohere.ai + [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines). + # Deployment + monthly_active_users: unknown user_distribution: unknown -- access: - explanation: "Users can access the Cohere API by signing up on the Cohere website\ - \ [[Cohere Website]](https://cohere.ai/).\n" - value: limited - adaptation: unknown - created_date: - explanation: "The launch of Classify was announced in this [[blog post]] (https://txt.cohere.com/classification-is-now-available/).\n" - value: 2022-05-05 - dependencies: [Cohere Embed (Multilingual), Cohere Embed (English)] - description: "This endpoint makes a prediction about which label best fits a specified\ - \ text input. To make a prediction, Classify uses the provided examples of text\ - \ + label pairs as a reference.\n" failures: unknown - feedback: "General feedback as well as the violations of the usage guidelines\ - \ can be reported to Cohere at responsibility at cohere.ai [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines).\n" - intended_uses: "Intended to be used by developers who would like to incorporate\ - \ NLP into their applications [[Cohere Website]](https://cohere.ai/).\n" - license: "Limited use license to Cohere platform users [[Terms of Use]](https://cohere.ai/terms-of-use).\n" - monitoring: "All applications developed using the Cohere API is subject to review\ - \ by Cohere.\n" - monthly_active_users: unknown + +- type: application name: Cohere Classify Endpoint + # General organization: Cohere + description: > + This endpoint makes a prediction about which label best + fits a specified text input. To make a prediction, + Classify uses the provided examples of text + label + pairs as a reference. + created_date: + value: 2022-05-05 + explanation: > + The launch of Classify was announced in this + [[blog post]] + (https://txt.cohere.com/classification-is-now-available/). 
+ url: https://docs.cohere.ai/reference/classify + # Construction + dependencies: [Cohere Embed (Multilingual), Cohere Embed (English)] + adaptation: unknown output_space: embedding - prohibited_uses: "The usage of the API is bound by the Cohere usage guidelines.\ - \ Disallowed use cases include violence and threats, antisocial and antidemocratic\ - \ uses, deceit, attacks on security or privacy, unsafe unsupervised uses, decision-making,\ - \ high-Risk generations among others [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines).\n" - quality_control: "The new users of the API get a limited access restricting the\ - \ sizes of the models as well as the number of tokens that can be used. Users\ - \ are required to go through an internal application to upgrade to full access\ - \ [[Limited Access]](https://docs.cohere.ai/limited-access).\n" + quality_control: > + The new users of the API get a limited access restricting the sizes of the + models as well as the number of tokens that can be used. Users are required + to go through an internal application to upgrade to full access + [[Limited Access]](https://docs.cohere.ai/limited-access). + # Downstream + access: + value: limited + explanation: > + Users can access the Cohere API by signing up on the Cohere website + [[Cohere Website]](https://cohere.ai/). + license: > + Limited use license to Cohere platform users + [[Terms of Use]](https://cohere.ai/terms-of-use). terms_of_service: https://cohere.ai/terms-of-use - type: application - url: https://docs.cohere.ai/reference/classify + intended_uses: > + Intended to be used by developers who would like to incorporate NLP into + their applications + [[Cohere Website]](https://cohere.ai/). + prohibited_uses: > + The usage of the API is bound by the Cohere usage guidelines. + Disallowed use cases include violence and threats, antisocial and + antidemocratic uses, deceit, attacks on security or privacy, unsafe + unsupervised uses, decision-making, high-Risk generations among others + [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines). + monitoring: > + All applications developed using the Cohere API is subject to review by + Cohere. + feedback: > + General feedback as well as the violations of the usage guidelines can + be reported to Cohere at responsibility at cohere.ai + [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines). 
+ # Deployment + monthly_active_users: unknown user_distribution: unknown -- access: - explanation: "Users can access the Cohere API by signing up on the Cohere website\ - \ [[Cohere Website]](https://cohere.ai/).\n" - value: limited - adaptation: unknown - created_date: - explanation: "The beta launch of summarize was announced in this [[blog post]]\ - \ (https://txt.cohere.com/summarize-beta/).\n" - value: 2023-02-22 - dependencies: [] - description: "This endpoint generates a succinct version of the original text\ - \ that relays the most important information.\n" failures: unknown - feedback: "General feedback as well as the violations of the usage guidelines\ - \ can be reported to Cohere at responsibility at cohere.ai [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines).\n" - intended_uses: "Intended to be used by developers who would like to incorporate\ - \ NLP into their applications [[Cohere Website]](https://cohere.ai/).\n" - license: "Limited use license to Cohere platform users [[Terms of Use]](https://cohere.ai/terms-of-use).\n" - monitoring: "All applications developed using the Cohere API is subject to review\ - \ by Cohere.\n" - monthly_active_users: unknown + +- type: application name: Cohere Summarize Endpoint + # General organization: Cohere + description: > + This endpoint generates a succinct + version of the original text that relays + the most important information. + created_date: + value: 2023-02-22 + explanation: > + The beta launch of summarize was announced in this + [[blog post]] + (https://txt.cohere.com/summarize-beta/). + url: https://docs.cohere.ai/reference/summarize + # Construction + dependencies: [] + adaptation: unknown output_space: generation - prohibited_uses: "The usage of the API is bound by the Cohere usage guidelines.\ - \ Disallowed use cases include violence and threats, antisocial and antidemocratic\ - \ uses, deceit, attacks on security or privacy, unsafe unsupervised uses, decision-making,\ - \ high-Risk generations among others [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines).\n" - quality_control: "The new users of the API get a limited access restricting the\ - \ sizes of the models as well as the number of tokens that can be used. Users\ - \ are required to go through an internal application to upgrade to full access\ - \ [[Limited Access]](https://docs.cohere.ai/limited-access).\n" + quality_control: > + The new users of the API get a limited access restricting the sizes of the + models as well as the number of tokens that can be used. Users are required + to go through an internal application to upgrade to full access + [[Limited Access]](https://docs.cohere.ai/limited-access). + # Downstream + access: + value: limited + explanation: > + Users can access the Cohere API by signing up on the Cohere website + [[Cohere Website]](https://cohere.ai/). + license: > + Limited use license to Cohere platform users + [[Terms of Use]](https://cohere.ai/terms-of-use). terms_of_service: https://cohere.ai/terms-of-use - type: application - url: https://docs.cohere.ai/reference/summarize + intended_uses: > + Intended to be used by developers who would like to incorporate NLP into + their applications + [[Cohere Website]](https://cohere.ai/). + prohibited_uses: > + The usage of the API is bound by the Cohere usage guidelines. 
+ Disallowed use cases include violence and threats, antisocial and + antidemocratic uses, deceit, attacks on security or privacy, unsafe + unsupervised uses, decision-making, high-Risk generations among others + [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines). + monitoring: > + All applications developed using the Cohere API is subject to review by + Cohere. + feedback: > + General feedback as well as the violations of the usage guidelines can + be reported to Cohere at responsibility at cohere.ai + [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines). + # Deployment + monthly_active_users: unknown user_distribution: unknown + failures: unknown diff --git a/assets/columbia.yaml b/assets/columbia.yaml index cef14a6c..2ca39802 100644 --- a/assets/columbia.yaml +++ b/assets/columbia.yaml @@ -1,24 +1,23 @@ --- -- access: open - analysis: Evaluated on wide range of tasks using own evaluation benchmarks. - created_date: 2022-11-20 - dependencies: [AlphaFold2, OpenProteinSet] - description: OpenFold is an open source recreation of AlphaFold2. - feedback: '' - intended_uses: '' - license: CC BY 4.0 - modality: - explanation: protein structures - value: image; image - model_card: '' - monitoring: '' + +- type: model name: OpenFold organization: Columbia - prohibited_uses: '' - quality_control: '' + description: OpenFold is an open source recreation of AlphaFold2. + created_date: 2022-11-20 + url: https://www.biorxiv.org/content/10.1101/2022.11.20.517210v2 + model_card: '' + modality: protein structures + analysis: Evaluated on wide range of tasks using own evaluation benchmarks. size: '' + dependencies: [AlphaFold2, OpenProteinSet] training_emissions: unknown - training_hardware: Single A100 NVIDIA GPU training_time: 50,000 GPU hours - type: model - url: https://www.biorxiv.org/content/10.1101/2022.11.20.517210v2 + training_hardware: Single A100 NVIDIA GPU + quality_control: '' + access: open + license: CC BY 4.0 + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' diff --git a/assets/databricks.yaml b/assets/databricks.yaml index 7439f500..40dc6f16 100644 --- a/assets/databricks.yaml +++ b/assets/databricks.yaml @@ -1,50 +1,67 @@ --- -- access: - explanation: "Model training code can be accessed at the official Dolly repository.\ - \ Trained weights can be requested at hello-dolly@databricks.com. [[Dolly\ - \ Repository]](https://github.com/databrickslabs/dolly).\n" - value: open - analysis: "\"We evaluated Dolly on the instruction-following capabilities described\ - \ in the InstructGPT paper that ChatGPT is based on and found that it exhibits\ - \ many of the same qualitative capabilities, including text generation, brainstorming\ - \ and open Q&A.\" [[Databricks Blog Post]] (https://www.databricks.com/blog/2023/03/24/hello-dolly-democratizing-magic-chatgpt-open-models.html).\n" + +- type: model + name: Dolly + # General + organization: Databricks + description: > + "Databricks’ Dolly, a large language model trained on the Databricks + Machine Learning Platform, demonstrates that a two-years-old open source + model (GPT-J) can, when subjected to just 30 minutes of fine tuning on a + focused corpus of 50k records (Stanford Alpaca), exhibit surprisingly + high quality instruction following behavior not characteristic of the + foundation model on which it is based." + [[Dolly Repository]](https://github.com/databrickslabs/dolly). 
created_date:
- explanation: "The date the model was announced in the [[Cerebras blog post]](https://www.databricks.com/blog/2023/03/24/hello-dolly-democratizing-magic-chatgpt-open-models.html).\n"
value: 2023-03-24
- dependencies: [GPT-J, Alpaca dataset]
- description: "\"Databricks’ Dolly, a large language model trained on the Databricks\n\ \ Machine Learning Platform, demonstrates that a two-years-old open source\n\ \ model (GPT-J) can, when subjected to just 30 minutes of fine tuning on a\n\ \ focused corpus of 50k records (Stanford Alpaca), exhibit surprisingly\n high\ \ quality instruction following behavior not characteristic of the\n foundation\ \ model on which it is based.\"\n [[Dolly Repository]](https://github.com/databrickslabs/dolly).\n"
- feedback: https://github.com/databrickslabs/dolly/issues
- intended_uses: "\"Dolly is intended exclusively for research purposes and is not\ \ licensed for commercial use.\" [[Limitations]](https://github.com/databrickslabs/dolly#limitations).\n"
- license: Apache 2.0
- modality:
- explanation: text (English)
- value: text; text
+ explanation: >
+ The date the model was announced in the [[Databricks blog post]](https://www.databricks.com/blog/2023/03/24/hello-dolly-democratizing-magic-chatgpt-open-models.html).
+ url: https://www.databricks.com/blog/2023/03/24/hello-dolly-democratizing-magic-chatgpt-open-models.html
model_card: ''
- monitoring: none
- name: Dolly
- organization: Databricks
- prohibited_uses: "Authors note the following limitations of the model: \"The Dolly\ \ model family is under active development, and so any list of shortcomings\ \ is unlikely to be exhaustive, but we include known limitations and misfires\ \ here as a means to document and share our preliminary findings with the community.\ \ In particular, dolly-6b struggles with syntactically complex prompts, mathematical\ \ operations, factual errors, dates and times, open-ended question answering,\ \ hallucination, enumerating lists of specific length, and stylistic mimicry.\"\ \ [[Limitations]](https://github.com/databrickslabs/dolly#limitations).\n"
- quality_control: none
- size: 6B parameters (dense)
+ modality: text (English)
+ size: 6B parameters (dense model)
+ analysis: >
+ "We evaluated Dolly on the instruction-following capabilities described in the
+ InstructGPT
+ paper that ChatGPT is based on and found that it exhibits many of the same qualitative
+ capabilities, including text generation, brainstorming and open Q&A."
+ [[Databricks Blog Post]]
+ (https://www.databricks.com/blog/2023/03/24/hello-dolly-democratizing-magic-chatgpt-open-models.html).
+ # Construction + dependencies: [GPT-J, Alpaca dataset] training_emissions: unknown - training_hardware: - explanation: "According to [[Model Overview]](https://github.com/databrickslabs/dolly#model-overview)\n" - value: A single NDasrA100_v4 machine with 8x A100 40GB GPUs training_time: - explanation: "According to [[Model Overview]](https://github.com/databrickslabs/dolly#model-overview)\n" value: 30 minutes - type: model - url: https://www.databricks.com/blog/2023/03/24/hello-dolly-democratizing-magic-chatgpt-open-models.html + explanation: > + According to [[Model Overview]](https://github.com/databrickslabs/dolly#model-overview) + training_hardware: + value: A single NDasrA100_v4 machine with 8x A100 40GB GPUs + explanation: > + According to [[Model Overview]](https://github.com/databrickslabs/dolly#model-overview) + quality_control: none + # Downstream + access: + value: open + explanation: > + Model training code can be accessed at the official Dolly repository. + Trained weights can be requested at hello-dolly@databricks.com. + [[Dolly Repository]](https://github.com/databrickslabs/dolly). + license: Apache 2.0 + intended_uses: > + "Dolly is intended exclusively for research purposes and is not licensed for + commercial use." + [[Limitations]](https://github.com/databrickslabs/dolly#limitations). + prohibited_uses: > + Authors note the following limitations of the model: "The Dolly model family + is under active development, + and so any list of shortcomings is unlikely to be exhaustive, but we include + known limitations and misfires + here as a means to document and share our preliminary findings with the community. + In particular, dolly-6b + struggles with syntactically complex prompts, mathematical operations, factual + errors, dates and times, + open-ended question answering, hallucination, enumerating lists of specific + length, and stylistic mimicry." + [[Limitations]](https://github.com/databrickslabs/dolly#limitations). + monitoring: none + feedback: https://github.com/databrickslabs/dolly/issues diff --git a/assets/deepmind.yaml b/assets/deepmind.yaml index 81a0f83d..a59747e1 100644 --- a/assets/deepmind.yaml +++ b/assets/deepmind.yaml @@ -1,670 +1,840 @@ --- -- access: - explanation: "The dataset access is limited to DeepMind researchers [[Model\ - \ Card]](https://arxiv.org/pdf/2112.11446.pdf#appendix.C).\n" - value: closed - analysis: "MassiveText data was analyzed for toxicity, language distribution,\ - \ URL breakdown, and tokenizer compression rates on the subsets [[Section A.2]](https://arxiv.org/pdf/2112.11446.pdf#subsection.A.2).\n" + +- type: dataset + name: MassiveText + # General + organization: DeepMind + description: > + The MassiveText dataset was used to train the Gopher model. created_date: - explanation: "The date that Gopher was announced [[DeepMind Blog Post]] (https://www.deepmind.com/blog/language-modelling-at-scale-gopher-ethical-considerations-and-retrieval).\n" value: 2021-12-08 + explanation: > + The date that Gopher was announced + [[DeepMind Blog Post]] + (https://www.deepmind.com/blog/language-modelling-at-scale-gopher-ethical-considerations-and-retrieval). + url: https://arxiv.org/pdf/2112.11446.pdf datasheet: https://arxiv.org/pdf/2112.11446.pdf#subsection.A.5 + modality: Text (English) and Code + size: 10.5 TB + sample: [] + analysis: > + MassiveText data was analyzed for toxicity, language distribution, URL + breakdown, and tokenizer compression rates on the subsets + [[Section A.2]](https://arxiv.org/pdf/2112.11446.pdf#subsection.A.2). 
+ # Construction dependencies: [] - description: "The MassiveText dataset was used to train the Gopher model.\n" - excluded: "Documents that are not in English are excluded.\n" - feedback: - explanation: "The internal feedback mechanisms for WebText are unknown [[Model\ - \ Card]](https://arxiv.org/pdf/2112.11446.pdf#appendix.C).\n" - value: unknown - included: "MassiveText data come from 6 sources: MassiveWeb (48%), Books (27%),\ - \ C4 (10%), News (10%), GitHub (3%), and Wikipedia (2%). MassiveWeb is a web\ - \ text corpus curated for MassiveText.\n" - intended_uses: "Pre-training of language models by DeepMind researchers [[Model\ - \ Card]](https://arxiv.org/pdf/2112.11446.pdf#appendix.C).\n" license: - explanation: "The model likely has a license specifically for DeepMind's use,\ - \ based on the information provided in the datasheet [[Model Card]](https://arxiv.org/pdf/2112.11446.pdf#subsection.A.5).\n" value: unknown - modality: - explanation: Text (English) and Code - value: code, text + explanation: > + The model likely has a license specifically for DeepMind's use, + based on the information provided in the datasheet + [[Model Card]](https://arxiv.org/pdf/2112.11446.pdf#subsection.A.5). + included: > + MassiveText data come from 6 sources: MassiveWeb (48%), Books (27%), + C4 (10%), News (10%), GitHub (3%), and Wikipedia (2%). MassiveWeb is a + web text corpus curated for MassiveText. + excluded: > + Documents that are not in English are excluded. + quality_control: > + The authors use simple heuristics for filtering low quality documents as + opposed to relying on a classifier based on a "gold" set such as the English + Wikipedia, which could "inadvertently bias towards a certain demographic or + erase certain dialects or sociolects from representation." MassiveWeb + subset was filtered using Google’s SafeSearch filter, preferring it over + to word filters that "disproportinately filter out inoffensive content + associated with minority groups. MassiveWeb was filtered + further for word or phrase repetitions. All the subsets were filtered for + document deduplication and test set contamination" + [[Appendix A]](https://arxiv.org/pdf/2112.11446.pdf#appendix.A). + # Downstream + access: + value: closed + explanation: > + The dataset access is limited to DeepMind researchers + [[Model Card]](https://arxiv.org/pdf/2112.11446.pdf#appendix.C). + intended_uses: > + Pre-training of language models by DeepMind researchers + [[Model Card]](https://arxiv.org/pdf/2112.11446.pdf#appendix.C). + prohibited_uses: + value: unknown + explanation: > + There are no known prohibited uses of the dataset, but the authors + state that it should not be used for training models with multilingual + capabilities as it only contains the English language + [[Model Card]](https://arxiv.org/pdf/2112.11446.pdf#appendix.C). monitoring: - explanation: "There is no information on how DeepMind is internally monitoring\ - \ the use of the dataset.\n" value: unknown - name: MassiveText - organization: DeepMind - prohibited_uses: - explanation: "There are no known prohibited uses of the dataset, but the authors\ - \ state that it should not be used for training models with multilingual capabilities\ - \ as it only contains the English language [[Model Card]](https://arxiv.org/pdf/2112.11446.pdf#appendix.C).\n" + explanation: > + There is no information on how DeepMind is internally monitoring + the use of the dataset. 
+ feedback: value: unknown - quality_control: "The authors use simple heuristics for filtering low quality\ - \ documents as opposed to relying on a classifier based on a \"gold\" set such\ - \ as the English Wikipedia, which could \"inadvertently bias towards a certain\ - \ demographic or erase certain dialects or sociolects from representation.\"\ - \ MassiveWeb subset was filtered using Google’s SafeSearch filter, preferring\ - \ it over to word filters that \"disproportinately filter out inoffensive content\ - \ associated with minority groups. MassiveWeb was filtered further for word\ - \ or phrase repetitions. All the subsets were filtered for document deduplication\ - \ and test set contamination\" [[Appendix A]](https://arxiv.org/pdf/2112.11446.pdf#appendix.A).\n" - sample: [] - size: 10.5 TB - type: dataset - url: https://arxiv.org/pdf/2112.11446.pdf -- access: - explanation: "The dataset access is limited to DeepMind researchers [[Datasheet]](https://arxiv.org/pdf/2204.14198.pdf#appendix.F).\n" - value: closed - analysis: '' + explanation: > + The internal feedback mechanisms for WebText are unknown + [[Model Card]](https://arxiv.org/pdf/2112.11446.pdf#appendix.C). + +- type: dataset + name: M3W + # General + organization: DeepMind + description: > + M3W (MassiveWeb) is dataset used to train Flamingo, and other vision-language + models and was created by researchers and engineers. created_date: - explanation: "The date that Flamingo was posted to arXiv [[arXiv]] (https://arxiv.org/pdf/2204.14198.pdf).\n" value: 2022-04-29 + explanation: > + The date that Flamingo was posted to arXiv + [[arXiv]] + (https://arxiv.org/pdf/2204.14198.pdf). + url: https://arxiv.org/pdf/2204.14198.pdf datasheet: https://arxiv.org/pdf/2204.14198.pdf#appendix.F + modality: text (English) and code + size: 182GB Text, 185M Images + sample: [] + analysis: '' + # Construction dependencies: [] - description: "M3W (MassiveWeb) is dataset used to train Flamingo, and other vision-language\ - \ models and was created by researchers and engineers.\n" - excluded: unknown - feedback: - explanation: "No feedback mechanism is mentioned in the datasheet [[Datasheet]](https://arxiv.org/pdf/2204.14198.pdf#appendix.F).\n" - value: none - included: "M3W has interleaved images (185M) and text (182GB) from the web.\n" - intended_uses: "Pre-training of vision and language models by DeepMind researchers\ - \ [[Datasheet]](https://arxiv.org/pdf/2204.14198.pdf#appendix.F).\n" license: - explanation: "The model likely has a license specifically for DeepMind's use,\ - \ based on the information provided in the datasheet [[Datasheet]](https://arxiv.org/pdf/2204.14198.pdf#appendix.F).\n" value: unknown - modality: - explanation: text (English) and code - value: code, text + explanation: > + The model likely has a license specifically for DeepMind's use, + based on the information provided in the datasheet + [[Datasheet]](https://arxiv.org/pdf/2204.14198.pdf#appendix.F). + included: > + M3W has interleaved images (185M) and text (182GB) from the web. + excluded: unknown + quality_control: > + The authors provide a basic description of data processing and cleaning. + # Downstream + access: + value: closed + explanation: > + The dataset access is limited to DeepMind researchers + [[Datasheet]](https://arxiv.org/pdf/2204.14198.pdf#appendix.F). + intended_uses: > + Pre-training of vision and language models by DeepMind researchers + [[Datasheet]](https://arxiv.org/pdf/2204.14198.pdf#appendix.F). 
+ prohibited_uses: + value: unknown + explanation: > + There are no known prohibited uses of the dataset + [[Datasheet]](https://arxiv.org/pdf/2204.14198.pdf#appendix.F). monitoring: - explanation: "There is no information on how DeepMind is internally monitoring\ - \ the use of the dataset.\n" value: unknown - name: M3W + explanation: > + There is no information on how DeepMind is internally monitoring + the use of the dataset. + feedback: + value: none + explanation: > + No feedback mechanism is mentioned in the datasheet + [[Datasheet]](https://arxiv.org/pdf/2204.14198.pdf#appendix.F). + +- type: dataset + name: Gato dataset + # General organization: DeepMind - prohibited_uses: - explanation: "There are no known prohibited uses of the dataset [[Datasheet]](https://arxiv.org/pdf/2204.14198.pdf#appendix.F).\n" - value: unknown - quality_control: "The authors provide a basic description of data processing and\ - \ cleaning.\n" - sample: [] - size: 182GB Text, 185M Images - type: dataset - url: https://arxiv.org/pdf/2204.14198.pdf -- access: - explanation: "The full dataset is not directly provided by the authors, though\ - \ some underlying data is public whereas others (e.g. MassiveText) is not.\n" - value: closed - analysis: "The Gato dataset compiles many datasets introduced in prior works,\ - \ with associated analyses.\n" + description: > + The Gato datasets are a collection of data used to train the Gato model. created_date: - explanation: "The date that Gato was announced [[DeepMind Blog Post]] (https://www.deepmind.com/blog/a-generalist-agent).\n" value: 2022-05-12 + explanation: > + The date that Gato was announced + [[DeepMind Blog Post]] + (https://www.deepmind.com/blog/a-generalist-agent). + url: https://www.deepmind.com/blog/a-generalist-agent datasheet: none + modality: text, text-image pair, simulated control task, robotics simulation + size: 10.5 TB Text, 2.2B Text-Image pairs, 1.5T tokens of simulated control, 500k + robotics trajectories + sample: [] + analysis: > + The Gato dataset compiles many datasets introduced in prior works, with associated + analyses. + # Construction dependencies: [MassiveText] - description: "The Gato datasets are a collection of data used to train the Gato\ - \ model.\n" + license: + value: unknown + explanation: > + The datasets have individual licenses, but no overall license is mentioned + by the authors. + included: > + The full composition of the dataset across individual sources can be found in + the paper. excluded: - explanation: "No specific filtering is mentioned in the Gato paper.\n" value: none - feedback: - explanation: "There is no mention on feedback mechanisms either internally or\ - \ externally.\n" - value: none - included: "The full composition of the dataset across individual sources can be\ - \ found in the paper.\n" + explanation: > + No specific filtering is mentioned in the Gato paper. + quality_control: unknown + # Downstream + access: + value: closed + explanation: > + The full dataset is not directly provided by the authors, though some underlying + data is public whereas others (e.g. MassiveText) is not. intended_uses: - explanation: "There are no known intended uses of the dataset stated by authors\ - \ beyond training Gato.\n" value: unknown - license: - explanation: "The datasets have individual licenses, but no overall license\ - \ is mentioned by the authors.\n" + explanation: > + There are no known intended uses of the dataset stated by authors beyond training + Gato. 
+ prohibited_uses: value: unknown - modality: - explanation: text, text-image pair, simulated control task, robotics simulation - value: image, text + explanation: > + There are no known prohibited uses of the dataset stated by authors. monitoring: - explanation: "There is no mention on how DeepMind is internally monitoring the\ - \ use of the dataset.\n" value: none - name: Gato dataset + explanation: > + There is no mention on how DeepMind is internally monitoring + the use of the dataset. + feedback: + value: none + explanation: > + There is no mention on feedback mechanisms either internally or externally. + +- type: model + name: AlphaFold2 organization: DeepMind - prohibited_uses: - explanation: "There are no known prohibited uses of the dataset stated by authors.\n" - value: unknown - quality_control: unknown - sample: [] - size: 10.5 TB Text, 2.2B Text-Image pairs, 1.5T tokens of simulated control, 500k - robotics trajectories - type: dataset - url: https://www.deepmind.com/blog/a-generalist-agent -- access: - explanation: Models are available for download from the [[Github repository]](https://github.com/deepmind/alphafold) - value: open - analysis: '' + description: AlphaFold2 is a protein language model trained on protein sequences created_date: - explanation: The date the model paper was released value: 2021-07-15 - dependencies: [Protein Data Bank] - description: AlphaFold2 is a protein language model trained on protein sequences - feedback: '' - intended_uses: '' - license: - explanation: The license is provided in the [[Github repository]](https://github.com/deepmind/alphafold) - value: Apache 2.0 - modality: - explanation: protein sequence - value: text; text + explanation: The date the model paper was released + url: https://www.nature.com/articles/s41586-021-03819-2 model_card: none - monitoring: '' - name: AlphaFold2 - organization: DeepMind - prohibited_uses: '' - quality_control: '' - size: 93M parameters (dense) + modality: protein sequence + size: 93M parameters (dense model) + analysis: '' + dependencies: [Protein Data Bank] training_emissions: '' - training_hardware: - explanation: Described in paper - value: 128 TPUv3 cores training_time: + value: 11 days explanation: Training takes "approximately 1 week" and finetuning takes "approximately 4 days" - value: 11 days - type: model - url: https://www.nature.com/articles/s41586-021-03819-2 -- access: - explanation: "The model has not been released and no discussion of release is\ - \ stated in the model card [[Model Card]](https://arxiv.org/pdf/2204.14198.pdf#appendix.E).\n" - value: closed - analysis: "Model performance was evaluated on image and video datasets primarily,\ - \ including dialogue.\n" - created_date: - explanation: "The date that Flamingo was posted to arXiv [[arXiv]] (https://arxiv.org/abs/2204.14198).\n" - value: 2022-04-29 - dependencies: [M3W, ALIGN, LTIP, VTP, Chinchilla] - description: "Flamingo is a Visual Language Model using the Transformer architecture\ - \ that is intended for few-shot learning.\n" - feedback: - explanation: "No contact information is provided for feedback in the model card\ - \ [[Model Card]](https://arxiv.org/pdf/2204.14198.pdf#appendix.E).\n" - value: none - intended_uses: "The intended uses are stated in the model card: \"The primary\ - \ use is research on visual language models (VLM), including: research on VLM\ - \ applications like classification, captioning or visual question answering,\ - \ understanding how strong VLMs can contribute to AGI, advancing fairness 
and\
-    \ safety research in the area of multimodal research, and understanding limitations\
-    \ of current large VLMs.\" [[Model Card]](https://arxiv.org/pdf/2204.14198.pdf#appendix.E).\n"
  license:
-    explanation: "No license is provided in the model card [[Model Card]](https://arxiv.org/pdf/2204.14198.pdf#appendix.E).\n"
-    value: unknown
-  modality:
-    explanation: Images, Text
-    value: image, text; image, text
-  model_card: https://arxiv.org/pdf/2204.14198.pdf#appendix.E
-  monitoring:
-    explanation: "There is no information on how DeepMind is internally monitoring\
-      \ the use of the model.\n"
-    value: unknown
+    value: Apache 2.0
+    explanation: The license is provided in the [[Github repository]](https://github.com/deepmind/alphafold)
+  intended_uses: ''
+  prohibited_uses: ''
+  monitoring: ''
+  feedback: ''
+
+- type: model
  name: Flamingo
+  # General
  organization: DeepMind
-  prohibited_uses: "The model card lists the following as out of scope uses of the\
-    \ model: \"Uses of the model for visually conditioned language generation in\
-    \ harmful or deceitful settings. Broadly speaking, the model should not be used\
-    \ for downstream applications without further safety and fairness mitigations\
-    \ specific to each application.\" [[Model Card]](https://arxiv.org/pdf/2204.14198.pdf#appendix.E).\n"
-  quality_control:
-    explanation: "Reported in the mitigations in the model card [[Model Card]](https://openreview.net/pdf?id=1ikK0kHjvj#appendix.A).\n"
-    value: none
-  size: 80B parameters (dense)
+  description: >
+    Flamingo is a Visual Language Model using the Transformer architecture that
+    is intended for few-shot learning.
+  created_date:
+    value: 2022-04-29
+    explanation: >
+      The date that Flamingo was posted to arXiv
+      [[arXiv]]
+      (https://arxiv.org/abs/2204.14198).
+  url: https://arxiv.org/pdf/2204.14198.pdf
+  model_card: https://arxiv.org/pdf/2204.14198.pdf#appendix.E
+  modality: Images, Text
+  size:
+    value: 80B parameters (dense model)
+    explanation: >
+      The Flamingo family has models of several sizes (3.2B, 9.3B, 80B), which involve
+      frozen language, vision, and trainable parameters
+      [[Table 5]](https://arxiv.org/pdf/2204.14198.pdf).
+  analysis: >
+    Model performance was evaluated on image and video datasets primarily, including
+    dialogue.
+  # Construction
+  dependencies: [M3W, ALIGN, LTIP, VTP, Chinchilla]
  training_emissions:
-    explanation: "Authors do not report the training emissions.\n"
    value: unknown
+    explanation: >
+      Authors do not report the training emissions.
  training_time:
-    explanation: "Reported in the paper checklist [[Checklist]](https://arxiv.org/pdf/2204.14198.pdf).\n"
    value: 15 days on 1536 TPUs
+    explanation: >
+      Reported in the paper checklist
+      [[Checklist]](https://arxiv.org/pdf/2204.14198.pdf).
+  training_hardware:
+    value: TPU
+    explanation: >
+      Reported in the paper checklist
+      [[Checklist]](https://arxiv.org/pdf/2204.14198.pdf).
+ quality_control: + value: none + explanation: > + Reported in the mitigations in the model card + [[Model Card]](https://openreview.net/pdf?id=1ikK0kHjvj#appendix.A). + # Downstream + access: value: closed - analysis: '' - created_date: - explanation: The date the model paper was released - value: 2022-02-02 - dependencies: [] - description: AlphaCode is an autoregressive language model trained on code - feedback: '' - intended_uses: '' + explanation: > + The model has not been released and no discussion of release is stated in + the model card + [[Model Card]](https://arxiv.org/pdf/2204.14198.pdf#appendix.E). license: - explanation: '' value: unknown - modality: - explanation: code - value: code; code - model_card: '' - monitoring: '' + explanation: > + No license is provided in the model card + [[Model Card]](https://arxiv.org/pdf/2204.14198.pdf#appendix.E). + intended_uses: > + The intended uses are stated in the model card: "The primary use is research + on visual language models (VLM), including: research on VLM applications like + classification, captioning or visual question answering, understanding how + strong VLMs can contribute to AGI, advancing + fairness and safety research in the area of multimodal research, and understanding + limitations of current large VLMs." + [[Model Card]](https://arxiv.org/pdf/2204.14198.pdf#appendix.E). + prohibited_uses: > + The model card lists the following as out of scope uses of the model: "Uses + of the model for visually conditioned language generation in harmful or deceitful + settings. Broadly speaking, + the model should not be used for downstream applications + without further safety and fairness mitigations specific to + each application." + [[Model Card]](https://arxiv.org/pdf/2204.14198.pdf#appendix.E). + monitoring: + value: unknown + explanation: > + There is no information on how DeepMind is internally monitoring + the use of the model. + feedback: + value: none + explanation: > + No contact information is provided for feedback in the model card + [[Model Card]](https://arxiv.org/pdf/2204.14198.pdf#appendix.E). + +- type: model name: AlphaCode organization: DeepMind - prohibited_uses: '' - quality_control: '' - size: 41B parameters (dense) + description: AlphaCode is an autoregressive language model trained on code + created_date: + value: 2022-02-02 + explanation: The date the model paper was released + url: https://arxiv.org/abs/2203.07814 + model_card: '' + modality: code + analysis: '' + size: 41B parameters (dense model) + dependencies: [] training_emissions: '' - training_hardware: '' training_time: '' - type: model - url: https://arxiv.org/abs/2203.07814 -- access: - explanation: "The model access is limited to DeepMind researchers. 
The model\ - \ won't be released to the public [[Model Card]](https://arxiv.org/pdf/2112.11446.pdf#appendix.B).\n" + training_hardware: '' + quality_control: '' + access: value: closed - analysis: "Model performance was evaluated and analyzed on 152 NLP tasks including:\ - \ Language Modelling (20), Reading Comprehension (3), Fact Checking (3), Question\ - \ Answering (3), Common Sense (4), MMLU (57), BIG-bench (62) [[Section 4]](https://arxiv.org/pdf/2112.11446.pdf#section.4);\ - \ on toxicity and bias datasets [[Section 5]](https://arxiv.org/pdf/2112.11446.pdf#section.5);\ - \ and on dialogue tasks [[Section 6]](https://arxiv.org/pdf/2112.11446.pdf#section.6).\n" - created_date: - explanation: "The date that Gopher was announced [[DeepMind Blog Post]] (https://www.deepmind.com/blog/language-modelling-at-scale-gopher-ethical-considerations-and-retrieval).\n" - value: 2021-12-08 - dependencies: [MassiveText] - description: "Gopher is an autoregressive language model based on the Transformer\ - \ architecture with two modifications: using RMSNorm instead of LayerNorm and\ - \ using relative positional encoding scheme instead of absolute positional encodings\ - \ [[Section 3]](https://arxiv.org/pdf/2112.11446.pdf#subsection.3.1).\n" - feedback: "The feedback for the model can be provided at the email linked in the\ - \ model card, geoffreyi at google.com [[Model Card]](https://arxiv.org/pdf/2112.11446.pdf#appendix.B).\n" - intended_uses: "The intended uses are stated in the Gopher model card: \"The primary\ - \ use is research on language models, including: research on NLP applications\ - \ like machine translation and question answering, understanding how strong\ - \ language models can contribute to AGI, advancing fairness and safety research,\ - \ and understanding limitations of current LLMs\" [[Model Card]](https://arxiv.org/pdf/2112.11446.pdf#appendix.B).\n" + explanation: DeepMind does not provide access to AlphaCode to external researchers license: - explanation: "The model likely has a license specifically for DeepMind's use,\ - \ based on the information provided in the model card [[Model Card]](https://arxiv.org/pdf/2112.11446.pdf#appendix.B).\n" - value: unknown - modality: - explanation: Text (English) and Code - value: code, text; code, text - model_card: https://arxiv.org/pdf/2112.11446.pdf#appendix.B - monitoring: - explanation: "There is no information on how DeepMind is internally monitoring\ - \ the use of the model.\n" value: unknown + explanation: '' + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' + +- type: model name: Gopher + # General organization: DeepMind - prohibited_uses: "The model card lists the following as out of scope uses of the\ - \ model: \"for language generation in harmful or deceitful settings. More generally,\ - \ the model should not be used for downstream applications without further safety\ - \ and fairness mitigations\" [[Model Card]](https://arxiv.org/pdf/2112.11446.pdf#appendix.B).\n" - quality_control: none - size: 280B parameters (dense) + description: > + Gopher is an autoregressive language model based on the Transformer + architecture with two modifications: using RMSNorm instead of LayerNorm and + using relative positional encoding scheme instead of absolute positional + encodings + [[Section 3]](https://arxiv.org/pdf/2112.11446.pdf#subsection.3.1). 
+  created_date:
+    value: 2021-12-08
+    explanation: >
+      The date that Gopher was announced
+      [[DeepMind Blog Post]]
+      (https://www.deepmind.com/blog/language-modelling-at-scale-gopher-ethical-considerations-and-retrieval).
+  url: https://arxiv.org/pdf/2112.11446.pdf
+  model_card: https://arxiv.org/pdf/2112.11446.pdf#appendix.B
+  modality: Text (English) and Code
+  size:
+    value: 280B parameters (dense model)
+    explanation: >
+      The Gopher family has models of several sizes, but the name
+      Gopher uniquely identifies the 280B parameter version. Sizes for the other
+      models in the Gopher family can be seen in the paper
+      [[Table 1]](https://arxiv.org/pdf/2112.11446.pdf#table.caption.1).
+  analysis: >
+    Model performance was evaluated and analyzed on 152 NLP tasks including:
+    Language Modelling (20), Reading Comprehension (3), Fact Checking (3),
+    Question Answering (3), Common Sense (4), MMLU (57), BIG-bench (62)
+    [[Section 4]](https://arxiv.org/pdf/2112.11446.pdf#section.4); on toxicity
+    and bias datasets
+    [[Section 5]](https://arxiv.org/pdf/2112.11446.pdf#section.5); and on
+    dialogue tasks
+    [[Section 6]](https://arxiv.org/pdf/2112.11446.pdf#section.6).
+  # Construction
+  dependencies: [MassiveText]
  training_emissions:
-    explanation: "The training emission estimate from the paper [[Section F]](https://arxiv.org/pdf/2112.11446.pdf#appendix.F)\n"
    value: 380 tCO2e
+    explanation: >
+      The training emission estimate from the paper
+      [[Section F]](https://arxiv.org/pdf/2112.11446.pdf#appendix.F).
  training_time:
-    explanation: "The authors reported the training petaflops for all of the 4 different\
-    \ sizes of the model. For the 280B parameter model, the petaflops was reported\
-    \ as 6.31E+08. We compute the Gopher's training time in petaflop/s-day as\
-    \ 6.31E+08 / (60*60*24) = 7303.24 petaflop/s-day.\n"
    value: 7303.24 petaflop/s-day
-  training_hardware:
-    explanation: "Reported in the paper [[Section F]](https://arxiv.org/pdf/2112.11446.pdf#appendix.F).\n"
-    value: TPUv3 pods
+    explanation: >
+      The authors reported the training petaflops for all 4 different
+      sizes of the model. For the 280B parameter model, the total training petaflops
+      were reported as 6.31E+08. We compute Gopher's training time in
+      petaflop/s-day as 6.31E+08 / (60*60*24) = 7303.24 petaflop/s-day.
+  training_hardware:
+    value: TPUv3 pods
+    explanation: >
+      Reported in the paper
+      [[Section F]](https://arxiv.org/pdf/2112.11446.pdf#appendix.F).
+ quality_control: none + # Downstream + access: value: closed - analysis: "Model performance was evaluated and analyzed on many NLP tasks including\ - \ language modeling, reading comprehension, question answering, commonsense-intensive\ - \ tasks, and the BIG-Bench and MMLU meta-benchmarks.\n" - created_date: - explanation: "The date that Chinchilla was posted on arXiv [[arXiv]] (https://arxiv.org/abs/2203.15556).\n" - value: 2022-03-29 - dependencies: [MassiveText] - description: "Chinchilla is an autoregressive language model based on the Transformer\ - \ architecture with improved scaling laws.\n" - feedback: "The feedback for the model can be provided at the email linked in the\ - \ model card, {jordanhoffmann, sborgeaud, amensch,sifre} at deepmind.com [[Model\ - \ Card]](https://arxiv.org/pdf/2203.15556.pdf#appendix.I).\n" - intended_uses: "The intended uses are stated in the Chinchilla model card: \"\ - The primary use is research on language models, including: research on the scaling\ - \ behaviour of language models along with those listed in Gopher paper\" [[Model\ - \ Card]](https://arxiv.org/pdf/2203.15556.pdf#appendix.I).\n" + explanation: > + The model access is limited to DeepMind researchers. The model won't be + released to the public + [[Model Card]](https://arxiv.org/pdf/2112.11446.pdf#appendix.B). license: - explanation: "The model likely has a license specifically for DeepMind's use,\ - \ based on the information provided in the model card [[Model Card]](https://arxiv.org/pdf/2203.15556.pdf#appendix.I).\n" value: unknown - modality: - explanation: text (English) and cpde - value: text; text - model_card: https://arxiv.org/pdf/2203.15556.pdf + explanation: > + The model likely has a license specifically for DeepMind's use, + based on the information provided in the model card + [[Model Card]](https://arxiv.org/pdf/2112.11446.pdf#appendix.B). + intended_uses: > + The intended uses are stated in the Gopher model card: "The primary use is + research on language models, including: research on NLP applications like + machine translation and question answering, understanding how strong + language models can contribute to AGI, advancing fairness and safety + research, and understanding limitations of current LLMs" + [[Model Card]](https://arxiv.org/pdf/2112.11446.pdf#appendix.B). + prohibited_uses: > + The model card lists the following as out of scope uses of the model: "for + language generation in harmful or deceitful settings. More generally, the + model should not be used for downstream applications without further safety + and fairness mitigations" + [[Model Card]](https://arxiv.org/pdf/2112.11446.pdf#appendix.B). monitoring: - explanation: "There is no information on how DeepMind is internally monitoring\ - \ the use of the model.\n" - value: Unknown + value: unknown + explanation: > + There is no information on how DeepMind is internally monitoring + the use of the model. + feedback: > + The feedback for the model can be provided at the email linked in the + model card, geoffreyi at google.com [[Model Card]](https://arxiv.org/pdf/2112.11446.pdf#appendix.B). + +- type: model name: Chinchilla + # General organization: DeepMind - prohibited_uses: "The model card lists the following as out of scope uses of the\ - \ model: \"for language generation in harmful or deceitful settings. 
More generally,\
-    \ the model should not be used for downstream applications without further safety\
-    \ and fairness mitigations\" [[Model Card]](https://arxiv.org/pdf/2203.15556.pdf#appendix.I).\n"
-  quality_control: none
-  size: 70B parameters (dense)
+  description: >
+    Chinchilla is an autoregressive language model based on the Transformer
+    architecture with improved scaling laws.
+  created_date:
+    value: 2022-03-29
+    explanation: >
+      The date that Chinchilla was posted on arXiv
+      [[arXiv]]
+      (https://arxiv.org/abs/2203.15556).
+  url: https://arxiv.org/pdf/2203.15556.pdf
+  model_card: https://arxiv.org/pdf/2203.15556.pdf
+  modality: text (English) and code
+  size:
+    value: 70B parameters (dense model)
+    explanation: >
+      The Chinchilla family has many models, but the primary model is the 70B parameter
+      model.
+  analysis: >
+    Model performance was evaluated and analyzed on many NLP tasks including language
+    modeling, reading comprehension, question answering, commonsense-intensive
+    tasks, and the BIG-Bench and MMLU meta-benchmarks.
+  # Construction
+  dependencies: [MassiveText]
  training_emissions:
-    explanation: "Authors do not report the training emissions.\n"
    value: Unknown
-  training_hardware:
-    explanation: "Reported in the paper [[Section 4]](https://arxiv.org/pdf/2203.15556.pdf).\n"
-    value: TPUv3/TPUv4 pods
+    explanation: >
+      Authors do not report the training emissions.
  training_time:
-    explanation: "The authors reported the training petaflops for all models, including\
-    \ hypothetical larger models. For the 70B parameter model, the petaflops was\
-    \ reported as 5.76E+08. We compute the Gopher's training time in petaflop/s-day\
-    \ as 5.76E+08 / (60*60*24) = 6666.66 petaflop/s-day.\n"
-    value: 7303.24 petaflop/s-day
+    value: 6666.66 petaflop/s-day
+    explanation: >
+      The authors reported the training petaflops for all models, including hypothetical
+      larger models.
+      For the 70B parameter model, the total training petaflops were
+      reported as 5.76E+08. We compute Chinchilla's training time in
+      petaflop/s-day as 5.76E+08 / (60*60*24) = 6666.66 petaflop/s-day.
+  training_hardware:
+    value: TPUv3/TPUv4 pods
+    explanation: >
+      Reported in the paper
+      [[Section 4]](https://arxiv.org/pdf/2203.15556.pdf).
+  quality_control: none
+  # Downstream
+  access:
    value: closed
-  analysis: "Model performance was evaluated on simulated and robotics task primarily,\
-    \ including out-of-distribution and skill generalization.\n"
-  created_date:
-    explanation: "The date that Gato was announced [[DeepMind Blog Post]] (https://www.deepmind.com/blog/a-generalist-agent).\n"
-    value: 2022-05-12
-  dependencies: [Gato dataset]
-  description: "Gato is a generalist agent based on sequence modeling using the\
-    \ Transformer architecture to implement multi-modal, multi-task, multi-embodiment\
-    \ generalist policy.\n"
-  feedback: "The feedback for the model can be provided at the email linked in the\
-    \ model card, reedscot at google.com [[Model Card]](https://openreview.net/pdf?id=1ikK0kHjvj#appendix.A).\n"
-  intended_uses: "The intended uses are stated in the Gopher model card: \"Learn\
-    \ to accomplish a wide variety of tasks from expert demonstrations, such as\
-    \ playing video games, controlling simulated embodiments, and real world block\
-    \ stacking.\" [[Model Card]](https://openreview.net/pdf?id=1ikK0kHjvj#appendix.A).\n"
+    explanation: >
+      The model access is limited to DeepMind researchers. The model won't be
+      released to the public
+      [[Model Card]](https://arxiv.org/pdf/2203.15556.pdf#appendix.I).
  license:
-    explanation: "The asset isn't released, and hence the license is unknown.\n"
    value: unknown
-  modality:
-    explanation: text, image, control/robotics
-    value: image, text; image, text
-  model_card: https://openreview.net/pdf?id=1ikK0kHjvj#appendix.B
+    explanation: >
+      The model likely has a license specifically for DeepMind's use,
+      based on the information provided in the model card
+      [[Model Card]](https://arxiv.org/pdf/2203.15556.pdf#appendix.I).
+  intended_uses: >
+    The intended uses are stated in the Chinchilla model card: "The primary use
+    is research on language models, including:
+    research on the scaling behaviour of language models along with those listed
+    in the Gopher paper"
+    [[Model Card]](https://arxiv.org/pdf/2203.15556.pdf#appendix.I).
+  prohibited_uses: >
+    The model card lists the following as out of scope uses of the model: "for
+    language generation in harmful or deceitful settings. More generally, the
+    model should not be used for downstream applications without further safety
+    and fairness mitigations"
+    [[Model Card]](https://arxiv.org/pdf/2203.15556.pdf#appendix.I).
  monitoring:
-    explanation: "There is no information on how DeepMind is internally monitoring\
-    \ the use of the model.\n"
    value: Unknown
+    explanation: >
+      There is no information on how DeepMind is internally monitoring
+      the use of the model.
+  feedback: >
+    The feedback for the model can be provided at the email linked in the model
+    card, {jordanhoffmann, sborgeaud, amensch, sifre} at deepmind.com
+    [[Model Card]](https://arxiv.org/pdf/2203.15556.pdf#appendix.I).
+
+- type: model
  name: Gato
+  # General
  organization: DeepMind
-  prohibited_uses: "The model card lists the following as out of scope uses of the\
-    \ model: \"Not intended for commercial or production use. Military uses are\
-    \ strictly prohibited.\" [[Model Card]](https://openreview.net/pdf?id=1ikK0kHjvj#appendix.A).\n"
-  quality_control:
-    explanation: "Reported in the mitigations in the model card [[Model Card]](https://openreview.net/pdf?id=1ikK0kHjvj#appendix.A).\n"
-    value: none
-  size: 1.2B parameters (dense)
+  description: >
+    Gato is a generalist agent based on sequence modeling using the Transformer
+    architecture to implement a multi-modal, multi-task, multi-embodiment generalist
+    policy.
+  created_date:
+    value: 2022-05-12
+    explanation: >
+      The date that Gato was announced
+      [[DeepMind Blog Post]]
+      (https://www.deepmind.com/blog/a-generalist-agent).
+  url: https://www.deepmind.com/blog/a-generalist-agent
+  model_card: https://openreview.net/pdf?id=1ikK0kHjvj#appendix.B
+  modality: text, image, control/robotics
+  size:
+    value: 1.2B parameters (dense model)
+    explanation: >
+      The Gato family has models of several sizes (79M, 364M, 1.18B)
+      [[Table 6]](https://openreview.net/pdf?id=1ikK0kHjvj).
+  analysis: >
+    Model performance was evaluated primarily on simulated and robotics tasks,
+    including out-of-distribution and skill generalization.
+  # Construction
+  dependencies: [Gato dataset]
  training_emissions:
-    explanation: "Authors do not report the training emissions.\n"
    value: unknown
-  training_hardware:
-    explanation: "Reported in the paper [[Section 2.3]](https://openreview.net/pdf?id=1ikK0kHjvj).\n"
-    value: 16x16 TPU v3 slice
+    explanation: >
+      Authors do not report the training emissions.
  training_time:
-    explanation: "Reported in the paper [[Section 2.3]](https://openreview.net/pdf?id=1ikK0kHjvj).\n"
    value: 4 days on a 16x16 TPU v3 slice
-  type: model
-  url: https://www.deepmind.com/blog/a-generalist-agent
-- access: closed
-  analysis: ''
+    explanation: >
+      Reported in the paper
+      [[Section 2.3]](https://openreview.net/pdf?id=1ikK0kHjvj).
+  training_hardware:
+    value: 16x16 TPU v3 slice
+    explanation: >
+      Reported in the paper
+      [[Section 2.3]](https://openreview.net/pdf?id=1ikK0kHjvj).
+  quality_control:
+    value: none
+    explanation: >
+      Reported in the mitigations in the model card
+      [[Model Card]](https://openreview.net/pdf?id=1ikK0kHjvj#appendix.A).
+  # Downstream
+  access:
+    value: closed
+    explanation: >
+      The model access is limited to DeepMind researchers. The model won't be
+      released to the public
+      [[Model Card]](https://openreview.net/pdf?id=1ikK0kHjvj#appendix.A).
+  license:
+    value: unknown
+    explanation: >
+      The asset isn't released, and hence the license is unknown.
+  intended_uses: >
+    The intended uses are stated in the Gato model card: "Learn to accomplish
+    a wide variety of tasks from expert demonstrations, such as playing video
+    games, controlling simulated embodiments,
+    and real world block stacking."
+    [[Model Card]](https://openreview.net/pdf?id=1ikK0kHjvj#appendix.A).
+  prohibited_uses: >
+    The model card lists the following as out of scope uses of the model: "Not
+    intended for commercial or production use. Military uses are
+    strictly prohibited."
+    [[Model Card]](https://openreview.net/pdf?id=1ikK0kHjvj#appendix.A).
+  monitoring:
+    value: unknown
+    explanation: >
+      There is no information on how DeepMind is internally monitoring
+      the use of the model.
+  feedback: >
+    The feedback for the model can be provided at the email linked in the model
+    card, reedscot at google.com
+    [[Model Card]](https://openreview.net/pdf?id=1ikK0kHjvj#appendix.A).
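The training_time figures recorded for Gopher and Chinchilla above, and for GPT-NeoX later in this patch, follow the same back-of-the-envelope arithmetic: either divide the reported total training compute (in petaflops) by the number of seconds in a day, or multiply wall-clock days by GPU count, per-GPU peak petaflop/s, and an assumed utilization. The minimal Python sketch below only restates that arithmetic under the convention used in these entries (1 petaflop/s-day = 86,400 petaflops of total compute); the helper names and the standalone-script form are illustrative and are not part of any asset file.

    # Sketch of the petaflop/s-day arithmetic quoted in the training_time explanations.
    SECONDS_PER_DAY = 60 * 60 * 24  # 86,400

    def compute_to_pfs_days(total_petaflops: float) -> float:
        # Total reported training compute (petaflops) -> petaflop/s-days.
        return total_petaflops / SECONDS_PER_DAY

    def hardware_estimate_pfs_days(days: float, num_gpus: int,
                                   peak_pflops_per_gpu: float,
                                   utilization: float) -> float:
        # Wall-clock estimate, as in the GPT-NeoX entry later in this patch:
        # days * GPUs * peak petaflop/s per GPU * assumed utilization.
        return days * num_gpus * peak_pflops_per_gpu * utilization

    print(round(compute_to_pfs_days(6.31e8), 2))   # Gopher: 7303.24
    print(round(compute_to_pfs_days(5.76e8), 2))   # Chinchilla: ~6666.67 (entry records 6666.66)
    print(round(hardware_estimate_pfs_days(76.25, 96, 0.0195, 0.33), 2))  # GPT-NeoX: ~47.1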
+ +- type: model + name: Sparrow + organization: DeepMind + description: '' created_date: 2022-09-28 + url: https://arxiv.org/abs/2209.14375 + model_card: '' + modality: text + analysis: '' + size: 70B dependencies: - Chinchilla - Google Search - Sparrow Rule reward model - Sparrow Preference reward model - description: '' - feedback: '' - intended_uses: '' - license: - explanation: "The asset isn't released, and hence the license is unknown.\n" - value: unknown - modality: - explanation: text - value: text; text - model_card: '' - monitoring: '' - name: Sparrow - organization: DeepMind - prohibited_uses: '' - quality_control: '' - size: 70B parameters (dense) training_emissions: '' - training_hardware: '' training_time: '' - type: model - url: https://arxiv.org/abs/2209.14375 -- access: closed - analysis: '' - created_date: 2021-12-08 - dependencies: [MassiveText] - description: '' - feedback: '' - intended_uses: '' + training_hardware: '' + quality_control: '' + access: closed license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - modality: - explanation: Text - value: text; text - model_card: '' + explanation: > + The asset isn't released, and hence the license is unknown. + intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' + +- type: model name: RETRO organization: DeepMind - prohibited_uses: '' - quality_control: '' - size: 7.5B parameters (dense) - training_emissions: '' - training_hardware: '' - training_time: '' - type: model + description: '' + created_date: 2021-12-08 url: https://arxiv.org/abs/2112.04426 -- access: closed + model_card: '' + modality: Text analysis: '' - created_date: 2022-09-28 - dependencies: [Chinchilla, Sparrow adversarial probing dataset] - description: '' - feedback: '' - intended_uses: '' + size: 7.5B + dependencies: [MassiveText] + training_emissions: '' + training_time: '' + training_hardware: '' + quality_control: '' + access: closed license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - modality: - explanation: text - value: text; text - model_card: '' + explanation: > + The asset isn't released, and hence the license is unknown. + intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' + +- type: model name: Sparrow Rule reward model organization: DeepMind - prohibited_uses: '' - quality_control: '' - size: 70B parameters (dense) - training_emissions: '' - training_hardware: '' - training_time: '' - type: model + description: '' + created_date: 2022-09-28 url: https://arxiv.org/abs/2209.14375 -- access: closed + model_card: '' + modality: text analysis: '' - created_date: 2022-09-28 - dependencies: [Chinchilla, Sparrow response preference dataset] - description: '' - feedback: '' - intended_uses: '' + size: 70B + dependencies: [Chinchilla, Sparrow adversarial probing dataset] + training_emissions: '' + training_time: '' + training_hardware: '' + quality_control: '' + access: closed license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - modality: - explanation: text - value: text; text - model_card: '' + explanation: > + The asset isn't released, and hence the license is unknown. 
+ intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' + +- type: model name: Sparrow Preference reward model organization: DeepMind - prohibited_uses: '' - quality_control: '' - size: 70B parameters (dense) - training_emissions: '' - training_hardware: '' - training_time: '' - type: model + description: '' + created_date: 2022-09-28 url: https://arxiv.org/abs/2209.14375 -- access: closed + model_card: '' + modality: text analysis: '' - created_date: 2022-09-28 - datasheet: '' - dependencies: [Chinchilla] - description: '' - excluded: '' - feedback: '' - included: '' - intended_uses: '' + size: 70B + dependencies: [Chinchilla, Sparrow response preference dataset] + training_emissions: '' + training_time: '' + training_hardware: '' + quality_control: '' + access: closed license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - modality: text + explanation: > + The asset isn't released, and hence the license is unknown. + intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' + +- type: dataset name: Sparrow adversarial probing dataset organization: DeepMind - prohibited_uses: '' - quality_control: '' - sample: [] - size: 27k ratings - type: dataset - url: https://arxiv.org/abs/2209.14375 -- access: closed - analysis: '' + description: '' created_date: 2022-09-28 + url: https://arxiv.org/abs/2209.14375 datasheet: '' + modality: text + size: 27k ratings + sample: [] + analysis: '' dependencies: [Chinchilla] - description: '' - excluded: '' - feedback: '' included: '' - intended_uses: '' + excluded: '' + quality_control: '' + access: closed license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - modality: text + explanation: > + The asset isn't released, and hence the license is unknown. + intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' + +- type: dataset name: Sparrow response preference dataset organization: DeepMind - prohibited_uses: '' - quality_control: '' - sample: [] - size: 72k comparisons - type: dataset + description: '' + created_date: 2022-09-28 url: https://arxiv.org/abs/2209.14375 -- access: closed + datasheet: '' + modality: text + size: 72k comparisons + sample: [] analysis: '' - created_date: 2022-03-16 - dependencies: [Gopher, Google Search, GopherCite reward model] - description: '' - feedback: '' - intended_uses: '' + dependencies: [Chinchilla] + included: '' + excluded: '' + quality_control: '' + access: closed license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - modality: - explanation: Text - value: text; text - model_card: '' + explanation: > + The asset isn't released, and hence the license is unknown. 
+ intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' + +- type: model name: GopherCite organization: DeepMind - prohibited_uses: '' - quality_control: '' - size: 280B parameters (dense) - training_emissions: '' - training_hardware: '' - training_time: '' - type: model + description: '' + created_date: 2022-03-16 url: https://storage.googleapis.com/deepmind-media/Teaching%20language%20models%20to%20support%20answers%20with%20verified%20quotes/Teaching%20language%20models%20to%20support%20answers%20with%20verified%20quotes.pdf -- access: closed + model_card: '' + modality: Text analysis: '' - created_date: 2022-03-16 - dependencies: [Gopher, GopherCite Preference dataset] - description: '' - feedback: '' - intended_uses: '' + size: 280B + dependencies: [Gopher, Google Search, GopherCite reward model] + training_emissions: '' + training_time: '' + training_hardware: '' + quality_control: '' + access: closed license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - modality: - explanation: text - value: text; text - model_card: '' + explanation: > + The asset isn't released, and hence the license is unknown. + intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' + +- type: model name: GopherCite reward model organization: DeepMind - prohibited_uses: '' - quality_control: '' - size: 7B parameters (dense) - training_emissions: '' - training_hardware: '' - training_time: '' - type: model + description: '' + created_date: 2022-03-16 url: https://storage.googleapis.com/deepmind-media/Teaching%20language%20models%20to%20support%20answers%20with%20verified%20quotes/Teaching%20language%20models%20to%20support%20answers%20with%20verified%20quotes.pdf -- access: closed + model_card: '' + modality: text analysis: '' - created_date: 2022-03-16 - datasheet: '' - dependencies: [Gopher, Google Search] - description: '' - excluded: '' - feedback: '' - included: '' - intended_uses: '' + size: 7B + dependencies: [Gopher, GopherCite Preference dataset] + training_emissions: '' + training_time: '' + training_hardware: '' + quality_control: '' + access: closed license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - modality: text + explanation: > + The asset isn't released, and hence the license is unknown. + intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' + +- type: dataset name: GopherCite Preference dataset organization: DeepMind - prohibited_uses: '' - quality_control: '' - sample: [] - size: 33k response pairs - type: dataset + description: '' + created_date: 2022-03-16 url: https://storage.googleapis.com/deepmind-media/Teaching%20language%20models%20to%20support%20answers%20with%20verified%20quotes/Teaching%20language%20models%20to%20support%20answers%20with%20verified%20quotes.pdf -- access: closed + datasheet: '' + modality: text + size: 33k response pairs + sample: [] analysis: '' - created_date: 2022-09-29 - dependencies: [Chinchilla] - description: '' - feedback: '' - intended_uses: '' + dependencies: [Gopher, Google Search] + included: '' + excluded: '' + quality_control: '' + access: closed license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - modality: - explanation: text - value: text; text - model_card: '' + explanation: > + The asset isn't released, and hence the license is unknown. 
+ intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' + +- type: model name: Dramatron organization: DeepMind - prohibited_uses: '' - quality_control: '' - size: 70B parameters (dense) + description: '' + created_date: 2022-09-29 + url: https://arxiv.org/abs/2209.14958 + model_card: '' + modality: text + analysis: '' + size: 70B parameters (dense model) + dependencies: [Chinchilla] training_emissions: '' - training_hardware: '' training_time: '' - type: model - url: https://arxiv.org/abs/2209.14958 + training_hardware: '' + quality_control: '' + access: closed + license: + value: unknown + explanation: > + The asset isn't released, and hence the license is unknown. + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' diff --git a/assets/duckduckgo.yaml b/assets/duckduckgo.yaml index a1defb85..465b62eb 100644 --- a/assets/duckduckgo.yaml +++ b/assets/duckduckgo.yaml @@ -1,25 +1,27 @@ --- -- access: open - adaptation: '' - created_date: 2023-03-08 - dependencies: [Anthropic API] + +- type: application + name: DuckAssist + organization: DuckDuckGo description: The first Instant Answer in DuckDuckGo search results to use natural language technology to generate answers to search queries using Wikipedia and other related sources - failures: '' - feedback: '' - intended_uses: '' + created_date: 2023-03-08 + url: https://spreadprivacy.com/duckassist-launch/ + dependencies: [Anthropic API] + adaptation: '' + output_space: '' + quality_control: '' + access: open license: - explanation: "The asset is no longer available, and hence the license is unknown.\n" value: unknown + explanation: > + The asset is no longer available, and hence the license is unknown. + terms_of_service: '' + intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' monthly_active_users: '' - name: DuckAssist - organization: DuckDuckGo - output_space: '' - prohibited_uses: '' - quality_control: '' - terms_of_service: '' - type: application - url: https://spreadprivacy.com/duckassist-launch/ user_distribution: '' + failures: '' diff --git a/assets/duolingo.yaml b/assets/duolingo.yaml index 44e4e886..de25f466 100644 --- a/assets/duolingo.yaml +++ b/assets/duolingo.yaml @@ -1,58 +1,62 @@ --- -- access: limited - adaptation: '' - created_date: 2023-03-14 - dependencies: [GPT-4 API] + +- type: application + name: Duolingo Explain My Answer + organization: Duolingo description: Explain My Answer offers learners the chance to learn more about their response in a lesson (whether their answer was correct or incorrect!) By tapping a button after certain exercise types, learners can enter a chat with Duo to get a simple explanation on why their answer was right or wrong, and ask for examples or further clarification. - failures: '' - feedback: '' - intended_uses: '' + created_date: 2023-03-14 + url: https://blog.duolingo.com/duolingo-max/ + dependencies: [GPT-4 API] + adaptation: '' + output_space: '' + quality_control: '' + access: limited license: - explanation: "Users are granted a limited license per Duolingo Terms.\n" value: custom + explanation: > + Users are granted a limited license per Duolingo Terms. 
+ terms_of_service: https://www.duolingo.com/terms + intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' monthly_active_users: '' - name: Duolingo Explain My Answer - organization: Duolingo - output_space: '' - prohibited_uses: '' - quality_control: '' - terms_of_service: https://www.duolingo.com/terms - type: application - url: https://blog.duolingo.com/duolingo-max/ user_distribution: '' -- access: limited - adaptation: '' - created_date: 2023-03-14 - dependencies: [Duolingo Role Play, Duolingo Explain My Answer] + failures: '' + +- type: application + name: Duolingo Max + organization: Duolingo description: Duolingo Max is a new subscription tier above Super Duolingo that gives learners access to two brand-new features and exercises - Explain My Answer and Roleplay. - failures: '' - feedback: '' - intended_uses: '' + created_date: 2023-03-14 + url: https://blog.duolingo.com/duolingo-max/ + dependencies: [Duolingo Role Play, Duolingo Explain My Answer] + adaptation: '' + output_space: '' + quality_control: '' + access: limited license: - explanation: "Users are granted a limited license per Duolingo Terms.\n" value: custom + explanation: > + Users are granted a limited license per Duolingo Terms. + terms_of_service: '' + intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' monthly_active_users: '' - name: Duolingo Max - organization: Duolingo - output_space: '' - prohibited_uses: '' - quality_control: '' - terms_of_service: '' - type: application - url: https://blog.duolingo.com/duolingo-max/ user_distribution: '' -- access: limited - adaptation: '' - created_date: 2023-03-14 - dependencies: [GPT-4 API] + failures: '' + +- type: application + name: Duolingo Role Play + organization: Duolingo description: Roleplay allows learners to practice real-world conversation skills with world characters in the app. These challenges, which earn XP, will live alongside the path as one of the “Side Quests” learners can access by tapping @@ -60,24 +64,26 @@ scenarios! Learners might discuss future vacation plans with Lin, order coffee at a café in Paris, go furniture shopping with Eddy, or ask a friend to go for a hike. - failures: '' - feedback: '' - intended_uses: '' + created_date: 2023-03-14 + url: https://blog.duolingo.com/duolingo-max/ + dependencies: [GPT-4 API] + adaptation: '' + output_space: '' + quality_control: '' + access: limited license: - explanation: "\"Subject to the terms of these Terms and Conditions, Duolingo\ - \ grants you a non-transferable, non-exclusive license to download, install,\ - \ and use one copy of each App in object code form only on an interactive\ - \ wireless device that you own or control\" - excerpt from the Terms of Service\ - \ document.\n" value: custom + explanation: > + "Subject to the terms of these Terms and Conditions, Duolingo grants you + a non-transferable, non-exclusive license to download, install, and use + one copy of each App in object code form only on an interactive wireless + device that you own or control" - excerpt from the Terms of Service + document. 
+ terms_of_service: https://www.duolingo.com/terms + intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' monthly_active_users: '' - name: Duolingo Role Play - organization: Duolingo - output_space: '' - prohibited_uses: '' - quality_control: '' - terms_of_service: https://www.duolingo.com/terms - type: application - url: https://blog.duolingo.com/duolingo-max/ user_distribution: '' + failures: '' diff --git a/assets/eleutherai.yaml b/assets/eleutherai.yaml index 5813633b..524c8010 100644 --- a/assets/eleutherai.yaml +++ b/assets/eleutherai.yaml @@ -1,49 +1,16 @@ --- -- access: - explanation: "The dataset is freely available to the public and can be downloaded\ - \ from The Eye [[The Pile]](https://mystic.the-eye.eu/public/AI/pile/).\n" - value: open - analysis: "Analyses of the data's composition, document statistics, language/dialectal\ - \ coverage, topical distribution, and biases are conducted are conducted in\ - \ the paper [[The Pile Paper]](https://arxiv.org/pdf/2101.00027.pdf).\n" - created_date: 2021-01-01 - datasheet: https://arxiv.org/pdf/2201.07311.pdf - dependencies: [] - description: "A latge language model training dataset, used to train GPT-NeoX-20B.\n" - excluded: "Authors report that they have excluded some datasets \"because they\ - \ were too small to be worth spending time or because the English component\ - \ of the data did not merit inclusion on its own. Three datasets were excluded\ - \ for other reasons: (1) US Congressional Records were excluded because it \"\ - reflects the opinions and biases of the political class over the past 200 years,\ - \ including segregationism and xenophobia.\" (2) Online Fanfiction resources\ - \ amounting to Hundreds of GiB were excluded on logistical grounds. (3) Literotica,\ - \ platform where users can upload short-form erotic fiction, was excluded because\ - \ the authors decided to exclude fanfiction, the corpus would require significant\ - \ investigation, and corpus contain significant amount of stereotyping [[Appendix\ - \ B]](https://arxiv.org/pdf/2101.00027.pdf).\n" - feedback: "Feedback can be given by emailing the authors at contact at eleuther.ai.\n" - included: "The Pile data come from 22 sources, with over half of the data being\ - \ from Common Crawl (Pile-CC; 227GB), fiction and nonfiction books (Books3;\ - \ 101GB), biomedical articles (PubMed Central; 90GB), and code (Github; 95 GB).\ - \ Refer to the paper for full decomposition [[Table 1]](https://arxiv.org/pdf/2101.00027.pdf#table.caption.2).\n" - intended_uses: "The Pile was intended to be used as a high quality large text\ - \ dataset for language modeling tasks, explained in more detail in the paper\ - \ [[Section 1]](https://arxiv.org/pdf/2101.00027.pdf#section.1).\n" - license: - explanation: As indicated in the paper. 
-    value: MIT
-  modality:
-    explanation: text (English) and code
-    value: code, text
-  monitoring: none
+
+- type: dataset
  name: The Pile
+  # General
  organization: EleutherAI
-  prohibited_uses: none
-  quality_control: "In addition to the data inclusion and exclusion decisions, the\
-    \ quality was controlled through filtering for English (pycld2 language classifier),\
-    \ filtering for documents similar to OpenWebText2 (classifier on CommonCrawl),\
-    \ and several forms of deduplication as detailed in the paper [[Appendix C]](https://arxiv.org/pdf/2101.00027.pdf#appendix.1.C)\
-    \ [[Appendix D]](https://arxiv.org/pdf/2101.00027.pdf#appendix.1.D).\n"
+  description: >
+    A large language model training dataset, used to train GPT-NeoX-20B.
+  created_date: 2021-01-01
+  url: https://arxiv.org/pdf/2101.00027.pdf
+  datasheet: https://arxiv.org/pdf/2201.07311.pdf
+  modality: text (English) and code
+  size: 825 GB
  sample:
  - '...pot trending topics and the coverage around them. First up, there’s
    a bit of a visual redesign. Previously, clicking on a trending topic would highlight
@@ -64,204 +31,287 @@
    the defendants urge that this Court should reconsider the case of Green Mountain
    Marble Co. v. Highway Board, supra, and follow the Federal practice of looking
    to the evide
-  size: 825 GB
-  type: dataset
-  url: https://arxiv.org/pdf/2101.00027.pdf
-- access:
-    explanation: "The model can be downloaded for free from [[The Eye]](https://mystic.the-eye.eu/public/AI/GPT-J-6B/step_383500.tar.zstd)\n"
+  analysis: >
+    Analyses of the data's composition, document statistics,
+    language/dialectal coverage, topical distribution, and biases are
+    conducted in the paper
+    [[The Pile Paper]](https://arxiv.org/pdf/2101.00027.pdf).
+  # Construction
+  dependencies: []
+  license:
+    value: MIT
+    explanation: As indicated in the paper.
+  included: >
+    The Pile data come from 22 sources, with over half of the data being from
+    Common Crawl (Pile-CC; 227GB), fiction and nonfiction books (Books3; 101GB),
+    biomedical articles (PubMed Central; 90GB), and code (Github; 95 GB).
+    Refer to the paper for the full decomposition
+    [[Table 1]](https://arxiv.org/pdf/2101.00027.pdf#table.caption.2).
+  excluded: >
+    Authors report that they have excluded some datasets "because they were too
+    small to be worth spending time or because the English component of the data
+    did not merit inclusion on its own. Three datasets were excluded for other
+    reasons: (1) US Congressional Records were excluded because it "reflects the
+    opinions and biases of the political class over the past 200 years,
+    including segregationism and xenophobia." (2) Online Fanfiction resources
+    amounting to Hundreds of GiB were excluded on logistical grounds.
+    (3) Literotica, a platform where users can upload short-form erotic fiction,
+    was excluded because the authors decided to exclude fanfiction, the
+    corpus would require significant investigation, and the corpus contains a
+    significant amount of stereotyping
+    [[Appendix B]](https://arxiv.org/pdf/2101.00027.pdf).
+  quality_control: >
+    In addition to the data inclusion and exclusion decisions, the quality was
+    controlled through filtering for English (pycld2 language classifier),
+    filtering for documents similar to OpenWebText2 (classifier on CommonCrawl),
+    and several forms of deduplication as detailed in the paper
+    [[Appendix C]](https://arxiv.org/pdf/2101.00027.pdf#appendix.1.C)
+    [[Appendix D]](https://arxiv.org/pdf/2101.00027.pdf#appendix.1.D).
+ # Downstream + access: value: open - analysis: '' + explanation: > + The dataset is freely available to the public and + can be downloaded from The Eye + [[The Pile]](https://mystic.the-eye.eu/public/AI/pile/). + intended_uses: > + The Pile was intended to be used as a high quality large text dataset for + language modeling tasks, explained in more detail in the paper + [[Section 1]](https://arxiv.org/pdf/2101.00027.pdf#section.1). + prohibited_uses: none + monitoring: none + feedback: > + Feedback can be given by emailing the authors at contact at eleuther.ai. + +- type: model + name: GPT-J + organization: EleutherAI + description: GPT-J is an open-source autoregressive language model. created_date: - explanation: Date model blog post was published value: 2021-06-04 - dependencies: [The Pile] - description: GPT-J is an open-source autoregressive language model. - feedback: '' - intended_uses: '' - license: - explanation: "As indicated in the [[Github repository]](https://github.com/kingoflolz/mesh-transformer-jax)\n" - value: Apache 2.0 - modality: - explanation: text (English) - value: text; text + explanation: Date model blog post was published + url: https://arankomatsuzaki.wordpress.com/2021/06/04/gpt-j/ model_card: '' - monitoring: '' - name: GPT-J - organization: EleutherAI - prohibited_uses: '' - quality_control: '' - size: 6B parameters (dense) + modality: text (English) + analysis: '' + size: 6B parameters (dense model) + dependencies: [The Pile] training_emissions: '' - training_hardware: 'TRC (Unspecified # of TPU v3-8s)' training_time: '' - type: model - url: https://arankomatsuzaki.wordpress.com/2021/06/04/gpt-j/ -- access: - explanation: "The model can be downloaded for free from [[The Eye]](https://mystic.the-eye.eu/public/AI/gptneo-release/GPT3_2-7B/)\n" + training_hardware: 'TRC (Unspecified # of TPU v3-8s)' + quality_control: '' + access: value: open - analysis: '' - created_date: - explanation: Date Github repo was update - value: 2021-03-21 - dependencies: [The Pile] - description: '' - feedback: '' + explanation: > + The model can be downloaded for free from [[The Eye]](https://mystic.the-eye.eu/public/AI/GPT-J-6B/step_383500.tar.zstd) + license: + value: Apache 2.0 + explanation: > + As indicated in the [[Github repository]](https://github.com/kingoflolz/mesh-transformer-jax) intended_uses: '' - license: MIT - modality: - explanation: Text (English) - value: text; text - model_card: '' + prohibited_uses: '' monitoring: '' + feedback: '' + +- type: model name: GPT-Neo organization: EleutherAI - prohibited_uses: '' - quality_control: '' - size: 2.7B parameters (dense) + description: '' + created_date: + value: 2021-03-21 + explanation: Date Github repo was update + url: https://github.com/EleutherAI/gpt-neo + model_card: '' + modality: Text (English) + analysis: '' + size: 2.7B parameters (dense model) + dependencies: [The Pile] training_emissions: '' - training_hardware: '' training_time: '' - type: model - url: https://github.com/EleutherAI/gpt-neo -- access: - explanation: "The model can be downloaded for free The Eye [[GPT-NeoX-20B]](https://mystic.the-eye.eu/public/AI/models/GPT-NeoX-20B/).\n" + training_hardware: '' + quality_control: '' + access: value: open - analysis: "The model was evaluated on standard NLP benchmarks: LAMBADA, ANLI,\ - \ HellaSwag, MMLU among others [[Section 4]](http://eaidata.bmk.sh/data/GPT_NeoX_20B.pdf#section.4).\n" - created_date: 2022-02-02 - dependencies: [The Pile] - description: "GPT-NeoX (20B) is an open-sourced autoregressive 
language model.\n" - feedback: "Feedback can be provided using the # 20b channel in EleutherAI Discord\ - \ group [[EleutherAI Blog Post]](https://blog.eleuther.ai/announcing-20b/).\ - \ Find the Discord link in the FAQ page [[FAQ]](https://www.eleuther.ai/faq/).\n" - intended_uses: "As stated in the model card: \"GPT-NeoX-20B learns an inner representation\ - \ of the English language that can be used to extract features useful for downstream\ - \ tasks. The model is best at what it was pretrained for however, which is generating\ - \ text from a prompt. Due to the generality of the pretraining set, it has acquired\ - \ the ability to generate completions across a wide range of tasks - from programming\ - \ to fiction writing [[Model Card]](https://mystic.the-eye.eu/public/AI/models/GPT-NeoX-20B/20B_model_card.md).\"\ - \n" - license: - explanation: "As indicated in the accompanying blog post [[EleutherAI Blog Post]](https://blog.eleuther.ai/announcing-20b/).\n" - value: Apache 2.0 - modality: - explanation: text (English) and code - value: code, text; code, text - model_card: https://mystic.the-eye.eu/public/AI/models/GPT-NeoX-20B/20B_model_card.md - monitoring: none + explanation: > + The model can be downloaded for free from [[The Eye]](https://mystic.the-eye.eu/public/AI/gptneo-release/GPT3_2-7B/) + license: MIT + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' + +- type: model name: GPT-NeoX + # General organization: EleutherAI - prohibited_uses: none - quality_control: none - size: 20B parameters (dense) + description: > + GPT-NeoX (20B) is an open-sourced autoregressive language model. + created_date: 2022-02-02 + url: http://eaidata.bmk.sh/data/GPT_NeoX_20B.pdf + model_card: https://mystic.the-eye.eu/public/AI/models/GPT-NeoX-20B/20B_model_card.md + modality: text (English) and code + size: 20B parameters (dense model) + analysis: > + The model was evaluated on standard NLP benchmarks: LAMBADA, ANLI, + HellaSwag, MMLU among others + [[Section 4]](http://eaidata.bmk.sh/data/GPT_NeoX_20B.pdf#section.4). + # Construction + dependencies: [The Pile] training_emissions: - explanation: "The amount of emission during the development and training of\ - \ the model based on the author's estimation [[Section 6.4]](http://eaidata.bmk.sh/data/GPT_NeoX_20B.pdf#subsection.6.4).\n" value: 31.73 tCO2e - training_hardware: - explanation: "As outline by the authors [[Section 2.3]](http://eaidata.bmk.sh/data/GPT_NeoX_20B.pdf#subsection.2.3)\n" - value: 12 x 8 A100 GPUs + explanation: > + The amount of emission during the development and training of + the model based on the author's estimation + [[Section 6.4]](http://eaidata.bmk.sh/data/GPT_NeoX_20B.pdf#subsection.6.4). training_time: - explanation: "Training time was reported as 1830 hours reported by the authors,\ - \ equaling 76.25 days. 
[[Section 6.4]](http://eaidata.bmk.sh/data/GPT_NeoX_20B.pdf#subsection.6.4).\
\ The authors report that 96 (12 * 8) A100 GPUs were used during the training.\
\ The A100 GPUs have a single precision performance of 0.0195 petaflops [[A100\
\ Datasheet]](https://www.nvidia.com/content/dam/en-zz/Solutions/Data-Center/a100/pdf/nvidia-a100-datasheet-us-nvidia-1758950-r4-web.pdf).\
\ Assuming the estimated utilization is 33%, following [[OpenAI AI and Computer\
\ Blog]](https://openai.com/blog/ai-and-compute/#addendum), the training time\
\ is 47.10 petaflop/s-day (76.25 * 96 * 0.0195 * 0.33).\n"
value: 47.10 petaflop/s-day
- type: model
- url: http://eaidata.bmk.sh/data/GPT_NeoX_20B.pdf
-- access:
- explanation: "GooseAI API can be accessed by signing up on the goose.ai website.\n"
- value: limited
- adaptation: unknown
- created_date: unknown
- dependencies: [GPT-NeoX]
- description: "GooseAI API is an API service providing access to NLP services.\n"
- failures: unknown
- feedback:
- explanation: "In the \"Error Reporting and Feedback\" section of the Goose.ai\
- \ Terms of Service, GooseAI asks all the feedback to be sent to support at\
- \ goose.ai [[GooseAI Terms of Service]](https://goose.ai/docs/tos).\n"
- value: Email support
- intended_uses: "Intended to be used as an NLP infrastructure.\n"
+ explanation: >
+ Training time was reported as 1830 hours by the authors, equaling
+ 76.25 days.
+ [[Section 6.4]](http://eaidata.bmk.sh/data/GPT_NeoX_20B.pdf#subsection.6.4).
+ The authors report that 96 (12 * 8) A100 GPUs were used during the
+ training.
+ The A100 GPUs have a single precision performance of 0.0195 petaflops
+ [[A100 Datasheet]](https://www.nvidia.com/content/dam/en-zz/Solutions/Data-Center/a100/pdf/nvidia-a100-datasheet-us-nvidia-1758950-r4-web.pdf).
+ Assuming the estimated utilization is 33%, following
+ [[OpenAI AI and Compute Blog]](https://openai.com/blog/ai-and-compute/#addendum),
+ the training time is 47.10 petaflop/s-day (76.25 * 96 * 0.0195 * 0.33).
+ training_hardware:
+ value: 12 x 8 A100 GPUs
+ explanation: >
+ As outlined by the authors
+ [[Section 2.3]](http://eaidata.bmk.sh/data/GPT_NeoX_20B.pdf#subsection.2.3)
+ quality_control: none
+ # Downstream
+ access:
+ value: open
+ explanation: >
+ The model can be downloaded for free from The Eye
+ [[GPT-NeoX-20B]](https://mystic.the-eye.eu/public/AI/models/GPT-NeoX-20B/).
license:
- explanation: "\"Subject to Customer’s strict compliance with this TOS, GooseAI\
- \ grants Customer a limited, non-exclusive, non-transferable, non-sublicensable,\
- \ revocable license to access and use the Platform as described in and subject\
- \ to this TOS\" - excerpt from the Terms of Service document.\n"
- value: custom
- monitoring:
- explanation: "In the \"GooseAI Monitoring and Enforcement\" section of GooseAI's\
- \ Acceptable Use Policy (AUP), it is stated that Goose.AI has the right to\
- \ investigate any suspected violation of its AUP [[GooseAI Acceptable Use\
- \ Policy]](https://goose.ai/docs/aup).\n"
- value: At will monitoring by the provider
- monthly_active_users: unknown
+ value: Apache 2.0
+ explanation: >
+ As indicated in the accompanying blog post
+ [[EleutherAI Blog Post]](https://blog.eleuther.ai/announcing-20b/).
+ intended_uses: >
+ As stated in the model card: "GPT-NeoX-20B learns an inner representation
+ of the English language that can be used to extract features useful for
+ downstream tasks. The model is best at what it was pretrained for however,
+ which is generating text from a prompt.
+ Due to the generality of the pretraining set, it has acquired the ability + to generate completions across a wide range of tasks - from programming to + fiction writing + [[Model Card]](https://mystic.the-eye.eu/public/AI/models/GPT-NeoX-20B/20B_model_card.md)." + prohibited_uses: none + monitoring: none + feedback: > + Feedback can be provided using the # 20b channel in EleutherAI Discord + group + [[EleutherAI Blog Post]](https://blog.eleuther.ai/announcing-20b/). + Find the Discord link in the FAQ page + [[FAQ]](https://www.eleuther.ai/faq/). + +- type: application name: GooseAI API + # General organization: GooseAI + description: > + GooseAI API is an API service providing access to NLP services. + created_date: unknown + url: goose.ai + # Construction + dependencies: [GPT-NeoX] + adaptation: unknown output_space: - explanation: "Question/Answer and Classification tasks are coming soon according\ - \ to GooseAI [[Main Page]](goose.ai).\n" value: Text Generation, Text Completion - prohibited_uses: - explanation: "Prohibited uses are detailed in the Acceptable Use Policy [[GooseAI\ - \ Acceptable Use Policy]](https://goose.ai/docs/aup).\n" - value: "Illegal or abusive activity, security violations, network abuse\n" + explanation: > + Question/Answer and Classification tasks are coming soon according to + GooseAI + [[Main Page]](goose.ai). quality_control: unknown + # Downstream + access: + value: limited + explanation: > + GooseAI API can be accessed by signing up on the goose.ai website. + license: + value: custom + explanation: > + "Subject to Customer’s strict compliance with this TOS, GooseAI grants + Customer a limited, non-exclusive, non-transferable, non-sublicensable, + revocable license to access and use the Platform as described in and + subject to this TOS" - excerpt from the Terms of Service document. terms_of_service: https://goose.ai/docs/tos - type: application - url: goose.ai + intended_uses: > + Intended to be used as an NLP infrastructure. + prohibited_uses: + value: > + Illegal or abusive activity, security violations, network abuse + explanation: > + Prohibited uses are detailed in the Acceptable Use Policy + [[GooseAI Acceptable Use Policy]](https://goose.ai/docs/aup). + monitoring: + value: At will monitoring by the provider + explanation: > + In the "GooseAI Monitoring and Enforcement" section of GooseAI's + Acceptable Use Policy (AUP), it is stated that Goose.AI has the right to + investigate any suspected violation of its AUP + [[GooseAI Acceptable Use Policy]](https://goose.ai/docs/aup). + feedback: + value: Email support + explanation: > + In the "Error Reporting and Feedback" section of the Goose.ai Terms of + Service, GooseAI asks all the feedback to be sent to support at goose.ai + [[GooseAI Terms of Service]](https://goose.ai/docs/tos). + # Deployment + monthly_active_users: unknown user_distribution: unknown -- access: open - analysis: Evaluated by human testers rating alignment of text input, image output - pairs. - created_date: 2022-09-04 - dependencies: [VQGAN, CLIP] + failures: unknown + +- type: model + name: VQGAN-CLIP + organization: EleutherAI description: VQGAN-CLIP is a model that better generates and edits images using a multimodal encoder to guide image generation. 
- feedback: '' - intended_uses: '' - license: MIT - modality: - explanation: text input with image output - value: text; image + created_date: 2022-09-04 + url: https://arxiv.org/pdf/2204.08583.pdf model_card: '' - monitoring: '' - name: VQGAN-CLIP - organization: EleutherAI - prohibited_uses: '' - quality_control: '' - size: 227M parameters (dense) + modality: text input with image output + analysis: Evaluated by human testers rating alignment of text input, image output + pairs. + size: 227M parameters + dependencies: [VQGAN, CLIP] training_emissions: unknown - training_hardware: 1 NVIDIA Tesla K80 GPU training_time: Less than 1 V100-hour - type: model - url: https://arxiv.org/pdf/2204.08583.pdf -- access: open - analysis: Evaluated on a variety of NLP benchmarks and found to perform similarly - to OPT and BLOOM models. - created_date: 2023-05-31 - dependencies: [The Pile] - description: A suite of 16 LLMs all trained on public data seen in the exact same - order and ranging in size from 70M to 12B parameters - feedback: https://huggingface.co/EleutherAI/pythia-6.9b/discussions + training_hardware: 1 NVIDIA Tesla K80 GPU + quality_control: '' + access: open + license: MIT intended_uses: '' - license: Apache 2.0 - modality: - explanation: natural language text - value: text; text - model_card: https://huggingface.co/EleutherAI/pythia-12b + prohibited_uses: '' monitoring: '' + feedback: '' + +- type: model name: Pythia organization: Eleuther AI - prohibited_uses: '' - quality_control: '' - size: 12B parameters (dense) + description: A suite of 16 LLMs all trained on public data seen in the exact same + order and ranging in size from 70M to 12B parameters + created_date: 2023-05-31 + url: https://arxiv.org/pdf/2304.01373.pdf + model_card: https://huggingface.co/EleutherAI/pythia-12b + modality: natural language text + analysis: Evaluated on a variety of NLP benchmarks and found to perform similarly + to OPT and BLOOM models. + size: 12B parameters + dependencies: [The Pile] training_emissions: '' - training_hardware: 64 A100 GPUs training_time: '' - type: model - url: https://arxiv.org/pdf/2304.01373.pdf + training_hardware: 64 A100 GPUs + quality_control: '' + access: open + license: Apache 2.0 + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: https://huggingface.co/EleutherAI/pythia-6.9b/discussions diff --git a/assets/ens.yaml b/assets/ens.yaml index b728b918..5209fe3d 100644 --- a/assets/ens.yaml +++ b/assets/ens.yaml @@ -1,37 +1,32 @@ --- -- access: - explanation: Dataset, evaluation code and models are publicly available at the - [[HowTo100M dataset webpage]](https://www.di.ens.fr/willow/research/howto100m/). - value: open + +- type: dataset + name: HowTo100M + organization: École Normale Supérieure, Inria + description: HowTo100M is a large-scale dataset of narrated videos with an emphasis + on instructional videos where content creators teach complex tasks with an explicit + intention of explaining the visual content on screen. HowTo100M features a total + of 136M video clips with captions sourced from 1.2M Youtube videos (15 years + of video) and 23k activities from domains such as cooking, hand crafting, personal + care, gardening or fitness. + created_date: + value: 2019-06-07 + explanation: The date the [[paper]](https://arxiv.org/abs/1906.03327) was submitted. 
+ url: https://arxiv.org/pdf/1906.03327.pdf + datasheet: https://arxiv.org/pdf/1906.03327.pdf + modality: text (English) and video + size: 136M video clips + sample: [] analysis: - explanation: See [[Experiments]](https://arxiv.org/pdf/1906.03327v2.pdf#section.5) value: Authors use the dataset to learn a joint text-video embedding by leveraging more than 130M video clip-caption pairs. They then evaluate the learned embeddings on the tasks of localizing steps in instructional videos of CrossTask and textbased video retrieval on YouCook2, MSR-VTT and LSMDC datasets. They show that their learned embedding can perform better compared to models trained on existing carefully annotated but smaller video description datasets. - created_date: - explanation: The date the [[paper]](https://arxiv.org/abs/1906.03327) was submitted. - value: 2019-06-07 - datasheet: https://arxiv.org/pdf/1906.03327.pdf + explanation: See [[Experiments]](https://arxiv.org/pdf/1906.03327v2.pdf#section.5) dependencies: [YouTube] - description: HowTo100M is a large-scale dataset of narrated videos with an emphasis - on instructional videos where content creators teach complex tasks with an explicit - intention of explaining the visual content on screen. HowTo100M features a total - of 136M video clips with captions sourced from 1.2M Youtube videos (15 years - of video) and 23k activities from domains such as cooking, hand crafting, personal - care, gardening or fitness. - excluded: - explanation: See [[Data collection]](https://arxiv.org/pdf/1906.03327v2.pdf#subsection.3.1) - value: Categories such as Relationships and Finance and Business, that may be - more abstract, are excluded. Videos with less than 100 views are removed. - Authors also ignore videos that have less than 100 words. Videos longer than - 2,000 seconds are removed. As some videos may appear in several tasks, the - videos are deduplicated based on YouTube IDs. - feedback: '' included: - explanation: See [[Data collection]](https://arxiv.org/pdf/1906.03327v2.pdf#subsection.3.1) value: The dataset features 1.22 million videos from YouTube with a primary focus on videos containing "visual tasks", that involve some interaction with the physical world (e.g. Making peanut butter, Pruning a tree) as compared @@ -43,22 +38,29 @@ Arts and Entertainment, Computers and Electronics). They also restrict to the top 200 YouTube search results, as the latter ones may not be related to the query task. - intended_uses: '' + explanation: See [[Data collection]](https://arxiv.org/pdf/1906.03327v2.pdf#subsection.3.1) + excluded: + value: Categories such as Relationships and Finance and Business, that may be + more abstract, are excluded. Videos with less than 100 views are removed. + Authors also ignore videos that have less than 100 words. Videos longer than + 2,000 seconds are removed. As some videos may appear in several tasks, the + videos are deduplicated based on YouTube IDs. + explanation: See [[Data collection]](https://arxiv.org/pdf/1906.03327v2.pdf#subsection.3.1) + quality_control: '' + access: + value: open + explanation: Dataset, evaluation code and models are publicly available at the + [[HowTo100M dataset webpage]](https://www.di.ens.fr/willow/research/howto100m/). license: Apache 2.0 - modality: {} - monitoring: '' - name: HowTo100M - organization: École Normale Supérieure, Inria + intended_uses: '' prohibited_uses: + value: > + No uses are explicitly prohibited by the authors. 
They note the following + limitations of the dataset: "We note that the distribution of identities and + activities in the HowTo100M dataset may not be representative of the global + human population and the diversity in society. Please be careful of unintended + societal, gender, racial and other biases when training or deploying models + trained on this data." explanation: See [[HowTo100M dataset webpage]](https://www.di.ens.fr/willow/research/howto100m/) - value: "No uses are explicitly prohibited by the authors. They note the following\ - \ limitations of the dataset: \"We note that the distribution of identities\ - \ and activities in the HowTo100M dataset may not be representative of the\ - \ global human population and the diversity in society. Please be careful\ - \ of unintended societal, gender, racial and other biases when training or\ - \ deploying models trained on this data.\"\n" - quality_control: '' - sample: [] - size: 136M video clips - type: dataset - url: https://arxiv.org/pdf/1906.03327.pdf + monitoring: '' + feedback: '' diff --git a/assets/faraday.yaml b/assets/faraday.yaml index a1af0488..7c986cb4 100644 --- a/assets/faraday.yaml +++ b/assets/faraday.yaml @@ -1,26 +1,27 @@ --- -- access: open - adaptation: '' + +- type: application + name: ARES + organization: Faraday Lab + description: ARES is a text-to-image generator based on Stable Diffusion. The + goal is to provide a simple tool with a user interface allowing mainstream AI + access for artists and creators. created_date: + value: 2023-04-26 explanation: The date Faraday Lab's hugging face card was last updated. Date for ARES release is not published on site. - value: 2023-04-26 + url: https://faradaylab.fr/ dependencies: [Stable Diffusion] - description: ARES is a text-to-image generator based on Stable Diffusion. The - goal is to provide a simple tool with a user interface allowing mainstream AI - access for artists and creators. - failures: '' - feedback: '' - intended_uses: '' - license: unknown - monitoring: '' - monthly_active_users: '' - name: ARES - organization: Faraday Lab + adaptation: '' output_space: generated images - prohibited_uses: '' quality_control: '' + access: open + license: unknown terms_of_service: '' - type: application - url: https://faradaylab.fr/ + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' + monthly_active_users: '' user_distribution: '' + failures: '' diff --git a/assets/google.yaml b/assets/google.yaml index 42aab482..fb56a8dc 100644 --- a/assets/google.yaml +++ b/assets/google.yaml @@ -1,71 +1,100 @@ --- -- access: - explanation: https://huggingface.co/datasets/c4 - value: open - analysis: https://arxiv.org/abs/2104.08758 + +# Datasets +- type: dataset + name: C4 + # General + organization: Google + description: The Colossal Clean Crawled Corpus (C4) is a processed version of + Common Crawl to facilitate transfer learning in NLP. created_date: - explanation: The date the T5 paper was made public. value: 2019-10-23 + explanation: The date the T5 paper was made public. + url: https://arxiv.org/abs/1910.10683 datasheet: https://huggingface.co/datasets/c4 + modality: Text (English) + size: 750GB + sample: [https://huggingface.co/datasets/c4/viewer/en/train] + analysis: https://arxiv.org/abs/2104.08758 + # Construction dependencies: [CommonCrawl] - description: The Colossal Clean Crawled Corpus (C4) is a processed version of - Common Crawl to facilitate transfer learning in NLP. - excluded: "Data was filtered for English using langdetect. 
Further, data was filtered\ - \ to end in terminal punctuation, to remove short pages (less than 5 sentences),\ - \ and to remove \"Dirty, Naughty, Obscene or Otherwise Bad Words\".\n" - feedback: https://huggingface.co/datasets/c4/discussions + license: ODC-By 1.0 included: none + excluded: > + Data was filtered for English using langdetect. Further, data was filtered + to end in terminal punctuation, to remove short pages (less than 5 sentences), + and to remove "Dirty, Naughty, Obscene or Otherwise Bad Words". + quality_control: > + Data filtering excluded obscene words from a block list as well + as short documents and some deduplication was done based on string overlap. + # Downstream + access: + value: open + explanation: https://huggingface.co/datasets/c4 intended_uses: To faciliate transfer learning research in NLP. - license: ODC-By 1.0 - modality: {} + prohibited_uses: none monitoring: none - name: C4 + feedback: https://huggingface.co/datasets/c4/discussions + +- type: dataset + name: Internal Google BERT dataset + # General organization: Google - prohibited_uses: none - quality_control: "Data filtering excluded obscene words from a block list as well\ - \ as short documents and some deduplication was done based on string overlap.\n" - sample: [https://huggingface.co/datasets/c4/viewer/en/train] - size: 750GB - type: dataset - url: https://arxiv.org/abs/1910.10683 -- access: closed - analysis: unknown + description: > + The dataset used to train Internal Google BERT models. created_date: - explanation: "The date of the Google product update blog announcing that BERT\ - \ models were for ranking and featured snippets in Search.\n" value: 2019-11-25 + explanation: > + The date of the Google product update blog announcing that BERT models + were for ranking and featured snippets in Search. + url: https://blog.google/products/search/search-language-understanding-bert/ datasheet: none + modality: text + size: unknown + sample: [] + analysis: unknown + # Construction dependencies: [] - description: "The dataset used to train Internal Google BERT models.\n" - excluded: unknown - feedback: unknown + license: + value: unknown + explanation: > + The asset isn't released, and hence the license is unknown. included: - explanation: "Although we don't exactly know the contents of the Internal Google\ - \ BERT dataset, it likely includes contents from web pages and search queries.\n" value: Web pages, and search queries + explanation: > + Although we don't exactly know the contents of the Internal Google BERT + dataset, it likely includes contents from web pages and search queries. + excluded: unknown + quality_control: unknown + # Downstream + access: closed intended_uses: - explanation: "We don't have an exhaustive list of the intended use cases for\ - \ the Internal Google BERT dataset, but we know that BERT was used in Google\ - \ Search.\n" value: unknown - license: - explanation: "The asset isn't released, and hence the license is unknown.\n" - value: unknown - modality: {} + explanation: > + We don't have an exhaustive list of the intended use cases for the + Internal Google BERT dataset, but we know that BERT was used in Google + Search. 
+ prohibited_uses: unknown monitoring: unknown - name: Internal Google BERT dataset + feedback: unknown + +- type: dataset + name: Conceptual Captions + # General organization: Google - prohibited_uses: unknown - quality_control: unknown + description: > + A dataset containing 3 million (image-URL, caption) pairs designed for the + training and evaluation of machine learned image captioning systems. + created_date: + value: 2018-07-01 + explanation: Due to the lack of information about the exact date, it is assumed + to be the 1st of the known month of creation. + url: https://aclanthology.org/P18-1238/ + datasheet: none + modality: text(English) and image + size: 3.3M (image, text) pairs sample: [] - size: unknown - type: dataset - url: https://blog.google/products/search/search-language-understanding-bert/ -- access: - explanation: Dataset can be downloaded at [[Download]](https://ai.google.com/research/ConceptualCaptions/download) - value: open analysis: - explanation: See [[Experimental Results]](https://aclanthology.org/P18-1238.pdf#section.5) value: Authors evaluate the dataset on two image captioning models - RNN-based model and Transformer model, under two experimental conditions - using the training & development sets provided by the MS COCO dataset, versus training @@ -75,631 +104,708 @@ evaluations indicate that the Conceptual-based models are superior. Automatic models fail to corroborate the human evaluation results. This highlights the weakness of these automatic metrics. - created_date: - explanation: Due to the lack of information about the exact date, it is assumed - to be the 1st of the known month of creation. - value: 2018-07-01 - datasheet: none + explanation: See [[Experimental Results]](https://aclanthology.org/P18-1238.pdf#section.5) + # Construction dependencies: [] - description: "A dataset containing 3 million (image-URL, caption) pairs designed\ - \ for the training and evaluation of machine learned image captioning systems.\n" - excluded: - explanation: See [[Conceptual Captions Dataset Creation]](https://aclanthology.org/P18-1238.pdf#section.3) - value: "The following filtering steps are applied in the given order:\n1. Image-based\ - \ Filtering - \"It only keeps JPEG images where both dimensions are greater\ - \ than 400 pixels, and the ratio of larger to smaller dimension is no more\ - \ than 2. It excludes images that trigger pornography or profanity detectors.\ - \ These filters discard more than 65% of the candidates.\"\n2. Text-based\ - \ Filtering - \"Candidates with no determiner, no noun, or no preposition\ - \ are discarded; candidates with a high noun ratio are also discarded; candidates\ - \ with a high rate of token repetition are discarded; candidates where the\ - \ first word is not capitalized, or with too high capitalized-word ratio are\ - \ discarded; we use a vocabulary VW of 1B token types, appearing at least\ - \ 5 times in the English Wikipedia, and discard candidates that contain tokens\ - \ that are not found in this vocabulary. candidates that score too high or\ - \ too low on the polarity annotations, or trigger the pornography/profanity\ - \ detectors, are discarded; predefined boiler-plate prefix/suffix sequences\ - \ matching the text are cropped, e.g. “click to enlarge picture”, “stock photo”;\ - \ we also drop text which begins/ends in certain patterns, e.g. “embedded\ - \ image permalink”, “profile photo”. These filters only allow around 3% of\ - \ the incoming candidates to pass to the later stages.\"\n3. 
Image&Text-based\ - \ Filtering - \"We filter out candidates for which none of the text tokens\ - \ can be mapped to the content of the image. This filter discards around 60%\ - \ of the incoming candidates.\"\n4. Text Transformation with Hypernymization\ - \ - \"Noun modifiers of certain types (proper nouns, numbers, units) are removed;\ - \ dates, durations, and preposition-based locations (e.g., \"in Los Angeles\"\ - ) are removed; named-entities are identified, matched against the KG entries,\ - \ and substitute with their hypernym; resulting coordination noun-phrases\ - \ with the same head (e.g., \"actor and actor\") are resolved into a single-head,\ - \ pluralized form (e.g., \"actors\"). Around 20% of samples are discarded\ - \ during this transformation. We then cluster all resolved entities (e.g.,\ - \ 2560 \"actor\", \"dog\", \"neighborhood\", etc.) and keep only the candidates\ - \ for which all detected types have a count of over 100 (around 55% of the\ - \ candidates).\"\n" - feedback: Feedback can be provided by creating an issue in the [[Conceptual Captions - GitHub repository]](https://github.com/google-research-datasets/conceptual-captions) - or by emailing at conceptual-captions at google.com - included: '' - intended_uses: '' license: - explanation: "[[Conceptual Captions License]](https://github.com/google-research-datasets/conceptual-captions/blob/master/LICENSE)\n" value: Conceptual Captions License - modality: {} - monitoring: unknown - name: Conceptual Captions - organization: Google - prohibited_uses: unknown - quality_control: + explanation: > + [[Conceptual Captions License]](https://github.com/google-research-datasets/conceptual-captions/blob/master/LICENSE) + included: '' + excluded: + value: | + The following filtering steps are applied in the given order: + 1. Image-based Filtering - "It only keeps JPEG images where both dimensions are greater than 400 pixels, and the ratio of larger to smaller dimension is no more than 2. It excludes images that trigger pornography or profanity detectors. These filters discard more than 65% of the candidates." + 2. Text-based Filtering - "Candidates with no determiner, no noun, or no preposition are discarded; candidates with a high noun ratio are also discarded; candidates with a high rate of token repetition are discarded; candidates where the first word is not capitalized, or with too high capitalized-word ratio are discarded; we use a vocabulary VW of 1B token types, appearing at least 5 times in the English Wikipedia, and discard candidates that contain tokens that are not found in this vocabulary. candidates that score too high or too low on the polarity annotations, or trigger the pornography/profanity detectors, are discarded; predefined boiler-plate prefix/suffix sequences matching the text are cropped, e.g. “click to enlarge picture”, “stock photo”; we also drop text which begins/ends in certain patterns, e.g. “embedded image permalink”, “profile photo”. These filters only allow around 3% of the incoming candidates to pass to the later stages." + 3. Image&Text-based Filtering - "We filter out candidates for which none of the text tokens can be mapped to the content of the image. This filter discards around 60% of the incoming candidates." + 4. 
Text Transformation with Hypernymization - "Noun modifiers of certain types (proper nouns, numbers, units) are removed; dates, durations, and preposition-based locations (e.g., "in Los Angeles") are removed; named-entities are identified, matched against the KG entries, and substitute with their hypernym; resulting coordination noun-phrases with the same head (e.g., "actor and actor") are resolved into a single-head, pluralized form (e.g., "actors"). Around 20% of samples are discarded during this transformation. We then cluster all resolved entities (e.g., 2560 "actor", "dog", "neighborhood", etc.) and keep only the candidates for which all detected types have a count of over 100 (around 55% of the candidates)." explanation: See [[Conceptual Captions Dataset Creation]](https://aclanthology.org/P18-1238.pdf#section.3) + quality_control: value: Input candidate (image, caption) pairs pass through several stages of filtering and processing to ensure quality. - sample: [] - size: 3.3M (image, text) pairs - type: dataset - url: https://aclanthology.org/P18-1238/ -- access: - explanation: Dataset is available at [[Conceptual 12M GitHub repository]](https://github.com/google-research-datasets/conceptual-12m). + explanation: See [[Conceptual Captions Dataset Creation]](https://aclanthology.org/P18-1238.pdf#section.3) + # Downstream + access: value: open - analysis: - explanation: See [[Evaluating Vision-and-Language PreTraining Data]](https://arxiv.org/pdf/2102.08981.pdf#section.3) - value: "The dataset is benchmarked against CC3M on two most fundamental V+L\ - \ tasks: vision-to-language generation and vision-and-language matching, with\ - \ an emphasis on long-tail visual recognition. The results illustrate the\ - \ benefit of scaling up pre-training data for vision-and-language tasks, as\ - \ indicated by the new state-of-the-art results on both the nocaps and Conceptual\ - \ Captions benchmarks.\n" + explanation: Dataset can be downloaded at [[Download]](https://ai.google.com/research/ConceptualCaptions/download) + intended_uses: '' + prohibited_uses: unknown + monitoring: unknown + feedback: Feedback can be provided by creating an issue in the [[Conceptual Captions + GitHub repository]](https://github.com/google-research-datasets/conceptual-captions) + or by emailing at conceptual-captions at google.com + +- type: dataset + name: Conceptual 12M + # General + organization: Google + description: > + A dataset with 12 million image-text pairs specifically meant to be used for + vision-and-language pre-training. created_date: - explanation: "The date the [[paper]](https://arxiv.org/abs/2102.08981) was submitted.\n" value: 2021-02-17 + explanation: > + The date the [[paper]](https://arxiv.org/abs/2102.08981) was submitted. + url: https://arxiv.org/pdf/2102.08981.pdf datasheet: none + modality: text (English) and image + size: 12M (image, text) pairs + sample: [] + analysis: + value: > + The dataset is benchmarked against CC3M on two most fundamental V+L + tasks: vision-to-language generation and vision-and-language matching, with + an emphasis on long-tail visual recognition. The results illustrate the + benefit of scaling up pre-training data for vision-and-language tasks, as + indicated by the new state-of-the-art results on both the nocaps and Conceptual + Captions benchmarks. 
+ explanation: See [[Evaluating Vision-and-Language PreTraining Data]](https://arxiv.org/pdf/2102.08981.pdf#section.3) + # Construction dependencies: [] - description: "A dataset with 12 million image-text pairs specifically meant to\ - \ be used for vision-and-language pre-training.\n" - excluded: - explanation: See [[Relaxing filters for higher recall]](https://arxiv.org/pdf/2102.08981.pdf#subsection.2.2) - value: "Some of the filtering steps used in the preparation of Conceptual Captions\ - \ dataset are relaxed to trade off high-recall for low-precision. The following\ - \ steps are applied in the given order:\n1. Image-based Filtering - Only keep\ - \ JPEG images where both dimensions are greater than 400 pixels, and the ratio\ - \ of larger to smaller dimension is no more than 2.5. Exclude images that\ - \ trigger pornography or profanity detectors.\n2. Text-based Filtering - Allow\ - \ text between 3 and 256 words in the alt-text. Discard candidates with no\ - \ noun or no determiner, but permit ones without prepositions. Set the maximum\ - \ fraction of word repetition allowed to 0.2. Increase the threshold for counting\ - \ a word type as rare from 5 to 20.\n3. Image&Text-based Filtering - Filter\ - \ out candidates for which none of the text tokens can be mapped to the content\ - \ of the image.\n" - feedback: Feedback can be provided by creating an issue in the [[Conceptual 12M - GitHub repository]](https://github.com/google-research-datasets/conceptual-12m) - or by emailing at conceptual-captions at google.com - included: '' - intended_uses: '' license: - explanation: "[[Conceptual Captions License]](https://github.com/google-research-datasets/conceptual-captions/blob/master/LICENSE)\n" value: Conceptual Captions License - modality: - explanation: text (English) and image - value: image, text - monitoring: unknown - name: Conceptual 12M - organization: Google - prohibited_uses: unknown + explanation: > + [[Conceptual Captions License]](https://github.com/google-research-datasets/conceptual-captions/blob/master/LICENSE) + included: '' + excluded: + value: | + Some of the filtering steps used in the preparation of Conceptual Captions dataset are relaxed to trade off high-recall for low-precision. The following steps are applied in the given order: + 1. Image-based Filtering - Only keep JPEG images where both dimensions are greater than 400 pixels, and the ratio of larger to smaller dimension is no more than 2.5. Exclude images that trigger pornography or profanity detectors. + 2. Text-based Filtering - Allow text between 3 and 256 words in the alt-text. Discard candidates with no noun or no determiner, but permit ones without prepositions. Set the maximum fraction of word repetition allowed to 0.2. Increase the threshold for counting a word type as rare from 5 to 20. + 3. Image&Text-based Filtering - Filter out candidates for which none of the text tokens can be mapped to the content of the image. + explanation: See [[Relaxing filters for higher recall]](https://arxiv.org/pdf/2102.08981.pdf#subsection.2.2) quality_control: - explanation: See [[Vision-and-Language Pre-Training Data]](https://arxiv.org/pdf/2102.08981.pdf#section.2) value: Input candidate (image, caption) pairs pass through several stages of filtering and processing to ensure quality. Person-name substitutions are performed in the alt-texts to protect the privacy of individuals in the associated images. 
- sample: [] - size: 12M (image, text) pairs - type: dataset - url: https://arxiv.org/pdf/2102.08981.pdf -- access: - explanation: https://huggingface.co/t5-large + explanation: See [[Vision-and-Language Pre-Training Data]](https://arxiv.org/pdf/2102.08981.pdf#section.2) + # Downstream + access: value: open - analysis: https://huggingface.co/t5-base#evaluation + explanation: Dataset is available at [[Conceptual 12M GitHub repository]](https://github.com/google-research-datasets/conceptual-12m). + intended_uses: '' + prohibited_uses: unknown + monitoring: unknown + feedback: Feedback can be provided by creating an issue in the [[Conceptual 12M + GitHub repository]](https://github.com/google-research-datasets/conceptual-12m) + or by emailing at conceptual-captions at google.com + +# Models +- type: model + name: T5 + organization: Google + description: Text-To-Text Transfer Transformer (T5) is a model that unifies all + NLP tasks under the text-to-text format. created_date: - explanation: The date the T5 paper was made public. value: 2019-10-23 + explanation: The date the T5 paper was made public. + url: https://arxiv.org/abs/1910.10683 + model_card: https://huggingface.co/t5-base + modality: text (English) + size: + value: 11B parameters (dense model) + explanation: T5 models were trained on several sizes including 3B and 11B parameters. + analysis: https://huggingface.co/t5-base#evaluation dependencies: [C4] - description: Text-To-Text Transfer Transformer (T5) is a model that unifies all - NLP tasks under the text-to-text format. - feedback: https://huggingface.co/t5-large/discussions - intended_uses: NLP tasks + training_emissions: unknown + training_time: unknown + training_hardware: 1,024 TPU v3 chips (Cloud TPU Pods) + quality_control: The T5 paper documents many analyses/ablations that were considered + before arriving at the final architecture/training procedure. + access: + value: open + explanation: https://huggingface.co/t5-large license: Apache 2.0 - modality: {} - model_card: https://huggingface.co/t5-base + intended_uses: NLP tasks + prohibited_uses: unknown monitoring: none - name: T5 + feedback: https://huggingface.co/t5-large/discussions + +- type: model + name: Internal Google BERT + # General organization: Google - prohibited_uses: unknown - quality_control: The T5 paper documents many analyses/ablations that were considered - before arriving at the final architecture/training procedure. - size: '' - training_emissions: unknown - training_hardware: 1,024 TPU v3 chips (Cloud TPU Pods) - training_time: unknown - type: model - url: https://arxiv.org/abs/1910.10683 -- access: closed - analysis: unknown + description: > + Internal Google BERT model used to power Google Search products. created_date: - explanation: "The date of the Google product update blog announcing that BERT\ - \ models were for ranking and featured snippets in Search.\n" value: 2019-11-25 + explanation: > + The date of the Google product update blog announcing that BERT models + were for ranking and featured snippets in Search. 
+ url: https://blog.google/products/search/search-language-understanding-bert/ + model_card: unknown + modality: text + size: unknown + analysis: unknown + # Construction dependencies: [Internal Google BERT dataset] - description: "Internal Google BERT model used to power Google Search products.\n" - feedback: unknown - intended_uses: - explanation: "We don't have an exhaustive list of the intended use cases for\ - \ the Internal Google BERT model, but we know that Google Search was powered\ - \ by a fine-tuned BERT.\n" - value: unknown + training_emissions: unknown + training_time: unknown + training_hardware: unknown + quality_control: unknown + # Downstream + access: closed license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - modality: {} - model_card: unknown + explanation: > + The asset isn't released, and hence the license is unknown. + intended_uses: + value: unknown + explanation: > + We don't have an exhaustive list of the intended use cases for the + Internal Google BERT model, but we know that Google Search was powered + by a fine-tuned BERT. + prohibited_uses: unknown monitoring: unknown - name: Internal Google BERT + feedback: unknown + +- type: application + name: Google Search + # General organization: Google - prohibited_uses: unknown - quality_control: unknown - size: '' - training_emissions: unknown - training_hardware: unknown - training_time: unknown - type: model - url: https://blog.google/products/search/search-language-understanding-bert/ -- access: open - adaptation: unknown + description: > + Google Search is Google's search engine. created_date: - explanation: "The date of the Google product update blog announcing that BERT\ - \ models were for ranking and featured snippets in Search.\n" value: 2019-11-25 + explanation: > + The date of the Google product update blog announcing that BERT models + were for ranking and featured snippets in Search. + url: https://blog.google/products/search/search-language-understanding-bert/ + # Construction dependencies: [Internal Google BERT, MUM] - description: "Google Search is Google's search engine.\n" - failures: unknown - feedback: "Feedback can be sent to Google Feedback using the product interface\ - \ [[Google Feedback]](https://www.google.com/tools/feedback).\n" - intended_uses: Searching the web using text, voice or image - license: - explanation: No license was found. - value: none - monitoring: "It is implied that Google scan uses of its products for spam, malware\ - \ and illegal content in the [[Term of Service]](https://policies.google.com/terms).\n" - monthly_active_users: unknown - name: Google Search - organization: Google + adaptation: unknown output_space: web page ranking - prohibited_uses: "Prohibited use cases aren't specifically spelled out for Google\ - \ search, but several illegal and discouraged use cases are shared in the Respect\ - \ Others section of the [[Term of Service]](https://policies.google.com/terms).\n" quality_control: unknown + # Downstream + access: open + license: + value: none + explanation: No license was found. 
terms_of_service: https://policies.google.com/terms - type: application - url: https://blog.google/products/search/search-language-understanding-bert/ + intended_uses: Searching the web using text, voice or image + prohibited_uses: > + Prohibited use cases aren't specifically spelled out for Google search, but + several illegal and discouraged use cases are shared in the Respect Others + section of the [[Term of Service]](https://policies.google.com/terms). + monitoring: > + It is implied that Google scan uses of its products for spam, + malware and illegal content in the + [[Term of Service]](https://policies.google.com/terms). + feedback: > + Feedback can be sent to Google Feedback using the product interface + [[Google Feedback]](https://www.google.com/tools/feedback). + # Deployment + monthly_active_users: unknown user_distribution: unknown -- access: closed - analysis: unknown + failures: unknown + +- type: dataset + name: Infiniset + # General + organization: Google + description: > + Infiniset "is a combination of dialog data from public dialog data and + other public web documents" + [[Appendix E]](https://arxiv.org/pdf/2201.08239.pdf#appendix.E). created_date: - explanation: "The date of the Google company news blog announcing LaMDA [[Google\ - \ News Blog]](https://blog.google/technology/ai/lamda/).\n" value: 2021-06-18 + explanation: > + The date of the Google company news blog announcing LaMDA + [[Google News Blog]](https://blog.google/technology/ai/lamda/). + url: https://arxiv.org/pdf/2201.08239.pdf datasheet: none - dependencies: [] - description: "Infiniset \"is a combination of dialog data from public dialog data\ - \ and other public web documents\" [[Appendix E]](https://arxiv.org/pdf/2201.08239.pdf#appendix.E).\n" - excluded: unknown - feedback: - explanation: "Author contact information was not provided.\n" - value: none - included: "Included in the dataset are data from \"public forums (0%); C4 data\ - \ (12.5% ); code documents from sites related to programming like Q&A sites\ - \ tutorials, etc (12.5%); Wikipedia (English) (12.5%); English web documents\ - \ (6.25%); and Non-English web documents (6.25%).\"\n" - intended_uses: - explanation: "Intended uses of the dataset wasn't explicitly linked, but it\ - \ is likely intended for training language models specialized in dialogue.\n" + modality: text and code + size: value: unknown + explanation: > + The size of the dataset is unclear, but it is reported that the dataset + "consists of 2.97B documents and 1.12B dialogs with 13.39B utterances" + [[Appendix E]](https://arxiv.org/pdf/2201.08239.pdf#appendix.E). + sample: [] + analysis: unknown + # Construction + dependencies: [] license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - modality: - explanation: text and code - value: code, text - monitoring: unknown - name: Infiniset - organization: Google - prohibited_uses: "The prohibited uses for Infiniset weren't specifically listed,\ - \ but the Google AI principles inspired safety objectives in [[Appendix A.1]](https://arxiv.org/pdf/2201.08239.pdf#subsection.A.1)\ - \ advises avoiding harm, unjust impact and misinformation, among others.\n" + explanation: > + The asset isn't released, and hence the license is unknown. 
+ included: > + Included in the dataset are data from "public forums (0%); C4 data (12.5% ); + code documents from sites related to programming like Q&A sites tutorials, + etc (12.5%); Wikipedia (English) (12.5%); English web documents (6.25%); + and Non-English web documents (6.25%)." + excluded: unknown quality_control: unknown - sample: [] - size: - explanation: "The size of the dataset is unclear, but it is reported that the\ - \ dataset \"consists of 2.97B documents and 1.12B dialogs with 13.39B utterances\"\ - \ [[Appendix E]](https://arxiv.org/pdf/2201.08239.pdf#appendix.E).\n" + # Downstream + access: closed + intended_uses: value: unknown - type: dataset - url: https://arxiv.org/pdf/2201.08239.pdf -- access: closed - analysis: "The model performance was analyzed on sensibleness, specificity and\ - \ interestingness. The model was also analyzed on safety, following metrics\ - \ derived from Google AI Principles [[Appendix A.1]](https://arxiv.org/pdf/2201.08239.pdf#subsection.A.1).\ - \ Finally, the model was analyzed on groundedness, testing its ability to produce\ - \ responses that can be associated with \"known sources whenever possible [[Section\ - \ 4.1]](https://arxiv.org/pdf/2201.08239.pdf#subsection.4.1).\"\n" - created_date: - explanation: "The date of the Google company news blog announcing LaMDA [[Google\ - \ News Blog]](https://blog.google/technology/ai/lamda/).\n" - value: 2021-06-18 - dependencies: [Infiniset] - description: "LaMDA stands for Language Models for Dialog Application. It is a\ - \ transformer based language model trained on dialogue data.\n" + explanation: > + Intended uses of the dataset wasn't explicitly linked, but it is likely + intended for training language models specialized in dialogue. + prohibited_uses: > + The prohibited uses for Infiniset weren't specifically listed, but the + Google AI principles inspired safety objectives in + [[Appendix A.1]](https://arxiv.org/pdf/2201.08239.pdf#subsection.A.1) + advises avoiding harm, unjust impact and misinformation, among others. + monitoring: unknown feedback: - explanation: "Author contact information was not provided.\n" value: none - intended_uses: "LaMDA is a language model, so it can be used for regular langauge\ - \ modelling tasks without fine-tuning, but its fine-tuned for dialogue tasks.\n" - license: - explanation: "The asset isn't released, and hence the license is unknown.\n" - value: unknown - modality: - explanation: text - value: text; text - model_card: none - monitoring: unknown + explanation: > + Author contact information was not provided. + +- type: model name: LaMDA + # General organization: Google - prohibited_uses: "The prohibited uses of LaMDA weren't specifically listed, but\ - \ the Google AI principles inspired safety objectives in [[Appendix A.1]](https://arxiv.org/pdf/2201.08239.pdf#subsection.A.1)\ - \ advises avoiding harm, unjust impact and misinformation, among others.\n" - quality_control: "LaMDA was fine-tuned to predict sensibleness, specificity and\ - \ interestingness as well as safety. Then, the candidates were filtered out\ - \ if the model safety predictions were below a certain threshold. The next candidates\ - \ in the conversation were selected as a combination of these predictions. The\ - \ model was also fine-tuned for groundedness. The results are shown in [[Figure\ - \ 5]](https://arxiv.org/pdf/2201.08239.pdf#figure.caption.23).\n" - size: 137B parameters (dense) + description: > + LaMDA stands for Language Models for Dialog Application. 
It is a transformer + based language model trained on dialogue data. + created_date: + value: 2021-06-18 + explanation: > + The date of the Google company news blog announcing LaMDA + [[Google News Blog]](https://blog.google/technology/ai/lamda/). + url: https://arxiv.org/pdf/2201.08239.pdf + model_card: none + modality: text + size: + value: 137B parameters (dense model) + explanation: > + Along with the 137B model, the authors also trained 2B and 8B LaMDA + models. + analysis: > + The model performance was analyzed on sensibleness, specificity and + interestingness. + The model was also analyzed on safety, following + metrics derived from Google AI Principles + [[Appendix A.1]](https://arxiv.org/pdf/2201.08239.pdf#subsection.A.1). + Finally, the model was analyzed on groundedness, testing its ability to + produce responses that can be associated with "known sources whenever + possible + [[Section 4.1]](https://arxiv.org/pdf/2201.08239.pdf#subsection.4.1)." + # Construction + dependencies: [Infiniset] training_emissions: - explanation: "\"...total carbon footprint of LaMDA’s pre-training of the largest\ - \ model is approximately 25.2 tCO2e. The carbon footprint of pre-training\ - \ of smaller models and fine-tuning of all models is approximately 0.7 tCO2e\ - \ ... which brings the total footprint of LaMDA to approximately 26 tCO2e\ - \ [[Section 10]](https://arxiv.org/pdf/2201.08239.pdf#section.10)\"\n" value: 26 tCO2e - training_hardware: - explanation: "Reported in [[Section 10]](https://arxiv.org/pdf/2201.08239.pdf#section.10).\n" - value: 1024 TPU-V3 chips + explanation: > + "...total carbon footprint of LaMDA’s pre-training of the largest model + is approximately 25.2 tCO2e. The carbon footprint of pre-training of + smaller models and fine-tuning of all models is approximately 0.7 tCO2e + ... which brings the total footprint of LaMDA to approximately 26 + tCO2e + [[Section 10]](https://arxiv.org/pdf/2201.08239.pdf#section.10)" training_time: - explanation: "The total number of training flops of LaMDA was reported as 3.55E+23\ - \ (3.55E+8 petaflops) [[Section 10]](https://arxiv.org/pdf/2201.08239.pdf#section.10),\ - \ which is equal to 4108.80 = 3.55E+8 / (60 * 60 * 24) petaflop/s-day.\n" value: 4108.80 petaflop/s-day - type: model - url: https://arxiv.org/pdf/2201.08239.pdf -- access: closed - analysis: unknown - created_date: - explanation: "The date of the Google AI blog announcing the details of PaLM\ - \ [[Google AI Blog]](https://ai.googleblog.com/2022/04/pathways-language-model-palm-scaling-to.html).\n" - value: 2022-04-04 - datasheet: https://arxiv.org/pdf/2204.02311.pdf#appendix.D - dependencies: [Infiniset] - description: "PaLM dataset \"was created for pre-training language models\" [[Datasheet]](https://arxiv.org/pdf/2204.02311.pdf#appendix.D).\n" - excluded: "GitHub repositories with copyleft licenses were excluded. Programming\ - \ languageges other than the most common 24 were excluded [[Section 3]](https://arxiv.org/pdf/2204.02311.pdf#section.3).\n" - feedback: - explanation: "Author contact information is shared in the paper [[Paper]](https://arxiv.org/pdf/2204.02311.pdf).\n" - value: Contact the authors. - included: "The dataset is based on Infiniset. It included multilingual text containing\ - \ text from over 100 languages. 
The breakdown of the data included is as follows:\ - \ Social media conversations (multilingual) 50, Filtered webpages (multilingual)\ - \ 27%, BooksCorpus (English) 13%, GitHub (code) 5%, Wikipedia (multilingual)\ - \ 4%, and News (English) 1%. Code was collected from GitHub repositories with\ - \ appropriate licenses, totalling 96GB of source code [[Section 3]](https://arxiv.org/pdf/2204.02311.pdf#section.3).\n" - intended_uses: - explanation: "As stated in [[Datasheet]](https://arxiv.org/pdf/2204.02311.pdf#appendix.D).\n" - value: "\"The dataset was created for pre-training language models by a team\ - \ of researchers at Google\".\n" + explanation: > + The total number of training flops of LaMDA was reported as 3.55E+23 + (3.55E+8 petaflops) + [[Section 10]](https://arxiv.org/pdf/2201.08239.pdf#section.10), which is + equal to 4108.80 = 3.55E+8 / (60 * 60 * 24) petaflop/s-day. + training_hardware: + value: 1024 TPU-V3 chips + explanation: > + Reported in [[Section 10]](https://arxiv.org/pdf/2201.08239.pdf#section.10). + quality_control: > + LaMDA was fine-tuned to predict sensibleness, specificity and + interestingness as well as safety. Then, the candidates were filtered out + if the model safety predictions were below a certain threshold. The next + candidates in the conversation were selected as a combination of these + predictions. The model was also fine-tuned for groundedness. The results + are shown in + [[Figure 5]](https://arxiv.org/pdf/2201.08239.pdf#figure.caption.23). + # Downstream + access: closed license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - modality: - explanation: text and code - value: code, text + explanation: > + The asset isn't released, and hence the license is unknown. + intended_uses: > + LaMDA is a language model, so it can be used for regular langauge modelling + tasks without fine-tuning, but its fine-tuned for dialogue tasks. + prohibited_uses: > + The prohibited uses of LaMDA weren't specifically listed, but the Google + AI principles inspired safety objectives in + [[Appendix A.1]](https://arxiv.org/pdf/2201.08239.pdf#subsection.A.1) + advises avoiding harm, unjust impact and misinformation, among others. monitoring: unknown + feedback: + value: none + explanation: > + Author contact information was not provided. + +- type: dataset name: PaLM dataset + # General organization: Google - prohibited_uses: "\"... should not be used for any of the unacceptable language\ - \ model use cases, e.g., generation of toxic speech\" [[Datasheet]](https://arxiv.org/pdf/2204.02311.pdf#appendix.D).\n" - quality_control: "In order to reduce low quality web pages, the web pages were\ - \ sampled according to a \"quality score\" classifier. Code files were de-duplicated\ - \ using Levenshtein distance [[Section 3]](https://arxiv.org/pdf/2204.02311.pdf#section.3).\n" - sample: [] + description: > + PaLM dataset "was created for pre-training language models" + [[Datasheet]](https://arxiv.org/pdf/2204.02311.pdf#appendix.D). + created_date: + value: 2022-04-04 + explanation: > + The date of the Google AI blog announcing the details of PaLM + [[Google AI Blog]](https://ai.googleblog.com/2022/04/pathways-language-model-palm-scaling-to.html). 
+ url: https://arxiv.org/pdf/2204.02311.pdf + datasheet: https://arxiv.org/pdf/2204.02311.pdf#appendix.D + modality: text and code size: - explanation: "Dataset size in GB is not reported, but the dataset is reported\ - \ to have 780 billion tokens [[Datasheet]](https://arxiv.org/pdf/2204.02311.pdf#appendix.D).\ - \ The code portion of the dataset is reported to be 5% totaling a 196GB of\ - \ source code [[Section 3]](https://arxiv.org/pdf/2204.02311.pdf#section.3).\ - \ It is unclear whether the reported size is before or after de-duplication.\ - \ Nonetheless, one can estimate the dataset size by multiplying 196GB with\ - \ 20 = 3.92 TB.\n" value: 3.92 TB - type: dataset - url: https://arxiv.org/pdf/2204.02311.pdf -- access: - explanation: Weights can be downloaded from [Github](https://github.com/google-research/t5x/blob/main/docs/models.md) - value: open - analysis: Evaluated on a variety of standard language datasets. - created_date: - explanation: Date paper was released - value: 2022-10-20 - dependencies: [T5, Muffin, P3, NaturalInstructions-v2, Flan CoT] - description: Flan-T5 is a version of the T5 language model fine-tuned on instruction - data - feedback: https://huggingface.co/google/flan-t5-xxl/discussions - intended_uses: unknown + explanation: > + Dataset size in GB is not reported, but the dataset is reported to have + 780 billion tokens + [[Datasheet]](https://arxiv.org/pdf/2204.02311.pdf#appendix.D). + The code portion of the dataset is reported to be 5% totaling a 196GB + of source code + [[Section 3]](https://arxiv.org/pdf/2204.02311.pdf#section.3). + It is unclear whether the reported size is before or after de-duplication. + Nonetheless, one can estimate the dataset size by multiplying 196GB with + 20 = 3.92 TB. + sample: [] + analysis: unknown + # Construction + dependencies: [Infiniset] license: - explanation: License on the [[Github repository]](https://github.com/google-research/google-research) - value: Apache 2.0 - modality: - explanation: text (English) - value: text; text - model_card: https://arxiv.org/pdf/2210.11416.pdf - monitoring: none + value: unknown + explanation: > + The asset isn't released, and hence the license is unknown. + included: > + The dataset is based on Infiniset. It included multilingual text containing + text from over 100 languages. The breakdown of the data included is as + follows: Social media conversations (multilingual) 50, Filtered webpages + (multilingual) 27%, BooksCorpus (English) 13%, GitHub (code) 5%, Wikipedia + (multilingual) 4%, and News (English) 1%. + Code was collected from GitHub repositories with appropriate licenses, + totalling 96GB of source code + [[Section 3]](https://arxiv.org/pdf/2204.02311.pdf#section.3). + excluded: > + GitHub repositories with copyleft licenses were excluded. Programming + languageges other than the most common 24 were excluded + [[Section 3]](https://arxiv.org/pdf/2204.02311.pdf#section.3). + quality_control: > + In order to reduce low quality web pages, the web pages were sampled + according to a "quality score" classifier. + Code files were de-duplicated using Levenshtein distance + [[Section 3]](https://arxiv.org/pdf/2204.02311.pdf#section.3). + # Downstream + access: closed + intended_uses: + value: > + "The dataset was created for pre-training language models by a team of + researchers at Google". + explanation: > + As stated in + [[Datasheet]](https://arxiv.org/pdf/2204.02311.pdf#appendix.D). + prohibited_uses: > + "... 
should not be used for any of the unacceptable language model use
+    cases, e.g., generation of toxic speech"
+    [[Datasheet]](https://arxiv.org/pdf/2204.02311.pdf#appendix.D).
+  monitoring: unknown
+  feedback:
+    value: Contact the authors.
+    explanation: >
+      Author contact information is shared in the paper
+      [[Paper]](https://arxiv.org/pdf/2204.02311.pdf).
+
+- type: model
   name: Flan-T5
   organization: Google
-  prohibited_uses: none
+  description: Flan-T5 is a version of the T5 language model fine-tuned on instruction
+    data
+  created_date:
+    value: 2022-10-20
+    explanation: Date paper was released
+  url: https://arxiv.org/abs/2210.11416
+  model_card: https://arxiv.org/pdf/2210.11416.pdf
+  modality: text (English)
+  analysis: Evaluated on a variety of standard language datasets.
+  size: 11B parameters (dense model)
+  dependencies: [T5, Muffin, P3, NaturalInstructions-v2, Flan CoT]
+  training_emissions: Unknown
+  training_time: Unknown
+  training_hardware: 512 v4 TPU Chips
   quality_control: Across different multitask datasets, templates and formatting
     were maintained. For the chain-of-thoughts (CoT) data, specific exemplars were
     used.
-  size: 11B parameters (dense)
-  training_emissions: Unknown
-  training_hardware: 512 v4 TPU Chips
-  training_time: Unknown
-  type: model
-  url: https://arxiv.org/abs/2210.11416
-- access:
-    explanation: Model weights available for download in the [[Github repo]](https://github.com/google-research/google-research/tree/master/ul2)
+  access:
     value: open
+    explanation: Weights can be downloaded from [Github](https://github.com/google-research/t5x/blob/main/docs/models.md)
+  license:
+    value: Apache 2.0
+    explanation: License on the [[Github repository]](https://github.com/google-research/google-research)
+  intended_uses: unknown
+  prohibited_uses: none
+  monitoring: none
+  feedback: https://huggingface.co/google/flan-t5-xxl/discussions
+
+- type: model
+  name: UL2
+  organization: Google
+  description: UL2 is a language model trained with a new pretraining objective
   created_date:
-    explanation: Date mdoel paper was released
     value: 2022-05-10
+    explanation: Date model paper was released
+  url: https://arxiv.org/abs/2205.05131
+  model_card: ''
+  modality: text (English)
+  analysis: ''
+  size: 20B parameters (dense model)
   dependencies: [C4]
-  description: UL2 is a language model trained with a new pretraining objective
-  feedback: ''
-  intended_uses: ''
+  training_emissions: ''
+  training_time: ''
+  training_hardware: 128 TPUv4
+  quality_control: ''
+  access:
+    value: open
+    explanation: Model weights available for download in the [[Github repo]](https://github.com/google-research/google-research/tree/master/ul2)
   license:
-    explanation: 20B checkpoints only for three different iteration steps
     value: Apache 2.0
-  modality:
-    explanation: text (English)
-    value: text; text
-  model_card: ''
+    explanation: 20B checkpoints only for three different iteration steps
+  intended_uses: ''
+  prohibited_uses: ''
   monitoring: ''
-  name: UL2
+  feedback: ''
+
+- type: model
+  name: Parti
   organization: Google
-  prohibited_uses: ''
-  quality_control: ''
-  size: 20B parameters (dense)
-  training_emissions: ''
-  training_hardware: 128 TPUv4
-  training_time: ''
-  type: model
-  url: https://arxiv.org/abs/2205.05131
-- access:
-    explanation: Google does not provide access to Parti for external researchers.
-    value: closed
-  analysis: ''
+  description: Parti is an autoregressive text-to-image model
   created_date:
-    explanation: Date the model website was made public
     value: 2022-06-22
+    explanation: Date the model website was made public
+  url: https://parti.research.google/
+  model_card: ''
+  modality: Text (English) and Image
+  size: 20B parameters
+  analysis: ''
   dependencies: [C4, LAION-400M, FIT400M, JFT-4B]
-  description: Parti is a text-to-image diffusion model
-  feedback: ''
-  intended_uses: ''
+  training_emissions: ''
+  training_time: unknown
+  training_hardware: unknown
+  quality_control: ''
+  access:
+    value: closed
+    explanation: Google does not provide access to Parti for external researchers.
   license:
-    explanation: "The asset isn't released, and hence the license is unknown.\n"
     value: unknown
-  modality:
-    explanation: Text (English) and Image
-    value: image, text; image, text
-  model_card: ''
+    explanation: >
+      The asset isn't released, and hence the license is unknown.
+  intended_uses: ''
+  prohibited_uses: ''
   monitoring: ''
-  name: Parti
+  feedback: ''
+
+- type: model
+  name: Imagen
   organization: Google
-  prohibited_uses: ''
-  quality_control: ''
-  size: 20B parameters (dense)
-  training_emissions: ''
-  training_hardware: unknown
-  training_time: unknown
-  type: model
-  url: https://parti.research.google/
-- access: open
-  analysis: ''
+  description: Imagen is a text-to-image diffusion model
   created_date:
-    explanation: Date the model website was made public
     value: 2022-05-23
+    explanation: Date the model website was made public
+  url: https://imagen.research.google/
+  model_card: ''
+  modality: text (English) and image
+  size: >
+    14B parameters total. 2B parameters (U-Net model), 11B parameters (T5-XXL),
+    and 600M and 400M parameter models for super resolution
+  analysis: ''
   dependencies: [LAION-400M, Google internal image-text dataset]
-  description: Imagen is a text-to-image diffusion model
-  feedback: ''
-  intended_uses: ''
+  training_emissions: unknown
+  training_time: unknown
+  training_hardware: 128 TPU-v4
+  quality_control: ''
+  access: open
   license:
-    explanation: "The asset isn't released, and hence the license is unknown.\n"
     value: unknown
-  modality:
-    explanation: text (English) and image
-    value: image, text; image, text
-  model_card: ''
+    explanation: >
+      The asset isn't released, and hence the license is unknown.
+  intended_uses: ''
+  prohibited_uses: ''
   monitoring: ''
-  name: Imagen
+  feedback: ''
+
+- type: model
+  name: VATT
   organization: Google
-  prohibited_uses: ''
-  quality_control: ''
-  size: 14B parameters (dense)
-  training_emissions: unknown
-  training_hardware: 128 TPU-v4
-  training_time: unknown
-  type: model
-  url: https://imagen.research.google/
-- access:
-    explanation: Model checkpoints can be downloaded from the [[Github repository]](https://github.com/google-research/google-research/tree/master/vatt)
-    value: open
-  analysis: ''
+  description: VATT is a family of models trained on multimodal data
   created_date:
-    explanation: Date the model paper was made public
     value: 2022-04-22
+    explanation: Date the model paper was made public
+  url: https://arxiv.org/abs/2104.11178
+  model_card: ''
+  modality: Text (English), Video, and Audio
+  size: 155M parameters
+  analysis: ''
   dependencies: [AudioSet, HowTo100M]
-  description: VATT is a family of models trained on multimodal data
-  feedback: ''
-  intended_uses: ''
+  training_emissions: unknown
+  training_time: 3 days
+  training_hardware: 256 TPU-v3
+  quality_control: ''
+  access:
+    value: open
+    explanation: Model checkpoints can be downloaded from the [[Github repository]](https://github.com/google-research/google-research/tree/master/vatt)
   license:
-    explanation: License on the [[Github repository]](https://github.com/google-research/google-research)
     value: Apache 2.0
-  modality:
-    explanation: Text (English), Video, and Audio
-    value: audio, text, video; audio, text, video
-  model_card: ''
+    explanation: License on the [[Github repository]](https://github.com/google-research/google-research)
+  intended_uses: ''
+  prohibited_uses: ''
   monitoring: ''
-  name: VATT
+  feedback: ''
+
+- type: model
+  name: PaLM
+  # General
   organization: Google
-  prohibited_uses: ''
-  quality_control: ''
-  size: 155M parameters (dense)
-  training_emissions: unknown
-  training_hardware: 256 TPU-v3
-  training_time: 3 days
-  type: model
-  url: https://arxiv.org/abs/2104.11178
-- access:
-    explanation: Made available through the PaLM API as of March 14, 2023.
-    value: limited
-  analysis: "\"PaLM is evaluated on English Natural Language Processing (NLP) tasks,\
-    \ tasks from BIG-bench, reasoning tasks, code completion tasks, multilingual\
-    \ generation and question answering tasks, translation tasks, and bias and toxicity\
-    \ benchmarks\" [[Model Card]](https://arxiv.org/pdf/2204.02311.pdf#appendix.E).\n"
+  description: >
+    PaLM stands for Pathways Language Model, "dense decoder-only Transformer model
+    trained with the Pathways system"
+    [[Google AI Blog]](https://ai.googleblog.com/2022/04/pathways-language-model-palm-scaling-to.html).
   created_date:
-    explanation: "The date of the Google AI blog announcing the details of PaLM\
-      \ [[Google AI Blog]](https://ai.googleblog.com/2022/04/pathways-language-model-palm-scaling-to.html).\n"
     value: 2022-04-04
-  dependencies: [PaLM dataset]
-  description: "PaLM stands Pathways Language Model, \"dense decoder-only Transformer\
-    \ model trained with the Pathways system\" [[Google ai Blog]](https://ai.googleblog.com/2022/04/pathways-language-model-palm-scaling-to.html).\n"
-  feedback:
-    explanation: "Author contact information is shared in the paper [[Paper]](https://arxiv.org/pdf/2204.02311.pdf).\n"
-    value: Contact the authors.
- intended_uses: "\"The primary use is research on language models, including: research\ - \ on NLP applications like machine translation and question answering, advancing\ - \ fairness and safety research, and understanding limitations of current LLMs.\ - \ Within Google, PaLM is being used for research on a variety of open- ended\ - \ text and code generation tasks, including reasoning [[Section 6.3]](https://arxiv.org/pdf/2204.02311.pdf#subsection.6.3)\ - \ and code synthesis and understanding [[Section 6.4]](https://arxiv.org/pdf/2204.02311.pdf#subsection.6.4)\"\ - \ [[Model Card]](https://arxiv.org/pdf/2204.02311.pdf#appendix.E).\n" - license: - explanation: "The asset isn't released, and hence the license is unknown.\n" - value: unknown - modality: - explanation: text and code - value: code, text; code, text + explanation: > + The date of the Google AI blog announcing the details of PaLM + [[Google AI Blog]](https://ai.googleblog.com/2022/04/pathways-language-model-palm-scaling-to.html). + url: https://arxiv.org/pdf/2204.02311.pdf model_card: https://arxiv.org/pdf/2204.02311.pdf#appendix.E - monitoring: unknown - name: PaLM - organization: Google - prohibited_uses: "The model \"should not be used for downstream applications without\ - \ further analysis on factors in the proposed downstream application [[Model\ - \ Card]](https://arxiv.org/pdf/2204.02311.pdf#appendix.E)\"\n" - quality_control: Unknown - size: 540B parameters (dense) + modality: text and code + size: 540B parameters (dense model) + analysis: > + "PaLM is evaluated on English Natural Language Processing (NLP) tasks, tasks + from BIG-bench, reasoning tasks, code completion tasks, multilingual + generation and question answering tasks, translation tasks, and bias and + toxicity benchmarks" + [[Model Card]](https://arxiv.org/pdf/2204.02311.pdf#appendix.E). + # Construction + dependencies: [PaLM dataset] training_emissions: - explanation: "Reported in [[Appendix B]](https://arxiv.org/pdf/2204.02311.pdf#appendix.A)\n" value: 271.43 tCO2 - training_hardware: - explanation: "Reported in [[Section 4]](https://arxiv.org/pdf/2204.02311.pdf#section.4).\n" - value: 6144 TPU v4 chips + explanation: > + Reported in + [[Appendix B]](https://arxiv.org/pdf/2204.02311.pdf#appendix.A) training_time: - explanation: "Reported in [[Appendix B]](https://arxiv.org/pdf/2204.02311.pdf#appendix.A)\n" value: 29600 petaflop/s-days - type: model - url: https://arxiv.org/pdf/2204.02311.pdf -- access: limited - adaptation: '' - created_date: 2023-03-14 - dependencies: [PaLM] - description: a new developer offering that makes it easy and safe to experiment - with Google’s language models. - failures: '' - feedback: '' - intended_uses: '' - license: unknown - monitoring: '' - monthly_active_users: '' + explanation: > + Reported in + [[Appendix B]](https://arxiv.org/pdf/2204.02311.pdf#appendix.A) + training_hardware: + value: 6144 TPU v4 chips + explanation: > + Reported in [[Section 4]](https://arxiv.org/pdf/2204.02311.pdf#section.4). + quality_control: Unknown + # Downstream + access: + value: limited + explanation: Made available through the PaLM API as of March 14, 2023. + license: + value: unknown + explanation: > + The asset isn't released, and hence the license is unknown. + intended_uses: > + "The primary use is research on language models, including: research + on NLP applications like machine translation and question answering, + advancing fairness and safety research, and understanding limitations of + current LLMs. 
+    Within Google, PaLM is being used for research on a variety of open-ended
+    text and code generation tasks, including reasoning
+    [[Section 6.3]](https://arxiv.org/pdf/2204.02311.pdf#subsection.6.3)
+    and code synthesis and understanding
+    [[Section 6.4]](https://arxiv.org/pdf/2204.02311.pdf#subsection.6.4)"
+    [[Model Card]](https://arxiv.org/pdf/2204.02311.pdf#appendix.E).
+  prohibited_uses: >
+    The model "should not be used for downstream applications without further
+    analysis on factors in the proposed downstream application
+    [[Model Card]](https://arxiv.org/pdf/2204.02311.pdf#appendix.E)"
+  monitoring: unknown
+  feedback:
+    value: Contact the authors.
+    explanation: >
+      Author contact information is shared in the paper
+      [[Paper]](https://arxiv.org/pdf/2204.02311.pdf).
+
+- type: application
   name: PaLM API
   organization: Google
+  description: A new developer offering that makes it easy and safe to experiment
+    with Google’s language models.
+  created_date: 2023-03-14
+  url: https://developers.googleblog.com/2023/03/announcing-palm-api-and-makersuite.html
+  dependencies: [PaLM]
+  adaptation: ''
   output_space: ''
-  prohibited_uses: ''
   quality_control: ''
+  access: limited
+  license: unknown
   terms_of_service: ''
-  type: application
-  url: https://developers.googleblog.com/2023/03/announcing-palm-api-and-makersuite.html
-  user_distribution: ''
-- access: closed
-  analysis: ''
-  created_date: 2022-12-26
-  dependencies: [Flan-PaLM, MultiMedQA]
-  description: ''
-  feedback: ''
   intended_uses: ''
-  license:
-    explanation: "The asset isn't released, and hence the license is unknown.\n"
-    value: unknown
-  modality:
-    explanation: text
-    value: text; text
-  model_card: ''
+  prohibited_uses: ''
   monitoring: ''
+  feedback: ''
+  monthly_active_users: ''
+  user_distribution: ''
+  failures: ''
+
+- type: model
   name: Med-PaLM
   organization: Google
-  prohibited_uses: ''
-  quality_control: ''
-  size: 540B parameters (dense)
+  description: ''
+  created_date: 2022-12-26
+  url: https://arxiv.org/abs/2212.13138
+  model_card: ''
+  modality: text
+  analysis: ''
+  size: 540B
+  dependencies: [Flan-PaLM, MultiMedQA]
   training_emissions: ''
-  training_hardware: ''
   training_time: ''
-  type: model
-  url: https://arxiv.org/abs/2212.13138
-- access: closed
-  analysis: Evaluated on MultiMedBench tasks and radiologist evaluations of model-generated
-    chest X-ray reports
-  created_date: 2023-07-26
-  dependencies: [PaLM-E, MultiMedBench]
-  description: ''
-  feedback: ''
-  intended_uses: ''
+  training_hardware: ''
+  quality_control: ''
+  access: closed
   license:
-    explanation: "The asset isn't released, and hence the license is unknown.\n"
     value: unknown
-  modality:
-    explanation: text, image, and genomics input with text output
-    value: image, text; text
-  model_card: ''
+    explanation: >
+      The asset isn't released, and hence the license is unknown.
+ intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' + +- type: model name: Med-PaLM Multimodal organization: Google - prohibited_uses: '' - quality_control: '' + description: '' + created_date: 2023-07-26 + url: https://arxiv.org/pdf/2307.14334.pdf + model_card: '' + modality: text, image, and genomics input with text output + analysis: Evaluated on MultiMedBench tasks and radiologist evaluations of model-generated + chest X-ray reports size: 562B parameters (dense) + dependencies: [PaLM-E, MultiMedBench] training_emissions: '' - training_hardware: '' training_time: '' - type: model - url: https://arxiv.org/pdf/2307.14334.pdf -- access: closed - analysis: '' + training_hardware: '' + quality_control: '' + access: closed + license: + value: unknown + explanation: > + The asset isn't released, and hence the license is unknown. + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' + +- type: model + name: MultiMedQA + organization: Google + description: '' created_date: 2022-12-26 + url: https://arxiv.org/abs/2212.13138 + model_card: '' + modality: text + analysis: '' + size: unknown dependencies: - MedQA - MedMCQA @@ -708,149 +814,149 @@ - LiveQA - Medication QA - HealthSearchQA - description: '' - feedback: '' - intended_uses: '' - license: - explanation: "The asset isn't released, and hence the license is unknown.\n" - value: unknown - modality: - explanation: text - value: text; text - model_card: '' - monitoring: '' - name: MultiMedQA - organization: Google - prohibited_uses: '' - quality_control: '' - size: unknown training_emissions: '' - training_hardware: '' training_time: '' - type: model - url: https://arxiv.org/abs/2212.13138 -- access: closed - analysis: '' - created_date: 2022-10-20 - dependencies: [PaLM, Muffin, P3, NaturalInstructions-v2] - description: '' - feedback: '' - intended_uses: '' + training_hardware: '' + quality_control: '' + access: closed license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - modality: - explanation: text - value: text; text - model_card: '' + explanation: > + The asset isn't released, and hence the license is unknown. + intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' + +- type: model name: Flan-PaLM organization: Google - prohibited_uses: '' - quality_control: '' - size: 540B parameters (dense) - training_emissions: '' - training_hardware: '' - training_time: '' - type: model + description: '' + created_date: 2022-10-20 url: https://arxiv.org/abs/2210.11416 -- access: closed + model_card: '' + modality: text analysis: '' - created_date: 2022-10-20 - dependencies: [U-PaLM, Muffin, P3, NaturalInstructions-v2] - description: '' - feedback: '' - intended_uses: '' + size: 540B + dependencies: [PaLM, Muffin, P3, NaturalInstructions-v2] + training_emissions: '' + training_time: '' + training_hardware: '' + quality_control: '' + access: closed license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - modality: - explanation: text - value: text; text - model_card: '' + explanation: > + The asset isn't released, and hence the license is unknown. 
+ intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' + +- type: model name: Flan-U-PaLM organization: Google - prohibited_uses: '' - quality_control: '' - size: 540B parameters (dense) - training_emissions: '' - training_hardware: '' - training_time: '' - type: model + description: '' + created_date: 2022-10-20 url: https://arxiv.org/abs/2210.11416 -- access: open + model_card: '' + modality: text analysis: '' - created_date: 2021-09-03 - datasheet: '' - dependencies: [] - description: '' - excluded: '' - feedback: '' - included: '' + size: 540B + dependencies: [U-PaLM, Muffin, P3, NaturalInstructions-v2] + training_emissions: '' + training_time: '' + training_hardware: '' + quality_control: '' + access: closed + license: + value: unknown + explanation: > + The asset isn't released, and hence the license is unknown. intended_uses: '' - license: Apache 2.0 - modality: text + prohibited_uses: '' monitoring: '' + feedback: '' + +- type: dataset name: Muffin organization: Google - prohibited_uses: '' - quality_control: '' - sample: [] - size: 62 tasks - type: dataset - url: https://arxiv.org/abs/2109.01652 -- access: closed - analysis: '' - created_date: 2022-10-20 - dependencies: [PaLM, PaLM dataset] description: '' - feedback: '' + created_date: 2021-09-03 + url: https://arxiv.org/abs/2109.01652 + datasheet: '' + modality: text + size: 62 tasks + sample: [] + analysis: '' + dependencies: [] + included: '' + excluded: '' + quality_control: '' + access: open + license: Apache 2.0 intended_uses: '' - license: - explanation: "The asset isn't released, and hence the license is unknown.\n" - value: unknown - modality: - explanation: text - value: text; text - model_card: '' + prohibited_uses: '' monitoring: '' + feedback: '' + +- type: model name: U-PaLM organization: Google - prohibited_uses: '' - quality_control: '' - size: 540B parameters (dense) - training_emissions: '' - training_hardware: '' - training_time: '' - type: model + description: '' + created_date: 2022-10-20 url: https://arxiv.org/abs/2210.11399 -- access: closed + model_card: '' + modality: text analysis: '' - created_date: 2022-08-16 - dependencies: [PaLM] - description: '' - feedback: '' + size: 540B + dependencies: [PaLM, PaLM dataset] + training_emissions: '' + training_time: '' + training_hardware: '' + quality_control: '' + access: closed + license: + value: unknown + explanation: > + The asset isn't released, and hence the license is unknown. 
intended_uses: '' - license: unknown (model weights), Apache 2.0 (SayCan code) - modality: - explanation: text, robotic control - value: text; text - model_card: '' + prohibited_uses: '' monitoring: '' + feedback: '' + +- type: model name: PaLM-SayCan organization: Google - prohibited_uses: '' - quality_control: '' - size: 540B parameters (dense) - training_emissions: '' - training_hardware: '' - training_time: '' - type: model + description: '' + created_date: 2022-08-16 url: https://arxiv.org/abs/2204.01691 -- access: closed + model_card: '' + modality: text, robotic control analysis: '' + size: 540B + dependencies: [PaLM] + training_emissions: '' + training_time: '' + training_hardware: '' + quality_control: '' + access: closed + license: unknown (model weights), Apache 2.0 (SayCan code) + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' + +- type: model + name: GLaM + organization: Google + description: '' created_date: 2021-12-13 + url: https://arxiv.org/abs/2112.06905 + model_card: '' + modality: text + analysis: '' + size: 1.2T parameters (sparse) dependencies: - GLaM Web dataset - Wikipedia @@ -858,740 +964,750 @@ - GLaM Forums dataset - BooksCorpus - GLaM News dataset - description: '' - feedback: '' - intended_uses: '' - license: - explanation: "The asset isn't released, and hence the license is unknown.\n" - value: unknown - modality: - explanation: text - value: text; text - model_card: '' - monitoring: '' - name: GLaM - organization: Google - prohibited_uses: '' - quality_control: '' - size: 1.2T parameters (sparse) training_emissions: '' - training_hardware: '' training_time: '' - type: model - url: https://arxiv.org/abs/2112.06905 -- access: closed - analysis: '' - created_date: 2021-12-13 - datasheet: '' - dependencies: [] - description: '' - excluded: '' - feedback: '' - included: '' - intended_uses: '' + training_hardware: '' + quality_control: '' + access: closed license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - modality: text + explanation: > + The asset isn't released, and hence the license is unknown. + intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' + +- type: dataset name: GLaM Web dataset organization: Google - prohibited_uses: '' - quality_control: '' - sample: [] - size: unknown - type: dataset - url: https://arxiv.org/abs/2112.06905 -- access: closed - analysis: '' + description: '' created_date: 2021-12-13 + url: https://arxiv.org/abs/2112.06905 datasheet: '' + modality: text + size: unknown + sample: [] + analysis: '' dependencies: [] - description: '' - excluded: '' - feedback: '' included: '' - intended_uses: '' + excluded: '' + quality_control: '' + access: closed license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - modality: text + explanation: > + The asset isn't released, and hence the license is unknown. 
+ intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' + +- type: dataset name: GLaM Conversations dataset organization: Google - prohibited_uses: '' - quality_control: '' - sample: [] - size: unknown - type: dataset - url: https://arxiv.org/abs/2112.06905 -- access: closed - analysis: '' + description: '' created_date: 2021-12-13 + url: https://arxiv.org/abs/2112.06905 datasheet: '' + modality: text + size: unknown + sample: [] + analysis: '' dependencies: [] - description: '' - excluded: '' - feedback: '' included: '' - intended_uses: '' + excluded: '' + quality_control: '' + access: closed license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - modality: text + explanation: > + The asset isn't released, and hence the license is unknown. + intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' + +- type: dataset name: GLaM Forums dataset organization: Google - prohibited_uses: '' - quality_control: '' - sample: [] - size: unknown - type: dataset - url: https://arxiv.org/abs/2112.06905 -- access: closed - analysis: '' + description: '' created_date: 2021-12-13 + url: https://arxiv.org/abs/2112.06905 datasheet: '' + modality: text + size: unknown + sample: [] + analysis: '' dependencies: [] - description: '' - excluded: '' - feedback: '' included: '' - intended_uses: '' + excluded: '' + quality_control: '' + access: closed license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - modality: text + explanation: > + The asset isn't released, and hence the license is unknown. + intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' + +- type: dataset name: GLaM News dataset organization: Google - prohibited_uses: '' - quality_control: '' - sample: [] - size: unknown - type: dataset + description: '' + created_date: 2021-12-13 url: https://arxiv.org/abs/2112.06905 -- access: closed + datasheet: '' + modality: text + size: unknown + sample: [] analysis: '' - created_date: 2021-05-18 - dependencies: [MUM dataset] - description: '' - feedback: '' - intended_uses: '' + dependencies: [] + included: '' + excluded: '' + quality_control: '' + access: closed license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - modality: - explanation: text, image - value: image, text; image, text - model_card: '' + explanation: > + The asset isn't released, and hence the license is unknown. + intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' + +- type: model name: MUM organization: Google - prohibited_uses: '' - quality_control: '' + description: '' + created_date: 2021-05-18 + url: https://blog.google/products/search/introducing-mum/ + model_card: '' + modality: text, image + analysis: '' size: unknown + dependencies: [MUM dataset] training_emissions: '' - training_hardware: '' training_time: '' - type: model - url: https://blog.google/products/search/introducing-mum/ -- access: closed - analysis: '' - created_date: 2021-05-18 - datasheet: '' - dependencies: [] - description: '' - excluded: '' - feedback: '' - included: '' - intended_uses: '' + training_hardware: '' + quality_control: '' + access: closed license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - modality: - explanation: text, image - value: image, text + explanation: > + The asset isn't released, and hence the license is unknown. 
+ intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' + +- type: dataset name: MUM dataset organization: Google - prohibited_uses: '' - quality_control: '' - sample: [] - size: unknown - type: dataset + description: '' + created_date: 2021-05-18 url: https://blog.google/products/search/introducing-mum/ -- access: closed + datasheet: '' + modality: text, image + size: unknown + sample: [] analysis: '' - created_date: 2023-02-01 - dependencies: [LAION-400M, Phenaki Video-Text Corpus] - description: '' - feedback: '' - intended_uses: '' + dependencies: [] + included: '' + excluded: '' + quality_control: '' + access: closed license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - modality: - explanation: text, video - value: text, video; text, video - model_card: '' + explanation: > + The asset isn't released, and hence the license is unknown. + intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' + +- type: model name: Phenaki organization: Google - prohibited_uses: '' - quality_control: '' - size: 1.8B parameters (dense) - training_emissions: '' - training_hardware: '' - training_time: '' - type: model + description: '' + created_date: 2023-02-01 url: https://openreview.net/pdf?id=vOEXS39nOF -- access: closed + model_card: '' + modality: text, video analysis: '' + size: 1.8B parameters (dense model) + dependencies: [LAION-400M, Phenaki Video-Text Corpus] + training_emissions: '' + training_time: '' + training_hardware: '' + quality_control: '' + access: closed + license: + value: unknown + explanation: > + The asset isn't released, and hence the license is unknown. + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' + +- type: dataset + name: Phenaki Video-Text Corpus + organization: Google + description: '' created_date: 2023-02-01 + url: https://openreview.net/pdf?id=vOEXS39nOF datasheet: '' - dependencies: [] - description: '' - excluded: '' - feedback: '' + modality: text, video + size: 15M text-video pairs at 8FPS + sample: [] + analysis: '' + dependencies: [] included: '' - intended_uses: '' + excluded: '' + quality_control: '' + access: closed license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - modality: text, video + explanation: > + The asset isn't released, and hence the license is unknown. 
+ intended_uses: '' + prohibited_uses: '' monitoring: '' - name: Phenaki Video-Text Corpus + feedback: '' + +- type: model + name: Flan-UL2 organization: Google - prohibited_uses: '' - quality_control: '' - sample: [] - size: 15M text-video pairs at 8FPS - type: dataset - url: https://openreview.net/pdf?id=vOEXS39nOF -- access: open - analysis: '' + description: '' created_date: 2023-03-02 + url: https://arxiv.org/abs/2205.05131 + model_card: '' + modality: text + analysis: '' + size: 20B parameters (dense model) dependencies: [UL2, Flan Collection] - description: '' - feedback: '' - intended_uses: '' + training_emissions: '' + training_time: '' + training_hardware: '' + quality_control: '' + access: open license: Apache 2.0 - modality: - explanation: text - value: text; text - model_card: '' + intended_uses: '' + prohibited_uses: '' monitoring: '' - name: Flan-UL2 + feedback: '' + +- type: dataset + name: Flan Collection organization: Google - prohibited_uses: '' - quality_control: '' - size: 20B parameters (dense) - training_emissions: '' - training_hardware: '' - training_time: '' - type: model - url: https://arxiv.org/abs/2205.05131 -- access: open - analysis: '' + description: '' created_date: 2023-01-31 + url: https://arxiv.org/abs/2301.13688 datasheet: '' + modality: text + size: 1836 tasks + sample: [] + analysis: '' dependencies: [Flan dataset, P3, NaturalInstructions-v2] - description: '' - excluded: '' - feedback: '' included: '' - intended_uses: '' + excluded: '' + quality_control: '' + access: open license: Apache 2.0 - modality: text + intended_uses: '' + prohibited_uses: '' monitoring: '' - name: Flan Collection + feedback: '' + +- type: model + name: MusicLM organization: Google - prohibited_uses: '' - quality_control: '' - sample: [] - size: 1836 tasks - type: dataset - url: https://arxiv.org/abs/2301.13688 -- access: closed - analysis: '' + description: '' created_date: 2023-01-26 + url: https://arxiv.org/pdf/2301.11325.pdf + model_card: '' + modality: audio (music), text + analysis: '' + size: 1.4B parameters (430M semantic stage, 430M acoustic stage, 600M w2v-BERT) dependencies: - SoundStream - w2v-BERT - MuLan - MusicLM semantic model - MusicLM acoustic model - description: '' - feedback: '' - intended_uses: '' - license: - explanation: "The asset isn't released, and hence the license is unknown.\n" - value: unknown - modality: - explanation: audio (music), text - value: audio, text; audio, text - model_card: '' - monitoring: '' - name: MusicLM - organization: Google - prohibited_uses: '' - quality_control: '' - size: 1.4B parameters (dense) training_emissions: '' - training_hardware: '' training_time: '' - type: model - url: https://arxiv.org/pdf/2301.11325.pdf -- access: closed - analysis: '' - created_date: 2023-01-26 - dependencies: [Free Music Archive] - description: '' - feedback: '' - intended_uses: '' + training_hardware: '' + quality_control: '' + access: closed license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - modality: - explanation: audio (music) - value: audio; audio - model_card: '' + explanation: > + The asset isn't released, and hence the license is unknown. 
+ intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' + +- type: model name: SoundStream organization: Google - prohibited_uses: '' - quality_control: '' - size: unknown - training_emissions: '' - training_hardware: '' - training_time: '' - type: model + description: '' + created_date: 2023-01-26 url: https://arxiv.org/pdf/2301.11325.pdf -- access: closed + model_card: '' + modality: audio (music) analysis: '' - created_date: 2023-01-26 + size: unknown dependencies: [Free Music Archive] - description: '' - feedback: '' - intended_uses: '' + training_emissions: '' + training_time: '' + training_hardware: '' + quality_control: '' + access: closed license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - modality: - explanation: audio (music) - value: audio; audio - model_card: '' + explanation: > + The asset isn't released, and hence the license is unknown. + intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' + +- type: model name: w2v-BERT organization: Google - prohibited_uses: '' - quality_control: '' - size: 600M parameters (dense) - training_emissions: '' - training_hardware: '' - training_time: '' - type: model + description: '' + created_date: 2023-01-26 url: https://arxiv.org/pdf/2301.11325.pdf -- access: closed + model_card: '' + modality: audio (music) analysis: '' - created_date: 2022-08-26 - dependencies: [AST, BERT, MuLan dataset] - description: '' - feedback: '' - intended_uses: '' + size: 600M parameter (dense model) + dependencies: [Free Music Archive] + training_emissions: '' + training_time: '' + training_hardware: '' + quality_control: '' + access: closed license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - modality: - explanation: audio (music), text - value: audio, text; audio, text - model_card: '' + explanation: > + The asset isn't released, and hence the license is unknown. + intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' + +- type: model name: MuLan organization: Google - prohibited_uses: '' - quality_control: '' + description: '' + created_date: 2022-08-26 + url: https://arxiv.org/abs/2208.12415 + model_card: '' + modality: audio (music), text + analysis: '' size: Unknown + dependencies: [AST, BERT, MuLan dataset] training_emissions: '' - training_hardware: '' training_time: '' - type: model - url: https://arxiv.org/abs/2208.12415 -- access: closed - analysis: '' - created_date: 2022-08-26 - datasheet: '' - dependencies: [] - description: '' - excluded: '' - feedback: '' - included: '' - intended_uses: '' + training_hardware: '' + quality_control: '' + access: closed license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - modality: - explanation: audio (music), text - value: audio, text + explanation: > + The asset isn't released, and hence the license is unknown. 
+ intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' + +- type: dataset name: MuLan dataset organization: Google - prohibited_uses: '' - quality_control: '' - sample: [] - size: 370K hours audio - type: dataset + description: '' + created_date: 2022-08-26 url: https://arxiv.org/abs/2208.12415 -- access: closed - analysis: '' - created_date: 2023-01-26 datasheet: '' + modality: audio (music), text + size: 370K hours audio + sample: [] + analysis: '' dependencies: [] - description: '' - excluded: '' - feedback: '' included: '' - intended_uses: '' + excluded: '' + quality_control: '' + access: closed license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - modality: - explanation: audio (music) - value: audio + explanation: > + The asset isn't released, and hence the license is unknown. + intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' + +- type: dataset name: MusicLM dataset organization: Google - prohibited_uses: '' - quality_control: '' - sample: [] - size: 280K hours audio - type: dataset + description: '' + created_date: 2023-01-26 url: https://arxiv.org/pdf/2301.11325.pdf -- access: closed + datasheet: '' + modality: audio (music) + size: 280K hours audio + sample: [] analysis: '' - created_date: 2023-01-26 - dependencies: [MusicLM dataset] - description: '' - feedback: '' - intended_uses: '' + dependencies: [] + included: '' + excluded: '' + quality_control: '' + access: closed license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - modality: - explanation: Audio (music) - value: audio; audio - model_card: '' + explanation: > + The asset isn't released, and hence the license is unknown. + intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' + +- type: model name: MusicLM semantic model organization: Google - prohibited_uses: '' - quality_control: '' - size: 430M parameters (dense) - training_emissions: '' - training_hardware: '' - training_time: '' - type: model + description: '' + created_date: 2023-01-26 url: https://arxiv.org/pdf/2301.11325.pdf -- access: closed + model_card: '' + modality: Audio (music) analysis: '' - created_date: 2023-01-26 + size: 430M parameters (dense model) dependencies: [MusicLM dataset] - description: '' - feedback: '' - intended_uses: '' + training_emissions: '' + training_time: '' + training_hardware: '' + quality_control: '' + access: closed license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - modality: - explanation: Audio (music) - value: audio; audio - model_card: '' + explanation: > + The asset isn't released, and hence the license is unknown. 
+ intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' + +- type: model name: MusicLM acoustic model organization: Google - prohibited_uses: '' - quality_control: '' - size: 430M parameters (dense) - training_emissions: '' - training_hardware: '' - training_time: '' - type: model + description: '' + created_date: 2023-01-26 url: https://arxiv.org/pdf/2301.11325.pdf -- access: closed + model_card: '' + modality: Audio (music) analysis: '' - created_date: 2023-02-08 - dependencies: [Noise2Music pseudolabel dataset] - description: '' - feedback: '' - intended_uses: '' + size: 430M parameters (dense model) + dependencies: [MusicLM dataset] + training_emissions: '' + training_time: '' + training_hardware: '' + quality_control: '' + access: closed license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - modality: - explanation: audio (music), text - value: audio, text; audio, text - model_card: '' + explanation: > + The asset isn't released, and hence the license is unknown. + intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' + +- type: model name: Noise2Music organization: Google - prohibited_uses: '' - quality_control: '' + description: '' + created_date: 2023-02-08 + url: https://google-research.github.io/noise2music/noise2music.pdf + model_card: '' + modality: audio (music), text + analysis: '' size: unknown + dependencies: [Noise2Music pseudolabel dataset] training_emissions: '' - training_hardware: '' training_time: '' - type: model - url: https://google-research.github.io/noise2music/noise2music.pdf -- access: closed - analysis: '' - created_date: 2023-02-08 - datasheet: '' - dependencies: [LaMDA] - description: '' - excluded: '' - feedback: '' - included: '' - intended_uses: '' + training_hardware: '' + quality_control: '' + access: closed license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - modality: text + explanation: > + The asset isn't released, and hence the license is unknown. + intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' + +- type: dataset name: LaMDA-LF organization: Google - prohibited_uses: '' - quality_control: '' - sample: [] - size: 150k songs - type: dataset - url: https://google-research.github.io/noise2music/noise2music.pdf -- access: closed - analysis: '' + description: '' created_date: 2023-02-08 + url: https://google-research.github.io/noise2music/noise2music.pdf datasheet: '' - dependencies: [MusicCaps] - description: '' - excluded: '' - feedback: '' + modality: text + size: 150k songs + sample: [] + analysis: '' + dependencies: [LaMDA] included: '' - intended_uses: '' + excluded: '' + quality_control: '' + access: closed license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - modality: text + explanation: > + The asset isn't released, and hence the license is unknown. 
+ intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' + +- type: dataset name: Rater-LF organization: Google - prohibited_uses: '' - quality_control: '' - sample: [] - size: 10k captions - type: dataset - url: https://google-research.github.io/noise2music/noise2music.pdf -- access: closed - analysis: '' + description: '' created_date: 2023-02-08 + url: https://google-research.github.io/noise2music/noise2music.pdf datasheet: '' + modality: text + size: 10k captions + sample: [] + analysis: '' dependencies: [MusicCaps] - description: '' - excluded: '' - feedback: '' included: '' - intended_uses: '' + excluded: '' + quality_control: '' + access: closed license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - modality: - explanation: Text - value: text + explanation: > + The asset isn't released, and hence the license is unknown. + intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' + +- type: dataset name: Rater-SF organization: Google - prohibited_uses: '' - quality_control: '' - sample: [] - size: 24k captions - type: dataset + description: '' + created_date: 2023-02-08 url: https://google-research.github.io/noise2music/noise2music.pdf -- access: closed + datasheet: '' + modality: Text + size: 24k captions + sample: [] analysis: '' - created_date: 2023-02-08 - dependencies: [MuLan, MuLaMCap, LaMDA-LF, Rater-LF, Rater-SF] - description: '' - feedback: '' - intended_uses: '' + dependencies: [MusicCaps] + included: '' + excluded: '' + quality_control: '' + access: closed license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - modality: - explanation: audio (music), text - value: audio, text; audio, text - model_card: '' + explanation: > + The asset isn't released, and hence the license is unknown. + intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' + +- type: model name: Noise2Music pseudolabeler organization: Google - prohibited_uses: '' - quality_control: '' + description: '' + created_date: 2023-02-08 + url: https://google-research.github.io/noise2music/noise2music.pdf + model_card: '' + modality: audio (music), text + analysis: '' size: unknown + dependencies: [MuLan, MuLaMCap, LaMDA-LF, Rater-LF, Rater-SF] training_emissions: '' - training_hardware: '' training_time: '' - type: model - url: https://google-research.github.io/noise2music/noise2music.pdf -- access: closed - analysis: '' - created_date: 2023-02-08 - datasheet: '' - dependencies: [] - description: '' - excluded: '' - feedback: '' - included: '' - intended_uses: '' + training_hardware: '' + quality_control: '' + access: closed license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - modality: - explanation: audio (music) - value: audio + explanation: > + The asset isn't released, and hence the license is unknown. 
+ intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' + +- type: dataset name: Noise2Music audio dataset organization: Google - prohibited_uses: '' - quality_control: '' - sample: [] - size: 340k hours audio - type: dataset - url: https://google-research.github.io/noise2music/noise2music.pdf -- access: closed - analysis: '' + description: '' created_date: 2023-02-08 + url: https://google-research.github.io/noise2music/noise2music.pdf datasheet: '' - dependencies: [Noise2Music audio dataset, Noise2Music pseudolabeler] - description: '' - excluded: '' - feedback: '' + modality: audio (music) + size: 340k hours audio + sample: [] + analysis: '' + dependencies: [] included: '' - intended_uses: '' + excluded: '' + quality_control: '' + access: closed license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - modality: - explanation: audio (music), text - value: audio, text + explanation: > + The asset isn't released, and hence the license is unknown. + intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' + +- type: dataset name: Noise2Music pseudolabel dataset organization: Google - prohibited_uses: '' - quality_control: '' - sample: [] - size: 340k hours audio with pseudolabels - type: dataset + description: '' + created_date: 2023-02-08 url: https://google-research.github.io/noise2music/noise2music.pdf -- access: limited - adaptation: '' - created_date: 2022-08-25 - dependencies: [LaMDA] - description: AI Test Kitchen provides a new way for people to learn about, experience, - and give feedback on emerging AI technology, like LaMDA. - failures: '' - feedback: '' - intended_uses: '' + datasheet: '' + modality: audio (music), text + size: 340k hours audio with pseudolabels + sample: [] + analysis: '' + dependencies: [Noise2Music audio dataset, Noise2Music pseudolabeler] + included: '' + excluded: '' + quality_control: '' + access: closed license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown + explanation: > + The asset isn't released, and hence the license is unknown. + intended_uses: '' + prohibited_uses: '' monitoring: '' - monthly_active_users: '' + feedback: '' + +- type: application name: AI Test Kitchen organization: Google - output_space: '' - prohibited_uses: '' - quality_control: '' - terms_of_service: '' - type: application + description: AI Test Kitchen provides a new way for people to learn about, experience, + and give feedback on emerging AI technology, like LaMDA. + created_date: 2022-08-25 url: https://blog.google/technology/ai/join-us-in-the-ai-test-kitchen/ - user_distribution: '' -- access: closed - adaptation: '' - created_date: 2023-02-06 dependencies: [LaMDA] - description: Conversational AI service, powered by LaMDA - failures: '' - feedback: '' - intended_uses: '' + adaptation: '' + output_space: '' + quality_control: '' + access: limited license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown + explanation: > + The asset isn't released, and hence the license is unknown. 
+ terms_of_service: '' + intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' monthly_active_users: '' + user_distribution: '' + failures: '' + +- type: application name: Bard organization: Google + description: Conversational AI service, powered by LaMDA + created_date: 2023-02-06 + url: https://blog.google/technology/ai/bard-google-ai-search-updates/ + dependencies: [LaMDA] + adaptation: '' output_space: '' - prohibited_uses: '' quality_control: '' + access: closed + license: + value: unknown + explanation: > + The asset isn't released, and hence the license is unknown. terms_of_service: '' - type: application - url: https://blog.google/technology/ai/bard-google-ai-search-updates/ + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' + monthly_active_users: '' user_distribution: '' -- access: closed - analysis: '' + failures: '' + +- type: model + name: Minerva + organization: Google + description: '' created_date: 2022-06-29 + url: https://arxiv.org/abs/2206.14858 + model_card: '' + modality: text + analysis: '' + size: 540B parameters (dense model) dependencies: - PaLM - arXiv - PaLM dataset - Minerva Math Web Pages dataset - description: '' - feedback: '' - intended_uses: '' + training_emissions: '' + training_time: '' + training_hardware: '' + quality_control: '' + access: closed license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - modality: - explanation: text - value: text; text - model_card: '' + explanation: > + The asset isn't released, and hence the license is unknown. + intended_uses: '' + prohibited_uses: '' monitoring: '' - name: Minerva + feedback: '' + +- type: dataset + name: Minerva Math Web Pages dataset organization: Google - prohibited_uses: '' - quality_control: '' - size: 540B parameters (dense) - training_emissions: '' - training_hardware: '' - training_time: '' - type: model - url: https://arxiv.org/abs/2206.14858 -- access: closed - analysis: '' + description: '' created_date: 2022-06-29 + url: https://arxiv.org/abs/2206.14858 datasheet: '' + modality: Text + size: 17.5B tokens + sample: [] + analysis: '' dependencies: [] - description: '' - excluded: '' - feedback: '' included: '' - intended_uses: '' + excluded: '' + quality_control: '' + access: closed license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - modality: - explanation: Text - value: text + explanation: > + The asset isn't released, and hence the license is unknown. + intended_uses: '' + prohibited_uses: '' monitoring: '' - name: Minerva Math Web Pages dataset + feedback: '' + +- type: model + name: USM organization: Google - prohibited_uses: '' - quality_control: '' - sample: [] - size: 17.5B tokens - type: dataset - url: https://arxiv.org/abs/2206.14858 -- access: limited - analysis: '' - created_date: 2023-03-06 - dependencies: [YT-NLU-U, Pub-U, Web-NTL, YT-SUP+, Pub-S] description: Universal Speech Model (USM) is a family of state-of-the-art speech models with 2B parameters trained on 12 million hours of speech and 28 billion sentences of text, spanning 300+ languages. USM, which is for use in YouTube @@ -1600,272 +1716,270 @@ Punjabi, Assamese, Santhali, Balinese, Shona, Malagasy, Luganda, Luo, Bambara, Soga, Maninka, Xhosa, Akan, Lingala, Chichewa, Nkore, Nzema to name a few. Some of these languages are spoken by fewer than twenty million people, making it - very hard to find the necessary training data. 
- feedback: '' - intended_uses: '' - license: - explanation: "The asset isn't released, and hence the license is unknown.\n" - value: unknown - modality: - explanation: speech, text (300+ languages) - value: audio, text; audio, text - model_card: '' - monitoring: '' - name: USM - organization: Google - prohibited_uses: '' - quality_control: '' - size: 2B parameters (dense) + very hard to find the necessary training data. + created_date: 2023-03-06 + url: https://arxiv.org/abs/2303.01037 + model_card: '' + modality: speech, text (300+ languages) + analysis: '' + size: 2B parameters (dense model) + dependencies: [YT-NLU-U, Pub-U, Web-NTL, YT-SUP+, Pub-S] training_emissions: '' - training_hardware: '' training_time: '' - type: model - url: https://arxiv.org/abs/2303.01037 -- access: open - adaptation: '' - created_date: 2005-02-14 - dependencies: [USM] - description: YouTube is a global online video sharing and social media platform - failures: '' - feedback: '' + training_hardware: '' + quality_control: '' + access: limited + license: + value: unknown + explanation: > + The asset isn't released, and hence the license is unknown. intended_uses: '' - license: '' + prohibited_uses: '' monitoring: '' - monthly_active_users: '' + feedback: '' + +- type: application name: YouTube organization: Google + description: YouTube is a global online video sharing and social media platform + created_date: 2005-02-14 + url: https://www.youtube.com/ + dependencies: [USM] + adaptation: '' output_space: '' - prohibited_uses: '' quality_control: '' + access: open + license: '' terms_of_service: '' - type: application - url: https://www.youtube.com/ - user_distribution: '' -- access: closed - analysis: '' - created_date: 2023-03-06 - dependencies: [PaLM, ViT-22B] - description: '' - feedback: '' intended_uses: '' - license: - explanation: "The asset isn't released, and hence the license is unknown.\n" - value: unknown - modality: - explanation: text, image - value: image, text; image, text - model_card: '' + prohibited_uses: '' monitoring: '' + feedback: '' + monthly_active_users: '' + user_distribution: '' + failures: '' + +- type: model name: PaLM-E organization: Google - prohibited_uses: '' - quality_control: '' - size: 562B parameters (dense) - training_emissions: '' - training_hardware: '' - training_time: '' - type: model + description: '' + created_date: 2023-03-06 url: https://arxiv.org/abs/2303.03378 -- access: closed + model_card: '' + modality: text, image analysis: '' - created_date: 2023-02-10 - dependencies: [JFT] - description: '' - feedback: '' - intended_uses: '' + size: 562B parameters (dense model) + dependencies: [PaLM, ViT-22B] + training_emissions: '' + training_time: '' + training_hardware: '' + quality_control: '' + access: closed license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - modality: - explanation: image - value: image; image - model_card: '' + explanation: > + The asset isn't released, and hence the license is unknown. 
+ intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' + +- type: model name: ViT-22B organization: Google - prohibited_uses: '' - quality_control: '' - size: 22B parameters (dense) - training_emissions: '' - training_hardware: '' - training_time: '' - type: model + description: '' + created_date: 2023-02-10 url: https://arxiv.org/abs/2302.05442 -- access: closed + model_card: '' + modality: image analysis: '' - created_date: 2022-09-07 - dependencies: [w2v-BERT, SoundStream] - description: '' - feedback: '' - intended_uses: '' + size: 22B parameters (dense model) + dependencies: [JFT] + training_emissions: '' + training_time: '' + training_hardware: '' + quality_control: '' + access: closed license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - modality: - explanation: audio, text - value: audio, text; audio, text - model_card: '' + explanation: > + The asset isn't released, and hence the license is unknown. + intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' + +- type: model name: AudioLM organization: Google - prohibited_uses: '' - quality_control: '' - size: 1B parameters (dense) - training_emissions: '' - training_hardware: '' - training_time: '' - type: model + description: '' + created_date: 2022-09-07 url: https://arxiv.org/abs/2209.03143 -- access: closed + model_card: '' + modality: audio, text analysis: '' - created_date: 2022-09-14 - dependencies: [mT5, ViT-e, WebLI] - description: '' - feedback: '' - intended_uses: '' + size: 1B parameters (dense model) + dependencies: [w2v-BERT, SoundStream] + training_emissions: '' + training_time: '' + training_hardware: '' + quality_control: '' + access: closed license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - modality: - explanation: text, image - value: image, text; image, text - model_card: '' + explanation: > + The asset isn't released, and hence the license is unknown. + intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' + +- type: model name: PaLI organization: Google - prohibited_uses: '' - quality_control: '' - size: 17B parameters (dense) - training_emissions: '' - training_hardware: '' - training_time: '' - type: model + description: '' + created_date: 2022-09-14 url: https://arxiv.org/abs/2209.06794 -- access: closed + model_card: '' + modality: text, image analysis: '' - created_date: 2022-09-14 - dependencies: [JFT] - description: '' - feedback: '' - intended_uses: '' + size: 17B parameters (13B text, 4B images) + dependencies: [mT5, ViT-e, WebLI] + training_emissions: '' + training_time: '' + training_hardware: '' + quality_control: '' + access: closed license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - modality: - explanation: image - value: image; image - model_card: '' + explanation: > + The asset isn't released, and hence the license is unknown. 
+ intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' + +- type: model name: ViT-e organization: Google - prohibited_uses: '' - quality_control: '' - size: 3.9B parameters (dense) - training_emissions: '' - training_hardware: '' - training_time: '' - type: model + description: '' + created_date: 2022-09-14 url: https://arxiv.org/abs/2209.06794 -- access: closed + model_card: '' + modality: image analysis: '' - created_date: 2022-09-14 - datasheet: '' - dependencies: [] - description: '' - excluded: '' - feedback: '' - included: '' - intended_uses: '' + size: 3.9B parameters (dense model) + dependencies: [JFT] + training_emissions: '' + training_time: '' + training_hardware: '' + quality_control: '' + access: closed license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - modality: - explanation: text, image - value: image, text + explanation: > + The asset isn't released, and hence the license is unknown. + intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' + +- type: dataset name: WebLI organization: Google - prohibited_uses: '' - quality_control: '' - sample: [] - size: 10B images, 12B alt-text - type: dataset + description: '' + created_date: 2022-09-14 url: https://arxiv.org/abs/2209.06794 -- access: open + datasheet: '' + modality: text, image + size: 10B images, 12B alt-text + sample: [] analysis: '' - created_date: 2023-02-27 - dependencies: [T5, CLIP, YT-Temporal-1B] - description: '' - feedback: '' + dependencies: [] + included: '' + excluded: '' + quality_control: '' + access: closed + license: + value: unknown + explanation: > + The asset isn't released, and hence the license is unknown. intended_uses: '' - license: Apache 2.0 - modality: - explanation: text, video - value: text, video; text, video - model_card: '' + prohibited_uses: '' monitoring: '' + feedback: '' + +- type: model name: Vid2Seq organization: Google - prohibited_uses: '' - quality_control: '' - size: 500M parameters (dense) + description: '' + created_date: 2023-02-27 + url: https://arxiv.org/abs/2302.14115 + model_card: '' + modality: text, video + analysis: '' + size: 500M parameters (200M text, 300M video) + dependencies: [T5, CLIP, YT-Temporal-1B] training_emissions: '' - training_hardware: '' training_time: '' - type: model - url: https://arxiv.org/abs/2302.14115 -- access: open - analysis: evaluated on DSTC11 Challenge Task, based on MultiWoz 2.1, with a focus - on dialog state tracking. - created_date: 2023-06-08 - dependencies: [CTC blank-filtering, Speech2Text adapter] - description: Joint speech and language model using a Speech2Text adapter and using - a CTC-based blank-filtering. - feedback: '' + training_hardware: '' + quality_control: '' + access: open + license: Apache 2.0 intended_uses: '' - license: Google - modality: - explanation: speech input with textual output - value: audio; text - model_card: '' + prohibited_uses: '' monitoring: '' + feedback: '' + +- type: model name: Google Joint SLM organization: Google - prohibited_uses: '' - quality_control: '' + description: Joint speech and language model using a Speech2Text adapter and using + a CTC-based blank-filtering. + created_date: 2023-06-08 + url: https://arxiv.org/pdf/2306.07944.pdf + model_card: '' + modality: speech input with textual output + analysis: evaluated on DSTC11 Challenge Task, based on MultiWoz 2.1, with a focus + on dialog state tracking. 
size: '' + dependencies: [CTC blank-filtering, Speech2Text adapter] training_emissions: '' - training_hardware: '' training_time: '' - type: model - url: https://arxiv.org/pdf/2306.07944.pdf -- access: open - analysis: Reports results on standard code benchmarks across a variety of programming - languages. - created_date: 2023-05-10 - dependencies: [palm 2 dataset] + training_hardware: '' + quality_control: '' + access: open + license: Google + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' + +- type: model + name: PaLM 2 + organization: Google description: PaLM2 is a new state-of-the-art language model that has better multilingual and reasoning capabilities and is more compute-efficient than its predecessor PaLM. PaLM 2 is a Transformer-based model trained using a mixture of objectives similar to UL2. - feedback: Specific queries provided by annotators + created_date: 2023-05-10 + url: https://blog.google/technology/ai/google-palm-2-ai-large-language-model/ + model_card: https://ai.google/static/documents/palm2techreport.pdf + modality: text (input and output) + analysis: Reports results on standard code benchmarks across a variety of programming + languages. + size: 3.6T parameters + dependencies: [palm 2 dataset] + training_emissions: '' + training_time: '' + training_hardware: TPU v4 (number unspecified) + quality_control: Employed de-duplication, removal of sensitive-PII and filtering. + Added control tokens marking toxicity of text. + access: open + license: Google intended_uses: general use large language model that can be used for language, reasoning, and code tasks. - license: Google - modality: - explanation: text (input and output) - value: text; text - model_card: https://ai.google/static/documents/palm2techreport.pdf - monitoring: Google internal monitoring - name: PaLM 2 - organization: Google prohibited_uses: becoming part of a general-purpose service or product or use within specific downstream applications without prior assessment - quality_control: Employed de-duplication, removal of sensitive-PII and filtering. - Added control tokens marking toxicity of text. - size: 3.6T parameters (dense) - training_emissions: '' - training_hardware: TPU v4 (number unspecified) - training_time: '' - type: model - url: https://blog.google/technology/ai/google-palm-2-ai-large-language-model/ + monitoring: Google internal monitoring + feedback: Specific queries provided by annotators diff --git a/assets/h2oai.yaml b/assets/h2oai.yaml index 53654100..601e629d 100644 --- a/assets/h2oai.yaml +++ b/assets/h2oai.yaml @@ -1,24 +1,23 @@ --- -- access: open - analysis: Evaluated on EleutherAI evaluation harness. - created_date: 2023-06-16 - dependencies: [GPT-NeoX, H2O AI OpenAssistant, h2oGPT Repositories] - description: Series of models fine-tuned on well-known LLMs using the h2oGPT repositories. - feedback: https://huggingface.co/h2oai/h2ogpt-oasst1-512-20b/discussions - intended_uses: '' - license: Apache 2.0 - modality: - explanation: text - value: text; text - model_card: https://huggingface.co/h2oai/h2ogpt-oasst1-512-20b - monitoring: '' + +- type: model name: h2oGPT organization: H2O AI - prohibited_uses: '' - quality_control: '' - size: 20B parameters (dense) + description: Series of models fine-tuned on well-known LLMs using the h2oGPT repositories. + created_date: 2023-06-16 + url: https://arxiv.org/pdf/2306.08161.pdf + model_card: https://huggingface.co/h2oai/h2ogpt-oasst1-512-20b + modality: text + analysis: Evaluated on EleutherAI evaluation harness. 
+ size: 20B parameters + dependencies: [GPT-NeoX, H2O AI OpenAssistant, h2oGPT Repositories] training_emissions: unknown - training_hardware: unspecified number of 48GB A100 NVIDIA GPUs training_time: unknown - type: model - url: https://arxiv.org/pdf/2306.08161.pdf + training_hardware: unspecified number of 48GB A100 NVIDIA GPUs + quality_control: '' + access: open + license: Apache 2.0 + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: https://huggingface.co/h2oai/h2ogpt-oasst1-512-20b/discussions diff --git a/assets/hubspot.yaml b/assets/hubspot.yaml index 91876dd7..90298984 100644 --- a/assets/hubspot.yaml +++ b/assets/hubspot.yaml @@ -1,26 +1,28 @@ --- -- access: limited - adaptation: '' - created_date: 2023-01-31 - dependencies: [ChatGPT API] + +- type: application + name: ChatGPT powered by OBO + organization: HubSpot description: Give your sales, marketing, and customer service teams one of the most powerful AI tools available - ChatGPT priority access, no timeout limits, company wide access managed through a single account, incorporate into your existing processes without leaving HubSpot - failures: '' - feedback: '' - intended_uses: '' + created_date: 2023-01-31 + url: https://ecosystem.hubspot.com/marketplace/apps/sales/sales-enablement/the-obo-group-chatgpt-1398072 + dependencies: [ChatGPT API] + adaptation: '' + output_space: '' + quality_control: '' + access: limited license: - explanation: "We weren't able to locate a license.\n" value: unknown + explanation: > + We weren't able to locate a license. + terms_of_service: '' + intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' monthly_active_users: '' - name: ChatGPT powered by OBO - organization: HubSpot - output_space: '' - prohibited_uses: '' - quality_control: '' - terms_of_service: '' - type: application - url: https://ecosystem.hubspot.com/marketplace/apps/sales/sales-enablement/the-obo-group-chatgpt-1398072 user_distribution: '' + failures: '' diff --git a/assets/huggingface.yaml b/assets/huggingface.yaml index 23e07d5d..84710565 100644 --- a/assets/huggingface.yaml +++ b/assets/huggingface.yaml @@ -1,30 +1,29 @@ --- -- access: - explanation: The license is provided in the [[HuggingFace repository]](https://huggingface.co/codeparrot/codeparrot) - value: open - analysis: none + +- type: model + name: CodeParrot + organization: HuggingFace + description: CodeParrot is an autoregressive language model trained on code created_date: - explanation: The date the model was announced value: 2021-12-06 + explanation: The date the model was announced + url: https://twitter.com/lvwerra/status/1467933794699259908 + model_card: none + modality: code (Python) + analysis: none + size: 1B parameters (dense model) dependencies: [] - description: CodeParrot is an autoregressive language model trained on code - feedback: none - intended_uses: none + training_emissions: unknown + training_time: unknown + training_hardware: 16 x A100 (40GB) + quality_control: none + access: + value: open + explanation: The license is provided in the [[HuggingFace repository]](https://huggingface.co/codeparrot/codeparrot) license: - explanation: No license is explicitly provided for this model. value: none - modality: - explanation: code (Python) - value: code; code - model_card: none - monitoring: none - name: CodeParrot - organization: HuggingFace + explanation: No license is explicitly provided for this model. 
+ intended_uses: none prohibited_uses: none - quality_control: none - size: 1B parameters (dense) - training_emissions: unknown - training_hardware: 16 x A100 (40GB) - training_time: unknown - type: model - url: https://twitter.com/lvwerra/status/1467933794699259908 + monitoring: none + feedback: none diff --git a/assets/inflection.yaml b/assets/inflection.yaml index 9283431b..5113a5f1 100644 --- a/assets/inflection.yaml +++ b/assets/inflection.yaml @@ -1,47 +1,49 @@ --- -- access: limited - analysis: Evaluated on wide range of language benchmarks like MMLU 5-shot, GSM-8K, - and HellaSwag 10-shot among others. - created_date: 2023-06-22 - dependencies: [] + +- type: model + name: Inflection-1 + organization: Inflection AI description: Inflection AI's in-house LLM, which powers Pi and will soon be available via Inflection AI's conversational API. - feedback: '' - intended_uses: '' - license: unknown - modality: - explanation: text - value: text; text + created_date: 2023-06-22 + url: https://inflection.ai/inflection-1 model_card: '' - monitoring: '' - name: Inflection-1 - organization: Inflection AI - prohibited_uses: '' - quality_control: '' - size: unknown + modality: text + analysis: Evaluated on wide range of language benchmarks like MMLU 5-shot, GSM-8K, + and HellaSwag 10-shot among others. + size: + value: unknown + explanation: Designed to be roughly around Chat-GPT's size (175B parameters). + dependencies: [] training_emissions: '' - training_hardware: unknown training_time: '' - type: model - url: https://inflection.ai/inflection-1 -- access: limited - adaptation: '' - created_date: 2023-05-02 - dependencies: [Inflection-1] - description: Personal AI chatbot designed to be conversational and specialized - in emotional intelligence. - failures: '' - feedback: '' - intended_uses: to be used as a personal assistant chatbot for everyday activities + training_hardware: unknown + quality_control: '' + access: limited license: unknown + intended_uses: '' + prohibited_uses: '' monitoring: '' - monthly_active_users: '' + feedback: '' + +- type: application name: Pi organization: Inflection AI + description: Personal AI chatbot designed to be conversational and specialized + in emotional intelligence. + created_date: 2023-05-02 + url: https://inflection.ai/press + dependencies: [Inflection-1] + adaptation: '' output_space: natural language text responses - prohibited_uses: '' quality_control: '' + access: limited + license: unknown terms_of_service: '' - type: application - url: https://inflection.ai/press + intended_uses: to be used as a personal assistant chatbot for everyday activities + prohibited_uses: '' + monitoring: '' + feedback: '' + monthly_active_users: '' user_distribution: '' + failures: '' diff --git a/assets/instacart.yaml b/assets/instacart.yaml index 4dc27bf8..b3a970ad 100644 --- a/assets/instacart.yaml +++ b/assets/instacart.yaml @@ -1,26 +1,27 @@ --- -- access: limited - adaptation: '' - created_date: 2023-03-01 - dependencies: [ChatGPT API] + +- type: application + name: Ask Instacart + organization: Instacart description: Instacart is augmenting the Instacart app to enable customers to ask about food and get inspirational, shoppable answers. This uses ChatGPT alongside Instacart’s own AI and product data from their 75,000+ retail partner store locations to help customers discover ideas for open-ended shopping goals, such as “How do I make great fish tacos?” or “What’s a healthy lunch for my kids?” Instacart plans to launch “Ask Instacart” later this year. 
- failures: '' - feedback: '' - intended_uses: '' - license: '' - monitoring: '' - monthly_active_users: '' - name: Ask Instacart - organization: Instacart + created_date: 2023-03-01 + url: https://openai.com/blog/introducing-chatgpt-and-whisper-apis + dependencies: [ChatGPT API] + adaptation: '' output_space: '' - prohibited_uses: '' quality_control: '' + access: limited + license: '' terms_of_service: '' - type: application - url: https://openai.com/blog/introducing-chatgpt-and-whisper-apis + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' + monthly_active_users: '' user_distribution: '' + failures: '' diff --git a/assets/juni.yaml b/assets/juni.yaml index 9e175922..13d2757a 100644 --- a/assets/juni.yaml +++ b/assets/juni.yaml @@ -1,21 +1,22 @@ --- -- access: limited - adaptation: '' - created_date: unknown - dependencies: [Anthropic API] - description: An online tutoring solution to help students achieve academic success. - failures: '' - feedback: '' - intended_uses: '' - license: unknown - monitoring: '' - monthly_active_users: '' + +- type: application name: Juni Tutor Bot organization: Juni Learning + description: An online tutoring solution to help students achieve academic success. + created_date: unknown + url: https://junilearning.com/ + dependencies: [Anthropic API] + adaptation: '' output_space: '' - prohibited_uses: '' quality_control: '' + access: limited + license: unknown terms_of_service: '' - type: application - url: https://junilearning.com/ + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' + monthly_active_users: '' user_distribution: '' + failures: '' diff --git a/assets/kakaobrain.yaml b/assets/kakaobrain.yaml index f95f0b13..4c9546a8 100644 --- a/assets/kakaobrain.yaml +++ b/assets/kakaobrain.yaml @@ -1,25 +1,32 @@ --- -- access: - explanation: You can download the dataset from [[COYO Huggingface page]](https://huggingface.co/datasets/kakaobrain/coyo-700m). - For more information on downloading the image dataset, please refer to [[download/README.md]](https://github.com/kakaobrain/coyo-dataset/blob/main/download/README.md) - on Github. - value: open - analysis: - explanation: See [[Experiments]](https://github.com/kakaobrain/coyo-dataset#experiments) - value: "\"We empirically validated the quality of COYO dataset by re-implementing\ - \ popular models such as ALIGN, unCLIP, and ViT. We trained these models on\ - \ COYO-700M or its subsets from scratch, achieving competitive performance\ - \ to the reported numbers or generated samples in the original papers.\"\n" + +- type: dataset + name: COYO-700M + organization: Kakao Brain + description: > + COYO-700M is a large-scale dataset that contains 747M image-text pairs as well + as many other meta-attributes to increase the usability to train various models. created_date: - explanation: As mentioned in the [[Google Scholar page]](https://scholar.google.com/citations?view_op=view_citation&hl=en&user=8WXfStMAAAAJ&citation_for_view=8WXfStMAAAAJ:-f6ydRqryjwC). value: 2022-08-31 + explanation: As mentioned in the [[Google Scholar page]](https://scholar.google.com/citations?view_op=view_citation&hl=en&user=8WXfStMAAAAJ&citation_for_view=8WXfStMAAAAJ:-f6ydRqryjwC). + url: https://github.com/kakaobrain/coyo-dataset datasheet: none + modality: text (English) and image + size: 747M image-text pairs + sample: [] + analysis: + value: > + "We empirically validated the quality of COYO dataset by re-implementing popular + models such as ALIGN, unCLIP, and ViT. 
We trained these models on COYO-700M + or its subsets from scratch, achieving competitive performance to the reported + numbers or generated samples in the original papers." + explanation: See [[Experiments]](https://github.com/kakaobrain/coyo-dataset#experiments) dependencies: [CommonCrawl] - description: "COYO-700M is a large-scale dataset that contains 747M image-text\ - \ pairs as well as many other meta-attributes to increase the usability to train\ - \ various models.\n" - excluded: + included: + value: Included all image formats that Pillow library can decode. Collected + only English text using cld3. explanation: See [[Data Filtering]](https://github.com/kakaobrain/coyo-dataset#data-filtering) + excluded: value: Removed images less than 5KB image size. Removed images with an aspect ratio greater than 3.0. Removed images with min(width, height) < 200. Removed images with a score of OpenNSFW2 or GantMan/NSFW higher than 0.5. Removed @@ -29,36 +36,34 @@ texts over 1000 in length. Removed texts appearing more than 10 times. Removed texts containing NSFW words. Removed duplicated samples based on (image_phash, text). - feedback: Feedback can be given by emailing at coyo at kakaobrain.com - included: explanation: See [[Data Filtering]](https://github.com/kakaobrain/coyo-dataset#data-filtering) - value: Included all image formats that Pillow library can decode. Collected - only English text using cld3. + quality_control: '' + access: + value: open + explanation: You can download the dataset from [[COYO Huggingface page]](https://huggingface.co/datasets/kakaobrain/coyo-700m). + For more information on downloading the image dataset, please refer to [[download/README.md]](https://github.com/kakaobrain/coyo-dataset/blob/main/download/README.md) + on Github. + license: CC-BY-4.0 intended_uses: + value: > + "It is strongly recommended that this dataset be used only for research, keeping + this in mind when using the dataset, and Kakao Brain does not recommend using + this dataset as it is without special processing to clear inappropriate data + to create commercial products." explanation: See [[Disclaimer & Content Warning]](https://github.com/kakaobrain/coyo-dataset#disclaimer--content-warning) - value: "\"It is strongly recommended that this dataset be used only for research,\ - \ keeping this in mind when using the dataset, and Kakao Brain does not recommend\ - \ using this dataset as it is without special processing to clear inappropriate\ - \ data to create commercial products.\"\n" - license: CC-BY-4.0 - modality: - explanation: text (English) and image - value: image, text - monitoring: none - name: COYO-700M - organization: Kakao Brain prohibited_uses: + value: > + Authors note the following limitations: "Kakao Brain tried to construct a + "Safe" + dataset when building the COYO dataset. However, despite these efforts, this + large-scale dataset was not hand-picked by humans to avoid the risk due to + its + very large size (over 700M). Keep in mind that the unscreened nature of the + dataset means that the collected images can lead to strongly discomforting + and + disturbing content for humans. The COYO dataset may contain some inappropriate + data, and any problems resulting from such data are the full responsibility + of the user who used it." 
explanation: See [[Disclaimer & Content Warning]](https://github.com/kakaobrain/coyo-dataset#disclaimer--content-warning) - value: "Authors note the following limitations: \"Kakao Brain tried to construct\ - \ a \"Safe\" dataset when building the COYO dataset. However, despite these\ - \ efforts, this large-scale dataset was not hand-picked by humans to avoid\ - \ the risk due to its very large size (over 700M). Keep in mind that the unscreened\ - \ nature of the dataset means that the collected images can lead to strongly\ - \ discomforting and disturbing content for humans. The COYO dataset may contain\ - \ some inappropriate data, and any problems resulting from such data are the\ - \ full responsibility of the user who used it.\"\n" - quality_control: '' - sample: [] - size: 747M image-text pairs - type: dataset - url: https://github.com/kakaobrain/coyo-dataset + monitoring: none + feedback: Feedback can be given by emailing at coyo at kakaobrain.com diff --git a/assets/khan.yaml b/assets/khan.yaml index abdf0636..75050a66 100644 --- a/assets/khan.yaml +++ b/assets/khan.yaml @@ -1,24 +1,26 @@ --- -- access: limited - adaptation: '' - created_date: 2023-03-14 - dependencies: [GPT-4 API] + +- type: application + name: Khanmigo + organization: Khan Academy description: An AI-powered assistant that functions as both a virtual tutor for students and a classroom assistant for teachers. - failures: '' - feedback: '' - intended_uses: '' + created_date: 2023-03-14 + url: https://www.khanacademy.org/khan-labs#khanmigo + dependencies: [GPT-4 API] + adaptation: '' + output_space: '' + quality_control: '' + access: limited license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown + explanation: > + The asset isn't released, and hence the license is unknown. + terms_of_service: '' + intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' monthly_active_users: '' - name: Khanmigo - organization: Khan Academy - output_space: '' - prohibited_uses: '' - quality_control: '' - terms_of_service: '' - type: application - url: https://www.khanacademy.org/khan-labs#khanmigo user_distribution: '' + failures: '' diff --git a/assets/laion.yaml b/assets/laion.yaml index 14972cc3..6a62325e 100644 --- a/assets/laion.yaml +++ b/assets/laion.yaml @@ -1,69 +1,79 @@ --- -- access: - explanation: The dataset index is available from [[dataset blog post]](https://laion.ai/blog/laion-400-open-dataset/) - value: open - analysis: '' + +- type: dataset + name: LAION-400M + # General + organization: LAION + description: LAION-400M is a dataset with CLIP-filtered 400 million image-text + pairs, their CLIP embeddings and kNN indices that allow efficient similarity + search. This dataset is entirely openly, freely accessible. created_date: + value: 2021-08-20 explanation: The date the [[blog post]](https://laion.ai/blog/laion-400-open-dataset/) was released. - value: 2021-08-20 + url: https://laion.ai/blog/laion-400-open-dataset/ datasheet: https://laion.ai/blog/laion-400-open-dataset/ + modality: text (English) and image + size: 400M image-text pairs + sample: [] + analysis: '' + # Construction dependencies: [CLIP, CommonCrawl] - description: LAION-400M is a dataset with CLIP-filtered 400 million image-text - pairs, their CLIP embeddings and kNN indices that allow efficient similarity - search. This dataset is entirely openly, freely accessible. 
+ included: '' excluded: + value: > + Authors apply the following filtering conditions on the WAT files downloaded + from Common Crawl: "All samples with less than 5 character alt-text length + or less than 5 KB image size are dropped. Duplicate removal is performed with + bloom filter based on URL and alt-text. We use CLIP to compute embeddings + of the image and alt-text. Then we compute the cosine similarity of both embeddings + and drop all samples with cosine similarity below 0.3. This threshold was + selected based on human inspections. We use the CLIP embeddings of images + and texts to filter out illegal contents." explanation: See [[Dataset and Methods]](https://arxiv.org/pdf/2111.02114.pdf#section.2) - value: "Authors apply the following filtering conditions on the WAT files downloaded\ - \ from Common Crawl: \"All samples with less than 5 character alt-text length\ - \ or less than 5 KB image size are dropped. Duplicate removal is performed\ - \ with bloom filter based on URL and alt-text. We use CLIP to compute embeddings\ - \ of the image and alt-text. Then we compute the cosine similarity of both\ - \ embeddings and drop all samples with cosine similarity below 0.3. This threshold\ - \ was selected based on human inspections. We use the CLIP embeddings of images\ - \ and texts to filter out illegal contents.\"\n" - feedback: '' - included: '' + quality_control: + value: The authors use CLIP embeddings of images and texts to filter out illegal + contents. They also use CLIP to tag image-text pairs as NSFW. They note that + less than 1% of images were detected as NSFW, which can be filtered out by + an user with NSFW tag. + explanation: See [[Dataset and Methods]](https://arxiv.org/pdf/2111.02114.pdf#section.2) + # Downstream + access: + value: open + explanation: The dataset index is available from [[dataset blog post]](https://laion.ai/blog/laion-400-open-dataset/) + license: + value: CC BY 4.0 + explanation: The license is listed on the [[dataset blog post]](https://laion.ai/blog/laion-400-open-dataset/) intended_uses: The authors recommend using the dataset "for research purposes" and warn that "this large-scale dataset is non-curated. It was built for research purposes to enable testing model training on larger scale for broad researcher and other interested communities, and is not meant for any real-world production or application." - license: - explanation: The license is listed on the [[dataset blog post]](https://laion.ai/blog/laion-400-open-dataset/) - value: CC BY 4.0 - modality: - explanation: text (English) and image - value: image, text - monitoring: '' - name: LAION-400M - organization: LAION prohibited_uses: No uses are explicitly prohibited by the license. Users are warned from using LAION-400M for any real-world production or application. - quality_control: - explanation: See [[Dataset and Methods]](https://arxiv.org/pdf/2111.02114.pdf#section.2) - value: The authors use CLIP embeddings of images and texts to filter out illegal - contents. They also use CLIP to tag image-text pairs as NSFW. They note that - less than 1% of images were detected as NSFW, which can be filtered out by - an user with NSFW tag. 
- sample: [] - size: 400M image-text pairs - type: dataset - url: https://laion.ai/blog/laion-400-open-dataset/ -- access: - explanation: The dataset index is available from [[dataset blog post]](https://laion.ai/blog/laion-5b/) - value: open - analysis: '' + monitoring: '' + feedback: '' + +- type: dataset + name: LAION-5B + # General + organization: LAION + description: LAION is a dataset of 5 billion image-text pairs from the Internet created_date: + value: 2022-12-12 explanation: The date the [[blog post]](https://laion.ai/blog/laion-5b/) was released. Note that the dataset was already compiled earlier, e.g. the Stable Diffusion model released earlier was trained on a subset of LAION-5B - value: 2022-12-12 + url: https://laion.ai/blog/laion-5b/ datasheet: https://laion.ai/blog/laion-5b/ + modality: text and image + size: 5B image-text pairs + sample: [] + analysis: '' + # Construction dependencies: [CLIP, mCLIP, CommonCrawl] - description: LAION is a dataset of 5 billion image-text pairs from the Internet + included: '' excluded: - explanation: See [[dataset blog post]](https://laion.ai/blog/laion-5b/) value: All samples with less than 5 characters alt-text length or less than 5 KB image size are dropped. All images with the too big resolution, potentially DOS bombs, were dropped before attempting to process them. Duplicate removal @@ -76,43 +86,45 @@ were selected based on human inspection of the test results. We use the CLIP embeddings of images and texts to filter out to the possible extent the illegal content. - feedback: '' - included: '' + explanation: See [[dataset blog post]](https://laion.ai/blog/laion-5b/) + quality_control: '' + # Downstream + access: + value: open + explanation: The dataset index is available from [[dataset blog post]](https://laion.ai/blog/laion-5b/) + license: + value: CC BY 4.0 + explanation: The license is listed on the [[dataset blog post]](https://laion.ai/blog/laion-5b/) intended_uses: The authors recommend using the dataset "for research purposes" and "do not recommend using it for creating ready-to-go industrial products, as the basic research about general properties and safety of such large-scale models, which we would like to encourage with this release, is still in progress" - license: - explanation: The license is listed on the [[dataset blog post]](https://laion.ai/blog/laion-5b/) - value: CC BY 4.0 - modality: - explanation: text and image - value: image, text - monitoring: '' - name: LAION-5B - organization: LAION prohibited_uses: No uses are explicitly prohibited by the license. Users are warned from using LAION-5B for non-research purposes. - quality_control: '' - sample: [] - size: 5B image-text pairs - type: dataset - url: https://laion.ai/blog/laion-5b/ -- access: - explanation: The dataset index is available from [[dataset blog post]](https://laion.ai/blog/laion-5b/) - value: open - analysis: '' + monitoring: '' + feedback: '' + +- type: dataset + name: LAION-2B-en + # General + organization: LAION + description: LAION-2B-en is a subset of the LAION-5B dataset and contains 2.3 + billion English image-text pairs. created_date: + value: 2022-12-12 explanation: The date the [[blog post]](https://laion.ai/blog/laion-5b/) was released. Note that the dataset was already compiled earlier, e.g. 
the Stable Diffusion model released earlier was trained on a subset of LAION-5B - value: 2022-12-12 + url: https://arxiv.org/pdf/2210.08402.pdf datasheet: https://laion.ai/blog/laion-5b/ + modality: text (English) and image + size: 2.32B image-text pairs + sample: [] + analysis: '' + # Construction dependencies: [CLIP, LAION-5B] - description: LAION-2B-en is a subset of the LAION-5B dataset and contains 2.3 - billion English image-text pairs. + included: '' excluded: - explanation: See [[dataset blog post]](https://laion.ai/blog/laion-5b/) value: All samples with less than 5 characters alt-text length or less than 5 KB image size are dropped. All images with the too big resolution, potentially DOS bombs, were dropped before attempting to process them. Duplicate removal @@ -125,50 +137,44 @@ were selected based on human inspection of the test results. We use the CLIP embeddings of images and texts to filter out to the possible extent the illegal content. - feedback: '' - included: '' + explanation: See [[dataset blog post]](https://laion.ai/blog/laion-5b/) + quality_control: '' + # Downstream + access: + value: open + explanation: The dataset index is available from [[dataset blog post]](https://laion.ai/blog/laion-5b/) + license: + value: CC BY 4.0 + explanation: The license is listed on the [[dataset blog post]](https://laion.ai/blog/laion-5b/) intended_uses: The authors recommend using the dataset "for research purposes" and "do not recommend using it for creating ready-to-go industrial products, as the basic research about general properties and safety of such large-scale models, which we would like to encourage with this release, is still in progress" - license: - explanation: The license is listed on the [[dataset blog post]](https://laion.ai/blog/laion-5b/) - value: CC BY 4.0 - modality: - explanation: text (English) and image - value: image, text - monitoring: '' - name: LAION-2B-en - organization: LAION prohibited_uses: No uses are explicitly prohibited by the license. Users are warned from using LAION-2B-en for non-research purposes. - quality_control: '' - sample: [] - size: 2.32B image-text pairs - type: dataset - url: https://arxiv.org/pdf/2210.08402.pdf -- access: open - analysis: Evaluated on COCO captioning and VQAv2 vision-language tasks. - created_date: 2023-03-28 - dependencies: [LLaMA, CLIP] + monitoring: '' + feedback: '' + +- type: model + name: OpenFlamingo + organization: LAION description: An open-source reproduction of DeepMind's Flamingo model. At its core, OpenFlamingo is a framework that enables training and evaluation of large multimodal models (LMMs). - feedback: '' - intended_uses: academic research purposes - license: MIT - modality: - explanation: image and text input with textual output - value: image, text; text + created_date: 2023-03-28 + url: https://laion.ai/blog/open-flamingo/ model_card: https://github.com/mlfoundations/open_flamingo/blob/main/MODEL_CARD.md - monitoring: '' - name: OpenFlamingo - organization: LAION - prohibited_uses: commercial use - quality_control: '' - size: 9B parameters (dense) + modality: image and text input with textual output + analysis: Evaluated on COCO captioning and VQAv2 vision-language tasks. 
+ size: 9B parameters + dependencies: [LLaMA, CLIP] training_emissions: '' - training_hardware: '' training_time: '' - type: model - url: https://laion.ai/blog/open-flamingo/ + training_hardware: '' + quality_control: '' + access: open + license: MIT + intended_uses: academic research purposes + prohibited_uses: commercial use + monitoring: '' + feedback: '' diff --git a/assets/latitude.yaml b/assets/latitude.yaml index 2e5bc564..f6d5e39c 100644 --- a/assets/latitude.yaml +++ b/assets/latitude.yaml @@ -1,33 +1,43 @@ --- -- access: - explanation: "The game is available to public upon sign up.\n" - value: limited - adaptation: '' + +- type: application + name: AI Dungeon + # General + organization: Latitude + description: > + AI Dungeon is a single-player text adventure game that uses AI to generate + content. created_date: - explanation: "Release date of the game on iOS and Android platforms [[Wikipedia]](https://en.wikipedia.org/wiki/AI_Dungeon).\n" value: 2019-12-17 + explanation: > + Release date of the game on iOS and Android platforms + [[Wikipedia]](https://en.wikipedia.org/wiki/AI_Dungeon). + url: https://play.aidungeon.io + # Construction dependencies: [OpenAI API] - description: "AI Dungeon is a single-player text adventure game that uses AI to\ - \ generate content.\n" - failures: '' - feedback: '' - intended_uses: '' + adaptation: '' + output_space: '' + quality_control: '' + # Downstream + access: + value: limited + explanation: > + The game is available to public upon sign up. + terms_of_service: https://play.aidungeon.io/main/termsOfService license: - explanation: "\"Subject to your compliance with these Terms, the Company hereby\ - \ grants to you, a personal, worldwide, royalty-free, non-assignable, non-sublicensable,\ - \ non-transferrable, and non-exclusive license to use the software provided\ - \ to you as part of the Services (and to download a single copy of the App\ - \ onto the equipment or device specified by us)\" - excerpt from the Terms\ - \ of Service document.\n" value: custom + explanation: > + "Subject to your compliance with these Terms, the Company hereby grants + to you, a personal, worldwide, royalty-free, non-assignable, + non-sublicensable, non-transferrable, and non-exclusive license to use + the software provided to you as part of the Services (and to download a + single copy of the App onto the equipment or device specified by us)" - + excerpt from the Terms of Service document. + intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' + # Deployment monthly_active_users: '' - name: AI Dungeon - organization: Latitude - output_space: '' - prohibited_uses: '' - quality_control: '' - terms_of_service: https://play.aidungeon.io/main/termsOfService - type: application - url: https://play.aidungeon.io user_distribution: '' + failures: '' diff --git a/assets/lehigh.yaml b/assets/lehigh.yaml index c153f345..9e8df418 100644 --- a/assets/lehigh.yaml +++ b/assets/lehigh.yaml @@ -1,31 +1,30 @@ --- -- access: open + +- type: model + name: BiomedGPT + organization: Lehigh University + description: BiomedGPT leverages self-supervision on large and diverse datasets + to accept multi-modal inputs and perform a range of downstream tasks. + created_date: 2023-05-26 + url: https://arxiv.org/pdf/2305.17100.pdf + model_card: '' + modality: image and text input, text output analysis: outperforms majority of preceding state-of-the-art models over 15 unique biomedical modalities. 
- created_date: 2023-05-26 + size: 472M parameters dependencies: - GPT-style autoregressive decoder - BiomedGPT biomedical datasets - description: BiomedGPT leverages self-supervision on large and diverse datasets - to accept multi-modal inputs and perform a range of downstream tasks. - feedback: '' - intended_uses: furthering research in developing unified and generalist models - for biomedicine. - license: Apache 2.0 - modality: - explanation: image and text input, text output - value: image, text; text - model_card: '' - monitoring: '' - name: BiomedGPT - organization: Lehigh University - prohibited_uses: '' + training_emissions: unknown + training_time: unknown + training_hardware: 10 NVIDIA A5000 GPUs quality_control: No specific quality control is mentioned in model training, though details on data processing and how the model was trained are provided in the paper. - size: 472M parameters (dense) - training_emissions: unknown - training_hardware: 10 NVIDIA A5000 GPUs - training_time: unknown - type: model - url: https://arxiv.org/pdf/2305.17100.pdf + access: open + license: Apache 2.0 + intended_uses: furthering research in developing unified and generalist models + for biomedicine. + prohibited_uses: '' + monitoring: '' + feedback: '' diff --git a/assets/linkedin.yaml b/assets/linkedin.yaml index a92551e9..0d3d2ab0 100644 --- a/assets/linkedin.yaml +++ b/assets/linkedin.yaml @@ -1,26 +1,27 @@ --- -- access: open - adaptation: '' - created_date: unknown - dependencies: [Azure Cognitive Services for Vision] + +- type: application + name: LinkedIn + organization: LinkedIn description: More than 40 percent of LinkedIn's feed posts include at least one image. We want every member to have equal access to opportunity and are committed to ensuring that we make images accessible to our members who are blind or who have low vision so they can be a part of the online conversation. With Azure Cognitive Service for Vision, we can provide auto-captioning to edit and support alt. text descriptions. - failures: '' - feedback: '' - intended_uses: '' - license: unknown - monitoring: '' - monthly_active_users: '' - name: LinkedIn - organization: LinkedIn + created_date: unknown + url: https://www.linkedin.com/ + dependencies: [Azure Cognitive Services for Vision] + adaptation: '' output_space: '' - prohibited_uses: '' quality_control: '' + access: open + license: unknown terms_of_service: '' - type: application - url: https://www.linkedin.com/ + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' + monthly_active_users: '' user_distribution: '' + failures: '' diff --git a/assets/lmsys.yaml b/assets/lmsys.yaml index 3058f7a0..6f07781e 100644 --- a/assets/lmsys.yaml +++ b/assets/lmsys.yaml @@ -1,25 +1,24 @@ --- -- access: open - analysis: Evaluated against similar LLMs using GPT-4 as a judge. - created_date: 2023-03-30 - dependencies: [LLaMA, ShareGPT conversations data] + +- type: model + name: Vicuna + organization: LMSYS description: An open-source chatbot trained by fine-tuning LLaMA on user-shared conversations collected from ShareGPT. 
- feedback: https://huggingface.co/datasets/bigcode/the-stack/discussions - intended_uses: research on LLMs and chatbots - license: Apache 2.0 - modality: - explanation: natural language text - value: text; text + created_date: 2023-03-30 + url: https://lmsys.org/blog/2023-03-30-vicuna/ model_card: https://huggingface.co/lmsys/vicuna-13b-delta-v0 - monitoring: '' - name: Vicuna - organization: LMSYS - prohibited_uses: '' - quality_control: '' - size: 13B parameters (dense) + modality: natural language text + analysis: Evaluated against similar LLMs using GPT-4 as a judge. + size: 13B parameters + dependencies: [LLaMA, ShareGPT conversations data] training_emissions: '' - training_hardware: 8 A100 GPUs training_time: 1 day - type: model - url: https://lmsys.org/blog/2023-03-30-vicuna/ + training_hardware: 8 A100 GPUs + quality_control: '' + access: open + license: Apache 2.0 + intended_uses: research on LLMs and chatbots + prohibited_uses: '' + monitoring: '' + feedback: https://huggingface.co/datasets/bigcode/the-stack/discussions diff --git a/assets/meta.yaml b/assets/meta.yaml index ad4d99eb..c44d7474 100644 --- a/assets/meta.yaml +++ b/assets/meta.yaml @@ -1,40 +1,51 @@ --- -- access: - explanation: Models are available for download from [[GitHub repository]](https://github.com/facebookresearch/esm#available-models) - value: open - analysis: '' + +- type: model + name: ESM-2 + organization: Meta + description: ESM-2 is a series of protein language models trained on protein sequences created_date: + value: 2022-10-31 explanation: The date the [[model paper]](https://www.biorxiv.org/content/10.1101/2022.07.20.500902v2.full.pdf+html) was released - value: 2022-10-31 + url: https://www.biorxiv.org/content/10.1101/2022.07.20.500902v2.full.pdf+html + model_card: none + modality: protein sequence + size: 15B parameters (dense model) + analysis: '' dependencies: [UniRef50, UniRef90] - description: ESM-2 is a series of protein language models trained on protein sequences - feedback: '' - intended_uses: '' + training_emissions: '' + training_time: '' + training_hardware: '' + quality_control: '' + access: + value: open + explanation: Models are available for download from [[GitHub repository]](https://github.com/facebookresearch/esm#available-models) license: - explanation: "The license is provided in the [[Github repository]](https://github.com/facebookresearch/esm#available-models)\n" value: MIT - modality: - explanation: protein sequence - value: text; text - model_card: none + explanation: > + The license is provided in the [[Github repository]](https://github.com/facebookresearch/esm#available-models) + intended_uses: '' + prohibited_uses: '' monitoring: '' - name: ESM-2 + feedback: '' + +- type: dataset + name: PMD + # General organization: Meta - prohibited_uses: '' - quality_control: '' - size: 15B parameters (dense) - training_emissions: '' - training_hardware: '' - training_time: '' - type: model - url: https://www.biorxiv.org/content/10.1101/2022.07.20.500902v2.full.pdf+html -- access: closed - analysis: none + description: PMD (Public Multimodal Datasets) is a collection of image-text datasets + introduced in the FLAVA work. 
created_date: - explanation: The date the model paper was released value: 2021-12-08 + explanation: The date the model paper was released + url: https://arxiv.org/abs/2112.04482 datasheet: none + modality: text, image + size: 70M + sample: [] + analysis: none + # Construction dependencies: - COCO - YFCC100M @@ -44,286 +55,329 @@ - Wikipedia - Conceptual Captions - Red Caps - description: PMD (Public Multimodal Datasets) is a collection of image-text datasets - introduced in the FLAVA work. + license: + value: unknown + explanation: > + The asset isn't released, and hence the license is unknown. + included: none excluded: YFCC100M is filtered for non-English captions and very short (< 2 word) captions. - feedback: none - included: none + quality_control: Beyond filtering mentioned in excluded, nothing further is done. + # Downstream + access: closed intended_uses: unknown - license: - explanation: "The asset isn't released, and hence the license is unknown.\n" - value: unknown - modality: - explanation: text, image - value: image, text + prohibited_uses: unknown monitoring: none - name: PMD + feedback: none + +- type: model + name: FLAVA organization: Meta - prohibited_uses: unknown - quality_control: Beyond filtering mentioned in excluded, nothing further is done. - sample: [] - size: 70M - type: dataset + description: FLAVA is a multimodal model composed of an image encoder, text encoder, + and multimodal encoder. + created_date: + value: 2021-12-08 + explanation: The date the model paper was released url: https://arxiv.org/abs/2112.04482 -- access: - explanation: "Model checkpoints are available for download from the [[HuggingFace\ - \ repository]](https://huggingface.co/facebook/flava-full)\n" - value: open + model_card: https://huggingface.co/facebook/flava-full + modality: Text (English) and Image + size: + value: 306M + explanation: > + 110M (Language encoder) + 86M (Vision encoder) + 110M (mul encoder) analysis: FLAVA is benchmarked on a range of vision-only (e.g. CIFAR-10), language-only (e.g. GLUE), and multimodal (e.g. Hateful Memes) standard evaluations. - created_date: - explanation: The date the model paper was released - value: 2021-12-08 dependencies: [PMD] - description: FLAVA is a multimodal model composed of an image encoder, text encoder, - and multimodal encoder. - feedback: https://huggingface.co/facebook/flava-full/discussions - intended_uses: "Per the [[HuggingFace repository]](https://huggingface.co/facebook/flava-full),\ - \ \"The model is intended to serve as a reproducible research artifact for research\ - \ communities in the light of models whose exact reproduction details are never\ - \ released such as CLIP and SimVLM.\"\n" + training_emissions: unknown + training_time: unknown + training_hardware: unknown + quality_control: FLAVA introduces a variety of new modeling techniques, specifically + with an interest in improved text-image alignment through contrastive objectives. 
+ access: + value: open + explanation: > + Model checkpoints are available for download from the [[HuggingFace + repository]](https://huggingface.co/facebook/flava-full) license: - explanation: "The license is provided in the [[HuggingFace repository]](https://huggingface.co/facebook/flava-full)\n" value: BSD-3-Clause - modality: - explanation: Text (English) and Image - value: image, text; image, text - model_card: https://huggingface.co/facebook/flava-full + explanation: > + The license is provided in the [[HuggingFace repository]](https://huggingface.co/facebook/flava-full) + intended_uses: > + Per the [[HuggingFace repository]](https://huggingface.co/facebook/flava-full), + "The model is intended to serve as a reproducible research artifact for research + communities in the light of models whose exact reproduction details are never + released such as CLIP and SimVLM." + prohibited_uses: > + Per the [[HuggingFace repository]](https://huggingface.co/facebook/flava-full), + "Any deployed use case of the model - whether commercial or not" - is currently + out of scope. monitoring: none - name: FLAVA + feedback: https://huggingface.co/facebook/flava-full/discussions + +- type: dataset + name: The Galactica Corpus + # General organization: Meta - prohibited_uses: "Per the [[HuggingFace repository]](https://huggingface.co/facebook/flava-full),\ - \ \"Any deployed use case of the model - whether commercial or not\" - is currently\ - \ out of scope.\n" - quality_control: FLAVA introduces a variety of new modeling techniques, specifically - with an interest in improved text-image alignment through contrastive objectives. - size: 306M parameters (dense) - training_emissions: unknown - training_hardware: unknown - training_time: unknown - type: model - url: https://arxiv.org/abs/2112.04482 -- access: closed - analysis: none + description: The Galactica Corpus is a collection of scientific datasets introduced + in the Galactica work. created_date: - explanation: "The date the Galactica paper was released\n" value: 2022-11-15 + explanation: > + The date the Galactica paper was released + url: https://galactica.org/static/paper.pdf datasheet: none + modality: text + size: 106B tokens + sample: [] + analysis: none + # Construction dependencies: [CommonCrawl, Wikipedia, arXiv] - description: The Galactica Corpus is a collection of scientific datasets introduced - in the Galactica work. - excluded: '' - feedback: none + license: + value: unknown + explanation: > + The asset isn't released, and hence the license is unknown. included: Prompts and reasoning data is explicitly included to improve model capabilities derived from this data. + excluded: '' + quality_control: '' + # Downstream + access: closed intended_uses: unknown - license: - explanation: "The asset isn't released, and hence the license is unknown.\n" - value: unknown - modality: text + prohibited_uses: unknown monitoring: none - name: The Galactica Corpus + feedback: none + +- type: model + name: Galactica organization: Meta - prohibited_uses: unknown - quality_control: '' - sample: [] - size: 106B tokens - type: dataset - url: https://galactica.org/static/paper.pdf -- access: - explanation: Model checkpoints freely available at https://github.com/paperswithcode/galai - value: open - analysis: '' + description: Galactica is a family of autoregressive language models. 
created_date: - explanation: "The date the Galactica paper was released\n" value: 2022-11-15 - dependencies: [The Galactica Corpus] - description: Galactica is a family of autoregressive language models. - feedback: '' - intended_uses: '' - license: - explanation: https://github.com/paperswithcode/galai/blob/main/LICENSE-MODEL.md - value: CC BY-NC 4.0 - modality: - explanation: Text (English), Code, Math, Chemistry, Biology - value: code, text; code, text + explanation: > + The date the Galactica paper was released + url: https://galactica.org/static/paper.pdf model_card: https://huggingface.co/facebook/galactica-6.7b - monitoring: '' - name: Galactica - organization: Meta - prohibited_uses: '' - quality_control: '' - size: 120B parameters (dense) + modality: Text (English), Code, Math, Chemistry, Biology + analysis: '' + size: 120B parameters (dense model) + dependencies: [The Galactica Corpus] training_emissions: unknown + training_time: unknown training_hardware: Meta AI Cluster. Trained on 1024 80GB A100 GPUs (128 8xA100 80GB nodes) - training_time: unknown - type: model - url: https://galactica.org/static/paper.pdf -- access: - explanation: Model weights are available via the [[HuggingFace repository]](https://huggingface.co/facebook/incoder-6B) + quality_control: '' + access: value: open - analysis: none - created_date: - explanation: The date the model paper was released - value: 2022-04-12 - dependencies: [] - description: InCoder is a language model trained on code with a causal masking - objective - feedback: '' - intended_uses: '' + explanation: Model checkpoints freely available at https://github.com/paperswithcode/galai license: - explanation: The license is provided in the [[HuggingFace repository]](https://huggingface.co/facebook/incoder-6B?text=My+name+is+Lewis+and+I+like+to) value: CC BY-NC 4.0 - modality: - explanation: text (English) and code - value: code, text; code, text - model_card: none + explanation: https://github.com/paperswithcode/galai/blob/main/LICENSE-MODEL.md + intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' + +- type: model name: InCoder organization: Meta, CMU, TTI-Chicago, UC Berkeley, University of Washington - prohibited_uses: '' - quality_control: unknown - size: 6B parameters (dense) + description: InCoder is a language model trained on code with a causal masking + objective + created_date: + value: 2022-04-12 + explanation: The date the model paper was released + url: https://arxiv.org/abs/2204.05999 + model_card: none + modality: text (English) and code + analysis: none + size: 6B parameters (dense model) + dependencies: [] training_emissions: Unknown - training_hardware: 248 V100 GPUs, according to [[the paper]](https://arxiv.org/pdf/2204.05999.pdf) training_time: 24 days, according to [[the paper]](https://arxiv.org/pdf/2204.05999.pdf) - type: model - url: https://arxiv.org/abs/2204.05999 -- access: - explanation: The 175B model requires manual approval from Meta to access. Other - models are available through HuggingFace. - value: limited - analysis: '' - created_date: - explanation: "The date the OPT paper was submitted to Arxiv\n" - value: 2022-05-01 - dependencies: [RoBERTa dataset, The Pile, PushShift.io Reddit] - description: OPT is a family of autoregressive language models. 
- feedback: '' - intended_uses: '' + training_hardware: 248 V100 GPUs, according to [[the paper]](https://arxiv.org/pdf/2204.05999.pdf) + quality_control: unknown + access: + value: open + explanation: Model weights are available via the [[HuggingFace repository]](https://huggingface.co/facebook/incoder-6B) license: - explanation: "All released with the [[OPT-175B License]](https://github.com/facebookresearch/metaseq/blob/main/projects/OPT/MODEL_LICENSE.md),\ - \ except 66B (TBD) and 17B (requires manual approval)\n" - value: OPT-175B License - modality: - explanation: text (English) - value: text; text - model_card: https://arxiv.org/pdf/2205.01068.pdf + value: CC BY-NC 4.0 + explanation: The license is provided in the [[HuggingFace repository]](https://huggingface.co/facebook/incoder-6B?text=My+name+is+Lewis+and+I+like+to) + intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' + +- type: model name: OPT organization: Meta - prohibited_uses: '' - quality_control: '' - size: 175B parameters (dense) + description: OPT is a family of autoregressive language models. + created_date: + value: 2022-05-01 + explanation: > + The date the OPT paper was submitted to Arxiv + url: https://arxiv.org/abs/2205.01068 + model_card: https://arxiv.org/pdf/2205.01068.pdf + modality: text (English) + analysis: '' + size: 175B parameters (dense model) + dependencies: [RoBERTa dataset, The Pile, PushShift.io Reddit] training_emissions: - explanation: "Estimate by authors for the OPT-175B model only. Not including\ - \ ablations and baselines.\n" value: 75 tCO2e - training_hardware: Meta AI cluster. Trained on 992 80GB A100 GPUs + explanation: > + Estimate by authors for the OPT-175B model only. Not including ablations and + baselines. training_time: '' - type: model - url: https://arxiv.org/abs/2205.01068 -- access: - explanation: "The datasets involved are public, but the full dataset is not\ - \ directly available, nor are filtering scripts.\n" + training_hardware: Meta AI cluster. Trained on 992 80GB A100 GPUs + quality_control: '' + access: value: limited - analysis: '' + explanation: The 175B model requires manual approval from Meta to access. Other + models are available through HuggingFace. + license: + value: OPT-175B License + explanation: > + All released with the + [[OPT-175B License]](https://github.com/facebookresearch/metaseq/blob/main/projects/OPT/MODEL_LICENSE.md), + except 66B (TBD) and 17B (requires manual approval) + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' + +- type: dataset + name: Make-A-Video dataset + # General + organization: Meta + description: > + The Make-A-Video dataset is the dataset used to train Make-A-Video, which includes + both image-text and video-only datasets with specific and significant filtering. created_date: - explanation: "The date that Make-A-Video was posted to arXiv [[arXiv]] (https://arxiv.org/abs/2209.14792).\n" value: 2022-09-29 + explanation: > + The date that Make-A-Video was posted to arXiv + [[arXiv]] + (https://arxiv.org/abs/2209.14792). 
+ url: https://arxiv.org/pdf/2209.14792.pdf datasheet: none + modality: video, image-text + size: 20M video clips, 2.3B image-text pairs + sample: [] + analysis: '' + # Construction dependencies: [LAION-5B, WebVid-10M, HD-VILA-100M] - description: "The Make-A-Video dataset is the dataset used to train Make-A-Video,\ - \ which includes both image-text and video-only datasets with specific and significant\ - \ filtering.\n" - excluded: "The LAION-5B dataset is filtered to 2.3B by removing NSFW images using\ - \ [https://github.com/GantMan/nsfw](https://github.com/GantMan/nsfw), toxic\ - \ words in text, and images with watermark probability > 0.5. The HD-VILA-100M\ - \ is randomly subsampled to 10M video clips.\n" - feedback: - explanation: "No feedback mechanism is mentioned by the authors.\n" + license: value: none + explanation: > + No license was found, though the underlying datasets are public and have + licenses. included: - explanation: "Data from the three underlying datasets is filtered, but nothing\ - \ is included beyond this.\n" value: none + explanation: > + Data from the three underlying datasets is filtered, but nothing is included + beyond this. + excluded: > + The LAION-5B dataset is filtered to 2.3B by removing NSFW images using [https://github.com/GantMan/nsfw](https://github.com/GantMan/nsfw), + toxic words in text, and images with watermark probability > 0.5. The HD-VILA-100M + is randomly subsampled to 10M video clips. + quality_control: > + The authors exclude NSFW, toxic, and likely watermarked data from LAION-5B. + # Downstream + access: + value: limited + explanation: > + The datasets involved are public, but the full dataset is not directly available, + nor are filtering scripts. intended_uses: unknown - license: - explanation: "No license was found, though the underlying datasets are public\ - \ and have licenses.\n" - value: none - modality: - explanation: video, image-text - value: image, text, video + prohibited_uses: unknown monitoring: - explanation: "There is no information on how Meta is internally monitoring the\ - \ use of the dataset.\n" value: unknown - name: Make-A-Video dataset + explanation: > + There is no information on how Meta is internally monitoring + the use of the dataset. + feedback: + value: none + explanation: > + No feedback mechanism is mentioned by the authors. + +- type: model + name: Make-A-Video + # General organization: Meta - prohibited_uses: unknown - quality_control: "The authors exclude NSFW, toxic, and likely watermarked data\ - \ from LAION-5B.\n" - sample: [] - size: 20M video clips, 2.3B image-text pairs - type: dataset - url: https://arxiv.org/pdf/2209.14792.pdf -- access: - explanation: "The model has not been released; a form existed to potentially\ - \ acquire access but is now closed as of 2022-12-07 [[Access Form]](https://docs.google.com/forms/u/0/d/e/1FAIpQLSfMjC57wcXWUDV0UbS2Tn6VhjLEiCXaHvWZuWgWRa-Zx8-Few/closedform).\n" - value: closed - analysis: "Model performance was evaluated using automated (Frechet Video Distance;\ - \ Frechet Inception Distance) and human evaluation on two datasets (UCF-101,\ - \ MSR-VTT) in the zero-shot setting.\n" + description: > + Make-A-Video is a model for Text-to-Video Generation without Text-Video Data. created_date: - explanation: "The date that Make-A-Video was posted to arXiv [[arXiv]] (https://arxiv.org/abs/2209.14792).\n" value: 2022-09-29 + explanation: > + The date that Make-A-Video was posted to arXiv + [[arXiv]] + (https://arxiv.org/abs/2209.14792). 
+ url: https://arxiv.org/pdf/2209.14792.pdf + model_card: none + modality: text, video + size: + value: unknown + explanation: > + The authors do not state the model size in the paper. + analysis: > + Model performance was evaluated using automated (Frechet Video Distance; Frechet + Inception Distance) and human evaluation on two datasets (UCF-101, MSR-VTT) + in the zero-shot setting. + # Construction dependencies: [Make-A-Video dataset] - description: "Make-A-Video is a model for Text-to-Video Generation without Text-Video\ - \ Data.\n" - feedback: - explanation: "Authors do not mention or provide a feedback mechanism.\n" - value: none - intended_uses: - explanation: "Authors do not report the intended uses.\n" + training_emissions: value: unknown - license: - explanation: No license was found. - value: none - modality: - explanation: text, video - value: text, video; text, video - model_card: none - monitoring: - explanation: "Authors do not report the monitoring process for Make-A-Video\ - \ internally at Meta.\n" + explanation: > + Authors do not report the training emissions. + training_time: value: unknown - name: Make-A-Video - organization: Meta - prohibited_uses: - explanation: "Authors do not report the prohibited uses.\n" + explanation: > + Authors do not report the training time. + training_hardware: value: unknown + explanation: > + Authors do not report the training hardware or provider. quality_control: - explanation: "Authors do not report specific quality control steps taken in\ - \ modeling, though filtering is done in producing the Make-A-Video dataset.\n" value: none - size: unknown - training_emissions: - explanation: "Authors do not report the training emissions.\n" + explanation: > + Authors do not report specific quality control steps taken in modeling, though + filtering is done in producing the Make-A-Video dataset. + # Downstream + access: + value: closed + explanation: > + The model has not been released; a form existed to potentially acquire access + but is now closed as of 2022-12-07 + [[Access Form]](https://docs.google.com/forms/u/0/d/e/1FAIpQLSfMjC57wcXWUDV0UbS2Tn6VhjLEiCXaHvWZuWgWRa-Zx8-Few/closedform). + license: + value: none + explanation: No license was found. + intended_uses: value: unknown - training_hardware: - explanation: "Authors do not report the training hardware or provider.\n" + explanation: > + Authors do not report the intended uses. + prohibited_uses: value: unknown - training_time: - explanation: "Authors do not report the training time.\n" + explanation: > + Authors do not report the prohibited uses. + monitoring: value: unknown - type: model - url: https://arxiv.org/pdf/2209.14792.pdf -- access: open - analysis: '' + explanation: > + Authors do not report the monitoring process for Make-A-Video internally at + Meta. + feedback: + value: none + explanation: > + Authors do not mention or provide a feedback mechanism. 
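+
+# Illustrative sketch of the field layout that reordered model entries in this
+# file follow. All names and values below are placeholders, not a real asset:
+#
+#  - type: model
+#    name: Example-Model
+#    organization: Example Organization
+#    description: one-sentence summary of the model
+#    created_date: YYYY-MM-DD
+#    url: https://example.org/paper
+#    model_card: none
+#    modality: text
+#    analysis: ''
+#    size: unknown
+#    dependencies: []
+#    training_emissions: unknown
+#    training_time: unknown
+#    training_hardware: unknown
+#    quality_control: ''
+#    access: open
+#    license: unknown
+#    intended_uses: ''
+#    prohibited_uses: ''
+#    monitoring: ''
+#    feedback: ''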
+ +- type: model + name: LLaMA + organization: Meta + description: '' created_date: 2023-02-24 + url: https://arxiv.org/abs/2302.13971 + model_card: '' + modality: Text + analysis: '' + size: 65B parameters (dense model) dependencies: - CommonCrawl - C4 @@ -332,311 +386,306 @@ - BooksCorpus - arXiv - StackExchange - description: '' - feedback: '' - intended_uses: '' + training_emissions: '' + training_time: '' + training_hardware: '' + quality_control: '' + access: open license: LLaMa License (model weights), GPLv3 (code) - modality: - explanation: Text - value: text; text - model_card: '' + intended_uses: '' + prohibited_uses: '' monitoring: '' - name: LLaMA + feedback: '' + +- type: model + name: LLaMA 2 organization: Meta - prohibited_uses: '' - quality_control: '' - size: 65B parameters (dense) - training_emissions: '' - training_hardware: '' - training_time: '' - type: model - url: https://arxiv.org/abs/2302.13971 -- access: open - analysis: Evaluated on standard academic benchmarks and internal Meta libraries. - created_date: 2023-07-18 - dependencies: [] description: LLaMA 2 is an updated version of LLaMA trained on a new mix of publicly available data. - feedback: '' + created_date: 2023-07-18 + url: https://ai.meta.com/resources/models-and-libraries/llama/ + model_card: Can be found at appendix of paper at https://ai.meta.com/research/publications/llama-2-open-foundation-and-fine-tuned-chat-models/ + modality: text + analysis: Evaluated on standard academic benchmarks and internal Meta libraries. + size: 70B parameters (dense model) + dependencies: [] + training_emissions: 539 tCO2eq + training_time: '' + training_hardware: NVIDIA A100-80GB GPUs (TDP of 350-400W) + quality_control: '' + access: open + license: + value: custom + explanation: The license can be found at https://ai.meta.com/resources/models-and-libraries/llama-downloads/ intended_uses: LLaMA 2 is intended for commercial and research use in English. Tuned models are intended for assistant-like chat, whereas pretrained models can be adapted for a variety of natural language generation tasks. - license: - explanation: The license can be found at https://ai.meta.com/resources/models-and-libraries/llama-downloads/ - value: custom - modality: - explanation: text - value: text; text - model_card: Can be found at appendix of paper at https://ai.meta.com/research/publications/llama-2-open-foundation-and-fine-tuned-chat-models/ - monitoring: '' - name: LLaMA 2 - organization: Meta prohibited_uses: Use in any manner that violates applicable laws or regulations (including trade compliance laws). Use in languages other than English. Use in any other way that is prohibited by the Acceptable Use Policy and Licensing Agreement for LLaMA 2. 
- quality_control: '' - size: 70B parameters (dense) - training_emissions: 539 tCO2eq - training_hardware: NVIDIA A100-80GB GPUs (TDP of 350-400W) - training_time: '' - type: model - url: https://ai.meta.com/resources/models-and-libraries/llama/ -- access: open - analysis: '' - created_date: 2022-12-22 - dependencies: [OPT, OPT-IML Bench] - description: '' - feedback: '' - intended_uses: '' - license: OPT-IML 175B License - modality: - explanation: text - value: text; text - model_card: '' monitoring: '' + feedback: '' + +- type: model name: OPT-IML organization: Meta - prohibited_uses: '' - quality_control: '' + description: '' + created_date: 2022-12-22 + url: https://arxiv.org/abs/2212.12017 + model_card: '' + modality: text + analysis: '' size: 175B parameters (dense) + dependencies: [OPT, OPT-IML Bench] training_emissions: '' - training_hardware: '' training_time: '' - type: model - url: https://arxiv.org/abs/2212.12017 -- access: - explanation: "The full dataset can be downloaded at [[SA-1B Download]](https://ai.facebook.com/datasets/segment-anything-downloads/).\ - \ A 50k image preview of the full dataset is available [[here]](https://segment-anything.com/dataset/index.html).\n" - value: open - analysis: '' + training_hardware: '' + quality_control: '' + access: open + license: OPT-IML 175B License + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' + +- type: dataset + name: SA-1B + # General + organization: Meta + description: > + SA-1B (Segment Anything 1 Billion) is a dataset designed for training general-purpose + object segmentation models from open world images. It consists of 11M diverse, + high-resolution, privacy protecting images and 1.1B high-quality segmentation + masks. created_date: + value: 2023-04-05 explanation: The date the [[Meta blog post]](https://ai.facebook.com/blog/segment-anything-foundation-model-image-segmentation/) was released. - value: 2023-04-05 + url: https://ai.facebook.com/datasets/segment-anything/ datasheet: + value: https://arxiv.org/pdf/2304.02643.pdf#page=25 explanation: Datasheet can be found in the Appendix section of the Segment Anything paper. - value: https://arxiv.org/pdf/2304.02643.pdf#page=25 + modality: image + size: 11M images, 1.1B mask annotations + sample: [] + analysis: '' + # Construction dependencies: [] - description: "SA-1B (Segment Anything 1 Billion) is a dataset designed for training\ - \ general-purpose object segmentation models from open world images. It consists\ - \ of 11M diverse, high-resolution, privacy protecting images and 1.1B high-quality\ - \ segmentation masks.\n" - excluded: - explanation: See [[Datasheet]](https://arxiv.org/pdf/2304.02643.pdf#page=25) - value: "\"We withheld ~2k randomly selected images for testing purposes.\" \ - \ \"Each image is accompanied by a short caption that describes the content\ - \ and place of the photo in a free form text. Per our agreement with the photo\ - \ provider we are not allowed to release these captions.\"\n" - feedback: Feedback can be given via the feedback form on their website [segment-anything.com](https://segment-anything.com/) - or by emailing at segment-anything at meta.com. + license: + value: SA-1B Dataset Research License + explanation: SA-1B is released under a favorable license agreement for certain + research uses and with protections for researchers. See [[SA-1B Dataset Research + License]](https://ai.facebook.com/datasets/segment-anything-downloads/). 
included: + value: > + SA-1B consists of 11M diverse, high-resolution (averaging 1500×2250 pixels), + and privacy protecting images collected and licensed from a third party photo + company. The images are photos taken from a camera, i.e. not artwork. + The images vary in subject matter. Common themes of the images include: locations, + objects, scenes. + The dataset includes 1.1B high-quality segmentation masks collected with the + Segment Anything Data Engine. + SA-1B only includes automatically generated masks (99.1%), as the authors + conclude after experiments that the automatic masks are high quality and effective + for training models. The masks range from large scale objects such as buildings + to fine grained details such as door handles. + Masks are provided in the COCO run-length encoding (RLE) annotation format. explanation: According to section [[Segment Anything Dataset]](https://arxiv.org/pdf/2304.02643.pdf#section.5) of the paper and [[SA-1B website]](https://ai.facebook.com/datasets/segment-anything/). - value: "SA-1B consists of 11M diverse, high-resolution (averaging 1500×2250\ - \ pixels), and privacy protecting images collected and licensed from a third\ - \ party photo company. The images are photos taken from a camera, i.e. not\ - \ artwork. The images vary in subject matter. Common themes of the images\ - \ include: locations, objects, scenes. The dataset includes 1.1B high-quality\ - \ segmentation masks collected with the Segment Anything Data Engine. SA-1B\ - \ only includes automatically generated masks (99.1%), as the authors conclude\ - \ after experiments that the automatic masks are high quality and effective\ - \ for training models. The masks range from large scale objects such as buildings\ - \ to fine grained details such as door handles. Masks are provided in the\ - \ COCO run-length encoding (RLE) annotation format.\n" + excluded: + value: > + "We withheld ~2k randomly selected images for testing purposes." "Each image + is accompanied by a short caption that describes the content and place of + the photo in a free form text. Per our agreement with the photo provider we + are not allowed to release these captions." + explanation: See [[Datasheet]](https://arxiv.org/pdf/2304.02643.pdf#page=25) + quality_control: + value: | + - Dataset quality: + Due to potential accessibility and storage challenges, the original high-resolution images (averaging 3300×4950 pixels) were downsampled to an average resolution of 1500×2250 pixels. Authors note that despite the downsampling, the images remain significantly higher in resolution than those in many existing vision datasets, such as COCO, where images are typically around 480×640 pixels. + The images were processed to blur faces and license plates to protect the identities of those in the image. + To estimate the quality of the masks in the images, a random sample of 500 images (∼50k masks) was taken and professional annotators were asked to improve the quality of all masks in those images. + - Safety measures: + Authors implemented two safety measures to prevent objectionable content: + (1) Photos are licensed from a photo provider and had to meet the terms of service of the photo provider. Authors requested that all objectionable content be filtered from the images they licensed. + (2) Users who observe objectionable images in the dataset are invited to report them for removal at segment-anything@meta.com. 
+ Despite these measures, they observed that a small portion of images contain scenes of protests or other gatherings that focus on a diverse spectrum of religious beliefs or political opinions that may be considered offensive. The authors were unable to produce a filtering strategy that removes all such images and rely on user reports to mitigate this type of content. + explanation: According to sections [[Segment Anything Dataset]](https://arxiv.org/pdf/2304.02643.pdf#section.5) + and [[Datasheet]](https://arxiv.org/pdf/2304.02643.pdf#page=25) of the paper. + # Downstream + access: + value: open + explanation: > + The full dataset can be downloaded at [[SA-1B Download]](https://ai.facebook.com/datasets/segment-anything-downloads/). + A 50k image preview of the full dataset is available [[here]](https://segment-anything.com/dataset/index.html). intended_uses: - explanation: See [[SA-1B website]](https://ai.facebook.com/datasets/segment-anything/) value: SA-1B is intended to be used for research purposes only. It allows access to a privacy protecting and copyright friendly large-scale image dataset. Researchers can use it to train and evaluate generic object segmentation models. - license: - explanation: SA-1B is released under a favorable license agreement for certain - research uses and with protections for researchers. See [[SA-1B Dataset Research - License]](https://ai.facebook.com/datasets/segment-anything-downloads/). - value: SA-1B Dataset Research License - modality: image + explanation: See [[SA-1B website]](https://ai.facebook.com/datasets/segment-anything/) + prohibited_uses: + value: > + Authors note the following limitations of the dataset: + The masks are generated by a segmentation model, so there may be errors + or inconsistencies in the masks. + While no two images are the same, there are instances of images of the same + subject taken close together in time. + The dataset contains scenes of protests, or other gatherings that may suggest + religious beliefs, political opinions or union memberships that may be offensive. + + explanation: See [[Datasheet]](https://arxiv.org/pdf/2304.02643.pdf#page=25) monitoring: + value: > + The dataset will be hosted at https://ai.facebook.com/datasets/segment-anything + and maintained by Meta AI. + "If a user observes objectionable image(s) in the dataset, we invite them + to report the image(s) at segment-anything at meta.com for removal" + "To aid reproducibility of research using SA-1B, the only updates (to the + dataset) will be to remove reported images." + "We encourage users to gather further annotations for SA-1B. Any users who + generate annotations will be liable for hosting and distributing their annotations." explanation: See [[Datasheet]](https://arxiv.org/pdf/2304.02643.pdf#page=25) - value: "The dataset will be hosted at https://ai.facebook.com/datasets/segment-anything\ - \ and maintained by Meta AI. \"If a user observes objectionable image(s) in\ - \ the dataset, we invite them to report the image(s) at segment-anything at\ - \ meta.com for removal\" \"To aid reproducibility of research using SA-1B,\ - \ the only updates (to the dataset) will be to remove reported images.\" \"\ - We encourage users to gather further annotations for SA-1B. 
Any users who\ - \ generate annotations will be liable for hosting and distributing their annotations.\"\ - \n" - name: SA-1B + feedback: Feedback can be given via the feedback form on their website [segment-anything.com](https://segment-anything.com/) + or by emailing at segment-anything at meta.com. + +- type: model + name: SAM + # General organization: Meta - prohibited_uses: - explanation: See [[Datasheet]](https://arxiv.org/pdf/2304.02643.pdf#page=25) - value: "Authors note the following limitations of the dataset:\n The masks\ - \ are generated by a segmentation model, so there may be errors\nor inconsistencies\ - \ in the masks.\n While no two images are the same, there are instances of\ - \ images of the same\nsubject taken close together in time.\n The dataset\ - \ contains scenes of protests, or other gatherings that may suggest\nreligious\ - \ beliefs, political opinions or union memberships that may be offensive.\n" - quality_control: - explanation: According to sections [[Segment Anything Dataset]](https://arxiv.org/pdf/2304.02643.pdf#section.5) - and [[Datasheet]](https://arxiv.org/pdf/2304.02643.pdf#page=25) of the paper. - value: "- Dataset quality:\n Due to potential accessibility and storage challenges,\ - \ the original high-resolution images (averaging 3300×4950 pixels) were downsampled\ - \ to an average resolution of 1500×2250 pixels. Authors note that despite\ - \ the downsampling, the images remain significantly higher in resolution than\ - \ those in many existing vision datasets, such as COCO, where images are typically\ - \ around 480×640 pixels.\n The images were processed to blur faces and license\ - \ plates to protect the identities of those in the image.\n To estimate the\ - \ quality of the masks in the images, a random sample of 500 images (∼50k\ - \ masks) was taken and professional annotators were asked to improve the quality\ - \ of all masks in those images.\n- Safety measures:\n Authors implemented\ - \ two safety measures to prevent objectionable content:\n (1) Photos are\ - \ licensed from a photo provider and had to meet the terms of service of the\ - \ photo provider. Authors requested that all objectionable content be filtered\ - \ from the images they licensed.\n (2) Users who observe objectionable\ - \ images in the dataset are invited to report them for removal at segment-anything@meta.com.\n\ - \ Despite these measures, they observed that a small portion of images contain\ - \ scenes of protests or other gatherings that focus on a diverse spectrum\ - \ of religious beliefs or political opinions that may be considered offensive.\ - \ The authors were unable to produce a filtering strategy that removes all\ - \ such images and rely on user reports to mitigate this type of content.\n" - sample: [] - size: 11M images, 1.1B mask annotations - type: dataset - url: https://ai.facebook.com/datasets/segment-anything/ -- access: - explanation: "Inference code and model checkpoints are available on the model's\ - \ [[GitHub repository]](https://github.com/facebookresearch/segment-anything).\ - \ Its training dataset SA-1B can be used for research purposes and is available\ - \ for download [here](https://ai.facebook.com/datasets/segment-anything-downloads/).\n" - value: open - analysis: - explanation: See [[Zero-Shot Transfer Experiments]](https://arxiv.org/pdf/2304.02643.pdf#section.7) - for more details. - value: "\"We extensively evaluate SAM. 
First, using a diverse new suite of 23\ - \ segmentation datasets, we find that SAM produces high-quality masks from\ - \ a single foreground point, often only slightly below that of the manually\ - \ annotated ground truth. Second, we find consistently strong quantitative\ - \ and qualitative results on a variety of downstream tasks under a zero-shot\ - \ transfer protocol using prompt engineering, including edge detection, object\ - \ proposal generation, instance segmentation, and a preliminary exploration\ - \ of text-to-mask prediction.\"\n" - created_date: - explanation: The date the [[Meta blog post]](https://ai.facebook.com/blog/segment-anything-foundation-model-image-segmentation/) - was released. - value: 2023-04-05 - dependencies: [SA-1B] description: SAM (Segment Anything Model) is a foundation model for image segmentation. The model is designed and trained to be promptable, and supports flexible prompts (point, box, mask and free-form text) to compute masks in real-time to allow interactive use. - feedback: Feedback can be given via the feedback form on their website [segment-anything.com](https://segment-anything.com/) - or by emailing at segment-anything at meta.com. - intended_uses: - explanation: See [[Model card]](https://arxiv.org/pdf/2304.02643.pdf#page=28) - value: "\"SAM is intended to be used for any prompt-based segmentation task.\ - \ We explored its use in segmenting objects from a point, edge detection,\ - \ segmenting all objects, and segmenting detected objects. We explored how\ - \ SAM can integrate with other vision models to segment objects from text.\"\ - \n" - license: - explanation: See [[LICENSE]](https://github.com/facebookresearch/segment-anything/blob/main/LICENSE) - value: Apache 2.0 - modality: - explanation: image, text - value: image, text; image, text + created_date: + value: 2023-04-05 + explanation: The date the [[Meta blog post]](https://ai.facebook.com/blog/segment-anything-foundation-model-image-segmentation/) + was released. + url: https://arxiv.org/pdf/2304.02643.pdf model_card: - explanation: Model card can be found in the Appendix section of the paper. value: https://arxiv.org/pdf/2304.02643.pdf#page=28 - monitoring: '' - name: SAM - organization: Meta - prohibited_uses: - explanation: See [[Discussion]](https://arxiv.org/pdf/2304.02643.pdf#section.8) - value: "For out-of-scope use cases see terms of use in [[LICENSE]](https://github.com/facebookresearch/segment-anything/blob/main/LICENSE).\ - \ Authors also discuss the following limitations of the model: \"While SAM\ - \ performs well in general, it is not perfect. It can miss fine structures,\ - \ hallucinates small disconnected components at times, and does not produce\ - \ boundaries as crisply as more computationally intensive methods that “zoom-in”,\ - \ e.g. [18]. In general, we expect dedicated interactive segmentation methods\ - \ to outperform SAM when many points are provided, e.g. [67]. Unlike these\ - \ methods, SAM is designed for generality and breadth of use rather than high\ - \ IoU interactive segmentation. Moreover, SAM can process prompts in real-time,\ - \ but nevertheless SAM's overall performance is not real-time when using a\ - \ heavy image encoder. Our foray into the text-to-mask task is exploratory\ - \ and not entirely robust, although we believe it can be improved with more\ - \ effort. While SAM can perform many tasks, it is unclear how to design simple\ - \ prompts that implement semantic and panoptic segmentation. 
Finally, there\ - \ are domain-specific tools, such as [7], that we expect to outperform SAM\ - \ in their respective domains.\"\n" - quality_control: - explanation: See [[Segment Anything RAI Analysis]](https://arxiv.org/pdf/2304.02643.pdf#section.6) + explanation: Model card can be found in the Appendix section of the paper. + modality: image, text + analysis: + value: > + "We extensively evaluate SAM. + First, using a diverse new suite of 23 segmentation datasets, we find that + SAM produces high-quality masks from a single foreground point, often only + slightly below that of the manually annotated ground truth. + Second, we find consistently strong quantitative and qualitative results on + a variety of downstream tasks under a zero-shot transfer protocol using prompt + engineering, including edge detection, object proposal generation, instance + segmentation, and a preliminary exploration of text-to-mask prediction." + explanation: See [[Zero-Shot Transfer Experiments]](https://arxiv.org/pdf/2304.02643.pdf#section.7) for more details. - value: "\"We perform a Responsible AI (RAI) analysis of our work by investigating\ - \ potential fairness concerns and biases when using SA-1B and SAM. We focus\ - \ on the geographic and income distribution of SA-1B and fairness of SAM across\ - \ protected attributes of people.\"\n" size: unknown + # Construction + dependencies: [SA-1B] training_emissions: - explanation: See [[Model card]](https://arxiv.org/pdf/2304.02643.pdf#page=28) value: 2.8 metric tons of carbon dioxide - training_hardware: explanation: See [[Model card]](https://arxiv.org/pdf/2304.02643.pdf#page=28) - value: 256 A100 GPUs training_time: - explanation: See [[Model card]](https://arxiv.org/pdf/2304.02643.pdf#page=28) value: 68 hours - type: model - url: https://arxiv.org/pdf/2304.02643.pdf -- access: closed + explanation: See [[Model card]](https://arxiv.org/pdf/2304.02643.pdf#page=28) + training_hardware: + value: 256 A100 GPUs + explanation: See [[Model card]](https://arxiv.org/pdf/2304.02643.pdf#page=28) + quality_control: + value: > + "We perform a Responsible AI (RAI) analysis of our work by investigating potential + fairness concerns and biases when using SA-1B and SAM. We focus on the geographic + and income distribution of SA-1B and fairness of SAM across protected attributes + of people." + explanation: See [[Segment Anything RAI Analysis]](https://arxiv.org/pdf/2304.02643.pdf#section.6) + for more details. + # Downstream + access: + value: open + explanation: > + Inference code and model checkpoints are available on the model's + [[GitHub repository]](https://github.com/facebookresearch/segment-anything). + Its training dataset SA-1B can be used for research purposes and is available + for download [here](https://ai.facebook.com/datasets/segment-anything-downloads/). + license: + value: Apache 2.0 + explanation: See [[LICENSE]](https://github.com/facebookresearch/segment-anything/blob/main/LICENSE) + intended_uses: + value: > + "SAM is intended to be used for any prompt-based segmentation task. We explored + its use in segmenting objects from a point, edge detection, segmenting all + objects, and segmenting detected objects. We explored how SAM can integrate + with other vision models to segment objects from text." + explanation: See [[Model card]](https://arxiv.org/pdf/2304.02643.pdf#page=28) + prohibited_uses: + value: > + For out-of-scope use cases see terms of use in [[LICENSE]](https://github.com/facebookresearch/segment-anything/blob/main/LICENSE). 
+ Authors also discuss the following limitations of the model: "While SAM performs + well in general, it is not perfect. It can miss fine structures, hallucinates + small disconnected components at times, and does not produce boundaries as + crisply as more computationally intensive methods that “zoom-in”, e.g. [18]. + In general, we expect dedicated interactive segmentation methods to outperform + SAM when many points are provided, e.g. [67]. Unlike these methods, SAM is + designed for generality and breadth of use rather than high IoU interactive + segmentation. Moreover, SAM can process prompts in real-time, but nevertheless + SAM's overall performance is not real-time when using a heavy image encoder. + Our foray into the text-to-mask task is exploratory and not entirely robust, + although we believe it can be improved with more effort. While SAM can perform + many tasks, it is unclear how to design simple prompts that implement semantic + and panoptic segmentation. Finally, there are domain-specific tools, such + as [7], that we expect to outperform SAM in their respective domains." + explanation: See [[Discussion]](https://arxiv.org/pdf/2304.02643.pdf#section.8) + monitoring: '' + feedback: Feedback can be given via the feedback form on their website [segment-anything.com](https://segment-anything.com/) + or by emailing at segment-anything at meta.com. + +- type: model + name: Voicebox + organization: Meta + description: Voicebox is the first generative AI model for speech to generalize + across tasks with state-of-the-art performance. + created_date: 2023-06-16 + url: https://research.facebook.com/publications/voicebox-text-guided-multilingual-universal-speech-generation-at-scale/ + model_card: '' + modality: text and audio input and audio output analysis: Evaluated on zero-shot text-to-speech benchmarks, with Voicebox outperforming the current state-of-the-art English model VALL-E. - created_date: 2023-06-16 + size: 330M parameters dependencies: [] - description: Voicebox is the first generative AI model for speech to generalize - across tasks with state-of-the-art performance. - feedback: '' - intended_uses: '' + training_emissions: unknown + training_time: 750,000 iterations + training_hardware: 32 GPUs of unspecified type + quality_control: '' + access: closed license: '' - modality: - explanation: text and audio input and audio output - value: audio, text; audio - model_card: '' + intended_uses: '' + prohibited_uses: '' monitoring: '' - name: Voicebox + feedback: '' + +- type: model + name: PEER organization: Meta - prohibited_uses: '' - quality_control: '' - size: 330M parameters (dense) - training_emissions: unknown - training_hardware: 32 GPUs of unspecified type - training_time: 750,000 iterations - type: model - url: https://research.facebook.com/publications/voicebox-text-guided-multilingual-universal-speech-generation-at-scale/ -- access: open - analysis: PEER is evaluated on core research questions intended to gauge language - understanding, proper use of citations, instruction following, and iterative - use. - created_date: 2022-08-24 - dependencies: [] description: PEER is a collaborative language model that is trained to imitate the entire writing process itself. PEER can write drafts, add suggestions, propose edits and provide explanations for its actions. - feedback: '' - intended_uses: adapting LLMs to work with collaborative writing and updating. 
- license: '' - modality: - explanation: natural language text - value: text; text + created_date: 2022-08-24 + url: https://arxiv.org/pdf/2208.11663.pdf model_card: '' - monitoring: '' - name: PEER - organization: Meta - prohibited_uses: '' - quality_control: Heuristics and edit filtering was used on data set, which consisted - mostly of Wikipedia pages. - size: 3B parameters (dense) + modality: natural language text + analysis: PEER is evaluated on core research questions intended to gauge language + understanding, proper use of citations, instruction following, and iterative + use. + size: 3B parameters + dependencies: [] training_emissions: '' - training_hardware: 64 GPUs training_time: '' - type: model - url: https://arxiv.org/pdf/2208.11663.pdf + training_hardware: 64 GPUs + quality_control: Heuristics and edit filtering was used on data set, which consisted + mostly of Wikipedia pages. + access: open + license: '' + intended_uses: adapting LLMs to work with collaborative writing and updating. + prohibited_uses: '' + monitoring: '' + feedback: '' diff --git a/assets/microsoft.yaml b/assets/microsoft.yaml index 773e3bb4..8d324607 100644 --- a/assets/microsoft.yaml +++ b/assets/microsoft.yaml @@ -1,13 +1,17 @@ --- -- access: - explanation: Microsoft does not provide VLMo to external researchers. One author - commented that code would be pushed "soon" in [[November of 2021]](https://github.com/microsoft/unilm/issues/532), - but the repository does not contain relevant changes. - value: closed - analysis: '' + +- type: model + name: VLMo + organization: Microsoft + description: VLMo is a model for text-to-image generation created_date: - explanation: The date the model paper was released value: 2021-11-03 + explanation: The date the model paper was released + url: https://arxiv.org/abs/2111.02358 + model_card: none + modality: text (English) and image + size: 562M + analysis: '' dependencies: - Conceptual Captions - SBU Captions @@ -15,263 +19,300 @@ - Visual Genome - Wikipedia - BooksCorpus - description: VLMo is a model for text-to-image generation - feedback: '' - intended_uses: '' + training_emissions: '' + training_time: '' + training_hardware: '' + quality_control: '' + access: + value: closed + explanation: Microsoft does not provide VLMo to external researchers. One author + commented that code would be pushed "soon" in [[November of 2021]](https://github.com/microsoft/unilm/issues/532), + but the repository does not contain relevant changes. license: - explanation: No license was found. value: none - modality: - explanation: text (English) and image - value: image, text; image, text - model_card: none + explanation: No license was found. + intended_uses: '' + prohibited_uses: '' monitoring: '' - name: VLMo + feedback: '' + +- type: model + name: T-ULRv5 organization: Microsoft - prohibited_uses: '' - quality_control: '' - size: 562M parameters (dense) - training_emissions: '' - training_hardware: '' - training_time: '' - type: model - url: https://arxiv.org/abs/2111.02358 -- access: - explanation: Manual approval through early access request form required. 
- value: limited - analysis: '' + description: T-ULRv5 is a language model trained with two unique training objectives created_date: - explanation: Date model blog post was released value: 2022-09-28 - dependencies: [] - description: T-ULRv5 is a language model trained with two unique training objectives - feedback: '' - intended_uses: '' - license: - explanation: "The asset isn't released, and hence the license is unknown.\n" - value: unknown - modality: - explanation: Text (English, 93 other languages) - value: text; text + explanation: Date model blog post was released + url: https://www.microsoft.com/en-us/research/blog/microsoft-turing-universal-language-representation-model-t-ulrv5-tops-xtreme-leaderboard-and-trains-100x-faster/ model_card: '' - monitoring: '' - name: T-ULRv5 - organization: Microsoft - prohibited_uses: '' - quality_control: '' - size: 2.2B parameters (dense) + modality: Text (English, 93 other languages) + size: 2.2B parameters (dense model) + analysis: '' + dependencies: [] training_emissions: '' - training_hardware: 256 A100 training_time: Less than two weeks - type: model - url: https://www.microsoft.com/en-us/research/blog/microsoft-turing-universal-language-representation-model-t-ulrv5-tops-xtreme-leaderboard-and-trains-100x-faster/ -- access: - explanation: Manual approval through early access request form required. + training_hardware: 256 A100 + quality_control: '' + access: value: limited - analysis: '' - created_date: - explanation: Date model blog post was released - value: 2021-12-02 - dependencies: [] - description: '' - feedback: '' - intended_uses: '' + explanation: Manual approval through early access request form required. license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - modality: - explanation: text (English) - value: text; text - model_card: '' + explanation: > + The asset isn't released, and hence the license is unknown. 
+ intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' + +- type: model name: Turing NLR-v5 organization: Microsoft - prohibited_uses: '' - quality_control: '' - size: 5B parameters (dense) + description: '' + created_date: + value: 2021-12-02 + explanation: Date model blog post was released + url: https://www.microsoft.com/en-us/research/blog/efficiently-and-effectively-scaling-up-language-model-pretraining-for-best-language-representation-model-on-glue-and-superglue/?OCID=msr_blog_TNLRV5_tw + model_card: '' + modality: text (English) + analysis: '' + size: 5B parameters (dense model) + dependencies: [] training_emissions: '' - training_hardware: '' training_time: '' - type: model - url: https://www.microsoft.com/en-us/research/blog/efficiently-and-effectively-scaling-up-language-model-pretraining-for-best-language-representation-model-on-glue-and-superglue/?OCID=msr_blog_TNLRV5_tw -- access: - explanation: Megatron-Turing NLG can be accessed through the [[Turing Academic - Program]](https://www.microsoft.com/en-us/research/collaboration/microsoft-turing-academic-program/) + training_hardware: '' + quality_control: '' + access: value: limited - analysis: '' - created_date: - explanation: "The date of the Microsoft Research blog announcing MT-NLG [[Microsoft\ - \ Research Blog]](https://www.microsoft.com/en-us/research/blog/using-deepspeed-and-megatron-to-train-megatron-turing-nlg-530b-the-worlds-largest-and-most-powerful-generative-language-model/).\n" - value: 2022-01-28 - dependencies: [The Pile] - description: "Megatron-Turing NLG is a 530B parameter autoregressive language\ - \ model.\n" - feedback: '' - intended_uses: '' + explanation: Manual approval through early access request form required. license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - modality: - explanation: text (English) - value: text; text - model_card: '' + explanation: > + The asset isn't released, and hence the license is unknown. + intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' + +- type: model name: Megatron-Turing NLG organization: Microsoft, NVIDIA - prohibited_uses: '' - quality_control: '' - size: 530B parameters (dense) - training_emissions: '' - training_hardware: 4480 A100s (560 x 8) - training_time: '' - type: model + description: > + Megatron-Turing NLG is a 530B parameter autoregressive language model. + created_date: + value: 2022-01-28 + explanation: > + The date of the Microsoft Research blog announcing MT-NLG + [[Microsoft Research Blog]](https://www.microsoft.com/en-us/research/blog/using-deepspeed-and-megatron-to-train-megatron-turing-nlg-530b-the-worlds-largest-and-most-powerful-generative-language-model/). 
url: https://arxiv.org/abs/2201.11990 -- access: - explanation: Microsoft does not provide public access to VALL-E - value: closed + model_card: '' + modality: text (English) + size: 530B parameters (dense model) analysis: '' - created_date: - explanation: The date the [[model paper]](https://arxiv.org/abs/2301.02111) - was released - value: 2023-01-05 - dependencies: [] - description: Vall-E is a neural code model for text-to-speech synthesis - feedback: '' - intended_uses: '' + dependencies: [The Pile] + training_emissions: '' + training_time: '' + training_hardware: 4480 A100s (560 x 8) + quality_control: '' + access: + value: limited + explanation: Megatron-Turing NLG can be accessed through the [[Turing Academic + Program]](https://www.microsoft.com/en-us/research/collaboration/microsoft-turing-academic-program/) license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - modality: - explanation: text, audio - value: audio, text; audio, text - model_card: none + explanation: > + The asset isn't released, and hence the license is unknown. + intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' + +- type: model name: VALL-E organization: Microsoft - prohibited_uses: '' - quality_control: '' + description: Vall-E is a neural code model for text-to-speech synthesis + created_date: + value: 2023-01-05 + explanation: The date the [[model paper]](https://arxiv.org/abs/2301.02111) + was released + url: https://valle-demo.github.io/ + model_card: none + modality: text, audio size: unknown + analysis: '' + dependencies: [] training_emissions: '' - training_hardware: 16 V100 32GB GPUs training_time: '' - type: model - url: https://valle-demo.github.io/ -- access: - explanation: "The feature is available to developers in a restricted technical\ - \ preview [[GitHub CoPilot]](https://copilot.github.com/).\n" - value: limited - adaptation: unknown - created_date: - explanation: "Date of the blog post introducing CoPilot [[GitHub Blog Post]]\ - \ (https://github.blog/2021-06-29-introducing-github-copilot-ai-pair-programmer/).\n" - value: 2021-06-29 - dependencies: [Codex] - description: "GitHub CoPilot is a coding pair programmer assisting programmers\ - \ as they write code.\n" - failures: unknown - feedback: "Feedback can be provided in the CoPilot feedback project [[CoPilot\ - \ feedback]] (https://github.com/github/feedback/discussions/categories/copilot-feedback).\n" - intended_uses: "GitHub CoPilot is intended to be used as a coding assistant.\n" + training_hardware: 16 V100 32GB GPUs + quality_control: '' + access: + value: closed + explanation: Microsoft does not provide public access to VALL-E license: - explanation: "We weren't able to locate the license.\n" value: unknown - monitoring: "value: unknown explanation: >\n There may be internal monitoring\ - \ mechanisms unknown to the public.\n" - monthly_active_users: "GitHub Copilot reportedly has over 1 million sign-ups [[Tweet\ - \ Source]](https://twitter.com/sama/status/1539737789310259200?s=21&t=YPaYd0ZueJzrR6rLslUqzg).\n" + explanation: > + The asset isn't released, and hence the license is unknown. 
+  intended_uses: ''
+  prohibited_uses: ''
   monitoring: ''
+  feedback: ''
+
+# Applications
+
+- type: application
   name: GitHub CoPilot
+  # General
   organization: Microsoft
-  output_space: Code completions
-  prohibited_uses: "Access to GPT-3 is governed by GitHub Acceptable Use Policies\
-    \ and Terms of Service, both of which list a set of prohibited uses [[Use Policies]]\
-    \ (https://docs.github.com/en/site-policy/acceptable-use-policies/github-acceptable-use-policies)\
-    \ [[Terms of Service]] (https://docs.github.com/en/site-policy/github-terms/github-terms-of-service).\n"
-  quality_control: "GitHub is working on a filter to detect and suppress code generations\
-    \ that are verbatim from the training set [[GitHub Research Recitation]] (https://docs.github.com/en/github/copilot/research-recitation).\
-    \ According to the FAQ, GitHub implemented a simple filter that blocks emails\
-    \ in standard formats to protect personally identifiable data that may be present\
-    \ in the training data [[GitHub CoPilot]](https://copilot.github.com/).\n"
-  terms_of_service: "https://docs.github.com/en/site-policy/github-terms/github-terms-of-service\n"
-  type: application
+  description: >
+    GitHub CoPilot is a coding pair programmer assisting programmers as they
+    write code.
+  created_date:
+    value: 2021-06-29
+    explanation: >
+      Date of the blog post introducing CoPilot
+      [[GitHub Blog Post]]
+      (https://github.blog/2021-06-29-introducing-github-copilot-ai-pair-programmer/).
   url: https://copilot.github.com/
+  # Construction
+  dependencies: [Codex]
+  adaptation: unknown
+  output_space: Code completions
+  quality_control: >
+    GitHub is working on a filter to detect and suppress code generations that
+    are verbatim from the training set
+    [[GitHub Research Recitation]]
+    (https://docs.github.com/en/github/copilot/research-recitation).
+    According to the FAQ, GitHub implemented a simple filter that blocks emails
+    in standard formats to protect personally identifiable data that may be
+    present in the training data
+    [[GitHub CoPilot]](https://copilot.github.com/).
+  # Downstream
+  access:
+    value: limited
+    explanation: >
+      The feature is available to developers in a restricted
+      technical preview
+      [[GitHub CoPilot]](https://copilot.github.com/).
+  license:
+    value: unknown
+    explanation: >
+      We weren't able to locate the license.
+  terms_of_service: >
+    https://docs.github.com/en/site-policy/github-terms/github-terms-of-service
+  intended_uses: >
+    GitHub CoPilot is intended to be used as a coding assistant.
+  prohibited_uses: >
+    Access to GitHub CoPilot is governed by GitHub Acceptable Use Policies and
+    Terms of Service, both of which list a set of prohibited uses
+    [[Use Policies]]
+    (https://docs.github.com/en/site-policy/acceptable-use-policies/github-acceptable-use-policies)
+    [[Terms of Service]]
+    (https://docs.github.com/en/site-policy/github-terms/github-terms-of-service).
+  monitoring:
+    value: unknown
+    explanation: >
+      There may be internal monitoring mechanisms unknown to the public.
+  feedback: >
+    Feedback can be provided in the CoPilot feedback project
+    [[CoPilot feedback]]
+    (https://github.com/github/feedback/discussions/categories/copilot-feedback).
+  # Deployment
+  monthly_active_users: >
+    GitHub Copilot reportedly has over 1 million sign-ups
+    [[Tweet Source]](https://twitter.com/sama/status/1539737789310259200?s=21&t=YPaYd0ZueJzrR6rLslUqzg).
user_distribution: unknown -- access: open - analysis: '' + failures: unknown + +- type: model + name: BioGPT + organization: Microsoft + description: '' created_date: 2022-09-24 + url: https://academic.oup.com/bib/article/23/6/bbac409/6713511?guestAccessKey=a66d9b5d-4f83-4017-bb52-405815c907b9&login=true + model_card: '' + modality: text + analysis: '' + size: 1.5B parameters (dense model) dependencies: [PubMed] - description: '' - feedback: '' - intended_uses: '' + training_emissions: '' + training_time: '' + training_hardware: '' + quality_control: '' + access: open license: MIT - modality: - explanation: text - value: text; text - model_card: '' + intended_uses: '' + prohibited_uses: '' monitoring: '' - name: BioGPT + feedback: '' + +- type: application + name: Microsoft Teams organization: Microsoft - prohibited_uses: '' - quality_control: '' - size: 1.5B parameters (dense) - training_emissions: '' - training_hardware: '' - training_time: '' - type: model - url: https://academic.oup.com/bib/article/23/6/bbac409/6713511?guestAccessKey=a66d9b5d-4f83-4017-bb52-405815c907b9&login=true -- access: open - adaptation: '' - created_date: 2023-02-01 - dependencies: [ChatGPT API] description: Microsoft Teams Premium is generally available. Built on the familiar, all-in-one collaborative experience of Microsoft Teams, Teams Premium brings the latest technologies, powered by OpenAI’s GPT-3.5, to make meetings more intelligent, personalized, and protected—whether it’s one-on-one, large meetings, virtual appointments, or webinars. - failures: '' - feedback: '' - intended_uses: '' + created_date: 2023-02-01 + url: https://www.microsoft.com/en-us/microsoft-365/blog/2023/02/01/microsoft-teams-premium-cut-costs-and-add-ai-powered-productivity/ + dependencies: [ChatGPT API] + adaptation: '' + output_space: '' + quality_control: '' + access: open license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown + explanation: > + The asset isn't released, and hence the license is unknown. + terms_of_service: '' + intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' monthly_active_users: '' - name: Microsoft Teams - organization: Microsoft - output_space: '' - prohibited_uses: '' - quality_control: '' - terms_of_service: '' - type: application - url: https://www.microsoft.com/en-us/microsoft-365/blog/2023/02/01/microsoft-teams-premium-cut-costs-and-add-ai-powered-productivity/ user_distribution: '' -- access: limited - adaptation: unknown - created_date: 2023-02-07 - dependencies: [ChatGPT API] + failures: '' + +- type: application + name: Bing Search + organization: Microsoft description: AI-powered Bing search engine and Edge browser, available in preview now at Bing.com, to deliver better search, more complete answers, a new chat experience and the ability to generate content. We think of these tools as an AI copilot for the web. - failures: '' - feedback: "Feedback can be submitted at [bing.com](bing.com).\n" - intended_uses: Search engine + created_date: 2023-02-07 + url: https://blogs.microsoft.com/blog/2023/02/07/reinventing-search-with-a-new-ai-powered-microsoft-bing-and-edge-your-copilot-for-the-web/ + dependencies: [ChatGPT API] + adaptation: unknown + output_space: Search results + quality_control: '' + access: limited license: - explanation: "Software license as described in the Terms of Service document.\n" value: custom + explanation: > + Software license as described in the Terms of Service document. 
+ terms_of_service: https://www.microsoft.com/legal/terms-of-use + intended_uses: Search engine + prohibited_uses: '' monitoring: '' + feedback: > + Feedback can be submitted at [bing.com](bing.com). monthly_active_users: '' - name: Bing Search - organization: Microsoft - output_space: Search results - prohibited_uses: '' - quality_control: '' - terms_of_service: https://www.microsoft.com/legal/terms-of-use - type: application - url: https://blogs.microsoft.com/blog/2023/02/07/reinventing-search-with-a-new-ai-powered-microsoft-bing-and-edge-your-copilot-for-the-web/ user_distribution: '' -- access: closed - analysis: '' + failures: '' + +- type: model + name: KOSMOS-1 + organization: Microsoft + description: KOSMOS-1 is a multimodal language model that is capable of perceiving + multimodal input, following instructions, and performing in-context learning + for not only language tasks but also multimodal tasks. created_date: 2023-03-01 + url: https://arxiv.org/pdf/2302.14045.pdf + model_card: '' + modality: text, image + analysis: '' + size: 1.6B parameters (dense model) dependencies: - The Pile - CommonCrawl @@ -279,427 +320,439 @@ - LAION-400M - COYO-700M - Conceptual Captions - description: KOSMOS-1 is a multimodal language model that is capable of perceiving - multimodal input, following instructions, and performing in-context learning - for not only language tasks but also multimodal tasks. - feedback: '' - intended_uses: '' + training_emissions: '' + training_time: '' + training_hardware: '' + quality_control: '' + access: closed license: MIT - modality: - explanation: text, image - value: image, text; image, text - model_card: '' + intended_uses: '' + prohibited_uses: '' monitoring: '' - name: KOSMOS-1 + feedback: '' + +- type: model + name: Prometheus organization: Microsoft - prohibited_uses: '' - quality_control: '' - size: 1.6B parameters (dense) - training_emissions: '' - training_hardware: '' - training_time: '' - type: model - url: https://arxiv.org/pdf/2302.14045.pdf -- access: closed - analysis: '' - created_date: 2023-02-07 - dependencies: [] description: In the context of Bing, we have developed a proprietary way of working with the OpenAI model that allows us to best leverage its power. We call this collection of capabilities and techniques the Prometheus model. This combination gives you more relevant, timely and targeted results, with improved safety. 
- feedback: '' - intended_uses: '' - license: - explanation: "We weren't able to locate a license.\n" - value: unknown - modality: {} + created_date: 2023-02-07 + url: https://blogs.microsoft.com/blog/2023/02/07/reinventing-search-with-a-new-ai-powered-microsoft-bing-and-edge-your-copilot-for-the-web/ model_card: '' - monitoring: '' - name: Prometheus - organization: Microsoft - prohibited_uses: '' - quality_control: '' + modality: unknown + analysis: '' size: unknown + dependencies: [] training_emissions: '' - training_hardware: '' training_time: '' - type: model - url: https://blogs.microsoft.com/blog/2023/02/07/reinventing-search-with-a-new-ai-powered-microsoft-bing-and-edge-your-copilot-for-the-web/ -- access: closed - analysis: '' - created_date: 2022-11-23 - dependencies: [FLD-900M] - description: '' - feedback: '' - intended_uses: '' + training_hardware: '' + quality_control: '' + access: closed license: - explanation: "We weren't able to locate a license.\n" value: unknown - modality: - explanation: text, image - value: image, text; image, text - model_card: '' + explanation: > + We weren't able to locate a license. + intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' + +- type: model name: Florence organization: Microsoft - prohibited_uses: '' - quality_control: '' - size: 900M parameters (dense) - training_emissions: '' - training_hardware: '' - training_time: '' - type: model + description: '' + created_date: 2022-11-23 url: https://arxiv.org/abs/2111.11432 -- access: closed + model_card: '' + modality: text, image analysis: '' + size: 900M parameters (637M image, 256M text) + dependencies: [FLD-900M] + training_emissions: '' + training_time: '' + training_hardware: '' + quality_control: '' + access: closed + license: + value: unknown + explanation: > + We weren't able to locate a license. + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' + +- type: dataset + name: FLD-900M + organization: Microsoft + description: '' created_date: 2022-11-23 + url: https://arxiv.org/abs/2111.11432 datasheet: '' + modality: text, image + size: 900M image-text pairs + sample: [] + analysis: '' dependencies: [] - description: '' - excluded: '' - feedback: '' included: '' - intended_uses: '' + excluded: '' + quality_control: '' + access: closed license: - explanation: "We weren't able to locate a license.\n" value: unknown - modality: - explanation: text, image - value: image, text + explanation: > + We weren't able to locate a license. + intended_uses: '' + prohibited_uses: '' monitoring: '' - name: FLD-900M + feedback: '' + +- type: application + name: Azure Cognitive Services for Vision organization: Microsoft - prohibited_uses: '' - quality_control: '' - sample: [] - size: 900M image-text pairs - type: dataset - url: https://arxiv.org/abs/2111.11432 -- access: limited - adaptation: '' - created_date: 2023-03-07 - dependencies: [Florence] description: Cost-effective, production-ready computer vision services in Azure Cognitive Service for Vision. The improved Vision Services enables developers to create cutting-edge, market-ready, responsible computer vision applications across various industries. 
- failures: '' - feedback: '' - intended_uses: '' + created_date: 2023-03-07 + url: https://azure.microsoft.com/en-us/blog/announcing-a-renaissance-in-computer-vision-ai-with-microsofts-florence-foundation-model/?utm_content=buffer16fa0&utm_medium=social&utm_source=twitter.com&utm_campaign=buffer + dependencies: [Florence] + adaptation: '' + output_space: '' + quality_control: '' + access: limited license: - explanation: "Software license in the Microsoft Terms of Use govern the license\ - \ rules for Azure services as outlined in the Terms of Service document.\n" value: custom + explanation: > + Software license in the Microsoft Terms of Use govern the license rules + for Azure services as outlined in the Terms of Service document. + terms_of_service: https://azure.microsoft.com/en-us/support/legal/ + intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' monthly_active_users: '' - name: Azure Cognitive Services for Vision - organization: Microsoft - output_space: '' - prohibited_uses: '' - quality_control: '' - terms_of_service: https://azure.microsoft.com/en-us/support/legal/ - type: application - url: https://azure.microsoft.com/en-us/blog/announcing-a-renaissance-in-computer-vision-ai-with-microsofts-florence-foundation-model/?utm_content=buffer16fa0&utm_medium=social&utm_source=twitter.com&utm_campaign=buffer user_distribution: '' -- access: closed - analysis: '' + failures: '' + +- type: model + name: VisualChatGPT + organization: Microsoft + description: '' created_date: 2023-03-08 + url: https://arxiv.org/pdf/2303.04671.pdf + model_card: '' + modality: text, image + analysis: '' + size: unknown dependencies: [OpenAI API] - description: '' - feedback: '' - intended_uses: '' + training_emissions: '' + training_time: '' + training_hardware: '' + quality_control: '' + access: closed license: - explanation: No license was found. value: none - modality: - explanation: text, image - value: image, text; image, text - model_card: '' + explanation: No license was found. + intended_uses: '' + prohibited_uses: '' monitoring: '' - name: VisualChatGPT + feedback: '' + +- type: application + name: Microsoft 365 Copilot organization: Microsoft - prohibited_uses: '' - quality_control: '' - size: unknown - training_emissions: '' - training_hardware: '' - training_time: '' - type: model - url: https://arxiv.org/pdf/2303.04671.pdf -- access: limited - adaptation: '' - created_date: 2023-03-16 - dependencies: [GPT-4 API] description: It combines the power of language models with your data in the Microsoft Graph and the Microsoft 365 apps to turn your words into the most powerful productivity tool on the planet. - failures: '' - feedback: '' - intended_uses: '' + created_date: 2023-03-16 + url: https://blogs.microsoft.com/blog/2023/03/16/introducing-microsoft-365-copilot-your-copilot-for-work/ + dependencies: [GPT-4 API] + adaptation: '' + output_space: '' + quality_control: '' + access: limited license: - explanation: "Software license as described in the Terms of Service document.\n" value: custom + explanation: > + Software license as described in the Terms of Service document. 
+  terms_of_service: https://www.microsoft.com/legal/terms-of-use
+  intended_uses: ''
+  prohibited_uses: ''
   monitoring: ''
+  feedback: ''
   monthly_active_users: ''
-  name: Microsoft 365 Copilot
-  organization: Microsoft
-  output_space: ''
-  prohibited_uses: ''
-  quality_control: ''
-  terms_of_service: https://www.microsoft.com/legal/terms-of-use
-  type: application
-  url: https://blogs.microsoft.com/blog/2023/03/16/introducing-microsoft-365-copilot-your-copilot-for-work/
   user_distribution: ''
-- access: limited
-  adaptation: ''
-  created_date: 2023-03-16
-  dependencies: [Microsoft 365 Copilot]
+  failures: ''
+
+- type: application
+  name: Microsoft Business Chat
+  organization: Microsoft
   description: Business Chat works across the language model, the Microsoft 365
    apps, and your data — your calendar, emails, chats, documents, meetings and
    contacts — to do things you’ve never been able to do before. You can give it
    natural language prompts like “Tell my team how we updated the product strategy,”
    and it will generate a status update based on the morning’s meetings, emails
    and chat threads.
-  failures: ''
-  feedback: ''
-  intended_uses: ''
-  license:
-    explanation: "Software license as described in the Terms of Service document.\n"
-    value: custom
-  monitoring: ''
-  monthly_active_users: ''
-  name: Microsoft Business Chat
-  organization: Microsoft
-  output_space: ''
-  prohibited_uses: ''
-  quality_control: ''
-  terms_of_service: https://www.microsoft.com/legal/terms-of-use
-  type: application
+  created_date: 2023-03-16
   url: https://blogs.microsoft.com/blog/2023/03/16/introducing-microsoft-365-copilot-your-copilot-for-work/
-  user_distribution: ''
-- access: open
-  adaptation: ''
-  created_date: unknown
   dependencies: [Microsoft 365 Copilot]
-  description: Microsoft Excel is the industry leading spreadsheet software program,
-    a powerful data visualization and analysis tool.
-  failures: ''
-  feedback: ''
-  intended_uses: ''
+  adaptation: ''
+  output_space: ''
+  quality_control: ''
+  access: limited
   license:
-    explanation: "Software license as described in the Terms of Service document.\n"
    value: custom
+    explanation: >
+      Software license as described in the Terms of Service document.
+ terms_of_service: https://www.microsoft.com/legal/terms-of-use + intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' monthly_active_users: '' + user_distribution: '' + failures: '' + +- type: application name: Microsoft Outlook organization: Microsoft - output_space: '' - prohibited_uses: '' - quality_control: '' - terms_of_service: https://www.microsoft.com/legal/terms-of-use - type: application - url: https://www.microsoft.com/en-us/microsoft-365/outlook/email-and-calendar-software-microsoft-outlook - user_distribution: '' -- access: limited - adaptation: '' + description: Microsoft Outlook is a personal information manager software system + from Microsoft, available as a part of the Microsoft Office and Microsoft 365 + software suites. created_date: unknown + url: https://www.microsoft.com/en-us/microsoft-365/outlook/email-and-calendar-software-microsoft-outlook dependencies: [Microsoft 365 Copilot] - description: Microsoft Power Platform is a line of business intelligence, app - development, and app connectivity software applications. - failures: '' - feedback: '' - intended_uses: '' + adaptation: '' + output_space: '' + quality_control: '' + access: open license: - explanation: "Software license as described in the Terms of Service document.\n" value: custom + explanation: > + Software license as described in the Terms of Service document. + terms_of_service: https://www.microsoft.com/legal/terms-of-use + intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' monthly_active_users: '' + user_distribution: '' + failures: '' + +- type: application name: Microsoft Power Platform organization: Microsoft - output_space: '' - prohibited_uses: '' - quality_control: '' - terms_of_service: https://www.microsoft.com/legal/terms-of-use - type: application - url: https://powerplatform.microsoft.com/en-us/ - user_distribution: '' -- access: open - adaptation: '' + description: Microsoft Power Platform is a line of business intelligence, app + development, and app connectivity software applications. created_date: unknown + url: https://powerplatform.microsoft.com/en-us/ dependencies: [Microsoft 365 Copilot] - description: Microsoft PowerPoint empowers you to create clean slideshow presentations - and intricate pitch decks and gives you a powerful presentation maker. - failures: '' - feedback: '' - intended_uses: '' + adaptation: '' + output_space: '' + quality_control: '' + access: limited license: - explanation: "Software license as described in the Terms of Service document.\n" value: custom + explanation: > + Software license as described in the Terms of Service document. + terms_of_service: https://www.microsoft.com/legal/terms-of-use + intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' monthly_active_users: '' + user_distribution: '' + failures: '' + +- type: application name: Microsoft PowerPoint organization: Microsoft - output_space: '' - prohibited_uses: '' - quality_control: '' - terms_of_service: https://www.microsoft.com/legal/terms-of-use - type: application + description: Microsoft PowerPoint empowers you to create clean slideshow presentations + and intricate pitch decks and gives you a powerful presentation maker. 
+ created_date: unknown url: https://www.microsoft.com/en-us/microsoft-365/powerpoint - user_distribution: '' -- access: open + dependencies: [Microsoft 365 Copilot] adaptation: '' - created_date: unknown - dependencies: [Microsoft 365 Copilot, Microsoft Business Chat] - description: Microsoft Teams is a proprietary business communication platform - developed by Microsoft, as part of the Microsoft 365 family of products. - failures: '' - feedback: '' - intended_uses: '' + output_space: '' + quality_control: '' + access: open license: - explanation: "Software license as described in the Terms of Service document.\n" value: custom + explanation: > + Software license as described in the Terms of Service document. + terms_of_service: https://www.microsoft.com/legal/terms-of-use + intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' monthly_active_users: '' + user_distribution: '' + failures: '' + +- type: application name: Microsoft Teams organization: Microsoft - output_space: '' - prohibited_uses: '' - quality_control: '' - terms_of_service: https://www.microsoft.com/legal/terms-of-use - type: application + description: Microsoft Teams is a proprietary business communication platform + developed by Microsoft, as part of the Microsoft 365 family of products. + created_date: unknown url: https://www.microsoft.com/en-us/microsoft-teams/group-chat-software - user_distribution: '' -- access: open + dependencies: [Microsoft 365 Copilot, Microsoft Business Chat] adaptation: '' - created_date: unknown - dependencies: [Microsoft 365 Copilot] - description: Microsoft Word is a word processing software developed by Microsoft - failures: '' - feedback: '' - intended_uses: '' + output_space: '' + quality_control: '' + access: open license: - explanation: "Software license as described in the Terms of Service document.\n" value: custom + explanation: > + Software license as described in the Terms of Service document. + terms_of_service: https://www.microsoft.com/legal/terms-of-use + intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' monthly_active_users: '' + user_distribution: '' + failures: '' + +- type: application name: Microsoft Word organization: Microsoft + description: Microsoft Word is a word processing software developed by Microsoft + created_date: unknown + url: https://www.microsoft.com/en-us/microsoft-365/word + dependencies: [Microsoft 365 Copilot] + adaptation: '' output_space: '' - prohibited_uses: '' quality_control: '' - terms_of_service: https://www.microsoft.com/legal/terms-of-use - type: application - url: https://www.microsoft.com/en-us/microsoft-365/word - user_distribution: '' -- access: limited - adaptation: unknown - created_date: unknown - dependencies: [] - description: "Inside look is a Microsoft Office feature, composing document insights\ - \ highlighting key points, expected time to read, and popularity among others.\n" - failures: unknown - feedback: unknown - intended_uses: Providing document insights to users. + access: open license: - explanation: "Software license as described in the Terms of Service document.\n" value: custom - monitoring: unknown - monthly_active_users: unknown + explanation: > + Software license as described in the Terms of Service document. 
+ terms_of_service: https://www.microsoft.com/legal/terms-of-use + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' + monthly_active_users: '' + user_distribution: '' + failures: '' + +- type: application name: Microsoft Inside Look + # General organization: Microsoft - output_space: Document level insights for users. - prohibited_uses: unknown - quality_control: unknown - terms_of_service: https://www.microsoft.com/legal/terms-of-use - type: application - url: https://support.microsoft.com/en-us/office/see-file-insights-before-you-open-a-file-87a23bbc-a516-42e2-a7b6-0ecb8259e026 - user_distribution: unknown -- access: limited - adaptation: unknown + description: > + Inside look is a Microsoft Office feature, composing document insights + highlighting key points, expected time to read, and popularity among others. created_date: unknown + url: https://support.microsoft.com/en-us/office/see-file-insights-before-you-open-a-file-87a23bbc-a516-42e2-a7b6-0ecb8259e026 dependencies: [] - description: "Suggested replies is a Microsoft Outlook feature that suggests responses\ - \ to emails, available in: English, Spanish, Italian, French, German, Portuguese\ - \ Chinese Simplified, Chinese Traditional, Swedish, Russian, Korean, Czech,\ - \ Hungarian, Arabic, Hebrew, Thai, Turkish, Japanese, Dutch, Norwegian, Danish,\ - \ and Polish.\n" - failures: unknown - feedback: unknown - intended_uses: Suggesting email replies. + adaptation: unknown + output_space: Document level insights for users. + quality_control: unknown + # Downstream + access: limited license: - explanation: "Software license as described in the Terms of Service document.\n" value: custom + explanation: > + Software license as described in the Terms of Service document. + terms_of_service: https://www.microsoft.com/legal/terms-of-use + intended_uses: Providing document insights to users. + prohibited_uses: unknown monitoring: unknown + feedback: unknown + # Deployment monthly_active_users: unknown + user_distribution: unknown + failures: unknown + +- type: application name: Microsoft Suggested Replies + # General organization: Microsoft + description: > + Suggested replies is a Microsoft Outlook feature that suggests responses + to emails, available in: English, Spanish, Italian, French, German, + Portuguese Chinese Simplified, Chinese Traditional, Swedish, Russian, + Korean, Czech, Hungarian, Arabic, Hebrew, Thai, Turkish, Japanese, Dutch, + Norwegian, Danish, and Polish. + created_date: unknown + url: https://support.microsoft.com/en-us/office/use-suggested-replies-in-outlook-19316194-0434-43ba-a742-6b5890157379 + dependencies: [] + adaptation: unknown output_space: Suggested emails. - prohibited_uses: unknown quality_control: unknown + # Downstream + access: limited + license: + value: custom + explanation: > + Software license as described in the Terms of Service document. terms_of_service: https://www.microsoft.com/legal/terms-of-use - type: application - url: https://support.microsoft.com/en-us/office/use-suggested-replies-in-outlook-19316194-0434-43ba-a742-6b5890157379 + intended_uses: Suggesting email replies. 
+ prohibited_uses: unknown + monitoring: unknown + feedback: unknown + # Deployment + monthly_active_users: unknown user_distribution: unknown -- access: limited + failures: unknown + +- type: application + name: Microsoft Security Copilot + # General + organization: Microsoft + description: > + Microsoft Security Copilot is an AI-powered security analysis tool that enables + analysts to respond to threats quickly, process signals at machine speed, and + assess risk exposure in minutes. + created_date: + value: 2023-03-28 + explanation: The date Security Copilot was announced in the [[Microsoft blog + post]](https://blogs.microsoft.com/blog/2023/03/28/introducing-microsoft-security-copilot-empowering-defenders-at-the-speed-of-ai/). + url: https://blogs.microsoft.com/blog/2023/03/28/introducing-microsoft-security-copilot-empowering-defenders-at-the-speed-of-ai/ + # Construction + dependencies: [GPT-4, Microsoft security-specific model] adaptation: - explanation: See [[blog post]](https://blogs.microsoft.com/blog/2023/03/28/introducing-microsoft-security-copilot-empowering-defenders-at-the-speed-of-ai/). value: Security Copilot combines OpenAI's GPT-4 generative AI with a security-specific model from Microsoft. This security-specific model in turn incorporates a growing set of security-specific skills and is informed by Microsoft's unique global threat intelligence and more than 65 trillion daily signals. - created_date: - explanation: The date Security Copilot was announced in the [[Microsoft blog - post]](https://blogs.microsoft.com/blog/2023/03/28/introducing-microsoft-security-copilot-empowering-defenders-at-the-speed-of-ai/). - value: 2023-03-28 - dependencies: [GPT-4, Microsoft security-specific model] - description: "Microsoft Security Copilot is an AI-powered security analysis tool\ - \ that enables analysts to respond to threats quickly, process signals at machine\ - \ speed, and assess risk exposure in minutes.\n" - failures: unknown - feedback: unknown - intended_uses: Security Copilot is designed to enhance the capabilities of cybersecurity - professionals. It leverages machine speed and scale to accelerate response to - security incidents, discover and process threat signals, and assess risk exposure - within minutes. - license: - explanation: "Software license as described in the Terms of Service document.\n" - value: custom - monitoring: '' - monthly_active_users: unknown - name: Microsoft Security Copilot - organization: Microsoft + explanation: See [[blog post]](https://blogs.microsoft.com/blog/2023/03/28/introducing-microsoft-security-copilot-empowering-defenders-at-the-speed-of-ai/). output_space: - explanation: See [[product demo]](https://www.microsoft.com/en-us/security/business/ai-machine-learning/microsoft-security-copilot). value: Actionable responses to security-related questions (text and image). Security event, incident or threat reports (PowerPoint slide). - prohibited_uses: unknown + explanation: See [[product demo]](https://www.microsoft.com/en-us/security/business/ai-machine-learning/microsoft-security-copilot). quality_control: - explanation: See [[blog post]](https://blogs.microsoft.com/blog/2023/03/28/introducing-microsoft-security-copilot-empowering-defenders-at-the-speed-of-ai/). value: Security Copilot employs a closed-loop learning system that learns from user interactions and feedback, enabling it to provide more coherent, relevant, and useful answers that continually improve over time. 
Security Copilot is @@ -707,106 +760,119 @@ that customers' data and AI models are protected with enterprise compliance and security controls. Customer data is owned and controlled by them, and not used to train AI models for anyone outside their organization. + explanation: See [[blog post]](https://blogs.microsoft.com/blog/2023/03/28/introducing-microsoft-security-copilot-empowering-defenders-at-the-speed-of-ai/). + # Downstream + access: limited + license: + value: custom + explanation: > + Software license as described in the Terms of Service document. terms_of_service: https://www.microsoft.com/legal/terms-of-use - type: application - url: https://blogs.microsoft.com/blog/2023/03/28/introducing-microsoft-security-copilot-empowering-defenders-at-the-speed-of-ai/ + intended_uses: Security Copilot is designed to enhance the capabilities of cybersecurity + professionals. It leverages machine speed and scale to accelerate response to + security incidents, discover and process threat signals, and assess risk exposure + within minutes. + prohibited_uses: unknown + monitoring: '' + feedback: unknown + # Deployment + monthly_active_users: unknown user_distribution: unknown -- access: open - analysis: Evaluated on GLUE, SQuAD 2.0, and CoQA benchmarks. - created_date: 2019-10-01 - dependencies: [] + failures: unknown + +- type: model + name: UniLM + organization: Microsoft description: UniLM is a unified language model that can be fine-tuned for both natural language understanding and generation tasks. - feedback: '' - intended_uses: '' - license: MIT - modality: {} + created_date: 2019-10-01 + url: https://proceedings.neurips.cc/paper_files/paper/2019/file/c20bb2d9a50d5ac1f713f8b34d9aac5a-Paper.pdf model_card: '' - monitoring: '' - name: UniLM - organization: Microsoft - prohibited_uses: '' - quality_control: '' - size: '' + modality: text + analysis: Evaluated on GLUE, SQuAD 2.0, and CoQA benchmarks. + size: 340M parameters + dependencies: [] training_emissions: unknown - training_hardware: 8 NVIDIA Tesla V100 32GB GPUs training_time: 10,000 steps in 7 hours - type: model - url: https://proceedings.neurips.cc/paper_files/paper/2019/file/c20bb2d9a50d5ac1f713f8b34d9aac5a-Paper.pdf -- access: limited - analysis: '' - created_date: 2021-04-12 - dependencies: [] - description: Docugami is a LLM focused on writing business documents and data - using generative AI. - feedback: '' - intended_uses: analyzing, writing, and connecting business documents and data - license: '' - modality: - explanation: text - value: text; text - model_card: '' + training_hardware: 8 NVIDIA Tesla V100 32GB GPUs + quality_control: '' + access: open + license: MIT + intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' + +- type: model name: Docugami organization: Microsoft - prohibited_uses: '' - quality_control: '' - size: 20B parameters (dense) + description: Docugami is a LLM focused on writing business documents and data + using generative AI. + created_date: 2021-04-12 + url: https://www.docugami.com/generative-ai + model_card: '' + modality: text + analysis: '' + size: 20B parameters + dependencies: [] training_emissions: unknown - training_hardware: '' training_time: unknown - type: model - url: https://www.docugami.com/generative-ai -- access: open - analysis: Evaluated on a range of standardized vision benchmarks, and achieves - state of the art performance on all experimentally. 
- created_date: 2022-08-31 - dependencies: [Multiway Transformer network] - description: BEiT-3 is a general-purpose multimodal foundation model for vision - and vision-language tasks. - feedback: '' - intended_uses: '' + training_hardware: '' + quality_control: '' + access: limited license: '' - modality: - explanation: text and image input and output - value: image, text; image, text - model_card: '' + intended_uses: analyzing, writing, and connecting business documents and data + prohibited_uses: '' monitoring: '' + feedback: '' + +- type: model name: BEiT-3 organization: Microsoft - prohibited_uses: '' - quality_control: '' - size: 1.9B parameters (dense) + description: BEiT-3 is a general-purpose multimodal foundation model for vision + and vision-language tasks. + created_date: 2022-08-31 + url: https://arxiv.org/pdf/2208.10442.pdf + model_card: '' + modality: text and image input and output + analysis: Evaluated on a range of standardized vision benchmarks, and achieves + state of the art performance on all experimentally. + size: 1.9B parameters + dependencies: [Multiway Transformer network] training_emissions: unknown - training_hardware: '' training_time: '' - type: model - url: https://arxiv.org/pdf/2208.10442.pdf -- access: open - analysis: Reports results on standard LLM benchmarks in comparison to other LLMs - and test sets. - created_date: 2023-04-24 - dependencies: [LLaMA, Evol-Instruct, Alpaca dataset] + training_hardware: '' + quality_control: '' + access: open + license: '' + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' + +- type: model + name: WizardLM + organization: Microsoft description: Starting with an initial set of instructions, we use our proposed Evol-Instruct to rewrite them step by step into more complex instructions. Then, we mix all generated instruction data to fine-tune LLaMA. We call the resulting model WizardLM. - feedback: https://huggingface.co/datasets/WizardLM/evol_instruct_70k/discussions - intended_uses: Creating large amounts of instruction data, particularly with high - complexity - license: Apache 2.0 - modality: - explanation: natural language text - value: text; text + created_date: 2023-04-24 + url: https://arxiv.org/pdf/2304.12244v1.pdf model_card: https://huggingface.co/WizardLM/WizardLM-13B-1.0 - monitoring: '' - name: WizardLM - organization: Microsoft - prohibited_uses: '' - quality_control: '' - size: 7B parameters (dense) + modality: natural language text + analysis: Reports results on standard LLM benchmarks in comparison to other LLMs + and test sets. + size: 7B parameters + dependencies: [LLaMA, Evol-Instruct, Alpaca dataset] training_emissions: '' - training_hardware: 8 V100 GPUs training_time: 70 hours on 3 epochs - type: model - url: https://arxiv.org/pdf/2304.12244v1.pdf + training_hardware: 8 V100 GPUs + quality_control: '' + access: open + license: Apache 2.0 + intended_uses: Creating large amounts of instruction data, particularly with high + complexity + prohibited_uses: '' + monitoring: '' + feedback: https://huggingface.co/datasets/WizardLM/evol_instruct_70k/discussions diff --git a/assets/mosaic.yaml b/assets/mosaic.yaml index a58049f1..a887f3b4 100644 --- a/assets/mosaic.yaml +++ b/assets/mosaic.yaml @@ -1,25 +1,24 @@ --- -- access: open - analysis: Evaluated on a range of benchmarks and performed on par with LLaMA-7B. 
- created_date: 2023-05-05 - dependencies: [RedPajama-Data, C4, The Stack, Multimodal C4] + +- type: model + name: MPT + organization: Mosaic description: MPT is a series of large language models seeking to address the limitations of other open source models like LLaMA and Pythia. - feedback: '' - intended_uses: '' - license: Apache 2.0 - modality: - explanation: text - value: text; text + created_date: 2023-05-05 + url: https://www.mosaicml.com/blog/mpt-7b model_card: '' - monitoring: '' - name: MPT - organization: Mosaic - prohibited_uses: '' - quality_control: '' - size: 7B parameters (dense) + modality: text + analysis: Evaluated on a range of benchmarks and performed on par with LLaMA-7B. + size: 7B parameters + dependencies: [RedPajama-Data, C4, The Stack, Multimodal C4] training_emissions: unknown - training_hardware: 440 A100 40GB GPUs training_time: 9.5 days - type: model - url: https://www.mosaicml.com/blog/mpt-7b + training_hardware: 440 A100 40GB GPUs + quality_control: '' + access: open + license: Apache 2.0 + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' diff --git a/assets/nanyang.yaml b/assets/nanyang.yaml index e5b04469..02f40c24 100644 --- a/assets/nanyang.yaml +++ b/assets/nanyang.yaml @@ -1,28 +1,27 @@ --- -- access: open - analysis: Evaluated on researcher experiments to test deeper understanding and - advanced commonsense reasoning - created_date: 2023-05-05 - dependencies: [MIMIC-IT, OpenFlamingo] + +- type: model + name: Otter + organization: Nanyang Technological University description: Otter is a multi-modal model based on OpenFlamingo (open-sourced version of DeepMind’s Flamingo), trained on MIMIC-IT and showcasing improved instruction-following ability and in-context learning. - feedback: '' - intended_uses: Following and executing new instructions with few in-context learning - examples given image and textual input. - license: MIT - modality: - explanation: outputs text from text and image input - value: text; image, text + created_date: 2023-05-05 + url: https://arxiv.org/pdf/2305.03726v1.pdf model_card: https://github.com/Luodian/Otter/blob/main/docs/model_card.md - monitoring: '' - name: Otter - organization: Nanyang Technological University - prohibited_uses: '' - quality_control: '' - size: 1.3B parameters (dense) + modality: outputs text from text and image input + analysis: Evaluated on researcher experiments to test deeper understanding and + advanced commonsense reasoning + size: 1.3B parameters + dependencies: [MIMIC-IT, OpenFlamingo] training_emissions: '' - training_hardware: 4 RTX-3090 GPUs training_time: '' - type: model - url: https://arxiv.org/pdf/2305.03726v1.pdf + training_hardware: 4 RTX-3090 GPUs + quality_control: '' + access: open + license: MIT + intended_uses: Following and executing new instructions with few in-context learning + examples given image and textual input. 
+ prohibited_uses: '' + monitoring: '' + feedback: '' diff --git a/assets/naver.yaml b/assets/naver.yaml index 588ea0a4..a262bc3a 100644 --- a/assets/naver.yaml +++ b/assets/naver.yaml @@ -1,30 +1,31 @@ --- -- access: - explanation: The model is not available for download or direct API use - value: closed - analysis: '' + +# models +- type: model + name: HyperCLOVA + organization: Naver + description: HyperClova is an autoregressive language model created_date: - explanation: The date the model paper was submitted to arxiv value: 2021-05-21 + explanation: The date the model paper was submitted to arxiv + url: https://arxiv.org/abs/2109.04650 + model_card: '' + modality: text (Korean | 한국어) + size: 82B parameters (dense model) + analysis: '' dependencies: [] - description: HyperClova is an autoregressive language model - feedback: '' - intended_uses: '' + training_emissions: '' + training_time: 130.4 days + training_hardware: 1024 A100 GPUs + quality_control: '' + access: + value: closed + explanation: The model is not available for download or direct API use license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - modality: - explanation: text (Korean | 한국어) - value: text; text - model_card: '' - monitoring: '' - name: HyperCLOVA - organization: Naver + explanation: > + The asset isn't released, and hence the license is unknown. + intended_uses: '' prohibited_uses: '' - quality_control: '' - size: 82B parameters (dense) - training_emissions: '' - training_hardware: 1024 A100 GPUs - training_time: 130.4 days - type: model - url: https://arxiv.org/abs/2109.04650 + monitoring: '' + feedback: '' diff --git a/assets/neeva.yaml b/assets/neeva.yaml index c081acf3..18e64ffa 100644 --- a/assets/neeva.yaml +++ b/assets/neeva.yaml @@ -1,79 +1,85 @@ --- -- access: closed - analysis: '' + +- type: dataset + name: Neeva dataset + organization: Neeva + description: '' created_date: unknown + url: https://neeva.com/index datasheet: '' + modality: text + size: unknown + sample: [] + analysis: '' dependencies: [] - description: '' - excluded: '' - feedback: '' included: '' - intended_uses: '' + excluded: '' + quality_control: '' + access: closed license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - modality: {} + explanation: > + The asset isn't released, and hence the license is unknown. + intended_uses: '' + prohibited_uses: '' monitoring: '' - name: Neeva dataset + feedback: '' + +- type: model + name: Neeva model organization: Neeva - prohibited_uses: '' - quality_control: '' - sample: [] - size: unknown - type: dataset + description: '' + created_date: unknown url: https://neeva.com/index -- access: closed + model_card: '' + modality: text analysis: '' - created_date: unknown + size: unknown dependencies: [Neeva dataset] - description: '' - feedback: '' - intended_uses: '' + training_emissions: '' + training_time: '' + training_hardware: '' + quality_control: '' + access: closed license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - modality: {} - model_card: '' + explanation: > + The asset isn't released, and hence the license is unknown. 
+ intended_uses: '' + prohibited_uses: '' monitoring: '' - name: Neeva model + feedback: '' + +- type: application + name: NeevaAI organization: Neeva - prohibited_uses: '' - quality_control: '' - size: '' - training_emissions: '' - training_hardware: '' - training_time: '' - type: model - url: https://neeva.com/index -- access: open - adaptation: '' + description: NeevaAI is an AI-powered search tool that combines the capabilities + of LLMs with Neeva's independent in-house search stack to create a unique and + transformative search experience. created_date: + value: 2023-01-06 explanation: The [[Blog post]](https://neeva.com/blog/introducing-neevaai) announcing NeevaAI. - value: 2023-01-06 + url: https://neeva.com/blog/introducing-neevaai dependencies: [Neeva model] - description: NeevaAI is an AI-powered search tool that combines the capabilities - of LLMs with Neeva's independent in-house search stack to create a unique and - transformative search experience. - failures: '' - feedback: '' - intended_uses: '' + adaptation: '' + output_space: '' + quality_control: '' + access: open license: - explanation: "\"If you comply with these Terms, Neeva grants to you, during\ - \ the term of these Terms, a limited non-exclusive, non-transferable license,\ - \ with no right to sublicense, to download and install the App on your personal\ - \ computers, mobile handsets, tablets, wearable devices, and/or other devices\ - \ and to run the App solely for your own personal non-commercial purposes.\"\ - \ - excerpt from the Terms of Service document.\n" value: Custom + explanation: > + "If you comply with these Terms, Neeva grants to you, during the term of + these Terms, a limited non-exclusive, non-transferable license, with no + right to sublicense, to download and install the App on your personal + computers, mobile handsets, tablets, wearable devices, and/or other + devices and to run the App solely for your own personal non-commercial + purposes." - excerpt from the Terms of Service document. + terms_of_service: https://neeva.com/terms + intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' monthly_active_users: '' - name: NeevaAI - organization: Neeva - output_space: '' - prohibited_uses: '' - quality_control: '' - terms_of_service: https://neeva.com/terms - type: application - url: https://neeva.com/blog/introducing-neevaai user_distribution: '' + failures: '' diff --git a/assets/nextdoor.yaml b/assets/nextdoor.yaml index 1cb54b17..0dfafe79 100644 --- a/assets/nextdoor.yaml +++ b/assets/nextdoor.yaml @@ -1,23 +1,24 @@ --- -- access: open - adaptation: '' - created_date: 2023-05-02 - dependencies: [ChatGPT] + +- type: application + name: Nextdoor Assistant + organization: Nextdoor description: AI chatbot on Nextdoor that helps users write more clear and conscientious posts. 
- failures: '' - feedback: '' + created_date: 2023-05-02 + url: https://help.nextdoor.com/s/article/Introducing-Assistant + dependencies: [ChatGPT] + adaptation: '' + output_space: natural language text guidance + quality_control: '' + access: open + license: unknown + terms_of_service: '' intended_uses: to be used to help make the Nextdoor experience more positive for users - license: unknown + prohibited_uses: '' monitoring: '' + feedback: '' monthly_active_users: '' - name: Nextdoor Assistant - organization: Nextdoor - output_space: natural language text guidance - prohibited_uses: '' - quality_control: '' - terms_of_service: '' - type: application - url: https://help.nextdoor.com/s/article/Introducing-Assistant user_distribution: '' + failures: '' diff --git a/assets/nolano.yaml b/assets/nolano.yaml index 0f23e021..f2b8fe84 100644 --- a/assets/nolano.yaml +++ b/assets/nolano.yaml @@ -1,22 +1,23 @@ --- -- access: limited - adaptation: '' - created_date: 2023-03-19 - dependencies: [] - description: Cformers is a set of transformers that act as an API for AI inference - in code. - failures: '' - feedback: '' - intended_uses: '' - license: MIT - monitoring: '' - monthly_active_users: '' + +- type: application name: Cformers organization: Nolano + description: Cformers is a set of transformers that act as an API for AI inference + in code. + created_date: 2023-03-19 + url: https://www.nolano.org/services/Cformers/ + dependencies: [] + adaptation: '' output_space: '' - prohibited_uses: '' quality_control: '' + access: limited + license: MIT terms_of_service: '' - type: application - url: https://www.nolano.org/services/Cformers/ + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' + monthly_active_users: '' user_distribution: '' + failures: '' diff --git a/assets/notion.yaml b/assets/notion.yaml index d0f580c5..ba21e83f 100644 --- a/assets/notion.yaml +++ b/assets/notion.yaml @@ -1,23 +1,24 @@ --- -- access: limited - adaptation: '' - created_date: 2023-02-22 - dependencies: [Anthropic API] + +- type: application + name: Notion AI + organization: Notion description: Notion AI is a connected assistant that helps you think bigger, work faster, and augments your creativity, right inside the functional workspace you’re already familiar with. 
- failures: ''
- feedback: ''
- intended_uses: ''
- license: ''
- monitoring: ''
- monthly_active_users: ''
- name: Notion AI
- organization: Notion
+ created_date: 2023-02-22
+ url: https://www.notion.so/help/guides/notion-ai-for-docs
+ dependencies: [Anthropic API]
+ adaptation: ''
  output_space: ''
- prohibited_uses: ''
  quality_control: ''
+ access: limited
+ license: ''
  terms_of_service: ''
- type: application
- url: https://www.notion.so/help/guides/notion-ai-for-docs
+ intended_uses: ''
+ prohibited_uses: ''
+ monitoring: ''
+ feedback: ''
+ monthly_active_users: ''
  user_distribution: ''
+ failures: ''
diff --git a/assets/nvidia.yaml b/assets/nvidia.yaml
index 39e0f118..5f9e0978 100644
--- a/assets/nvidia.yaml
+++ b/assets/nvidia.yaml
@@ -1,100 +1,99 @@
 ---
-- access:
- explanation: "Neither the 8.3B parameter model trained to convergence nor the\ \ 1 trillion paramter model is available for download\n"
- value: closed
- analysis: ''
+
+# models
+- type: model
+ name: Megatron-LM
+ organization: NVIDIA
+ description: Megatron-LM is an autoregressive language model
  created_date:
- explanation: The date the paper for the 1 trillion parameter model was published
  value: 2021-04-09
+ explanation: The date the paper for the 1 trillion parameter model was published
+ url: https://arxiv.org/abs/2104.04473
+ model_card: none
+ modality: text (English)
+ analysis: ''
+ size: 1000B parameters (dense model)
  dependencies: []
- description: Megatron-LM is an autoregressive language model
- feedback: none
- intended_uses: none
+ training_emissions: unknown
+ training_time: 84 days
+ training_hardware: 3072 A100 GPUs
+ quality_control: unknown
+ access:
+ value: closed
+ explanation: >
+ Neither the 8.3B parameter model trained to convergence nor the 1 trillion
+ parameter model is available for download
  license:
- explanation: "The asset isn't released, and hence the license is unknown.\n"
  value: unknown
- modality:
- explanation: text (English)
- value: text; text
- model_card: none
+ explanation: >
+ The asset isn't released, and hence the license is unknown.
+ intended_uses: none + prohibited_uses: none monitoring: none - name: Megatron-LM + feedback: none + +- type: dataset + name: MineDojo organization: NVIDIA - prohibited_uses: none - quality_control: unknown - size: 1000B parameters (dense) - training_emissions: unknown - training_hardware: 3072 A100 GPUs - training_time: 84 days - type: model - url: https://arxiv.org/abs/2104.04473 -- access: open - analysis: '' + description: '' created_date: 2022-06-17 + url: https://arxiv.org/abs/2206.08853 datasheet: '' + modality: Videos, Text + size: 730k videos, 6k Wikipedia pages, 340k reddit posts + sample: [] + analysis: '' dependencies: [YouTube, Wikipedia, Reddit] - description: '' - excluded: '' - feedback: '' included: '' - intended_uses: '' + excluded: '' + quality_control: '' + access: open license: MIT - modality: - explanation: Videos, Text - value: text, video - monitoring: '' - name: MineDojo - organization: NVIDIA + intended_uses: '' prohibited_uses: '' - quality_control: '' - sample: [] - size: 730k videos, 6k Wikipedia pages, 340k reddit posts - type: dataset - url: https://arxiv.org/abs/2206.08853 -- access: open - analysis: '' + monitoring: '' + feedback: '' + +- type: dataset + name: VIMA dataset + organization: NVIDIA, Stanford + description: '' created_date: 2022-10-06 + url: https://vimalabs.github.io/ datasheet: '' + modality: text and image + size: 200M parameters (dense model) + sample: [] + analysis: '' dependencies: [T5, Mask R-CNN, VIMA dataset] - description: '' - excluded: '' - feedback: '' included: '' - intended_uses: '' + excluded: '' + quality_control: '' + access: open license: MIT - modality: - explanation: text and image - value: image, text + intended_uses: '' + prohibited_uses: '' monitoring: '' - name: VIMA dataset + feedback: '' + +- type: model + name: VIMA organization: NVIDIA, Stanford - prohibited_uses: '' - quality_control: '' - sample: [] - size: 200M parameters (dense model) - type: dataset + description: '' + created_date: 2022-10-06 url: https://vimalabs.github.io/ -- access: open + model_card: '' + modality: Simulation analysis: '' - created_date: 2022-10-06 + size: 650K successful trajectories dependencies: [] - description: '' - feedback: '' - intended_uses: '' - license: MIT - modality: - explanation: Simulation - value: other; other - model_card: '' - monitoring: '' - name: VIMA - organization: NVIDIA, Stanford - prohibited_uses: '' - quality_control: '' - size: 650K parameters (dense) training_emissions: '' - training_hardware: '' training_time: '' - type: model - url: https://vimalabs.github.io/ + training_hardware: '' + quality_control: '' + access: open + license: MIT + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' diff --git a/assets/openai.yaml b/assets/openai.yaml index ec2714f9..74e6779e 100644 --- a/assets/openai.yaml +++ b/assets/openai.yaml @@ -1,104 +1,118 @@ --- -- access: - explanation: "The GPT-3 dataset isn't released to the public, but it may be\ - \ available to Microsoft through the GPT-3 licencing agreement between OpenAI\ - \ and Microsoft [[OpenAI Blog Post]] (https://openai.com/blog/openai-licenses-gpt-3-technology-to-microsoft/).\n" - value: closed - analysis: "The GPT-3 paper, which also introduces the GPT-3 dataset, provides\ - \ a limited analysis on the GPT-3 dataset, reporting the dirtiness of the dataset\ - \ after the it was filtered for text occurring in common benchmarking tasks.\ - \ The authors report that \"as the dataset becomes more contaminated, the variance\ - \ of the clean over 
all fraction increases, but there is no apparent bias towards\ \ improved or degraded performance\" [[Appendix C]](https://arxiv.org/pdf/2005.14165.pdf#appendix.C).\n"
+
+# Datasets
+- type: dataset
+ name: GPT-3 dataset
+ # General
+ organization: OpenAI
+ description: >
+ The GPT-3 dataset is the text corpus that was used to train the GPT-3
+ model. Information on the GPT-3 dataset is limited to discussion in the
+ paper introducing GPT-3
+ [[Section 2.2]](https://arxiv.org/pdf/2005.14165.pdf#subsection.2.2).
  created_date:
- explanation: "The date for the public announcement of GPT-3. The GPT-3 dataset\ \ didn't have a specific release date separate from the model [[Open AI Blog\ \ Post]](https://openai.com/blog/openai-api/).\n"
  value: 2020-06-11
+ explanation: >
+ The date for the public announcement of GPT-3. The GPT-3
+ dataset didn't have a specific release date separate from the model
+ [[Open AI Blog Post]](https://openai.com/blog/openai-api/).
+ url: https://arxiv.org/pdf/2005.14165.pdf
  datasheet:
- explanation: No datasheet available as of 2022-04-04.
  value: none
+ explanation: No datasheet available as of 2022-04-04.
+ modality: text (English)
+ size: 570 GB
+ sample: []
+ analysis: >
+ The GPT-3 paper, which also introduces the GPT-3 dataset, provides a limited
+ analysis on the GPT-3 dataset, reporting the dirtiness of the dataset after
+ it was filtered for text occurring in common benchmarking tasks.
+ The authors report that "as the dataset becomes more contaminated, the
+ variance of the clean over all fraction increases, but there is no apparent
+ bias
+ towards improved or degraded performance"
+ [[Appendix C]](https://arxiv.org/pdf/2005.14165.pdf#appendix.C).
+ # Construction
  dependencies: [WebText]
- description: "The GPT-3 dataset is the text corpus that was used to train the\ \ GPT-3 model. Information on the GPT-3 dataset is limited to discussion in\ \ the paper introducing GPT-3 [[Section 2.2]](https://arxiv.org/pdf/2005.14165.pdf#subsection.2.2).\n"
- excluded: "The Common Crawl dataset was processed using a classifier that kept\ \ high quality documents and filtered low quality documents. WebText was used\ \ as a proxy for high quality documents [[Appendix A]](https://arxiv.org/pdf/2005.14165.pdf#appendix.A).\n"
- feedback:
- explanation: "There are no known (internal or external) feedback mechanisms\ \ for the GPT-3 dataset as of 2022-04-04.\n"
- value: unknown
- included: "The dataset is composed several NLP corpora: Common Crawl (filtered,\ \ 60%), WebText2 (22%), Books1 (8%), Books2 (8%), Wikipedia (3%) [[Section 2.2]](https://arxiv.org/pdf/2005.14165.pdf#subsection.2.2).\n"
- intended_uses: "The intended use of the GPT-3 dataset is to train language models.\n"
  license:
- explanation: "There is no known license specific to the GPT-3 dataset, however,\ \ the governing organization, OpenAI, licensed GPT-3 to Microsoft, which makes\ \ it likely that the GPT-3 dataset was also licensed [[OpenAI Blog Post]]\ \ (https://openai.com/blog/openai-licenses-gpt-3-technology-to-microsoft/).\n"
  value: unknown
- modality:
- explanation: text (English)
- value: text
+ explanation: >
+ There is no known license specific to the GPT-3 dataset, however,
+ the governing organization, OpenAI, licensed GPT-3 to Microsoft, which
+ makes it likely that the GPT-3 dataset was also licensed
+ [[OpenAI Blog Post]]
+ (https://openai.com/blog/openai-licenses-gpt-3-technology-to-microsoft/).
+ included: >
+ The dataset is composed of several NLP corpora: Common Crawl (filtered, 60%),
+ WebText2 (22%), Books1 (8%), Books2 (8%), Wikipedia (3%)
+ [[Section 2.2]](https://arxiv.org/pdf/2005.14165.pdf#subsection.2.2).
+ excluded: >
+ The Common Crawl dataset was processed using a classifier that kept high
+ quality documents and filtered low quality documents. WebText was used as a
+ proxy for high quality documents
+ [[Appendix A]](https://arxiv.org/pdf/2005.14165.pdf#appendix.A).
+ quality_control: >
+ In addition to excluding low quality documents from the Common Crawl
+ dataset, the authors fuzzily deduplicated documents within each dataset, by
+ removing documents that have high overlap with each other. The same
+ procedure was followed to fuzzily deduplicate WebText from Common Crawl
+ [[Appendix A]](https://arxiv.org/pdf/2005.14165.pdf#appendix.A).
+ Text occurring in benchmark datasets was also partially removed
+ [[Appendix C]](https://arxiv.org/pdf/2005.14165.pdf#appendix.C).
+ # Downstream
+ access:
+ value: closed
+ explanation: >
+ The GPT-3 dataset isn't released to the public, but it may be available
+ to Microsoft through the GPT-3 licensing agreement between OpenAI and
+ Microsoft [[OpenAI Blog Post]]
+ (https://openai.com/blog/openai-licenses-gpt-3-technology-to-microsoft/).
+ intended_uses: >
+ The intended use of the GPT-3 dataset is to train language models.
+ prohibited_uses:
+ value: unknown
+ explanation: >
+ OpenAI didn't provide a list of prohibited uses specifically for
+ the GPT-3 dataset. However, public OpenAI products are governed by the
+ OpenAI Terms of Use, which may also apply to the OpenAI dataset.
+ The OpenAI Terms of Use prohibit the following:
+ (i) Illegal activities, such as child pornography, gambling, cybercrime,
+ piracy, violating copyright, trademark or other intellectual property laws;
+ (ii) Accessing or authorizing anyone to access the APIs from an embargoed
+ country, region, or territory as prohibited by the U.S. government;
+ (iii) Threatening, stalking, defaming, defrauding, degrading, victimizing
+ or intimidating anyone for any reason
+ [[Open AI Terms of Use]](https://openai.com/api/policies/terms/).
  monitoring:
- explanation: "There are no known (internal or external) monitoring mechanisms\ \ that are in place for the use of the GPT-3 dataset as of 2022-04-04.\n"
  value: unknown
- name: GPT-3 dataset
- organization: OpenAI
- prohibited_uses:
- explanation: "OpenAI didn't provide a list of prohibited uses specifically for\ \ the GPT-3 dataset. However, public OpenAI products are governed by the OpenAI\ \ Terms of Use, which may also apply to the OpenAI dataset. The OpenAI Terms\ \ of Use prohibit the following: (i) Illegal activities, such as child pornography,\ \ gambling, cybercrime, piracy, violating copyright, trademark or other intellectual\ \ property laws; (ii) Accessing or authorizing anyone to access the APIs from\ \ an embargoed country, region, or territory as prohibited by the U.S. government;\ \ (iii) Threatening, stalking, defaming, defrauding, degrading, victimizing\ \ or intimidating anyone for any reason [[Open AI Terms of Use]](https://openai.com/api/policies/terms/).\n"
+ explanation: >
+ There are no known (internal or external) monitoring mechanisms
+ that are in place for the use of the GPT-3 dataset as of 2022-04-04.
+ feedback: value: unknown - quality_control: "In addition to excluding low quality documents from the Common\ - \ Crawl dataset, the authors fuzzily deduplicated documents within each dataset,\ - \ by removing documents that have high overlap with each other. The same procedure\ - \ was followed to fuzzily deduplicate WebText from Common Crawl [[Appendix A]](https://arxiv.org/pdf/2005.14165.pdf#appendix.A).\ - \ Text occuring in benchmark datasets were also partially removed [[Appendix\ - \ C]](https://arxiv.org/pdf/2005.14165.pdf#appendix.C).\n" - sample: [] - size: 570 GB - type: dataset - url: https://arxiv.org/pdf/2005.14165.pdf -- access: - explanation: "HumanEval dataset is publicly available and comes with a an evaluation\ - \ framework [[HumanEval GitHub Repository]](https://www.github.com/openai/human-eval).\n" - value: open - analysis: none + explanation: > + There are no known (internal or external) feedback mechanisms for + the GPT-3 dataset as of 2022-04-04. + +- type: dataset + name: HumanEval + # General + organization: OpenAI + description: > + HumanEval is a dataset of 164 programming problems hand-written to evaluate + their Codex model. created_date: - explanation: "The date that Codex, the model evaluated on the HumanEval dataset,\ - \ was announced to the public [[OpenAI Blog Post]](https://openai.com/blog/openai-codex/).\n" value: 2021-08-10 + explanation: > + The date that Codex, the model evaluated on the HumanEval + dataset, was announced to the public + [[OpenAI Blog Post]](https://openai.com/blog/openai-codex/). + url: https://arxiv.org/pdf/2107.03374.pdf datasheet: - explanation: No datasheet available as of 2022-04-10. value: none - dependencies: [] - description: "HumanEval is a dataset of 164 programming problems hand-written\ - \ to evaluate their Codex model.\n" - excluded: "Code problems easily found on the internet.\n" - feedback: "Email the authors [[Codex Paper]](https://arxiv.org/pdf/2107.03374.pdf).\n" - included: "164 hand-written questions.\n" - intended_uses: "Evaluating code generation capabilities of models.\n" - license: - explanation: https://github.com/openai/human-eval/blob/master/LICENSE - value: MIT - modality: - explanation: code (Python) - value: code - monitoring: none - name: HumanEval - organization: OpenAI - prohibited_uses: none - quality_control: "The evaluation dataset was handwritten to ensure that the evaluation\ - \ problems do not exist in the Codex dataset [[Section 2.2]](https://arxiv.org/pdf/2107.03374.pdf#subsection.2.2).\n" + explanation: No datasheet available as of 2022-04-10. + modality: code (Python) + size: 214 KB sample: - "\n\ndef string_sequence(n: int) -> str:\n \"\"\" Return a string containing\ \ space-delimited numbers starting from 0 upto n inclusive.\n >>> string_sequence(0)\n\ @@ -125,680 +139,936 @@ \ 'seven', 'eight' and 'nine'.\n Return the string with numbers sorted\ \ from smallest to largest\n >>> sort_numbers('three one five')\n 'one\ \ three five'\n \"\"\"\n" - size: 214 KB - type: dataset - url: https://arxiv.org/pdf/2107.03374.pdf -- access: - explanation: "The dataset might have been made available to Microsoft as part\ - \ of OpenAI giving Microsoft access to its Codex model [GitHub Copilot](https://copilot.github.com/).\n" - value: closed - analysis: - explanation: The paper doesn't provide an analysis on the training dataset. 
- value: none
+ analysis: none
+ # Construction
+ dependencies: []
+ license:
+ value: MIT
+ explanation: https://github.com/openai/human-eval/blob/master/LICENSE
+ included: >
+ 164 hand-written questions.
+ excluded: >
+ Code problems easily found on the internet.
+ quality_control: >
+ The evaluation dataset was handwritten to ensure that the evaluation
+ problems do not exist in the Codex dataset
+ [[Section 2.2]](https://arxiv.org/pdf/2107.03374.pdf#subsection.2.2).
+ # Downstream
+ access:
+ value: open
+ explanation: >
+ HumanEval dataset is publicly available and comes
+ with an evaluation framework
+ [[HumanEval GitHub Repository]](https://www.github.com/openai/human-eval).
+ intended_uses: >
+ Evaluating code generation capabilities of models.
+ prohibited_uses: none
+ monitoring: none
+ feedback: >
+ Email the authors [[Codex Paper]](https://arxiv.org/pdf/2107.03374.pdf).
+
+- type: dataset
+ name: Codex dataset
+ # General
+ organization: OpenAI
+ description: >
+ The dataset used to train the Codex model.
  created_date:
- explanation: "The date that Codex, the model trained on the Codex dataset, was\ \ announced to the public [[OpenAI Blog Post]](https://openai.com/blog/openai-codex/).\n"
  value: 2021-08-10
+ explanation: >
+ The date that Codex, the model trained on the Codex dataset,
+ was announced to the public
+ [[OpenAI Blog Post]](https://openai.com/blog/openai-codex/).
+ url: https://arxiv.org/pdf/2107.03374.pdf
  datasheet: none
+ modality: code
+ size:
+ value: 159 GB
+ explanation: As reported by the authors [[Section 3.1]](https://arxiv.org/pdf/2107.03374.pdf#subsection.3.1).
+ sample: []
+ analysis:
+ value: none
+ explanation: The paper doesn't provide an analysis on the training dataset.
+ # Construction
  dependencies: []
- description: "The dataset used to train the Codex model.\n"
- excluded: "Following were filtered from the dataset: autogenerated files; files\ \ with average line length > 100, maximum line length > 1000, or few alphanumeric\ \ characters [[Section 3.1]](https://arxiv.org/pdf/2107.03374.pdf#subsection.3.1).\n"
- feedback: "Email the authors [[Codex Paper]](https://arxiv.org/pdf/2107.03374.pdf).\n"
- included: "The dataset includes 54 million public software repositories hosted\ \ on GitHub as of an unspecified date in May 2020 [[Section 3.1]](https://arxiv.org/pdf/2107.03374.pdf#subsection.3.1).\n"
- intended_uses: Training language models on code.
  license:
- explanation: "The asset isn't released, and hence the license is unknown.\n"
  value: unknown
- modality: code
+ explanation: >
+ The asset isn't released, and hence the license is unknown.
+ included: >
+ The dataset includes 54 million public software repositories hosted on
+ GitHub as of an unspecified date in May 2020
+ [[Section 3.1]](https://arxiv.org/pdf/2107.03374.pdf#subsection.3.1).
+ excluded: >
+ Following were filtered from the dataset: autogenerated files; files with
+ average line length > 100, maximum line length > 1000, or few alphanumeric
+ characters
+ [[Section 3.1]](https://arxiv.org/pdf/2107.03374.pdf#subsection.3.1).
+ quality_control: >
+ Dataset was filtered using simple heuristics, as outlined in the excluded
+ field.
+ # Downstream
+ access:
+ value: closed
+ explanation: >
+ The dataset might have been made available to Microsoft
+ as part of OpenAI giving Microsoft access to its Codex model
+ [GitHub Copilot](https://copilot.github.com/).
+ intended_uses: Training language models on code.
+ prohibited_uses: unknown monitoring: unknown - name: Codex dataset + feedback: > + Email the authors + [[Codex Paper]](https://arxiv.org/pdf/2107.03374.pdf). + +- type: dataset + name: CLIP dataset + # General organization: OpenAI - prohibited_uses: unknown - quality_control: "Dataset was filtered using simple heuristics, as outlined in\ - \ the excluded field.\n" - sample: [] - size: - explanation: As reported by the authors [[Section 3.1]](https://arxiv.org/pdf/2107.03374.pdf#subsection.3.1). - value: 159 GB - type: dataset - url: https://arxiv.org/pdf/2107.03374.pdf -- access: - explanation: The dataset wasn't released to the public. - value: closed - analysis: "The dataset contained some overlap with the test sets of the benchmarks\ - \ used for evaluation, but the authors determined the impact to be small: \"\ - There is a median overlap of 2.2% and an average overlap of 3.2%. Due to this\ - \ small amount of overlap, overall accuracy is rarely shifted by more than 0.1%\ - \ with only 7 datasets above this threshold\" [[Section 5]](https://arxiv.org/pdf/2103.00020.pdf#section.5).\n" + description: > + CLIP dataset contains text-image pairs crawled from the internet. created_date: - explanation: "The date of the blog post announcing CLIP [[OpenAI Blog Post]](https://openai.com/blog/clip/).\n" value: 2021-01-05 + explanation: > + The date of the blog post announcing CLIP + [[OpenAI Blog Post]](https://openai.com/blog/clip/). + url: https://arxiv.org/pdf/2103.00020.pdf datasheet: none + modality: text (English) and image + size: 400M (image, text) pairs + sample: [] + analysis: > + The dataset contained some overlap with the test sets of the benchmarks used + for evaluation, but the authors determined the impact to be small: "There + is a median overlap of 2.2% and an average overlap of 3.2%. Due to this + small amount of overlap, overall accuracy is rarely shifted by more than + 0.1% with only 7 datasets above this threshold" + [[Section 5]](https://arxiv.org/pdf/2103.00020.pdf#section.5). + # Construction dependencies: [] - description: "CLIP dataset contains text-image pairs crawled from the internet.\n" - excluded: none - feedback: - explanation: The feedback mechanisms in place are unknown. + license: value: unknown - included: "Data crawled from the internet, without any filtering (including de-duplication)\ - \ or curation.\n" + explanation: > + The asset doesn't have a known license as it hasn't been publicly + released. + included: > + Data crawled from the internet, without any filtering (including + de-duplication) or curation. + excluded: none + quality_control: > + The data was "only crawled websites that had policies against excessively + violent and adult images and allowed us to filter out such content" + [[Model Card]](https://github.com/openai/CLIP/blob/main/model-card.md). + # Downstream + access: + value: closed + explanation: The dataset wasn't released to the public. intended_uses: Training multimodal vision models. - license: - explanation: "The asset doesn't have a known license as it hasn't been publicly\ - \ released.\n" + prohibited_uses: value: unknown - modality: - explanation: text (English) and image - value: image, text + explanation: The prohibited uses of the dataset are unknown. monitoring: + value: unknown explanation: The monitoring mechanisms in place are unknown. + feedback: value: unknown - name: CLIP dataset + explanation: The feedback mechanisms in place are unknown. 
+ +- type: dataset + name: DALL·E dataset + # General organization: OpenAI - prohibited_uses: - explanation: The prohibited uses of the dataset are unknown. - value: unknown - quality_control: "The data was \"only crawled websites that had policies against\ - \ excessively violent and adult images and allowed us to filter out such content\"\ - \ [[Model Card]](https://github.com/openai/CLIP/blob/main/model-card.md).\n" + description: > + DALL·E dataset is the training set consisting of image and text pairs + collected to train the DALL·E model. + created_date: + value: 2021-01-05 + explanation: > + The date of the blog post announcing DALL·E + [[OpenAI Blog Post]](https://openai.com/blog/dall-e/). + url: https://arxiv.org/abs/2102.12092 + datasheet: none + modality: text (English) and image + size: > + 250M (image, text) pairs sample: [] - size: 400M (image, text) pairs - type: dataset - url: https://arxiv.org/pdf/2103.00020.pdf -- access: - explanation: The dataset wasn't released to the public. - value: closed analysis: The authors found that the dataset contained 21% of the images in the MS-COCO validation set, but observed no significant changes in the performance of the accompanying DALL·E when tested on MS-COCO evaluation set with and without the said images [[Section 3.1]](https://arxiv.org/pdf/2102.12092.pdf#subsection.3.1). - created_date: - explanation: "The date of the blog post announcing DALL·E [[OpenAI Blog Post]](https://openai.com/blog/dall-e/).\n" - value: 2021-01-05 - datasheet: none + # Construction dependencies: [] - description: "DALL·E dataset is the training set consisting of image and text\ - \ pairs collected to train the DALL·E model.\n" - excluded: "MS-COCO was excluded from the dataset, but because MS-COCO was created\ - \ from YFCC100M, some of the test images (not the captions) were included.\n" - feedback: - explanation: The feedback mechanisms in place are unknown. + license: value: unknown - included: "Data from the internet, including Conceptual Captions and a filtered\ - \ subset of YFCC100M.\n" + explanation: > + The asset doesn't have a known license as it hasn't been publicly + released. + included: > + Data from the internet, including Conceptual Captions and a filtered subset + of YFCC100M. + excluded: > + MS-COCO was excluded from the dataset, but because MS-COCO was created from + YFCC100M, some of the test images (not the captions) were included. + quality_control: > + The data was de-duplicated + [[Section 3.2]](https://arxiv.org/pdf/2102.12092.pdf#subsection.3.2). + The data collected from the internet was filtered using image, text and + joint image and text filters, which included: "discarding instances whose + captions are too short, are classified as non-English by the Python package + cld3, or that consist primarily of boilerplate phrases such as “photographed + on ”, where matches various formats for dates that we found in + the data". The authors also discard "instances whose images have aspect + ratios not in [1/2, 2]" + [[Appendix C]](https://arxiv.org/pdf/2102.12092.pdf#appendix.C). + # Downstream + access: + value: closed + explanation: The dataset wasn't released to the public. intended_uses: Training multimodal vision models. - license: - explanation: "The asset doesn't have a known license as it hasn't been publicly\ - \ released.\n" + prohibited_uses: value: unknown - modality: - explanation: text (English) and image - value: image, text + explanation: The prohibited uses of the dataset are unknown. 
monitoring: + value: unknown explanation: The monitoring mechanisms in place are unknown. + feedback: value: unknown - name: DALL·E dataset + explanation: The feedback mechanisms in place are unknown. + +- type: dataset + name: Whisper dataset + # General organization: OpenAI - prohibited_uses: - explanation: The prohibited uses of the dataset are unknown. - value: unknown - quality_control: "The data was de-duplicated [[Section 3.2]](https://arxiv.org/pdf/2102.12092.pdf#subsection.3.2).\ - \ The data collected from the internet was filtered using image, text and joint\ - \ image and text filters, which included: \"discarding instances whose captions\ - \ are too short, are classified as non-English by the Python package cld3, or\ - \ that consist primarily of boilerplate phrases such as “photographed on ”,\ - \ where matches various formats for dates that we found in the data\"\ - . The authors also discard \"instances whose images have aspect ratios not in\ - \ [1/2, 2]\" [[Appendix C]](https://arxiv.org/pdf/2102.12092.pdf#appendix.C).\n" - sample: [] - size: "250M (image, text) pairs\n" - type: dataset - url: https://arxiv.org/abs/2102.12092 -- access: - explanation: "The dataset isn't released to the public.\n" - value: closed - analysis: "The Whisper paper provides limited details on preprocessing.\n" + description: > + The Whisper dataset is the speech corpus that was used to train the Whisper + model. Information on the dataset is limited to discussion in the + paper introducing Whisper. + [[Section 2.1]](https://cdn.openai.com/papers/whisper.pdf). created_date: - explanation: "The date for the public announcement of Whisper. The dataset didn't\ - \ have a specific release date separate from the model [[Open AI Blog Post]](https://openai.com/blog/whisper/).\n" value: 2022-09-21 + explanation: > + The date for the public announcement of Whisper. The + dataset didn't have a specific release date separate from the model + [[Open AI Blog Post]](https://openai.com/blog/whisper/). + url: https://cdn.openai.com/papers/whisper.pdf datasheet: - explanation: No datasheet available as of 2022-12-07. value: none + explanation: No datasheet available as of 2022-12-07. + modality: speech + size: 680k hours + sample: [] + analysis: > + The Whisper paper provides limited details on preprocessing. + # Construction dependencies: [] - description: "The Whisper dataset is the speech corpus that was used to train\ - \ the Whisper model. Information on the dataset is limited to discussion in\ - \ the paper introducing Whisper. [[Section 2.1]](https://cdn.openai.com/papers/whisper.pdf).\n" - excluded: "Automated filtering was conducted.\n" - feedback: - explanation: "There are no known (internal or external) feedback mechanisms\ - \ for the dataset as of 2022-12-07.\n" - value: unknown - included: "The dataset is composed three major sources: multilingual speech recognition\ - \ (17%), translation (18%), and English speech recognition (65%). [[Figure 11]](https://cdn.openai.com/papers/whisper.pdf).\n" - intended_uses: The intended use is to train speech models. license: - explanation: "The asset doesn't have a known license as it hasn't been publicly\ - \ released.\n" value: unknown - modality: - explanation: speech - value: audio + explanation: > + The asset doesn't have a known license as it hasn't been publicly + released. + included: > + The dataset is composed three major sources: multilingual speech recognition + (17%), translation (18%), and English speech recognition (65%). 
+ [[Figure 11]](https://cdn.openai.com/papers/whisper.pdf). + excluded: > + Automated filtering was conducted. + quality_control: > + In addition to filtering, basic text standardization was done. + # Downstream + access: + value: closed + explanation: > + The dataset isn't released to the public. + intended_uses: The intended use is to train speech models. + prohibited_uses: + value: none + explanation: > + No uses are explicitly prohibited. monitoring: - explanation: "There are no known (internal or external) monitoring mechanisms\ - \ that are in place for the use of the dataset as of 2022-12-07.\n" value: none - name: Whisper dataset + explanation: > + There are no known (internal or external) monitoring mechanisms + that are in place for the use of the dataset as of 2022-12-07. + feedback: + value: unknown + explanation: > + There are no known (internal or external) feedback mechanisms for + the dataset as of 2022-12-07. + +- type: dataset + name: WebText + # General organization: OpenAI - prohibited_uses: - explanation: "No uses are explicitly prohibited.\n" - value: none - quality_control: "In addition to filtering, basic text standardization was done.\n" - sample: [] - size: 680k hours - type: dataset - url: https://cdn.openai.com/papers/whisper.pdf -- access: closed - analysis: '' + description: '' created_date: + value: 2019-11-01 explanation: Due to the lack of information about the exact date, it is assumed to be the 1st of the known month of creation. - value: 2019-11-01 + url: https://cdn.openai.com/better-language-models/language_models_are_unsupervised_multitask_learners.pdf datasheet: '' + modality: text (English) + size: 40 GB + sample: [] + analysis: '' + # Construction dependencies: [] - description: '' - excluded: '' - feedback: '' - included: '' - intended_uses: '' license: - explanation: "The asset doesn't have a known license as it hasn't been publicly\ - \ released.\n" value: unknown - modality: {} + explanation: > + The asset doesn't have a known license as it hasn't been publicly + released. + included: '' + excluded: '' + quality_control: '' + # Downstream + access: closed + intended_uses: '' + prohibited_uses: '' monitoring: '' - name: WebText + feedback: '' + +# Models +- type: model + name: GPT-2 + # General organization: OpenAI - prohibited_uses: '' - quality_control: '' - sample: [] - size: 40 GB - type: dataset - url: https://cdn.openai.com/better-language-models/language_models_are_unsupervised_multitask_learners.pdf -- access: open - analysis: '' + description: '' created_date: + value: 2019-11-01 explanation: Due to the lack of information about the exact date, it is assumed to be the 1st of the known month of creation. - value: 2019-11-01 + url: https://cdn.openai.com/better-language-models/language_models_are_unsupervised_multitask_learners.pdf + model_card: https://github.com/openai/gpt-2/blob/master/model_card.md + modality: text (English) + size: 1.5B parameters (dense model) + analysis: '' + # Construction dependencies: [WebText] - description: '' - feedback: '' - intended_uses: '' + training_emissions: '' + training_time: '' + training_hardware: '' + quality_control: '' + # Downstream + access: open license: - explanation: "GPT-2 uses a [[modified MIT License]](https://github.com/openai/gpt-2/blob/master/LICENSE).\n" value: Modified MIT License - modality: {} - model_card: https://github.com/openai/gpt-2/blob/master/model_card.md + explanation: > + GPT-2 uses a + [[modified MIT License]](https://github.com/openai/gpt-2/blob/master/LICENSE). 
+ intended_uses: '' + prohibited_uses: '' monitoring: '' - name: GPT-2 + feedback: '' + +- type: model + name: GPT-3 + # General organization: OpenAI - prohibited_uses: '' - quality_control: '' - size: '' - training_emissions: '' - training_hardware: '' - training_time: '' - type: model - url: https://cdn.openai.com/better-language-models/language_models_are_unsupervised_multitask_learners.pdf -- access: - explanation: "The GPT-3 model isn't fully released to the public, but it was\ - \ made available to Microsoft through the licencing agreement between OpenAI\ - \ and Microsoft [[OpenAI Blog Post]] (https://openai.com/blog/openai-licenses-gpt-3-technology-to-microsoft/).\ - \ The public can access the model through the Open AI API, which is available\ - \ in supported countries [[Supported Countries]](https://beta.openai.com/docs/supported-countries)\ - \ [[OpenAI API]](https://openai.com/api/).\n" - value: limited - analysis: "The GPT-3 model was evaluated on language modeling, closed-book question\ - \ answering, translation, Winograd-style tasks, commonsense reasoning, reading\ - \ comprehension, SuperGLUE, NLI, synthetic tasks, and generation [[Section 4]](https://arxiv.org/pdf/2005.14165.pdf#section.4);\ - \ as well as on fairness and biases [[Section 6]](https://arxiv.org/pdf/2005.14165.pdf#section.6).\n" + description: > + GPT-3 is an autoregressive language model. created_date: - explanation: "The date that GPT-3 was announced to the public [[OpenAI Blog\ - \ Post]](https://openai.com/blog/openai-api/).\n" value: 2020-06-11 - dependencies: [GPT-3 dataset] - description: "GPT-3 is an autoregressive language model.\n" - feedback: "Feedback for GPT-3 can be provided on the feedback form linked in the\ - \ model card [[Model Card]](https://github.com/openai/gpt-3/blob/master/model-card.md).\ - \ The form is especially meant to collect feedback on concerns about misuse,\ - \ synthetic text detection, bias, and risk of generative language models.\n" - intended_uses: "GPT-3 was intended to be use through the OpenAI API by developers\ - \ for language applications. Other intended use of GPT-3 include researchers\ - \ accessing the model through the API to study its paradigms [[Model Card]](https://github.com/openai/gpt-3/blob/master/model-card.md).\n" - license: - explanation: "The asset doesn't have a known license as it hasn't been publicly\ - \ released.\n" - value: unknown - modality: - explanation: text (English) - value: text; text + explanation: > + The date that GPT-3 was announced to the public + [[OpenAI Blog Post]](https://openai.com/blog/openai-api/). + url: https://arxiv.org/pdf/2005.14165.pdf model_card: https://github.com/openai/gpt-3/blob/master/model-card.md - monitoring: "OpenAI reviews all use cases of the model [[Model Card]](https://github.com/openai/gpt-3/blob/master/model-card.md).\n" - name: GPT-3 - organization: OpenAI - prohibited_uses: "Access to GPT-3 is governed by Open AI API Usage Guidelines\ - \ and API Terms of Use, prohibiting the use of the API in a way that causes\ - \ societal harm. [[Usage Guidelines]] (https://beta.openai.com/docs/usage-guidelines/content-policy)\ - \ [[Terms of Use]](https://openai.com/api/policies/terms/). The list of disallowed\ - \ applications can be found in the usage guidelines [[Disallowed Applications]]\ - \ (https://beta.openai.com/docs/usage-guidelines/disallowed-applications).\n" - quality_control: "One quality control method OpenAI employed was releasing GPT-3\ - \ only through the OpenAI API. 
OpenAI states that it is easier to respond to\ - \ misuse when the access to the model is gated through the API. It also hints\ - \ that it plans to broaden the API access over time based on the amount of misuse\ - \ [[OpenAI API Blog Post]](https://openai.com/blog/openai-api/). The authors\ - \ identify potential misuses of GPT-3 in the paper and analyze it for fairness,\ - \ bias and representation issues, but do not identify mitigation strategies\ - \ [[Section 6]](https://arxiv.org/pdf/2005.14165.pdf#section.6).\n" - size: 175B parameters (dense) + modality: text (English) + size: + value: 175B parameters (dense model) + explanation: > + GPT-3 comes in several sizes. Here we report the size of the Davinci + model, the largest GPT-3 model served through the OpenAI API. Sizes of the + other models can be found in the paper + [[Table 2.1]](https://arxiv.org/pdf/2005.14165.pdf#table.caption.7). + analysis: > + The GPT-3 model was evaluated on language modeling, closed-book question + answering, translation, Winograd-style tasks, commonsense reasoning, + reading comprehension, SuperGLUE, NLI, synthetic tasks, and generation + [[Section 4]](https://arxiv.org/pdf/2005.14165.pdf#section.4); + as well as on fairness and biases + [[Section 6]](https://arxiv.org/pdf/2005.14165.pdf#section.6). + # Construction + dependencies: [GPT-3 dataset] training_emissions: - explanation: "Estimate of the CO2(e) emissions for GPT-3 were not provided by\ - \ OpenAI, but they were provided by a follow up work investigating the CO2\ - \ equivalent emissions (CO2e) of GPT-3 [[Patterson et al.]] (https://arxiv.org/ftp/arxiv/papers/2104/2104.10350.pdf).\n" value: 552.1 tCO2e - training_hardware: - explanation: "The original paper doesn't specify the training hardware for GPT-3,\ - \ but a follow up blog post indicates that it was trained on a cluster on\ - \ Azure cluster, using 10000 GPUs with 400 Gbps [[Microsoft Blog Post]] (https://blogs.microsoft.com/ai/openai-azure-supercomputer/).\n" - value: Azure + explanation: > + Estimates of the CO2(e) emissions for GPT-3 were not provided + by OpenAI, but they were provided by a follow up work investigating the CO2 + equivalent emissions (CO2e) of GPT-3 + [[Patterson et al.]] + (https://arxiv.org/ftp/arxiv/papers/2104/2104.10350.pdf). training_time: - explanation: "The time required to train different sized GPT-3 models are listed\ - \ in [[Table D.1]](https://arxiv.org/pdf/2005.14165.pdf#table.caption.50).\ - \ The time required to train the GPT-3 model with 175B parameters is reported\ - \ as 3.64E+03 petaflop/s-days.\n" value: 3640 petaflop/s-days - type: model - url: https://arxiv.org/pdf/2005.14165.pdf -- access: - explanation: "The model is made available via the OpenAI API [[OpenAI API]](https://openai.com/api/)\ - \ as code-cushman-001 according to the [[Model Index]](https://platform.openai.com/docs/model-index-for-researchers).\n" + explanation: > + The times required to train different sized GPT-3 models are listed in + [[Table D.1]](https://arxiv.org/pdf/2005.14165.pdf#table.caption.50). + The time required to train the GPT-3 model with 175B parameters is + reported as 3.64E+03 petaflop/s-days. + training_hardware: + value: Azure + explanation: > + The original paper doesn't specify the training hardware for GPT-3, + but a follow up blog post indicates that it was trained on an + Azure cluster, using 10000 GPUs with 400 Gbps + [[Microsoft Blog Post]] + (https://blogs.microsoft.com/ai/openai-azure-supercomputer/).
+ quality_control: > + One quality control method OpenAI employed was releasing GPT-3 only through + the OpenAI API. OpenAI states that it is easier to respond to misuse when + the access to the model is gated through the API. It also hints that it + plans to broaden the API access over time based on the amount of misuse + [[OpenAI API Blog Post]](https://openai.com/blog/openai-api/). + The authors identify potential misuses of GPT-3 in the paper and analyze + it for fairness, bias and representation issues, but do not identify + mitigation strategies + [[Section 6]](https://arxiv.org/pdf/2005.14165.pdf#section.6). + # Downstream + access: value: limited + explanation: > + The GPT-3 model isn't fully released to the public, but + it was made available to Microsoft through the licensing agreement between + OpenAI and Microsoft + [[OpenAI Blog Post]] + (https://openai.com/blog/openai-licenses-gpt-3-technology-to-microsoft/). + The public can access the model through the Open AI API, which is available + in supported countries + [[Supported Countries]](https://beta.openai.com/docs/supported-countries) + [[OpenAI API]](https://openai.com/api/). license: - explanation: "The asset doesn't have a known license as it hasn't been publicly\ - \ released.\n" - value: unknown - modality: - explanation: text (English) and code - value: code, text; code, text - model_card: none - monitoring: - explanation: "There isn't any known monitoring in place for the model, but there\ - \ may be internal mechanisms.\n" value: unknown + explanation: > + The asset doesn't have a known license as it hasn't been publicly + released. + intended_uses: > + GPT-3 was intended to be used through the OpenAI API by developers for + language applications. Other intended uses of GPT-3 include researchers + accessing the model through the API to study its paradigms + [[Model Card]](https://github.com/openai/gpt-3/blob/master/model-card.md). + prohibited_uses: > + Access to GPT-3 is governed by Open AI API Usage Guidelines and API Terms + of Use, prohibiting the use of the API in a way that causes societal harm. + [[Usage Guidelines]] + (https://beta.openai.com/docs/usage-guidelines/content-policy) + [[Terms of Use]](https://openai.com/api/policies/terms/). + The list of disallowed applications can be found in the usage guidelines + [[Disallowed Applications]] + (https://beta.openai.com/docs/usage-guidelines/disallowed-applications). + monitoring: > + OpenAI reviews all use cases of the model + [[Model Card]](https://github.com/openai/gpt-3/blob/master/model-card.md). + feedback: > + Feedback for GPT-3 can be provided on the feedback form linked in the + model card + [[Model Card]](https://github.com/openai/gpt-3/blob/master/model-card.md).
+ The form is especially meant to collect feedback on concerns about misuse, + synthetic text detection, bias, and risk of generative language models. + +- type: model name: Codex + # General organization: OpenAI - prohibited_uses: - explanation: The prohibited uses of the model aren't specified. - value: unknown - quality_control: "The model wasn't fully released to the public as a quality control\ - \ measure. The authors identify potential risks of Codex in their paper due\ - \ to the following: over-reliance, misalignment, bias and representation, economic\ - \ and labor market impacts, security implications, environmental impact and\ - \ legal implications. They also make suggestions for some of these, but do not\ - \ implement them in Codex [[Section 7]](https://arxiv.org/pdf/2107.03374.pdf#section.7).\n" - size: 12B parameters (dense) + description: > + Codex is a GPT language model fine-tuned on publicly available code from + GitHub. + created_date: + value: 2021-08-10 + explanation: > + The date that Codex was announced to the public + [[OpenAI Blog Post]](https://openai.com/blog/openai-codex/). + url: https://arxiv.org/pdf/2107.03374.pdf + model_card: none + modality: text (English) and code + size: 12B parameters (dense model) + analysis: > + The model was evaluated using the HumanEval dataset with pass@k metric and + BLEU scores + [[Section 2]](https://arxiv.org/pdf/2107.03374.pdf#section.2). + # Construction + dependencies: [GPT-3, Codex dataset, HumanEval] training_emissions: - explanation: Authors do not report the training emissions. value: unknown - training_hardware: - explanation: "The paper specifies that Azure was used, but the underlying architecture\ - \ wasn't specified.\n" - value: Azure + explanation: Authors do not report the training emissions. training_time: - explanation: "Authors estimate hundreds of petaflop/s-days of compute [[Section\ - \ 7.6]](https://arxiv.org/pdf/2107.03374.pdf#subsection.7.6), but don't provide\ - \ an exact number.\n" value: 100-1000 petaflop/s-days - type: model - url: https://arxiv.org/pdf/2107.03374.pdf -- access: - explanation: "The model is made available via the OpenAI API but the specific\ - \ endpoint (davinci-instruct-beta) is currently unavailable. [[Model Index]](https://platform.openai.com/docs/model-index-for-researchers).\n" - value: closed - analysis: The model was evaluated on human ratings to the InstructGPT answers - to the prompts submitted to the OpenAI API as well as on public NLP datasets - spanning truthfulness, toxicity, and bias, question answering, reading comprehension, - and summarization tasks. - created_date: - explanation: "Date of the public announcement introducing InstructGPT [[OpenAI\ - \ Blog Post]] (https://openai.com/blog/instruction-following/).\n" - value: 2022-01-27 - dependencies: [GPT-3, OpenAI API] - description: "InstructGPT is a family of GPT-3 based models fine-tuned on human\ - \ feedback, which allows for better instruction following capabilities than\ - \ GPT-3.\n" - feedback: "Email the authors [[InstructGPT Paper]](https://arxiv.org/pdf/2203.02155.pdf).\n" - intended_uses: "As stated in the model card: \"The intended direct users of InstructGPT\ - \ are developers who access its capabilities via the OpenAI API. Through the\ - \ OpenAI API, the model can be used by those who may not have AI development\ - \ experience, to build and explore language modeling systems across a wide range\ - \ of functions. 
We also anticipate that the model will continue to be used by\ - \ researchers to better understand the behaviors, capabilities, biases, and\ - \ constraints of large-scale language models\" [[Model Card]](https://github.com/openai/following-instructions-human-feedback/blob/main/model-card.md).\n" + explanation: > + Authors estimate hundreds of petaflop/s-days of compute + [[Section 7.6]](https://arxiv.org/pdf/2107.03374.pdf#subsection.7.6), but + don't provide an exact number. + training_hardware: + value: Azure + explanation: > + The paper specifies that Azure was used, but the underlying + architecture wasn't specified. + quality_control: > + The model wasn't fully released to the public as a quality control measure. + The authors identify potential risks of Codex in their paper due to the + following: over-reliance, misalignment, bias and representation, economic + and labor market impacts, security implications, environmental impact and + legal implications. They also make suggestions for some of these, but do not + implement them in Codex + [[Section 7]](https://arxiv.org/pdf/2107.03374.pdf#section.7). + # Downstream + access: + value: limited + explanation: > + The model is made available via the OpenAI API + [[OpenAI API]](https://openai.com/api/) as code-cushman-001 according to the + [[Model Index]](https://platform.openai.com/docs/model-index-for-researchers). license: - explanation: "The asset doesn't have a known license as it hasn't been publicly\ - \ released.\n" value: unknown - modality: - explanation: text (English) and code - value: code, text; code, text - model_card: https://github.com/openai/following-instructions-human-feedback/blob/main/model-card.md + explanation: > + The asset doesn't have a known license as it hasn't been publicly + released. + intended_uses: > + Codex is intended to be used for coding related language modelling tasks. + prohibited_uses: + value: unknown + explanation: The prohibited uses of the model aren't specified. monitoring: - explanation: "There isn't any known monitoring in place for the model, but there\ - \ may be internal mechanisms.\n" value: unknown + explanation: > + There isn't any known monitoring in place for the model, but there + may be internal mechanisms. + feedback: > + Email the authors + [[Codex Paper]](https://arxiv.org/pdf/2107.03374.pdf). + +- type: model name: InstructGPT + # General organization: OpenAI - prohibited_uses: "Access to InstructGPT is governed by Open AI API Usage Guidelines\ - \ and API Terms of Use, prohibiting the use of the API in a way that causes\ - \ societal harm. [[Usage Guidelines]] (https://beta.openai.com/docs/usage-guidelines/content-policy)\ - \ [[Terms of Use]](https://openai.com/api/policies/terms/). The list of disallowed\ - \ applications can be found in the usage guidelines [[Disallowed Applications]]\ - \ (https://beta.openai.com/docs/usage-guidelines/disallowed-applications).\n" - quality_control: "The model wasn't fully released to the public as a quality control\ - \ measure.\n" - size: 175B parameters (dense) - training_emissions: - explanation: The authors do not estimate the emissions of the model. - value: unknown - training_hardware: - explanation: The authors do not disclose the training hardware used. 
- value: unknown - training_time: - explanation: "175B SFT model required 4.9 petaflops/s-days; 175B PPO-ptx model\ - \ required 60 petaflops/s-days [[Section 5]](https://arxiv.org/pdf/2203.02155.pdf#section.5).\n" - value: 60 petaflops/s-days - type: model - url: https://arxiv.org/pdf/2203.02155.pdf -- access: - explanation: "Models are available at [https://github.com/openai/whisper](https://github.com/openai/whisper).\n" - value: open - analysis: The model was evaluated for zero-shot English and multingual speech - recognition, translation, language identification and robustness to noise. + description: > + InstructGPT is a family of GPT-3 based models fine-tuned on human feedback, + which allows for better instruction following capabilities than GPT-3. created_date: - explanation: "The date that Whisper was announced to the public [[OpenAI Blog\ - \ Post]](https://openai.com/blog/whisper/).\n" - value: 2022-09-21 - dependencies: [Whisper dataset] - description: "GPT-3 is a speech recognition model.\n" - feedback: "The discussions page of the codebase is not formally cited as a place\ - \ for feedback, but is being used in this way [[Discussions page]](https://github.com/openai/whisper/discussions)\n" - intended_uses: "Whisper is a general-purpose speech recognition model; it is a\ - \ multi-task model that can perform multilingual speech recognition as well\ - \ as speech translation and language identification.\n" + value: 2022-01-27 + explanation: > + Date of the public announcement introducing InstructGPT + [[OpenAI Blog Post]] (https://openai.com/blog/instruction-following/). + url: https://arxiv.org/pdf/2203.02155.pdf + model_card: https://github.com/openai/following-instructions-human-feedback/blob/main/model-card.md + modality: text (English) and code + size: + value: 175B parameters (dense model) + explanation: Size of the largest InstructGPT model. + analysis: The model was evaluated on human ratings to the InstructGPT answers + to the prompts submitted to the OpenAI API as well as on public NLP datasets + spanning truthfulness, toxicity, and bias, question answering, reading comprehension, + and summarization tasks. + # Construction + dependencies: [GPT-3, OpenAI API] + training_emissions: + value: unknown + explanation: The authors do not estimate the emissions of the model. + training_time: + value: 60 petaflops/s-days + explanation: > + 175B SFT model required 4.9 petaflops/s-days; 175B PPO-ptx model required + 60 petaflops/s-days + [[Section 5]](https://arxiv.org/pdf/2203.02155.pdf#section.5). + training_hardware: + value: unknown + explanation: The authors do not disclose the training hardware used. + quality_control: > + The model wasn't fully released to the public as a quality control measure. + # Downstream + access: + value: closed + explanation: > + The model is made available via the OpenAI API but the specific endpoint (davinci-instruct-beta) + is currently unavailable. + [[Model Index]](https://platform.openai.com/docs/model-index-for-researchers). license: - explanation: https://github.com/openai/whisper](https://github.com/openai/whisper - value: MIT - modality: - explanation: Speech (multilingual, though primarily English) - value: audio; audio - model_card: https://github.com/openai/whisper/blob/main/model-card.md + value: unknown + explanation: > + The asset doesn't have a known license as it hasn't been publicly + released. 
+ intended_uses: > + As stated in the model card: "The intended direct users of InstructGPT are + developers who access its capabilities via the OpenAI API. Through the + OpenAI API, the model can be used by those who may not have AI development + experience, to build and explore language modeling systems across a wide + range of functions. We also anticipate that the model will continue to be + used by researchers to better understand the behaviors, capabilities, + biases, and constraints of large-scale language models" + [[Model Card]](https://github.com/openai/following-instructions-human-feedback/blob/main/model-card.md). + prohibited_uses: > + Access to InstructGPT is governed by Open AI API Usage Guidelines and API + Terms + of Use, prohibiting the use of the API in a way that causes societal harm. + [[Usage Guidelines]] + (https://beta.openai.com/docs/usage-guidelines/content-policy) + [[Terms of Use]](https://openai.com/api/policies/terms/). + The list of disallowed applications can be found in the usage guidelines + [[Disallowed Applications]] + (https://beta.openai.com/docs/usage-guidelines/disallowed-applications). monitoring: - explanation: "There isn't any known monitoring in place for the model, but there\ - \ may be internal mechanisms.\n" value: unknown + explanation: > + There isn't any known monitoring in place for the model, but there + may be internal mechanisms. + feedback: > + Email the authors [[InstructGPT Paper]](https://arxiv.org/pdf/2203.02155.pdf). + +- type: model name: Whisper + # General organization: OpenAI - prohibited_uses: unknown - quality_control: No specific quality control methods are documented. - size: 1.5B parameters (dense) + description: > + Whisper is a speech recognition model. + created_date: + value: 2022-09-21 + explanation: > + The date that Whisper was announced to the public + [[OpenAI Blog Post]](https://openai.com/blog/whisper/). + url: https://cdn.openai.com/papers/whisper.pdf + model_card: https://github.com/openai/whisper/blob/main/model-card.md + modality: Speech (multilingual, though primarily English) + size: + value: 1.5B parameters (dense model) + explanation: > + Whisper comes in several sizes (39M, 74M, 244M, 769M, 1.55B). Here we report + the size of the largest + model [[Table 1]](https://cdn.openai.com/papers/whisper.pdf). + analysis: The model was evaluated for zero-shot English and multilingual speech + recognition, translation, language identification and robustness to noise. + # Construction + dependencies: [Whisper dataset] training_emissions: - explanation: "Authors do not report the training emissions.\n" value: unknown + explanation: > + Authors do not report the training emissions. training_time: - explanation: "Authors do not report the training time.\n" value: unknown + explanation: > + Authors do not report the training time.
+ training_hardware: + value: Azure + explanation: > + The original paper doesn't specify the training hardware for Whisper, + but we expect it is trained on Azure given other models from OpenAI are trained + using Azure + [[Microsoft Blog Post]] + (https://blogs.microsoft.com/ai/openai-azure-supercomputer/). + quality_control: No specific quality control methods are documented. + # Downstream + access: value: open - analysis: The model was evaluated on standard vision datasets (e.g. CIFAR10, ImageNet) - and showed robust state of the art results. - created_date: - explanation: "The date of the blog post announcing CLIP [[OpenAI Blog Post]](https://openai.com/blog/clip/).\n" - value: 2021-01-05 - dependencies: [CLIP dataset] - description: "\"CLIP (Contrastive Language-Image Pre-Training) is a neural network\ - \ trained on a variety of (image, text) pairs. It can be instructed in natural\ - \ language to predict the most relevant text snippet, given an image, without\ - \ directly optimizing for the task, similarly to the zero-shot capabilities\ - \ of GPT-2 and 3. We found CLIP matches the performance of the original ResNet50\ - \ on ImageNet “zero-shot” without using any of the original 1.28M labeled examples,\ - \ overcoming several major challenges in computer vision\" [[CLIP Repository]](https://github.com/openai/CLIP).\n" - feedback: Questions can be shared at the feedback form linked in the CLIP model - card [[Model Card]](https://github.com/openai/CLIP/blob/main/model-card.mdlicen). - intended_uses: "The model is intended to be used by AI researchers to better understand\ - \ \"robustness, generalization, and other capabilities, biases, and constraints\ - \ of computer vision models\" [[CLIP Model Card]](https://github.com/openai/CLIP/blob/main/model-card.md).\n" - license: MIT - modality: - explanation: Text (English) and Image - value: image, text; image, text - model_card: https://github.com/openai/CLIP/blob/main/model-card.md + explanation: > + Models are available at [https://github.com/openai/whisper](https://github.com/openai/whisper). + license: + value: MIT + explanation: https://github.com/openai/whisper](https://github.com/openai/whisper + intended_uses: > + Whisper is a general-purpose speech recognition model; it is a multi-task + model that can perform multilingual speech recognition as well as speech + translation and language identification. + prohibited_uses: unknown monitoring: - explanation: There are no monitoring mechanisms in place for CLIP. value: none + explanation: > + No monitoring is mentioned by the authors. + feedback: > + The discussions page of the codebase is not formally cited as a place + for feedback, but is being used in this way [[Discussions page]](https://github.com/openai/whisper/discussions) + +- type: model name: CLIP + # General organization: OpenAI - prohibited_uses: "\"Any deployed use case of the model - whether commercial or\ - \ not - is currently out of scope. Non-deployed use cases such as image search\ - \ in a constrained environment, are also not recommended unless there is thorough\ - \ in-domain testing of the model with a specific, fixed class taxonomy. This\ - \ is because our safety assessment demonstrated a high need for task specific\ - \ testing especially given the variability of CLIP’s performance with different\ - \ class taxonomies. 
This makes untested and unconstrained deployment of the\ - \ model in any use case currently potentially harmful.\nCertain use cases which\ - \ would fall under the domain of surveillance and facial recognition are always\ - \ out-of-scope regardless of performance of the model. This is because the use\ - \ of artificial intelligence for tasks such as these can be premature currently\ - \ given the lack of testing norms and checks to ensure its fair use.\nSince\ - \ the model has not been purposefully trained in or evaluated on any languages\ - \ other than English, its use should be limited to English language use cases\"\ - \ [[Model Card]](https://github.com/openai/CLIP/blob/main/model-card.mdlicen).\n" - quality_control: "The authors found that the performance of the model depended\ - \ heavily on which classes are included (and excluded) for a given task. They\ - \ reported significant race and gender based disparities on the Fairface dataset,\ - \ depending on how the classes were constructed. The authors also demonstrated\ - \ that the model was capable of racial profiling with high accuracy [[Section\ - \ 7]](https://arxiv.org/pdf/2103.00020.pdf#section.7).\n" - size: unknown - training_emissions: unknown - training_hardware: NVIDIA V100 GPUs - training_time: - explanation: "The exact training time of CLIP depends on the vision and language\ - \ encoders used: \"The largest ResNet model, RN50x64, took 18 days to train\ - \ on 592 V100 GPUs while the largest Vision Transformer took 12 days on 256\ - \ V100 GPUs. For the ViT-L/14 we also pre-train at a higher 336 pixel resolution\ - \ for one additional epoch to boost performance ... Unless otherwise specified,\ - \ all results reported in this paper as “CLIP” use this model which we found\ - \ to perform best\" [[CLIP paper]](https://arxiv.org/pdf/2103.00020.pdf).\n\ - Using the GPU Time method outlined in the [[OpenAI AI and Computer Blog]](https://openai.com/blog/ai-and-compute/#addendum),\ - \ we can compute the training time in petaflop/s-day for RN50x64 and ViT-L/14\ - \ with the following equation: Number of GPUs * (peta-flops/GPU) * days trained\ - \ * estimated utilization. We will assume that estimated utilization is 33%,\ - \ following [[OpenAI AI and Computer Blog]](https://openai.com/blog/ai-and-compute/#addendum).\ - \ The specific V100 GPU used isn't cleared from the paper, so we will assume\ - \ that V100 PCle was used. The V100 PCle GPUs have a single precision performance\ - \ of 15.7 teraflops [[V100 Datasheet]](https://images.nvidia.com/content/technologies/volta/pdf/volta-v100-datasheet-update-us-1165301-r5.pdf),\ - \ which is equal to 15.7 / 1000 = 0.0157 petaflops.\nFollowing the formula\ - \ \"Number of GPUs * (peta-flops/GPU) * days trained * estimated utilization\"\ - , we can compute the petaflop/s-day for RN50x64 as follows: 592 * 0.0157 *\ - \ 18 * 0.33. Similarly, for ViT-L/14, we can compute the same as: 256 * 0.0157\ - \ * 12 * 0.33. Adding the two, we estimate the total training time as 71.12\ - \ petaflop/s-day.\n" - value: 71.12 petaflop/s-day - type: model - url: https://arxiv.org/pdf/2103.00020.pdf -- access: - explanation: Available via the OpenAI API as of Q1 2023. - value: limited - analysis: "The model was evaluated against three prior approaches, AttnGAN, DM-GAN,\ - \ and DF-GAN using Inception Score and Fréchet Inception Distance on MS-COCO\ - \ as metrics. 
The model was also evaluated by humans and received the majority\ - \ of the votes in generating images that look realistic and better match the\ - \ caption when compared to the images generated by DF-GAN [[Section]](https://arxiv.org/pdf/2102.12092.pdf#section.3).\n" + description: > + "CLIP (Contrastive Language-Image Pre-Training) is a neural network trained + on a variety of (image, text) pairs. It can be instructed in natural + language to predict the most relevant text snippet, given an image, + without directly optimizing for the task, similarly to the zero-shot + capabilities of GPT-2 and 3. We found CLIP matches the performance of the + original ResNet50 on ImageNet “zero-shot” without using any of the original + 1.28M labeled examples, overcoming several major challenges in computer + vision" + [[CLIP Repository]](https://github.com/openai/CLIP). created_date: - explanation: "The date of the blog post announcing DALL·E [[OpenAI Blog Post]](https://openai.com/blog/dall-e/).\n" value: 2021-01-05 - dependencies: [DALL·E dataset] - description: "DALL·E is a GPT-3 based model trained to generate images from text\ - \ descriptions. The authors found that it had \"a diverse set of capabilities,\ - \ including creating anthropomorphized versions of animals and objects, combining\ - \ unrelated concepts in plausible ways, rendering text, and applying transformations\ - \ to existing images\" [[OpenAI Blog Post]](https://openai.com/blog/dall-e/).\n" - feedback: "Contact the paper author(s) specified on the paper [[Paper]](https://arxiv.org/pdf/2102.12092.pdf).\n" - intended_uses: "\"The model is intended for others to use for training their own\ - \ generative models\" [[Model Card]](https://github.com/openai/DALL-E/blob/master/model_card.md).\n" - license: - explanation: "The asset doesn't have a known license as it hasn't been publicly\ - \ released.\n" + explanation: > + The date of the blog post announcing CLIP + [[OpenAI Blog Post]](https://openai.com/blog/clip/). + url: https://arxiv.org/pdf/2103.00020.pdf + model_card: https://github.com/openai/CLIP/blob/main/model-card.md + modality: Text (English) and Image + size: value: unknown - modality: - explanation: Text (English) and Image - value: image, text; image, text - model_card: https://github.com/openai/DALL-E/blob/master/model_card.md + explanation: > + The total size is unknown, but the largest CLIP model is a + a combination of 63M-parameter (dense) text encoder and a 307M-parameter + vision encoder. + analysis: The model was evaluated on standard vision datasets (e.g. CIFAR10, ImageNet) + and showed robust state of the art results. + # Construction + dependencies: [CLIP dataset] + training_emissions: unknown + training_time: + value: 71.12 petaflop/s-day + explanation: > + The exact training time of CLIP depends on the vision and language + encoders used: "The largest ResNet model, RN50x64, took 18 days to train + on 592 V100 GPUs while the largest Vision Transformer took 12 days on 256 + V100 GPUs. For the ViT-L/14 we also pre-train at a higher 336 pixel + resolution for one additional epoch to boost performance ... Unless + otherwise specified, all results reported in this paper as “CLIP” use this + model which we found to perform best" + [[CLIP paper]](https://arxiv.org/pdf/2103.00020.pdf). 
+ + Using the GPU Time method outlined in the + [[OpenAI AI and Computer Blog]](https://openai.com/blog/ai-and-compute/#addendum), + we can compute the training time in petaflop/s-day for RN50x64 and + ViT-L/14 with the following equation: Number of GPUs * (peta-flops/GPU) * + days trained * estimated utilization. + We will assume that estimated utilization is 33%, following + [[OpenAI AI and Computer Blog]](https://openai.com/blog/ai-and-compute/#addendum). + The specific V100 GPU used isn't clear from the paper, so we will assume + that V100 PCIe was used. The V100 PCIe GPUs have a single precision + performance of 15.7 teraflops + [[V100 Datasheet]](https://images.nvidia.com/content/technologies/volta/pdf/volta-v100-datasheet-update-us-1165301-r5.pdf), + which is equal to 15.7 / 1000 = 0.0157 petaflops. + + Following the formula "Number of GPUs * (peta-flops/GPU) * days trained * + estimated utilization", we can compute the petaflop/s-day for RN50x64 as + follows: + 592 * 0.0157 * 18 * 0.33. + Similarly, for ViT-L/14, we can compute the same as: + 256 * 0.0157 * 12 * 0.33. + Adding the two, we estimate the total training time as 71.12 + petaflop/s-day. + training_hardware: NVIDIA V100 GPUs + quality_control: > + The authors found that the performance of the model depended heavily on + which classes are included (and excluded) for a given task. They reported + significant race and gender based disparities on the Fairface dataset, + depending on how the classes were constructed. The authors also demonstrated + that the model was capable of racial profiling with high accuracy + [[Section 7]](https://arxiv.org/pdf/2103.00020.pdf#section.7). + # Downstream + access: + value: open + explanation: > + Model checkpoints and the helper code can be accessed + at the official CLIP repository + [[CLIP Repository]](https://github.com/openai/CLIP). + license: MIT + intended_uses: > + The model is intended to be used by AI researchers to better understand + "robustness, generalization, and other capabilities, biases, and constraints + of computer vision models" + [[CLIP Model Card]](https://github.com/openai/CLIP/blob/main/model-card.md). + prohibited_uses: > + "Any deployed use case of the model - whether commercial or not - is + currently out of scope. Non-deployed use cases such as image search in a + constrained environment, are also not recommended unless there is thorough + in-domain testing of the model with a specific, fixed class taxonomy. + This is because our safety assessment demonstrated a high need for task + specific testing especially given the variability of CLIP’s performance + with different class taxonomies. This makes untested and unconstrained + deployment of the model in any use case currently potentially harmful. + + Certain use cases which would fall under the domain of surveillance and + facial recognition are always out-of-scope regardless of performance of the + model. This is because the use of artificial intelligence for tasks such as + these can be premature currently given the lack of testing norms and checks + to ensure its fair use. + + Since the model has not been purposefully trained in or evaluated on any + languages other than English, its use should be limited to English language + use cases" + [[Model Card]](https://github.com/openai/CLIP/blob/main/model-card.md). monitoring: - explanation: There are no monitoring mechanisms in place for CLIP. value: none
+ feedback: Questions can be shared at the feedback form linked in the CLIP model + card [[Model Card]](https://github.com/openai/CLIP/blob/main/model-card.md). + +- type: model name: DALL·E + # General organization: OpenAI - prohibited_uses: - explanation: The prohibited uses of the model are unknown. - value: unknown - quality_control: unknown - size: 12B parameters (dense) + description: > + DALL·E is a GPT-3 based model trained to generate images from text + descriptions. The authors found that it had "a diverse set of capabilities, + including creating anthropomorphized versions of animals and objects, + combining unrelated concepts in plausible ways, rendering text, and + applying transformations to existing images" + [[OpenAI Blog Post]](https://openai.com/blog/dall-e/). + created_date: + value: 2021-01-05 + explanation: > + The date of the blog post announcing DALL·E + [[OpenAI Blog Post]](https://openai.com/blog/dall-e/). + url: https://arxiv.org/pdf/2102.12092.pdf + model_card: https://github.com/openai/DALL-E/blob/master/model_card.md + modality: Text (English) and Image + size: 12B parameters (dense model) + analysis: > + The model was evaluated against three prior approaches, AttnGAN, DM-GAN, and + DF-GAN using Inception Score and Fréchet Inception Distance on MS-COCO as + metrics. The model was also evaluated by humans and received the majority + of the votes in generating images that look realistic and better match the + caption when compared to the images generated by DF-GAN + [[Section]](https://arxiv.org/pdf/2102.12092.pdf#section.3). + # Construction + dependencies: [DALL·E dataset] training_emissions: - explanation: The training emissions were not reported. value: unknown + explanation: The training emissions were not reported. training_time: + value: unknown explanation: The training emissions were not reported. + training_hardware: NVIDIA V100 GPUs + quality_control: unknown + # Downstream + access: + value: limited + explanation: Available via the OpenAI API as of Q1 2023. + license: value: unknown - type: model - url: https://arxiv.org/pdf/2102.12092.pdf -- access: - explanation: The model can be downloaded from the [Github repository](https://github.com/openai/jukebox) - value: open - analysis: Evaluations in paper are primarily considering the fidelity and novelty - of samples from Jukebox. + explanation: > + The asset doesn't have a known license as it hasn't been publicly + released. + intended_uses: > + "The model is intended for others to use for training their own generative + models" + [[Model Card]](https://github.com/openai/DALL-E/blob/master/model_card.md). + prohibited_uses: + value: unknown + explanation: The prohibited uses of the model are unknown. + monitoring: + value: none + explanation: There are no monitoring mechanisms in place for DALL·E. + feedback: > + Contact the paper author(s) specified on the paper + [[Paper]](https://arxiv.org/pdf/2102.12092.pdf). + + +- type: model + name: Jukebox + organization: OpenAI + description: Jukebox is a generative model that produces music created_date: - explanation: The date the model paper was released value: 2020-04-30 + explanation: The date the model paper was released + url: https://arxiv.org/abs/2005.00341 + model_card: + value: none + explanation: No model card found as of 2023-01-09 + modality: Audio (music) + size: 5B parameters + analysis: Evaluations in the paper primarily consider the fidelity and novelty + of samples from Jukebox.
dependencies: [Jukebox Dataset] - description: Jukebox is a generate model that produces music - feedback: none - intended_uses: '' + training_emissions: unknown + training_time: 4 weeks + training_hardware: 510 V100s + quality_control: '' + access: + value: open + explanation: The model can be downloaded from the [Github repository](https://github.com/openai/jukebox) license: - explanation: "The license is provided in the [Github repository](https://github.com/openai/jukebox).\n" value: Noncommercial Use License - modality: - explanation: Audio (music) - value: audio; audio - model_card: - explanation: No model card found as of 2023-01-09 - value: none + explanation: > + The license is provided in the [Github repository](https://github.com/openai/jukebox). + intended_uses: '' + prohibited_uses: none monitoring: none - name: Jukebox + feedback: none + +- type: model + name: DALL·E 2 + # General organization: OpenAI - prohibited_uses: none - quality_control: '' - size: 5B parameters (dense) - training_emissions: unknown - training_hardware: 510 V100s - training_time: 4 weeks - type: model - url: https://arxiv.org/abs/2005.00341 -- access: - explanation: "The model is available to OpenAI employees, researchers, creatives\ - \ and company friends. OpenAI opened a waitlist for DALL·E 2 access. [[System\ - \ Card]] (https://github.com/openai/dalle-2-preview/blob/main/system-card.md#access).\n" - value: limited - analysis: "The model is capable of generating explicit content and the researchers\ - \ found limited amount of spurious content generated. The researchers also found\ - \ that visual synonyms can be used to prompt the model to surface unwanted generations\ - \ [[Probes and Evaluations]] (https://github.com/openai/dalle-2-preview/blob/main/system-card.md#probes-and-evaluations).\n" + description: > + "DALL·E 2 is an artificial intelligence model that takes a text prompt + and/or existing image as an input and generates a new image as an output" + [[System Card]] + (https://github.com/openai/dalle-2-preview/blob/main/system-card.md). + The model wasn't fully released, but OpenAI released a version of the model + (DALL·E 2 Preview) to a select group of testers. created_date: - explanation: "OpenAI released in a blog post in April 2020 [[OpenAI Blog Post]](https://openai.com/dall-e-2/).\n" value: 2022-04-13 + explanation: > + OpenAI announced DALL·E 2 in a blog post in April 2022 + [[OpenAI Blog Post]](https://openai.com/dall-e-2/).
+ url: https://arxiv.org/abs/2204.06125 model_card: https://github.com/openai/dalle-2-preview/blob/main/system-card.md - monitoring: "Uses of the model are monitored. In the preview version, any user\ - \ can flag content. The specific policies for monitoring are not disclosed,\ - \ but possible measures include disabling of accounts violating the content\ - \ policies [[Monitoring and Reporting]] (https://github.com/openai/dalle-2-preview/blob/main/system-card.md#monitoring-and-reporting).\n" - name: DALL·E 2 - organization: OpenAI - prohibited_uses: "Use of the model is governed by the OpenAI Content Policy, which\ - \ prohibits posting of G rated content. Users are not allowed to utilize the\ - \ model in commercial products in the preview version [[Content Policy]] (https://github.com/openai/dalle-2-preview/blob/main/system-card.md#policies-and-enforcement).\n" - quality_control: "The model isn't fully released to the public as part of a quality\ - \ control measure. The usage of the model by testers is monitored and user provided\ - \ prompts are filtered [[Input filters]] (https://github.com/openai/dalle-2-preview/blob/main/system-card.md#input-filters).\n" + modality: Text (English) and Image size: unknown + analysis: > + The model is capable of generating explicit content and the researchers + found limited amount of spurious content generated. The researchers also + found that visual synonyms can be used to prompt the model to surface + unwanted generations + [[Probes and Evaluations]] + (https://github.com/openai/dalle-2-preview/blob/main/system-card.md#probes-and-evaluations). + # Construction + dependencies: [DALL·E dataset, CLIP dataset] training_emissions: unknown - training_hardware: unknown training_time: unknown - type: model - url: https://arxiv.org/abs/2204.06125 -- access: - explanation: "The OpenAI API is available to the public in supported countries\ - \ [[Supported Countries]](https://beta.openai.com/docs/supported-countries)\ - \ [[OpenAI API]](https://openai.com/api/).\n" + training_hardware: unknown + quality_control: > + The model isn't fully released to the public as part of a quality control + measure. The usage of the model by testers is monitored and user provided + prompts are filtered + [[Input filters]] + (https://github.com/openai/dalle-2-preview/blob/main/system-card.md#input-filters). + # Downstream + access: value: limited - adaptation: The API exposes the models fairly direclty with a range of hyperparameters - (e.g. temperature scaling). + explanation: > + The model is available to OpenAI employees, researchers, creatives + and company friends. OpenAI opened a waitlist for DALL·E 2 access. + [[System Card]] + (https://github.com/openai/dalle-2-preview/blob/main/system-card.md#access). + license: + value: unknown + explanation: > + The asset isn't released, and hence the license is unknown. + intended_uses: > + "The intended use of the DALL·E 2 Preview at this time is for personal, + non-commercial exploration and research purposes by people who are + interested in understanding the potential uses of these capabilities" + [[Use]] + (https://github.com/openai/dalle-2-preview/blob/main/system-card.md#use). + prohibited_uses: > + Use of the model is governed by the OpenAI Content Policy, which prohibits + posting of G rated content. + Users are not allowed to utilize the model in commercial products in the + preview version + [[Content Policy]] + (https://github.com/openai/dalle-2-preview/blob/main/system-card.md#policies-and-enforcement). 
+ monitoring: > + Uses of the model are monitored. In the preview version, any user can flag + content. The specific policies for monitoring are not disclosed, but + possible measures include disabling of accounts violating the content + policies + [[Monitoring and Reporting]] + (https://github.com/openai/dalle-2-preview/blob/main/system-card.md#monitoring-and-reporting). + feedback: Feedback can be provided at support at openai.com. + +- type: application + name: OpenAI API + # General + organization: OpenAI + description: > + OpenAI API is a general purpose "text in, text out" interface connecting + users with a suite of language models. The API was initially released + as a gateway to GPT-3, but it now supports access to other, more + specialized OpenAI models. + [[Open AI Blog Post]](https://openai.com/blog/openai-api/) created_date: - explanation: "The date that OpenAI API was announced to the public [[Open AI\ - \ Blog Post]](https://openai.com/blog/openai-api/).\n" value: 2020-06-11 + explanation: > + The date that OpenAI API was announced to the public + [[Open AI Blog Post]](https://openai.com/blog/openai-api/). + url: https://openai.com/api/ + # Construction dependencies: - GPT-3 - Codex @@ -809,559 +1079,588 @@ - Whisper - DALL·E - GPT-4 - description: "OpenAI API is a general purpose \"text in, text out\" interface\ - \ connecting users with a suite of language models. The API was initially released\ - \ as a gateway to GPT-3, but it now supports access to other, more specialized\ - \ OpenAI models. [[Open AI Blog Post]](https://openai.com/blog/openai-api/)\n" - failures: - explanation: "There are no known documented failures of the OpenAI API at the\ - \ time of writing.\n" - value: unknown - feedback: - explanation: "There is no known specific feedback channel for the OpenAI API,\ - \ but OpenAI support theme can be reached via email at support at openai.com.\n" - value: unknown - intended_uses: "OpenAI API was designed to be used by developers to empower applications,\ - \ and researchers to study language models [[Section 3]](https://openai.com/api/policies/terms/).\n" + adaptation: The API exposes the models fairly direclty with a range of hyperparameters + (e.g. temperature scaling). + output_space: > + Given a prompting text, the OpenAI API provides access to text completions, + and log probabilities. The support for text and code embeddings were added + on 2022-01-25 + [[OpenAI Blog Post]] + (https://openai.com/blog/introducing-text-and-code-embeddings/). + quality_control: > + Given a prompt, OpenAI API checks whether a completion contains unsafe + language using its filters and marks the completion accordingly if so. + The API also provides developers with special endpoints that scope the + API usage. OpenAI also developed user guidelines to help developers + understand safety issues + [[OpenAI API]](https://openai.com/api/). + # Downstream + access: + value: limited + explanation: > + The OpenAI API is available to the public in supported countries + [[Supported Countries]](https://beta.openai.com/docs/supported-countries) + [[OpenAI API]](https://openai.com/api/). + terms_of_service: https://openai.com/api/policies/terms/ license: + value: custom explanation: Per the Terms of Use, a limited license is provided to the users during their use of the API [[Section 2]](https://openai.com/api/policies/terms/). 
- value: custom - monitoring: "OpenAI may monitor the API use to ensure \"quality and improve OpenAI\ - \ systems, products and services; perform research; and ensure compliance\"\ - \ with the Terms of Service and all applicable laws. Users of the API will give\ - \ OpenAI reasonable access to their application to monitor compliance with the\ - \ terms listed in the Terms of Service [[Section 5(b)]](https://openai.com/api/policies/terms/).\ - \ Apps using the OpenAI API should submit an application once they are deployed\ - \ to real users. The review form takes 10 minutes to complete and over 97% of\ - \ the applications are directly accepted or conditionally accepted. The applicants\ - \ are notified of the decision within 2 business days [[App Review Guidelines]]\ - \ (https://beta.openai.com/docs/usage-guidelines/app-review).\n" + intended_uses: > + OpenAI API was designed to be used by developers to empower applications, + and researchers to study language models + [[Section 3]](https://openai.com/api/policies/terms/). + prohibited_uses: > + OpenAI API Terms of Use prohibits the use of the API in a way violating + the applicable law, including: (i) "Illegal activities, such as child + pornography, gambling, cybercrime, piracy, violating copyright, + trademark or other intellectual property laws"; (ii) "Accessing or + authorizing anyone to access the APIs from an embargoed country, region, or + territory as prohibited by the U.S. government"; (iii) "Threatening, + stalking, defaming, defrauding, degrading, victimizing or intimidating + anyone for any reason". + The usage requirements are detailed in the Terms of Use + [[Section 3]](https://openai.com/api/policies/terms/). + monitoring: > + OpenAI may monitor the API use to ensure "quality and improve OpenAI + systems, products and services; perform research; and ensure compliance" + with the Terms of Service and all applicable laws. Users of the API will + give OpenAI reasonable access to their application to monitor compliance + with the terms listed in the Terms of Service + [[Section 5(b)]](https://openai.com/api/policies/terms/). + Apps using the OpenAI API should submit an application once they are + deployed to real users. The review form takes 10 minutes to complete and + over 97% of the applications are directly accepted or conditionally + accepted. The applicants are notified of the decision within 2 business + days + [[App Review Guidelines]] + (https://beta.openai.com/docs/usage-guidelines/app-review). + feedback: + value: unknown + explanation: > + There is no known specific feedback channel for the OpenAI API, + but OpenAI support team can be reached via email at support at openai.com. + # Deployment monthly_active_users: - explanation: "The number of monthly active users is not known publicly, but\ - \ OpenAI mentioned that the API was being used by tens of thousands of developers\ - \ in a blog post from 2021-11-18 [[OpenAI Blog Post]](https://openai.com/blog/api-no-waitlist/).\n" value: unknown - name: OpenAI API - organization: OpenAI - output_space: "Given a prompting text, the OpenAI API provides access to text\ - \ completions, and log probabilities.
The support for text and code embeddings\ - \ were added on 2022-01-25 [[OpenAI Blog Post]] (https://openai.com/blog/introducing-text-and-code-embeddings/).\n" - prohibited_uses: "OpenAI API Terms of Use prohibits the use of the API in a way\ - \ violating the applicable law, including: (i) \"Illegal activities, such as\ - \ child pornography, gambling, cybercrime, piracy, violating copyright, trademark\ - \ or other intellectual property laws\"; (ii) \"Accessing or authorizing anyone\ - \ to access the APIs from an embargoed country, region, or territory as prohibited\ - \ by the U.S. government\"; (iii) \"Threatening, stalking, defaming, defrauding,\ - \ degrading, victimizing or intimidating anyone for any reason\". The usage\ - \ requirements are detailed in the Terms of Use [[Section 3]](https://openai.com/api/policies/terms/).\n" - quality_control: "Given a prompt, OpenAI API checks whether a completion contains\ - \ unsafe language using its filters and marks the completion accordingly if\ - \ so. The API also provides developers with special endpoints that scope the\ - \ API usage. OpenAI also developed user guidelines to help developers understand\ - \ safety issues [[OpenAI API]](https://openai.com/api/).\n" - terms_of_service: https://openai.com/api/policies/terms/ - type: application - url: https://openai.com/api/ + explanation: > + The number of monthly active users is not known publicly, but + OpenAI mentioned that the API was being used by tens of thousands of + developers in a blog post from 2021-11-18 + [[OpenAI Blog Post]](https://openai.com/blog/api-no-waitlist/). user_distribution: - explanation: "The distribution of the users is not known, but we estimate majority\ - \ of the users to be developers based in the United States.\n" value: unknown -- access: open - analysis: '' + explanation: > + The distribution of the users is not known, but we estimate + majority of the users to be developers based in the United States. + failures: + value: unknown + explanation: > + There are no known documented failures of the OpenAI API at the + time of writing. + +- type: model + name: VPT + organization: OpenAI + description: '' created_date: 2022-06-23 + url: https://arxiv.org/abs/2206.11795 + model_card: '' + modality: video + analysis: '' + size: 500M parameters (dense model) dependencies: [web_clean] - description: '' - feedback: '' - intended_uses: '' + training_emissions: '' + training_time: '' + training_hardware: '' + quality_control: '' + access: open license: MIT - modality: - explanation: video - value: video; video - model_card: '' + intended_uses: '' + prohibited_uses: '' monitoring: '' - name: VPT + feedback: '' + +- type: dataset + name: web_clean organization: OpenAI - prohibited_uses: '' - quality_control: '' - size: 500M parameters (dense) - training_emissions: '' - training_hardware: '' - training_time: '' - type: model - url: https://arxiv.org/abs/2206.11795 -- access: closed - analysis: '' + description: '' created_date: 2022-06-23 + url: https://arxiv.org/abs/2206.11795 datasheet: '' + modality: Video + size: 70k hours + sample: [] + analysis: '' dependencies: [] - description: '' - excluded: '' - feedback: '' included: '' - intended_uses: '' + excluded: '' + quality_control: '' + access: closed license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - modality: - explanation: Video - value: video + explanation: > + The asset isn't released, and hence the license is unknown. 
+ intended_uses: '' + prohibited_uses: '' monitoring: '' - name: web_clean + feedback: '' + +- type: application + name: ChatGPT organization: OpenAI - prohibited_uses: '' - quality_control: '' - sample: [] - size: 70k hours - type: dataset - url: https://arxiv.org/abs/2206.11795 -- access: open - adaptation: '' + description: ChatGPT is an artificial intelligence chatbot developed by OpenAI created_date: 2022-11-30 + url: https://openai.com/blog/chatgpt dependencies: [gpt-3.5-turbo, OpenAI toxicity classifier] - description: ChatGPT is an artificial intelligence chatbot developed by OpenAI - failures: '' - feedback: '' - intended_uses: '' + adaptation: '' + output_space: '' + quality_control: '' + access: open license: + value: custom explanation: Per the Terms of Use, a limited license is provided to the users during their use of the API [[Section 2]](https://openai.com/api/policies/terms/). - value: custom + terms_of_service: '' + intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' monthly_active_users: - explanation: https://www.reuters.com/technology/chatgpt-sets-record-fastest-growing-user-base-analyst-note-2023-02-01/ value: 100000000 - name: ChatGPT - organization: OpenAI - output_space: '' - prohibited_uses: '' - quality_control: '' - terms_of_service: '' - type: application - url: https://openai.com/blog/chatgpt + explanation: https://www.reuters.com/technology/chatgpt-sets-record-fastest-growing-user-base-analyst-note-2023-02-01/ user_distribution: '' -- access: limited - analysis: '' + failures: '' + +- type: model + name: gpt-3.5-turbo + organization: OpenAI + description: '' created_date: 2023-03-01 + url: https://platform.openai.com/docs/models/gpt-3-5 + model_card: '' + modality: text + analysis: '' + size: unknown dependencies: [gpt-3.5-turbo dataset] - description: '' - feedback: '' - intended_uses: '' + training_emissions: '' + training_time: '' + training_hardware: '' + quality_control: '' + access: limited license: + value: custom explanation: Per the Terms of Use, a limited license is provided to the users during their use of the API [[Section 2]](https://openai.com/api/policies/terms/). 
- value: custom - modality: - explanation: text - value: text; text - model_card: '' + intended_uses: '' + prohibited_uses: '' monitoring: '' - name: gpt-3.5-turbo + feedback: '' + +- type: dataset + name: gpt-3.5-turbo dataset organization: OpenAI - prohibited_uses: '' - quality_control: '' - size: unknown - training_emissions: '' - training_hardware: '' - training_time: '' - type: model - url: https://platform.openai.com/docs/models/gpt-3-5 -- access: limited - analysis: '' + description: '' created_date: 2023-03-01 + url: https://platform.openai.com/docs/models/gpt-3-5 datasheet: '' - dependencies: [] - description: '' - excluded: '' - feedback: '' - included: '' - intended_uses: '' - license: - explanation: "The asset isn't released, and hence the license is unknown.\n" - value: unknown modality: text - monitoring: '' - name: gpt-3.5-turbo dataset - organization: OpenAI - prohibited_uses: '' - quality_control: '' - sample: [] size: unknown - type: dataset - url: https://platform.openai.com/docs/models/gpt-3-5 -- access: limited + sample: [] analysis: '' - created_date: 2022-05-01 - datasheet: '' dependencies: [] - description: '' - excluded: '' - feedback: '' included: '' - intended_uses: '' + excluded: '' + quality_control: '' + access: limited license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - modality: - explanation: code and text - value: code, text + explanation: > + The asset isn't released, and hence the license is unknown. + intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' + +- type: dataset name: code-davinci-002 dataset organization: OpenAI - prohibited_uses: '' - quality_control: '' - sample: [] - size: unknown - type: dataset + description: '' + created_date: 2022-05-01 url: https://platform.openai.com/docs/model-index-for-researchers -- access: limited + datasheet: '' + modality: code and text + size: unknown + sample: [] analysis: '' - created_date: 2022-05-01 - dependencies: [code-davinci-002 dataset] - description: '' - feedback: '' - intended_uses: '' + dependencies: [] + included: '' + excluded: '' + quality_control: '' + access: limited license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - modality: - explanation: code and text - value: code, text; code, text - model_card: '' + explanation: > + The asset isn't released, and hence the license is unknown. + intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' + +- type: model name: code-davinci-002 organization: OpenAI - prohibited_uses: '' - quality_control: '' + description: '' + created_date: 2022-05-01 + url: https://platform.openai.com/docs/model-index-for-researchers + model_card: '' + modality: code and text + analysis: '' size: unknown + dependencies: [code-davinci-002 dataset] training_emissions: '' - training_hardware: '' training_time: '' - type: model - url: https://platform.openai.com/docs/model-index-for-researchers -- access: limited - analysis: '' - created_date: 2022-05-01 - dependencies: [code-davinci-002] - description: '' - feedback: '' - intended_uses: '' + training_hardware: '' + quality_control: '' + access: limited license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - modality: - explanation: text - value: text; text - model_card: '' + explanation: > + The asset isn't released, and hence the license is unknown. 
+ intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' + +- type: model name: text-davinci-002 organization: OpenAI - prohibited_uses: '' - quality_control: '' + description: '' + created_date: 2022-05-01 + url: https://platform.openai.com/docs/model-index-for-researchers + model_card: '' + modality: text + analysis: '' size: unknown + dependencies: [code-davinci-002] training_emissions: '' - training_hardware: '' training_time: '' - type: model - url: https://platform.openai.com/docs/model-index-for-researchers -- access: limited - analysis: '' - created_date: 2022-11-30 - dependencies: [text-davinci-002] - description: '' - feedback: '' - intended_uses: '' + training_hardware: '' + quality_control: '' + access: limited license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - modality: - explanation: text - value: text; text - model_card: '' + explanation: > + The asset isn't released, and hence the license is unknown. + intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' + +- type: model name: text-davinci-003 organization: OpenAI - prohibited_uses: '' - quality_control: '' + description: '' + created_date: 2022-11-30 + url: https://platform.openai.com/docs/model-index-for-researchers + model_card: '' + modality: text + analysis: '' size: unknown + dependencies: [text-davinci-002] training_emissions: '' - training_hardware: '' training_time: '' - type: model - url: https://platform.openai.com/docs/model-index-for-researchers -- access: open - adaptation: '' - created_date: 2023-03-01 - dependencies: [Whisper] - description: API to query OpenAI's Whisper model. - failures: '' - feedback: '' - intended_uses: '' + training_hardware: '' + quality_control: '' + access: limited license: - explanation: Per the Terms of Use, a limited license is provided to the users - during their use of the API [[Section 2]](https://openai.com/api/policies/terms/). - value: custom + value: unknown + explanation: > + The asset isn't released, and hence the license is unknown. + intended_uses: '' + prohibited_uses: '' monitoring: '' - monthly_active_users: '' + feedback: '' + +- type: application name: Whisper API organization: OpenAI - output_space: '' - prohibited_uses: '' - quality_control: '' - terms_of_service: '' - type: application + description: API to query OpenAI's Whisper model. + created_date: 2023-03-01 url: https://openai.com/blog/introducing-chatgpt-and-whisper-apis - user_distribution: '' -- access: open + dependencies: [Whisper] adaptation: '' - created_date: 2023-03-01 - dependencies: [ChatGPT] - description: API to query OpenAI's ChatGPT model. - failures: '' - feedback: '' - intended_uses: '' + output_space: '' + quality_control: '' + access: open license: + value: custom explanation: Per the Terms of Use, a limited license is provided to the users during their use of the API [[Section 2]](https://openai.com/api/policies/terms/). - value: custom + terms_of_service: '' + intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' monthly_active_users: '' + user_distribution: '' + failures: '' + +- type: application name: ChatGPT API organization: OpenAI - output_space: '' - prohibited_uses: '' - quality_control: '' - terms_of_service: '' - type: application + description: API to query OpenAI's ChatGPT model. 
+ created_date: 2023-03-01 url: https://openai.com/blog/introducing-chatgpt-and-whisper-apis - user_distribution: '' -- access: open + dependencies: [ChatGPT] adaptation: '' - created_date: 2022-08-10 - dependencies: [OpenAI toxicity classifier] - description: This endpoint provides OpenAI API developers with free access to - GPT-based classifiers that detect undesired content—an instance of using AI - systems to assist with human supervision of these systems. - failures: '' - feedback: '' - intended_uses: '' + output_space: '' + quality_control: '' + access: open license: + value: custom explanation: Per the Terms of Use, a limited license is provided to the users during their use of the API [[Section 2]](https://openai.com/api/policies/terms/). - value: custom + terms_of_service: '' + intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' monthly_active_users: '' + user_distribution: '' + failures: '' + +- type: application name: OpenAI Moderation API organization: OpenAI + description: This endpoint provides OpenAI API developers with free access to + GPT-based classifiers that detect undesired content—an instance of using AI + systems to assist with human supervision of these systems. + created_date: 2022-08-10 + url: https://openai.com/blog/new-and-improved-content-moderation-tooling + dependencies: [OpenAI toxicity classifier] + adaptation: '' output_space: '' - prohibited_uses: '' quality_control: '' + access: open + license: + value: custom + explanation: Per the Terms of Use, a limited license is provided to the users + during their use of the API [[Section 2]](https://openai.com/api/policies/terms/). terms_of_service: '' - type: application - url: https://openai.com/blog/new-and-improved-content-moderation-tooling - user_distribution: '' -- access: closed - analysis: '' - created_date: 2023-01-18 - dependencies: [OpenAI toxicity dataset] - description: '' - feedback: '' intended_uses: '' - license: - explanation: "The asset isn't released, and hence the license is unknown.\n" - value: unknown - modality: - explanation: Text - value: text; text - model_card: '' + prohibited_uses: '' monitoring: '' + feedback: '' + monthly_active_users: '' + user_distribution: '' + failures: '' + +- type: model name: OpenAI toxicity classifier organization: OpenAI - prohibited_uses: '' - quality_control: '' + description: '' + created_date: 2023-01-18 + url: https://time.com/6247678/openai-chatgpt-kenya-workers/#:~:text=In%20a%20statement%2C%20an%20OpenAI,datasets%20of%20tools%20like%20ChatGPT. + model_card: '' + modality: Text + analysis: '' size: unknown + dependencies: [OpenAI toxicity dataset] training_emissions: '' - training_hardware: '' training_time: '' - type: model - url: https://time.com/6247678/openai-chatgpt-kenya-workers/#:~:text=In%20a%20statement%2C%20an%20OpenAI,datasets%20of%20tools%20like%20ChatGPT. -- access: closed - analysis: '' - created_date: 2023-01-18 - datasheet: '' - dependencies: [] - description: '' - excluded: '' - feedback: '' - included: '' - intended_uses: '' + training_hardware: '' + quality_control: '' + access: closed license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - modality: text + explanation: > + The asset isn't released, and hence the license is unknown. 
+ intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' + +- type: dataset name: OpenAI toxicity dataset organization: OpenAI - prohibited_uses: '' - quality_control: '' - sample: [] - size: unknown - type: dataset + description: '' + created_date: 2023-01-18 url: https://time.com/6247678/openai-chatgpt-kenya-workers/#:~:text=In%20a%20statement%2C%20an%20OpenAI,datasets%20of%20tools%20like%20ChatGPT. -- access: limited - adaptation: '' - created_date: 2023-02-03 - dependencies: [Sage] - description: A chatbot language model available via Quora's Poe - failures: '' - feedback: '' + datasheet: '' + modality: text + size: unknown + sample: [] + analysis: '' + dependencies: [] + included: '' + excluded: '' + quality_control: '' + access: closed + license: + value: unknown + explanation: > + The asset isn't released, and hence the license is unknown. intended_uses: '' - license: unknown + prohibited_uses: '' monitoring: '' - monthly_active_users: '' + feedback: '' + +- type: application name: Sage API organization: OpenAI + description: A chatbot language model available via Quora's Poe + created_date: 2023-02-03 + url: https://quorablog.quora.com/Poe-1 + dependencies: [Sage] + adaptation: '' output_space: '' - prohibited_uses: '' quality_control: '' + access: limited + license: unknown terms_of_service: '' - type: application - url: https://quorablog.quora.com/Poe-1 - user_distribution: '' -- access: limited - adaptation: '' - created_date: 2023-02-03 - dependencies: [Dragonfly] - description: A chatbot language model available via Quora's Poe - failures: '' - feedback: '' intended_uses: '' - license: unknown + prohibited_uses: '' monitoring: '' + feedback: '' monthly_active_users: '' + user_distribution: '' + failures: '' + +- type: application name: Dragonfly API organization: OpenAI + description: A chatbot language model available via Quora's Poe + created_date: 2023-02-03 + url: https://quorablog.quora.com/Poe-1 + dependencies: [Dragonfly] + adaptation: '' output_space: '' - prohibited_uses: '' quality_control: '' + access: limited + license: unknown terms_of_service: '' - type: application - url: https://quorablog.quora.com/Poe-1 - user_distribution: '' -- access: limited - analysis: '' - created_date: 2023-02-03 - dependencies: [] - description: A chatbot language model available via Quora's Poe - feedback: '' intended_uses: '' - license: unknown - modality: - explanation: text - value: text; text - model_card: '' + prohibited_uses: '' monitoring: '' + feedback: '' + monthly_active_users: '' + user_distribution: '' + failures: '' + +- type: model name: Sage organization: OpenAI - prohibited_uses: '' - quality_control: '' - size: unknown - training_emissions: '' - training_hardware: '' - training_time: '' - type: model + description: A chatbot language model available via Quora's Poe + created_date: 2023-02-03 url: https://quorablog.quora.com/Poe-1 -- access: limited + model_card: '' + modality: text analysis: '' - created_date: 2023-02-03 + size: unknown dependencies: [] - description: A chatbot language model available via Quora's Poe - feedback: '' - intended_uses: '' + training_emissions: '' + training_time: '' + training_hardware: '' + quality_control: '' + access: limited license: unknown - modality: - explanation: text - value: text; text - model_card: '' + intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' + +- type: model name: Dragonfly organization: OpenAI - prohibited_uses: '' - quality_control: '' + description: A chatbot language model 
available via Quora's Poe + created_date: 2023-02-03 + url: https://quorablog.quora.com/Poe-1 + model_card: '' + modality: text + analysis: '' size: unknown + dependencies: [] training_emissions: '' - training_hardware: '' training_time: '' - type: model - url: https://quorablog.quora.com/Poe-1 -- access: limited - adaptation: '' - created_date: 2023-03-07 - dependencies: [ChatGPT API] - description: The app integrates ChatGPT’s powerful AI technology to deliver instant - conversation summaries, research tools, and writing assistance directly in Slack - to help millions of companies work more productively. - failures: '' - feedback: '' - intended_uses: '' + training_hardware: '' + quality_control: '' + access: limited license: unknown + intended_uses: '' + prohibited_uses: '' monitoring: '' - monthly_active_users: '' + feedback: '' + +- type: application name: ChatGPT for Slack organization: OpenAI, Salesforce + description: The app integrates ChatGPT’s powerful AI technology to deliver instant + conversation summaries, research tools, and writing assistance directly in Slack + to help millions of companies work more productively. + created_date: 2023-03-07 + url: https://www.salesforce.com/news/stories/chatgpt-app-for-slack/ + dependencies: [ChatGPT API] + adaptation: '' output_space: '' - prohibited_uses: '' quality_control: '' + access: limited + license: unknown terms_of_service: '' - type: application - url: https://www.salesforce.com/news/stories/chatgpt-app-for-slack/ - user_distribution: '' -- access: limited - analysis: '' - created_date: 2023-03-14 - dependencies: [] - description: '' - feedback: '' intended_uses: '' - license: - explanation: "The asset isn't released, and hence the license is unknown.\n" - value: unknown - modality: - explanation: text and image - value: image, text; image, text - model_card: '' + prohibited_uses: '' monitoring: '' + feedback: '' + monthly_active_users: '' + user_distribution: '' + failures: '' + +- type: model name: GPT-4 organization: OpenAI - prohibited_uses: '' - quality_control: '' + description: '' + created_date: 2023-03-14 + url: https://arxiv.org/abs/2303.08774 + model_card: '' + modality: text and image + analysis: '' size: unknown + dependencies: [] training_emissions: '' - training_hardware: '' training_time: '' - type: model - url: https://arxiv.org/abs/2303.08774 -- access: limited - adaptation: '' - created_date: 2023-03-14 - dependencies: [GPT-4] - description: GPT-4 is OpenAI’s most advanced system, producing safer and more - useful responses - failures: '' - feedback: '' - intended_uses: '' + training_hardware: '' + quality_control: '' + access: limited license: - explanation: Per the Terms of Use, a limited license is provided to the users - during their use of the API [[Section 2]](https://openai.com/api/policies/terms/). - value: custom + value: unknown + explanation: > + The asset isn't released, and hence the license is unknown. 
+ intended_uses: '' + prohibited_uses: '' monitoring: '' - monthly_active_users: '' + feedback: '' + +- type: application name: GPT-4 API organization: OpenAI + description: GPT-4 is OpenAI’s most advanced system, producing safer and more + useful responses + created_date: 2023-03-14 + url: https://openai.com/product/gpt-4 + dependencies: [GPT-4] + adaptation: '' output_space: '' - prohibited_uses: '' quality_control: '' + access: limited + license: + value: custom + explanation: Per the Terms of Use, a limited license is provided to the users + during their use of the API [[Section 2]](https://openai.com/api/policies/terms/). terms_of_service: '' - type: application - url: https://openai.com/product/gpt-4 + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' + monthly_active_users: '' user_distribution: '' + failures: '' diff --git a/assets/othersideai.yaml b/assets/othersideai.yaml index 7d9b0f05..0753d45c 100644 --- a/assets/othersideai.yaml +++ b/assets/othersideai.yaml @@ -1,31 +1,42 @@ --- -- access: limited - adaptation: unknown - created_date: unknown - dependencies: [OpenAI API] - description: "HyperWrite is a writing assistant that generates text based on a\ - \ user's request, as well as style and tone choices.\n" - failures: unknown - feedback: unknown - intended_uses: "HyperWrite is intended to be used as a writing assistant.\n" - license: - explanation: "\"The Software, including software embedded in the Software, is\ - \ licensed, not sold, to you by Otherside only under the terms of this Agreement;\ - \ and Otherside reserves all rights not expressly granted under this Agreemen\"\ - \ - excerpt from the Terms of Service.\n" - value: custom - monitoring: unknown - monthly_active_users: unknown + +- type: application name: HyperWrite + # General organization: OthersideAI + description: > + HyperWrite is a writing assistant that generates text based on a user's + request, as well as style and tone choices. + created_date: unknown + url: https://hyperwriteai.com/ + # Construction + dependencies: [OpenAI API] + adaptation: unknown output_space: Generation - prohibited_uses: unknown quality_control: - explanation: "There is a HyperWrite blog warning the users about the risks of\ - \ writing with an AI assistant, but the company doesn't list the quality control\ - \ measures taken to prevent the listed risks, if any [[HyperWrite Blog]] (https://blog.hyperwrite.ai/what-are-the-risks-of-ai-writing/).\n" value: unknown + explanation: > + There is a HyperWrite blog warning the users about the risks of writing + with an AI assistant, but the company doesn't list the quality control + measures taken to prevent the listed risks, if any + [[HyperWrite Blog]] + (https://blog.hyperwrite.ai/what-are-the-risks-of-ai-writing/). + # Downstream + access: limited + license: + value: custom + explanation: > + "The Software, including software embedded in the Software, is licensed, + not sold, to you by Otherside only under the terms of this Agreement; and + Otherside reserves all rights not expressly granted under this Agreemen" + - excerpt from the Terms of Service. terms_of_service: https://hyperwriteai.com/terms - type: application - url: https://hyperwriteai.com/ + intended_uses: > + HyperWrite is intended to be used as a writing assistant. 
+ prohibited_uses: unknown + monitoring: unknown + feedback: unknown + # Deployment + monthly_active_users: unknown user_distribution: unknown + failures: unknown diff --git a/assets/oxford.yaml b/assets/oxford.yaml index c6f1477c..d57f98bc 100644 --- a/assets/oxford.yaml +++ b/assets/oxford.yaml @@ -1,110 +1,133 @@ --- -- access: - explanation: The captions and video url's can be found at the [[dataset page]](https://m-bain.github.io/webvid-dataset/). - CLIP ViT-B/32 features of this dataset can be found at the [[WebVid github - repository]](https://github.com/m-bain/webvid#download-clip-features-%EF%B8%8F). - value: open - analysis: unknown + +- type: dataset + name: WebVid-10M + # General + organization: University of Oxford + description: > + WebVid-10M is a large-scale dataset of short videos with textual descriptions + sourced from stock footage sites. created_date: - explanation: "The date the [[paper]](https://arxiv.org/abs/2104.00650) was released.\n" value: 2021-04-01 + explanation: > + The date the [[paper]](https://arxiv.org/abs/2104.00650) was released. + url: https://m-bain.github.io/webvid-dataset/ datasheet: none + modality: text (English) and video + size: 10.7M video-text pairs, 52K hours video + sample: [] + analysis: unknown + # Construction dependencies: [] - description: "WebVid-10M is a large-scale dataset of short videos with textual\ - \ descriptions sourced from stock footage sites.\n" - excluded: unknown - feedback: Feedback can be given by emailing at maxbain at robots.ox.ac.uk + license: + value: WebVid Dataset Terms + explanation: > + [WebVid Dataset Terms](https://github.com/m-bain/webvid/blob/main/TERMS.md) included: + value: Video URLs and textual description annotations explanation: Authors note in the paper:"We scrape the web for a new dataset of videos with textual description annotations, called WebVid-2M. 
The data was scraped from the web following a similar procedure to Google Conceptual Captions" - value: Video URLs and textual description annotations - intended_uses: - explanation: See [[Terms of Access]](https://github.com/m-bain/webvid/blob/main/TERMS.md) - value: "\"Provided you keep to these rules, the University grants you (the researcher)\ - \ a non-exclusive and non-transferable licence to use the content free of\ - \ charge strictly for non-commercial research (i.e., whose output artefacts\ - \ are not incorporated in commercial products) for 12 months.\"\n" - license: - explanation: "[WebVid Dataset Terms](https://github.com/m-bain/webvid/blob/main/TERMS.md)\n" - value: WebVid Dataset Terms - modality: - explanation: text (English) and video - value: text, video - monitoring: '' - name: WebVid-10M - organization: University of Oxford - prohibited_uses: "- Authors note the following prohibited uses: \"You must not\ - \ use the content other than for the Permitted Purpose in strict conformity\ - \ with these terms and any other reasonable instructions of the University.\ - \ You must not, except as may be strictly necessary for carrying out the Permitted\ - \ Purpose, provide or otherwise make available content to any third party or\ - \ allow use of it or them by or on behalf of any third party, in whole or in\ - \ part, whether by way of sale, resale, loan, transfer, hire or any other form\ - \ of exploitation; or attempt to identify any living or deceased individual\ - \ from the content.\" [[Terms of Access]](https://github.com/m-bain/webvid/blob/main/TERMS.md)\n\ - - Authors also note the following limitations of the dataset: \"We note that\ - \ data sourced from the web may be prone to biases and may contain graphic content.\ - \ Please be careful of unintended societal, gender, racial and other biases\ - \ when training or deploying models trained on this data.\" [[Disclaimer]](https://github.com/m-bain/webvid#disclaimer-%EF%B8%8F)\n" + excluded: unknown quality_control: unknown - sample: [] - size: 10.7M video-text pairs, 52K hours video - type: dataset - url: https://m-bain.github.io/webvid-dataset/ -- access: + # Downstream + access: + value: open explanation: The captions and video url's can be found at the [[dataset page]](https://m-bain.github.io/webvid-dataset/). CLIP ViT-B/32 features of this dataset can be found at the [[WebVid github repository]](https://github.com/m-bain/webvid#download-clip-features-%EF%B8%8F). - value: open - analysis: unknown + intended_uses: + value: > + "Provided you keep to these rules, the University grants you (the researcher) + a non-exclusive and non-transferable licence to use the content free of charge + strictly for non-commercial research (i.e., whose output artefacts are not + incorporated + in commercial products) for 12 months." + explanation: See [[Terms of Access]](https://github.com/m-bain/webvid/blob/main/TERMS.md) + prohibited_uses: > + - Authors note the following prohibited uses: "You must not use the content + other than for the Permitted Purpose in strict + conformity with these terms and any other reasonable instructions of the University. 
+ You must not, except as may be strictly necessary for carrying out the Permitted + Purpose, + provide or otherwise make available content to any third party or allow use + of it or them by or on behalf of any third party, in whole or in part, whether + by way of sale, resale, loan, transfer, hire or any other form of exploitation; + or attempt to identify any living or deceased individual from the content." + [[Terms of Access]](https://github.com/m-bain/webvid/blob/main/TERMS.md) + + - Authors also note the following limitations of the dataset: "We note that + data + sourced from the web may be prone to biases and may contain graphic content. + Please be careful of unintended societal, gender, racial and other biases when + training or deploying models trained on this data." + [[Disclaimer]](https://github.com/m-bain/webvid#disclaimer-%EF%B8%8F) + monitoring: '' + feedback: Feedback can be given by emailing at maxbain at robots.ox.ac.uk + +- type: dataset + name: WebVid-2M + # General + organization: University of Oxford + description: > + WebVid-2M is a large-scale dataset of 2.5M short videos with textual descriptions + sourced from stock footage sites. A subset of the WebVid-10M dataset. created_date: - explanation: "The date the [[paper]](https://arxiv.org/abs/2104.00650) was released.\n" value: 2021-04-01 + explanation: > + The date the [[paper]](https://arxiv.org/abs/2104.00650) was released. + url: https://m-bain.github.io/webvid-dataset/ datasheet: none + modality: text (English) and video + size: 2.5M video-text pairs, 13K hours video + sample: [] + analysis: unknown + # Construction dependencies: [WebVid-10M] - description: "WebVid-2M is a large-scale dataset of 2.5M short videos with textual\ - \ descriptions sourced from stock footage sites. A subset of the WebVid-10M\ - \ dataset.\n" - excluded: unknown - feedback: Feedback can be given by emailing at maxbain at robots.ox.ac.uk + license: + value: WebVid Dataset Terms + explanation: > + [WebVid Dataset Terms](https://github.com/m-bain/webvid/blob/main/TERMS.md) included: + value: Video URLs and textual description annotations explanation: Authors note in the paper:"We scrape the web for a new dataset of videos with textual description annotations, called WebVid-2M. The data was scraped from the web following a similar procedure to Google Conceptual Captions" - value: Video URLs and textual description annotations + excluded: unknown + quality_control: unknown + # Downstream + access: + value: open + explanation: The captions and video url's can be found at the [[dataset page]](https://m-bain.github.io/webvid-dataset/). + CLIP ViT-B/32 features of this dataset can be found at the [[WebVid github + repository]](https://github.com/m-bain/webvid#download-clip-features-%EF%B8%8F). intended_uses: + value: > + "Provided you keep to these rules, the University grants you (the researcher) + a non-exclusive and non-transferable licence to use the content free of charge + strictly for non-commercial research (i.e., whose output artefacts are not + incorporated + in commercial products) for 12 months." 
explanation: See [[Terms of Access]](https://github.com/m-bain/webvid/blob/main/TERMS.md) - value: "\"Provided you keep to these rules, the University grants you (the researcher)\ - \ a non-exclusive and non-transferable licence to use the content free of\ - \ charge strictly for non-commercial research (i.e., whose output artefacts\ - \ are not incorporated in commercial products) for 12 months.\"\n" - license: - explanation: "[WebVid Dataset Terms](https://github.com/m-bain/webvid/blob/main/TERMS.md)\n" - value: WebVid Dataset Terms - modality: - explanation: text (English) and video - value: text, video + prohibited_uses: > + - Authors note the following prohibited uses: "You must not use the content + other than for the Permitted Purpose in strict + conformity with these terms and any other reasonable instructions of the University. + You must not, except as may be strictly necessary for carrying out the Permitted + Purpose, + provide or otherwise make available content to any third party or allow use + of it or them by or on behalf of any third party, in whole or in part, whether + by way of sale, resale, loan, transfer, hire or any other form of exploitation; + or attempt to identify any living or deceased individual from the content." + [[Terms of Access]](https://github.com/m-bain/webvid/blob/main/TERMS.md) + + - Authors also note the following limitations of the dataset: "We note that + data + sourced from the web may be prone to biases and may contain graphic content. + Please be careful of unintended societal, gender, racial and other biases when + training or deploying models trained on this data." + [[Disclaimer]](https://github.com/m-bain/webvid#disclaimer-%EF%B8%8F) monitoring: '' - name: WebVid-2M - organization: University of Oxford - prohibited_uses: "- Authors note the following prohibited uses: \"You must not\ - \ use the content other than for the Permitted Purpose in strict conformity\ - \ with these terms and any other reasonable instructions of the University.\ - \ You must not, except as may be strictly necessary for carrying out the Permitted\ - \ Purpose, provide or otherwise make available content to any third party or\ - \ allow use of it or them by or on behalf of any third party, in whole or in\ - \ part, whether by way of sale, resale, loan, transfer, hire or any other form\ - \ of exploitation; or attempt to identify any living or deceased individual\ - \ from the content.\" [[Terms of Access]](https://github.com/m-bain/webvid/blob/main/TERMS.md)\n\ - - Authors also note the following limitations of the dataset: \"We note that\ - \ data sourced from the web may be prone to biases and may contain graphic content.\ - \ Please be careful of unintended societal, gender, racial and other biases\ - \ when training or deploying models trained on this data.\" [[Disclaimer]](https://github.com/m-bain/webvid#disclaimer-%EF%B8%8F)\n" - quality_control: unknown - sample: [] - size: 2.5M video-text pairs, 13K hours video - type: dataset - url: https://m-bain.github.io/webvid-dataset/ + feedback: Feedback can be given by emailing at maxbain at robots.ox.ac.uk diff --git a/assets/paladin.yaml b/assets/paladin.yaml index 80aec963..76870f97 100644 --- a/assets/paladin.yaml +++ b/assets/paladin.yaml @@ -1,21 +1,22 @@ --- -- access: limited - adaptation: '' - created_date: 2023-02-01 - dependencies: [] - description: UnderwriteGPT is the world's first generative AI underwriting tool. 
- failures: '' - feedback: '' - intended_uses: '' - license: '' - monitoring: '' - monthly_active_users: '' + +- type: application name: UnderwriteGPT organization: Paladin Group and Dais Technology + description: UnderwriteGPT is the world's first generative AI underwriting tool. + created_date: 2023-02-01 + url: https://dais.com/underwritegpt/ + dependencies: [] + adaptation: '' output_space: '' - prohibited_uses: '' quality_control: '' + access: limited + license: '' terms_of_service: '' - type: application - url: https://dais.com/underwritegpt/ + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' + monthly_active_users: '' user_distribution: '' + failures: '' diff --git a/assets/perplexity.yaml b/assets/perplexity.yaml index 8bf35081..117a7acc 100644 --- a/assets/perplexity.yaml +++ b/assets/perplexity.yaml @@ -1,50 +1,52 @@ --- -- access: open - adaptation: '' + +- type: application + name: Perplexity Ask + organization: Perplexity + description: Perplexity Ask is a new search interface that uses advanced artificial + intelligence technologies created_date: + value: 2022-12-07 explanation: Perplexity Ask was announced in a [[Twitter post]](https://twitter.com/perplexity_ai/status/1600551871554338816?s=20) by the company. - value: 2022-12-07 + url: https://www.perplexity.ai/ dependencies: [GPT-3.5, Bing Search] - description: Perplexity Ask is a new search interface that uses advanced artificial - intelligence technologies - failures: '' - feedback: '' - intended_uses: '' - license: - explanation: No license was found. - value: none - monitoring: '' - monthly_active_users: '' - name: Perplexity Ask - organization: Perplexity + adaptation: '' output_space: '' - prohibited_uses: '' quality_control: '' - terms_of_service: '' - type: application - url: https://www.perplexity.ai/ - user_distribution: '' -- access: deprecated - adaptation: '' - created_date: 2022-12-15 - dependencies: [Perplexity Ask, OpenAI API] - description: Twitter search interface that is powered by Perplexity's structured - search engine. - failures: '' - feedback: '' - intended_uses: '' + access: open license: - explanation: No license was found. value: none + explanation: No license was found. + terms_of_service: '' + intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' monthly_active_users: '' + user_distribution: '' + failures: '' + +- type: application name: Bird SQL organization: Perplexity + description: Twitter search interface that is powered by Perplexity's structured + search engine. + created_date: 2022-12-15 + url: https://www.perplexity.ai/sql + dependencies: [Perplexity Ask, OpenAI API] + adaptation: '' output_space: '' - prohibited_uses: '' quality_control: '' + access: deprecated + license: + value: none + explanation: No license was found. 
terms_of_service: '' - type: application - url: https://www.perplexity.ai/sql + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' + monthly_active_users: '' user_distribution: '' + failures: '' diff --git a/assets/portkey.yaml b/assets/portkey.yaml index 51fac815..2fca2885 100644 --- a/assets/portkey.yaml +++ b/assets/portkey.yaml @@ -1,22 +1,23 @@ --- -- access: open - adaptation: '' - created_date: 2023-05-06 - dependencies: [] - description: Portkey is a hosted middleware that allows users to create generative - AI applications - failures: '' - feedback: '' - intended_uses: '' - license: '' - monitoring: '' - monthly_active_users: '' + +- type: application name: Portkey organization: Portkey + description: Portkey is a hosted middleware that allows users to create generative + AI applications + created_date: 2023-05-06 + url: https://portkey.ai/ + dependencies: [] + adaptation: '' output_space: generative AI apps - prohibited_uses: '' quality_control: '' + access: open + license: '' terms_of_service: https://portkey.ai/terms - type: application - url: https://portkey.ai/ + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' + monthly_active_users: '' user_distribution: '' + failures: '' diff --git a/assets/quizlet.yaml b/assets/quizlet.yaml index a7580d0b..cfd5d9c6 100644 --- a/assets/quizlet.yaml +++ b/assets/quizlet.yaml @@ -1,25 +1,26 @@ --- -- access: open - adaptation: '' - created_date: 2023-03-01 - dependencies: [ChatGPT API] + +- type: application + name: Q-Chat + organization: Quizlet description: Quizlet is introducing Q-Chat, a fully-adaptive AI tutor that engages students with adaptive questions based on relevant study materials delivered through a fun chat experience. - failures: '' - feedback: '' - intended_uses: '' + created_date: 2023-03-01 + url: https://openai.com/blog/introducing-chatgpt-and-whisper-apis + dependencies: [ChatGPT API] + adaptation: '' + output_space: '' + quality_control: '' + access: open license: - explanation: No license was found. value: none + explanation: No license was found. + terms_of_service: https://quizlet.com/tos + intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' monthly_active_users: '' - name: Q-Chat - organization: Quizlet - output_space: '' - prohibited_uses: '' - quality_control: '' - terms_of_service: https://quizlet.com/tos - type: application - url: https://openai.com/blog/introducing-chatgpt-and-whisper-apis user_distribution: '' + failures: '' diff --git a/assets/quora.yaml b/assets/quora.yaml index 2b32f96d..e28e31a2 100644 --- a/assets/quora.yaml +++ b/assets/quora.yaml @@ -1,31 +1,32 @@ --- -- access: limited - adaptation: '' + +- type: application + name: Poe + organization: Quora + description: Poe lets people ask questions, get instant answers, and have back-and-forth + conversations with several AI-powered bots. It is initially available on iOS, + but we will be adding support for all major platforms in the next few months, + along with more bots. created_date: 2023-02-03 + url: https://quorablog.quora.com/Poe-1 dependencies: - ChatGPT API - GPT-4 API - Claude API - Dragonfly API - Sage API - description: Poe lets people ask questions, get instant answers, and have back-and-forth - conversations with several AI-powered bots. It is initially available on iOS, - but we will be adding support for all major platforms in the next few months, - along with more bots. 
- failures: '' - feedback: '' - intended_uses: '' + adaptation: '' + output_space: '' + quality_control: '' + access: limited license: - explanation: No license was found. value: none + explanation: No license was found. + terms_of_service: https://poe.com/tos + intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' monthly_active_users: '' - name: Poe - organization: Quora - output_space: '' - prohibited_uses: '' - quality_control: '' - terms_of_service: https://poe.com/tos - type: application - url: https://quorablog.quora.com/Poe-1 user_distribution: '' + failures: '' diff --git a/assets/robin.yaml b/assets/robin.yaml index 813dc634..123cc804 100644 --- a/assets/robin.yaml +++ b/assets/robin.yaml @@ -1,26 +1,27 @@ --- -- access: limited - adaptation: '' - created_date: unknown - dependencies: [Anthropic API] + +- type: application + name: Robin AI + organization: Robin AI description: Robin AI uses Claude and Anthropic's models to understand language - including in technical domains like legal language. It's also very confident at drafting, summarising, translations, and explaining complex concepts in simple terms - failures: '' - feedback: '' - intended_uses: '' + created_date: unknown + url: https://www.robinai.co.uk/ + dependencies: [Anthropic API] + adaptation: '' + output_space: '' + quality_control: '' + access: limited license: - explanation: No license was found. value: none + explanation: No license was found. + terms_of_service: https://www.robinai.co.uk/terms + intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' monthly_active_users: '' - name: Robin AI - organization: Robin AI - output_space: '' - prohibited_uses: '' - quality_control: '' - terms_of_service: https://www.robinai.co.uk/terms - type: application - url: https://www.robinai.co.uk/ user_distribution: '' + failures: '' diff --git a/assets/salesforce.yaml b/assets/salesforce.yaml index f6c12de0..19a1d021 100644 --- a/assets/salesforce.yaml +++ b/assets/salesforce.yaml @@ -1,35 +1,42 @@ --- -- access: - explanation: Model weights are available for download with links in the [[GitHub - repo]](https://github.com/salesforce/CodeGen) - value: open - analysis: '' + +- type: model + name: CodeGen + organization: Salesforce + description: CodeGen is a language model for code created_date: - explanation: Date model paper was released value: 2022-03-25 + explanation: Date model paper was released + url: https://arxiv.org/abs/2203.13474 + model_card: '' + modality: text (English) and code + analysis: '' + size: 16B parameters (dense model) dependencies: [] - description: CodeGen is a language model for code - feedback: '' - intended_uses: '' + training_emissions: '' + training_time: '' + training_hardware: Unspecified Salesforce Compute (TPU-V4s) + quality_control: '' + access: + value: open + explanation: Model weights are available for download with links in the [[GitHub + repo]](https://github.com/salesforce/CodeGen) license: none (model weights), BSD-3-Clause (code) - modality: - explanation: text (English) and code - value: code, text; code, text - model_card: '' + intended_uses: '' + prohibited_uses: '' monitoring: '' - name: CodeGen + feedback: '' + +- type: model + name: BLIP organization: Salesforce - prohibited_uses: '' - quality_control: '' - size: 16B parameters (dense) - training_emissions: '' - training_hardware: Unspecified Salesforce Compute (TPU-V4s) - training_time: '' - type: model - url: https://arxiv.org/abs/2203.13474 -- access: open - analysis: '' + description: '' 
created_date: 2022-01-28 + url: https://arxiv.org/abs/2201.12086 + model_card: '' + modality: text and image + analysis: '' + size: unknown dependencies: - ViT-B - BERT @@ -39,68 +46,60 @@ - Conceptual 12M - SBU Captions - LAION-115M - description: '' - feedback: '' - intended_uses: '' + training_emissions: '' + training_time: '' + training_hardware: '' + quality_control: '' + access: open license: BSD-3-Clause - modality: - explanation: text and image - value: image, text; image, text - model_card: '' + intended_uses: '' + prohibited_uses: '' monitoring: '' - name: BLIP + feedback: '' + +- type: dataset + name: LAION-115M organization: Salesforce - prohibited_uses: '' - quality_control: '' - size: unknown - training_emissions: '' - training_hardware: '' - training_time: '' - type: model - url: https://arxiv.org/abs/2201.12086 -- access: open - analysis: '' + description: '' created_date: 2022-01-28 + url: https://arxiv.org/abs/2201.12086 datasheet: '' + modality: text and image + size: 115M image-text pairs + sample: [] + analysis: '' dependencies: [LAION-400M] - description: '' - excluded: '' - feedback: '' included: '' - intended_uses: '' + excluded: '' + quality_control: '' + access: open license: BSD-3-Clause - modality: - explanation: text and image - value: image, text + intended_uses: '' + prohibited_uses: '' monitoring: '' - name: LAION-115M + feedback: '' + +- type: application + name: EinsteinGPT organization: Salesforce - prohibited_uses: '' - quality_control: '' - sample: [] - size: 115M image-text pairs - type: dataset - url: https://arxiv.org/abs/2201.12086 -- access: limited - adaptation: '' - created_date: 2023-03-07 - dependencies: [ChatGPT API] description: EinsteinGPT is generative AI for customer relationship management (CRFM). - failures: '' - feedback: '' - intended_uses: '' + created_date: 2023-03-07 + url: https://www.salesforce.com/products/einstein/overview/?d=cta-body-promo-8 + dependencies: [ChatGPT API] + adaptation: '' + output_space: '' + quality_control: '' + access: limited license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown + explanation: > + The asset isn't released, and hence the license is unknown. + terms_of_service: '' + intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' monthly_active_users: '' - name: EinsteinGPT - organization: Salesforce - output_space: '' - prohibited_uses: '' - quality_control: '' - terms_of_service: '' - type: application - url: https://www.salesforce.com/products/einstein/overview/?d=cta-body-promo-8 user_distribution: '' + failures: '' diff --git a/assets/sana.yaml b/assets/sana.yaml index 22234f8f..09bda05f 100644 --- a/assets/sana.yaml +++ b/assets/sana.yaml @@ -1,40 +1,55 @@ --- -- access: limited - adaptation: "Customized GPT-3, fine-tuned on private data [[Sana GPT-3 Demo]](https://gpt3demo.com/apps/sanalabs).\n" + +- type: application + name: Sana + # General + organization: Sana + description: > + "Sana is your all-in-one, AI-assisted, online learning platform (LMS). + Author employee training courses and measure team development with Sana's + powerful analytics. Sana partners with the world's most important + organizations and fastest-growing startups to make personalized, adaptive + learning available for everyone, everywhere" + [[Sana GPT-3 Demo]](https://gpt3demo.com/apps/sanalabs). 
created_date: - explanation: "The company was founded in 2016 according to [[Crunchbase]](https://www.crunchbase.com/organization/sana-2).\ - \ It is unclear when Sana adapted the OpenAI API to their products.\n" value: unknown + explanation: > + The company was founded in 2016 according to + [[Crunchbase]](https://www.crunchbase.com/organization/sana-2). It is + unclear when Sana adapted the OpenAI API to their products. + url: https://www.sanalabs.com/ + # Construction dependencies: [OpenAI API] - description: "\"Sana is your all-in-one, AI-assisted, online learning platform\ - \ (LMS). Author employee training courses and measure team development with\ - \ Sana's powerful analytics. Sana partners with the world's most important organizations\ - \ and fastest-growing startups to make personalized, adaptive learning available\ - \ for everyone, everywhere\" [[Sana GPT-3 Demo]](https://gpt3demo.com/apps/sanalabs).\n" - failures: unknown - feedback: unknown - intended_uses: "Sana is intended to be used by employers to provide a learning\ - \ service for their employees.\n" + adaptation: > + Customized GPT-3, fine-tuned on private data + [[Sana GPT-3 Demo]](https://gpt3demo.com/apps/sanalabs). + output_space: question and answer, summarization, sentiment analysis,topic identification + quality_control: unknown + # Downstream + access: limited license: - explanation: "\"Sana Labs grants Subscriber a limited non-exclusive, non-transferable,\ - \ non-sublicensable license to use Sana Labs' web-based personalized and collaborative\ - \ learning platform (“Sana Platform”) solely in connection with the subscribed\ - \ Services in accordance with the Agreement.\" - excerpt from the Terms of\ - \ Service.\n" value: custom - monitoring: unknown - monthly_active_users: unknown - name: Sana - organization: Sana - output_space: question and answer, summarization, sentiment analysis,topic identification + explanation: > + "Sana Labs grants Subscriber a limited non-exclusive, non-transferable, + non-sublicensable license to use Sana Labs' web-based personalized and + collaborative learning platform (“Sana Platform”) solely in connection + with the subscribed Services in accordance with the Agreement." - + excerpt from the Terms of Service. + terms_of_service: https://www.sanalabs.com/legal/ + intended_uses: > + Sana is intended to be used by employers to provide a learning service for + their employees. prohibited_uses: - explanation: "\"Sana provides an extensive set of legal documents [[Sana Legal]](https://www.sanalabs.com/legal/),\ - \ but missing from the documents are prohibited uses of the Sana platform,\ - \ beyond a mention of what is impermissible under relevant law.\" - exceprt\ - \ from the Terms of Service document.\n" value: none - quality_control: unknown - terms_of_service: https://www.sanalabs.com/legal/ - type: application - url: https://www.sanalabs.com/ + explanation: > + "Sana provides an extensive set of legal documents + [[Sana Legal]](https://www.sanalabs.com/legal/), but missing from the + documents are prohibited uses of the Sana platform, beyond a mention of + what is impermissible under relevant law." - exceprt from the Terms of + Service document. 
+ monitoring: unknown + feedback: unknown + # Deployment + monthly_active_users: unknown user_distribution: unknown + failures: unknown diff --git a/assets/shanghai.yaml b/assets/shanghai.yaml index 80208ec7..8dae5081 100644 --- a/assets/shanghai.yaml +++ b/assets/shanghai.yaml @@ -1,7 +1,15 @@ --- -- access: open - analysis: '' + +- type: model + name: InternVideo + organization: Shanghai AI Laboratory + description: '' created_date: 2022-12-06 + url: https://arxiv.org/pdf/2212.03191.pdf + model_card: '' + modality: text and video + analysis: '' + size: 1.3B parameters (dense model) dependencies: - Kinetics-400 - WebVid-2M @@ -10,46 +18,36 @@ - AVA - Something-Something-v2 - Kinetics-710 - description: '' - feedback: '' - intended_uses: '' - license: Apache 2.0 - modality: - explanation: text and video - value: text, video; text, video - model_card: '' - monitoring: '' - name: InternVideo - organization: Shanghai AI Laboratory - prohibited_uses: '' - quality_control: '' - size: 1.3B parameters (dense) training_emissions: '' - training_hardware: '' training_time: '' - type: model - url: https://arxiv.org/pdf/2212.03191.pdf -- access: open - analysis: Evaluated based on own constructed dataset covering 433 languages. - created_date: 2023-05-29 - dependencies: [OPUS] - description: Lego-MT is a multilingual large language model which uses a more - efficient approach of being an effective detachable model. - feedback: '' + training_hardware: '' + quality_control: '' + access: open + license: Apache 2.0 intended_uses: '' - license: '' - modality: - explanation: multilingual text - value: text; text - model_card: '' + prohibited_uses: '' monitoring: '' + feedback: '' + +- type: model name: Lego-MT organization: Shanghai AI Laboratory - prohibited_uses: '' - quality_control: '' - size: 1.2B parameters (dense) + description: Lego-MT is a multilingual large language model which uses a more + efficient approach of being an effective detachable model. + created_date: 2023-05-29 + url: https://arxiv.org/pdf/2212.10551.pdf + model_card: '' + modality: multilingual text + analysis: Evaluated based on own constructed dataset covering 433 languages. + size: 1.2B parameters + dependencies: [OPUS] training_emissions: unknown - training_hardware: 32 A100 GPUs training_time: 15 days - type: model - url: https://arxiv.org/pdf/2212.10551.pdf + training_hardware: 32 A100 GPUs + quality_control: '' + access: open + license: '' + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' diff --git a/assets/shop.yaml b/assets/shop.yaml index 60403fc0..5700ad95 100644 --- a/assets/shop.yaml +++ b/assets/shop.yaml @@ -1,24 +1,25 @@ --- -- access: open - adaptation: '' - created_date: 2023-03-01 - dependencies: [ChatGPT API] + +- type: application + name: Shop Assistant + organization: Shop description: When shoppers search for products, the shopping assistant makes personalized recommendations based on their requests. Shop’s new AI-powered shopping assistant will streamline in-app shopping by scanning millions of products to quickly find what buyers are looking for—or help them discover something new. 
- failures: '' - feedback: '' - intended_uses: '' - license: '' - monitoring: '' - monthly_active_users: '' - name: Shop Assistant - organization: Shop + created_date: 2023-03-01 + url: https://openai.com/blog/introducing-chatgpt-and-whisper-apis + dependencies: [ChatGPT API] + adaptation: '' output_space: '' - prohibited_uses: '' quality_control: '' + access: open + license: '' terms_of_service: '' - type: application - url: https://openai.com/blog/introducing-chatgpt-and-whisper-apis + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' + monthly_active_users: '' user_distribution: '' + failures: '' diff --git a/assets/singapore.yaml b/assets/singapore.yaml index 3e378eb9..5e0ab85c 100644 --- a/assets/singapore.yaml +++ b/assets/singapore.yaml @@ -1,28 +1,27 @@ --- -- access: open + +- type: model + name: GOAT + organization: National University of Singapore + description: GOAT is a fine-tuned LLaMA model which uses the tokenization of numbers + to significantly outperform benchmark standards on a range of arithmetic tasks. + created_date: 2023-05-23 + url: https://arxiv.org/pdf/2305.14201.pdf + model_card: none + modality: textual input, numerical data output analysis: Performance assessed on BIG-bench arithmetic sub-task, and various elementary arithmetic tasks. - created_date: 2023-05-23 + size: 7B parameters dependencies: [LLaMA, GOAT dataset] - description: GOAT is a fine-tuned LLaMA model which uses the tokenization of numbers - to significantly outperform benchmark standards on a range of arithmetic tasks. - feedback: '' + training_emissions: unknown + training_time: unknown + training_hardware: 24 GB VRAM GPU + quality_control: Number data is randomly generated from log space to reduce likelihood + of redundancy and range of magnitudes. + access: open + license: Apache 2.0 intended_uses: Integration into other instruction-tuned LLMs to further enhance arithmetic reasoning abilities in solving math word problems. - license: Apache 2.0 - modality: - explanation: textual input, numerical data output - value: text; other - model_card: none - monitoring: '' - name: GOAT - organization: National University of Singapore prohibited_uses: '' - quality_control: Number data is randomly generated from log space to reduce likelihood - of redundancy and range of magnitudes. - size: 7B parameters (dense) - training_emissions: unknown - training_hardware: 24 GB VRAM GPU - training_time: unknown - type: model - url: https://arxiv.org/pdf/2305.14201.pdf + monitoring: '' + feedback: '' diff --git a/assets/snap.yaml b/assets/snap.yaml index d5a384d3..67727722 100644 --- a/assets/snap.yaml +++ b/assets/snap.yaml @@ -1,28 +1,30 @@ --- -- access: open - adaptation: '' - created_date: 2023-03-01 - dependencies: [ChatGPT API] + +- type: application + name: My AI for Snapchat + organization: Snap description: My AI offers Snapchatters a friendly, customizable chatbot at their fingertips that offers recommendations, and can even write a haiku for friends in seconds. Snapchat, where communication and messaging is a daily behavior, has 750 million monthly Snapchatters. 
- failures: '' - feedback: '' - intended_uses: '' + created_date: 2023-03-01 + url: https://openai.com/blog/introducing-chatgpt-and-whisper-apis + dependencies: [ChatGPT API] + adaptation: '' + output_space: '' + quality_control: '' + access: open license: - explanation: "\"Snap grants you a worldwide, royalty-free, non-assignable, non-exclusive,\ - \ revocable, and non-sublicensable license to use the Services.\" - excerpt\ - \ from the Terms of Service document.\n" value: custom + explanation: > + "Snap grants you a worldwide, royalty-free, non-assignable, non-exclusive, revocable, + and non-sublicensable license to use the Services." - excerpt + from the Terms of Service document. + terms_of_service: https://snap.com/terms + intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' monthly_active_users: '' - name: My AI for Snapchat - organization: Snap - output_space: '' - prohibited_uses: '' - quality_control: '' - terms_of_service: https://snap.com/terms - type: application - url: https://openai.com/blog/introducing-chatgpt-and-whisper-apis user_distribution: '' + failures: '' diff --git a/assets/speak.yaml b/assets/speak.yaml index 94e61ac3..8771590f 100644 --- a/assets/speak.yaml +++ b/assets/speak.yaml @@ -1,23 +1,24 @@ --- -- access: open - adaptation: '' - created_date: 2023-03-01 - dependencies: [Whisper API] + +- type: application + name: Speak + organization: Speak description: Speak is an AI-powered language learning app focused on building the best path to spoken fluency and is the the fastest-growing English app in South Korea. - failures: '' - feedback: '' - intended_uses: '' - license: '' - monitoring: '' - monthly_active_users: '' - name: Speak - organization: Speak + created_date: 2023-03-01 + url: https://openai.com/blog/introducing-chatgpt-and-whisper-apis + dependencies: [Whisper API] + adaptation: '' output_space: '' - prohibited_uses: '' quality_control: '' + access: open + license: '' terms_of_service: '' - type: application - url: https://openai.com/blog/introducing-chatgpt-and-whisper-apis + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' + monthly_active_users: '' user_distribution: '' + failures: '' diff --git a/assets/spotify.yaml b/assets/spotify.yaml index 6e436586..f7016bf5 100644 --- a/assets/spotify.yaml +++ b/assets/spotify.yaml @@ -1,29 +1,32 @@ --- -- access: limited - adaptation: '' - created_date: 2023-02-23 - dependencies: [ChatGPT API, Sonantic AI] + +- type: application + name: AI DJ + organization: Spotify description: The DJ is a personalized AI guide that knows you and your music taste so well that it can choose what to play for you. This feature, first rolling out in beta, will deliver a curated lineup of music alongside commentary around the tracks and artists we think you’ll like in a stunningly realistic voice. 
- failures: '' - feedback: '' - intended_uses: '' + created_date: 2023-02-23 + url: https://newsroom.spotify.com/2023-02-22/spotify-debuts-a-new-ai-dj-right-in-your-pocket/ + dependencies: [ChatGPT API, Sonantic AI] + adaptation: '' + output_space: '' + quality_control: '' + access: limited license: - explanation: "\"The Spotify software applications and the Content are licensed,\ - \ not sold or transferred to you, and Spotify and its licensors retain ownership\ - \ of all copies of the Spotify software applications and Content even after\ - \ installation on your Devices.\" - excerpt from the Terms of Service document.\n" value: custom + explanation: > + "The Spotify software applications and the Content are licensed, not sold + or transferred to you, and Spotify and its licensors retain ownership of + all copies of the Spotify software applications and Content even after + installation on your Devices." - excerpt from the Terms of Service + document. + terms_of_service: https://www.spotify.com/us/legal/end-user-agreement/ + intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' monthly_active_users: '' - name: AI DJ - organization: Spotify - output_space: '' - prohibited_uses: '' - quality_control: '' - terms_of_service: https://www.spotify.com/us/legal/end-user-agreement/ - type: application - url: https://newsroom.spotify.com/2023-02-22/spotify-debuts-a-new-ai-dj-right-in-your-pocket/ user_distribution: '' + failures: '' diff --git a/assets/stability.yaml b/assets/stability.yaml index 8b137891..8c572529 100644 --- a/assets/stability.yaml +++ b/assets/stability.yaml @@ -1 +1,102 @@ +--- +- type: model + name: DeepFloyd IF + organization: Stability AI + description: A text-to-image cascaded pixel diffusion model released in conjunction + with AI research lab DeepFloyd. + created_date: 2023-04-28 + url: https://stability.ai/blog/deepfloyd-if-text-to-image-model + model_card: https://huggingface.co/DeepFloyd/IF-I-XL-v1.0 + modality: textual input with image output + analysis: Evaluated on the COCO dataset. + size: 4.3B parameters + dependencies: [LAION-5B] + training_emissions: '' + training_time: '' + training_hardware: '' + quality_control: '' + access: open + license: + value: custom + explanation: License agreement can be found on model card https://huggingface.co/DeepFloyd/IF-I-XL-v1.0 + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: https://huggingface.co/DeepFloyd/IF-I-XL-v1.0/discussions + + +- type: model + name: StableLM + organization: Stability AI + description: Large language models trained on up to 1.5 trillion tokens. + created_date: 2023-04-20 + url: https://github.com/Stability-AI/StableLM + model_card: '' + modality: natural language text + analysis: '' + size: 7B parameters + dependencies: + - StableLM-Alpha dataset + - Alpaca dataset + - gpt4all dataset + - ShareGPT52K dataset + - Dolly dataset + - HH dataset + training_emissions: '' + training_time: '' + training_hardware: '' + quality_control: '' + access: open + license: Apache 2.0 + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' + + +- type: application + name: Stable Diffusion + organization: Stability AI + description: Stable Diffusion is a generative software that creates images from + text prompts. 
+ created_date: 2022-08-22 + url: https://stability.ai/blog/stable-diffusion-public-release + dependencies: [] + adaptation: '' + output_space: image + quality_control: '' + access: open + license: + value: custom + explanation: License agreement can be found on https://github.com/CompVis/stable-diffusion/blob/main/LICENSE + terms_of_service: '' + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: https://huggingface.co/CompVis/stable-diffusion/discussions + monthly_active_users: '' + user_distribution: '' + failures: '' + +- type: application + name: Stable Diffusion XL + organization: Stability AI + description: Stable Diffusion XL is an updated version of Stable Diffusion, and + creates descriptive images with shorter prompts and can generate words within images. + created_date: 2023-07-26 + url: https://stability.ai/stablediffusion + dependencies: [] + adaptation: '' + output_space: image + quality_control: '' + access: limited + license: MIT + terms_of_service: '' + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' + monthly_active_users: '' + user_distribution: '' + failures: '' diff --git a/assets/stanford.yaml b/assets/stanford.yaml index 37f02528..f54d0d1e 100644 --- a/assets/stanford.yaml +++ b/assets/stanford.yaml @@ -1,77 +1,74 @@ --- -- access: open - analysis: '' - created_date: 2022-12-15 - dependencies: [The Pile] - description: '' - feedback: '' - intended_uses: '' - license: bigscience-bloom-rail-1.0 - modality: - explanation: text - value: text; text - model_card: '' - monitoring: '' + +- type: model name: BioMedLM organization: Stanford - prohibited_uses: '' - quality_control: '' - size: 2.7B parameters (dense) + description: '' + created_date: 2022-12-15 + url: https://crfm.stanford.edu/2022/12/15/pubmedgpt.html + model_card: '' + modality: text + analysis: '' + size: 2.7B parameters (dense model) + dependencies: [The Pile] training_emissions: '' - training_hardware: '' training_time: '' - type: model - url: https://crfm.stanford.edu/2022/12/15/pubmedgpt.html -- access: open - analysis: Evaluated on own framework that tests domain-specific tasks in medical - field. - created_date: 2022-11-23 - dependencies: [Stable Diffusion, RoentGen radiology dataset] - description: RoentGen is a generative medical imaging model that can create visually - convincing X-ray images. - feedback: '' + training_hardware: '' + quality_control: '' + access: open + license: bigscience-bloom-rail-1.0 intended_uses: '' - license: '' - modality: - explanation: text input and image output - value: text; image - model_card: '' + prohibited_uses: '' monitoring: '' + feedback: '' + +- type: model name: RoentGen organization: Stanford - prohibited_uses: '' - quality_control: '' - size: 330M parameters (dense) + description: RoentGen is a generative medical imaging model that can create visually + convincing X-ray images. + created_date: 2022-11-23 + url: https://arxiv.org/pdf/2211.12737.pdf + model_card: '' + modality: text input and image output + analysis: Evaluated on own framework that tests domain-specific tasks in medical + field.
+ size: 330M parameters + dependencies: [Stable Diffusion, RoentGen radiology dataset] training_emissions: unknown - training_hardware: 64 A100 GPUs training_time: 60k training steps per day - type: model - url: https://arxiv.org/pdf/2211.12737.pdf -- access: open - analysis: Evaluated on three physical control tasks, drawing, steering, and human - body movement on various dynamics - created_date: 2023-06-12 - dependencies: [GPT-2, BABEL, text-davinci-003] - description: Model trained to generate language corrections for physical control - tasks. - feedback: '' + training_hardware: 64 A100 GPUs + quality_control: '' + access: open + license: '' intended_uses: '' - license: MIT - modality: - explanation: trajectory input with text output - value: other; text - model_card: '' + prohibited_uses: '' monitoring: '' + feedback: '' + +- type: model name: CORGI organization: Stanford - prohibited_uses: '' - quality_control: '' - size: 124M parameters (dense) + description: Model trained to generate language corrections for physical control + tasks. + created_date: 2023-06-12 + url: https://arxiv.org/pdf/2306.07012.pdf + model_card: '' + modality: trajectory input with text output + analysis: Evaluated on three physical control tasks, drawing, steering, and human + body movement on various dynamics + size: 124M parameters + dependencies: [GPT-2, BABEL, text-davinci-003] training_emissions: '' - training_hardware: one NVIDIA A40 GPU training_time: + value: unknown explanation: The authors do not report the training time, but do report that they train for 200 epochs. - value: unknown - type: model - url: https://arxiv.org/pdf/2306.07012.pdf + training_hardware: one NVIDIA A40 GPU + quality_control: '' + access: open + license: MIT + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' diff --git a/assets/stonybrook.yaml b/assets/stonybrook.yaml index a160a186..ce4a4657 100644 --- a/assets/stonybrook.yaml +++ b/assets/stonybrook.yaml @@ -1,51 +1,62 @@ --- -- access: - explanation: The dataset is available for download on the [[SBU Captions Dataset - website]](https://www.cs.rice.edu/~vo9/sbucaptions/), along with additional - resources. - value: open - analysis: - explanation: See [[Section 5]](https://proceedings.neurips.cc/paper/2011/file/5dd9db5e033da9c6fb5ba83c7a7ebea9-Paper.pdf) - value: "Authors perform two quantitative evaluations for image captioning -\ - \ direct user ratings of relevance and BLEU score. They also propose a new\ - \ evaluation task: \"we propose a new evaluation task where a user is presented\ - \ with two photographs and one caption. The user must assign the caption to\ - \ the most relevant image. For evaluation we use a query image, a random image\ - \ and a generated caption.\"\n" + +- type: dataset + name: SBU Captions + # General + organization: Stony Brook University + description: > + SBU Captions Dataset is a collection of 1 million images and associated captions + from Flickr, filtered so that the descriptions are likely to refer to visual + content. created_date: - explanation: "The date the [[paper]](https://papers.nips.cc/paper_files/paper/2011/hash/5dd9db5e033da9c6fb5ba83c7a7ebea9-Abstract.html)\ - \ was published.\n" value: 2011-12-12 + explanation: > + The date the [[paper]](https://papers.nips.cc/paper_files/paper/2011/hash/5dd9db5e033da9c6fb5ba83c7a7ebea9-Abstract.html) + was published. 
+ url: https://proceedings.neurips.cc/paper/2011/file/5dd9db5e033da9c6fb5ba83c7a7ebea9-Paper.pdf datasheet: none + modality: text (English) and image + size: 1M image-text pairs + sample: [] + analysis: + value: > + Authors perform two quantitative evaluations for image captioning - direct + user + ratings of relevance and BLEU score. They also propose a new evaluation task: + "we propose a new evaluation task where a user is presented with two photographs + and one caption. The user must assign the caption to the most relevant image. + For evaluation we use a query image, a random image and a generated caption." + explanation: See [[Section 5]](https://proceedings.neurips.cc/paper/2011/file/5dd9db5e033da9c6fb5ba83c7a7ebea9-Paper.pdf) + # Construction dependencies: [Flickr] - description: "SBU Captions Dataset is a collection of 1 million images and associated\ - \ captions from Flickr, filtered so that the descriptions are likely to refer\ - \ to visual content.\n" - excluded: - explanation: See [[Section 2]](https://proceedings.neurips.cc/paper/2011/file/5dd9db5e033da9c6fb5ba83c7a7ebea9-Paper.pdf) - value: "\"This produces a very large, but noisy initial set of photographs with\ - \ associated text. We filter this set of photos so that the descriptions attached\ - \ to a picture are relevant and visually descriptive.\"\n" - feedback: '' + license: + value: none + explanation: No license was found. included: + value: > + "To encourage visual descriptiveness in our collection, we select only those + images with descriptions of satisfactory length based on observed lengths + in + visual descriptions. We also enforce that retained descriptions contain at + least + 2 words belonging to our term lists and at least one prepositional word, e.g. + “on”, “under” which often indicate visible spatial relationships." explanation: See [[Section 2]](https://proceedings.neurips.cc/paper/2011/file/5dd9db5e033da9c6fb5ba83c7a7ebea9-Paper.pdf) - value: "\"To encourage visual descriptiveness in our collection, we select only\ - \ those images with descriptions of satisfactory length based on observed\ - \ lengths in visual descriptions. We also enforce that retained descriptions\ - \ contain at least 2 words belonging to our term lists and at least one prepositional\ - \ word, e.g. “on”, “under” which often indicate visible spatial relationships.\"\ - \n" + excluded: + value: > + "This produces a very large, but noisy initial set of photographs with associated + text. We filter this set of photos so that the descriptions attached to a + picture + are relevant and visually descriptive." + explanation: See [[Section 2]](https://proceedings.neurips.cc/paper/2011/file/5dd9db5e033da9c6fb5ba83c7a7ebea9-Paper.pdf) + quality_control: unknown + # Downstream + access: + value: open + explanation: The dataset is available for download on the [[SBU Captions Dataset + website]](https://www.cs.rice.edu/~vo9/sbucaptions/), along with additional + resources. intended_uses: '' - license: - explanation: No license was found. 
- value: none - modality: {} - monitoring: none - name: SBU Captions - organization: Stony Brook University prohibited_uses: '' - quality_control: unknown - sample: [] - size: 1M image-text pairs - type: dataset - url: https://proceedings.neurips.cc/paper/2011/file/5dd9db5e033da9c6fb5ba83c7a7ebea9-Paper.pdf + monitoring: none + feedback: '' diff --git a/assets/suno.yaml b/assets/suno.yaml index 5e4e59e6..6e277fb6 100644 --- a/assets/suno.yaml +++ b/assets/suno.yaml @@ -1,25 +1,24 @@ --- -- access: open - analysis: '' - created_date: 2023-04-20 - dependencies: [AudioLM] + +- type: model + name: Bark + organization: Suno description: Bark is a text-to-audio model that can generate multilingual speech as well as other noises. - feedback: https://huggingface.co/spaces/suno/bark/discussions - intended_uses: '' - license: MIT - modality: - explanation: text input with audio output - value: text; audio + created_date: 2023-04-20 + url: https://github.com/suno-ai/bark model_card: https://github.com/suno-ai/bark/blob/main/model-card.md - monitoring: '' - name: Bark - organization: Suno - prohibited_uses: '' - quality_control: '' + modality: text input with audio output + analysis: '' size: '' + dependencies: [AudioLM] training_emissions: unknown - training_hardware: '' training_time: unknown - type: model - url: https://github.com/suno-ai/bark + training_hardware: '' + quality_control: '' + access: open + license: MIT + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: https://huggingface.co/spaces/suno/bark/discussions diff --git a/assets/together.yaml b/assets/together.yaml index 1ec0ab7c..2bb3cb97 100644 --- a/assets/together.yaml +++ b/assets/together.yaml @@ -1,134 +1,134 @@ --- -- access: open - analysis: '' - created_date: 2022-11-29 - dependencies: [GPT-J, P3, NaturalInstructions-v2] - description: '' - feedback: '' - intended_uses: '' - license: Apache 2.0 - modality: - explanation: text - value: text; text - model_card: '' - monitoring: '' + +- type: model name: GPT-JT organization: Together - prohibited_uses: '' - quality_control: '' - size: 6B parameters (dense) - training_emissions: '' - training_hardware: '' - training_time: '' - type: model + description: '' + created_date: 2022-11-29 url: https://www.together.xyz/blog/releasing-v1-of-gpt-jt-powered-by-open-source-ai -- access: open + model_card: '' + modality: text analysis: '' - created_date: 2023-03-10 - dependencies: [GPT-NeoX, OIG-43M] - description: '' - feedback: '' - intended_uses: '' + size: 6B parameters (dense model) + dependencies: [GPT-J, P3, NaturalInstructions-v2] + training_emissions: '' + training_time: '' + training_hardware: '' + quality_control: '' + access: open license: Apache 2.0 - modality: - explanation: text - value: text; text - model_card: '' + intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' + +- type: model name: GPT-NeoXT-Chat-Base organization: Together - prohibited_uses: '' - quality_control: '' - size: 20B parameters (dense) - training_emissions: '' - training_hardware: '' - training_time: '' - type: model + description: '' + created_date: 2023-03-10 url: https://www.together.xyz/blog/openchatkit -- access: open + model_card: '' + modality: text analysis: '' - created_date: 2023-03-10 - dependencies: [GPT-JT, OIG-moderation] - description: '' - feedback: '' - intended_uses: '' + size: 20B parameters (dense model) + dependencies: [GPT-NeoX, OIG-43M] + training_emissions: '' + training_time: '' + training_hardware: '' + quality_control: '' + access: open license: 
Apache 2.0 - modality: - explanation: text - value: text; text - model_card: '' + intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' + +- type: model name: OpenChatKit moderation model organization: Together - prohibited_uses: '' - quality_control: '' - size: 6B parameters (dense) - training_emissions: '' - training_hardware: '' - training_time: '' - type: model + description: '' + created_date: 2023-03-10 url: https://www.together.xyz/blog/openchatkit -- access: open + model_card: '' + modality: text analysis: '' - created_date: 2023-03-10 - datasheet: '' - dependencies: [P3, NaturalInstructions-v2, FLAN dataset] - description: '' - excluded: '' - feedback: '' - included: '' - intended_uses: '' + size: 6B parameters (dense model) + dependencies: [GPT-JT, OIG-moderation] + training_emissions: '' + training_time: '' + training_hardware: '' + quality_control: '' + access: open license: Apache 2.0 - modality: text + intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' + +- type: dataset name: OIG-43M organization: Together, LAION, Ontocord - prohibited_uses: '' - quality_control: '' - sample: [] - size: 43M instructions - type: dataset - url: https://laion.ai/blog/oig-dataset/ -- access: open - analysis: '' + description: '' created_date: 2023-03-10 + url: https://laion.ai/blog/oig-dataset/ datasheet: '' - dependencies: [] - description: '' - excluded: '' - feedback: '' + modality: text + size: 43M instructions + sample: [] + analysis: '' + dependencies: [P3, NaturalInstructions-v2, FLAN dataset] included: '' - intended_uses: '' + excluded: '' + quality_control: '' + access: open license: Apache 2.0 - modality: text + intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' + +- type: dataset name: OIG-moderation organization: Together, LAION, Ontocord - prohibited_uses: '' - quality_control: '' - sample: [] - size: unknown - type: dataset + description: '' + created_date: 2023-03-10 url: https://laion.ai/blog/oig-dataset/ -- access: open + datasheet: '' + modality: text + size: unknown + sample: [] analysis: '' - created_date: 2022-04-17 - datasheet: https://huggingface.co/datasets/togethercomputer/RedPajama-Data-1T - dependencies: [GitHub, Wikipedia] - description: The RedPajama base dataset is a 1.2 trillion token fully-open dataset - created by following the recipe described in the LLaMA paper - excluded: '' - feedback: '' + dependencies: [] included: '' - intended_uses: '' + excluded: '' + quality_control: '' + access: open license: Apache 2.0 - modality: text + intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' + +- type: dataset name: RedPajama-Data organization: Together - prohibited_uses: '' - quality_control: '' - sample: [] - size: 1.2 trillion tokens - type: dataset + description: The RedPajama base dataset is a 1.2 trillion token fully-open dataset + created by following the recipe described in the LLaMA paper + created_date: 2022-04-17 url: https://www.together.xyz/blog/redpajama + datasheet: https://huggingface.co/datasets/togethercomputer/RedPajama-Data-1T + modality: text + size: 1.2 trillion tokens + sample: [] + analysis: '' + dependencies: [GitHub, Wikipedia] + included: '' + excluded: '' + quality_control: '' + access: open + license: Apache 2.0 + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' diff --git a/assets/transformify.yaml b/assets/transformify.yaml index 59a59c34..118b559f 100644 --- a/assets/transformify.yaml +++ b/assets/transformify.yaml @@ -1,22 +1,23 @@ --- -- 
access: open - adaptation: '' - created_date: 2023-05-30 - dependencies: [GPT-4] - description: Transformify Automate is a platform for automated task integration - using natural language prompts. - failures: '' - feedback: '' - intended_uses: '' - license: '' - monitoring: '' - monthly_active_users: '' + +- type: application name: Transformify Automate organization: Transformify + description: Transformify Automate is a platform for automated task integration + using natural language prompts. + created_date: 2023-05-30 + url: https://www.transformify.ai/automate + dependencies: [GPT-4] + adaptation: '' output_space: text and code - prohibited_uses: '' quality_control: '' + access: open + license: '' terms_of_service: https://www.transformify.ai/legal-stuff - type: application - url: https://www.transformify.ai/automate + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' + monthly_active_users: '' user_distribution: '' + failures: '' diff --git a/assets/trevor.yaml b/assets/trevor.yaml index 02126721..9c21533d 100644 --- a/assets/trevor.yaml +++ b/assets/trevor.yaml @@ -1,38 +1,50 @@ --- -- access: - explanation: "The tool can only be accessed by the Trevor Project will be counsolors\ - \ for training purposes.\n" - value: closed - adaptation: - explanation: "The base GPT-2 model was fine-tuned on the previous conversations\ - \ between new trainees and the Trevor Project counselors posing to be a fictional\ - \ persona in crisis.\n" - value: Fine-tuning + +- type: application + name: Crisis Contact Simulator + # General + organization: The Trevor Project + description: > + Crisis Contact Simulator, developed as part of a collaboration with + Google.org, helps train The Trevor Project counselors by + mimicking a teen in crisis. Crisis Contact Simulator is used + as part of the training programs for the Trevor Project's 24/7 digital + crisis services that support LGBTQ youth + [[Trevor Project Blog]](https://www.thetrevorproject.org/blog/the-trevor-project-launches-new-ai-tool-to-support-crisis-counselor-training/). created_date: - explanation: "The release date of the blog post introducing Crisis Contact Simulator\ - \ [[Trevor Project Blog]](https://www.thetrevorproject.org/blog/the-trevor-project-launches-new-ai-tool-to-support-crisis-counselor-training/).\n" value: 2021-03-24 + explanation: > + The release date of the blog post introducing Crisis Contact Simulator + [[Trevor Project Blog]](https://www.thetrevorproject.org/blog/the-trevor-project-launches-new-ai-tool-to-support-crisis-counselor-training/). + url: https://www.thetrevorproject.org/ + # Construction dependencies: [OpenAI API] - description: "Crisis Contact Simulator, developed as part of a collaboration with\ - \ Google.org, helps train The Trevor Project counselors by mimicking to be a\ - \ teen in crisis.
Crisis Contact Simulator is used as part of the training programs\ - \ for the Trevor Project's 24/7 digital crisis services that supports LGBTQ\ - \ youth [[Trevor Project Blog]](https://www.thetrevorproject.org/blog/the-trevor-project-launches-new-ai-tool-to-support-crisis-counselor-training/).\n" - failures: unknown - feedback: unknown - intended_uses: "Training counselors\n" - license: unknown - monitoring: unknown - monthly_active_users: unknown - name: Crisis Contact Simulator - organization: The Trevor Project + adaptation: + value: Fine-tuning + explanation: > + The base GPT-2 model was fine-tuned on the previous conversations between + new trainees and the Trevor Project counselors posing to be a fictional + persona in crisis. output_space: Dialogue - prohibited_uses: unknown quality_control: - explanation: "The tool can only be accessed by the Trevor Project will be counsolors\ - \ for training purposes.\n" value: Limited release + explanation: > + The tool can only be accessed by the Trevor Project's counselors + for training purposes. + # Downstream + access: + value: closed + explanation: > + The tool can only be accessed by the Trevor Project's counselors + for training purposes. + license: unknown terms_of_service: unknown - type: application - url: https://www.thetrevorproject.org/ + intended_uses: > + Training counselors + prohibited_uses: unknown + monitoring: unknown + feedback: unknown + # Deployment + monthly_active_users: unknown user_distribution: unknown + failures: unknown diff --git a/assets/tsinghua.yaml b/assets/tsinghua.yaml index c8b21ee6..38fbaa3a 100644 --- a/assets/tsinghua.yaml +++ b/assets/tsinghua.yaml @@ -1,149 +1,148 @@ --- -- access: - explanation: Model weights are available but gated via an [[application form]](https://models.aminer.cn/codegeex/download/request) - value: limited - analysis: none + +# models +- type: model + name: CodeGeeX + organization: Tsinghua + description: CodeGeeX is an autoregressive language model trained on code created_date: 2022-09-20 + url: https://github.com/THUDM/CodeGeeX + model_card: none + modality: code + analysis: none + size: 13B parameters (dense model) dependencies: [] - description: CodeGeeX is an autoregressive language model trained on code - feedback: none - intended_uses: none + training_emissions: unknown + training_time: unknown + training_hardware: THUDM 1536 Ascend 910 (32GB) Cluster + quality_control: none + access: + value: limited + explanation: Model weights are available but gated via an [[application form]](https://models.aminer.cn/codegeex/download/request) license: - explanation: The license is provided in the [[Github repository]](https://github.com/THUDM/CodeGeeX) value: Apache 2.0 - modality: - explanation: code - value: code; code - model_card: none + explanation: The license is provided in the [[Github repository]](https://github.com/THUDM/CodeGeeX) + intended_uses: none + prohibited_uses: none monitoring: none - name: CodeGeeX + feedback: none + +- type: model + name: CogView organization: Tsinghua - prohibited_uses: none - quality_control: none - size: 13B parameters (dense) - training_emissions: unknown - training_hardware: THUDM 1536 Ascend 910 (32GB) Cluster - training_time: unknown - type: model - url: https://github.com/THUDM/CodeGeeX -- access: - explanation: Model checkpoints available from [[Wudao-Wenhui]](https://resource.wudaoai.cn/home?ind=2&name=WuDao%20WenHui&id=1399364355975327744) - value: open - analysis: '' + description: CogView is a transformer model for
text-to-image generation created_date: - explanation: The date the model paper was released value: 2021-05-26 - dependencies: [] - description: CogView is a transformer model for text-to-image generation - feedback: '' - intended_uses: '' - license: - explanation: "The license is provided in the [[Github repository]](https://github.com/THUDM/CogView)\n" - value: Apache 2.0 - modality: - explanation: text (Chinese) and image - value: image, text; image, text + explanation: The date the model paper was released + url: https://arxiv.org/abs/2105.13290 model_card: none - monitoring: '' - name: CogView - organization: Tsinghua - prohibited_uses: '' - quality_control: '' - size: 4B parameters (dense) + modality: text (Chinese) and image + size: 4B parameters + analysis: '' + dependencies: [] training_emissions: '' - training_hardware: '' training_time: '' - type: model - url: https://arxiv.org/abs/2105.13290 -- access: - explanation: The model checkpoints are available for download from [[BAAI]](https://model.baai.ac.cn/model-detail/100041) + training_hardware: '' + quality_control: '' + access: value: open - analysis: '' - created_date: - explanation: The date the model paper was released - value: 2022-04-28 - dependencies: [] - description: CogView 2 is a hierarchical transformer for text-to-image generation - feedback: '' - intended_uses: '' + explanation: Model checkpoints available from [[Wudao-Wenhui]](https://resource.wudaoai.cn/home?ind=2&name=WuDao%20WenHui&id=1399364355975327744) license: - explanation: "The license is provided in the [[Github repository]](https://github.com/THUDM/CogView2)\n" value: Apache 2.0 - modality: - explanation: text (Chinese, English) and image - value: image, text; image, text - model_card: none + explanation: > + The license is provided in the [[Github repository]](https://github.com/THUDM/CogView) + intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' + +- type: model name: CogView 2 organization: Tsinghua - prohibited_uses: '' - quality_control: '' - size: 6B parameters (dense) - training_emissions: '' - training_hardware: '' - training_time: '' - type: model - url: https://arxiv.org/abs/2204.14217 -- access: - explanation: Model checkpoints are available for download from https://github.com/THUDM/CogVideo - value: open - analysis: '' + description: CogView 2 is a hierarchical transformer for text-to-image generation created_date: + value: 2022-04-28 explanation: The date the model paper was released - value: 2022-05-29 + url: https://arxiv.org/abs/2204.14217 + model_card: none + modality: text (Chinese, English) and image + size: 6B parameters + analysis: '' dependencies: [] - description: CogVideo is a transformer model for text-to-video generation - feedback: '' - intended_uses: '' + training_emissions: '' + training_time: '' + training_hardware: '' + quality_control: '' + access: + value: open + explanation: The model checkpoints are available for download from [[BAAI]](https://model.baai.ac.cn/model-detail/100041) license: - explanation: "The license is provided in the [[Github repository]](https://github.com/THUDM/CogVideo)\n" value: Apache 2.0 - modality: - explanation: text (Chinese) and video - value: text, video; text, video - model_card: none + explanation: > + The license is provided in the [[Github repository]](https://github.com/THUDM/CogView2) + intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' + +- type: model name: CogVideo organization: Tsinghua - prohibited_uses: '' - quality_control: '' + 
description: CogVideo is a transformer model for text-to-video generation + created_date: + value: 2022-05-29 + explanation: The date the model paper was released + url: https://arxiv.org/abs/2205.15868 + model_card: none + modality: text (Chinese) and video size: unknown + analysis: '' + dependencies: [] training_emissions: '' - training_hardware: '' training_time: '' - type: model - url: https://arxiv.org/abs/2205.15868 -- access: - explanation: Model checkpoints are available from the [[GitHub repository]](https://github.com/THUDM/GLM-130B/blob/main/MODEL_LICENSE) + training_hardware: '' + quality_control: '' + access: value: open - analysis: '' + explanation: Model checkpoints are available for download from https://github.com/THUDM/CogVideo + license: + value: Apache 2.0 + explanation: > + The license is provided in the [[Github repository]](https://github.com/THUDM/CogVideo) + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' + +- type: model + name: GLM-130B + organization: Tsinghua + description: GLM-130B is a bidirectional language model trained on English and + Chinese created_date: - explanation: The date the model website was made public value: 2022-08-04 + explanation: The date the model website was made public + url: https://keg.cs.tsinghua.edu.cn/glm-130b/ + model_card: none + modality: text (Chinese | English) + size: 130B parameters (dense model) + analysis: '' dependencies: - The Pile - GLM-130B Chinese corpora - P3 - DeepStruct finetuning dataset - description: GLM-130B is a bidirectional language model trained on English and - Chinese - feedback: '' - intended_uses: '' + training_emissions: '' + training_time: '' + training_hardware: THUDM 96 DGX-A100 (40G) cluster + quality_control: '' + access: + value: open + explanation: Model checkpoints are available from the [[GitHub repository]](https://github.com/THUDM/GLM-130B/blob/main/MODEL_LICENSE) license: - explanation: Unique model license. See the [[GitHub repository]](https://github.com/THUDM/GLM-130B/blob/main/MODEL_LICENSE) value: GLM-130B License - modality: - explanation: text (Chinese | English) - value: text; text - model_card: none - monitoring: '' - name: GLM-130B - organization: Tsinghua + explanation: Unique model license. See the [[GitHub repository]](https://github.com/THUDM/GLM-130B/blob/main/MODEL_LICENSE) + intended_uses: '' prohibited_uses: '' - quality_control: '' - size: 130B parameters (dense) - training_emissions: '' - training_hardware: THUDM 96 DGX-A100 (40G) cluster - training_time: '' - type: model - url: https://keg.cs.tsinghua.edu.cn/glm-130b/ + monitoring: '' + feedback: '' diff --git a/assets/uae.yaml b/assets/uae.yaml index 9ed62419..127d82d0 100644 --- a/assets/uae.yaml +++ b/assets/uae.yaml @@ -1,50 +1,50 @@ --- -- access: open - analysis: '' - created_date: 2023-06-14 - dependencies: [RefinedWeb] + +- type: model + name: Falcon + organization: UAE Technology Innovation Institute description: Falcon-40B is a 40B parameters causal decoder-only model built by TII and trained on 1,000B tokens of RefinedWeb enhanced with curated corpora. 
- feedback: https://huggingface.co/tiiuae/falcon-40b/discussions - intended_uses: '' - license: Apache 2.0 - modality: - explanation: natural language text - value: text; text + created_date: 2023-06-14 + url: https://huggingface.co/tiiuae/falcon-40b model_card: https://huggingface.co/tiiuae/falcon-40b - monitoring: None - name: Falcon - organization: UAE Technology Innovation Institute - prohibited_uses: irresponsible or harmful use or production use without adequate - assessment of risks and mitigation - quality_control: '' - size: 40B parameters (dense) + modality: natural language text + analysis: '' + size: 40B parameters + dependencies: [RefinedWeb] training_emissions: '' - training_hardware: 384 A100 40GB GPUs training_time: 2 months - type: model - url: https://huggingface.co/tiiuae/falcon-40b -- access: open - analysis: '' + training_hardware: 384 A100 40GB GPUs + quality_control: '' + access: open + license: Apache 2.0 + intended_uses: '' + prohibited_uses: irresponsible or harmful use or production use without adequate + assessment of risks and mitigation + monitoring: None + feedback: https://huggingface.co/tiiuae/falcon-40b/discussions + +- type: dataset + name: RefinedWeb + organization: UAE Technology Innovation Institute + description: RefinedWeb is a high-quality five trillion tokens web-only English + pretraining dataset. created_date: 2023-06-01 + url: https://arxiv.org/pdf/2306.01116.pdf datasheet: https://huggingface.co/datasets/tiiuae/falcon-refinedweb + modality: text + size: 600B tokens + sample: [] + analysis: '' dependencies: [] - description: RefinedWeb is a high-quality five trillion tokens web-only English - pretraining dataset. - excluded: '' - feedback: '' included: '' - intended_uses: '' + excluded: '' + quality_control: '' + access: open license: - explanation: License can be found at https://huggingface.co/datasets/tiiuae/falcon-refinedweb value: unknown - modality: text - monitoring: '' - name: RefinedWeb - organization: UAE Technology Innovation Institute + explanation: License can be found at https://huggingface.co/datasets/tiiuae/falcon-refinedweb + intended_uses: '' prohibited_uses: '' - quality_control: '' - sample: [] - size: 600B tokens - type: dataset - url: https://arxiv.org/pdf/2306.01116.pdf + monitoring: '' + feedback: '' diff --git a/assets/uw.yaml b/assets/uw.yaml index 15d2832c..29a49c7d 100644 --- a/assets/uw.yaml +++ b/assets/uw.yaml @@ -1,22 +1,23 @@ --- -- access: open - analysis: '' + +- type: dataset + name: YT-Temporal-1B + organization: University of Washington + description: '' created_date: 2022-01-07 + url: https://arxiv.org/abs/2201.02639 datasheet: '' + modality: video + size: 20M videos + sample: [] + analysis: '' dependencies: [YouTube] - description: '' - excluded: '' - feedback: '' included: '' - intended_uses: '' + excluded: '' + quality_control: '' + access: open license: MIT - modality: video - monitoring: '' - name: YT-Temporal-1B - organization: University of Washington + intended_uses: '' prohibited_uses: '' - quality_control: '' - sample: [] - size: 20M videos - type: dataset - url: https://arxiv.org/abs/2201.02639 + monitoring: '' + feedback: '' diff --git a/assets/uwashington.yaml b/assets/uwashington.yaml index aded82ab..32208f00 100644 --- a/assets/uwashington.yaml +++ b/assets/uwashington.yaml @@ -1,27 +1,26 @@ --- -- access: open - analysis: Reports results on the Vicuna benchmark and compares performance level - and time expenditure with ChatGPT - created_date: 2023-05-23 - dependencies: [QLoRA, OASST1] + +- 
type: model + name: Guanaco + organization: University of Washington description: Guanaco is a model family trained with QLORA, an efficient finetuning approach that reduces memory usage enough to finetune a 65B parameter model on a single 48GB GPU while preserving full 16-bit finetuning task performance. - feedback: '' - intended_uses: '' - license: MIT - modality: - explanation: natural language text - value: text; text + created_date: 2023-05-23 + url: https://arxiv.org/pdf/2305.14314v1.pdf model_card: '' - monitoring: '' - name: Guanaco - organization: University of Washington - prohibited_uses: '' - quality_control: '' - size: 33B parameters (dense) + modality: natural language text + analysis: Reports results on the Vicuna benchmark and compares performance level + and time expenditure with ChatGPT + size: 33B parameters + dependencies: [QLoRA, OASST1] training_emissions: '' - training_hardware: A single 24 GB GPU training_time: '' - type: model - url: https://arxiv.org/pdf/2305.14314v1.pdf + training_hardware: A single 24 GB GPU + quality_control: '' + access: open + license: MIT + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: '' diff --git a/assets/viable.yaml b/assets/viable.yaml index 35ab68cd..c29dce6e 100644 --- a/assets/viable.yaml +++ b/assets/viable.yaml @@ -1,28 +1,38 @@ --- -- access: limited - adaptation: unknown - created_date: unknown - dependencies: [OpenAI API] - description: "Viable analyzes qualitative consumer feedback and provides summary\ - \ feedback to companies.\n" - failures: unknown - feedback: - explanation: "The feedback mechanism isn't explicitly listed. Company contact\ - \ information is provided in the Terms of Service [[Terms of Service]](https://www.askviable.com/terms-of-service).\n" - value: unknown - intended_uses: "Intended to be used by companies to digest qualitative consumer\ - \ feedback.\n" - license: unknown - monitoring: unknown - monthly_active_users: unknown + +- type: application name: Viable + # General organization: Viable + description: > + Viable analyzes qualitative consumer feedback and provides summary feedback + to companies. + created_date: unknown + url: https://www.askviable.com/ + # Construction + dependencies: [OpenAI API] + adaptation: unknown output_space: Question and answer, summarization, sentiment analysis, topic identification - prohibited_uses: "Prohibited uses are listed in the Terms of Service [[Terms of\ - \ Service]](https://www.askviable.com/terms-of-service). The terms don't include\ - \ statements specific to the use of the content generated by the system or GPT-3.\n" quality_control: unknown + # Downstream + access: limited + license: unknown terms_of_service: https://www.askviable.com/terms-of-service - type: application - url: https://www.askviable.com/ + intended_uses: > + Intended to be used by companies to digest qualitative consumer feedback. + prohibited_uses: > + Prohibited uses are listed in the Terms of Service + [[Terms of Service]](https://www.askviable.com/terms-of-service). + The terms don't include statements specific to the use of the content + generated by the system or GPT-3. + monitoring: unknown + feedback: + value: unknown + explanation: > + The feedback mechanism isn't explicitly listed. Company contact + information is provided in the Terms of Service + [[Terms of Service]](https://www.askviable.com/terms-of-service). 
+ # Deployment + monthly_active_users: unknown user_distribution: unknown + failures: unknown diff --git a/assets/writer.yaml b/assets/writer.yaml index 9879f42f..a56591b9 100644 --- a/assets/writer.yaml +++ b/assets/writer.yaml @@ -1,55 +1,53 @@ --- -- access: open - analysis: Evaluated on the SuperGLUE benchmark + +- type: model + name: Palmyra + organization: Writer + description: Palmyra is a privacy-first LLM for enterprises trained on business + and marketing writing. created_date: + value: 2023-01-01 explanation: The model was stated to be published in January, but which day is not specified on the website. - value: 2023-01-01 + url: https://gpt3demo.com/apps/palmyra + model_card: https://huggingface.co/Writer/palmyra-base + modality: text + analysis: Evaluated on the SuperGLUE benchmark + size: 128M (Small), 5B (Base), 20B (Large) parameters dependencies: [Writer dataset] - description: Palmyra is a privacy-first LLM for enterprises trained on business - and marketing writing. - feedback: https://huggingface.co/Writer/palmyra-base/discussions - intended_uses: generating text from a prompt + training_emissions: unknown + training_time: unknown + training_hardware: '' + quality_control: '' + access: open license: Apache 2.0 - modality: - explanation: text - value: text; text - model_card: https://huggingface.co/Writer/palmyra-base + intended_uses: generating text from a prompt + prohibited_uses: '' monitoring: '' - name: Palmyra + feedback: https://huggingface.co/Writer/palmyra-base/discussions + +- type: model + name: Camel organization: Writer - prohibited_uses: '' - quality_control: '' - size: 128M parameters (dense) - training_emissions: unknown - training_hardware: '' - training_time: unknown - type: model - url: https://gpt3demo.com/apps/palmyra -- access: open - analysis: '' + description: Camel is an instruction-following large language model tailored for + advanced NLP and comprehension capabilities. created_date: + value: 2023-04-01 explanation: The model was stated to be published in April, but which day is not specified on the website. - value: 2023-04-01 - dependencies: [Palmyra, Camel dataset] - description: Camel is an instruction-following large language model tailored for - advanced NLP and comprehension capabilities. - feedback: https://huggingface.co/Writer/camel-5b-hf/discussions - intended_uses: '' - license: Apache 2.0 - modality: - explanation: text - value: text; text + url: https://chatcamel.vercel.app/ model_card: https://huggingface.co/Writer/camel-5b-hf - monitoring: '' - name: Camel - organization: Writer - prohibited_uses: '' - quality_control: '' - size: 5B parameters (dense) + modality: text + analysis: '' + size: 5B parameters + dependencies: [Palmyra, Camel dataset] training_emissions: unknown - training_hardware: '' training_time: unknown - type: model - url: https://chatcamel.vercel.app/ + training_hardware: '' + quality_control: '' + access: open + license: Apache 2.0 + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: https://huggingface.co/Writer/camel-5b-hf/discussions diff --git a/assets/yandex.yaml b/assets/yandex.yaml index 0090546b..becdb8e5 100644 --- a/assets/yandex.yaml +++ b/assets/yandex.yaml @@ -1,59 +1,63 @@ --- -- access: open - analysis: '' + +- type: model + name: YaLM + organization: Yandex + description: YaLM is a 100B parameter autoregressive model trained on 25% English + and 75% Russian text. created_date: - explanation: The date the blog post about YaLM was published. 
value: 2022-06-22 + explanation: The date the blog post about YaLM was published. + url: https://medium.com/yandex/yandex-publishes-yalm-100b-its-the-largest-gpt-like-neural-network-in-open-source-d1df53d0e9a6 + model_card: '' + modality: text (English, Russian) + analysis: '' + size: 100B parameters (dense model) dependencies: [The Pile, Yandex Russian Pretraining Dataset] - description: YaLM is a 100B parameter autoregressive model trained on 25% English - and 75% Russian text. - feedback: '' - intended_uses: '' + training_emissions: '' + training_time: '' + training_hardware: Yandex 800 A100 Cluster + quality_control: '' + access: open license: - explanation: '' value: Apache 2.0 - modality: - explanation: text (English, Russian) - value: text; text - model_card: '' + explanation: '' + intended_uses: '' + prohibited_uses: '' monitoring: '' - name: YaLM + feedback: '' + +- type: application + name: Yandex Search organization: Yandex - prohibited_uses: '' - quality_control: '' - size: 100B parameters (dense) - training_emissions: '' - training_hardware: Yandex 800 A100 Cluster - training_time: '' - type: model - url: https://medium.com/yandex/yandex-publishes-yalm-100b-its-the-largest-gpt-like-neural-network-in-open-source-d1df53d0e9a6 -- access: open - adaptation: '' - created_date: 2022-06-23 - dependencies: [YaLM] description: Yandex is a search engine and web portal. Yandex offers internet search and other services - failures: '' - feedback: '' - intended_uses: '' + created_date: 2022-06-23 + url: https://medium.com/yandex/yandex-publishes-yalm-100b-its-the-largest-gpt-like-neural-network-in-open-source-d1df53d0e9a6 + dependencies: [YaLM] + adaptation: '' + output_space: '' + quality_control: '' + access: open license: - explanation: "\"3.1. The Rightholder, on the basis of a simple (non-exclusive)\ - \ license, shall grant the User a free, worldwide, non-transferable right\ - \ to use the Software as set forth below:\n3.1.1. To use the Software in accordance\ - \ with its functionality by copying and installing it on the User's personal\ - \ device(s). The User shall be entitled to install the Software on any number\ - \ of personal devices.\n3.1.2. To reproduce and to distribute this Software\ - \ in an unmodified form and solely for non-commercial purposes (free of charge).\"\ - \ - excerpt from the Terms of Service document for browsers.\n" value: custom + explanation: > + "3.1. The Rightholder, on the basis of a simple (non-exclusive) license, + shall grant the User a free, worldwide, non-transferable right to use the + Software as set forth below: + + 3.1.1. To use the Software in accordance with its functionality by + copying and installing it on the User's personal device(s). The User shall + be entitled to install the Software on any number of personal devices. + + 3.1.2. To reproduce and to distribute this Software in an unmodified form + and solely for non-commercial purposes (free of charge)." - excerpt + from the Terms of Service document for browsers. 
+ terms_of_service: https://yandex.com/legal/browser_agreement/ + intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' monthly_active_users: '' - name: Yandex Search - organization: Yandex - output_space: '' - prohibited_uses: '' - quality_control: '' - terms_of_service: https://yandex.com/legal/browser_agreement/ - type: application - url: https://medium.com/yandex/yandex-publishes-yalm-100b-its-the-largest-gpt-like-neural-network-in-open-source-d1df53d0e9a6 user_distribution: '' + failures: '' diff --git a/assets/you.yaml b/assets/you.yaml index f7a1ffe3..60bf2a22 100644 --- a/assets/you.yaml +++ b/assets/you.yaml @@ -1,71 +1,77 @@ --- -- access: closed - analysis: '' + +- type: dataset + name: You dataset + organization: You + description: '' created_date: unknown + url: https://you.com/ datasheet: '' + modality: text + size: unknown + sample: [] + analysis: '' dependencies: [] - description: '' - excluded: '' - feedback: '' included: '' - intended_uses: '' + excluded: '' + quality_control: '' + access: closed license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - modality: {} + explanation: > + The asset isn't released, and hence the license is unknown. + intended_uses: '' + prohibited_uses: '' monitoring: '' - name: You dataset + feedback: '' + +- type: model + name: You model organization: You - prohibited_uses: '' - quality_control: '' - sample: [] - size: unknown - type: dataset + description: '' + created_date: unknown url: https://you.com/ -- access: closed + model_card: '' + modality: Text analysis: '' - created_date: unknown + size: unknown dependencies: [You dataset] - description: '' - feedback: '' - intended_uses: '' + training_emissions: '' + training_time: '' + training_hardware: '' + quality_control: '' + access: closed license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown - modality: {} - model_card: '' + explanation: > + The asset isn't released, and hence the license is unknown. + intended_uses: '' + prohibited_uses: '' monitoring: '' - name: You model + feedback: '' + +- type: application + name: You Search organization: You - prohibited_uses: '' - quality_control: '' - size: '' - training_emissions: '' - training_hardware: '' - training_time: '' - type: model - url: https://you.com/ -- access: open - adaptation: '' - created_date: unknown - dependencies: [You model] description: You.com is a search engine built on artificial intelligence that provides users with a customized search experience while keeping their data 100% private. - failures: '' - feedback: '' - intended_uses: '' + created_date: unknown + url: https://you.com/ + dependencies: [You model] + adaptation: '' + output_space: '' + quality_control: '' + access: open license: - explanation: "The asset isn't released, and hence the license is unknown.\n" value: unknown + explanation: > + The asset isn't released, and hence the license is unknown. + terms_of_service: '' + intended_uses: '' + prohibited_uses: '' monitoring: '' + feedback: '' monthly_active_users: '' - name: You Search - organization: You - output_space: '' - prohibited_uses: '' - quality_control: '' - terms_of_service: '' - type: application - url: https://you.com/ user_distribution: '' + failures: ''