Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Tasks: update widget models, model recommendations #876

Merged
merged 10 commits into from
Sep 5, 2024
14 changes: 9 additions & 5 deletions packages/tasks/src/tasks/audio-classification/data.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,11 @@ const taskData: TaskDataCustom = {
datasets: [
{
description: "A benchmark of 10 different audio tasks.",
id: "superb",
id: "s3prl/superb",
},
{
description: "A dataset of YouTube clips and their sound categories.",
id: "agkphysics/AudioSet",
},
],
demo: {
Expand Down Expand Up @@ -50,16 +54,16 @@ const taskData: TaskDataCustom = {
],
models: [
{
description: "An easy-to-use model for Command Recognition.",
description: "An easy-to-use model for command recognition.",
id: "speechbrain/google_speech_command_xvector",
},
{
description: "An Emotion Recognition model.",
description: "An emotion recognition model.",
id: "ehcalabres/wav2vec2-lg-xlsr-en-speech-emotion-recognition",
},
{
description: "A language identification model.",
id: "facebook/mms-lid-126",
id: "speechbrain/lang-id-voxlingua107-ecapa",
merveenoyan marked this conversation as resolved.
Show resolved Hide resolved
},
],
spaces: [
Expand All @@ -70,7 +74,7 @@ const taskData: TaskDataCustom = {
],
summary:
"Audio classification is the task of assigning a label or class to a given audio. It can be used for recognizing which command a user is giving or the emotion of a statement, as well as identifying a speaker.",
widgetModels: ["facebook/mms-lid-126"],
widgetModels: ["speechbrain/lang-id-voxlingua107-ecapa"],
merveenoyan marked this conversation as resolved.
Show resolved Hide resolved
youtubeId: "KWwzcmG98Ds",
};

Expand Down
6 changes: 5 additions & 1 deletion packages/tasks/src/tasks/audio-to-audio/data.ts
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,11 @@ const taskData: TaskDataCustom = {
},
{
description: "A speech enhancement model.",
id: "speechbrain/metricgan-plus-voicebank",
id: "ResembleAI/resemble-enhance",
Vaibhavs10 marked this conversation as resolved.
Show resolved Hide resolved
},
{
description: "A model that can change the voice in a speech recording.",
id: "microsoft/speecht5_vc",
},
],
spaces: [
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ const taskData: TaskDataCustom = {
},
{
description: "An English dataset with 1,000 hours of data.",
id: "librispeech_asr",
id: "openslr/librispeech_asr",
merveenoyan marked this conversation as resolved.
Show resolved Hide resolved
},
{
description: "A multi-lingual audio dataset with 370K hours of audio.",
Expand Down Expand Up @@ -54,6 +54,10 @@ const taskData: TaskDataCustom = {
description: "An end-to-end model that performs ASR and Speech Translation by MetaAI.",
id: "facebook/seamless-m4t-v2-large",
},
{
description: "Powerful speaker diarization model.",
id: "pyannote/speaker-diarization-3.1",
osanseviero marked this conversation as resolved.
Show resolved Hide resolved
},
],
spaces: [
{
Expand Down
10 changes: 7 additions & 3 deletions packages/tasks/src/tasks/document-question-answering/data.ts
Original file line number Diff line number Diff line change
Expand Up @@ -46,11 +46,15 @@ const taskData: TaskDataCustom = {
],
models: [
{
description: "A LayoutLM model for the document QA task, fine-tuned on DocVQA and SQuAD2.0.",
description: "A robust document question answering model.",
id: "impira/layoutlm-document-qa",
},
{
description: "A special model for OCR-free Document QA task.",
description: "A strong document question answering model specialized in invoices.",
merveenoyan marked this conversation as resolved.
Show resolved Hide resolved
id: "impira/layoutlm-invoices",
},
{
description: "A special model for OCR-free document question answering.",
id: "microsoft/udop-large",
},
{
Expand All @@ -74,7 +78,7 @@ const taskData: TaskDataCustom = {
],
summary:
"Document Question Answering (also known as Document Visual Question Answering) is the task of answering questions on document images. Document question answering models take a (document, question) pair as input and return an answer in natural language. Models usually rely on multi-modal features, combining text, position of words (bounding-boxes) and image.",
widgetModels: ["impira/layoutlm-document-qa"],
widgetModels: ["impira/layoutlm-invoices"],
youtubeId: "",
};

Expand Down
4 changes: 2 additions & 2 deletions packages/tasks/src/tasks/fill-mask/data.ts
Original file line number Diff line number Diff line change
Expand Up @@ -62,11 +62,11 @@ const taskData: TaskDataCustom = {
models: [
{
description: "A faster and smaller model than the famous BERT model.",
id: "distilbert-base-uncased",
id: "google-bert/bert-base-uncased",
merveenoyan marked this conversation as resolved.
Show resolved Hide resolved
},
{
description: "A multilingual model trained on 100 languages.",
id: "xlm-roberta-base",
id: "FacebookAI/xlm-roberta-base",
},
],
spaces: [],
Expand Down
2 changes: 1 addition & 1 deletion packages/tasks/src/tasks/image-segmentation/data.ts
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ const taskData: TaskDataCustom = {
],
summary:
"Image Segmentation divides an image into segments where each pixel in the image is mapped to an object. This task has multiple variants such as instance segmentation, panoptic segmentation and semantic segmentation.",
widgetModels: ["facebook/detr-resnet-50-panoptic"],
widgetModels: ["nvidia/segformer-b0-finetuned-ade-512-512"],
youtubeId: "dKE8SIt9C-w",
};

Expand Down
2 changes: 1 addition & 1 deletion packages/tasks/src/tasks/image-to-image/data.ts
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ const taskData: TaskDataCustom = {
],
summary:
"Image-to-image is the task of transforming a source image to match the characteristics of a target image or a target image domain. Any image manipulation and enhancement is possible with image to image models.",
widgetModels: ["lllyasviel/sd-controlnet-canny"],
widgetModels: ["stabilityai/stable-diffusion-2-inpainting"],
youtubeId: "",
};

Expand Down
2 changes: 1 addition & 1 deletion packages/tasks/src/tasks/image-to-text/data.ts
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ const taskData: TaskDataCustom = {
],
summary:
"Image to text models output a text from a given image. Image captioning or optical character recognition can be considered as the most common applications of image to text.",
widgetModels: ["Salesforce/blip-image-captioning-base"],
widgetModels: ["Salesforce/blip-image-captioning-large"],
youtubeId: "",
};

Expand Down
6 changes: 5 additions & 1 deletion packages/tasks/src/tasks/question-answering/data.ts
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,11 @@ const taskData: TaskDataCustom = {
id: "deepset/roberta-base-squad2",
},
{
description: "A special model that can answer questions from tables!",
description: "Small yet robust model that can answer questions.",
id: "distilbert/distilbert-base-cased-distilled-squad",
},
{
description: "A special model that can answer questions from tables.",
id: "google/tapas-base-finetuned-wtq",
},
],
Expand Down
6 changes: 3 additions & 3 deletions packages/tasks/src/tasks/sentence-similarity/data.ts
Original file line number Diff line number Diff line change
Expand Up @@ -69,8 +69,8 @@ const taskData: TaskDataCustom = {
id: "sentence-transformers/all-mpnet-base-v2",
},
{
description: "A multilingual model trained for FAQ retrieval.",
id: "clips/mfaq",
osanseviero marked this conversation as resolved.
Show resolved Hide resolved
description: "A multilingual robust sentence similarity model.",
id: "BAAI/bge-m3",
},
],
spaces: [
Expand All @@ -94,7 +94,7 @@ const taskData: TaskDataCustom = {
],
summary:
"Sentence Similarity is the task of determining how similar two texts are. Sentence similarity models convert input texts into vectors (embeddings) that capture semantic information and calculate how close (similar) they are between them. This task is particularly useful for information retrieval and clustering/grouping.",
widgetModels: ["sentence-transformers/all-MiniLM-L6-v2"],
widgetModels: ["thenlper/gte-large"],
youtubeId: "VCZq5AkbNEU",
merveenoyan marked this conversation as resolved.
Show resolved Hide resolved
};

Expand Down
4 changes: 2 additions & 2 deletions packages/tasks/src/tasks/summarization/data.ts
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ const taskData: TaskDataCustom = {
},
{
description: "A summarization model trained on medical articles.",
id: "google/bigbird-pegasus-large-pubmed",
id: "Falconsai/medical_summarization",
},
],
spaces: [
Expand All @@ -69,7 +69,7 @@ const taskData: TaskDataCustom = {
],
summary:
"Summarization is the task of producing a shorter version of a document while preserving its important information. Some models can extract text from the original input, while other models can generate entirely new text.",
widgetModels: ["sshleifer/distilbart-cnn-12-6"],
widgetModels: ["facebook/bart-large-cnn"],
youtubeId: "yHnr5Dk2zCI",
};

Expand Down
24 changes: 18 additions & 6 deletions packages/tasks/src/tasks/text-classification/data.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,11 @@ const taskData: TaskDataCustom = {
datasets: [
{
description: "A widely used dataset used to benchmark multiple variants of text classification.",
id: "glue",
id: "nyu-mll/glue",
},
{
description: "A text classification dataset used to benchmark natural language inference models.",
id: "snli",
id: "stanfordnlp/snli",
},
],
demo: {
Expand Down Expand Up @@ -61,11 +61,23 @@ const taskData: TaskDataCustom = {
models: [
{
description: "A robust model trained for sentiment analysis.",
id: "distilbert-base-uncased-finetuned-sst-2-english",
id: "distilbert/distilbert-base-uncased-finetuned-sst-2-english",
},
{
description: "Multi-genre natural language inference model.",
id: "roberta-large-mnli",
description: "A sentiment analysis model specialized in financial sentiment.",
id: "ProsusAI/finbert",
},
{
description: "A sentiment analysis model specialized in analyzing tweets.",
id: "cardiffnlp/twitter-roberta-base-sentiment-latest",
},
{
description: "A model that can classify languages.",
id: "papluca/xlm-roberta-base-language-detection",
},
{
description: "A model that can classify text generation attacks.",
id: "meta-llama/Prompt-Guard-86M",
},
],
spaces: [
Expand All @@ -84,7 +96,7 @@ const taskData: TaskDataCustom = {
],
summary:
"Text Classification is the task of assigning a label or class to a given text. Some use cases are sentiment analysis, natural language inference, and assessing grammatical correctness.",
widgetModels: ["distilbert-base-uncased-finetuned-sst-2-english"],
widgetModels: ["distilbert/distilbert-base-uncased-finetuned-sst-2-english"],
youtubeId: "leNG9fN9FQU",
};

Expand Down
6 changes: 3 additions & 3 deletions packages/tasks/src/tasks/text-generation/data.ts
Original file line number Diff line number Diff line change
Expand Up @@ -97,8 +97,8 @@ const taskData: TaskDataCustom = {
id: "HuggingFaceH4/zephyr-chat",
},
{
description: "An text generation application that combines OpenAI and Hugging Face models.",
id: "microsoft/HuggingGPT",
description: "A leaderboard that ranks text generation models based on blind votes from people.",
id: "lmsys/chatbot-arena-leaderboard",
},
{
description: "A chatbot to converse with a very powerful text generation model.",
Expand All @@ -107,7 +107,7 @@ const taskData: TaskDataCustom = {
],
summary:
"Generating text is the task of generating new text given another text. These models can, for example, fill in incomplete text or paraphrase.",
widgetModels: ["HuggingFaceH4/zephyr-7b-beta"],
widgetModels: ["microsoft/Phi-3.5-mini-instruct"],
merveenoyan marked this conversation as resolved.
Show resolved Hide resolved
youtubeId: "e9gNEAlsOvU",
};

Expand Down
2 changes: 1 addition & 1 deletion packages/tasks/src/tasks/text-to-image/data.ts
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ const taskData: TaskDataCustom = {
],
summary:
"Generates images from input text. These models can be used to generate and modify images based on text prompts.",
widgetModels: ["CompVis/stable-diffusion-v1-4"],
widgetModels: ["black-forest-labs/FLUX.1-dev"],
osanseviero marked this conversation as resolved.
Show resolved Hide resolved
youtubeId: "",
};

Expand Down
8 changes: 6 additions & 2 deletions packages/tasks/src/tasks/text-to-speech/data.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ const taskData: TaskDataCustom = {
},
{
description: "Multi-speaker English dataset.",
id: "LibriTTS",
id: "mythicinfinity/libritts_r",
},
],
demo: {
Expand Down Expand Up @@ -40,7 +40,11 @@ const taskData: TaskDataCustom = {
},
merveenoyan marked this conversation as resolved.
Show resolved Hide resolved
{
description: "A massively multi-lingual TTS model.",
id: "facebook/mms-tts",
id: "coqui/XTTS-v2",
},
{
description: "Robust TTS model.",
id: "metavoiceio/metavoice-1B-v0.1",
},
{
description: "A prompt based, powerful TTS model.",
Expand Down
15 changes: 12 additions & 3 deletions packages/tasks/src/tasks/token-classification/data.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,12 @@ const taskData: TaskDataCustom = {
datasets: [
{
description: "A widely used dataset useful to benchmark named entity recognition models.",
id: "conll2003",
id: "eriktks/conll2003",
},
{
description:
"A multilingual dataset of Wikipedia articles annotated for named entity recognition in over 150 different languages.",
id: "wikiann",
id: "unimelb-nlp/wikiann",
},
],
demo: {
Expand Down Expand Up @@ -63,6 +63,15 @@ const taskData: TaskDataCustom = {
"A robust performance model to identify people, locations, organizations and names of miscellaneous entities.",
id: "dslim/bert-base-NER",
},
{
description:
"A strong model to identify people, locations, organizations and names in multiple languages.",
id: "FacebookAI/xlm-roberta-large-finetuned-conll03-english",
},
{
description: "A token classification model specialized on medical entity recognition.",
id: "blaze999/Medical-NER",
},
{
description: "Flair models are typically the state of the art in named entity recognition tasks.",
id: "flair/ner-english",
Expand All @@ -77,7 +86,7 @@ const taskData: TaskDataCustom = {
],
summary:
"Token classification is a natural language understanding task in which a label is assigned to some tokens in a text. Some popular token classification subtasks are Named Entity Recognition (NER) and Part-of-Speech (PoS) tagging. NER models could be trained to identify specific entities in a text, such as dates, individuals and places; and PoS tagging would identify, for example, which words in a text are verbs, nouns, and punctuation marks.",
widgetModels: ["dslim/bert-base-NER"],
widgetModels: ["FacebookAI/xlm-roberta-large-finetuned-conll03-english"],
youtubeId: "wVHdVlPScxA",
};

Expand Down
16 changes: 8 additions & 8 deletions packages/tasks/src/tasks/translation/data.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,12 @@ const taskData: TaskDataCustom = {
datasets: [
{
description: "A dataset of copyright-free books translated into 16 different languages.",
id: "opus_books",
id: "Helsinki-NLP/opus_books",
},
{
description:
"An example of translation between programming languages. This dataset consists of functions in Java and C#.",
id: "code_x_glue_cc_code_to_code_trans",
id: "google/code_x_glue_cc_code_to_code_trans",
},
],
demo: {
Expand Down Expand Up @@ -42,13 +42,13 @@ const taskData: TaskDataCustom = {
],
models: [
{
description: "A model that translates from English to French.",
id: "Helsinki-NLP/opus-mt-en-fr",
description: "Very powerful model that can translate many languages between each other, especially low-resource languages.",
id: "facebook/nllb-200-1.3B",
},
{
description:
"A general-purpose Transformer that can be used to translate from English to German, French, or Romanian.",
id: "t5-base",
id: "google-t5/t5-base",
},
],
spaces: [
Expand All @@ -57,12 +57,12 @@ const taskData: TaskDataCustom = {
id: "Iker/Translate-100-languages",
},
{
description: "An application that can translate between English, Spanish and Hindi.",
id: "EuroPython2022/Translate-with-Bloom",
description: "An application that can translate between many languages.",
id: "Geonmo/nllb-translation-demo",
},
],
summary: "Translation is the task of converting text from one language to another.",
widgetModels: ["t5-small"],
widgetModels: ["facebook/nllb-200-1.3B"],
merveenoyan marked this conversation as resolved.
Show resolved Hide resolved
youtubeId: "1JvfrvZgi6c",
};

Expand Down
6 changes: 3 additions & 3 deletions packages/tasks/src/tasks/video-classification/data.ts
Original file line number Diff line number Diff line change
Expand Up @@ -56,12 +56,12 @@ const taskData: TaskDataCustom = {
models: [
{
// TO DO: write description
description: "Strong Video Classification model trained on the Kinects 400 dataset.",
id: "MCG-NJU/videomae-base-finetuned-kinetics",
description: "Strong Video Classification model trained on the Kinetics 400 dataset.",
id: "google/vivit-b-16x2-kinetics400",
},
{
// TO DO: write description
description: "Strong Video Classification model trained on the Kinects 400 dataset.",
description: "Strong Video Classification model trained on the Kinetics 400 dataset.",
id: "microsoft/xclip-base-patch32",
},
],
Expand Down
Loading
Loading