From c51dea8294432a9f95521e3537f5cf2eff848f21 Mon Sep 17 00:00:00 2001 From: Merve Noyan Date: Fri, 26 Jan 2024 13:52:33 +0300 Subject: [PATCH] Update tasks with new models (#460) --------- Co-authored-by: Omar Sanseviero --- .../tasks/src/tasks/depth-estimation/data.ts | 12 ++-- .../tasks/src/tasks/mask-generation/data.ts | 47 +++++++++++++-- .../tasks/src/tasks/text-generation/data.ts | 60 +++++++------------ .../tasks/src/tasks/text-to-image/data.ts | 8 ++- .../zero-shot-image-classification/data.ts | 13 ++-- .../tasks/zero-shot-object-detection/data.ts | 7 ++- 6 files changed, 92 insertions(+), 55 deletions(-) diff --git a/packages/tasks/src/tasks/depth-estimation/data.ts b/packages/tasks/src/tasks/depth-estimation/data.ts index 1fd08f25c..546e88d9c 100644 --- a/packages/tasks/src/tasks/depth-estimation/data.ts +++ b/packages/tasks/src/tasks/depth-estimation/data.ts @@ -28,8 +28,8 @@ const taskData: TaskDataCustom = { id: "Intel/dpt-large", }, { - description: "Strong Depth Estimation model trained on the KITTI dataset.", - id: "facebook/dpt-dinov2-large-kitti", + description: "Strong Depth Estimation model trained on a big compilation of datasets.", + id: "LiheYoung/depth-anything-large-hf", }, { description: "A strong monocular depth estimation model.", @@ -42,8 +42,12 @@ const taskData: TaskDataCustom = { id: "radames/dpt-depth-estimation-3d-voxels", }, { - description: "An application that can estimate the depth in a given image.", - id: "keras-io/Monocular-Depth-Estimation", + description: "An application to try state-of-the-art depth estimation.", + id: "LiheYoung/Depth-Anything", + }, + { + description: "An application to compare the outputs of different depth estimation models.", + id: "merve/compare_depth_models", }, ], summary: "Depth estimation is the task of predicting depth of the objects present in an image.", diff --git a/packages/tasks/src/tasks/mask-generation/data.ts b/packages/tasks/src/tasks/mask-generation/data.ts index a37ad9c83..05f542dfb 
100644 --- a/packages/tasks/src/tasks/mask-generation/data.ts +++ b/packages/tasks/src/tasks/mask-generation/data.ts @@ -3,14 +3,51 @@ import type { TaskDataCustom } from ".."; const taskData: TaskDataCustom = { datasets: [], demo: { - inputs: [], - outputs: [], + inputs: [ + { + filename: "mask-generation-input.png", + type: "img", + }, + ], + outputs: [ + { + filename: "mask-generation-output.png", + type: "img", + }, + ], }, metrics: [], - models: [], - spaces: [], + models: [ + { + description: "Small yet powerful mask generation model.", + id: "Zigeng/SlimSAM-uniform-50", + }, + { + description: "Very strong mask generation model.", + id: "facebook/sam-vit-huge", + }, + ], + spaces: [ + { + description: + "An application that combines a mask generation model with an image embedding model for open-vocabulary image segmentation.", + id: "SkalskiP/SAM_and_MetaCLIP", + }, + { + description: "An application that compares the performance of a large and a small mask generation model.", + id: "merve/slimsam", + }, + { + description: "An application based on an improved mask generation model.", + id: "linfanluntan/Grounded-SAM", + }, + { + description: "An application to remove objects from videos using mask generation models.", + id: "SkalskiP/SAM_and_ProPainter", + }, + ], summary: - "Mask generation is creating a binary image that identifies a specific object or region of interest in an input image. Masks are often used in segmentation tasks, where they provide a precise way to isolate the object of interest for further processing or analysis.", + "Mask generation is the task of generating masks that identify a specific object or region of interest in a given image. 
Masks are often used in segmentation tasks, where they provide a precise way to isolate the object of interest for further processing or analysis.", widgetModels: [], youtubeId: "", }; diff --git a/packages/tasks/src/tasks/text-generation/data.ts b/packages/tasks/src/tasks/text-generation/data.ts index d442e247b..dbe5f203e 100644 --- a/packages/tasks/src/tasks/text-generation/data.ts +++ b/packages/tasks/src/tasks/text-generation/data.ts @@ -12,12 +12,12 @@ const taskData: TaskDataCustom = { id: "the_pile", }, { - description: "A crowd-sourced instruction dataset to develop an AI assistant.", - id: "OpenAssistant/oasst1", + description: "Truly open-source, curated and cleaned dialogue dataset.", + id: "HuggingFaceH4/ultrachat_200k", }, { - description: "A crowd-sourced instruction dataset created by Databricks employees.", - id: "databricks/databricks-dolly-15k", + description: "An instruction dataset with preference ratings on responses.", + id: "openbmb/UltraFeedback", }, ], demo: { @@ -59,66 +59,50 @@ const taskData: TaskDataCustom = { id: "bigcode/starcoder", }, { - description: "A model trained to follow instructions, uses Pythia-12b as base model.", - id: "databricks/dolly-v2-12b", + description: "A very powerful text generation model.", + id: "mistralai/Mixtral-8x7B-Instruct-v0.1", }, { - description: "A model trained to follow instructions curated by community, uses Pythia-12b as base model.", - id: "OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5", + description: "Small yet powerful text generation model.", + id: "microsoft/phi-2", }, { - description: "A large language model trained to generate text in English.", - id: "stabilityai/stablelm-tuned-alpha-7b", + description: "A very powerful model that can chat, do mathematical reasoning and write code.", + id: "openchat/openchat-3.5-0106", }, { - description: "A model trained to follow instructions, based on mosaicml/mpt-7b.", - id: "mosaicml/mpt-7b-instruct", + description: "Very strong yet small assistant 
model.", + id: "HuggingFaceH4/zephyr-7b-beta", }, { - description: "A large language model trained to generate text in English.", - id: "EleutherAI/pythia-12b", - }, - { - description: "A large text-to-text model trained to follow instructions.", - id: "google/flan-ul2", - }, - { - description: "A large and powerful text generation model.", - id: "tiiuae/falcon-40b", - }, - { - description: "State-of-the-art open-source large language model.", + description: "Very strong open-source large language model.", id: "meta-llama/Llama-2-70b-hf", }, ], spaces: [ { - description: "A robust text generation model that can perform various tasks through natural language prompting.", - id: "huggingface/bloom_demo", + description: "A leaderboard to compare different open-source text generation models based on various benchmarks.", + id: "HuggingFaceH4/open_llm_leaderboard", }, { - description: "An text generation based application that can write code for 80+ languages.", - id: "bigcode/bigcode-playground", + description: "A text generation application based on a very powerful LLaMA2 model.", + id: "ysharma/Explore_llamav2_with_TGI", }, { - description: "An text generation based application for conversations.", - id: "h2oai/h2ogpt-chatbot", + description: "A text generation application to converse with the Zephyr model.", + id: "HuggingFaceH4/zephyr-chat", }, { description: "An text generation application that combines OpenAI and Hugging Face models.", id: "microsoft/HuggingGPT", }, { - description: "An text generation application that uses StableLM-tuned-alpha-7b.", - id: "stabilityai/stablelm-tuned-alpha-chat", - }, - { - description: "An UI that uses StableLM-tuned-alpha-7b.", - id: "togethercomputer/OpenChatKit", + description: "A chatbot to converse with a very powerful text generation model.", + id: "mlabonne/phixtral-chat", }, ], summary: - "Generating text is the task of producing new text. 
These models can, for example, fill in incomplete text or paraphrase.", + "Generating text is the task of generating new text given another text. These models can, for example, fill in incomplete text or paraphrase.", widgetModels: ["HuggingFaceH4/zephyr-7b-beta"], youtubeId: "Vpjb1lu0MDk", }; diff --git a/packages/tasks/src/tasks/text-to-image/data.ts b/packages/tasks/src/tasks/text-to-image/data.ts index 4958765fa..f60e9f110 100644 --- a/packages/tasks/src/tasks/text-to-image/data.ts +++ b/packages/tasks/src/tasks/text-to-image/data.ts @@ -79,13 +79,17 @@ const taskData: TaskDataCustom = { id: "latent-consistency/lcm-lora-for-sdxl", }, { - description: "A powerful text-to-image application that can generate 3D representations.", - id: "hysts/Shap-E", + description: "A gallery to explore various text-to-image models.", + id: "multimodalart/LoraTheExplorer", }, { description: "An application for `text-to-image`, `image-to-image` and image inpainting.", id: "ArtGAN/Stable-Diffusion-ControlNet-WebUI", }, + { + description: "An application to generate realistic images given photos of a person and a prompt.", + id: "InstantX/InstantID", + }, ], summary: "Generates images from input text. 
These models can be used to generate and modify images based on text prompts.", diff --git a/packages/tasks/src/tasks/zero-shot-image-classification/data.ts b/packages/tasks/src/tasks/zero-shot-image-classification/data.ts index 2aa252071..b180acff4 100644 --- a/packages/tasks/src/tasks/zero-shot-image-classification/data.ts +++ b/packages/tasks/src/tasks/zero-shot-image-classification/data.ts @@ -52,9 +52,8 @@ const taskData: TaskDataCustom = { id: "openai/clip-vit-base-patch16", }, { - description: - "Robust image classification model trained on publicly available image-caption data trained on additional high pixel data for better performance.", - id: "openai/clip-vit-large-patch14-336", + description: "Strong zero-shot image classification model.", + id: "google/siglip-base-patch16-224", }, { description: "Strong image classification model for biomedical domain.", @@ -64,12 +63,16 @@ const taskData: TaskDataCustom = { spaces: [ { description: - "An application that leverages zero shot image classification to find best captions to generate an image. ", + "An application that leverages zero-shot image classification to find best captions to generate an image. ", id: "pharma/CLIP-Interrogator", }, + { + description: "An application to compare different zero-shot image classification models. 
", + id: "merve/compare_clip_siglip", + }, ], summary: - "Zero shot image classification is the task of classifying previously unseen classes during training of a model.", + "Zero-shot image classification is the task of classifying previously unseen classes during training of a model.", widgetModels: ["openai/clip-vit-large-patch14-336"], youtubeId: "", }; diff --git a/packages/tasks/src/tasks/zero-shot-object-detection/data.ts b/packages/tasks/src/tasks/zero-shot-object-detection/data.ts index ac0363c17..607f2bb0e 100644 --- a/packages/tasks/src/tasks/zero-shot-object-detection/data.ts +++ b/packages/tasks/src/tasks/zero-shot-object-detection/data.ts @@ -47,7 +47,12 @@ const taskData: TaskDataCustom = { id: "google/owlv2-base-patch16-ensemble", }, ], - spaces: [], + spaces: [ + { + description: "A demo to try the state-of-the-art zero-shot object detection model, OWLv2.", + id: "merve/owlv2", + }, + ], summary: "Zero-shot object detection is a computer vision task to detect objects and their classes in images, without any prior training or knowledge of the classes. Zero-shot object detection models receive an image as input, as well as a list of candidate classes, and output the bounding boxes and labels where the objects have been detected.", widgetModels: [],