From c51dea8294432a9f95521e3537f5cf2eff848f21 Mon Sep 17 00:00:00 2001
From: Merve Noyan <merveenoyan@gmail.com>
Date: Fri, 26 Jan 2024 13:52:33 +0300
Subject: [PATCH] Update tasks with new models (#460)

---------

Co-authored-by: Omar Sanseviero <osanseviero@gmail.com>
---
 .../tasks/src/tasks/depth-estimation/data.ts  | 12 ++--
 .../tasks/src/tasks/mask-generation/data.ts   | 47 +++++++++++++--
 .../tasks/src/tasks/text-generation/data.ts   | 60 +++++++------------
 .../tasks/src/tasks/text-to-image/data.ts     |  8 ++-
 .../zero-shot-image-classification/data.ts    | 13 ++--
 .../tasks/zero-shot-object-detection/data.ts  |  7 ++-
 6 files changed, 92 insertions(+), 55 deletions(-)

diff --git a/packages/tasks/src/tasks/depth-estimation/data.ts b/packages/tasks/src/tasks/depth-estimation/data.ts
index 1fd08f25c..546e88d9c 100644
--- a/packages/tasks/src/tasks/depth-estimation/data.ts
+++ b/packages/tasks/src/tasks/depth-estimation/data.ts
@@ -28,8 +28,8 @@ const taskData: TaskDataCustom = {
 			id: "Intel/dpt-large",
 		},
 		{
-			description: "Strong Depth Estimation model trained on the KITTI dataset.",
-			id: "facebook/dpt-dinov2-large-kitti",
+			description: "Strong Depth Estimation model trained on a big compilation of datasets.",
+			id: "LiheYoung/depth-anything-large-hf",
 		},
 		{
 			description: "A strong monocular depth estimation model.",
@@ -42,8 +42,12 @@ const taskData: TaskDataCustom = {
 			id: "radames/dpt-depth-estimation-3d-voxels",
 		},
 		{
-			description: "An application that can estimate the depth in a given image.",
-			id: "keras-io/Monocular-Depth-Estimation",
+			description: "An application to try state-of-the-art depth estimation.",
+			id: "LiheYoung/Depth-Anything",
+		},
+		{
+			description: "An application to compare the outputs of different depth estimation models.",
+			id: "merve/compare_depth_models",
 		},
 	],
 	summary: "Depth estimation is the task of predicting depth of the objects present in an image.",
diff --git a/packages/tasks/src/tasks/mask-generation/data.ts b/packages/tasks/src/tasks/mask-generation/data.ts
index a37ad9c83..05f542dfb 100644
--- a/packages/tasks/src/tasks/mask-generation/data.ts
+++ b/packages/tasks/src/tasks/mask-generation/data.ts
@@ -3,14 +3,51 @@ import type { TaskDataCustom } from "..";
 const taskData: TaskDataCustom = {
 	datasets: [],
 	demo: {
-		inputs: [],
-		outputs: [],
+		inputs: [
+			{
+				filename: "mask-generation-input.png",
+				type: "img",
+			},
+		],
+		outputs: [
+			{
+				filename: "mask-generation-output.png",
+				type: "img",
+			},
+		],
 	},
 	metrics: [],
-	models: [],
-	spaces: [],
+	models: [
+		{
+			description: "Small yet powerful mask generation model.",
+			id: "Zigeng/SlimSAM-uniform-50",
+		},
+		{
+			description: "Very strong mask generation model.",
+			id: "facebook/sam-vit-huge",
+		},
+	],
+	spaces: [
+		{
+			description:
+				"An application that combines a mask generation model with an image embedding model for open-vocabulary image segmentation.",
+			id: "SkalskiP/SAM_and_MetaCLIP",
+		},
+		{
+			description: "An application that compares the performance of a large and a small mask generation model.",
+			id: "merve/slimsam",
+		},
+		{
+			description: "An application based on an improved mask generation model.",
+			id: "linfanluntan/Grounded-SAM",
+		},
+		{
+			description: "An application to remove objects from videos using mask generation models.",
+			id: "SkalskiP/SAM_and_ProPainter",
+		},
+	],
 	summary:
-		"Mask generation is creating a binary image that identifies a specific object or region of interest in an input image. Masks are often used in segmentation tasks, where they provide a precise way to isolate the object of interest for further processing or analysis.",
+		"Mask generation is the task of generating masks that identify a specific object or region of interest in a given image. Masks are often used in segmentation tasks, where they provide a precise way to isolate the object of interest for further processing or analysis.",
 	widgetModels: [],
 	youtubeId: "",
 };
diff --git a/packages/tasks/src/tasks/text-generation/data.ts b/packages/tasks/src/tasks/text-generation/data.ts
index d442e247b..dbe5f203e 100644
--- a/packages/tasks/src/tasks/text-generation/data.ts
+++ b/packages/tasks/src/tasks/text-generation/data.ts
@@ -12,12 +12,12 @@ const taskData: TaskDataCustom = {
 			id: "the_pile",
 		},
 		{
-			description: "A crowd-sourced instruction dataset to develop an AI assistant.",
-			id: "OpenAssistant/oasst1",
+			description: "Truly open-source, curated and cleaned dialogue dataset.",
+			id: "HuggingFaceH4/ultrachat_200k",
 		},
 		{
-			description: "A crowd-sourced instruction dataset created by Databricks employees.",
-			id: "databricks/databricks-dolly-15k",
+			description: "An instruction dataset with preference ratings on responses.",
+			id: "openbmb/UltraFeedback",
 		},
 	],
 	demo: {
@@ -59,66 +59,50 @@ const taskData: TaskDataCustom = {
 			id: "bigcode/starcoder",
 		},
 		{
-			description: "A model trained to follow instructions, uses Pythia-12b as base model.",
-			id: "databricks/dolly-v2-12b",
+			description: "A very powerful text generation model.",
+			id: "mistralai/Mixtral-8x7B-Instruct-v0.1",
 		},
 		{
-			description: "A model trained to follow instructions curated by community, uses Pythia-12b as base model.",
-			id: "OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5",
+			description: "Small yet powerful text generation model.",
+			id: "microsoft/phi-2",
 		},
 		{
-			description: "A large language model trained to generate text in English.",
-			id: "stabilityai/stablelm-tuned-alpha-7b",
+			description: "A very powerful model that can chat, do mathematical reasoning and write code.",
+			id: "openchat/openchat-3.5-0106",
 		},
 		{
-			description: "A model trained to follow instructions, based on mosaicml/mpt-7b.",
-			id: "mosaicml/mpt-7b-instruct",
+			description: "Very strong yet small assistant model.",
+			id: "HuggingFaceH4/zephyr-7b-beta",
 		},
 		{
-			description: "A large language model trained to generate text in English.",
-			id: "EleutherAI/pythia-12b",
-		},
-		{
-			description: "A large text-to-text model trained to follow instructions.",
-			id: "google/flan-ul2",
-		},
-		{
-			description: "A large and powerful text generation model.",
-			id: "tiiuae/falcon-40b",
-		},
-		{
-			description: "State-of-the-art open-source large language model.",
+			description: "Very strong open-source large language model.",
 			id: "meta-llama/Llama-2-70b-hf",
 		},
 	],
 	spaces: [
 		{
-			description: "A robust text generation model that can perform various tasks through natural language prompting.",
-			id: "huggingface/bloom_demo",
+			description: "A leaderboard to compare different open-source text generation models based on various benchmarks.",
+			id: "HuggingFaceH4/open_llm_leaderboard",
 		},
 		{
-			description: "An text generation based application that can write code for 80+ languages.",
-			id: "bigcode/bigcode-playground",
+			description: "A text generation application based on a very powerful LLaMA2 model.",
+			id: "ysharma/Explore_llamav2_with_TGI",
 		},
 		{
-			description: "An text generation based application for conversations.",
-			id: "h2oai/h2ogpt-chatbot",
+			description: "A text generation application to converse with the Zephyr model.",
+			id: "HuggingFaceH4/zephyr-chat",
 		},
 		{
 			description: "An text generation application that combines OpenAI and Hugging Face models.",
 			id: "microsoft/HuggingGPT",
 		},
 		{
-			description: "An text generation application that uses StableLM-tuned-alpha-7b.",
-			id: "stabilityai/stablelm-tuned-alpha-chat",
-		},
-		{
-			description: "An UI that uses StableLM-tuned-alpha-7b.",
-			id: "togethercomputer/OpenChatKit",
+			description: "A chatbot to converse with a very powerful text generation model.",
+			id: "mlabonne/phixtral-chat",
 		},
 	],
 	summary:
-		"Generating text is the task of producing new text. These models can, for example, fill in incomplete text or paraphrase.",
+		"Generating text is the task of generating new text given another text. These models can, for example, fill in incomplete text or paraphrase.",
 	widgetModels: ["HuggingFaceH4/zephyr-7b-beta"],
 	youtubeId: "Vpjb1lu0MDk",
 };
diff --git a/packages/tasks/src/tasks/text-to-image/data.ts b/packages/tasks/src/tasks/text-to-image/data.ts
index 4958765fa..f60e9f110 100644
--- a/packages/tasks/src/tasks/text-to-image/data.ts
+++ b/packages/tasks/src/tasks/text-to-image/data.ts
@@ -79,13 +79,17 @@ const taskData: TaskDataCustom = {
 			id: "latent-consistency/lcm-lora-for-sdxl",
 		},
 		{
-			description: "A powerful text-to-image application that can generate 3D representations.",
-			id: "hysts/Shap-E",
+			description: "A gallery to explore various text-to-image models.",
+			id: "multimodalart/LoraTheExplorer",
 		},
 		{
 			description: "An application for `text-to-image`, `image-to-image` and image inpainting.",
 			id: "ArtGAN/Stable-Diffusion-ControlNet-WebUI",
 		},
+		{
+			description: "An application to generate realistic images given photos of a person and a prompt.",
+			id: "InstantX/InstantID",
+		},
 	],
 	summary:
 		"Generates images from input text. These models can be used to generate and modify images based on text prompts.",
diff --git a/packages/tasks/src/tasks/zero-shot-image-classification/data.ts b/packages/tasks/src/tasks/zero-shot-image-classification/data.ts
index 2aa252071..b180acff4 100644
--- a/packages/tasks/src/tasks/zero-shot-image-classification/data.ts
+++ b/packages/tasks/src/tasks/zero-shot-image-classification/data.ts
@@ -52,9 +52,8 @@ const taskData: TaskDataCustom = {
 			id: "openai/clip-vit-base-patch16",
 		},
 		{
-			description:
-				"Robust image classification model trained on publicly available image-caption data trained on additional high pixel data for better performance.",
-			id: "openai/clip-vit-large-patch14-336",
+			description: "Strong zero-shot image classification model.",
+			id: "google/siglip-base-patch16-224",
 		},
 		{
 			description: "Strong image classification model for biomedical domain.",
@@ -64,12 +63,16 @@ const taskData: TaskDataCustom = {
 	spaces: [
 		{
 			description:
-				"An application that leverages zero shot image classification to find best captions to generate an image. ",
+				"An application that leverages zero-shot image classification to find the best captions to generate an image.",
 			id: "pharma/CLIP-Interrogator",
 		},
+		{
+			description: "An application to compare different zero-shot image classification models.",
+			id: "merve/compare_clip_siglip",
+		},
 	],
 	summary:
-		"Zero shot image classification is the task of classifying previously unseen classes during training of a model.",
+		"Zero-shot image classification is the task of classifying images into classes that were not seen during training of a model.",
 	widgetModels: ["openai/clip-vit-large-patch14-336"],
 	youtubeId: "",
 };
diff --git a/packages/tasks/src/tasks/zero-shot-object-detection/data.ts b/packages/tasks/src/tasks/zero-shot-object-detection/data.ts
index ac0363c17..607f2bb0e 100644
--- a/packages/tasks/src/tasks/zero-shot-object-detection/data.ts
+++ b/packages/tasks/src/tasks/zero-shot-object-detection/data.ts
@@ -47,7 +47,12 @@ const taskData: TaskDataCustom = {
 			id: "google/owlv2-base-patch16-ensemble",
 		},
 	],
-	spaces: [],
+	spaces: [
+		{
+			description: "A demo to try the state-of-the-art zero-shot object detection model, OWLv2.",
+			id: "merve/owlv2",
+		},
+	],
 	summary:
 		"Zero-shot object detection is a computer vision task to detect objects and their classes in images, without any prior training or knowledge of the classes. Zero-shot object detection models receive an image as input, as well as a list of candidate classes, and output the bounding boxes and labels where the objects have been detected.",
 	widgetModels: [],