From fa428c2e2b0a7eb6dd8e5f0bc76e913fd311320f Mon Sep 17 00:00:00 2001 From: Luke Chang Date: Mon, 2 Sep 2024 11:35:00 -0400 Subject: [PATCH 01/15] register py-feat library (#879) The goal of this PR is to register a new library [py-feat](https://py-feat.org/) with huggingface and add supported tasks of `image-feature-extraction`. Please let me know if I should make any changes. --------- Co-authored-by: Lucain --- packages/tasks/src/model-libraries.ts | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/packages/tasks/src/model-libraries.ts b/packages/tasks/src/model-libraries.ts index 71b8747b9..55bf8d09c 100644 --- a/packages/tasks/src/model-libraries.ts +++ b/packages/tasks/src/model-libraries.ts @@ -446,6 +446,13 @@ export const MODEL_LIBRARIES_UI_ELEMENTS = { snippets: snippets.pyannote_audio, filter: true, }, + "py-feat": { + prettyLabel: "Py-Feat", + repoName: "Py-Feat", + repoUrl: "https://github.com/cosanlab/py-feat", + docsUrl: "https://py-feat.org/", + filter: false, + }, pythae: { prettyLabel: "pythae", repoName: "pythae", From 82d822473fdcec1dd206ff1ac1576515046ce763 Mon Sep 17 00:00:00 2001 From: machineuser Date: Mon, 2 Sep 2024 16:55:39 +0000 Subject: [PATCH 02/15] =?UTF-8?q?=F0=9F=94=96=20@hugginface/tasks=200.11.1?= =?UTF-8?q?3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- packages/tasks/package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/tasks/package.json b/packages/tasks/package.json index 06ea1cc0b..25c37c4ae 100644 --- a/packages/tasks/package.json +++ b/packages/tasks/package.json @@ -1,7 +1,7 @@ { "name": "@huggingface/tasks", "packageManager": "pnpm@8.10.5", - "version": "0.11.12", + "version": "0.11.13", "description": "List of ML tasks for huggingface.co/tasks", "repository": "https://github.com/huggingface/huggingface.js.git", "publishConfig": { From 1701fac9088845de393a924f12bfd09f9ca70056 Mon Sep 17 00:00:00 2001 From: Lucain Date: Mon, 2 Sep 
2024 18:59:59 +0200 Subject: [PATCH 03/15] Add seed in text to image specs (#888) Following @apolinario's PR https://github.com/huggingface/api-inference-community/pull/450. This PR adds a "seed" input parameter in the `text-to-image` specs. --------- Co-authored-by: Pedro Cuenca --- .../tasks/src/tasks/text-to-image/inference.ts | 14 +++++++++----- .../tasks/src/tasks/text-to-image/spec/input.json | 10 +++++++--- 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/packages/tasks/src/tasks/text-to-image/inference.ts b/packages/tasks/src/tasks/text-to-image/inference.ts index b2e735746..8c30d3e9e 100644 --- a/packages/tasks/src/tasks/text-to-image/inference.ts +++ b/packages/tasks/src/tasks/text-to-image/inference.ts @@ -26,8 +26,8 @@ export interface TextToImageInput { */ export interface TextToImageParameters { /** - * For diffusion models. A higher guidance scale value encourages the model to generate - * images closely linked to the text prompt at the expense of lower image quality. + * A higher guidance scale value encourages the model to generate images closely linked to + * the text prompt, but values too high may cause saturation and other artifacts. */ guidance_scale?: number; /** @@ -35,14 +35,18 @@ export interface TextToImageParameters { */ negative_prompt?: string[]; /** - * For diffusion models. The number of denoising steps. More denoising steps usually lead to - * a higher quality image at the expense of slower inference. + * The number of denoising steps. More denoising steps usually lead to a higher quality + * image at the expense of slower inference. */ num_inference_steps?: number; /** - * For diffusion models. Override the scheduler with a compatible one + * Override the scheduler with a compatible one. */ scheduler?: string; + /** + * Seed for the random number generator. 
+ */ + seed?: number; /** * The size in pixel of the output image */ diff --git a/packages/tasks/src/tasks/text-to-image/spec/input.json b/packages/tasks/src/tasks/text-to-image/spec/input.json index 467b848f6..569f3c33a 100644 --- a/packages/tasks/src/tasks/text-to-image/spec/input.json +++ b/packages/tasks/src/tasks/text-to-image/spec/input.json @@ -22,7 +22,7 @@ "properties": { "guidance_scale": { "type": "number", - "description": "For diffusion models. A higher guidance scale value encourages the model to generate images closely linked to the text prompt at the expense of lower image quality." + "description": "A higher guidance scale value encourages the model to generate images closely linked to the text prompt, but values too high may cause saturation and other artifacts." }, "negative_prompt": { "type": "array", @@ -33,7 +33,7 @@ }, "num_inference_steps": { "type": "integer", - "description": "For diffusion models. The number of denoising steps. More denoising steps usually lead to a higher quality image at the expense of slower inference." + "description": "The number of denoising steps. More denoising steps usually lead to a higher quality image at the expense of slower inference." }, "target_size": { "type": "object", @@ -50,7 +50,11 @@ }, "scheduler": { "type": "string", - "description": "For diffusion models. Override the scheduler with a compatible one" + "description": "Override the scheduler with a compatible one." + }, + "seed": { + "type": "integer", + "description": "Seed for the random number generator." 
} } } From e0d6c958dc3a5ce3c67b46d10d7f5224557a346d Mon Sep 17 00:00:00 2001 From: Linoy Tsaban <57615435+linoytsaban@users.noreply.github.com> Date: Tue, 3 Sep 2024 09:54:09 +0300 Subject: [PATCH 04/15] improve image-to-image task page (#867) some changes to improve clarity of task description, and general updates to improve task page --------- Co-authored-by: Pedro Cuenca Co-authored-by: Merve Noyan Co-authored-by: Omar Sanseviero --- .../tasks/src/tasks/image-to-image/about.md | 91 ++++++++++++++----- .../tasks/src/tasks/image-to-image/data.ts | 2 +- 2 files changed, 71 insertions(+), 22 deletions(-) diff --git a/packages/tasks/src/tasks/image-to-image/about.md b/packages/tasks/src/tasks/image-to-image/about.md index 63f490f82..3750b34e5 100644 --- a/packages/tasks/src/tasks/image-to-image/about.md +++ b/packages/tasks/src/tasks/image-to-image/about.md @@ -1,15 +1,10 @@ -## Use Cases - -### Style transfer +Image-to-image pipelines can also be used in text-to-image tasks, to provide visual guidance to the text-guided generation process. -One of the most popular use cases of image-to-image is style transfer. Style transfer models can convert a normal photography into a painting in the style of a famous painter. - -## Task Variants +## Use Cases ### Image inpainting -Image inpainting is widely used during photography editing to remove unwanted objects, such as poles, wires, or sensor -dust. +Image inpainting is widely used during photography editing to remove unwanted objects, such as poles, wires, or sensor dust. ### Image colorization @@ -24,18 +19,27 @@ Super-resolution models increase the resolution of an image, allowing for higher You can use pipelines for image-to-image in ๐Ÿงจdiffusers library to easily use image-to-image models. See an example for `StableDiffusionImg2ImgPipeline` below. 
```python -from PIL import Image -from diffusers import StableDiffusionImg2ImgPipeline +import torch +from diffusers import AutoPipelineForImage2Image +from diffusers.utils import make_image_grid, load_image -model_id_or_path = "runwayml/stable-diffusion-v1-5" -pipe = StableDiffusionImg2ImgPipeline.from_pretrained(model_id_or_path, torch_dtype=torch.float16) -pipe = pipe.to(cuda) +pipeline = AutoPipelineForImage2Image.from_pretrained( + "stabilityai/stable-diffusion-xl-refiner-1.0", torch_dtype=torch.float16, variant="fp16", use_safetensors=True +) -init_image = Image.open("mountains_image.jpeg").convert("RGB").resize((768, 512)) -prompt = "A fantasy landscape, trending on artstation" +# this helps us to reduce memory usage- since SDXL is a bit heavy, this could help by +# offloading the model to CPU w/o hurting performance. +pipeline.enable_model_cpu_offload() -images = pipe(prompt=prompt, image=init_image, strength=0.75, guidance_scale=7.5).images -images[0].save("fantasy_landscape.png") +# prepare image +url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/img2img-sdxl-init.png" +init_image = load_image(url) + +prompt = "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k" + +# pass prompt and image to pipeline +image = pipeline(prompt, image=init_image, strength=0.5).images[0] +make_image_grid([init_image, image], rows=1, cols=2) ``` You can use [huggingface.js](https://github.com/huggingface/huggingface.js) to infer image-to-image models on Hugging Face Hub. @@ -53,13 +57,53 @@ await inference.imageToImage({ }); ``` -## ControlNet +## Uses Cases for Text Guided Image Generation -Controlling the outputs of diffusion models only with a text prompt is a challenging problem. ControlNet is a neural network model that provides image-based control to diffusion models. Control images can be edges or other landmarks extracted from a source image. 
+### Style Transfer + +One of the most popular use cases of image-to-image is style transfer. With style transfer models: -Many ControlNet models were trained in our community event, JAX Diffusers sprint. You can see the full list of the ControlNet models available [here](https://huggingface.co/spaces/jax-diffusers-event/leaderboard). +- a regular photo can be transformed into a variety of artistic styles or genres, such as a watercolor painting, a comic book illustration and more. +- new images can be generated using a text prompt, in the style of a reference input image. + +See 🧨diffusers example for style transfer with `AutoPipelineForText2Image` below. + +```python +from diffusers import AutoPipelineForText2Image +from diffusers.utils import load_image +import torch + +# load pipeline +pipeline = AutoPipelineForText2Image.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16).to("cuda") +pipeline.load_ip_adapter("h94/IP-Adapter", subfolder="sdxl_models", weight_name="ip-adapter_sdxl.bin") + +# set the adapter and scales - this is a component that lets us add the style control from an image to the text-to-image model +scale = { + "down": {"block_2": [0.0, 1.0]}, + "up": {"block_0": [0.0, 1.0, 0.0]}, +} +pipeline.set_ip_adapter_scale(scale) + +style_image = load_image("https://huggingface.co/datasets/huggingface/documentation-images/resolve/0052a70beed5bf71b92610a43a52df6d286cd5f3/diffusers/rabbit.jpg") + +generator = torch.Generator(device="cpu").manual_seed(26) +image = pipeline( + prompt="a cat, masterpiece, best quality, high quality", + ip_adapter_image=style_image, + negative_prompt="text, watermark, lowres, low quality, worst quality, deformed, glitch, low contrast, noisy, saturation, blurry", + guidance_scale=5, + num_inference_steps=30, + generator=generator, +).images[0] +image +``` + +### ControlNet + +Controlling the outputs of diffusion models only with a text prompt is a challenging problem.
ControlNet is a neural network model that provides image-based control to diffusion models. Control images can be edges or other landmarks extracted from a source image. +![Examples](https://huggingface.co/datasets/optimum/documentation-images/resolve/main/neuron/models/12-sdxl-text2img-controlnet.png) -## Most Used Model for the Task +## Pix2Pix Pix2Pix is a popular model used for image-to-image translation tasks. It is based on a conditional-GAN (generative adversarial network) where instead of a noise vector a 2D image is given as input. More information about Pix2Pix can be retrieved from this [link](https://phillipi.github.io/pix2pix/) where the associated paper and the GitHub repository can be found. @@ -70,8 +114,13 @@ The images below show some examples extracted from the Pix2Pix paper. This model ## Useful Resources - [Image-to-image guide with diffusers](https://huggingface.co/docs/diffusers/using-diffusers/img2img) +- Image inpainting: [inpainting with 🧨diffusers](https://huggingface.co/docs/diffusers/main/en/api/pipelines/stable_diffusion/inpaint), [demo](https://huggingface.co/spaces/diffusers/stable-diffusion-xl-inpainting) +- Colorization: [demo](https://huggingface.co/spaces/modelscope/old_photo_restoration) +- Super resolution: [image upscaling with 🧨diffusers](https://huggingface.co/docs/diffusers/main/en/api/pipelines/stable_diffusion/upscale#super-resolution), [demo](https://huggingface.co/spaces/radames/Enhance-This-HiDiffusion-SDXL) +- [Style transfer and layout control with diffusers 🧨](https://huggingface.co/docs/diffusers/main/en/using-diffusers/ip_adapter#style--layout-control) - [Train your ControlNet with diffusers 🧨](https://huggingface.co/blog/train-your-controlnet) - [Ultra fast ControlNet with 🧨 Diffusers](https://huggingface.co/blog/controlnet) +- [List of ControlNets trained in the community JAX Diffusers sprint](https://huggingface.co/spaces/jax-diffusers-event/leaderboard) ## References diff --git
a/packages/tasks/src/tasks/image-to-image/data.ts b/packages/tasks/src/tasks/image-to-image/data.ts index 99e91557a..65200fd92 100644 --- a/packages/tasks/src/tasks/image-to-image/data.ts +++ b/packages/tasks/src/tasks/image-to-image/data.ts @@ -93,7 +93,7 @@ const taskData: TaskDataCustom = { }, ], summary: - "Image-to-image is the task of transforming a source image to match the characteristics of a target image or a target image domain. Any image manipulation and enhancement is possible with image to image models.", + "Image-to-image is the task of transforming an input image through a variety of possible manipulations and enhancements, such as super-resolution, image inpainting, colorization, and more.", widgetModels: ["lllyasviel/sd-controlnet-canny"], youtubeId: "", }; From 1c9a2a281065a6da581317caeb4d5bd4174b7645 Mon Sep 17 00:00:00 2001 From: enzo Date: Tue, 3 Sep 2024 13:20:15 -0400 Subject: [PATCH 05/15] remove unused console.log (#891) --- packages/space-header/src/utils/get_space.ts | 1 - 1 file changed, 1 deletion(-) diff --git a/packages/space-header/src/utils/get_space.ts b/packages/space-header/src/utils/get_space.ts index 608350bd1..4250b2206 100644 --- a/packages/space-header/src/utils/get_space.ts +++ b/packages/space-header/src/utils/get_space.ts @@ -4,7 +4,6 @@ export const get_space = async (space_id: string): Promise => { try { const response = await fetch(`https://huggingface.co/api/spaces/${space_id}`); const data = await response.json(); - console.log(data); return data as Space; } catch (error) { return null; From 787c7cae842361a55d3808f6cacb4c2ba6bd2234 Mon Sep 17 00:00:00 2001 From: machineuser Date: Tue, 3 Sep 2024 17:24:52 +0000 Subject: [PATCH 06/15] =?UTF-8?q?=F0=9F=94=96=20@hugginface/space-header?= =?UTF-8?q?=201.0.4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- packages/space-header/package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/packages/space-header/package.json b/packages/space-header/package.json index 735315c8d..c9c2c43c9 100644 --- a/packages/space-header/package.json +++ b/packages/space-header/package.json @@ -1,6 +1,6 @@ { "name": "@huggingface/space-header", - "version": "1.0.3", + "version": "1.0.4", "packageManager": "pnpm@8.10.5", "description": "Use the Space mini_header outside Hugging Face", "repository": "https://github.com/huggingface/huggingface.js.git", From a9047d5af078a6bc8b649bde8677195c528d9f10 Mon Sep 17 00:00:00 2001 From: Lucain Date: Wed, 4 Sep 2024 09:15:30 +0200 Subject: [PATCH 07/15] We are Hugging Face, or huggingface, or Huggingface, who knows (#893) --- .github/workflows/agents-publish.yml | 2 +- .github/workflows/gguf-publish.yml | 2 +- .github/workflows/hub-publish.yml | 2 +- .github/workflows/inference-publish.yml | 2 +- .github/workflows/jinja-publish.yml | 2 +- .github/workflows/languages-publish.yml | 2 +- .github/workflows/space-header-publish.yml | 2 +- .github/workflows/tasks-publish.yml | 2 +- packages/doc-internal/README.md | 2 +- 9 files changed, 9 insertions(+), 9 deletions(-) diff --git a/.github/workflows/agents-publish.yml b/.github/workflows/agents-publish.yml index 08108b585..edc72455f 100644 --- a/.github/workflows/agents-publish.yml +++ b/.github/workflows/agents-publish.yml @@ -50,7 +50,7 @@ jobs: node -e "const fs = require('fs'); const package = JSON.parse(fs.readFileSync('./package.json')); package.version = '$BUMPED_VERSION'; fs.writeFileSync('./package.json', JSON.stringify(package, null, '\t') + '\n');" pnpm --filter doc-internal run fix-cdn-versions git add ../.. - git commit -m "๐Ÿ”– @hugginface/agents $BUMPED_VERSION" + git commit -m "๐Ÿ”– @huggingface/agents $BUMPED_VERSION" git tag "agents-v$BUMPED_VERSION" - run: pnpm --filter agents... build && pnpm publish --no-git-checks . 
env: diff --git a/.github/workflows/gguf-publish.yml b/.github/workflows/gguf-publish.yml index dec6cd891..5e5cf81f0 100644 --- a/.github/workflows/gguf-publish.yml +++ b/.github/workflows/gguf-publish.yml @@ -47,7 +47,7 @@ jobs: BUMPED_VERSION=$(node -p "require('semver').inc('$PACKAGE_VERSION', '${{ github.event.inputs.newversion }}')") # Update package.json with the new version node -e "const fs = require('fs'); const package = JSON.parse(fs.readFileSync('./package.json')); package.version = '$BUMPED_VERSION'; fs.writeFileSync('./package.json', JSON.stringify(package, null, '\t') + '\n');" - git commit . -m "๐Ÿ”– @hugginface/gguf $BUMPED_VERSION" + git commit . -m "๐Ÿ”– @huggingface/gguf $BUMPED_VERSION" git tag "gguf-v$BUMPED_VERSION" - run: pnpm publish --no-git-checks . env: diff --git a/.github/workflows/hub-publish.yml b/.github/workflows/hub-publish.yml index 19bb63dcf..5937ad212 100644 --- a/.github/workflows/hub-publish.yml +++ b/.github/workflows/hub-publish.yml @@ -50,7 +50,7 @@ jobs: node -e "const fs = require('fs'); const package = JSON.parse(fs.readFileSync('./package.json')); package.version = '$BUMPED_VERSION'; fs.writeFileSync('./package.json', JSON.stringify(package, null, '\t') + '\n');" pnpm --filter doc-internal run fix-cdn-versions git add ../.. 
- git commit -m "๐Ÿ”– @hugginface/hub $BUMPED_VERSION" + git commit -m "๐Ÿ”– @huggingface/hub $BUMPED_VERSION" git tag "hub-v$BUMPED_VERSION" - name: Make sure that the latest version of @huggingface/tasks is consistent with the local version diff --git a/.github/workflows/inference-publish.yml b/.github/workflows/inference-publish.yml index 5ba387645..bdbb47d7d 100644 --- a/.github/workflows/inference-publish.yml +++ b/.github/workflows/inference-publish.yml @@ -50,7 +50,7 @@ jobs: node -e "const fs = require('fs'); const package = JSON.parse(fs.readFileSync('./package.json')); package.version = '$BUMPED_VERSION'; fs.writeFileSync('./package.json', JSON.stringify(package, null, '\t') + '\n');" pnpm --filter doc-internal run fix-cdn-versions git add ../.. - git commit -m "๐Ÿ”– @hugginface/inference $BUMPED_VERSION" + git commit -m "๐Ÿ”– @huggingface/inference $BUMPED_VERSION" git tag "inference-v$BUMPED_VERSION" - name: Make sure that the latest version of @huggingface/tasks is consistent with the local version diff --git a/.github/workflows/jinja-publish.yml b/.github/workflows/jinja-publish.yml index 5057d4ed5..47d7cad7f 100644 --- a/.github/workflows/jinja-publish.yml +++ b/.github/workflows/jinja-publish.yml @@ -47,7 +47,7 @@ jobs: BUMPED_VERSION=$(node -p "require('semver').inc('$PACKAGE_VERSION', '${{ github.event.inputs.newversion }}')") # Update package.json with the new version node -e "const fs = require('fs'); const package = JSON.parse(fs.readFileSync('./package.json')); package.version = '$BUMPED_VERSION'; fs.writeFileSync('./package.json', JSON.stringify(package, null, '\t') + '\n');" - git commit . -m "๐Ÿ”– @hugginface/jinja $BUMPED_VERSION" + git commit . -m "๐Ÿ”– @huggingface/jinja $BUMPED_VERSION" git tag "jinja-v$BUMPED_VERSION" - run: pnpm publish --no-git-checks . 
env: diff --git a/.github/workflows/languages-publish.yml b/.github/workflows/languages-publish.yml index 5dca90f89..913687a52 100644 --- a/.github/workflows/languages-publish.yml +++ b/.github/workflows/languages-publish.yml @@ -47,7 +47,7 @@ jobs: BUMPED_VERSION=$(node -p "require('semver').inc('$PACKAGE_VERSION', '${{ github.event.inputs.newversion }}')") # Update package.json with the new version node -e "const fs = require('fs'); const package = JSON.parse(fs.readFileSync('./package.json')); package.version = '$BUMPED_VERSION'; fs.writeFileSync('./package.json', JSON.stringify(package, null, '\t') + '\n');" - git commit . -m "๐Ÿ”– @hugginface/languages $BUMPED_VERSION" + git commit . -m "๐Ÿ”– @huggingface/languages $BUMPED_VERSION" git tag "languages-v$BUMPED_VERSION" - run: pnpm publish --no-git-checks . env: diff --git a/.github/workflows/space-header-publish.yml b/.github/workflows/space-header-publish.yml index 76ce2d89b..05c584514 100644 --- a/.github/workflows/space-header-publish.yml +++ b/.github/workflows/space-header-publish.yml @@ -47,7 +47,7 @@ jobs: BUMPED_VERSION=$(node -p "require('semver').inc('$PACKAGE_VERSION', '${{ github.event.inputs.newversion }}')") # Update package.json with the new version node -e "const fs = require('fs'); const package = JSON.parse(fs.readFileSync('./package.json')); package.version = '$BUMPED_VERSION'; fs.writeFileSync('./package.json', JSON.stringify(package, null, '\t') + '\n');" - git commit . -m "๐Ÿ”– @hugginface/space-header $BUMPED_VERSION" + git commit . -m "๐Ÿ”– @huggingface/space-header $BUMPED_VERSION" git tag "space-header-v$BUMPED_VERSION" - run: pnpm publish --no-git-checks . 
env: diff --git a/.github/workflows/tasks-publish.yml b/.github/workflows/tasks-publish.yml index 4c8b4567e..0dd797dd2 100644 --- a/.github/workflows/tasks-publish.yml +++ b/.github/workflows/tasks-publish.yml @@ -47,7 +47,7 @@ jobs: BUMPED_VERSION=$(node -p "require('semver').inc('$PACKAGE_VERSION', '${{ github.event.inputs.newversion }}')") # Update package.json with the new version node -e "const fs = require('fs'); const package = JSON.parse(fs.readFileSync('./package.json')); package.version = '$BUMPED_VERSION'; fs.writeFileSync('./package.json', JSON.stringify(package, null, '\t') + '\n');" - git commit . -m "๐Ÿ”– @hugginface/tasks $BUMPED_VERSION" + git commit . -m "๐Ÿ”– @huggingface/tasks $BUMPED_VERSION" git tag "tasks-v$BUMPED_VERSION" - run: pnpm publish --no-git-checks . env: diff --git a/packages/doc-internal/README.md b/packages/doc-internal/README.md index 8d51e72ab..71d8f8314 100644 --- a/packages/doc-internal/README.md +++ b/packages/doc-internal/README.md @@ -2,7 +2,7 @@ This package generates `.md` files inside the [docs](../../docs) folder using [typedoc](https://typedoc.org/) and [typedoc-plugin-markdown](https://github.com/tgreyuk/typedoc-plugin-markdown). -The `.md` files are generated when releasing packages. They are then published to [hugginface.co](https://huggingface.co/docs/huggingface.js/index) through the [doc-builder](https://github.com/huggingface/doc-builder)'s github action. +The `.md` files are generated when releasing packages. They are then published to [huggingface.co](https://huggingface.co/docs/huggingface.js/index) through the [doc-builder](https://github.com/huggingface/doc-builder)'s github action. We run a few scripts in between, [fix-md-links](./fix-md-links.ts) and [update-toc](./update-toc.ts) to preprocess the files for `doc-builder`. 
From 6e12cfae0a4535467290b9b6b045282b5a91cd8a Mon Sep 17 00:00:00 2001 From: Merve Noyan Date: Wed, 4 Sep 2024 11:15:53 +0300 Subject: [PATCH 08/15] Add any-to-any as a task to Hub (#860) This PR adds any-to-any for tasks that have 2+ inputs to 2+ outputs like 4M, Chameleon, Lumina-mGPT etc --- packages/tasks/src/pipelines.ts | 6 ++++++ packages/tasks/src/tasks/index.ts | 2 ++ .../widgets/src/lib/components/Icons/IconAnyToAny.svelte | 1 + .../src/lib/components/PipelineIcon/PipelineIcon.svelte | 2 ++ 4 files changed, 11 insertions(+) create mode 100644 packages/widgets/src/lib/components/Icons/IconAnyToAny.svelte diff --git a/packages/tasks/src/pipelines.ts b/packages/tasks/src/pipelines.ts index 7edc61605..34907f064 100644 --- a/packages/tasks/src/pipelines.ts +++ b/packages/tasks/src/pipelines.ts @@ -670,6 +670,12 @@ export const PIPELINE_DATA = { color: "red", hideInDatasets: true, }, + "any-to-any": { + name: "Any-to-Any", + modality: "multimodal", + color: "yellow", + hideInDatasets: true, + }, other: { name: "Other", modality: "other", diff --git a/packages/tasks/src/tasks/index.ts b/packages/tasks/src/tasks/index.ts index a72bb9c88..6c8068680 100644 --- a/packages/tasks/src/tasks/index.ts +++ b/packages/tasks/src/tasks/index.ts @@ -170,6 +170,7 @@ export const TASKS_MODEL_LIBRARIES: Record = { "zero-shot-object-detection": ["transformers", "transformers.js"], "text-to-3d": ["diffusers"], "image-to-3d": ["diffusers"], + "any-to-any": ["transformers"], }; /** @@ -191,6 +192,7 @@ function getData(type: PipelineType, partialTaskData: TaskDataCustom = placehold // Tasks that call getData() without the second argument will // have a "placeholder" page. 
export const TASKS_DATA: Record = { + "any-to-any": getData("any-to-any", placeholder), "audio-classification": getData("audio-classification", audioClassification), "audio-to-audio": getData("audio-to-audio", audioToAudio), "automatic-speech-recognition": getData("automatic-speech-recognition", automaticSpeechRecognition), diff --git a/packages/widgets/src/lib/components/Icons/IconAnyToAny.svelte b/packages/widgets/src/lib/components/Icons/IconAnyToAny.svelte new file mode 100644 index 000000000..9ebbf1b8c --- /dev/null +++ b/packages/widgets/src/lib/components/Icons/IconAnyToAny.svelte @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/packages/widgets/src/lib/components/PipelineIcon/PipelineIcon.svelte b/packages/widgets/src/lib/components/PipelineIcon/PipelineIcon.svelte index 02cb84f27..1429e1cb4 100644 --- a/packages/widgets/src/lib/components/PipelineIcon/PipelineIcon.svelte +++ b/packages/widgets/src/lib/components/PipelineIcon/PipelineIcon.svelte @@ -43,6 +43,7 @@ import IconImageTo3D from "../Icons/IconImageTo3D.svelte"; import IconImageFeatureExtraction from "../Icons/IconImageFeatureExtraction.svelte"; import IconVideoTextToText from "../Icons/IconVideoTextToText.svelte"; + import IconAnyToAny from "../Icons/IconAnyToAny.svelte"; import IconKeypointDetection from "../Icons/IconKeypointDetection.svelte"; import type { WidgetType } from "@huggingface/tasks"; @@ -97,6 +98,7 @@ "image-to-3d": IconImageTo3D, "image-feature-extraction": IconImageFeatureExtraction, "video-text-to-text": IconVideoTextToText, + "any-to-any": IconAnyToAny, "keypoint-detection": IconKeypointDetection, }; From fea4f15af8b61b0326add609c25d59071858e081 Mon Sep 17 00:00:00 2001 From: "Eliott C." 
Date: Wed, 4 Sep 2024 12:25:27 +0200 Subject: [PATCH 09/15] =?UTF-8?q?=F0=9F=94=A7=20Remove=20github=20packages?= =?UTF-8?q?=20publish=20(#896)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit cc @Wauplin internal convo: https://huggingface.slack.com/archives/C04PJ0H35UM/p1725444018794059 due to not being able to use finegrained tokens for the job - see https://github.com/github/roadmap/issues/558 --- .github/workflows/agents-publish.yml | 7 ++++--- .github/workflows/gguf-publish.yml | 7 ++++--- .github/workflows/hub-publish.yml | 7 ++++--- .github/workflows/inference-publish.yml | 7 ++++--- .github/workflows/jinja-publish.yml | 7 ++++--- .github/workflows/languages-publish.yml | 7 ++++--- .github/workflows/space-header-publish.yml | 7 ++++--- .github/workflows/tasks-publish.yml | 7 ++++--- .github/workflows/widgets-publish.yml | 7 ++++--- 9 files changed, 36 insertions(+), 27 deletions(-) diff --git a/.github/workflows/agents-publish.yml b/.github/workflows/agents-publish.yml index edc72455f..a23b0676d 100644 --- a/.github/workflows/agents-publish.yml +++ b/.github/workflows/agents-publish.yml @@ -61,9 +61,10 @@ jobs: with: node-version: "20" registry-url: "https://npm.pkg.github.com" - - run: pnpm publish --no-git-checks . - env: - NODE_AUTH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + # Disable for now, until github supports PATs for writing github packages (https://github.com/github/roadmap/issues/558) + # - run: pnpm publish --no-git-checks . + # env: + # NODE_AUTH_TOKEN: ${{ secrets.GITHUB_TOKEN }} - name: "Update Doc" uses: peter-evans/repository-dispatch@v2 with: diff --git a/.github/workflows/gguf-publish.yml b/.github/workflows/gguf-publish.yml index 5e5cf81f0..f4791ac5d 100644 --- a/.github/workflows/gguf-publish.yml +++ b/.github/workflows/gguf-publish.yml @@ -58,6 +58,7 @@ jobs: with: node-version: "20" registry-url: "https://npm.pkg.github.com" - - run: pnpm publish --no-git-checks . 
- env: - NODE_AUTH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + # Disable for now, until github supports PATs for writing github packages (https://github.com/github/roadmap/issues/558) + # - run: pnpm publish --no-git-checks . + # env: + # NODE_AUTH_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/hub-publish.yml b/.github/workflows/hub-publish.yml index 5937ad212..545a947e1 100644 --- a/.github/workflows/hub-publish.yml +++ b/.github/workflows/hub-publish.yml @@ -99,9 +99,10 @@ jobs: with: node-version: "20" registry-url: "https://npm.pkg.github.com" - - run: pnpm publish --no-git-checks . - env: - NODE_AUTH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + # Disable for now, until github supports PATs for writing github packages (https://github.com/github/roadmap/issues/558) + # - run: pnpm publish --no-git-checks . + # env: + # NODE_AUTH_TOKEN: ${{ secrets.GITHUB_TOKEN }} - name: "Update Doc" uses: peter-evans/repository-dispatch@v2 with: diff --git a/.github/workflows/inference-publish.yml b/.github/workflows/inference-publish.yml index bdbb47d7d..ee346c24c 100644 --- a/.github/workflows/inference-publish.yml +++ b/.github/workflows/inference-publish.yml @@ -99,6 +99,7 @@ jobs: with: node-version: "20" registry-url: "https://npm.pkg.github.com" - - run: pnpm publish --no-git-checks . - env: - NODE_AUTH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + # Disable for now, until github supports PATs for writing github packages (https://github.com/github/roadmap/issues/558) + # - run: pnpm publish --no-git-checks . + # env: + # NODE_AUTH_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/jinja-publish.yml b/.github/workflows/jinja-publish.yml index 47d7cad7f..dc66cf2f9 100644 --- a/.github/workflows/jinja-publish.yml +++ b/.github/workflows/jinja-publish.yml @@ -58,6 +58,7 @@ jobs: with: node-version: "20" registry-url: "https://npm.pkg.github.com" - - run: pnpm publish --no-git-checks . 
- env: - NODE_AUTH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + # Disable for now, until github supports PATs for writing github packages (https://github.com/github/roadmap/issues/558) + # - run: pnpm publish --no-git-checks . + # env: + # NODE_AUTH_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/languages-publish.yml b/.github/workflows/languages-publish.yml index 913687a52..b7530f383 100644 --- a/.github/workflows/languages-publish.yml +++ b/.github/workflows/languages-publish.yml @@ -58,6 +58,7 @@ jobs: with: node-version: "20" registry-url: "https://npm.pkg.github.com" - - run: pnpm publish --no-git-checks . - env: - NODE_AUTH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + # Disable for now, until github supports PATs for writing github packages (https://github.com/github/roadmap/issues/558) + # - run: pnpm publish --no-git-checks . + # env: + # NODE_AUTH_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/space-header-publish.yml b/.github/workflows/space-header-publish.yml index 05c584514..d69596576 100644 --- a/.github/workflows/space-header-publish.yml +++ b/.github/workflows/space-header-publish.yml @@ -58,9 +58,10 @@ jobs: with: node-version: "20" registry-url: "https://npm.pkg.github.com" - - run: pnpm publish --no-git-checks . - env: - NODE_AUTH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + # Disable for now, until github supports PATs for writing github packages (https://github.com/github/roadmap/issues/558) + # - run: pnpm publish --no-git-checks . + # env: + # NODE_AUTH_TOKEN: ${{ secrets.GITHUB_TOKEN }} - name: "Update Doc" uses: peter-evans/repository-dispatch@v2 with: diff --git a/.github/workflows/tasks-publish.yml b/.github/workflows/tasks-publish.yml index 0dd797dd2..0cd22a97b 100644 --- a/.github/workflows/tasks-publish.yml +++ b/.github/workflows/tasks-publish.yml @@ -58,9 +58,10 @@ jobs: with: node-version: "20" registry-url: "https://npm.pkg.github.com" - - run: pnpm publish --no-git-checks . 
- env: - NODE_AUTH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + # Disable for now, until github supports PATs for writing github packages (https://github.com/github/roadmap/issues/558) + # - run: pnpm publish --no-git-checks . + # env: + # NODE_AUTH_TOKEN: ${{ secrets.GITHUB_TOKEN }} - name: "Update Doc" uses: peter-evans/repository-dispatch@v2 with: diff --git a/.github/workflows/widgets-publish.yml b/.github/workflows/widgets-publish.yml index 7b431ad14..502ffebe5 100644 --- a/.github/workflows/widgets-publish.yml +++ b/.github/workflows/widgets-publish.yml @@ -170,6 +170,7 @@ jobs: with: node-version: "20" registry-url: "https://npm.pkg.github.com" - - run: pnpm publish --no-git-checks . - env: - NODE_AUTH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + # Disable for now, until github supports PATs for writing github packages (https://github.com/github/roadmap/issues/558) + # - run: pnpm publish --no-git-checks . + # env: + # NODE_AUTH_TOKEN: ${{ secrets.GITHUB_TOKEN }} From 4959181d3c43ad5deb1c2a3a05db94abfe358dbe Mon Sep 17 00:00:00 2001 From: machineuser Date: Wed, 4 Sep 2024 10:27:42 +0000 Subject: [PATCH 10/15] =?UTF-8?q?=F0=9F=94=96=20@huggingface/tasks=200.12.?= =?UTF-8?q?0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- packages/tasks/package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/tasks/package.json b/packages/tasks/package.json index 25c37c4ae..8698f0d20 100644 --- a/packages/tasks/package.json +++ b/packages/tasks/package.json @@ -1,7 +1,7 @@ { "name": "@huggingface/tasks", "packageManager": "pnpm@8.10.5", - "version": "0.11.13", + "version": "0.12.0", "description": "List of ML tasks for huggingface.co/tasks", "repository": "https://github.com/huggingface/huggingface.js.git", "publishConfig": { From be261ff40896fd64aae1dbfea54152ed2d3f665e Mon Sep 17 00:00:00 2001 From: Pedro Cuenca Date: Wed, 4 Sep 2024 14:05:11 +0200 Subject: [PATCH 11/15] Fix WBlob test (#895) Fixes #894 --- 
packages/hub/src/utils/WebBlob.spec.ts | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/packages/hub/src/utils/WebBlob.spec.ts b/packages/hub/src/utils/WebBlob.spec.ts index 75687ae1f..919c8f019 100644 --- a/packages/hub/src/utils/WebBlob.spec.ts +++ b/packages/hub/src/utils/WebBlob.spec.ts @@ -1,6 +1,5 @@ import { describe, expect, it, beforeAll } from "vitest"; import { WebBlob } from "./WebBlob"; -import { base64FromBytes } from "./base64FromBytes"; describe("WebBlob", () => { const resourceUrl = new URL("https://huggingface.co/spaces/aschen/push-model-from-web/raw/main/mobilenet/model.json"); @@ -51,15 +50,14 @@ describe("WebBlob", () => { it("should lazy load a LFS file hosted on Hugging Face", async () => { const stableDiffusionUrl = - "https://huggingface.co/runwayml/stable-diffusion-v1-5/resolve/39593d5650112b4cc580433f6b0435385882d819/v1-5-pruned.safetensors"; + "https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0/resolve/main/unet/diffusion_pytorch_model.fp16.safetensors"; const url = new URL(stableDiffusionUrl); const webBlob = await WebBlob.create(url); - expect(webBlob.size).toBe(7_703_324_286); + expect(webBlob.size).toBe(5_135_149_760); expect(webBlob).toBeInstanceOf(WebBlob); expect(webBlob).toMatchObject({ url }); - expect(base64FromBytes(new Uint8Array(await webBlob.slice(6, 12).arrayBuffer()))).toBe("AAB7Il9f"); - expect(base64FromBytes(new Uint8Array(await webBlob.slice(0, 12).arrayBuffer()))).toBe("ytIDAAAAAAB7Il9f"); + expect(await webBlob.slice(10, 22).text()).toBe("__metadata__"); }); it("should create a slice on the file", async () => { From b15ff77ca4e43af2f3fade01f46221b974e67718 Mon Sep 17 00:00:00 2001 From: Eli Costa <87460497+EliMCosta@users.noreply.github.com> Date: Wed, 4 Sep 2024 11:55:14 -0300 Subject: [PATCH 12/15] Adds vLLM as Option for Local App (#693) Adds vLLM as option for "Local apps" in Hugging Face --------- Co-authored-by: Julien Chaumond Co-authored-by: Bertrand CHEVRIER 
Co-authored-by: Bertrand Chevrier Co-authored-by: Michael Goin Co-authored-by: Pedro Cuenca --- packages/tasks/src/local-apps.ts | 77 +++++++++++++++++++++++++++++++- packages/tasks/src/model-data.ts | 4 ++ 2 files changed, 79 insertions(+), 2 deletions(-) diff --git a/packages/tasks/src/local-apps.ts b/packages/tasks/src/local-apps.ts index 0abc4430f..168ed255b 100644 --- a/packages/tasks/src/local-apps.ts +++ b/packages/tasks/src/local-apps.ts @@ -58,11 +58,30 @@ export type LocalApp = { } ); -// eslint-disable-next-line @typescript-eslint/no-unused-vars -function isGgufModel(model: ModelData) { +function isGgufModel(model: ModelData): boolean { return model.tags.includes("gguf"); } +function isAwqModel(model: ModelData): boolean { + return model.config?.quantization_config?.quant_method === "awq"; +} + +function isGptqModel(model: ModelData): boolean { + return model.config?.quantization_config?.quant_method === "gptq"; +} + +function isAqlmModel(model: ModelData): boolean { + return model.config?.quantization_config?.quant_method === "aqlm"; +} + +function isMarlinModel(model: ModelData): boolean { + return model.config?.quantization_config?.quant_method === "marlin"; +} + +function isTransformersModel(model: ModelData): boolean { + return model.tags.includes("transformers"); +} + function isLlamaCppGgufModel(model: ModelData) { return !!model.gguf?.context_length; } @@ -127,6 +146,47 @@ const snippetLocalAI = (model: ModelData, filepath?: string): LocalAppSnippet[] ]; }; +const snippetVllm = (model: ModelData): LocalAppSnippet[] => { + const runCommand = [ + "", + "# Call the server using curl:", + `curl -X POST "http://localhost:8000/v1/chat/completions" \\ `, + ` -H "Content-Type: application/json" \\ `, + ` --data '{`, + ` "model": "${model.id}"`, + ` "messages": [`, + ` {"role": "user", "content": "Hello!"}`, + ` ]`, + ` }'`, + ]; + return [ + { + title: "Install from pip", + setup: ["# Install vLLM from pip:", "pip install vllm"].join("\n"), + content: 
["# Load and run the model:", `vllm serve "${model.id}"`, ...runCommand].join("\n"), + }, + { + title: "Use Docker images", + setup: [ + "# Deploy with docker on Linux:", + `docker run --runtime nvidia --gpus all \\`, + ` --name my_vllm_container \\`, + ` -v ~/.cache/huggingface:/root/.cache/huggingface \\`, + ` --env "HUGGING_FACE_HUB_TOKEN=" \\`, + ` -p 8000:8000 \\`, + ` --ipc=host \\`, + ` vllm/vllm-openai:latest \\`, + ` --model ${model.id}`, + ].join("\n"), + content: [ + "# Load and run the model:", + `docker exec -it my_vllm_container bash -c "vllm serve ${model.id}"`, + ...runCommand, + ].join("\n"), + }, + ]; +}; + /** * Add your new local app here. * @@ -146,6 +206,19 @@ export const LOCAL_APPS = { displayOnModelPage: isLlamaCppGgufModel, snippet: snippetLlamacpp, }, + vllm: { + prettyLabel: "vLLM", + docsUrl: "https://docs.vllm.ai", + mainTask: "text-generation", + displayOnModelPage: (model: ModelData) => + isAwqModel(model) || + isGptqModel(model) || + isAqlmModel(model) || + isMarlinModel(model) || + isGgufModel(model) || + isTransformersModel(model), + snippet: snippetVllm, + }, lmstudio: { prettyLabel: "LM Studio", docsUrl: "https://lmstudio.ai", diff --git a/packages/tasks/src/model-data.ts b/packages/tasks/src/model-data.ts index fa23a9d20..53d66bfe1 100644 --- a/packages/tasks/src/model-data.ts +++ b/packages/tasks/src/model-data.ts @@ -38,6 +38,10 @@ export interface ModelData { bits?: number; load_in_4bit?: boolean; load_in_8bit?: boolean; + /** + * awq, gptq, aqlm, marlin, … Used by vLLM + */ + quant_method?: string; }; tokenizer_config?: TokenizerConfig; adapter_transformers?: { From 148e4135612898948f136b9da581f946e4a873b5 Mon Sep 17 00:00:00 2001 From: machineuser Date: Wed, 4 Sep 2024 14:58:29 +0000 Subject: [PATCH 13/15] =?UTF-8?q?=F0=9F=94=96=20@huggingface/tasks=200.12.?= =?UTF-8?q?1?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- packages/tasks/package.json | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/packages/tasks/package.json b/packages/tasks/package.json index 8698f0d20..9ea308ed4 100644 --- a/packages/tasks/package.json +++ b/packages/tasks/package.json @@ -1,7 +1,7 @@ { "name": "@huggingface/tasks", "packageManager": "pnpm@8.10.5", - "version": "0.12.0", + "version": "0.12.1", "description": "List of ML tasks for huggingface.co/tasks", "repository": "https://github.com/huggingface/huggingface.js.git", "publishConfig": { From a0874168ff84cab8456e417bbec100956b50b25d Mon Sep 17 00:00:00 2001 From: Merve Noyan Date: Thu, 5 Sep 2024 13:46:31 +0300 Subject: [PATCH 14/15] Tasks: update widget models, model recommendations (#876) Updated widget models and model recommendations and fixed former canonical model and dataset ids. --------- Co-authored-by: Merve Noyan Co-authored-by: Pedro Cuenca --- .../src/tasks/audio-classification/data.ts | 12 ++++++---- .../tasks/src/tasks/audio-to-audio/data.ts | 6 ++++- .../automatic-speech-recognition/data.ts | 8 +++++-- .../tasks/document-question-answering/data.ts | 10 +++++--- packages/tasks/src/tasks/fill-mask/data.ts | 6 ++--- .../src/tasks/image-segmentation/data.ts | 2 +- .../tasks/src/tasks/image-to-image/data.ts | 2 +- .../tasks/src/tasks/image-to-text/data.ts | 2 +- .../src/tasks/question-answering/data.ts | 6 ++++- .../src/tasks/sentence-similarity/data.ts | 6 ++--- .../tasks/src/tasks/summarization/data.ts | 4 ++-- .../src/tasks/text-classification/data.ts | 24 ++++++++++++++----- .../tasks/src/tasks/text-generation/data.ts | 6 ++--- .../tasks/src/tasks/text-to-image/data.ts | 2 +- .../tasks/src/tasks/text-to-speech/data.ts | 10 +++++--- .../src/tasks/token-classification/data.ts | 14 ++++++++--- packages/tasks/src/tasks/translation/data.ts | 17 ++++++------- .../src/tasks/video-classification/data.ts | 6 ++--- .../tasks/visual-question-answering/data.ts | 4 ++-- .../tasks/zero-shot-classification/data.ts | 12 ++++++---- .../zero-shot-image-classification/data.ts | 4 ++-- 21 
files changed, 106 insertions(+), 57 deletions(-) diff --git a/packages/tasks/src/tasks/audio-classification/data.ts b/packages/tasks/src/tasks/audio-classification/data.ts index e58bfb677..be123d341 100644 --- a/packages/tasks/src/tasks/audio-classification/data.ts +++ b/packages/tasks/src/tasks/audio-classification/data.ts @@ -4,7 +4,11 @@ const taskData: TaskDataCustom = { datasets: [ { description: "A benchmark of 10 different audio tasks.", - id: "superb", + id: "s3prl/superb", + }, + { + description: "A dataset of YouTube clips and their sound categories.", + id: "agkphysics/AudioSet", }, ], demo: { @@ -50,11 +54,11 @@ const taskData: TaskDataCustom = { ], models: [ { - description: "An easy-to-use model for Command Recognition.", + description: "An easy-to-use model for command recognition.", id: "speechbrain/google_speech_command_xvector", }, { - description: "An Emotion Recognition model.", + description: "An emotion recognition model.", id: "ehcalabres/wav2vec2-lg-xlsr-en-speech-emotion-recognition", }, { @@ -70,7 +74,7 @@ const taskData: TaskDataCustom = { ], summary: "Audio classification is the task of assigning a label or class to a given audio. 
It can be used for recognizing which command a user is giving or the emotion of a statement, as well as identifying a speaker.", - widgetModels: ["facebook/mms-lid-126"], + widgetModels: ["MIT/ast-finetuned-audioset-10-10-0.4593"], youtubeId: "KWwzcmG98Ds", }; diff --git a/packages/tasks/src/tasks/audio-to-audio/data.ts b/packages/tasks/src/tasks/audio-to-audio/data.ts index 373807a09..9d92983b2 100644 --- a/packages/tasks/src/tasks/audio-to-audio/data.ts +++ b/packages/tasks/src/tasks/audio-to-audio/data.ts @@ -44,7 +44,11 @@ const taskData: TaskDataCustom = { }, { description: "A speech enhancement model.", - id: "speechbrain/metricgan-plus-voicebank", + id: "ResembleAI/resemble-enhance", + }, + { + description: "A model that can change the voice in a speech recording.", + id: "microsoft/speecht5_vc", }, ], spaces: [ diff --git a/packages/tasks/src/tasks/automatic-speech-recognition/data.ts b/packages/tasks/src/tasks/automatic-speech-recognition/data.ts index e116a69e7..89078ce71 100644 --- a/packages/tasks/src/tasks/automatic-speech-recognition/data.ts +++ b/packages/tasks/src/tasks/automatic-speech-recognition/data.ts @@ -7,8 +7,8 @@ const taskData: TaskDataCustom = { id: "mozilla-foundation/common_voice_17_0", }, { - description: "An English dataset with 1,000 hours of data.", - id: "librispeech_asr", + description: "A dataset with 44.6k hours of English speaker data and 6k hours of other language speakers.", + id: "parler-tts/mls_eng", }, { description: "A multi-lingual audio dataset with 370K hours of audio.", @@ -54,6 +54,10 @@ const taskData: TaskDataCustom = { description: "An end-to-end model that performs ASR and Speech Translation by MetaAI.", id: "facebook/seamless-m4t-v2-large", }, + { + description: "Powerful speaker diarization model.", + id: "pyannote/speaker-diarization-3.1", + }, ], spaces: [ { diff --git a/packages/tasks/src/tasks/document-question-answering/data.ts b/packages/tasks/src/tasks/document-question-answering/data.ts index 
e966b3925..f36ed212c 100644 --- a/packages/tasks/src/tasks/document-question-answering/data.ts +++ b/packages/tasks/src/tasks/document-question-answering/data.ts @@ -46,11 +46,15 @@ const taskData: TaskDataCustom = { ], models: [ { - description: "A LayoutLM model for the document QA task, fine-tuned on DocVQA and SQuAD2.0.", + description: "A robust document question answering model.", id: "impira/layoutlm-document-qa", }, { - description: "A special model for OCR-free Document QA task.", + description: "A document question answering model specialized in invoices.", + id: "impira/layoutlm-invoices", + }, + { + description: "A special model for OCR-free document question answering.", id: "microsoft/udop-large", }, { @@ -74,7 +78,7 @@ const taskData: TaskDataCustom = { ], summary: "Document Question Answering (also known as Document Visual Question Answering) is the task of answering questions on document images. Document question answering models take a (document, question) pair as input and return an answer in natural language. 
Models usually rely on multi-modal features, combining text, position of words (bounding-boxes) and image.", - widgetModels: ["impira/layoutlm-document-qa"], + widgetModels: ["impira/layoutlm-invoices"], youtubeId: "", }; diff --git a/packages/tasks/src/tasks/fill-mask/data.ts b/packages/tasks/src/tasks/fill-mask/data.ts index 45d5d53b9..55550f042 100644 --- a/packages/tasks/src/tasks/fill-mask/data.ts +++ b/packages/tasks/src/tasks/fill-mask/data.ts @@ -61,12 +61,12 @@ const taskData: TaskDataCustom = { ], models: [ { - description: "A faster and smaller model than the famous BERT model.", - id: "distilbert-base-uncased", + description: "The famous BERT model.", + id: "google-bert/bert-base-uncased", }, { description: "A multilingual model trained on 100 languages.", - id: "xlm-roberta-base", + id: "FacebookAI/xlm-roberta-base", }, ], spaces: [], diff --git a/packages/tasks/src/tasks/image-segmentation/data.ts b/packages/tasks/src/tasks/image-segmentation/data.ts index 8aa686f78..8648eaff0 100644 --- a/packages/tasks/src/tasks/image-segmentation/data.ts +++ b/packages/tasks/src/tasks/image-segmentation/data.ts @@ -92,7 +92,7 @@ const taskData: TaskDataCustom = { ], summary: "Image Segmentation divides an image into segments where each pixel in the image is mapped to an object. 
This task has multiple variants such as instance segmentation, panoptic segmentation and semantic segmentation.", - widgetModels: ["facebook/detr-resnet-50-panoptic"], + widgetModels: ["nvidia/segformer-b0-finetuned-ade-512-512"], youtubeId: "dKE8SIt9C-w", }; diff --git a/packages/tasks/src/tasks/image-to-image/data.ts b/packages/tasks/src/tasks/image-to-image/data.ts index 65200fd92..a08c19edd 100644 --- a/packages/tasks/src/tasks/image-to-image/data.ts +++ b/packages/tasks/src/tasks/image-to-image/data.ts @@ -94,7 +94,7 @@ const taskData: TaskDataCustom = { ], summary: "Image-to-image is the task of transforming an input image through a variety of possible manipulations and enhancements, such as super-resolution, image inpainting, colorization, and more.", - widgetModels: ["lllyasviel/sd-controlnet-canny"], + widgetModels: ["stabilityai/stable-diffusion-2-inpainting"], youtubeId: "", }; diff --git a/packages/tasks/src/tasks/image-to-text/data.ts b/packages/tasks/src/tasks/image-to-text/data.ts index 690149537..64f4fe5c8 100644 --- a/packages/tasks/src/tasks/image-to-text/data.ts +++ b/packages/tasks/src/tasks/image-to-text/data.ts @@ -75,7 +75,7 @@ const taskData: TaskDataCustom = { ], summary: "Image to text models output a text from a given image. 
Image captioning or optical character recognition can be considered as the most common applications of image to text.", - widgetModels: ["Salesforce/blip-image-captioning-base"], + widgetModels: ["Salesforce/blip-image-captioning-large"], youtubeId: "", }; diff --git a/packages/tasks/src/tasks/question-answering/data.ts b/packages/tasks/src/tasks/question-answering/data.ts index f80e138f6..ac1443adf 100644 --- a/packages/tasks/src/tasks/question-answering/data.ts +++ b/packages/tasks/src/tasks/question-answering/data.ts @@ -52,7 +52,11 @@ const taskData: TaskDataCustom = { id: "deepset/roberta-base-squad2", }, { - description: "A special model that can answer questions from tables!", + description: "Small yet robust model that can answer questions.", + id: "distilbert/distilbert-base-cased-distilled-squad", + }, + { + description: "A special model that can answer questions from tables.", id: "google/tapas-base-finetuned-wtq", }, ], diff --git a/packages/tasks/src/tasks/sentence-similarity/data.ts b/packages/tasks/src/tasks/sentence-similarity/data.ts index 6feba5779..3ef54bbd3 100644 --- a/packages/tasks/src/tasks/sentence-similarity/data.ts +++ b/packages/tasks/src/tasks/sentence-similarity/data.ts @@ -69,8 +69,8 @@ const taskData: TaskDataCustom = { id: "sentence-transformers/all-mpnet-base-v2", }, { - description: "A multilingual model trained for FAQ retrieval.", - id: "clips/mfaq", + description: "A multilingual robust sentence similarity model..", + id: "BAAI/bge-m3", }, ], spaces: [ @@ -94,7 +94,7 @@ const taskData: TaskDataCustom = { ], summary: "Sentence Similarity is the task of determining how similar two texts are. Sentence similarity models convert input texts into vectors (embeddings) that capture semantic information and calculate how close (similar) they are between them. 
This task is particularly useful for information retrieval and clustering/grouping.", - widgetModels: ["sentence-transformers/all-MiniLM-L6-v2"], + widgetModels: ["BAAI/bge-small-en-v1.5"], youtubeId: "VCZq5AkbNEU", }; diff --git a/packages/tasks/src/tasks/summarization/data.ts b/packages/tasks/src/tasks/summarization/data.ts index bd04453da..239a04fc4 100644 --- a/packages/tasks/src/tasks/summarization/data.ts +++ b/packages/tasks/src/tasks/summarization/data.ts @@ -46,7 +46,7 @@ const taskData: TaskDataCustom = { }, { description: "A summarization model trained on medical articles.", - id: "google/bigbird-pegasus-large-pubmed", + id: "Falconsai/medical_summarization", }, ], spaces: [ @@ -69,7 +69,7 @@ const taskData: TaskDataCustom = { ], summary: "Summarization is the task of producing a shorter version of a document while preserving its important information. Some models can extract text from the original input, while other models can generate entirely new text.", - widgetModels: ["sshleifer/distilbart-cnn-12-6"], + widgetModels: ["facebook/bart-large-cnn"], youtubeId: "yHnr5Dk2zCI", }; diff --git a/packages/tasks/src/tasks/text-classification/data.ts b/packages/tasks/src/tasks/text-classification/data.ts index 068785e18..b6a26dcc4 100644 --- a/packages/tasks/src/tasks/text-classification/data.ts +++ b/packages/tasks/src/tasks/text-classification/data.ts @@ -4,11 +4,11 @@ const taskData: TaskDataCustom = { datasets: [ { description: "A widely used dataset used to benchmark multiple variants of text classification.", - id: "glue", + id: "nyu-mll/glue", }, { description: "A text classification dataset used to benchmark natural language inference models", - id: "snli", + id: "stanfordnlp/snli", }, ], demo: { @@ -61,11 +61,23 @@ const taskData: TaskDataCustom = { models: [ { description: "A robust model trained for sentiment analysis.", - id: "distilbert-base-uncased-finetuned-sst-2-english", + id: "distilbert/distilbert-base-uncased-finetuned-sst-2-english", }, { 
- description: "Multi-genre natural language inference model.", - id: "roberta-large-mnli", + description: "A sentiment analysis model specialized in financial sentiment.", + id: "ProsusAI/finbert", + }, + { + description: "A sentiment analysis model specialized in analyzing tweets.", + id: "cardiffnlp/twitter-roberta-base-sentiment-latest", + }, + { + description: "A model that can classify languages.", + id: "papluca/xlm-roberta-base-language-detection", + }, + { + description: "A model that can classify text generation attacks.", + id: "meta-llama/Prompt-Guard-86M", }, ], spaces: [ @@ -84,7 +96,7 @@ const taskData: TaskDataCustom = { ], summary: "Text Classification is the task of assigning a label or class to a given text. Some use cases are sentiment analysis, natural language inference, and assessing grammatical correctness.", - widgetModels: ["distilbert-base-uncased-finetuned-sst-2-english"], + widgetModels: ["distilbert/distilbert-base-uncased-finetuned-sst-2-english"], youtubeId: "leNG9fN9FQU", }; diff --git a/packages/tasks/src/tasks/text-generation/data.ts b/packages/tasks/src/tasks/text-generation/data.ts index 807c82943..26f085ad7 100644 --- a/packages/tasks/src/tasks/text-generation/data.ts +++ b/packages/tasks/src/tasks/text-generation/data.ts @@ -97,8 +97,8 @@ const taskData: TaskDataCustom = { id: "HuggingFaceH4/zephyr-chat", }, { - description: "An text generation application that combines OpenAI and Hugging Face models.", - id: "microsoft/HuggingGPT", + description: "A leaderboard that ranks text generation models based on blind votes from people.", + id: "lmsys/chatbot-arena-leaderboard", }, { description: "An chatbot to converse with a very powerful text generation model.", @@ -107,7 +107,7 @@ const taskData: TaskDataCustom = { ], summary: "Generating text is the task of generating new text given another text. 
These models can, for example, fill in incomplete text or paraphrase.", - widgetModels: ["HuggingFaceH4/zephyr-7b-beta"], + widgetModels: ["mistralai/Mistral-Nemo-Instruct-2407"], youtubeId: "e9gNEAlsOvU", }; diff --git a/packages/tasks/src/tasks/text-to-image/data.ts b/packages/tasks/src/tasks/text-to-image/data.ts index 7656c6eda..527f5cce0 100644 --- a/packages/tasks/src/tasks/text-to-image/data.ts +++ b/packages/tasks/src/tasks/text-to-image/data.ts @@ -93,7 +93,7 @@ const taskData: TaskDataCustom = { ], summary: "Generates images from input text. These models can be used to generate and modify images based on text prompts.", - widgetModels: ["CompVis/stable-diffusion-v1-4"], + widgetModels: ["black-forest-labs/FLUX.1-dev"], youtubeId: "", }; diff --git a/packages/tasks/src/tasks/text-to-speech/data.ts b/packages/tasks/src/tasks/text-to-speech/data.ts index d4e5c886f..22bd1202d 100644 --- a/packages/tasks/src/tasks/text-to-speech/data.ts +++ b/packages/tasks/src/tasks/text-to-speech/data.ts @@ -9,7 +9,7 @@ const taskData: TaskDataCustom = { }, { description: "Multi-speaker English dataset.", - id: "LibriTTS", + id: "mythicinfinity/libritts_r", }, ], demo: { @@ -36,11 +36,15 @@ const taskData: TaskDataCustom = { models: [ { description: "A powerful TTS model.", - id: "suno/bark", + id: "parler-tts/parler-tts-large-v1", }, { description: "A massively multi-lingual TTS model.", - id: "facebook/mms-tts", + id: "coqui/XTTS-v2", + }, + { + description: "Robust TTS model.", + id: "metavoiceio/metavoice-1B-v0.1", }, { description: "A prompt based, powerful TTS model.", diff --git a/packages/tasks/src/tasks/token-classification/data.ts b/packages/tasks/src/tasks/token-classification/data.ts index d4510819f..7a1d1abed 100644 --- a/packages/tasks/src/tasks/token-classification/data.ts +++ b/packages/tasks/src/tasks/token-classification/data.ts @@ -4,12 +4,12 @@ const taskData: TaskDataCustom = { datasets: [ { description: "A widely used dataset useful to benchmark named 
entity recognition models.", - id: "conll2003", + id: "eriktks/conll2003", }, { description: "A multilingual dataset of Wikipedia articles annotated for named entity recognition in over 150 different languages.", - id: "wikiann", + id: "unimelb-nlp/wikiann", }, ], demo: { @@ -63,6 +63,14 @@ const taskData: TaskDataCustom = { "A robust performance model to identify people, locations, organizations and names of miscellaneous entities.", id: "dslim/bert-base-NER", }, + { + description: "A strong model to identify people, locations, organizations and names in multiple languages.", + id: "FacebookAI/xlm-roberta-large-finetuned-conll03-english", + }, + { + description: "A token classification model specialized on medical entity recognition.", + id: "blaze999/Medical-NER", + }, { description: "Flair models are typically the state of the art in named entity recognition tasks.", id: "flair/ner-english", @@ -77,7 +85,7 @@ const taskData: TaskDataCustom = { ], summary: "Token classification is a natural language understanding task in which a label is assigned to some tokens in a text. Some popular token classification subtasks are Named Entity Recognition (NER) and Part-of-Speech (PoS) tagging. 
NER models could be trained to identify specific entities in a text, such as dates, individuals and places; and PoS tagging would identify, for example, which words in a text are verbs, nouns, and punctuation marks.", - widgetModels: ["dslim/bert-base-NER"], + widgetModels: ["FacebookAI/xlm-roberta-large-finetuned-conll03-english"], youtubeId: "wVHdVlPScxA", }; diff --git a/packages/tasks/src/tasks/translation/data.ts b/packages/tasks/src/tasks/translation/data.ts index 0edfab7b8..9707734d9 100644 --- a/packages/tasks/src/tasks/translation/data.ts +++ b/packages/tasks/src/tasks/translation/data.ts @@ -5,12 +5,12 @@ const taskData: TaskDataCustom = { datasets: [ { description: "A dataset of copyright-free books translated into 16 different languages.", - id: "opus_books", + id: "Helsinki-NLP/opus_books", }, { description: "An example of translation between programming languages. This dataset consists of functions in Java and C#.", - id: "code_x_glue_cc_code_to_code_trans", + id: "google/code_x_glue_cc_code_to_code_trans", }, ], demo: { @@ -42,13 +42,14 @@ const taskData: TaskDataCustom = { ], models: [ { - description: "A model that translates from English to French.", - id: "Helsinki-NLP/opus-mt-en-fr", + description: + "Very powerful model that can translate many languages between each other, especially low-resource languages.", + id: "facebook/nllb-200-1.3B", }, { description: "A general-purpose Transformer that can be used to translate from English to German, French, or Romanian.", - id: "t5-base", + id: "google-t5/t5-base", }, ], spaces: [ @@ -57,12 +58,12 @@ const taskData: TaskDataCustom = { id: "Iker/Translate-100-languages", }, { - description: "An application that can translate between English, Spanish and Hindi.", - id: "EuroPython2022/Translate-with-Bloom", + description: "An application that can translate between many languages.", + id: "Geonmo/nllb-translation-demo", }, ], summary: "Translation is the task of converting text from one language to 
another.", - widgetModels: ["t5-small"], + widgetModels: ["facebook/mbart-large-50-many-to-many-mmt"], youtubeId: "1JvfrvZgi6c", }; diff --git a/packages/tasks/src/tasks/video-classification/data.ts b/packages/tasks/src/tasks/video-classification/data.ts index f02d2bbed..47d2c2d75 100644 --- a/packages/tasks/src/tasks/video-classification/data.ts +++ b/packages/tasks/src/tasks/video-classification/data.ts @@ -56,12 +56,12 @@ const taskData: TaskDataCustom = { models: [ { // TO DO: write description - description: "Strong Video Classification model trained on the Kinects 400 dataset.", - id: "MCG-NJU/videomae-base-finetuned-kinetics", + description: "Strong Video Classification model trained on the Kinetics 400 dataset.", + id: "google/vivit-b-16x2-kinetics400", }, { // TO DO: write description - description: "Strong Video Classification model trained on the Kinects 400 dataset.", + description: "Strong Video Classification model trained on the Kinetics 400 dataset.", id: "microsoft/xclip-base-patch32", }, ], diff --git a/packages/tasks/src/tasks/visual-question-answering/data.ts b/packages/tasks/src/tasks/visual-question-answering/data.ts index 2d94edd42..f31334330 100644 --- a/packages/tasks/src/tasks/visual-question-answering/data.ts +++ b/packages/tasks/src/tasks/visual-question-answering/data.ts @@ -8,7 +8,7 @@ const taskData: TaskDataCustom = { }, { description: "A dataset to benchmark visual reasoning based on text in images.", - id: "textvqa", + id: "facebook/textvqa", }, ], demo: { @@ -63,7 +63,7 @@ const taskData: TaskDataCustom = { { description: "A visual question answering model trained for mathematical reasoning and chart derendering from images.", - id: "google/matcha-base ", + id: "google/matcha-base", }, { description: "A strong visual question answering that answers questions from book covers.", diff --git a/packages/tasks/src/tasks/zero-shot-classification/data.ts b/packages/tasks/src/tasks/zero-shot-classification/data.ts index 
0def51240..1ecc51c95 100644 --- a/packages/tasks/src/tasks/zero-shot-classification/data.ts +++ b/packages/tasks/src/tasks/zero-shot-classification/data.ts @@ -4,17 +4,17 @@ const taskData: TaskDataCustom = { datasets: [ { description: "A widely used dataset used to benchmark multiple variants of text classification.", - id: "glue", + id: "nyu-mll/glue", }, { description: "The Multi-Genre Natural Language Inference (MultiNLI) corpus is a crowd-sourced collection of 433k sentence pairs annotated with textual entailment information.", - id: "MultiNLI", + id: "nyu-mll/multi_nli", }, { description: "FEVER is a publicly available dataset for fact extraction and verification against textual sources.", - id: "FEVER", + id: "fever/fever", }, ], demo: { @@ -53,9 +53,13 @@ const taskData: TaskDataCustom = { metrics: [], models: [ { - description: "Powerful zero-shot text classification model", + description: "Powerful zero-shot text classification model.", id: "facebook/bart-large-mnli", }, + { + description: "Powerful zero-shot multilingual text classification model that can accomplish multiple tasks.", + id: "MoritzLaurer/mDeBERTa-v3-base-xnli-multilingual-nli-2mil7", + }, ], spaces: [], summary: diff --git a/packages/tasks/src/tasks/zero-shot-image-classification/data.ts b/packages/tasks/src/tasks/zero-shot-image-classification/data.ts index bc7b6aab3..5be19aedf 100644 --- a/packages/tasks/src/tasks/zero-shot-image-classification/data.ts +++ b/packages/tasks/src/tasks/zero-shot-image-classification/data.ts @@ -53,7 +53,7 @@ const taskData: TaskDataCustom = { }, { description: "Strong zero-shot image classification model.", - id: "google/siglip-base-patch16-224", + id: "google/siglip-so400m-patch14-224", }, { description: "Small yet powerful zero-shot image classification model that can run on edge devices.", @@ -77,7 +77,7 @@ const taskData: TaskDataCustom = { ], summary: "Zero-shot image classification is the task of classifying previously unseen classes during training 
of a model.", - widgetModels: ["openai/clip-vit-large-patch14-336"], + widgetModels: ["google/siglip-so400m-patch14-224"], youtubeId: "", }; From 83a9cb62ff1ba464e44dcd7487252545b3578275 Mon Sep 17 00:00:00 2001 From: NielsRogge <48327001+NielsRogge@users.noreply.github.com> Date: Thu, 5 Sep 2024 20:13:08 +0200 Subject: [PATCH 15/15] Add YOLOv10 as library (#884) The YOLOv10 author already added `library_name: yolov10` into each of his model repos, e.g. https://huggingface.co/jameslahm/yolov10x. This PR adds a "How to use this model" button along with a code snippet. cc @jameslahm --- packages/tasks/src/model-libraries-snippets.ts | 9 +++++++++ packages/tasks/src/model-libraries.ts | 7 +++++++ 2 files changed, 16 insertions(+) diff --git a/packages/tasks/src/model-libraries-snippets.ts b/packages/tasks/src/model-libraries-snippets.ts index f9c096095..3247f2e2d 100644 --- a/packages/tasks/src/model-libraries-snippets.ts +++ b/packages/tasks/src/model-libraries-snippets.ts @@ -891,6 +891,15 @@ wavs = chat.infer(texts, ) torchaudio.save("output1.wav", torch.from_numpy(wavs[0]), 24000)`, ]; +export const yolov10 = (model: ModelData): string[] => [ + `from ultralytics import YOLOv10 + +model = YOLOv10.from_pretrained("${model.id}") +source = 'http://images.cocodataset.org/val2017/000000039769.jpg' +model.predict(source=source, save=True) +`, +]; + export const birefnet = (model: ModelData): string[] => [ `# Option 1: use with transformers diff --git a/packages/tasks/src/model-libraries.ts b/packages/tasks/src/model-libraries.ts index 55bf8d09c..2c163e3de 100644 --- a/packages/tasks/src/model-libraries.ts +++ b/packages/tasks/src/model-libraries.ts @@ -657,6 +657,13 @@ export const MODEL_LIBRARIES_UI_ELEMENTS = { docsUrl: "https://github.com/jasonppy/VoiceCraft", snippets: snippets.voicecraft, }, + yolov10: { + prettyLabel: "YOLOv10", + repoName: "yolov10", + repoUrl: "https://github.com/THU-MIG/yolov10", + docsUrl: "https://github.com/THU-MIG/yolov10", + snippets: 
snippets.yolov10, + }, whisperkit: { prettyLabel: "WhisperKit", repoName: "WhisperKit",