Merge branch 'main' into update-models

huggingface · Sep 4, 2024 · 6f90063 · 6f90063
2 parents 1f9803c + 6e12cfa
commit 6f90063
Show file tree

Hide file tree

Showing 29 changed files with 262 additions and 46 deletions.
diff --git a/.github/workflows/agents-publish.yml b/.github/workflows/agents-publish.yml
@@ -50,7 +50,7 @@ jobs:
           node -e "const fs = require('fs'); const package = JSON.parse(fs.readFileSync('./package.json')); package.version = '$BUMPED_VERSION'; fs.writeFileSync('./package.json', JSON.stringify(package, null, '\t') + '\n');"
           pnpm --filter doc-internal run fix-cdn-versions
           git add ../..
-          git commit -m "🔖 @hugginface/agents $BUMPED_VERSION"
+          git commit -m "🔖 @huggingface/agents $BUMPED_VERSION"
           git tag "agents-v$BUMPED_VERSION"
       - run: pnpm --filter agents... build && pnpm publish --no-git-checks .
         env:

diff --git a/.github/workflows/gguf-publish.yml b/.github/workflows/gguf-publish.yml
@@ -47,7 +47,7 @@ jobs:
           BUMPED_VERSION=$(node -p "require('semver').inc('$PACKAGE_VERSION', '${{ github.event.inputs.newversion }}')")
           # Update package.json with the new version
           node -e "const fs = require('fs'); const package = JSON.parse(fs.readFileSync('./package.json')); package.version = '$BUMPED_VERSION'; fs.writeFileSync('./package.json', JSON.stringify(package, null, '\t') + '\n');"
-          git commit . -m "🔖 @hugginface/gguf $BUMPED_VERSION"
+          git commit . -m "🔖 @huggingface/gguf $BUMPED_VERSION"
           git tag "gguf-v$BUMPED_VERSION"
       - run: pnpm publish --no-git-checks .
         env:

diff --git a/.github/workflows/hub-publish.yml b/.github/workflows/hub-publish.yml
@@ -50,7 +50,7 @@ jobs:
           node -e "const fs = require('fs'); const package = JSON.parse(fs.readFileSync('./package.json')); package.version = '$BUMPED_VERSION'; fs.writeFileSync('./package.json', JSON.stringify(package, null, '\t') + '\n');"
           pnpm --filter doc-internal run fix-cdn-versions
           git add ../..
-          git commit -m "🔖 @hugginface/hub $BUMPED_VERSION"
+          git commit -m "🔖 @huggingface/hub $BUMPED_VERSION"
           git tag "hub-v$BUMPED_VERSION"
 
       - name: Make sure that the latest version of @huggingface/tasks is consistent with the local version

diff --git a/.github/workflows/inference-publish.yml b/.github/workflows/inference-publish.yml
@@ -50,7 +50,7 @@ jobs:
           node -e "const fs = require('fs'); const package = JSON.parse(fs.readFileSync('./package.json')); package.version = '$BUMPED_VERSION'; fs.writeFileSync('./package.json', JSON.stringify(package, null, '\t') + '\n');"
           pnpm --filter doc-internal run fix-cdn-versions
           git add ../..
-          git commit -m "🔖 @hugginface/inference $BUMPED_VERSION"
+          git commit -m "🔖 @huggingface/inference $BUMPED_VERSION"
           git tag "inference-v$BUMPED_VERSION"
 
       - name: Make sure that the latest version of @huggingface/tasks is consistent with the local version

diff --git a/.github/workflows/jinja-publish.yml b/.github/workflows/jinja-publish.yml
@@ -47,7 +47,7 @@ jobs:
           BUMPED_VERSION=$(node -p "require('semver').inc('$PACKAGE_VERSION', '${{ github.event.inputs.newversion }}')")
           # Update package.json with the new version
           node -e "const fs = require('fs'); const package = JSON.parse(fs.readFileSync('./package.json')); package.version = '$BUMPED_VERSION'; fs.writeFileSync('./package.json', JSON.stringify(package, null, '\t') + '\n');"
-          git commit . -m "🔖 @hugginface/jinja $BUMPED_VERSION"
+          git commit . -m "🔖 @huggingface/jinja $BUMPED_VERSION"
           git tag "jinja-v$BUMPED_VERSION"
       - run: pnpm publish --no-git-checks .
         env:

diff --git a/.github/workflows/languages-publish.yml b/.github/workflows/languages-publish.yml
@@ -47,7 +47,7 @@ jobs:
           BUMPED_VERSION=$(node -p "require('semver').inc('$PACKAGE_VERSION', '${{ github.event.inputs.newversion }}')")
           # Update package.json with the new version
           node -e "const fs = require('fs'); const package = JSON.parse(fs.readFileSync('./package.json')); package.version = '$BUMPED_VERSION'; fs.writeFileSync('./package.json', JSON.stringify(package, null, '\t') + '\n');"
-          git commit . -m "🔖 @hugginface/languages $BUMPED_VERSION"
+          git commit . -m "🔖 @huggingface/languages $BUMPED_VERSION"
           git tag "languages-v$BUMPED_VERSION"
       - run: pnpm publish --no-git-checks .
         env:

diff --git a/.github/workflows/space-header-publish.yml b/.github/workflows/space-header-publish.yml
@@ -47,7 +47,7 @@ jobs:
           BUMPED_VERSION=$(node -p "require('semver').inc('$PACKAGE_VERSION', '${{ github.event.inputs.newversion }}')")
           # Update package.json with the new version
           node -e "const fs = require('fs'); const package = JSON.parse(fs.readFileSync('./package.json')); package.version = '$BUMPED_VERSION'; fs.writeFileSync('./package.json', JSON.stringify(package, null, '\t') + '\n');"
-          git commit . -m "🔖 @hugginface/space-header $BUMPED_VERSION"
+          git commit . -m "🔖 @huggingface/space-header $BUMPED_VERSION"
           git tag "space-header-v$BUMPED_VERSION"
       - run: pnpm publish --no-git-checks .
         env:

diff --git a/.github/workflows/tasks-publish.yml b/.github/workflows/tasks-publish.yml
@@ -47,7 +47,7 @@ jobs:
           BUMPED_VERSION=$(node -p "require('semver').inc('$PACKAGE_VERSION', '${{ github.event.inputs.newversion }}')")
           # Update package.json with the new version
           node -e "const fs = require('fs'); const package = JSON.parse(fs.readFileSync('./package.json')); package.version = '$BUMPED_VERSION'; fs.writeFileSync('./package.json', JSON.stringify(package, null, '\t') + '\n');"
-          git commit . -m "🔖 @hugginface/tasks $BUMPED_VERSION"
+          git commit . -m "🔖 @huggingface/tasks $BUMPED_VERSION"
           git tag "tasks-v$BUMPED_VERSION"
       - run: pnpm publish --no-git-checks .
         env:

diff --git a/packages/doc-internal/README.md b/packages/doc-internal/README.md
@@ -2,7 +2,7 @@
 
 This package generates `.md` files inside the [docs](../../docs) folder using [typedoc](https://typedoc.org/) and [typedoc-plugin-markdown](https://github.com/tgreyuk/typedoc-plugin-markdown).
 
-The `.md` files are generated when releasing packages. They are then published to [hugginface.co](https://huggingface.co/docs/huggingface.js/index) through the [doc-builder](https://github.com/huggingface/doc-builder)'s github action.
+The `.md` files are generated when releasing packages. They are then published to [huggingface.co](https://huggingface.co/docs/huggingface.js/index) through the [doc-builder](https://github.com/huggingface/doc-builder)'s github action.
 
 We run a few scripts in between, [fix-md-links](./fix-md-links.ts) and [update-toc](./update-toc.ts) to preprocess the files for `doc-builder`.
 

diff --git a/packages/space-header/package.json b/packages/space-header/package.json
@@ -1,6 +1,6 @@
 {
 	"name": "@huggingface/space-header",
-	"version": "1.0.3",
+	"version": "1.0.4",
 	"packageManager": "[email protected]",
 	"description": "Use the Space mini_header outside Hugging Face",
 	"repository": "https://github.com/huggingface/huggingface.js.git",

diff --git a/packages/space-header/src/header/components/content/avatar.ts b/packages/space-header/src/header/components/content/avatar.ts
@@ -1,6 +1,8 @@
-export const Avatar = (username: string): HTMLImageElement => {
+export const Avatar = (username: string, type: "user" | "org" = "user"): HTMLImageElement => {
+	const route = type === "user" ? "users" : "organizations";
+
 	const element = document.createElement("img");
-	element.src = `https://huggingface.co/api/users/${username}/avatar`;
+	element.src = `https://huggingface.co/api/${route}/${username}/avatar`;
 
 	element.style.width = "0.875rem";
 	element.style.height = "0.875rem";

diff --git a/packages/space-header/src/header/components/content/index.ts b/packages/space-header/src/header/components/content/index.ts
@@ -15,7 +15,9 @@ export const Content = (space: Space): HTMLDivElement => {
 	content.style.paddingRight = "12px";
 	content.style.height = "40px";
 
-	content.appendChild(Avatar(space.author));
+	if (space.type !== "unknown") {
+		content.appendChild(Avatar(space.author, space.type));
+	}
 	content.appendChild(Username(space.author));
 	content.appendChild(Separation());
 	content.appendChild(Namespace(space.id));

diff --git a/packages/space-header/src/index.ts b/packages/space-header/src/index.ts
@@ -3,7 +3,8 @@ import type { Options, Space, Header } from "./type";
 import { inject_fonts } from "./inject_fonts";
 
 import { create } from "./header/create";
-import { get_space } from "./get_space";
+import { check_avatar } from "./utils/check_avatar";
+import { get_space } from "./utils/get_space";
 import { inject } from "./inject";
 
 async function main(initialSpace: string | Space, options?: Options) {
@@ -27,6 +28,9 @@ async function main(initialSpace: string | Space, options?: Options) {
 		space = initialSpace;
 	}
 
+	const [user, org] = await Promise.all([check_avatar(space.author, "user"), check_avatar(space.author, "org")]);
+	space.type = user ? "user" : org ? "org" : "unknown";
+
 	const mini_header_element = create(space as Space);
 	inject(mini_header_element, options);
 

diff --git a/packages/space-header/src/type.ts b/packages/space-header/src/type.ts
@@ -2,6 +2,7 @@ export interface Space {
 	id: string;
 	likes: number;
 	author: string;
+	type?: "user" | "org" | "unknown";
 }
 
 export interface User {

diff --git a/packages/space-header/src/utils/check_avatar.ts b/packages/space-header/src/utils/check_avatar.ts
@@ -0,0 +1,10 @@
+export const check_avatar = async (username: string, type: "user" | "org" = "user"): Promise<boolean> => {
+	const route = type === "user" ? "users" : "organizations";
+
+	try {
+		const response = await fetch(`https://huggingface.co/api/${route}/${username}/avatar`);
+		return response.ok;
+	} catch (error) {
+		return false;
+	}
+};
diff --git a/packages/space-header/src/get_space.ts → packages/space-header/src/utils/get_space.ts b/packages/space-header/src/get_space.ts → packages/space-header/src/utils/get_space.ts
@@ -1,4 +1,4 @@
-import type { Space } from "./type";
+import type { Space } from "./../type";
 
 export const get_space = async (space_id: string): Promise<Space | null> => {
 	try {

diff --git a/packages/tasks/package.json b/packages/tasks/package.json
@@ -1,7 +1,7 @@
 {
 	"name": "@huggingface/tasks",
 	"packageManager": "[email protected]",
-	"version": "0.11.12",
+	"version": "0.11.13",
 	"description": "List of ML tasks for huggingface.co/tasks",
 	"repository": "https://github.com/huggingface/huggingface.js.git",
 	"publishConfig": {

diff --git a/packages/tasks/src/model-libraries.ts b/packages/tasks/src/model-libraries.ts
@@ -446,6 +446,13 @@ export const MODEL_LIBRARIES_UI_ELEMENTS = {
 		snippets: snippets.pyannote_audio,
 		filter: true,
 	},
+	"py-feat": {
+		prettyLabel: "Py-Feat",
+		repoName: "Py-Feat",
+		repoUrl: "https://github.com/cosanlab/py-feat",
+		docsUrl: "https://py-feat.org/",
+		filter: false,
+	},
 	pythae: {
 		prettyLabel: "pythae",
 		repoName: "pythae",

diff --git a/packages/tasks/src/pipelines.ts b/packages/tasks/src/pipelines.ts
@@ -656,6 +656,24 @@ export const PIPELINE_DATA = {
 		name: "Video-Text-to-Text",
 		modality: "multimodal",
 		color: "blue",
+		hideInDatasets: false,
+	},
+	"keypoint-detection": {
+		name: "Keypoint Detection",
+		subtasks: [
+			{
+				type: "pose-estimation",
+				name: "Pose Estimation",
+			},
+		],
+		modality: "cv",
+		color: "red",
+		hideInDatasets: true,
+	},
+	"any-to-any": {
+		name: "Any-to-Any",
+		modality: "multimodal",
+		color: "yellow",
 		hideInDatasets: true,
 	},
 	other: {

diff --git a/packages/tasks/src/tasks/image-to-image/about.md b/packages/tasks/src/tasks/image-to-image/about.md
@@ -1,15 +1,10 @@
-## Use Cases
-
-### Style transfer
+Image-to-image pipelines can also be used in text-to-image tasks, to provide visual guidance to the text-guided generation process.
 
-One of the most popular use cases of image-to-image is style transfer. Style transfer models can convert a normal photography into a painting in the style of a famous painter.
-
-## Task Variants
+## Use Cases
 
 ### Image inpainting
 
-Image inpainting is widely used during photography editing to remove unwanted objects, such as poles, wires, or sensor
-dust.
+Image inpainting is widely used during photography editing to remove unwanted objects, such as poles, wires, or sensor dust.
 
 ### Image colorization
 
@@ -24,18 +19,27 @@ Super-resolution models increase the resolution of an image, allowing for higher
 You can use pipelines for image-to-image in 🧨diffusers library to easily use image-to-image models. See an example for `StableDiffusionImg2ImgPipeline` below.
 
 ```python
-from PIL import Image
-from diffusers import StableDiffusionImg2ImgPipeline
+import torch
+from diffusers import AutoPipelineForImage2Image
+from diffusers.utils import make_image_grid, load_image
 
-model_id_or_path = "runwayml/stable-diffusion-v1-5"
-pipe = StableDiffusionImg2ImgPipeline.from_pretrained(model_id_or_path, torch_dtype=torch.float16)
-pipe = pipe.to(cuda)
+pipeline = AutoPipelineForImage2Image.from_pretrained(
+    "stabilityai/stable-diffusion-xl-refiner-1.0", torch_dtype=torch.float16, variant="fp16", use_safetensors=True
+)
 
-init_image = Image.open("mountains_image.jpeg").convert("RGB").resize((768, 512))
-prompt = "A fantasy landscape, trending on artstation"
+# SDXL is fairly heavy, so reduce memory usage by offloading the model to the CPU;
+# this should not noticeably hurt performance.
+pipeline.enable_model_cpu_offload()
 
-images = pipe(prompt=prompt, image=init_image, strength=0.75, guidance_scale=7.5).images
-images[0].save("fantasy_landscape.png")
+# prepare image
+url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/img2img-sdxl-init.png"
+init_image = load_image(url)
+
+prompt = "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k"
+
+# pass prompt and image to pipeline
+image = pipeline(prompt, image=init_image, strength=0.5).images[0]
+make_image_grid([init_image, image], rows=1, cols=2)
 ```
 
 You can use [huggingface.js](https://github.com/huggingface/huggingface.js) to infer image-to-image models on Hugging Face Hub.
@@ -53,13 +57,53 @@ await inference.imageToImage({
 });
 ```
 
-## ControlNet
+## Use Cases for Text-Guided Image Generation
 
-Controlling the outputs of diffusion models only with a text prompt is a challenging problem. ControlNet is a neural network model that provides image-based control to diffusion models. Control images can be edges or other landmarks extracted from a source image.
+### Style Transfer
+
+One of the most popular use cases of image-to-image is style transfer. With style transfer models:
 
-Many ControlNet models were trained in our community event, JAX Diffusers sprint. You can see the full list of the ControlNet models available [here](https://huggingface.co/spaces/jax-diffusers-event/leaderboard).
+- a regular photo can be transformed into a variety of artistic styles or genres, such as a watercolor painting, a comic book illustration and more.
+- new images can be generated using a text prompt, in the style of a reference input image.
+
+See 🧨diffusers example for style transfer with `AutoPipelineForText2Image` below.
+
+```python
+from diffusers import AutoPipelineForText2Image
+from diffusers.utils import load_image
+import torch
+
+# load pipeline
+pipeline = AutoPipelineForText2Image.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16).to("cuda")
+pipeline.load_ip_adapter("h94/IP-Adapter", subfolder="sdxl_models", weight_name="ip-adapter_sdxl.bin")
+
+# set the adapter and scales - this is a component that lets us add the style control from an image to the text-to-image model
+scale = {
+    "down": {"block_2": [0.0, 1.0]},
+    "up": {"block_0": [0.0, 1.0, 0.0]},
+}
+pipeline.set_ip_adapter_scale(scale)
+
+style_image = load_image("https://huggingface.co/datasets/huggingface/documentation-images/resolve/0052a70beed5bf71b92610a43a52df6d286cd5f3/diffusers/rabbit.jpg")
+
+generator = torch.Generator(device="cpu").manual_seed(26)
+image = pipeline(
+    prompt="a cat, masterpiece, best quality, high quality",
+    ip_adapter_image=style_image,
+    negative_prompt="text, watermark, lowres, low quality, worst quality, deformed, glitch, low contrast, noisy, saturation, blurry",
+    guidance_scale=5,
+    num_inference_steps=30,
+    generator=generator,
+).images[0]
+image
+```
+
+### ControlNet
+
+Controlling the outputs of diffusion models only with a text prompt is a challenging problem. ControlNet is a neural network model that provides image-based control to diffusion models. Control images can be edges or other landmarks extracted from a source image.
+![Examples](https://huggingface.co/datasets/optimum/documentation-images/resolve/main/neuron/models/12-sdxl-text2img-controlnet.png)
 
-## Most Used Model for the Task
+## Pix2Pix
 
 Pix2Pix is a popular model used for image-to-image translation tasks. It is based on a conditional-GAN (generative adversarial network) where instead of a noise vector a 2D image is given as input. More information about Pix2Pix can be retrieved from this [link](https://phillipi.github.io/pix2pix/) where the associated paper and the GitHub repository can be found.
 
@@ -70,8 +114,13 @@ The images below show some examples extracted from the Pix2Pix paper. This model
 ## Useful Resources
 
 - [Image-to-image guide with diffusers](https://huggingface.co/docs/diffusers/using-diffusers/img2img)
+- Image inpainting: [inpainting with 🧨diffusers](https://huggingface.co/docs/diffusers/main/en/api/pipelines/stable_diffusion/inpaint), [demo](https://huggingface.co/spaces/diffusers/stable-diffusion-xl-inpainting)
+- Colorization: [demo](https://huggingface.co/spaces/modelscope/old_photo_restoration)
+- Super resolution: [image upscaling with 🧨diffusers](https://huggingface.co/docs/diffusers/main/en/api/pipelines/stable_diffusion/upscale#super-resolution), [demo](https://huggingface.co/spaces/radames/Enhance-This-HiDiffusion-SDXL)
+- [Style transfer and layout control with diffusers 🧨](https://huggingface.co/docs/diffusers/main/en/using-diffusers/ip_adapter#style--layout-control)
 - [Train your ControlNet with diffusers 🧨](https://huggingface.co/blog/train-your-controlnet)
 - [Ultra fast ControlNet with 🧨 Diffusers](https://huggingface.co/blog/controlnet)
+- [List of ControlNets trained in the community JAX Diffusers sprint](https://huggingface.co/spaces/jax-diffusers-event/leaderboard)
 
 ## References
 

diff --git a/packages/tasks/src/tasks/image-to-image/data.ts b/packages/tasks/src/tasks/image-to-image/data.ts
@@ -93,7 +93,7 @@ const taskData: TaskDataCustom = {
 		},
 	],
 	summary:
-		"Image-to-image is the task of transforming a source image to match the characteristics of a target image or a target image domain. Any image manipulation and enhancement is possible with image to image models.",
+		"Image-to-image is the task of transforming an input image through a variety of possible manipulations and enhancements, such as super-resolution, image inpainting, colorization, and more.",
 	widgetModels: ["stabilityai/stable-diffusion-2-inpainting"],
 	youtubeId: "",
 };

diff --git a/packages/tasks/src/tasks/index.ts b/packages/tasks/src/tasks/index.ts
@@ -126,6 +126,7 @@ export const TASKS_MODEL_LIBRARIES: Record<PipelineType, ModelLibraryKey[]> = {
 	"image-to-image": ["diffusers", "transformers", "transformers.js"],
 	"image-to-text": ["transformers", "transformers.js"],
 	"image-to-video": ["diffusers"],
+	"keypoint-detection": ["transformers"],
 	"video-classification": ["transformers"],
 	"mask-generation": ["transformers"],
 	"multiple-choice": ["transformers"],
@@ -169,6 +170,7 @@ export const TASKS_MODEL_LIBRARIES: Record<PipelineType, ModelLibraryKey[]> = {
 	"zero-shot-object-detection": ["transformers", "transformers.js"],
 	"text-to-3d": ["diffusers"],
 	"image-to-3d": ["diffusers"],
+	"any-to-any": ["transformers"],
 };
 
 /**
@@ -190,6 +192,7 @@ function getData(type: PipelineType, partialTaskData: TaskDataCustom = placehold
 // Tasks that call getData() without the second argument will
 // have a "placeholder" page.
 export const TASKS_DATA: Record<PipelineType, TaskData | undefined> = {
+	"any-to-any": getData("any-to-any", placeholder),
 	"audio-classification": getData("audio-classification", audioClassification),
 	"audio-to-audio": getData("audio-to-audio", audioToAudio),
 	"automatic-speech-recognition": getData("automatic-speech-recognition", automaticSpeechRecognition),
@@ -205,6 +208,7 @@ export const TASKS_DATA: Record<PipelineType, TaskData | undefined> = {
 	"image-text-to-text": getData("image-text-to-text", imageTextToText),
 	"image-to-text": getData("image-to-text", imageToText),
 	"image-to-video": undefined,
+	"keypoint-detection": getData("keypoint-detection", placeholder),
 	"mask-generation": getData("mask-generation", maskGeneration),
 	"multiple-choice": undefined,
 	"object-detection": getData("object-detection", objectDetection),