From 80cd45661ced1df0c1a19d5ee20e96b850c4021d Mon Sep 17 00:00:00 2001 From: "Gilad S." <7817232+giladgd@users.noreply.github.com> Date: Tue, 27 Aug 2024 14:22:10 +0300 Subject: [PATCH 01/19] :bug: Jinja - treat `none` as `null` instead of a variable name (#846) I encountered the issue where when a Jinja template sets a variable to `none`, it treats it as a variable name and sets its value to `undefined`, and then checking that the variable value is not `none` fails. Given this template: ``` {%- if not null_val is defined -%} {%- set null_val = none -%} {%- endif -%} {%- if null_val is not none -%} {{- 'fail' -}} {%- else -%} {{- 'pass' -}} {%- endif -%} ``` The current code will set `null_val` to `undefined`, then `null_val is not none` will be interpreted as `true` (since `undefined !== null`), and thus it'll render "fail". This PR fixes that by interpreting a variable named `none` as a `NullValue`, since `none` should be a reserved keyword. --------- Co-authored-by: Joshua Lochner --- packages/jinja/src/ast.ts | 7 ++++ packages/jinja/src/lexer.ts | 4 +++ packages/jinja/src/parser.ts | 6 ++++ packages/jinja/src/runtime.ts | 4 +++ packages/jinja/test/templates.test.js | 50 ++++++++++++++++++++++++++- 5 files changed, 70 insertions(+), 1 deletion(-) diff --git a/packages/jinja/src/ast.ts b/packages/jinja/src/ast.ts index 0f7e949ce..4f08a29aa 100644 --- a/packages/jinja/src/ast.ts +++ b/packages/jinja/src/ast.ts @@ -146,6 +146,13 @@ export class BooleanLiteral extends Literal { override type = "BooleanLiteral"; } +/** + * Represents null (none) in the template. + */ +export class NullLiteral extends Literal { + override type = "NullLiteral"; +} + /** * Represents an array literal in the template. 
*/ diff --git a/packages/jinja/src/lexer.ts b/packages/jinja/src/lexer.ts index fd8ea58c1..6dfbecdbd 100644 --- a/packages/jinja/src/lexer.ts +++ b/packages/jinja/src/lexer.ts @@ -6,6 +6,7 @@ export const TOKEN_TYPES = Object.freeze({ NumericLiteral: "NumericLiteral", // e.g., 123 BooleanLiteral: "BooleanLiteral", // true or false + NullLiteral: "NullLiteral", // none StringLiteral: "StringLiteral", // 'string' Identifier: "Identifier", // Variables, functions, etc. Equals: "Equals", // = @@ -73,6 +74,7 @@ const KEYWORDS = Object.freeze({ // Literals true: TOKEN_TYPES.BooleanLiteral, false: TOKEN_TYPES.BooleanLiteral, + none: TOKEN_TYPES.NullLiteral, // NOTE: According to the Jinja docs: The special constants true, false, and none are indeed lowercase. // Because that caused confusion in the past, (True used to expand to an undefined variable that was considered false), @@ -80,6 +82,7 @@ const KEYWORDS = Object.freeze({ // you should use the lowercase versions. True: TOKEN_TYPES.BooleanLiteral, False: TOKEN_TYPES.BooleanLiteral, + None: TOKEN_TYPES.NullLiteral, }); /** @@ -271,6 +274,7 @@ export function tokenize(source: string, options: PreprocessOptions = {}): Token case TOKEN_TYPES.Identifier: case TOKEN_TYPES.NumericLiteral: case TOKEN_TYPES.BooleanLiteral: + case TOKEN_TYPES.NullLiteral: case TOKEN_TYPES.StringLiteral: case TOKEN_TYPES.CloseParen: case TOKEN_TYPES.CloseSquareBracket: diff --git a/packages/jinja/src/parser.ts b/packages/jinja/src/parser.ts index 37891c1b6..e99c1e6c1 100644 --- a/packages/jinja/src/parser.ts +++ b/packages/jinja/src/parser.ts @@ -12,6 +12,7 @@ import { NumericLiteral, StringLiteral, BooleanLiteral, + NullLiteral, ArrayLiteral, ObjectLiteral, BinaryExpression, @@ -486,6 +487,8 @@ export function parse(tokens: Token[]): Program { if (filter instanceof BooleanLiteral) { // Special case: treat boolean literals as identifiers filter = new Identifier(filter.value.toString()); + } else if (filter instanceof NullLiteral) { + filter = 
new Identifier("none"); } if (!(filter instanceof Identifier)) { throw new SyntaxError(`Expected identifier for the test`); @@ -527,6 +530,9 @@ export function parse(tokens: Token[]): Program { case TOKEN_TYPES.BooleanLiteral: ++current; return new BooleanLiteral(token.value.toLowerCase() === "true"); + case TOKEN_TYPES.NullLiteral: + ++current; + return new NullLiteral(null); case TOKEN_TYPES.Identifier: ++current; return new Identifier(token.value); diff --git a/packages/jinja/src/runtime.ts b/packages/jinja/src/runtime.ts index 7f2a1e5ea..2dfe9a2a3 100644 --- a/packages/jinja/src/runtime.ts +++ b/packages/jinja/src/runtime.ts @@ -2,6 +2,7 @@ import type { NumericLiteral, StringLiteral, BooleanLiteral, + NullLiteral, ArrayLiteral, Statement, Program, @@ -257,6 +258,7 @@ export class Environment { ], ["false", (operand) => operand.type === "BooleanValue" && !(operand as BooleanValue).value], ["true", (operand) => operand.type === "BooleanValue" && (operand as BooleanValue).value], + ["none", (operand) => operand.type === "NullValue"], ["string", (operand) => operand.type === "StringValue"], ["number", (operand) => operand.type === "NumericValue"], ["integer", (operand) => operand.type === "NumericValue" && Number.isInteger((operand as NumericValue).value)], @@ -1039,6 +1041,8 @@ export class Interpreter { return new StringValue((statement as StringLiteral).value); case "BooleanLiteral": return new BooleanValue((statement as BooleanLiteral).value); + case "NullLiteral": + return new NullValue((statement as NullLiteral).value); case "ArrayLiteral": return new ArrayValue((statement as ArrayLiteral).value.map((x) => this.evaluate(x, environment))); case "TupleLiteral": diff --git a/packages/jinja/test/templates.test.js b/packages/jinja/test/templates.test.js index 4b44c9675..afc69555b 100644 --- a/packages/jinja/test/templates.test.js +++ b/packages/jinja/test/templates.test.js @@ -124,6 +124,9 @@ const TEST_STRINGS = { UNDEFINED_VARIABLES: `{{ undefined_variable }}`, 
UNDEFINED_ACCESS: `{{ object.undefined_attribute }}`, + // Null + NULL_VARIABLE: `{% if not null_val is defined %}{% set null_val = none %}{% endif %}{% if null_val is not none %}{{ 'fail' }}{% else %}{{ 'pass' }}{% endif %}`, + // Ternary operator TERNARY_OPERATOR: `|{{ 'a' if true else 'b' }}|{{ 'a' if false else 'b' }}|{{ 'a' if 1 + 1 == 2 else 'b' }}|{{ 'a' if 1 + 1 == 3 or 1 * 2 == 3 else 'b' }}|`, @@ -2210,7 +2213,7 @@ const TEST_PARSED = { { value: "unknown", type: "StringLiteral" }, { value: ")", type: "CloseParen" }, { value: "is", type: "Is" }, - { value: "none", type: "Identifier" }, + { value: "none", type: "NullLiteral" }, { value: "}}", type: "CloseExpression" }, { value: "|", type: "Text" }, { value: "{{", type: "OpenExpression" }, @@ -2355,6 +2358,45 @@ const TEST_PARSED = { { value: "}}", type: "CloseExpression" }, ], + // Null + NULL_VARIABLE: [ + { value: "{%", type: "OpenStatement" }, + { value: "if", type: "If" }, + { value: "not", type: "UnaryOperator" }, + { value: "null_val", type: "Identifier" }, + { value: "is", type: "Is" }, + { value: "defined", type: "Identifier" }, + { value: "%}", type: "CloseStatement" }, + { value: "{%", type: "OpenStatement" }, + { value: "set", type: "Set" }, + { value: "null_val", type: "Identifier" }, + { value: "=", type: "Equals" }, + { value: "none", type: "NullLiteral" }, + { value: "%}", type: "CloseStatement" }, + { value: "{%", type: "OpenStatement" }, + { value: "endif", type: "EndIf" }, + { value: "%}", type: "CloseStatement" }, + { value: "{%", type: "OpenStatement" }, + { value: "if", type: "If" }, + { value: "null_val", type: "Identifier" }, + { value: "is", type: "Is" }, + { value: "not", type: "UnaryOperator" }, + { value: "none", type: "NullLiteral" }, + { value: "%}", type: "CloseStatement" }, + { value: "{{", type: "OpenExpression" }, + { value: "fail", type: "StringLiteral" }, + { value: "}}", type: "CloseExpression" }, + { value: "{%", type: "OpenStatement" }, + { value: "else", type: "Else" 
}, + { value: "%}", type: "CloseStatement" }, + { value: "{{", type: "OpenExpression" }, + { value: "pass", type: "StringLiteral" }, + { value: "}}", type: "CloseExpression" }, + { value: "{%", type: "OpenStatement" }, + { value: "endif", type: "EndIf" }, + { value: "%}", type: "CloseStatement" }, + ], + // Ternary operator TERNARY_OPERATOR: [ { value: "|", type: "Text" }, @@ -2894,6 +2936,9 @@ const TEST_CONTEXT = { UNDEFINED_VARIABLES: {}, UNDEFINED_ACCESS: { object: {} }, + // Null + NULL_VARIABLE: { a: null }, + // Ternary operator TERNARY_OPERATOR: {}, @@ -3037,6 +3082,9 @@ const EXPECTED_OUTPUTS = { UNDEFINED_VARIABLES: ``, UNDEFINED_ACCESS: ``, + // Null + NULL_VARIABLE: `pass`, + // Ternary operator TERNARY_OPERATOR: `|a|b|a|b|`, From 58e667c1a1f94ee855d7cdae107caccb60f27e0a Mon Sep 17 00:00:00 2001 From: Riceball LEE Date: Tue, 27 Aug 2024 21:43:49 +0800 Subject: [PATCH 02/19] feat(jinja): add string rstrip and lstrip methods (#854) Co-authored-by: Joshua Lochner --- packages/jinja/src/runtime.ts | 12 ++++++++++ packages/jinja/test/templates.test.js | 32 +++++++++++++++++++++++++++ 2 files changed, 44 insertions(+) diff --git a/packages/jinja/src/runtime.ts b/packages/jinja/src/runtime.ts index 2dfe9a2a3..a35221c0a 100644 --- a/packages/jinja/src/runtime.ts +++ b/packages/jinja/src/runtime.ts @@ -105,6 +105,18 @@ export class StringValue extends RuntimeValue { }), ], ["length", new NumericValue(this.value.length)], + [ + "rstrip", + new FunctionValue(() => { + return new StringValue(this.value.trimEnd()); + }), + ], + [ + "lstrip", + new FunctionValue(() => { + return new StringValue(this.value.trimStart()); + }), + ], ]); } diff --git a/packages/jinja/test/templates.test.js b/packages/jinja/test/templates.test.js index afc69555b..7ce91b54b 100644 --- a/packages/jinja/test/templates.test.js +++ b/packages/jinja/test/templates.test.js @@ -146,6 +146,11 @@ const TEST_STRINGS = { MACROS: `{% macro hello(name) %}{{ 'Hello ' + name }}{% endmacro %}|{{ 
hello('Bob') }}|{{ hello('Alice') }}|`, MACROS_1: `{% macro hello(name, suffix='.') %}{{ 'Hello ' + name + suffix }}{% endmacro %}|{{ hello('A') }}|{{ hello('B', '!') }}|{{ hello('C', suffix='?') }}|`, MACROS_2: `{% macro fn(x, y=2, z=3) %}{{ x + ',' + y + ',' + z }}{% endmacro %}|{{ fn(1) }}|{{ fn(1, 0) }}|{{ fn(1, 0, -1) }}|{{ fn(1, y=0, z=-1) }}|{{ fn(1, z=0) }}|`, + + //rstrip + RSTRIP: `{{ " test it ".rstrip() }}`, + //lstrip + LSTRIP: `{{ " test it ".lstrip() }}`, }; const TEST_PARSED = { @@ -2716,6 +2721,25 @@ const TEST_PARSED = { { value: "}}", type: "CloseExpression" }, { value: "|", type: "Text" }, ], + + RSTRIP: [ + { value: "{{", type: "OpenExpression" }, + { value: " test it ", type: "StringLiteral" }, + { value: ".", type: "Dot" }, + { value: "rstrip", type: "Identifier" }, + { value: "(", type: "OpenParen" }, + { value: ")", type: "CloseParen" }, + { value: "}}", type: "CloseExpression" }, + ], + LSTRIP: [ + { value: "{{", type: "OpenExpression" }, + { value: " test it ", type: "StringLiteral" }, + { value: ".", type: "Dot" }, + { value: "lstrip", type: "Identifier" }, + { value: "(", type: "OpenParen" }, + { value: ")", type: "CloseParen" }, + { value: "}}", type: "CloseExpression" }, + ], }; const TEST_CONTEXT = { @@ -2960,6 +2984,10 @@ const TEST_CONTEXT = { MACROS: {}, MACROS_1: {}, MACROS_2: {}, + + //STRIP + RSTRIP: {}, + LSTRIP: {}, }; const EXPECTED_OUTPUTS = { @@ -3104,6 +3132,10 @@ const EXPECTED_OUTPUTS = { MACROS: `|Hello Bob|Hello Alice|`, MACROS_1: `|Hello A.|Hello B!|Hello C?|`, MACROS_2: `|1,2,3|1,0,3|1,0,-1|1,0,-1|1,2,0|`, + + // RSTRIP/LSTRIP + RSTRIP: ` test it`, + LSTRIP: `test it `, }; describe("Templates", () => { From 78f045845a306d94394cae0da2c9362f8467d658 Mon Sep 17 00:00:00 2001 From: enzo Date: Tue, 27 Aug 2024 10:06:59 -0400 Subject: [PATCH 03/19] wrong deeplink for diffusionbee (#852) linked to https://huggingface.slack.com/archives/C07777H04GL/p1723671712888659 --- packages/tasks/src/local-apps.ts | 2 +- 1 file 
changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/tasks/src/local-apps.ts b/packages/tasks/src/local-apps.ts index 6b672c8fc..0abc4430f 100644 --- a/packages/tasks/src/local-apps.ts +++ b/packages/tasks/src/local-apps.ts @@ -237,7 +237,7 @@ export const LOCAL_APPS = { mainTask: "text-to-image", macOSOnly: true, displayOnModelPage: (model) => model.library_name === "diffusers" && model.pipeline_tag === "text-to-image", - deeplink: (model) => new URL(`diffusionbee://open_from_hf?model=${model.id}`), + deeplink: (model) => new URL(`https://diffusionbee.com/huggingface_import?model_id=${model.id}`), }, joyfusion: { prettyLabel: "JoyFusion", From 9bed74eb75967b3b5de25df532b51802128b6e36 Mon Sep 17 00:00:00 2001 From: Vaibhav Srivastav Date: Wed, 28 Aug 2024 09:28:30 +0200 Subject: [PATCH 04/19] add cp/ edge to supported libs. (#864) Co-authored-by: Pedro Cuenca --- .../tasks/src/model-libraries-snippets.ts | 39 +++++++++++++++++++ packages/tasks/src/model-libraries.ts | 12 ++++++ 2 files changed, 51 insertions(+) diff --git a/packages/tasks/src/model-libraries-snippets.ts b/packages/tasks/src/model-libraries-snippets.ts index 3dbb03920..cf5b256f6 100644 --- a/packages/tasks/src/model-libraries-snippets.ts +++ b/packages/tasks/src/model-libraries-snippets.ts @@ -170,6 +170,45 @@ export const diffusers = (model: ModelData): string[] => { } }; +export const cartesia_pytorch = (model: ModelData): string[] => [ + `# pip install --no-binary :all: cartesia-pytorch +from cartesia_pytorch import ReneLMHeadModel +from transformers import AutoTokenizer + +model = ReneLMHeadModel.from_pretrained("${model.id}") +tokenizer = AutoTokenizer.from_pretrained("allenai/OLMo-1B-hf") + +in_message = ["Rene Descartes was"] +inputs = tokenizer(in_message, return_tensors="pt") + +outputs = model.generate(inputs.input_ids, max_length=50, top_k=100, top_p=0.99) +out_message = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0] + +print(out_message) +)`, +]; + +export const 
cartesia_mlx = (model: ModelData): string[] => [ + `import mlx.core as mx +import cartesia_mlx as cmx + +model = cmx.from_pretrained("${model.id}") +model.set_dtype(mx.float32) + +prompt = "Rene Descartes was" + +for text in model.generate( + prompt, + max_tokens=500, + eval_every_n=5, + verbose=True, + top_p=0.99, + temperature=0.85, +): + print(text, end="", flush=True) +`, +]; + export const edsnlp = (model: ModelData): string[] => { const packageName = nameWithoutNamespace(model.id).replaceAll("-", "_"); return [ diff --git a/packages/tasks/src/model-libraries.ts b/packages/tasks/src/model-libraries.ts index 00f7e81f1..52442ffd2 100644 --- a/packages/tasks/src/model-libraries.ts +++ b/packages/tasks/src/model-libraries.ts @@ -179,6 +179,18 @@ export const MODEL_LIBRARIES_UI_ELEMENTS = { repoName: "doctr", repoUrl: "https://github.com/mindee/doctr", }, + cartesia_pytorch: { + prettyLabel: "Cartesia Pytorch", + repoName: "Cartesia Pytorch", + repoUrl: "https://github.com/cartesia-ai/cartesia_pytorch", + snippets: snippets.cartesia_pytorch, + }, + cartesia_mlx: { + prettyLabel: "Cartesia MLX", + repoName: "Cartesia MLX", + repoUrl: "https://github.com/cartesia-ai/cartesia_mlx", + snippets: snippets.cartesia_mlx, + }, edsnlp: { prettyLabel: "EDS-NLP", repoName: "edsnlp", From caef9219677181c0dc0c23eb65dab67e2292b5f9 Mon Sep 17 00:00:00 2001 From: henry senyondo Date: Wed, 28 Aug 2024 04:48:43 -0400 Subject: [PATCH 05/19] Enable download stats for deepforest-tree and deepforest-bird (#868) The models are produced using Deepforest, a python package. 
https://github.com/weecology/DeepForest --------- Co-authored-by: Lucain Co-authored-by: Vaibhav Srivastav --- packages/tasks/src/model-libraries.ts | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/packages/tasks/src/model-libraries.ts b/packages/tasks/src/model-libraries.ts index 52442ffd2..0860f9352 100644 --- a/packages/tasks/src/model-libraries.ts +++ b/packages/tasks/src/model-libraries.ts @@ -150,6 +150,13 @@ export const MODEL_LIBRARIES_UI_ELEMENTS = { filter: false, countDownloads: `path:"adapter_config.json"`, }, + deepforest: { + prettyLabel: "DeepForest", + repoName: "deepforest", + docsUrl: "https://deepforest.readthedocs.io/en/latest/", + repoUrl: "https://github.com/weecology/DeepForest", + countDownloads: `path_extension:"pt"`, + }, "depth-anything-v2": { prettyLabel: "DepthAnythingV2", repoName: "Depth Anything V2", From e815d0b5b4f97b7a6e9ff992810cdb5262c83b56 Mon Sep 17 00:00:00 2001 From: Lucain Date: Thu, 29 Aug 2024 12:57:45 +0200 Subject: [PATCH 06/19] No extra newline in inference code snippets (#874) from @osanseviero suggestion in https://github.com/huggingface/hub-docs/pull/1398#discussion_r1735265611 Let's remove the extra newlines at the end of code snippets (if not already the case) --- packages/tasks/src/snippets/curl.ts | 9 +++------ packages/tasks/src/snippets/js.ts | 3 +-- packages/tasks/src/snippets/python.ts | 3 +-- 3 files changed, 5 insertions(+), 10 deletions(-) diff --git a/packages/tasks/src/snippets/curl.ts b/packages/tasks/src/snippets/curl.ts index d3fbc0bcb..2104a0c29 100644 --- a/packages/tasks/src/snippets/curl.ts +++ b/packages/tasks/src/snippets/curl.ts @@ -7,8 +7,7 @@ export const snippetBasic = (model: ModelDataMinimal, accessToken: string): stri -X POST \\ -d '{"inputs": ${getModelInputSnippet(model, true)}}' \\ -H 'Content-Type: application/json' \\ - -H "Authorization: Bearer ${accessToken || `{API_TOKEN}`}" -`; + -H "Authorization: Bearer ${accessToken || `{API_TOKEN}`}"`; export const 
snippetTextGeneration = (model: ModelDataMinimal, accessToken: string): string => { if (model.config?.tokenizer_config?.chat_template) { @@ -33,15 +32,13 @@ export const snippetZeroShotClassification = (model: ModelDataMinimal, accessTok -X POST \\ -d '{"inputs": ${getModelInputSnippet(model, true)}, "parameters": {"candidate_labels": ["refund", "legal", "faq"]}}' \\ -H 'Content-Type: application/json' \\ - -H "Authorization: Bearer ${accessToken || `{API_TOKEN}`}" -`; + -H "Authorization: Bearer ${accessToken || `{API_TOKEN}`}"`; export const snippetFile = (model: ModelDataMinimal, accessToken: string): string => `curl https://api-inference.huggingface.co/models/${model.id} \\ -X POST \\ --data-binary '@${getModelInputSnippet(model, true, true)}' \\ - -H "Authorization: Bearer ${accessToken || `{API_TOKEN}`}" -`; + -H "Authorization: Bearer ${accessToken || `{API_TOKEN}`}"`; export const curlSnippets: Partial string>> = { // Same order as in js/src/lib/interfaces/Types.ts diff --git a/packages/tasks/src/snippets/js.ts b/packages/tasks/src/snippets/js.ts index 83fa63a96..8a621787a 100644 --- a/packages/tasks/src/snippets/js.ts +++ b/packages/tasks/src/snippets/js.ts @@ -36,8 +36,7 @@ for await (const chunk of inference.chatCompletionStream({ max_tokens: 500, })) { process.stdout.write(chunk.choices[0]?.delta?.content || ""); -} -`; +}`; } else { return snippetBasic(model, accessToken); } diff --git a/packages/tasks/src/snippets/python.ts b/packages/tasks/src/snippets/python.ts index f0fd26397..f7c252723 100644 --- a/packages/tasks/src/snippets/python.ts +++ b/packages/tasks/src/snippets/python.ts @@ -15,8 +15,7 @@ for message in client.chat_completion( max_tokens=500, stream=True, ): - print(message.choices[0].delta.content, end="") -`; + print(message.choices[0].delta.content, end="")`; export const snippetZeroShotClassification = (model: ModelDataMinimal): string => `def query(payload): From a2e196b20bd59e5b8739efb419e4ade7cc1728a3 Mon Sep 17 00:00:00 2001 From: 
henry senyondo Date: Fri, 30 Aug 2024 04:02:13 -0400 Subject: [PATCH 07/19] Add pl as model extension (#877) --- packages/tasks/src/model-libraries.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/tasks/src/model-libraries.ts b/packages/tasks/src/model-libraries.ts index 0860f9352..c32cd7adb 100644 --- a/packages/tasks/src/model-libraries.ts +++ b/packages/tasks/src/model-libraries.ts @@ -155,7 +155,7 @@ export const MODEL_LIBRARIES_UI_ELEMENTS = { repoName: "deepforest", docsUrl: "https://deepforest.readthedocs.io/en/latest/", repoUrl: "https://github.com/weecology/DeepForest", - countDownloads: `path_extension:"pt"`, + countDownloads: `path_extension:"pt" OR path_extension:"pl"`, }, "depth-anything-v2": { prettyLabel: "DepthAnythingV2", From 3324d6968a4f49eac59709e989570dda6e565706 Mon Sep 17 00:00:00 2001 From: machineuser Date: Fri, 30 Aug 2024 08:02:54 +0000 Subject: [PATCH 08/19] =?UTF-8?q?=F0=9F=94=96=20@hugginface/tasks=200.11.1?= =?UTF-8?q?2?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- packages/tasks/package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/tasks/package.json b/packages/tasks/package.json index 752c1a7ff..06ea1cc0b 100644 --- a/packages/tasks/package.json +++ b/packages/tasks/package.json @@ -1,7 +1,7 @@ { "name": "@huggingface/tasks", "packageManager": "pnpm@8.10.5", - "version": "0.11.11", + "version": "0.11.12", "description": "List of ML tasks for huggingface.co/tasks", "repository": "https://github.com/huggingface/huggingface.js.git", "publishConfig": { From bf73137ad117a287336435ae53f9018b5b7b75ca Mon Sep 17 00:00:00 2001 From: Lucain Date: Fri, 30 Aug 2024 10:50:07 +0200 Subject: [PATCH 09/19] Update specification for docs (#872) This PR should prove useful for the ongoing work of generating documentation pages based on the input/output specs (see https://github.com/huggingface/hub-docs/pull/1379). 
In particular, I've made changes when adding new pages to the docs (PRs https://github.com/huggingface/hub-docs/pull/1398 and https://github.com/huggingface/hub-docs/pull/1399). This PR is now ready for review. **Changes:** - use enums instead of `oneOf` + list of `const` - do not rely on `"$ref": "/inference/schemas/text2text-generation/input.json",` for Summarization / Translation. Makes things clearer + it's now possible to extend the parameters which was not possible before. - typo in `text-to-image` - add `src_lang` and `tgt_lang` in translation params - use enum for `early_stopping` parameter (in common defs) - for `audio-classification`, `automatic speech recognition`, `image classification`, `image to image`, `object detection`: - mention `base64-encoded string` as input - mention raw data can be sent if no parameters in the JSON payload - more descriptions in `object detection` - more descriptions in `image segmentation` - mention bytes output in `text-to-image` and `image-to-image` --- .../tasks/audio-classification/inference.ts | 5 +- .../audio-classification/spec/input.json | 3 +- .../audio-classification/spec/output.json | 1 + .../automatic-speech-recognition/inference.ts | 5 +- .../spec/input.json | 3 +- .../tasks/src/tasks/common-definitions.json | 23 ++------- .../tasks/image-classification/inference.ts | 5 +- .../image-classification/spec/input.json | 3 +- .../image-classification/spec/output.json | 1 + .../src/tasks/image-segmentation/inference.ts | 16 ++++--- .../tasks/image-segmentation/spec/input.json | 15 ++---- .../tasks/image-segmentation/spec/output.json | 7 +-- .../src/tasks/image-to-image/inference.ts | 11 +++-- .../src/tasks/image-to-image/spec/input.json | 5 +- .../src/tasks/image-to-image/spec/output.json | 2 +- packages/tasks/src/tasks/index.ts | 7 +-- .../src/tasks/object-detection/inference.ts | 21 +++++++-- .../tasks/object-detection/spec/input.json | 3 +- .../tasks/object-detection/spec/output.json | 16 ++++---
.../src/tasks/summarization/inference.ts | 25 +++++----- .../src/tasks/summarization/spec/input.json | 39 ++++++++++++++- .../text-classification/spec/output.json | 1 + .../src/tasks/text-to-image/inference.ts | 4 +- .../src/tasks/text-to-image/spec/input.json | 2 +- .../src/tasks/text-to-image/spec/output.json | 2 +- .../tasks/src/tasks/translation/inference.ts | 31 ++++++++---- .../src/tasks/translation/spec/input.json | 47 ++++++++++++++++++- .../zero-shot-classification/spec/output.json | 1 + 28 files changed, 199 insertions(+), 105 deletions(-) diff --git a/packages/tasks/src/tasks/audio-classification/inference.ts b/packages/tasks/src/tasks/audio-classification/inference.ts index ee61c7052..b8ed3f50f 100644 --- a/packages/tasks/src/tasks/audio-classification/inference.ts +++ b/packages/tasks/src/tasks/audio-classification/inference.ts @@ -8,9 +8,10 @@ */ export interface AudioClassificationInput { /** - * The input audio data + * The input audio data as a base64-encoded string. If no `parameters` are provided, you can + * also provide the audio data as a raw bytes payload. */ - inputs: unknown; + inputs: string; /** * Additional inference parameters */ diff --git a/packages/tasks/src/tasks/audio-classification/spec/input.json b/packages/tasks/src/tasks/audio-classification/spec/input.json index d0372bb68..0bfeb69cd 100644 --- a/packages/tasks/src/tasks/audio-classification/spec/input.json +++ b/packages/tasks/src/tasks/audio-classification/spec/input.json @@ -6,7 +6,8 @@ "type": "object", "properties": { "inputs": { - "description": "The input audio data" + "description": "The input audio data as a base64-encoded string. 
If no `parameters` are provided, you can also provide the audio data as a raw bytes payload.", + "type": "string" }, "parameters": { "description": "Additional inference parameters", diff --git a/packages/tasks/src/tasks/audio-classification/spec/output.json b/packages/tasks/src/tasks/audio-classification/spec/output.json index dac7a9225..f1f2dfe8e 100644 --- a/packages/tasks/src/tasks/audio-classification/spec/output.json +++ b/packages/tasks/src/tasks/audio-classification/spec/output.json @@ -5,6 +5,7 @@ "description": "Outputs for Audio Classification inference", "type": "array", "items": { + "type": "object", "$ref": "/inference/schemas/common-definitions.json#/definitions/ClassificationOutput" } } diff --git a/packages/tasks/src/tasks/automatic-speech-recognition/inference.ts b/packages/tasks/src/tasks/automatic-speech-recognition/inference.ts index dfc501519..c8ef9b9bc 100644 --- a/packages/tasks/src/tasks/automatic-speech-recognition/inference.ts +++ b/packages/tasks/src/tasks/automatic-speech-recognition/inference.ts @@ -9,9 +9,10 @@ */ export interface AutomaticSpeechRecognitionInput { /** - * The input audio data + * The input audio data as a base64-encoded string. If no `parameters` are provided, you can + * also provide the audio data as a raw bytes payload. */ - inputs: unknown; + inputs: string; /** * Additional inference parameters */ diff --git a/packages/tasks/src/tasks/automatic-speech-recognition/spec/input.json b/packages/tasks/src/tasks/automatic-speech-recognition/spec/input.json index 691c7f4b7..a618c68a3 100644 --- a/packages/tasks/src/tasks/automatic-speech-recognition/spec/input.json +++ b/packages/tasks/src/tasks/automatic-speech-recognition/spec/input.json @@ -6,7 +6,8 @@ "type": "object", "properties": { "inputs": { - "description": "The input audio data" + "description": "The input audio data as a base64-encoded string. 
If no `parameters` are provided, you can also provide the audio data as a raw bytes payload.", + "type": "string" }, "parameters": { "description": "Additional inference parameters", diff --git a/packages/tasks/src/tasks/common-definitions.json b/packages/tasks/src/tasks/common-definitions.json index f78d3d9e4..744893e4d 100644 --- a/packages/tasks/src/tasks/common-definitions.json +++ b/packages/tasks/src/tasks/common-definitions.json @@ -7,17 +7,7 @@ "title": "ClassificationOutputTransform", "type": "string", "description": "The function to apply to the model outputs in order to retrieve the scores.", - "oneOf": [ - { - "const": "sigmoid" - }, - { - "const": "softmax" - }, - { - "const": "none" - } - ] + "enum": ["sigmoid", "softmax", "none"] }, "ClassificationOutput": { "title": "ClassificationOutput", @@ -84,16 +74,9 @@ "description": "Whether to use sampling instead of greedy decoding when generating new tokens." }, "early_stopping": { + "type": ["boolean", "string"], "description": "Controls the stopping condition for beam-based methods.", - "oneOf": [ - { - "type": "boolean" - }, - { - "const": "never", - "type": "string" - } - ] + "enum": ["never", true, false] }, "num_beams": { "type": "integer", diff --git a/packages/tasks/src/tasks/image-classification/inference.ts b/packages/tasks/src/tasks/image-classification/inference.ts index e0689d887..bd56a7d31 100644 --- a/packages/tasks/src/tasks/image-classification/inference.ts +++ b/packages/tasks/src/tasks/image-classification/inference.ts @@ -8,9 +8,10 @@ */ export interface ImageClassificationInput { /** - * The input image data + * The input image data as a base64-encoded string. If no `parameters` are provided, you can + * also provide the image data as a raw bytes payload. 
*/ - inputs: unknown; + inputs: string; /** * Additional inference parameters */ diff --git a/packages/tasks/src/tasks/image-classification/spec/input.json b/packages/tasks/src/tasks/image-classification/spec/input.json index a8cd4273c..cf0b30ec5 100644 --- a/packages/tasks/src/tasks/image-classification/spec/input.json +++ b/packages/tasks/src/tasks/image-classification/spec/input.json @@ -6,7 +6,8 @@ "type": "object", "properties": { "inputs": { - "description": "The input image data" + "type": "string", + "description": "The input image data as a base64-encoded string. If no `parameters` are provided, you can also provide the image data as a raw bytes payload." }, "parameters": { "description": "Additional inference parameters", diff --git a/packages/tasks/src/tasks/image-classification/spec/output.json b/packages/tasks/src/tasks/image-classification/spec/output.json index 2a3264bce..3ababaf63 100644 --- a/packages/tasks/src/tasks/image-classification/spec/output.json +++ b/packages/tasks/src/tasks/image-classification/spec/output.json @@ -5,6 +5,7 @@ "title": "ImageClassificationOutput", "type": "array", "items": { + "type": "object", "$ref": "/inference/schemas/common-definitions.json#/definitions/ClassificationOutput" } } diff --git a/packages/tasks/src/tasks/image-segmentation/inference.ts b/packages/tasks/src/tasks/image-segmentation/inference.ts index 02db5cb90..4ccd36e41 100644 --- a/packages/tasks/src/tasks/image-segmentation/inference.ts +++ b/packages/tasks/src/tasks/image-segmentation/inference.ts @@ -8,9 +8,10 @@ */ export interface ImageSegmentationInput { /** - * The input image data + * The input image data as a base64-encoded string. If no `parameters` are provided, you can + * also provide the image data as a raw bytes payload. 
*/ - inputs: unknown; + inputs: string; /** * Additional inference parameters */ @@ -41,6 +42,9 @@ export interface ImageSegmentationParameters { threshold?: number; [property: string]: unknown; } +/** + * Segmentation task to be performed, depending on model capabilities. + */ export type ImageSegmentationSubtask = "instance" | "panoptic" | "semantic"; export type ImageSegmentationOutput = ImageSegmentationOutputElement[]; /** @@ -50,15 +54,15 @@ export type ImageSegmentationOutput = ImageSegmentationOutputElement[]; */ export interface ImageSegmentationOutputElement { /** - * The label of the predicted segment + * The label of the predicted segment. */ label: string; /** - * The corresponding mask as a black-and-white image + * The corresponding mask as a black-and-white image (base64-encoded). */ - mask: unknown; + mask: string; /** - * The score or confidence degreee the model has + * The score or confidence degree the model has. */ score?: number; [property: string]: unknown; diff --git a/packages/tasks/src/tasks/image-segmentation/spec/input.json b/packages/tasks/src/tasks/image-segmentation/spec/input.json index 500793554..697f8959b 100644 --- a/packages/tasks/src/tasks/image-segmentation/spec/input.json +++ b/packages/tasks/src/tasks/image-segmentation/spec/input.json @@ -6,7 +6,8 @@ "type": "object", "properties": { "inputs": { - "description": "The input image data" + "type": "string", + "description": "The input image data as a base64-encoded string. If no `parameters` are provided, you can also provide the image data as a raw bytes payload." 
}, "parameters": { "description": "Additional inference parameters", @@ -31,17 +32,7 @@ "title": "ImageSegmentationSubtask", "type": "string", "description": "Segmentation task to be performed, depending on model capabilities.", - "oneOf": [ - { - "const": "instance" - }, - { - "const": "panoptic" - }, - { - "const": "semantic" - } - ] + "enum": ["instance", "panoptic", "semantic"] }, "threshold": { "type": "number", diff --git a/packages/tasks/src/tasks/image-segmentation/spec/output.json b/packages/tasks/src/tasks/image-segmentation/spec/output.json index b20aa415e..6fa5b0d8e 100644 --- a/packages/tasks/src/tasks/image-segmentation/spec/output.json +++ b/packages/tasks/src/tasks/image-segmentation/spec/output.json @@ -10,14 +10,15 @@ "properties": { "label": { "type": "string", - "description": "The label of the predicted segment" + "description": "The label of the predicted segment." }, "mask": { - "description": "The corresponding mask as a black-and-white image" + "type": "string", + "description": "The corresponding mask as a black-and-white image (base64-encoded)." }, "score": { "type": "number", - "description": "The score or confidence degreee the model has" + "description": "The score or confidence degree the model has." } }, "required": ["label", "mask"] diff --git a/packages/tasks/src/tasks/image-to-image/inference.ts b/packages/tasks/src/tasks/image-to-image/inference.ts index bf732e070..8ba34b5ff 100644 --- a/packages/tasks/src/tasks/image-to-image/inference.ts +++ b/packages/tasks/src/tasks/image-to-image/inference.ts @@ -9,9 +9,10 @@ */ export interface ImageToImageInput { /** - * The input image data + * The input image data as a base64-encoded string. If no `parameters` are provided, you can + * also provide the image data as a raw bytes payload. 
*/ - inputs: unknown; + inputs: string; /** * Additional inference parameters */ @@ -40,14 +41,14 @@ export interface ImageToImageParameters { */ num_inference_steps?: number; /** - * The size in pixel of the output image + * The size in pixel of the output image. */ target_size?: TargetSize; [property: string]: unknown; } /** - * The size in pixel of the output image + * The size in pixel of the output image. */ export interface TargetSize { height: number; @@ -60,7 +61,7 @@ export interface TargetSize { */ export interface ImageToImageOutput { /** - * The output image + * The output image returned as raw bytes in the payload. */ image?: unknown; [property: string]: unknown; diff --git a/packages/tasks/src/tasks/image-to-image/spec/input.json b/packages/tasks/src/tasks/image-to-image/spec/input.json index 873e1f20d..23695c6b1 100644 --- a/packages/tasks/src/tasks/image-to-image/spec/input.json +++ b/packages/tasks/src/tasks/image-to-image/spec/input.json @@ -6,7 +6,8 @@ "type": "object", "properties": { "inputs": { - "description": "The input image data" + "type": "string", + "description": "The input image data as a base64-encoded string. If no `parameters` are provided, you can also provide the image data as a raw bytes payload." }, "parameters": { "description": "Additional inference parameters", @@ -36,7 +37,7 @@ }, "target_size": { "type": "object", - "description": "The size in pixel of the output image", + "description": "The size in pixel of the output image.", "properties": { "width": { "type": "integer" diff --git a/packages/tasks/src/tasks/image-to-image/spec/output.json b/packages/tasks/src/tasks/image-to-image/spec/output.json index af4eff804..043544e75 100644 --- a/packages/tasks/src/tasks/image-to-image/spec/output.json +++ b/packages/tasks/src/tasks/image-to-image/spec/output.json @@ -6,7 +6,7 @@ "type": "object", "properties": { "image": { - "description": "The output image" + "description": "The output image returned as raw bytes in the payload." 
} } } diff --git a/packages/tasks/src/tasks/index.ts b/packages/tasks/src/tasks/index.ts index 5ad2f0215..b3c0e0e8e 100644 --- a/packages/tasks/src/tasks/index.ts +++ b/packages/tasks/src/tasks/index.ts @@ -73,12 +73,7 @@ export type * from "./table-question-answering/inference"; export type { TextToImageInput, TextToImageOutput, TextToImageParameters } from "./text-to-image/inference"; export type { TextToAudioParameters, TextToSpeechInput, TextToSpeechOutput } from "./text-to-speech/inference"; export type * from "./token-classification/inference"; -export type { - Text2TextGenerationParameters, - Text2TextGenerationTruncationStrategy, - TranslationInput, - TranslationOutput, -} from "./translation/inference"; +export type { TranslationInput, TranslationOutput } from "./translation/inference"; export type { ClassificationOutputTransform, TextClassificationInput, diff --git a/packages/tasks/src/tasks/object-detection/inference.ts b/packages/tasks/src/tasks/object-detection/inference.ts index d1765ad00..d117dcb0b 100644 --- a/packages/tasks/src/tasks/object-detection/inference.ts +++ b/packages/tasks/src/tasks/object-detection/inference.ts @@ -8,9 +8,10 @@ */ export interface ObjectDetectionInput { /** - * The input image data + * The input image data as a base64-encoded string. If no `parameters` are provided, you can + * also provide the image data as a raw bytes payload. */ - inputs: unknown; + inputs: string; /** * Additional inference parameters */ @@ -34,9 +35,21 @@ export interface ObjectDetectionParameters { * image. */ export interface BoundingBox { + /** + * The x-coordinate of the bottom-right corner of the bounding box. + */ xmax: number; + /** + * The x-coordinate of the top-left corner of the bounding box. + */ xmin: number; + /** + * The y-coordinate of the bottom-right corner of the bounding box. + */ ymax: number; + /** + * The y-coordinate of the top-left corner of the bounding box. 
+ */ ymin: number; [property: string]: unknown; } @@ -51,11 +64,11 @@ export interface ObjectDetectionOutputElement { */ box: BoundingBox; /** - * The predicted label for the bounding box + * The predicted label for the bounding box. */ label: string; /** - * The associated score / probability + * The associated score / probability. */ score: number; [property: string]: unknown; diff --git a/packages/tasks/src/tasks/object-detection/spec/input.json b/packages/tasks/src/tasks/object-detection/spec/input.json index b694f2fa5..d00deefec 100644 --- a/packages/tasks/src/tasks/object-detection/spec/input.json +++ b/packages/tasks/src/tasks/object-detection/spec/input.json @@ -6,7 +6,8 @@ "type": "object", "properties": { "inputs": { - "description": "The input image data" + "type": "string", + "description": "The input image data as a base64-encoded string. If no `parameters` are provided, you can also provide the image data as a raw bytes payload." }, "parameters": { "description": "Additional inference parameters", diff --git a/packages/tasks/src/tasks/object-detection/spec/output.json b/packages/tasks/src/tasks/object-detection/spec/output.json index 20c92d5d3..8d91f1078 100644 --- a/packages/tasks/src/tasks/object-detection/spec/output.json +++ b/packages/tasks/src/tasks/object-detection/spec/output.json @@ -9,11 +9,11 @@ "properties": { "label": { "type": "string", - "description": "The predicted label for the bounding box" + "description": "The predicted label for the bounding box." }, "score": { "type": "number", - "description": "The associated score / probability" + "description": "The associated score / probability." }, "box": { "$ref": "#/$defs/BoundingBox", @@ -28,16 +28,20 @@ "title": "BoundingBox", "properties": { "xmin": { - "type": "integer" + "type": "integer", + "description": "The x-coordinate of the top-left corner of the bounding box." 
}, "xmax": { - "type": "integer" + "type": "integer", + "description": "The x-coordinate of the bottom-right corner of the bounding box." }, "ymin": { - "type": "integer" + "type": "integer", + "description": "The y-coordinate of the top-left corner of the bounding box." }, "ymax": { - "type": "integer" + "type": "integer", + "description": "The y-coordinate of the bottom-right corner of the bounding box." } }, "required": ["xmin", "xmax", "ymin", "ymax"] diff --git a/packages/tasks/src/tasks/summarization/inference.ts b/packages/tasks/src/tasks/summarization/inference.ts index 2b674184f..a08e25230 100644 --- a/packages/tasks/src/tasks/summarization/inference.ts +++ b/packages/tasks/src/tasks/summarization/inference.ts @@ -6,43 +6,44 @@ /** * Inputs for Summarization inference - * - * Inputs for Text2text Generation inference */ export interface SummarizationInput { /** - * The input text data + * The input text to summarize. */ inputs: string; /** - * Additional inference parameters + * Additional inference parameters. */ - parameters?: Text2TextGenerationParameters; + parameters?: SummarizationParameters; [property: string]: unknown; } /** - * Additional inference parameters + * Additional inference parameters. * - * Additional inference parameters for Text2text Generation + * Additional inference parameters for summarization. */ -export interface Text2TextGenerationParameters { +export interface SummarizationParameters { /** * Whether to clean up the potential extra spaces in the text output. */ clean_up_tokenization_spaces?: boolean; /** - * Additional parametrization of the text generation algorithm + * Additional parametrization of the text generation algorithm. */ generate_parameters?: { [key: string]: unknown }; /** - * The truncation strategy to use + * The truncation strategy to use. 
*/ - truncation?: Text2TextGenerationTruncationStrategy; + truncation?: SummarizationTruncationStrategy; [property: string]: unknown; } -export type Text2TextGenerationTruncationStrategy = "do_not_truncate" | "longest_first" | "only_first" | "only_second"; +/** + * The truncation strategy to use. + */ +export type SummarizationTruncationStrategy = "do_not_truncate" | "longest_first" | "only_first" | "only_second"; /** * Outputs of inference for the Summarization task diff --git a/packages/tasks/src/tasks/summarization/spec/input.json b/packages/tasks/src/tasks/summarization/spec/input.json index 629da31ea..d33152857 100644 --- a/packages/tasks/src/tasks/summarization/spec/input.json +++ b/packages/tasks/src/tasks/summarization/spec/input.json @@ -1,7 +1,42 @@ { - "$ref": "/inference/schemas/text2text-generation/input.json", "$id": "/inference/schemas/summarization/input.json", "$schema": "http://json-schema.org/draft-06/schema#", + "description": "Inputs for Summarization inference", "title": "SummarizationInput", - "description": "Inputs for Summarization inference" + "type": "object", + "properties": { + "inputs": { + "description": "The input text to summarize.", + "type": "string" + }, + "parameters": { + "description": "Additional inference parameters.", + "$ref": "#/$defs/SummarizationParameters" + } + }, + "$defs": { + "SummarizationParameters": { + "title": "SummarizationParameters", + "description": "Additional inference parameters for summarization.", + "type": "object", + "properties": { + "clean_up_tokenization_spaces": { + "type": "boolean", + "description": "Whether to clean up the potential extra spaces in the text output." 
+ }, + "truncation": { + "title": "SummarizationTruncationStrategy", + "type": "string", + "description": "The truncation strategy to use.", + "enum": ["do_not_truncate", "longest_first", "only_first", "only_second"] + }, + "generate_parameters": { + "title": "generateParameters", + "type": "object", + "description": "Additional parametrization of the text generation algorithm." + } + } + } + }, + "required": ["inputs"] } diff --git a/packages/tasks/src/tasks/text-classification/spec/output.json b/packages/tasks/src/tasks/text-classification/spec/output.json index 704b82225..2bf3def35 100644 --- a/packages/tasks/src/tasks/text-classification/spec/output.json +++ b/packages/tasks/src/tasks/text-classification/spec/output.json @@ -5,6 +5,7 @@ "title": "TextClassificationOutput", "type": "array", "items": { + "type": "object", "$ref": "/inference/schemas/common-definitions.json#/definitions/ClassificationOutput" } } diff --git a/packages/tasks/src/tasks/text-to-image/inference.ts b/packages/tasks/src/tasks/text-to-image/inference.ts index 4997165b8..b2e735746 100644 --- a/packages/tasks/src/tasks/text-to-image/inference.ts +++ b/packages/tasks/src/tasks/text-to-image/inference.ts @@ -9,7 +9,7 @@ */ export interface TextToImageInput { /** - * The input text data (sometimes called "prompt" + * The input text data (sometimes called "prompt") */ inputs: string; /** @@ -64,7 +64,7 @@ export interface TargetSize { */ export interface TextToImageOutput { /** - * The generated image + * The generated image returned as raw bytes in the payload. 
*/ image: unknown; [property: string]: unknown; diff --git a/packages/tasks/src/tasks/text-to-image/spec/input.json b/packages/tasks/src/tasks/text-to-image/spec/input.json index 49acc7ed3..467b848f6 100644 --- a/packages/tasks/src/tasks/text-to-image/spec/input.json +++ b/packages/tasks/src/tasks/text-to-image/spec/input.json @@ -6,7 +6,7 @@ "type": "object", "properties": { "inputs": { - "description": "The input text data (sometimes called \"prompt\"", + "description": "The input text data (sometimes called \"prompt\")", "type": "string" }, "parameters": { diff --git a/packages/tasks/src/tasks/text-to-image/spec/output.json b/packages/tasks/src/tasks/text-to-image/spec/output.json index ff952a3a3..f90a1eee1 100644 --- a/packages/tasks/src/tasks/text-to-image/spec/output.json +++ b/packages/tasks/src/tasks/text-to-image/spec/output.json @@ -6,7 +6,7 @@ "type": "object", "properties": { "image": { - "description": "The generated image" + "description": "The generated image returned as raw bytes in the payload." } }, "required": ["image"] diff --git a/packages/tasks/src/tasks/translation/inference.ts b/packages/tasks/src/tasks/translation/inference.ts index d5ea57eed..a78c7e940 100644 --- a/packages/tasks/src/tasks/translation/inference.ts +++ b/packages/tasks/src/tasks/translation/inference.ts @@ -6,43 +6,54 @@ /** * Inputs for Translation inference - * - * Inputs for Text2text Generation inference */ export interface TranslationInput { /** - * The input text data + * The text to translate. 
*/ inputs: string; /** * Additional inference parameters */ - parameters?: Text2TextGenerationParameters; + parameters?: TranslationParameters; [property: string]: unknown; } /** * Additional inference parameters * - * Additional inference parameters for Text2text Generation + * Additional inference parameters for Translation */ -export interface Text2TextGenerationParameters { +export interface TranslationParameters { /** * Whether to clean up the potential extra spaces in the text output. */ clean_up_tokenization_spaces?: boolean; /** - * Additional parametrization of the text generation algorithm + * Additional parametrization of the text generation algorithm. */ generate_parameters?: { [key: string]: unknown }; /** - * The truncation strategy to use + * The source language of the text. Required for models that can translate from multiple + * languages. + */ + src_lang?: string; + /** + * Target language to translate to. Required for models that can translate to multiple + * languages. */ - truncation?: Text2TextGenerationTruncationStrategy; + tgt_lang?: string; + /** + * The truncation strategy to use. + */ + truncation?: TranslationTruncationStrategy; [property: string]: unknown; } -export type Text2TextGenerationTruncationStrategy = "do_not_truncate" | "longest_first" | "only_first" | "only_second"; +/** + * The truncation strategy to use. 
+ */ +export type TranslationTruncationStrategy = "do_not_truncate" | "longest_first" | "only_first" | "only_second"; /** * Outputs of inference for the Translation task diff --git a/packages/tasks/src/tasks/translation/spec/input.json b/packages/tasks/src/tasks/translation/spec/input.json index 0695bc672..0c2d196cf 100644 --- a/packages/tasks/src/tasks/translation/spec/input.json +++ b/packages/tasks/src/tasks/translation/spec/input.json @@ -1,7 +1,50 @@ { - "$ref": "/inference/schemas/text2text-generation/input.json", "$id": "/inference/schemas/translation/input.json", "$schema": "http://json-schema.org/draft-06/schema#", + "description": "Inputs for Translation inference", "title": "TranslationInput", - "description": "Inputs for Translation inference" + "type": "object", + "properties": { + "inputs": { + "description": "The text to translate.", + "type": "string" + }, + "parameters": { + "description": "Additional inference parameters", + "$ref": "#/$defs/TranslationParameters" + } + }, + "$defs": { + "TranslationParameters": { + "title": "TranslationParameters", + "description": "Additional inference parameters for Translation", + "type": "object", + "properties": { + "src_lang": { + "type": "string", + "description": "The source language of the text. Required for models that can translate from multiple languages." + }, + "tgt_lang": { + "type": "string", + "description": "Target language to translate to. Required for models that can translate to multiple languages." + }, + "clean_up_tokenization_spaces": { + "type": "boolean", + "description": "Whether to clean up the potential extra spaces in the text output." 
+ }, + "truncation": { + "title": "TranslationTruncationStrategy", + "type": "string", + "description": "The truncation strategy to use.", + "enum": ["do_not_truncate", "longest_first", "only_first", "only_second"] + }, + "generate_parameters": { + "title": "generateParameters", + "type": "object", + "description": "Additional parametrization of the text generation algorithm." + } + } + } + }, + "required": ["inputs"] } diff --git a/packages/tasks/src/tasks/zero-shot-classification/spec/output.json b/packages/tasks/src/tasks/zero-shot-classification/spec/output.json index 83ed1098f..1b5ac0cc3 100644 --- a/packages/tasks/src/tasks/zero-shot-classification/spec/output.json +++ b/packages/tasks/src/tasks/zero-shot-classification/spec/output.json @@ -5,6 +5,7 @@ "title": "ZeroShotClassificationOutput", "type": "array", "items": { + "type": "object", "$ref": "/inference/schemas/common-definitions.json#/definitions/ClassificationOutput" } } From 94cb7fec9ca5cc33e38a509e19196e1526153ca4 Mon Sep 17 00:00:00 2001 From: Arda Atahan Ibis Date: Fri, 30 Aug 2024 01:52:06 -0700 Subject: [PATCH 10/19] Add Argmax DiffusionKit Snippet (#869) This PR: 1. Adds a new `diffusionkit` snippet in [model-libraries-snippets.ts](https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/src/model-libraries-snippets.ts). 2. Adds `diffusionkit` library with necessary information in `MODEL_LIBRARIES_UI_ELEMENTS` in [model-libraries.ts](https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/src/model-libraries.ts). 
--------- Co-authored-by: Pedro Cuenca --- .../tasks/src/model-libraries-snippets.ts | 42 +++++++++++++++++++ packages/tasks/src/model-libraries.ts | 6 +++ 2 files changed, 48 insertions(+) diff --git a/packages/tasks/src/model-libraries-snippets.ts b/packages/tasks/src/model-libraries-snippets.ts index cf5b256f6..f9c096095 100644 --- a/packages/tasks/src/model-libraries-snippets.ts +++ b/packages/tasks/src/model-libraries-snippets.ts @@ -170,6 +170,48 @@ export const diffusers = (model: ModelData): string[] => { } }; +export const diffusionkit = (model: ModelData): string[] => { + const sd3Snippet = `# Pipeline for Stable Diffusion 3 +from diffusionkit.mlx import DiffusionPipeline + +pipeline = DiffusionPipeline( + shift=3.0, + use_t5=False, + model_version="${model.id}", + low_memory_mode=True, + a16=True, + w16=True, +)`; + + const fluxSnippet = `# Pipeline for Flux +from diffusionkit.mlx import FluxPipeline + +pipeline = FluxPipeline( + shift=1.0, + model_version="${model.id}", + low_memory_mode=True, + a16=True, + w16=True, +)`; + + const generateSnippet = `# Image Generation +HEIGHT = 512 +WIDTH = 512 +NUM_STEPS = ${model.tags.includes("flux") ? 4 : 50} +CFG_WEIGHT = ${model.tags.includes("flux") ? 0 : 5} + +image, _ = pipeline.generate_image( + "a photo of a cat", + cfg_weight=CFG_WEIGHT, + num_steps=NUM_STEPS, + latent_size=(HEIGHT // 8, WIDTH // 8), +)`; + + const pipelineSnippet = model.tags.includes("flux") ? 
fluxSnippet : sd3Snippet; + + return [pipelineSnippet, generateSnippet]; +}; + export const cartesia_pytorch = (model: ModelData): string[] => [ `# pip install --no-binary :all: cartesia-pytorch from cartesia_pytorch import ReneLMHeadModel diff --git a/packages/tasks/src/model-libraries.ts b/packages/tasks/src/model-libraries.ts index c32cd7adb..71b8747b9 100644 --- a/packages/tasks/src/model-libraries.ts +++ b/packages/tasks/src/model-libraries.ts @@ -181,6 +181,12 @@ export const MODEL_LIBRARIES_UI_ELEMENTS = { filter: true, /// diffusers has its own more complex "countDownloads" query }, + diffusionkit: { + prettyLabel: "DiffusionKit", + repoName: "DiffusionKit", + repoUrl: "https://github.com/argmaxinc/DiffusionKit", + snippets: snippets.diffusionkit, + }, doctr: { prettyLabel: "docTR", repoName: "doctr", From 678eba85a800f73d2c62408bfd30206504c8f0bd Mon Sep 17 00:00:00 2001 From: Merve Noyan Date: Mon, 2 Sep 2024 14:28:32 +0300 Subject: [PATCH 11/19] Add keypoint-detection task to Hub (#870) --------- Co-authored-by: Merve Noyan Co-authored-by: Pedro Cuenca --- packages/tasks/src/pipelines.ts | 12 ++++ packages/tasks/src/tasks/index.ts | 2 + .../src/tasks/keypoint-detection/about.md | 59 +++++++++++++++++++ .../src/tasks/keypoint-detection/data.ts | 46 +++++++++++++++ .../Icons/IconKeypointDetection.svelte | 1 + .../PipelineIcon/PipelineIcon.svelte | 2 + 6 files changed, 122 insertions(+) create mode 100644 packages/tasks/src/tasks/keypoint-detection/about.md create mode 100644 packages/tasks/src/tasks/keypoint-detection/data.ts create mode 100644 packages/widgets/src/lib/components/Icons/IconKeypointDetection.svelte diff --git a/packages/tasks/src/pipelines.ts b/packages/tasks/src/pipelines.ts index 3c9d9a6ac..7edc61605 100644 --- a/packages/tasks/src/pipelines.ts +++ b/packages/tasks/src/pipelines.ts @@ -656,6 +656,18 @@ export const PIPELINE_DATA = { name: "Video-Text-to-Text", modality: "multimodal", color: "blue", + hideInDatasets: false, + }, + 
"keypoint-detection": { + name: "Keypoint Detection", + subtasks: [ + { + type: "pose-estimation", + name: "Pose Estimation", + }, + ], + modality: "cv", + color: "red", hideInDatasets: true, }, other: { diff --git a/packages/tasks/src/tasks/index.ts b/packages/tasks/src/tasks/index.ts index b3c0e0e8e..a72bb9c88 100644 --- a/packages/tasks/src/tasks/index.ts +++ b/packages/tasks/src/tasks/index.ts @@ -126,6 +126,7 @@ export const TASKS_MODEL_LIBRARIES: Record = { "image-to-image": ["diffusers", "transformers", "transformers.js"], "image-to-text": ["transformers", "transformers.js"], "image-to-video": ["diffusers"], + "keypoint-detection": ["transformers"], "video-classification": ["transformers"], "mask-generation": ["transformers"], "multiple-choice": ["transformers"], @@ -205,6 +206,7 @@ export const TASKS_DATA: Record = { "image-text-to-text": getData("image-text-to-text", imageTextToText), "image-to-text": getData("image-to-text", imageToText), "image-to-video": undefined, + "keypoint-detection": getData("keypoint-detection", placeholder), "mask-generation": getData("mask-generation", maskGeneration), "multiple-choice": undefined, "object-detection": getData("object-detection", objectDetection), diff --git a/packages/tasks/src/tasks/keypoint-detection/about.md b/packages/tasks/src/tasks/keypoint-detection/about.md new file mode 100644 index 000000000..7067695cc --- /dev/null +++ b/packages/tasks/src/tasks/keypoint-detection/about.md @@ -0,0 +1,59 @@ +## Task Variants + +### Pose Estimation + +Pose estimation is the process of determining the position and orientation of an object or a camera in a 3D space. It is a fundamental task in computer vision and is widely used in various applications such as robotics, augmented reality, and 3D reconstruction. + +## Use Cases for Keypoint Detection + +### Facial Landmark Estimation + +Keypoint detection models can be used to estimate the position of facial landmarks. 
Facial landmarks are points on the face such as the corners of the mouth, the outer corners of the eyes, and the tip of the nose. These landmarks can be used for a variety of applications, such as facial expression recognition, 3D face reconstruction, and cinematic animation. + +### Fitness Tracking + +Keypoint detection models can be used to track the movement of the human body, e.g. position of the joints in a 3D space. This can be used for a variety of applications, such as fitness tracking, sports analysis or virtual reality applications. + +## Inference Code + +Below you can find an example of how to use a keypoint detection model and how to visualize the results. + +```python +from transformers import AutoImageProcessor, SuperPointForKeypointDetection +import torch +import matplotlib.pyplot as plt +from PIL import Image +import requests + +url_image = "http://images.cocodataset.org/val2017/000000039769.jpg" +image = Image.open(requests.get(url_image, stream=True).raw) + +# initialize the model and processor +processor = AutoImageProcessor.from_pretrained("magic-leap-community/superpoint") +model = SuperPointForKeypointDetection.from_pretrained("magic-leap-community/superpoint") + +# infer +inputs = processor(image, return_tensors="pt").to(model.device, model.dtype) +outputs = model(**inputs) + +# visualize the output +image_width, image_height = image.size +image_mask = outputs.mask +image_indices = torch.nonzero(image_mask).squeeze() + +image_scores = outputs.scores.squeeze() +image_keypoints = outputs.keypoints.squeeze() +keypoints = image_keypoints.detach().numpy() +scores = image_scores.detach().numpy() + +plt.axis('off') +plt.imshow(image) +plt.scatter( + keypoints[:, 0], + keypoints[:, 1], + s=scores * 100, + c='cyan', + alpha=0.4 +) +plt.show() +``` diff --git a/packages/tasks/src/tasks/keypoint-detection/data.ts b/packages/tasks/src/tasks/keypoint-detection/data.ts new file mode 100644 index 000000000..6b029d0e0 --- /dev/null +++ 
b/packages/tasks/src/tasks/keypoint-detection/data.ts @@ -0,0 +1,46 @@ +import type { TaskDataCustom } from ".."; + +const taskData: TaskDataCustom = { + datasets: [ + { + description: "A dataset of hand keypoints of over 500k examples.", + id: "Vincent-luo/hagrid-mediapipe-hands", + }, + ], + demo: { + inputs: [ + { + filename: "keypoint-detection-input.png", + type: "img", + }, + ], + outputs: [ + { + filename: "keypoint-detection-output.png", + type: "img", + }, + ], + }, + metrics: [], + models: [ + { + description: "A robust keypoint detection model.", + id: "magic-leap-community/superpoint", + }, + { + description: "Strong keypoint detection model used to detect human pose.", + id: "qualcomm/MediaPipe-Pose-Estimation", + }, + ], + spaces: [ + { + description: "An application that detects hand keypoints in real-time.", + id: "datasciencedojo/Hand-Keypoint-Detection-Realtime", + }, + ], + summary: "Keypoint detection is the task of identifying meaningful distinctive points or features in an image.", + widgetModels: [], + youtubeId: "", +}; + +export default taskData; diff --git a/packages/widgets/src/lib/components/Icons/IconKeypointDetection.svelte b/packages/widgets/src/lib/components/Icons/IconKeypointDetection.svelte new file mode 100644 index 000000000..312d1b11c --- /dev/null +++ b/packages/widgets/src/lib/components/Icons/IconKeypointDetection.svelte @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/packages/widgets/src/lib/components/PipelineIcon/PipelineIcon.svelte b/packages/widgets/src/lib/components/PipelineIcon/PipelineIcon.svelte index cd15d43f3..02cb84f27 100644 --- a/packages/widgets/src/lib/components/PipelineIcon/PipelineIcon.svelte +++ b/packages/widgets/src/lib/components/PipelineIcon/PipelineIcon.svelte @@ -43,6 +43,7 @@ import IconImageTo3D from "../Icons/IconImageTo3D.svelte"; import IconImageFeatureExtraction from "../Icons/IconImageFeatureExtraction.svelte"; import IconVideoTextToText from "../Icons/IconVideoTextToText.svelte"; + 
import IconKeypointDetection from "../Icons/IconKeypointDetection.svelte"; import type { WidgetType } from "@huggingface/tasks"; export let classNames = ""; @@ -96,6 +97,7 @@ "image-to-3d": IconImageTo3D, "image-feature-extraction": IconImageFeatureExtraction, "video-text-to-text": IconVideoTextToText, + "keypoint-detection": IconKeypointDetection, }; $: iconComponent = From 3bc437cafd121ce7d2b72ff01ec24885a214ba5f Mon Sep 17 00:00:00 2001 From: pngwn Date: Mon, 2 Sep 2024 22:06:20 +0800 Subject: [PATCH 12/19] Handle user and org avatars (#816) Currently if there is no avatar then the space header can display a broken image link. This PR adds a simple check to see if the avatar exists. If it doesn't then we don't add the DOM for it. I decided to do it outside of the 'component' rendering just to keep things cleaner + sync after the main function but I'm happy to change this. Could be optimised to make the fetches in parallel but the difference is probably minor in most cases. Screenshot: Screenshot 2024-07-23 at 10 19 56 cc @enzostvs @coyotte508 @julien-c --------- Co-authored-by: enzo --- .../src/header/components/content/avatar.ts | 6 ++++-- .../src/header/components/content/index.ts | 4 +++- packages/space-header/src/index.ts | 6 +++++- packages/space-header/src/type.ts | 1 + packages/space-header/src/utils/check_avatar.ts | 10 ++++++++++ packages/space-header/src/{ => utils}/get_space.ts | 3 ++- 6 files changed, 25 insertions(+), 5 deletions(-) create mode 100644 packages/space-header/src/utils/check_avatar.ts rename packages/space-header/src/{ => utils}/get_space.ts (81%) diff --git a/packages/space-header/src/header/components/content/avatar.ts b/packages/space-header/src/header/components/content/avatar.ts index 88445faa1..468b6b5a9 100644 --- a/packages/space-header/src/header/components/content/avatar.ts +++ b/packages/space-header/src/header/components/content/avatar.ts @@ -1,6 +1,8 @@ -export const Avatar = (username: string): HTMLImageElement => { 
+export const Avatar = (username: string, type: "user" | "org" = "user"): HTMLImageElement => { + const route = type === "user" ? "users" : "organizations"; + const element = document.createElement("img"); - element.src = `https://huggingface.co/api/users/${username}/avatar`; + element.src = `https://huggingface.co/api/${route}/${username}/avatar`; element.style.width = "0.875rem"; element.style.height = "0.875rem"; diff --git a/packages/space-header/src/header/components/content/index.ts b/packages/space-header/src/header/components/content/index.ts index b1679a8b0..a1b86705c 100644 --- a/packages/space-header/src/header/components/content/index.ts +++ b/packages/space-header/src/header/components/content/index.ts @@ -15,7 +15,9 @@ export const Content = (space: Space): HTMLDivElement => { content.style.paddingRight = "12px"; content.style.height = "40px"; - content.appendChild(Avatar(space.author)); + if (space.type !== "unknown") { + content.appendChild(Avatar(space.author, space.type)); + } content.appendChild(Username(space.author)); content.appendChild(Separation()); content.appendChild(Namespace(space.id)); diff --git a/packages/space-header/src/index.ts b/packages/space-header/src/index.ts index 6c66a5fba..35fe129d1 100644 --- a/packages/space-header/src/index.ts +++ b/packages/space-header/src/index.ts @@ -3,7 +3,8 @@ import type { Options, Space, Header } from "./type"; import { inject_fonts } from "./inject_fonts"; import { create } from "./header/create"; -import { get_space } from "./get_space"; +import { check_avatar } from "./utils/check_avatar"; +import { get_space } from "./utils/get_space"; import { inject } from "./inject"; async function main(initialSpace: string | Space, options?: Options) { @@ -27,6 +28,9 @@ async function main(initialSpace: string | Space, options?: Options) { space = initialSpace; } + const [user, org] = await Promise.all([check_avatar(space.author, "user"), check_avatar(space.author, "org")]); + space.type = user ?
"user" : org ? "org" : "unknown"; + const mini_header_element = create(space as Space); inject(mini_header_element, options); diff --git a/packages/space-header/src/type.ts b/packages/space-header/src/type.ts index ac50d3420..c4fe97fa5 100644 --- a/packages/space-header/src/type.ts +++ b/packages/space-header/src/type.ts @@ -2,6 +2,7 @@ export interface Space { id: string; likes: number; author: string; + type?: "user" | "org" | "unknown"; } export interface User { diff --git a/packages/space-header/src/utils/check_avatar.ts b/packages/space-header/src/utils/check_avatar.ts new file mode 100644 index 000000000..0faa666a0 --- /dev/null +++ b/packages/space-header/src/utils/check_avatar.ts @@ -0,0 +1,10 @@ +export const check_avatar = async (username: string, type: "user" | "org" = "user"): Promise<boolean> => { + const route = type === "user" ? "users" : "organizations"; + + try { + const response = await fetch(`https://huggingface.co/api/${route}/${username}/avatar`); + return response.ok; + } catch (error) { + return false; + } +}; diff --git a/packages/space-header/src/get_space.ts b/packages/space-header/src/utils/get_space.ts similarity index 81% rename from packages/space-header/src/get_space.ts rename to packages/space-header/src/utils/get_space.ts index 84a11ddeb..608350bd1 100644 --- a/packages/space-header/src/get_space.ts +++ b/packages/space-header/src/utils/get_space.ts @@ -1,9 +1,10 @@ -import type { Space } from "./type"; +import type { Space } from "./../type"; export const get_space = async (space_id: string): Promise<Space | null> => { try { const response = await fetch(`https://huggingface.co/api/spaces/${space_id}`); const data = await response.json(); + console.log(data); return data as Space; } catch (error) { return null; From fa428c2e2b0a7eb6dd8e5f0bc76e913fd311320f Mon Sep 17 00:00:00 2001 From: Luke Chang Date: Mon, 2 Sep 2024 11:35:00 -0400 Subject: [PATCH 13/19] register py-feat library (#879) The goal of this PR is to register a new library
[py-feat](https://py-feat.org/) with huggingface and add supported tasks of `image-feature-extraction`. Please let me know if I should make any changes. --------- Co-authored-by: Lucain --- packages/tasks/src/model-libraries.ts | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/packages/tasks/src/model-libraries.ts b/packages/tasks/src/model-libraries.ts index 71b8747b9..55bf8d09c 100644 --- a/packages/tasks/src/model-libraries.ts +++ b/packages/tasks/src/model-libraries.ts @@ -446,6 +446,13 @@ export const MODEL_LIBRARIES_UI_ELEMENTS = { snippets: snippets.pyannote_audio, filter: true, }, + "py-feat": { + prettyLabel: "Py-Feat", + repoName: "Py-Feat", + repoUrl: "https://github.com/cosanlab/py-feat", + docsUrl: "https://py-feat.org/", + filter: false, + }, pythae: { prettyLabel: "pythae", repoName: "pythae", From 82d822473fdcec1dd206ff1ac1576515046ce763 Mon Sep 17 00:00:00 2001 From: machineuser Date: Mon, 2 Sep 2024 16:55:39 +0000 Subject: [PATCH 14/19] =?UTF-8?q?=F0=9F=94=96=20@hugginface/tasks=200.11.1?= =?UTF-8?q?3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- packages/tasks/package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/tasks/package.json b/packages/tasks/package.json index 06ea1cc0b..25c37c4ae 100644 --- a/packages/tasks/package.json +++ b/packages/tasks/package.json @@ -1,7 +1,7 @@ { "name": "@huggingface/tasks", "packageManager": "pnpm@8.10.5", - "version": "0.11.12", + "version": "0.11.13", "description": "List of ML tasks for huggingface.co/tasks", "repository": "https://github.com/huggingface/huggingface.js.git", "publishConfig": { From 1701fac9088845de393a924f12bfd09f9ca70056 Mon Sep 17 00:00:00 2001 From: Lucain Date: Mon, 2 Sep 2024 18:59:59 +0200 Subject: [PATCH 15/19] Add seed in text to image specs (#888) Following @apolinario's PR https://github.com/huggingface/api-inference-community/pull/450. 
This PR adds a "seed" input parameter in the `text-to-image` specs. --------- Co-authored-by: Pedro Cuenca --- .../tasks/src/tasks/text-to-image/inference.ts | 14 +++++++++----- .../tasks/src/tasks/text-to-image/spec/input.json | 10 +++++++--- 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/packages/tasks/src/tasks/text-to-image/inference.ts b/packages/tasks/src/tasks/text-to-image/inference.ts index b2e735746..8c30d3e9e 100644 --- a/packages/tasks/src/tasks/text-to-image/inference.ts +++ b/packages/tasks/src/tasks/text-to-image/inference.ts @@ -26,8 +26,8 @@ export interface TextToImageInput { */ export interface TextToImageParameters { /** - * For diffusion models. A higher guidance scale value encourages the model to generate - * images closely linked to the text prompt at the expense of lower image quality. + * A higher guidance scale value encourages the model to generate images closely linked to + * the text prompt, but values too high may cause saturation and other artifacts. */ guidance_scale?: number; /** @@ -35,14 +35,18 @@ export interface TextToImageParameters { */ negative_prompt?: string[]; /** - * For diffusion models. The number of denoising steps. More denoising steps usually lead to - * a higher quality image at the expense of slower inference. + * The number of denoising steps. More denoising steps usually lead to a higher quality + * image at the expense of slower inference. */ num_inference_steps?: number; /** - * For diffusion models. Override the scheduler with a compatible one + * Override the scheduler with a compatible one. */ scheduler?: string; + /** + * Seed for the random number generator. 
+ */ + seed?: number; /** * The size in pixel of the output image */ diff --git a/packages/tasks/src/tasks/text-to-image/spec/input.json b/packages/tasks/src/tasks/text-to-image/spec/input.json index 467b848f6..569f3c33a 100644 --- a/packages/tasks/src/tasks/text-to-image/spec/input.json +++ b/packages/tasks/src/tasks/text-to-image/spec/input.json @@ -22,7 +22,7 @@ "properties": { "guidance_scale": { "type": "number", - "description": "For diffusion models. A higher guidance scale value encourages the model to generate images closely linked to the text prompt at the expense of lower image quality." + "description": "A higher guidance scale value encourages the model to generate images closely linked to the text prompt, but values too high may cause saturation and other artifacts." }, "negative_prompt": { "type": "array", @@ -33,7 +33,7 @@ }, "num_inference_steps": { "type": "integer", - "description": "For diffusion models. The number of denoising steps. More denoising steps usually lead to a higher quality image at the expense of slower inference." + "description": "The number of denoising steps. More denoising steps usually lead to a higher quality image at the expense of slower inference." }, "target_size": { "type": "object", @@ -50,7 +50,11 @@ }, "scheduler": { "type": "string", - "description": "For diffusion models. Override the scheduler with a compatible one" + "description": "Override the scheduler with a compatible one." + }, + "seed": { + "type": "integer", + "description": "Seed for the random number generator." 
} } } From e0d6c958dc3a5ce3c67b46d10d7f5224557a346d Mon Sep 17 00:00:00 2001 From: Linoy Tsaban <57615435+linoytsaban@users.noreply.github.com> Date: Tue, 3 Sep 2024 09:54:09 +0300 Subject: [PATCH 16/19] improve image-to-image task page (#867) some changes to improve clarity of task description, and general updates to improve task page --------- Co-authored-by: Pedro Cuenca Co-authored-by: Merve Noyan Co-authored-by: Omar Sanseviero --- .../tasks/src/tasks/image-to-image/about.md | 91 ++++++++++++++----- .../tasks/src/tasks/image-to-image/data.ts | 2 +- 2 files changed, 71 insertions(+), 22 deletions(-) diff --git a/packages/tasks/src/tasks/image-to-image/about.md b/packages/tasks/src/tasks/image-to-image/about.md index 63f490f82..3750b34e5 100644 --- a/packages/tasks/src/tasks/image-to-image/about.md +++ b/packages/tasks/src/tasks/image-to-image/about.md @@ -1,15 +1,10 @@ -## Use Cases - -### Style transfer +Image-to-image pipelines can also be used in text-to-image tasks, to provide visual guidance to the text-guided generation process. -One of the most popular use cases of image-to-image is style transfer. Style transfer models can convert a normal photography into a painting in the style of a famous painter. - -## Task Variants +## Use Cases ### Image inpainting -Image inpainting is widely used during photography editing to remove unwanted objects, such as poles, wires, or sensor -dust. +Image inpainting is widely used during photography editing to remove unwanted objects, such as poles, wires, or sensor dust. ### Image colorization @@ -24,18 +19,27 @@ Super-resolution models increase the resolution of an image, allowing for higher You can use pipelines for image-to-image in 🧨diffusers library to easily use image-to-image models. See an example for `StableDiffusionImg2ImgPipeline` below.
```python -from PIL import Image -from diffusers import StableDiffusionImg2ImgPipeline +import torch +from diffusers import AutoPipelineForImage2Image +from diffusers.utils import make_image_grid, load_image -model_id_or_path = "runwayml/stable-diffusion-v1-5" -pipe = StableDiffusionImg2ImgPipeline.from_pretrained(model_id_or_path, torch_dtype=torch.float16) -pipe = pipe.to(cuda) +pipeline = AutoPipelineForImage2Image.from_pretrained( + "stabilityai/stable-diffusion-xl-refiner-1.0", torch_dtype=torch.float16, variant="fp16", use_safetensors=True +) -init_image = Image.open("mountains_image.jpeg").convert("RGB").resize((768, 512)) -prompt = "A fantasy landscape, trending on artstation" +# this helps us to reduce memory usage- since SDXL is a bit heavy, this could help by +# offloading the model to CPU w/o hurting performance. +pipeline.enable_model_cpu_offload() -images = pipe(prompt=prompt, image=init_image, strength=0.75, guidance_scale=7.5).images -images[0].save("fantasy_landscape.png") +# prepare image +url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/img2img-sdxl-init.png" +init_image = load_image(url) + +prompt = "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k" + +# pass prompt and image to pipeline +image = pipeline(prompt, image=init_image, strength=0.5).images[0] +make_image_grid([init_image, image], rows=1, cols=2) ``` You can use [huggingface.js](https://github.com/huggingface/huggingface.js) to infer image-to-image models on Hugging Face Hub. @@ -53,13 +57,53 @@ await inference.imageToImage({ }); ``` -## ControlNet +## Uses Cases for Text Guided Image Generation -Controlling the outputs of diffusion models only with a text prompt is a challenging problem. ControlNet is a neural network model that provides image-based control to diffusion models. Control images can be edges or other landmarks extracted from a source image. 
+### Style Transfer + +One of the most popular use cases of image-to-image is style transfer. With style transfer models: -Many ControlNet models were trained in our community event, JAX Diffusers sprint. You can see the full list of the ControlNet models available [here](https://huggingface.co/spaces/jax-diffusers-event/leaderboard). +- a regular photo can be transformed into a variety of artistic styles or genres, such as a watercolor painting, a comic book illustration and more. +- new images can be generated using a text prompt, in the style of a reference input image. + +See 🧨diffusers example for style transfer with `AutoPipelineForText2Image` below. + +```python +from diffusers import AutoPipelineForText2Image +from diffusers.utils import load_image +import torch + +# load pipeline +pipeline = AutoPipelineForText2Image.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16).to("cuda") +pipeline.load_ip_adapter("h94/IP-Adapter", subfolder="sdxl_models", weight_name="ip-adapter_sdxl.bin") + +# set the adapter and scales - this is a component that lets us add the style control from an image to the text-to-image model +scale = { + "down": {"block_2": [0.0, 1.0]}, + "up": {"block_0": [0.0, 1.0, 0.0]}, +} +pipeline.set_ip_adapter_scale(scale) + +style_image = load_image("https://huggingface.co/datasets/huggingface/documentation-images/resolve/0052a70beed5bf71b92610a43a52df6d286cd5f3/diffusers/rabbit.jpg") + +generator = torch.Generator(device="cpu").manual_seed(26) +image = pipeline( + prompt="a cat, masterpiece, best quality, high quality", + ip_adapter_image=style_image, + negative_prompt="text, watermark, lowres, low quality, worst quality, deformed, glitch, low contrast, noisy, saturation, blurry", + guidance_scale=5, + num_inference_steps=30, + generator=generator, +).images[0] +image +``` + +### ControlNet + +Controlling the outputs of diffusion models only with a text prompt is a challenging problem.
ControlNet is a neural network model that provides image-based control to diffusion models. Control images can be edges or other landmarks extracted from a source image. +![Examples](https://huggingface.co/datasets/optimum/documentation-images/resolve/main/neuron/models/12-sdxl-text2img-controlnet.png) -## Most Used Model for the Task +## Pix2Pix Pix2Pix is a popular model used for image-to-image translation tasks. It is based on a conditional-GAN (generative adversarial network) where instead of a noise vector a 2D image is given as input. More information about Pix2Pix can be retrieved from this [link](https://phillipi.github.io/pix2pix/) where the associated paper and the GitHub repository can be found. @@ -70,8 +114,13 @@ The images below show some examples extracted from the Pix2Pix paper. This model ## Useful Resources - [Image-to-image guide with diffusers](https://huggingface.co/docs/diffusers/using-diffusers/img2img) +- Image inpainting: [inpainting with 🧨diffusers](https://huggingface.co/docs/diffusers/main/en/api/pipelines/stable_diffusion/inpaint), [demo](https://huggingface.co/spaces/diffusers/stable-diffusion-xl-inpainting) +- Colorization: [demo](https://huggingface.co/spaces/modelscope/old_photo_restoration) +- Super resolution: [image upscaling with 🧨diffusers](https://huggingface.co/docs/diffusers/main/en/api/pipelines/stable_diffusion/upscale#super-resolution), [demo](https://huggingface.co/spaces/radames/Enhance-This-HiDiffusion-SDXL) +- [Style transfer and layout control with diffusers 🧨](https://huggingface.co/docs/diffusers/main/en/using-diffusers/ip_adapter#style--layout-control) - [Train your ControlNet with diffusers 🧨](https://huggingface.co/blog/train-your-controlnet) - [Ultra fast ControlNet with 🧨 Diffusers](https://huggingface.co/blog/controlnet) +- [List of ControlNets trained in the community JAX Diffusers sprint](https://huggingface.co/spaces/jax-diffusers-event/leaderboard) ## References diff --git
a/packages/tasks/src/tasks/image-to-image/data.ts b/packages/tasks/src/tasks/image-to-image/data.ts index 99e91557a..65200fd92 100644 --- a/packages/tasks/src/tasks/image-to-image/data.ts +++ b/packages/tasks/src/tasks/image-to-image/data.ts @@ -93,7 +93,7 @@ const taskData: TaskDataCustom = { }, ], summary: - "Image-to-image is the task of transforming a source image to match the characteristics of a target image or a target image domain. Any image manipulation and enhancement is possible with image to image models.", + "Image-to-image is the task of transforming an input image through a variety of possible manipulations and enhancements, such as super-resolution, image inpainting, colorization, and more.", widgetModels: ["lllyasviel/sd-controlnet-canny"], youtubeId: "", }; From 1c9a2a281065a6da581317caeb4d5bd4174b7645 Mon Sep 17 00:00:00 2001 From: enzo Date: Tue, 3 Sep 2024 13:20:15 -0400 Subject: [PATCH 17/19] remove unused console.log (#891) --- packages/space-header/src/utils/get_space.ts | 1 - 1 file changed, 1 deletion(-) diff --git a/packages/space-header/src/utils/get_space.ts b/packages/space-header/src/utils/get_space.ts index 608350bd1..4250b2206 100644 --- a/packages/space-header/src/utils/get_space.ts +++ b/packages/space-header/src/utils/get_space.ts @@ -4,7 +4,6 @@ export const get_space = async (space_id: string): Promise<Space | null> => { try { const response = await fetch(`https://huggingface.co/api/spaces/${space_id}`); const data = await response.json(); - console.log(data); return data as Space; } catch (error) { return null; From 787c7cae842361a55d3808f6cacb4c2ba6bd2234 Mon Sep 17 00:00:00 2001 From: machineuser Date: Tue, 3 Sep 2024 17:24:52 +0000 Subject: [PATCH 18/19] =?UTF-8?q?=F0=9F=94=96=20@hugginface/space-header?= =?UTF-8?q?=201.0.4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- packages/space-header/package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git
a/packages/space-header/package.json b/packages/space-header/package.json index 735315c8d..c9c2c43c9 100644 --- a/packages/space-header/package.json +++ b/packages/space-header/package.json @@ -1,6 +1,6 @@ { "name": "@huggingface/space-header", - "version": "1.0.3", + "version": "1.0.4", "packageManager": "pnpm@8.10.5", "description": "Use the Space mini_header outside Hugging Face", "repository": "https://github.com/huggingface/huggingface.js.git", From a9047d5af078a6bc8b649bde8677195c528d9f10 Mon Sep 17 00:00:00 2001 From: Lucain Date: Wed, 4 Sep 2024 09:15:30 +0200 Subject: [PATCH 19/19] We are Hugging Face, or huggingface, or Huggingface, who knows (#893) --- .github/workflows/agents-publish.yml | 2 +- .github/workflows/gguf-publish.yml | 2 +- .github/workflows/hub-publish.yml | 2 +- .github/workflows/inference-publish.yml | 2 +- .github/workflows/jinja-publish.yml | 2 +- .github/workflows/languages-publish.yml | 2 +- .github/workflows/space-header-publish.yml | 2 +- .github/workflows/tasks-publish.yml | 2 +- packages/doc-internal/README.md | 2 +- 9 files changed, 9 insertions(+), 9 deletions(-) diff --git a/.github/workflows/agents-publish.yml b/.github/workflows/agents-publish.yml index 08108b585..edc72455f 100644 --- a/.github/workflows/agents-publish.yml +++ b/.github/workflows/agents-publish.yml @@ -50,7 +50,7 @@ jobs: node -e "const fs = require('fs'); const package = JSON.parse(fs.readFileSync('./package.json')); package.version = '$BUMPED_VERSION'; fs.writeFileSync('./package.json', JSON.stringify(package, null, '\t') + '\n');" pnpm --filter doc-internal run fix-cdn-versions git add ../.. - git commit -m "🔖 @hugginface/agents $BUMPED_VERSION" + git commit -m "🔖 @huggingface/agents $BUMPED_VERSION" git tag "agents-v$BUMPED_VERSION" - run: pnpm --filter agents... build && pnpm publish --no-git-checks .
env: diff --git a/.github/workflows/gguf-publish.yml b/.github/workflows/gguf-publish.yml index dec6cd891..5e5cf81f0 100644 --- a/.github/workflows/gguf-publish.yml +++ b/.github/workflows/gguf-publish.yml @@ -47,7 +47,7 @@ jobs: BUMPED_VERSION=$(node -p "require('semver').inc('$PACKAGE_VERSION', '${{ github.event.inputs.newversion }}')") # Update package.json with the new version node -e "const fs = require('fs'); const package = JSON.parse(fs.readFileSync('./package.json')); package.version = '$BUMPED_VERSION'; fs.writeFileSync('./package.json', JSON.stringify(package, null, '\t') + '\n');" - git commit . -m "🔖 @hugginface/gguf $BUMPED_VERSION" + git commit . -m "🔖 @huggingface/gguf $BUMPED_VERSION" git tag "gguf-v$BUMPED_VERSION" - run: pnpm publish --no-git-checks . env: diff --git a/.github/workflows/hub-publish.yml b/.github/workflows/hub-publish.yml index 19bb63dcf..5937ad212 100644 --- a/.github/workflows/hub-publish.yml +++ b/.github/workflows/hub-publish.yml @@ -50,7 +50,7 @@ jobs: node -e "const fs = require('fs'); const package = JSON.parse(fs.readFileSync('./package.json')); package.version = '$BUMPED_VERSION'; fs.writeFileSync('./package.json', JSON.stringify(package, null, '\t') + '\n');" pnpm --filter doc-internal run fix-cdn-versions git add ../..
- git commit -m "🔖 @hugginface/hub $BUMPED_VERSION" + git commit -m "🔖 @huggingface/hub $BUMPED_VERSION" git tag "hub-v$BUMPED_VERSION" - name: Make sure that the latest version of @huggingface/tasks is consistent with the local version diff --git a/.github/workflows/inference-publish.yml b/.github/workflows/inference-publish.yml index 5ba387645..bdbb47d7d 100644 --- a/.github/workflows/inference-publish.yml +++ b/.github/workflows/inference-publish.yml @@ -50,7 +50,7 @@ jobs: node -e "const fs = require('fs'); const package = JSON.parse(fs.readFileSync('./package.json')); package.version = '$BUMPED_VERSION'; fs.writeFileSync('./package.json', JSON.stringify(package, null, '\t') + '\n');" pnpm --filter doc-internal run fix-cdn-versions git add ../.. - git commit -m "🔖 @hugginface/inference $BUMPED_VERSION" + git commit -m "🔖 @huggingface/inference $BUMPED_VERSION" git tag "inference-v$BUMPED_VERSION" - name: Make sure that the latest version of @huggingface/tasks is consistent with the local version diff --git a/.github/workflows/jinja-publish.yml b/.github/workflows/jinja-publish.yml index 5057d4ed5..47d7cad7f 100644 --- a/.github/workflows/jinja-publish.yml +++ b/.github/workflows/jinja-publish.yml @@ -47,7 +47,7 @@ jobs: BUMPED_VERSION=$(node -p "require('semver').inc('$PACKAGE_VERSION', '${{ github.event.inputs.newversion }}')") # Update package.json with the new version node -e "const fs = require('fs'); const package = JSON.parse(fs.readFileSync('./package.json')); package.version = '$BUMPED_VERSION'; fs.writeFileSync('./package.json', JSON.stringify(package, null, '\t') + '\n');" - git commit . -m "🔖 @hugginface/jinja $BUMPED_VERSION" + git commit . -m "🔖 @huggingface/jinja $BUMPED_VERSION" git tag "jinja-v$BUMPED_VERSION" - run: pnpm publish --no-git-checks .
env: diff --git a/.github/workflows/languages-publish.yml b/.github/workflows/languages-publish.yml index 5dca90f89..913687a52 100644 --- a/.github/workflows/languages-publish.yml +++ b/.github/workflows/languages-publish.yml @@ -47,7 +47,7 @@ jobs: BUMPED_VERSION=$(node -p "require('semver').inc('$PACKAGE_VERSION', '${{ github.event.inputs.newversion }}')") # Update package.json with the new version node -e "const fs = require('fs'); const package = JSON.parse(fs.readFileSync('./package.json')); package.version = '$BUMPED_VERSION'; fs.writeFileSync('./package.json', JSON.stringify(package, null, '\t') + '\n');" - git commit . -m "🔖 @hugginface/languages $BUMPED_VERSION" + git commit . -m "🔖 @huggingface/languages $BUMPED_VERSION" git tag "languages-v$BUMPED_VERSION" - run: pnpm publish --no-git-checks . env: diff --git a/.github/workflows/space-header-publish.yml b/.github/workflows/space-header-publish.yml index 76ce2d89b..05c584514 100644 --- a/.github/workflows/space-header-publish.yml +++ b/.github/workflows/space-header-publish.yml @@ -47,7 +47,7 @@ jobs: BUMPED_VERSION=$(node -p "require('semver').inc('$PACKAGE_VERSION', '${{ github.event.inputs.newversion }}')") # Update package.json with the new version node -e "const fs = require('fs'); const package = JSON.parse(fs.readFileSync('./package.json')); package.version = '$BUMPED_VERSION'; fs.writeFileSync('./package.json', JSON.stringify(package, null, '\t') + '\n');" - git commit . -m "🔖 @hugginface/space-header $BUMPED_VERSION" + git commit . -m "🔖 @huggingface/space-header $BUMPED_VERSION" git tag "space-header-v$BUMPED_VERSION" - run: pnpm publish --no-git-checks .
env: diff --git a/.github/workflows/tasks-publish.yml b/.github/workflows/tasks-publish.yml index 4c8b4567e..0dd797dd2 100644 --- a/.github/workflows/tasks-publish.yml +++ b/.github/workflows/tasks-publish.yml @@ -47,7 +47,7 @@ jobs: BUMPED_VERSION=$(node -p "require('semver').inc('$PACKAGE_VERSION', '${{ github.event.inputs.newversion }}')") # Update package.json with the new version node -e "const fs = require('fs'); const package = JSON.parse(fs.readFileSync('./package.json')); package.version = '$BUMPED_VERSION'; fs.writeFileSync('./package.json', JSON.stringify(package, null, '\t') + '\n');" - git commit . -m "🔖 @hugginface/tasks $BUMPED_VERSION" + git commit . -m "🔖 @huggingface/tasks $BUMPED_VERSION" git tag "tasks-v$BUMPED_VERSION" - run: pnpm publish --no-git-checks . env: diff --git a/packages/doc-internal/README.md b/packages/doc-internal/README.md index 8d51e72ab..71d8f8314 100644 --- a/packages/doc-internal/README.md +++ b/packages/doc-internal/README.md @@ -2,7 +2,7 @@ This package generates `.md` files inside the [docs](../../docs) folder using [typedoc](https://typedoc.org/) and [typedoc-plugin-markdown](https://github.com/tgreyuk/typedoc-plugin-markdown). -The `.md` files are generated when releasing packages. They are then published to [hugginface.co](https://huggingface.co/docs/huggingface.js/index) through the [doc-builder](https://github.com/huggingface/doc-builder)'s github action. +The `.md` files are generated when releasing packages. They are then published to [huggingface.co](https://huggingface.co/docs/huggingface.js/index) through the [doc-builder](https://github.com/huggingface/doc-builder)'s github action. We run a few scripts in between, [fix-md-links](./fix-md-links.ts) and [update-toc](./update-toc.ts) to preprocess the files for `doc-builder`.