Add chat completion method #645

Merged: 31 commits from the chatCompletion branch into main, May 13, 2024
Changes from 11 commits (of 31)

Commits
332648d  implement chat completion (radames, May 1, 2024)
d63db89  missing import type (radames, May 1, 2024)
26dd3b1  fix chatCompletion input type (radames, May 1, 2024)
a90c0e7  🩹 (coyotte508, May 2, 2024)
a3928f4  ✅ Update tests file (coyotte508, May 2, 2024)
1d509c3  ✅ Update tests (coyotte508, May 2, 2024)
4e9ba18  🐛 One more test (coyotte508, May 2, 2024)
c535893  ✅ More tests (coyotte508, May 2, 2024)
63d5151  ✅ one more test (coyotte508, May 2, 2024)
9d2f737  ✅ Fix last test (coyotte508, May 2, 2024)
0f881e4  Merge branch 'main' into chatCompletion (coyotte508, May 3, 2024)
6a9ad56  remove skips (radames, May 4, 2024)
23637bf  recorded tapes.json (radames, May 4, 2024)
ca54d67  add chat chatCompletion hint to change url (radames, May 4, 2024)
7ff57f2  add chatCompletion test with modelid (radames, May 4, 2024)
91ec869  tests (radames, May 4, 2024)
8fd2621  test with error message (radames, May 4, 2024)
c9b95a5  test (radames, May 4, 2024)
5ab21ca  better error handling (radames, May 5, 2024)
074aa76  Merge branch 'main' into chatCompletion (radames, May 8, 2024)
32ad989  add chat completion example to inference README.md (radames, May 8, 2024)
3d8bfc6  fix (radames, May 8, 2024)
5e3a9d6  📝 Update README.md (coyotte508, May 8, 2024)
0ca9ad0  return_full_text not compatible here (radames, May 8, 2024)
72cfa24  remove return_full_text (radames, May 8, 2024)
779b828  tests (radames, May 8, 2024)
b66fcf3  Update packages/inference/README.md (radames, May 9, 2024)
87ee635  fix chat completion example (radames, May 9, 2024)
b901f5b  ♻️ Do not sent `options` (coyotte508, May 11, 2024)
5f2b488  record test (radames, May 11, 2024)
6502858  Merge branch 'main' into chatCompletion (radames, May 11, 2024)
packages/inference/src/HfInference.ts (4 additions & 4 deletions)

```diff
@@ -14,9 +14,9 @@ type TaskWithNoAccessToken = {
 	) => ReturnType<Task[key]>;
 };
 
-type TaskWithNoAccessTokenNoModel = {
+type TaskWithNoAccessTokenNoEndpointUrl = {
 	[key in keyof Task]: (
-		args: DistributiveOmit<Parameters<Task[key]>[0], "accessToken" | "model">,
+		args: DistributiveOmit<Parameters<Task[key]>[0], "accessToken" | "endpointUrl">,
 		options?: Parameters<Task[key]>[1]
 	) => ReturnType<Task[key]>;
 };
@@ -57,12 +57,12 @@ export class HfInferenceEndpoint {
 			enumerable: false,
 			value: (params: RequestArgs, options: Options) =>
 				// eslint-disable-next-line @typescript-eslint/no-explicit-any
-				fn({ ...params, accessToken, model: endpointUrl } as any, { ...defaultOptions, ...options }),
+				fn({ ...params, accessToken, endpointUrl } as any, { ...defaultOptions, ...options }),
 		});
 	}
 }
 
 export interface HfInference extends TaskWithNoAccessToken {}
 
-export interface HfInferenceEndpoint extends TaskWithNoAccessTokenNoModel {}
+export interface HfInferenceEndpoint extends TaskWithNoAccessTokenNoEndpointUrl {}
```
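For context (not part of the diff): with this rename, `HfInferenceEndpoint` forwards its constructor URL as `endpointUrl` rather than overloading `model`. A minimal usage sketch, with a hypothetical endpoint URL and token:

```ts
import { HfInferenceEndpoint } from "@huggingface/inference";

// Hypothetical dedicated endpoint URL and token, for illustration only
const hf = new HfInferenceEndpoint("https://my-endpoint.example.cloud", "hf_...");

// Task methods behave as before; internally the URL now travels as `endpointUrl`
const out = await hf.chatCompletion({
  messages: [{ role: "user", content: "Hello!" }],
  max_tokens: 100,
});
console.log(out.choices[0].message);
```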
packages/inference/src/lib/isEmpty.ts (8 additions & 0 deletions)

```diff
@@ -0,0 +1,8 @@
+export function isObjectEmpty(object: object): boolean {
+	for (const prop in object) {
+		if (Object.prototype.hasOwnProperty.call(object, prop)) {
+			return false;
+		}
+	}
+	return true;
+}
```
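This helper lets `makeRequestOptions` (below) skip serializing an empty `options` object into the request body. Its behavior in brief (illustrative values):

```ts
isObjectEmpty({});                       // true
isObjectEmpty({ wait_for_model: true }); // false
// Only own enumerable keys count; inherited properties are ignored
isObjectEmpty(Object.create({ a: 1 })); // true
```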
packages/inference/src/lib/makeRequestOptions.ts (12 additions & 5 deletions)

```diff
@@ -1,4 +1,6 @@
 import type { InferenceTask, Options, RequestArgs } from "../types";
+import { isObjectEmpty } from "../lib/isEmpty";
+import { omit } from "../utils/omit";
 import { HF_HUB_URL } from "./getDefaultTask";
 import { isUrl } from "./isUrl";
 
@@ -24,8 +26,7 @@ export async function makeRequestOptions(
 		taskHint?: InferenceTask;
 	}
 ): Promise<{ url: string; info: RequestInit }> {
-	// eslint-disable-next-line @typescript-eslint/no-unused-vars
-	const { accessToken, model: _model, ...otherArgs } = args;
+	const { accessToken, endpointUrl, ...otherArgs } = args;
 	let { model } = args;
 	const {
 		forceTask: task,
@@ -78,10 +79,16 @@
 	}
 
 	const url = (() => {
+		if (endpointUrl && isUrl(model)) {
+			throw new TypeError("Both model and endpointUrl cannot be URLs");
+		}
 		if (isUrl(model)) {
 			console.warn("Using a model URL is deprecated, please use the `endpointUrl` parameter instead");
 			return model;
 		}
 
+		if (endpointUrl) {
+			return endpointUrl;
+		}
 		if (task) {
 			return `${HF_INFERENCE_API_BASE_URL}/pipeline/${task}/${model}`;
 		}
@@ -105,8 +112,8 @@
 		body: binary
 			? args.data
 			: JSON.stringify({
-					...otherArgs,
-					options: options && otherOptions,
+					...(otherArgs.model && isUrl(otherArgs.model) ? omit(otherArgs, "model") : otherArgs),
+					...(otherOptions && !isObjectEmpty(otherOptions) && { options: otherOptions }),
 				}),
 		...(credentials && { credentials }),
 		signal: options?.signal,
```
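The resulting URL precedence, as a simplified self-contained sketch (the `/models/<model>` fallback and the exact `isUrl` check are assumptions based on the serverless Inference API; they are not shown in the excerpt above):

```ts
const HF_INFERENCE_API_BASE_URL = "https://api-inference.huggingface.co"; // assumed value
const isUrl = (value?: string): boolean => /^http(s?):\/\//.test(value ?? ""); // simplified check

function resolveUrl(model?: string, endpointUrl?: string, task?: string): string {
  if (endpointUrl && model && isUrl(model)) {
    throw new TypeError("Both model and endpointUrl cannot be URLs");
  }
  if (model && isUrl(model)) {
    return model; // deprecated: model-as-URL still wins, for backwards compatibility
  }
  if (endpointUrl) {
    return endpointUrl; // dedicated endpoint
  }
  if (task) {
    return `${HF_INFERENCE_API_BASE_URL}/pipeline/${task}/${model}`;
  }
  return `${HF_INFERENCE_API_BASE_URL}/models/${model}`; // serverless default
}
```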
packages/inference/src/tasks/custom/streamingRequest.ts (3 additions & 0 deletions)

```diff
@@ -67,6 +67,9 @@ export async function* streamingRequest<T>(
 			onChunk(value);
 			for (const event of events) {
 				if (event.data.length > 0) {
+					if (event.data === "[DONE]") {
+						return;
+					}
 					const data = JSON.parse(event.data);
 					if (typeof data === "object" && data !== null && "error" in data) {
 						throw new Error(data.error);
```
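Background for the new guard: OpenAI-compatible servers terminate the event stream with a literal `[DONE]` sentinel rather than a JSON payload, so the wire traffic looks roughly like this (illustrative frames):

```ts
// data: {"choices":[{"delta":{"content":"Hel"}}], ...}
// data: {"choices":[{"delta":{"content":"lo"}}], ...}
// data: [DONE]
//
// Without the early return, JSON.parse("[DONE]") would throw a SyntaxError
// instead of letting the generator finish cleanly.
```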
packages/inference/src/tasks/index.ts (2 additions & 0 deletions)

```diff
@@ -30,6 +30,8 @@ export * from "./nlp/textGenerationStream";
 export * from "./nlp/tokenClassification";
 export * from "./nlp/translation";
 export * from "./nlp/zeroShotClassification";
+export * from "./nlp/chatCompletion";
+export * from "./nlp/chatCompletionStream";
 
 // Multimodal tasks
 export * from "./multimodal/documentQuestionAnswering";
```
packages/inference/src/tasks/nlp/chatCompletion.ts (31 additions & 0 deletions)

```diff
@@ -0,0 +1,31 @@
+import { InferenceOutputError } from "../../lib/InferenceOutputError";
+import type { BaseArgs, Options } from "../../types";
+import { request } from "../custom/request";
+import type { ChatCompletionInput, ChatCompletionOutput } from "@huggingface/tasks";
+
+/**
+ * Use the chat completion endpoint to generate a response to a prompt, using the OpenAI-style message completion API (non-streaming)
+ */
+
+export async function chatCompletion(
+	args: BaseArgs & ChatCompletionInput,
+	options?: Options
+): Promise<ChatCompletionOutput> {
+	const res = await request<ChatCompletionOutput>(args, {
+		...options,
+		taskHint: "text-generation",
+	});
+	const isValidOutput =
+		typeof res === "object" &&
+		Array.isArray(res?.choices) &&
+		typeof res?.created === "number" &&
+		typeof res?.id === "string" &&
+		typeof res?.model === "string" &&
+		typeof res?.system_fingerprint === "string" &&
+		typeof res?.usage === "object";
+
+	if (!isValidOutput) {
+		throw new InferenceOutputError("Expected ChatCompletionOutput");
+	}
+	return res;
+}
```
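A minimal usage sketch of the non-streaming call (the model id, prompt, and token below are placeholders):

```ts
import { HfInference } from "@huggingface/inference";

const hf = new HfInference("hf_..."); // hypothetical token

const out = await hf.chatCompletion({
  model: "mistralai/Mistral-7B-Instruct-v0.2",
  messages: [{ role: "user", content: "Complete the equation 1 + 1 = , just the answer" }],
  max_tokens: 500,
  temperature: 0.1,
});
console.log(out.choices[0].message.content);
```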
packages/inference/src/tasks/nlp/chatCompletionStream.ts (16 additions & 0 deletions)

```diff
@@ -0,0 +1,16 @@
+import type { BaseArgs, Options } from "../../types";
+import { streamingRequest } from "../custom/streamingRequest";
+import type { ChatCompletionInput, ChatCompletionStreamOutput } from "@huggingface/tasks";
+
+/**
+ * Use the chat completion endpoint to generate a response to a prompt. Same as `chatCompletion` but returns a generator that can be read one token at a time
+ */
+export async function* chatCompletionStream(
+	args: BaseArgs & ChatCompletionInput,
+	options?: Options
+): AsyncGenerator<ChatCompletionStreamOutput> {
+	yield* streamingRequest<ChatCompletionStreamOutput>(args, {
+		...options,
+		taskHint: "text-generation",
+	});
+}
```
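And a streaming sketch (same placeholder model and token), consuming the generator with `for await`:

```ts
import { HfInference } from "@huggingface/inference";

const hf = new HfInference("hf_..."); // hypothetical token

let out = "";
for await (const chunk of hf.chatCompletionStream({
  model: "mistralai/Mistral-7B-Instruct-v0.2",
  messages: [{ role: "user", content: "Can you help me solve an equation?" }],
  max_tokens: 500,
})) {
  if (chunk.choices && chunk.choices.length > 0) {
    out += chunk.choices[0].delta.content ?? "";
  }
}
console.log(out);
```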
packages/inference/src/tasks/nlp/textGenerationStream.ts (1 addition & 0 deletions)

```diff
@@ -67,6 +67,7 @@ export interface TextGenerationStreamDetails {
 }
 
 export interface TextGenerationStreamOutput {
+	index?: number;
 	/** Generated token, one at a time */
 	token: TextGenerationStreamToken;
 	/**
```
packages/inference/src/types.ts (14 additions & 3 deletions)

```diff
@@ -1,4 +1,5 @@
 import type { PipelineType } from "@huggingface/tasks";
+import type { ChatCompletionInput } from "@huggingface/tasks";
 
 export interface Options {
 	/**
@@ -32,7 +33,7 @@ export interface Options {
 	signal?: AbortSignal;
 
 	/**
-	 * Credentials to use for the request. If this is a string, it will be passed straight on. If it's a boolean, true will be "include" and false will not send credentials at all (which defaults to "same-origin" inside browsers).
+	 * (Default: "same-origin"). String | Boolean. Credentials to use for the request. If this is a string, it will be passed straight on. If it's a boolean, true will be "include" and false will not send credentials at all.
 	 */
 	includeCredentials?: string | boolean;
 }
@@ -47,15 +48,25 @@ export interface BaseArgs {
 	 */
 	accessToken?: string;
 	/**
-	 * The model to use. Can be a full URL for a dedicated inference endpoint.
+	 * The model to use.
 	 *
 	 * If not specified, will call huggingface.co/api/tasks to get the default model for the task.
+	 *
+	 * /!\ Legacy behavior allows this to be an URL, but this is deprecated and will be removed in the future.
+	 * Use the `endpointUrl` parameter instead.
 	 */
 	model?: string;
+
+	/**
+	 * The URL of the endpoint to use.
+	 *
+	 * If specified, requests are sent to this URL instead of the default URL resolved from the model.
+	 */
+	endpointUrl?: string;
 }
 
 export type RequestArgs = BaseArgs &
-	({ data: Blob | ArrayBuffer } | { inputs: unknown }) & {
+	({ data: Blob | ArrayBuffer } | { inputs: unknown } | ChatCompletionInput) & {
 		parameters?: Record<string, unknown>;
 		accessToken?: string;
 	};
```
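To illustrate the migration the new field enables (the endpoint URL and token below are hypothetical):

```ts
import { textGeneration } from "@huggingface/inference";

// Before (deprecated): the endpoint URL was passed through `model`
// await textGeneration({ accessToken: "hf_...", model: "https://my-endpoint.example.cloud", inputs: "Hello" });

// After: the URL goes in `endpointUrl`
const out = await textGeneration({
  accessToken: "hf_...",
  endpointUrl: "https://my-endpoint.example.cloud",
  inputs: "Hello",
});
console.log(out.generated_text);
```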