diff --git a/README.md b/README.md
index a9d488ac..6fc9cb52 100644
--- a/README.md
+++ b/README.md
@@ -184,16 +184,16 @@ const chunks = await engine.chat.completions.create({
 });
 
 let reply = "";
-let lastChunk: webllm.ChatCompletionChunk | undefined = undefined;
 for await (const chunk of chunks) {
   reply += chunk.choices[0]?.delta.content || "";
   console.log(reply);
-  lastChunk = chunk;
+  if (chunk.usage) {
+    console.log(chunk.usage); // only last chunk has usage
+  }
 }
 
-const fullReply = await engine.getMessage()
+const fullReply = await engine.getMessage();
 console.log(fullReply);
-console.log(lastChunk!.usage);
 ```
 
 ## Advanced Usage
diff --git a/examples/get-started-web-worker/src/main.ts b/examples/get-started-web-worker/src/main.ts
index 8507452b..3dec418a 100644
--- a/examples/get-started-web-worker/src/main.ts
+++ b/examples/get-started-web-worker/src/main.ts
@@ -85,16 +85,16 @@ async function mainStreaming() {
 
   const asyncChunkGenerator = await engine.chat.completions.create(request);
   let message = "";
-  let lastChunk: webllm.ChatCompletionChunk | undefined = undefined;
   for await (const chunk of asyncChunkGenerator) {
     console.log(chunk);
     message += chunk.choices[0]?.delta?.content || "";
     setLabel("generate-label", message);
-    lastChunk = chunk;
+    if (chunk.usage) {
+      console.log(chunk.usage); // only last chunk has usage
+    }
     // engine.interruptGenerate(); // works with interrupt as well
   }
   console.log("Final message:\n", await engine.getMessage()); // the concatenated message
-  console.log(lastChunk!.usage);
 }
 
 // Run one of the function below
diff --git a/examples/service-worker/src/main.ts b/examples/service-worker/src/main.ts
index 3e054227..98c317b1 100644
--- a/examples/service-worker/src/main.ts
+++ b/examples/service-worker/src/main.ts
@@ -102,16 +102,16 @@ async function mainStreaming() {
 
   const asyncChunkGenerator = await engine.chat.completions.create(request);
   let message = "";
-  let lastChunk: webllm.ChatCompletionChunk | undefined = undefined;
   for await (const chunk of asyncChunkGenerator) {
     console.log(chunk);
     message += chunk.choices[0]?.delta?.content || "";
     setLabel("generate-label", message);
-    lastChunk = chunk;
+    if (chunk.usage) {
+      console.log(chunk.usage); // only last chunk has usage
+    }
     // engine.interruptGenerate(); // works with interrupt as well
   }
   console.log("Final message:\n", await engine.getMessage()); // the concatenated message
-  console.log(lastChunk!.usage);
 }
 
 registerServiceWorker();
diff --git a/examples/streaming/src/streaming.ts b/examples/streaming/src/streaming.ts
index 799a4e64..831b1eda 100644
--- a/examples/streaming/src/streaming.ts
+++ b/examples/streaming/src/streaming.ts
@@ -15,7 +15,7 @@ async function main() {
   const initProgressCallback = (report: webllm.InitProgressReport) => {
     setLabel("init-label", report.text);
   };
-  const selectedModel = "Llama-3-8B-Instruct-q4f32_1-MLC";
+  const selectedModel = "TinyLlama-1.1B-Chat-v0.4-q4f16_1-MLC";
   const engine: webllm.MLCEngineInterface = await webllm.CreateMLCEngine(
     selectedModel,
     { initProgressCallback: initProgressCallback },
@@ -38,16 +38,16 @@
 
   const asyncChunkGenerator = await engine.chat.completions.create(request);
   let message = "";
-  let lastChunk: webllm.ChatCompletionChunk | undefined = undefined;
   for await (const chunk of asyncChunkGenerator) {
     console.log(chunk);
     message += chunk.choices[0]?.delta?.content || "";
     setLabel("generate-label", message);
-    lastChunk = chunk;
+    if (chunk.usage) {
+      console.log(chunk.usage); // only last chunk has usage
+    }
     // engine.interruptGenerate(); // works with interrupt as well
   }
   console.log("Final message:\n", await engine.getMessage()); // the concatenated message
-  console.log(lastChunk!.usage);
 }
 
 main();
diff --git a/src/openai_api_protocols/chat_completion.ts b/src/openai_api_protocols/chat_completion.ts
index 593f244b..327634c5 100644
--- a/src/openai_api_protocols/chat_completion.ts
+++ b/src/openai_api_protocols/chat_completion.ts
@@ -469,7 +469,7 @@ export function postInitAndCheckFields(
   }
 
   // 8. Only set stream_options when streaming
-  if (request.stream_options !== undefined && request.tools !== null) {
+  if (request.stream_options !== undefined && request.stream_options !== null) {
     if (!request.stream) {
       throw new Error("Only specify stream_options when stream=True.");
     }
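
Taken together, the examples now read `usage` off the final streamed chunk instead of keeping a `lastChunk` reference and asserting `lastChunk!` afterwards. Below is a minimal sketch of the resulting call pattern, assuming a request that sets `stream: true` together with `stream_options: { include_usage: true }` as the updated README snippet does; the messages here are illustrative placeholders, not part of this diff.

```ts
import * as webllm from "@mlc-ai/web-llm";

// Minimal sketch of the updated streaming pattern (placeholder messages).
async function streamWithUsage(engine: webllm.MLCEngineInterface) {
  const chunks = await engine.chat.completions.create({
    messages: [{ role: "user", content: "Provide me three US states." }],
    stream: true, // stream_options is only valid when streaming
    stream_options: { include_usage: true },
  });

  let reply = "";
  for await (const chunk of chunks) {
    reply += chunk.choices[0]?.delta.content || "";
    if (chunk.usage) {
      console.log(chunk.usage); // only the last chunk carries usage
    }
  }
  console.log(reply);
}
```

Checking `chunk.usage` inside the loop keeps the examples free of non-null assertions and matches the validation fix in `postInitAndCheckFields`, which now correctly tests `stream_options` itself rather than `request.tools`.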