diff --git a/README.md b/README.md
index a9d488ac..6fc9cb52 100644
--- a/README.md
+++ b/README.md
@@ -184,16 +184,16 @@ const chunks = await engine.chat.completions.create({
 });
 
 let reply = "";
-let lastChunk: webllm.ChatCompletionChunk | undefined = undefined;
 for await (const chunk of chunks) {
   reply += chunk.choices[0]?.delta.content || "";
   console.log(reply);
-  lastChunk = chunk;
+  if (chunk.usage) {
+    console.log(chunk.usage); // only last chunk has usage
+  }
 }
 
-const fullReply = await engine.getMessage()
+const fullReply = await engine.getMessage();
 console.log(fullReply);
-console.log(lastChunk!.usage);
 ```
 
 ## Advanced Usage
diff --git a/examples/get-started-web-worker/src/main.ts b/examples/get-started-web-worker/src/main.ts
index 8507452b..3dec418a 100644
--- a/examples/get-started-web-worker/src/main.ts
+++ b/examples/get-started-web-worker/src/main.ts
@@ -85,16 +85,16 @@ async function mainStreaming() {
 
   const asyncChunkGenerator = await engine.chat.completions.create(request);
   let message = "";
-  let lastChunk: webllm.ChatCompletionChunk | undefined = undefined;
   for await (const chunk of asyncChunkGenerator) {
     console.log(chunk);
     message += chunk.choices[0]?.delta?.content || "";
     setLabel("generate-label", message);
-    lastChunk = chunk;
+    if (chunk.usage) {
+      console.log(chunk.usage); // only last chunk has usage
+    }
     // engine.interruptGenerate(); // works with interrupt as well
   }
   console.log("Final message:\n", await engine.getMessage()); // the concatenated message
-  console.log(lastChunk!.usage);
 }
 
 // Run one of the function below
diff --git a/examples/service-worker/src/main.ts b/examples/service-worker/src/main.ts
index 3e054227..98c317b1 100644
--- a/examples/service-worker/src/main.ts
+++ b/examples/service-worker/src/main.ts
@@ -102,16 +102,16 @@ async function mainStreaming() {
 
   const asyncChunkGenerator = await engine.chat.completions.create(request);
   let message = "";
-  let lastChunk: webllm.ChatCompletionChunk | undefined = undefined;
   for await (const chunk of asyncChunkGenerator) {
     console.log(chunk);
     message += chunk.choices[0]?.delta?.content || "";
     setLabel("generate-label", message);
-    lastChunk = chunk;
+    if (chunk.usage) {
+      console.log(chunk.usage); // only last chunk has usage
+    }
     // engine.interruptGenerate(); // works with interrupt as well
   }
   console.log("Final message:\n", await engine.getMessage()); // the concatenated message
-  console.log(lastChunk!.usage);
 }
 
 registerServiceWorker();
diff --git a/examples/streaming/src/streaming.ts b/examples/streaming/src/streaming.ts
index 799a4e64..831b1eda 100644
--- a/examples/streaming/src/streaming.ts
+++ b/examples/streaming/src/streaming.ts
@@ -15,7 +15,7 @@ async function main() {
   const initProgressCallback = (report: webllm.InitProgressReport) => {
     setLabel("init-label", report.text);
   };
-  const selectedModel = "Llama-3-8B-Instruct-q4f32_1-MLC";
+  const selectedModel = "TinyLlama-1.1B-Chat-v0.4-q4f16_1-MLC";
   const engine: webllm.MLCEngineInterface = await webllm.CreateMLCEngine(
     selectedModel,
     { initProgressCallback: initProgressCallback },
@@ -38,16 +38,16 @@
 
   const asyncChunkGenerator = await engine.chat.completions.create(request);
   let message = "";
-  let lastChunk: webllm.ChatCompletionChunk | undefined = undefined;
   for await (const chunk of asyncChunkGenerator) {
     console.log(chunk);
     message += chunk.choices[0]?.delta?.content || "";
     setLabel("generate-label", message);
-    lastChunk = chunk;
+    if (chunk.usage) {
+      console.log(chunk.usage); // only last chunk has usage
+    }
     // engine.interruptGenerate(); // works with interrupt as well
   }
   console.log("Final message:\n", await engine.getMessage()); // the concatenated message
-  console.log(lastChunk!.usage);
 }
 
 main();
diff --git a/src/openai_api_protocols/chat_completion.ts b/src/openai_api_protocols/chat_completion.ts
index 593f244b..327634c5 100644
--- a/src/openai_api_protocols/chat_completion.ts
+++ b/src/openai_api_protocols/chat_completion.ts
@@ -469,7 +469,7 @@ export function postInitAndCheckFields(
   }
 
   // 8. Only set stream_options when streaming
-  if (request.stream_options !== undefined && request.tools !== null) {
+  if (request.stream_options !== undefined && request.stream_options !== null) {
     if (!request.stream) {
       throw new Error("Only specify stream_options when stream=True.");
     }
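
Taken together, the examples now read `usage` off the final streamed chunk instead of keeping a `lastChunk` reference and asserting `lastChunk!` afterwards. Below is a minimal sketch of the resulting call pattern, assuming a request that sets `stream: true` together with `stream_options: { include_usage: true }` as the updated README snippet does; the messages here are illustrative placeholders, not part of this diff.

```ts
import * as webllm from "@mlc-ai/web-llm";

// Minimal sketch of the updated streaming pattern (placeholder messages).
async function streamWithUsage(engine: webllm.MLCEngineInterface) {
  const chunks = await engine.chat.completions.create({
    messages: [{ role: "user", content: "Provide me three US states." }],
    stream: true, // stream_options is only valid when streaming
    stream_options: { include_usage: true },
  });

  let reply = "";
  for await (const chunk of chunks) {
    reply += chunk.choices[0]?.delta.content || "";
    if (chunk.usage) {
      console.log(chunk.usage); // only the last chunk carries usage
    }
  }
  console.log(reply);
}
```

Checking `chunk.usage` inside the loop keeps the examples free of non-null assertions and matches the validation fix in `postInitAndCheckFields`, which now correctly tests `stream_options` itself rather than `request.tools`.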