feat: Streaming azure openai #244

Open · ZhongpinWang wants to merge 55 commits into main from feat-streaming-azure-openai

Commits (55)
ea0c687
debug code
ZhongpinWang Oct 21, 2024
5d0985f
Make streaming work
ZhongpinWang Oct 22, 2024
a74cf6b
fix: remove await
ZhongpinWang Oct 22, 2024
9025451
fix: await again
ZhongpinWang Oct 22, 2024
fc12de0
small changes
ZhongpinWang Oct 23, 2024
d5d38bd
chore: add missing javadoc
ZhongpinWang Oct 23, 2024
1f466a7
Merge branch 'feat-streaming-azure-openai-playground' into feat-strea…
ZhongpinWang Oct 23, 2024
07dda35
wip
ZhongpinWang Oct 23, 2024
c218211
feat: pipe streams
ZhongpinWang Oct 23, 2024
4ebd37d
feat: wrap chunk to see usage and finish reason
ZhongpinWang Oct 23, 2024
02d1939
refactor: pipe streams
ZhongpinWang Oct 24, 2024
7a00fc3
refactor
ZhongpinWang Oct 24, 2024
dd02651
refactor: change streamString to streamContent
ZhongpinWang Oct 24, 2024
50142e2
fix: lint
ZhongpinWang Oct 24, 2024
c8611d8
refactor
ZhongpinWang Oct 25, 2024
7386dc5
feat: demo streaming in sample-code
ZhongpinWang Oct 25, 2024
a7ec23a
fix: end res in sample code when finish
ZhongpinWang Oct 25, 2024
2a1d3be
Merge branch 'main' into feat-streaming-azure-openai
ZhongpinWang Oct 28, 2024
bc03fed
fix: lint
ZhongpinWang Oct 28, 2024
c399f09
refactor
ZhongpinWang Oct 28, 2024
b3f4e71
fix: check public-api
ZhongpinWang Oct 28, 2024
fa91209
chore: add tests for stream chunk response
ZhongpinWang Oct 28, 2024
56e6197
fix: Changes from lint
Oct 28, 2024
6297626
fix: chunk type inference
ZhongpinWang Oct 29, 2024
f22bed7
refactor: change some types
ZhongpinWang Oct 30, 2024
1348b97
wip
ZhongpinWang Oct 30, 2024
8086b70
fix: internal.js.map issue
ZhongpinWang Oct 30, 2024
40ad3d2
chore: add tests for chat completion stream
ZhongpinWang Oct 30, 2024
dcb6d54
refactor: move stream files
ZhongpinWang Oct 30, 2024
4bde96b
fix: remove duplicated file
ZhongpinWang Oct 30, 2024
3d5554c
refactor: rename stream
ZhongpinWang Oct 30, 2024
0b79c66
refactor: openai stream
ZhongpinWang Oct 30, 2024
7104fc5
chore: add tests for sse-stream (copied from openai)
ZhongpinWang Oct 30, 2024
2c5247a
refactor: rename test responses
ZhongpinWang Nov 4, 2024
3ff4c9e
Merge branch 'main' into feat-streaming-azure-openai
ZhongpinWang Nov 4, 2024
6570bd2
refactor: replace streamContent with a method
ZhongpinWang Nov 11, 2024
9187988
feat: support multiple choices
ZhongpinWang Nov 11, 2024
0bd1c92
fix: Changes from lint
Nov 11, 2024
0510c2a
fix: add abortcontroler and fix sample code
ZhongpinWang Nov 11, 2024
2bf0e7e
fix: add controller signal to axios
ZhongpinWang Nov 11, 2024
050d0db
fix: Changes from lint
Nov 11, 2024
1399a91
chore: add unit test for stream()
ZhongpinWang Nov 11, 2024
ad65518
fix: Changes from lint
Nov 11, 2024
2a940b1
fix: stream finish reason index 0
ZhongpinWang Nov 11, 2024
8bc6364
lint
ZhongpinWang Nov 11, 2024
841d452
fix: type test
ZhongpinWang Nov 11, 2024
39675b5
fix: make toContentStream return AzureOpenAiChatCompletionStream
ZhongpinWang Nov 11, 2024
658d1bc
fix: lint
ZhongpinWang Nov 11, 2024
d5d817a
Merge branch 'main' into feat-streaming-azure-openai
ZhongpinWang Nov 11, 2024
df6ee3f
feat: throw if sse payload invalid
ZhongpinWang Nov 11, 2024
d3ba1d8
fix: Changes from lint
Nov 11, 2024
819692f
refactor: interface
ZhongpinWang Nov 12, 2024
a06cd03
refactor
ZhongpinWang Nov 12, 2024
6a6e403
Merge branch 'main' into feat-streaming-azure-openai
ZhongpinWang Nov 12, 2024
862ff0f
chore: add changeset
ZhongpinWang Nov 12, 2024
5 changes: 5 additions & 0 deletions .changeset/seven-chairs-change.md
@@ -0,0 +1,5 @@
---

'@sap-ai-sdk/foundation-models': minor
---

[New Functionality] Support streaming for Azure OpenAI chat completion in `foundation-models`.
@@ -3,11 +3,12 @@ import {
mockClientCredentialsGrantCall,
mockDeploymentsList,
mockInference,
parseFileToString,
parseMockResponse
} from '../../../../test-util/mock-http.js';
import { AzureOpenAiChatClient } from './azure-openai-chat-client.js';
import { apiVersion } from './model-types.js';
-import type { AzureOpenAiCreateChatCompletionResponse } from './client/inference/schema';
+import type { AzureOpenAiCreateChatCompletionResponse } from './client/inference/schema/index.js';

describe('Azure OpenAI chat client', () => {
const chatCompletionEndpoint = {
@@ -159,4 +160,46 @@ describe('Azure OpenAI chat client', () => {
const response = await clientWithResourceGroup.run(prompt);
expect(response.data).toEqual(mockResponse);
});

it('executes a streaming request with correct chunk response', async () => {
const prompt = {
messages: [
{
role: 'user' as const,
content: 'Where is the deepest place on earth located'
}
],
stream: true,
stream_options: {
include_usage: true
}
};

const mockResponse = await parseFileToString(
'foundation-models',
'azure-openai-chat-completion-stream-chunks.txt'
);

mockInference(
{
data: prompt
},
{
data: mockResponse,
status: 200
},
chatCompletionEndpoint
);

const initialResponse = await parseFileToString(
'foundation-models',
'azure-openai-chat-completion-stream-chunk-response-initial.json'
);

const response = await client.stream(prompt);
for await (const chunk of response.stream) {
expect(JSON.stringify(chunk.data)).toEqual(initialResponse);
break;
}
});
});
@@ -6,6 +6,10 @@ import {
} from '@sap-ai-sdk/ai-api/internal.js';
import { apiVersion, type AzureOpenAiChatModel } from './model-types.js';
import { AzureOpenAiChatCompletionResponse } from './azure-openai-chat-completion-response.js';
import { AzureOpenAiChatCompletionStreamResponse } from './azure-openai-chat-completion-stream-response.js';
import { AzureOpenAiChatCompletionStream } from './azure-openai-chat-completion-stream.js';
import type { AzureOpenAiChatCompletionStreamChunkResponse } from './azure-openai-chat-completion-stream-chunk-response.js';
import type { HttpResponse } from '@sap-cloud-sdk/http-client';
import type { AzureOpenAiCreateChatCompletionRequest } from './client/inference/schema/index.js';

/**
@@ -28,12 +32,43 @@ export class AzureOpenAiChatClient {
data: AzureOpenAiCreateChatCompletionRequest,
requestConfig?: CustomRequestConfig
): Promise<AzureOpenAiChatCompletionResponse> {
const response = await this.executeRequest(data, requestConfig);
return new AzureOpenAiChatCompletionResponse(response);
}

/**
* Creates a completion stream for the chat messages.
* @param data - The input parameters for the chat completion.
* @param controller - The abort controller.
* @param requestConfig - The request configuration.
* @returns The completion stream.
*/
async stream(
data: AzureOpenAiCreateChatCompletionRequest,
controller = new AbortController(),
requestConfig?: CustomRequestConfig
): Promise<
AzureOpenAiChatCompletionStreamResponse<AzureOpenAiChatCompletionStreamChunkResponse>
> {
const response =
new AzureOpenAiChatCompletionStreamResponse<AzureOpenAiChatCompletionStreamChunkResponse>();
response.stream = (await this.createStream(data, controller, requestConfig))
._pipe(AzureOpenAiChatCompletionStream._processChunk)
._pipe(AzureOpenAiChatCompletionStream._processFinishReason, response)
._pipe(AzureOpenAiChatCompletionStream._processTokenUsage, response);
return response;
}

private async executeRequest(
data: AzureOpenAiCreateChatCompletionRequest,
requestConfig?: CustomRequestConfig
): Promise<HttpResponse> {
const deploymentId = await getDeploymentId(
this.modelDeployment,
'azure-openai'
);
const resourceGroup = getResourceGroup(this.modelDeployment);
-    const response = await executeRequest(
+    return executeRequest(
{
url: `/inference/deployments/${deploymentId}/chat/completions`,
apiVersion,
@@ -42,6 +77,27 @@ export class AzureOpenAiChatClient {
data,
requestConfig
);
-    return new AzureOpenAiChatCompletionResponse(response);
}

private async createStream(
data: AzureOpenAiCreateChatCompletionRequest,
controller: AbortController,
requestConfig?: CustomRequestConfig
): Promise<AzureOpenAiChatCompletionStream<any>> {
const response = await this.executeRequest(
{
...data,
stream: true,
stream_options: {
include_usage: true
}
},
{
...requestConfig,
responseType: 'stream',
signal: controller.signal
}
);
return AzureOpenAiChatCompletionStream._create(response, controller);
}
}
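For orientation, here is a minimal consumption sketch of the new streaming API. The `@sap-ai-sdk/foundation-models` import path and the `'gpt-4o'` model name are assumptions for illustration, not taken from this diff:

// Usage sketch: stream a chat completion and print deltas as they arrive.
import { AzureOpenAiChatClient } from '@sap-ai-sdk/foundation-models';

async function streamStory(): Promise<void> {
  const client = new AzureOpenAiChatClient('gpt-4o'); // model name is illustrative
  const controller = new AbortController();

  const response = await client.stream(
    { messages: [{ role: 'user', content: 'Tell me a short story.' }] },
    controller
  );

  // Each chunk is an AzureOpenAiChatCompletionStreamChunkResponse.
  for await (const chunk of response.stream) {
    process.stdout.write(chunk.getDeltaContent() ?? '');
  }

  // Populated by the _processFinishReason and _processTokenUsage pipe stages
  // while the stream is being consumed.
  console.log(response.getFinishReason());
  console.log(response.getTokenUsage());
}

Because stream() forwards controller.signal to the underlying HTTP request, calling controller.abort() cancels the stream mid-flight.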
@@ -32,9 +32,7 @@ export class AzureOpenAiChatCompletionResponse {
* @param choiceIndex - The index of the choice to parse.
* @returns The finish reason.
*/
-  getFinishReason(
-    choiceIndex = 0
-  ): this['data']['choices'][0]['finish_reason'] {
+  getFinishReason(choiceIndex = 0): string | undefined | null {
this.logInvalidChoiceIndex(choiceIndex);
return this.data.choices[choiceIndex]?.finish_reason;
}
@@ -0,0 +1,77 @@
import { parseMockResponse } from '../../../../test-util/mock-http.js';
import { AzureOpenAiChatCompletionStreamChunkResponse } from './azure-openai-chat-completion-stream-chunk-response.js';

describe('OpenAI chat completion stream chunk response', () => {
let mockResponses: {
tokenUsageResponse: any;
finishReasonResponse: any;
deltaContentResponse: any;
};
let azureOpenAiChatCompletionStreamChunkResponses: {
tokenUsageResponse: AzureOpenAiChatCompletionStreamChunkResponse;
finishReasonResponse: AzureOpenAiChatCompletionStreamChunkResponse;
deltaContentResponse: AzureOpenAiChatCompletionStreamChunkResponse;
};

beforeAll(async () => {
mockResponses = {
tokenUsageResponse: await parseMockResponse<any>(
'foundation-models',
'azure-openai-chat-completion-stream-chunk-response-token-usage.json'
),
finishReasonResponse: await parseMockResponse<any>(
'foundation-models',
'azure-openai-chat-completion-stream-chunk-response-finish-reason.json'
),
deltaContentResponse: await parseMockResponse<any>(
'foundation-models',
'azure-openai-chat-completion-stream-chunk-response-delta-content.json'
)
};
azureOpenAiChatCompletionStreamChunkResponses = {
tokenUsageResponse: new AzureOpenAiChatCompletionStreamChunkResponse(
mockResponses.tokenUsageResponse
),
finishReasonResponse: new AzureOpenAiChatCompletionStreamChunkResponse(
mockResponses.finishReasonResponse
),
deltaContentResponse: new AzureOpenAiChatCompletionStreamChunkResponse(
mockResponses.deltaContentResponse
)
};
});

it('should return the chat completion stream chunk response', () => {
expect(
azureOpenAiChatCompletionStreamChunkResponses.tokenUsageResponse.data
).toStrictEqual(mockResponses.tokenUsageResponse);
expect(
azureOpenAiChatCompletionStreamChunkResponses.finishReasonResponse.data
).toStrictEqual(mockResponses.finishReasonResponse);
expect(
azureOpenAiChatCompletionStreamChunkResponses.deltaContentResponse.data
).toStrictEqual(mockResponses.deltaContentResponse);
});

it('should get token usage', () => {
expect(
azureOpenAiChatCompletionStreamChunkResponses.tokenUsageResponse.getTokenUsage()
).toMatchObject({
completion_tokens: expect.any(Number),
prompt_tokens: expect.any(Number),
total_tokens: expect.any(Number)
});
});

it('should return finish reason', () => {
expect(
azureOpenAiChatCompletionStreamChunkResponses.finishReasonResponse.getFinishReason()
).toBe('stop');
});

it('should return delta content with default index 0', () => {
expect(
azureOpenAiChatCompletionStreamChunkResponses.deltaContentResponse.getDeltaContent()
).toBe(' is');
});
});
@@ -0,0 +1,47 @@
import type { AzureOpenAiCompletionUsage } from './client/inference/schema/index.js';

/**
* Azure OpenAI chat completion stream chunk response.
*/
export class AzureOpenAiChatCompletionStreamChunkResponse {
constructor(public readonly data: any) {
// TODO: Change `any` to `CreateChatCompletionStreamResponse` once the preview spec becomes stable.
this.data = data;
}

/**
* Usage of tokens in the chunk response.
* @returns Token usage.
*/
getTokenUsage(): AzureOpenAiCompletionUsage {
return this.data.usage;
}

/**
* Reason for stopping the completion stream chunk.
* @param choiceIndex - The index of the choice to parse.
* @returns The finish reason.
*/
getFinishReason(choiceIndex = 0): string | undefined | null {
for (const choice of this.data.choices) {
if (choice.index === choiceIndex) {
return choice.finish_reason;
}
}
return undefined;
}

/**
* Parses the chunk response and returns the delta content.
* @param choiceIndex - The index of the choice to parse.
* @returns The message delta content.
*/
getDeltaContent(choiceIndex = 0): string | undefined | null {
for (const choice of this.data.choices) {
if (choice.index === choiceIndex) {
return choice.delta.content;
}
}
return undefined;
}
}
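To make the accessors concrete, a small sketch with an abbreviated delta chunk. The shape follows the Azure OpenAI streaming format; all values are made up:

import { AzureOpenAiChatCompletionStreamChunkResponse } from './azure-openai-chat-completion-stream-chunk-response.js';

// Abbreviated chunk as the service sends it when `stream: true`; values are illustrative.
const chunk = new AzureOpenAiChatCompletionStreamChunkResponse({
  id: 'chatcmpl-abc123',
  object: 'chat.completion.chunk',
  choices: [{ index: 0, delta: { content: ' is' }, finish_reason: null }]
});

chunk.getDeltaContent();  // ' is'
chunk.getFinishReason();  // null, this choice has not finished yet
chunk.getDeltaContent(1); // undefined, there is no choice with index 1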
@@ -0,0 +1,54 @@
import type { AzureOpenAiCompletionUsage } from './client/inference/schema/index.js';
import type { AzureOpenAiChatCompletionStream } from './azure-openai-chat-completion-stream.js';

/**
* Azure OpenAI chat completion stream response.
*/
export class AzureOpenAiChatCompletionStreamResponse<T> {
private _usage: AzureOpenAiCompletionUsage | undefined;
private _finishReasons: Map<number, string> = new Map();
private _stream: AzureOpenAiChatCompletionStream<T> | undefined;

public getTokenUsage(): AzureOpenAiCompletionUsage | undefined {
return this._usage;
}

/**
* @internal
*/
_setTokenUsage(usage: AzureOpenAiCompletionUsage): void {
this._usage = usage;
}

public getFinishReason(choiceIndex = 0): string | undefined | null {
return this._finishReasons.get(choiceIndex);
}

/**
* @internal
*/
_getFinishReasons(): Map<number, string> {
return this._finishReasons;
}

/**
* @internal
*/
_setFinishReasons(finishReasons: Map<number, string>): void {
this._finishReasons = finishReasons;
}

get stream(): AzureOpenAiChatCompletionStream<T> {
if (!this._stream) {
throw new Error('Response stream is undefined.');
}
return this._stream;
}

/**
* @internal
*/
set stream(stream: AzureOpenAiChatCompletionStream<T>) {
this._stream = stream;
}
}
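The _processChunk, _processFinishReason, and _processTokenUsage stages wired up in AzureOpenAiChatClient.stream() live in the stream implementation file, which is not shown in this diff. A simplified sketch of how such a stage could use the internal setters above while passing chunks through; the name and logic are assumptions inferred from this interface:

import { AzureOpenAiChatCompletionStreamChunkResponse } from './azure-openai-chat-completion-stream-chunk-response.js';
import { AzureOpenAiChatCompletionStreamResponse } from './azure-openai-chat-completion-stream-response.js';

// Hypothetical pipe stage: records per-choice finish reasons on the shared
// response object and yields every chunk unchanged.
async function* processFinishReason(
  stream: AsyncIterable<AzureOpenAiChatCompletionStreamChunkResponse>,
  response: AzureOpenAiChatCompletionStreamResponse<AzureOpenAiChatCompletionStreamChunkResponse>
): AsyncGenerator<AzureOpenAiChatCompletionStreamChunkResponse> {
  for await (const chunk of stream) {
    for (const choice of chunk.data.choices ?? []) {
      if (choice.finish_reason) {
        response._getFinishReasons().set(choice.index, choice.finish_reason);
      }
    }
    yield chunk;
  }
}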