Merge commit '54784005b57c2235a7669e0c12e8dafa68f0ca5f'

* commit '54784005b57c2235a7669e0c12e8dafa68f0ca5f':
  - Log server error responses without `-GoogleGenerativeAIDebugLogEnabled` (google-gemini#177)
  - Send `GenerateContentRequest` in `CountTokensRequest` (google-gemini#175)
  - Add `responseSchema` to `GenerationConfig` (google-gemini#176)
  - Update models in samples and README to `gemini-1.5-flash-latest` (google-gemini#173)
  - Sync UI from VertexAI for Firebase (google-gemini#172)
  - Increment SDK version to `0.5.4` (google-gemini#171)
  - Add default `RequestOptions.timeout` of 300 seconds (google-gemini#170)
  - Delete repo specific Issue Template (google-gemini#169)
  - Increment SDK version to `0.5.3` (google-gemini#167)
  - Make `text` computed property handle mixed-parts responses (google-gemini#165)
  - update doc comments (google-gemini#166)
enefry · Jun 12, 2024 · 07b03f9
2 parents db301f5 + 5478400
commit 07b03f9
Show file tree

Hide file tree

Showing 28 changed files with 448 additions and 122 deletions.
diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml
diff --git a/.github/ISSUE_TEMPLATE/feature_request.yml b/.github/ISSUE_TEMPLATE/feature_request.yml
diff --git a/Examples/GenerativeAICLI/Sources/GenerateContent.swift b/Examples/GenerativeAICLI/Sources/GenerateContent.swift
@@ -55,23 +55,7 @@ struct GenerateContent: AsyncParsableCommand {
 
   mutating func run() async throws {
     do {
-      let safetySettings = [SafetySetting(harmCategory: .dangerousContent, threshold: .blockNone)]
-      // Let the server pick the default config.
-      let config = GenerationConfig(
-        temperature: 0.2,
-        topP: 0.1,
-        topK: 16,
-        candidateCount: 1,
-        maxOutputTokens: isStreaming ? nil : 256,
-        stopSequences: nil
-      )
-
-      let model = GenerativeModel(
-        name: modelNameOrDefault(),
-        apiKey: apiKey,
-        generationConfig: config,
-        safetySettings: safetySettings
-      )
+      let model = GenerativeModel(name: modelNameOrDefault(), apiKey: apiKey)
 
       var parts = [ModelContent.Part]()
 
@@ -115,12 +99,10 @@ struct GenerateContent: AsyncParsableCommand {
   }
 
   func modelNameOrDefault() -> String {
-    if let modelName = modelName {
+    if let modelName {
       return modelName
-    } else if imageURL != nil {
-      return "gemini-1.0-pro-vision-latest"
     } else {
-      return "gemini-1.0-pro"
+      return "gemini-1.5-flash-latest"
     }
   }
 }

diff --git a/Examples/GenerativeAISample/ChatSample/Screens/ConversationScreen.swift b/Examples/GenerativeAISample/ChatSample/Screens/ConversationScreen.swift
@@ -94,6 +94,8 @@ struct ConversationScreen: View {
   }
 
   private func sendOrStop() {
+    focusedField = nil
+
     if viewModel.busy {
       viewModel.stop()
     } else {

diff --git a/Examples/GenerativeAISample/ChatSample/ViewModels/ConversationViewModel.swift b/Examples/GenerativeAISample/ChatSample/ViewModels/ConversationViewModel.swift
@@ -36,7 +36,7 @@ class ConversationViewModel: ObservableObject {
   private var chatTask: Task<Void, Never>?
 
   init() {
-    model = GenerativeModel(name: "gemini-1.0-pro", apiKey: APIKey.default)
+    model = GenerativeModel(name: "gemini-1.5-flash-latest", apiKey: APIKey.default)
     chat = model.startChat()
   }
 

diff --git a/Examples/GenerativeAISample/FunctionCallingSample/Screens/FunctionCallingScreen.swift b/Examples/GenerativeAISample/FunctionCallingSample/Screens/FunctionCallingScreen.swift
@@ -65,6 +65,9 @@ struct FunctionCallingScreen: View {
             }
           }
         })
+        .onTapGesture {
+          focusedField = nil
+        }
       }
       InputField("Message...", text: $userPrompt) {
         Image(systemName: viewModel.busy ? "stop.circle.fill" : "arrow.up.circle.fill")

diff --git a/Examples/GenerativeAISample/FunctionCallingSample/ViewModels/FunctionCallingViewModel.swift b/Examples/GenerativeAISample/FunctionCallingSample/ViewModels/FunctionCallingViewModel.swift
@@ -39,7 +39,7 @@ class FunctionCallingViewModel: ObservableObject {
 
   init() {
     model = GenerativeModel(
-      name: "gemini-1.0-pro",
+      name: "gemini-1.5-flash-latest",
       apiKey: APIKey.default,
       tools: [Tool(functionDeclarations: [
         FunctionDeclaration(

diff --git a/Examples/GenerativeAISample/GenerativeAIMultimodalSample/Screens/PhotoReasoningScreen.swift b/Examples/GenerativeAISample/GenerativeAIMultimodalSample/Screens/PhotoReasoningScreen.swift
@@ -20,9 +20,17 @@ import SwiftUI
 struct PhotoReasoningScreen: View {
   @StateObject var viewModel = PhotoReasoningViewModel()
 
+  enum FocusedField: Hashable {
+    case message
+  }
+
+  @FocusState
+  var focusedField: FocusedField?
+
   var body: some View {
     VStack {
       MultimodalInputField(text: $viewModel.userInput, selection: $viewModel.selectedItems)
+        .focused($focusedField, equals: .message)
         .onSubmit {
           onSendTapped()
         }
@@ -47,11 +55,16 @@ struct PhotoReasoningScreen: View {
       }
     }
     .navigationTitle("Multimodal sample")
+    .onAppear {
+      focusedField = .message
+    }
   }
 
   // MARK: - Actions
 
   private func onSendTapped() {
+    focusedField = nil
+
     Task {
       await viewModel.reason()
     }

diff --git a/.../GenerativeAISample/GenerativeAIMultimodalSample/ViewModels/PhotoReasoningViewModel.swift b/.../GenerativeAISample/GenerativeAIMultimodalSample/ViewModels/PhotoReasoningViewModel.swift
@@ -44,7 +44,7 @@ class PhotoReasoningViewModel: ObservableObject {
   private var model: GenerativeModel?
 
   init() {
-    model = GenerativeModel(name: "gemini-1.0-pro-vision-latest", apiKey: APIKey.default)
+    model = GenerativeModel(name: "gemini-1.5-flash-latest", apiKey: APIKey.default)
   }
 
   func reason() async {

diff --git a/Examples/GenerativeAISample/GenerativeAITextSample/Screens/SummarizeScreen.swift b/Examples/GenerativeAISample/GenerativeAITextSample/Screens/SummarizeScreen.swift
@@ -28,19 +28,23 @@ struct SummarizeScreen: View {
 
   var body: some View {
     VStack {
-      Text("Enter some text, then tap on _Go_ to summarize it.")
-      HStack(alignment: .top) {
-        TextField("Enter text summarize", text: $userInput, axis: .vertical)
-          .textFieldStyle(.roundedBorder)
-          .onSubmit {
+      VStack(alignment: .leading) {
+        Text("Enter some text, then tap on _Go_ to summarize it.")
+          .padding(.horizontal, 6)
+        HStack(alignment: .top) {
+          TextField("Enter text summarize", text: $userInput, axis: .vertical)
+            .focused($focusedField, equals: .message)
+            .textFieldStyle(.roundedBorder)
+            .onSubmit {
+              onSummarizeTapped()
+            }
+          Button("Go") {
             onSummarizeTapped()
           }
-        Button("Go") {
-          onSummarizeTapped()
+          .padding(.top, 4)
         }
-        .padding(.top, 4)
       }
-      .padding([.horizontal, .bottom])
+      .padding(.horizontal, 16)
 
       List {
         HStack(alignment: .top) {
@@ -61,6 +65,8 @@ struct SummarizeScreen: View {
   }
 
   private func onSummarizeTapped() {
+    focusedField = nil
+
     Task {
       await viewModel.summarize(inputText: userInput)
     }

diff --git a/Examples/GenerativeAISample/GenerativeAITextSample/ViewModels/SummarizeViewModel.swift b/Examples/GenerativeAISample/GenerativeAITextSample/ViewModels/SummarizeViewModel.swift
@@ -32,7 +32,7 @@ class SummarizeViewModel: ObservableObject {
   private var model: GenerativeModel?
 
   init() {
-    model = GenerativeModel(name: "gemini-1.0-pro", apiKey: APIKey.default)
+    model = GenerativeModel(name: "gemini-1.5-flash-latest", apiKey: APIKey.default)
   }
 
   func summarize(inputText: String) async {

diff --git a/...rativeAISample/GenerativeAIUIComponents/Sources/GenerativeAIUIComponents/InputField.swift b/...rativeAISample/GenerativeAIUIComponents/Sources/GenerativeAIUIComponents/InputField.swift
@@ -60,10 +60,10 @@ public struct InputField<Label>: View where Label: View {
         }
 
         Button(action: submit, label: label)
-          .padding(.top, 4)
+          .padding(.bottom, 4)
       }
     }
-    .padding(.horizontal)
+    .padding(8)
   }
 }
 

diff --git a/...mple/GenerativeAIUIComponents/Sources/GenerativeAIUIComponents/MultimodalInputField.swift b/...mple/GenerativeAIUIComponents/Sources/GenerativeAIUIComponents/MultimodalInputField.swift
@@ -69,7 +69,7 @@ public struct MultimodalInputField: View {
         Button(action: showChooseAttachmentTypePicker) {
           Image(systemName: "plus")
         }
-        .padding(.top, 4)
+        .padding(.top, 10)
 
         VStack(alignment: .leading) {
           TextField(
@@ -110,7 +110,7 @@ public struct MultimodalInputField: View {
         Button(action: submit) {
           Text("Go")
         }
-        .padding(.top, 4)
+        .padding(.top, 8)
       }
     }
     .padding(.horizontal)

diff --git a/README.md b/README.md
@@ -19,7 +19,7 @@ For example, with just a few lines of code, you can access Gemini's multimodal c
 generate text from text-and-image input:
 
 ```swift
-let model = GenerativeModel(name: "gemini-1.5-pro-latest", apiKey: "YOUR_API_KEY")
+let model = GenerativeModel(name: "gemini-1.5-flash-latest", apiKey: "YOUR_API_KEY")
 let cookieImage = UIImage(...)
 let prompt = "Do these look store-bought or homemade?"
 

diff --git a/Sources/GoogleAI/Chat.swift b/Sources/GoogleAI/Chat.swift
@@ -30,7 +30,11 @@ public class Chat {
   /// model. This will be provided to the model for each message sent as context for the discussion.
   public var history: [ModelContent]
 
-  /// See ``sendMessage(_:)-3ify5``.
+  /// Sends a message using the existing history of this chat as context. If successful, the message
+  /// and response will be added to the history. If unsuccessful, history will remain unchanged.
+  /// - Parameter parts: The new content to send as a single chat message.
+  /// - Returns: The model's response if no error occurred.
+  /// - Throws: A ``GenerateContentError`` if an error occurred.
   public func sendMessage(_ parts: any ThrowingPartsRepresentable...) async throws
     -> GenerateContentResponse {
     return try await sendMessage([ModelContent(parts: parts)])
@@ -76,7 +80,10 @@ public class Chat {
     return result
   }
 
-  /// See ``sendMessageStream(_:)-4abs3``.
+  /// Sends a message using the existing history of this chat as context. If successful, the message
+  /// and response will be added to the history. If unsuccessful, history will remain unchanged.
+  /// - Parameter parts: The new content to send as a single chat message.
+  /// - Returns: A stream containing the model's response or an error if an error occurred.
   @available(macOS 12.0, *)
   public func sendMessageStream(_ parts: any ThrowingPartsRepresentable...)
     -> AsyncThrowingStream<GenerateContentResponse, Error> {

diff --git a/Sources/GoogleAI/CountTokensRequest.swift b/Sources/GoogleAI/CountTokensRequest.swift
@@ -17,7 +17,7 @@ import Foundation
 @available(iOS 15.0, macOS 11.0, macCatalyst 15.0, *)
 struct CountTokensRequest {
   let model: String
-  let contents: [ModelContent]
+  let generateContentRequest: GenerateContentRequest
   let options: RequestOptions
 }
 
@@ -45,7 +45,7 @@ public struct CountTokensResponse {
 @available(iOS 15.0, macOS 11.0, macCatalyst 15.0, *)
 extension CountTokensRequest: Encodable {
   enum CodingKeys: CodingKey {
-    case contents
+    case generateContentRequest
   }
 }
 

diff --git a/Sources/GoogleAI/GenerateContentRequest.swift b/Sources/GoogleAI/GenerateContentRequest.swift
@@ -31,6 +31,7 @@ struct GenerateContentRequest {
 @available(iOS 15.0, macOS 11.0, macCatalyst 15.0, *)
 extension GenerateContentRequest: Encodable {
   enum CodingKeys: String, CodingKey {
+    case model
     case contents
     case generationConfig
     case safetySettings

diff --git a/Sources/GoogleAI/GenerateContentResponse.swift b/Sources/GoogleAI/GenerateContentResponse.swift
@@ -45,11 +45,17 @@ public struct GenerateContentResponse {
       Logging.default.error("Could not get text from a response that had no candidates.")
       return nil
     }
-    guard let text = candidate.content.parts.first?.text else {
+    let textValues: [String] = candidate.content.parts.compactMap { part in
+      guard case let .text(text) = part else {
+        return nil
+      }
+      return text
+    }
+    guard textValues.count > 0 else {
       Logging.default.error("Could not get a text part from the first candidate.")
       return nil
     }
-    return text
+    return textValues.joined(separator: " ")
   }
 
   /// Returns function calls found in any `Part`s of the first candidate of the response, if any.

diff --git a/Sources/GoogleAI/GenerationConfig.swift b/Sources/GoogleAI/GenerationConfig.swift
@@ -70,6 +70,12 @@ public struct GenerationConfig {
   /// - `application/json`: JSON response in the candidates.
   public let responseMIMEType: String?
 
+  /// Output response schema of the generated candidate text.
+  ///
+  /// - Note: This only applies when the specified ``responseMIMEType`` supports a schema; currently
+  ///   this is limited to `application/json`.
+  public let responseSchema: Schema?
+
   /// Creates a new `GenerationConfig` value.
   ///
   /// - Parameters:
@@ -80,9 +86,11 @@ public struct GenerationConfig {
   ///   - maxOutputTokens: See ``maxOutputTokens``.
   ///   - stopSequences: See ``stopSequences``.
   ///   - responseMIMEType: See ``responseMIMEType``.
+  ///   - responseSchema: See ``responseSchema``.
   public init(temperature: Float? = nil, topP: Float? = nil, topK: Int? = nil,
               candidateCount: Int? = nil, maxOutputTokens: Int? = nil,
-              stopSequences: [String]? = nil, responseMIMEType: String? = nil) {
+              stopSequences: [String]? = nil, responseMIMEType: String? = nil,
+              responseSchema: Schema? = nil) {
     // Explicit init because otherwise if we re-arrange the above variables it changes the API
     // surface.
     self.temperature = temperature
@@ -92,6 +100,7 @@ public struct GenerationConfig {
     self.maxOutputTokens = maxOutputTokens
     self.stopSequences = stopSequences
     self.responseMIMEType = responseMIMEType
+    self.responseSchema = responseSchema
   }
 }
 

diff --git a/Sources/GoogleAI/GenerativeAIRequest.swift b/Sources/GoogleAI/GenerativeAIRequest.swift
@@ -26,20 +26,18 @@ protocol GenerativeAIRequest: Encodable {
 /// Configuration parameters for sending requests to the backend.
 @available(iOS 15.0, macOS 11.0, macCatalyst 15.0, *)
 public struct RequestOptions {
-  /// The request’s timeout interval in seconds; if not specified uses the default value for a
-  /// `URLRequest`.
-  let timeout: TimeInterval?
+  /// The request’s timeout interval in seconds.
+  let timeout: TimeInterval
 
   /// The API version to use in requests to the backend.
   let apiVersion: String
 
   /// Initializes a request options object.
   ///
   /// - Parameters:
-  ///   - timeout The request’s timeout interval in seconds; if not specified uses the default value
-  ///   for a `URLRequest`.
-  ///   - apiVersion The API version to use in requests to the backend; defaults to "v1beta".
-  public init(timeout: TimeInterval? = nil, apiVersion: String = "v1beta") {
+  ///   - timeout: The request’s timeout interval in seconds; defaults to 300 seconds (5 minutes).
+  ///   - apiVersion: The API version to use in requests to the backend; defaults to "v1beta".
+  public init(timeout: TimeInterval = 300.0, apiVersion: String = "v1beta") {
     self.timeout = timeout
     self.apiVersion = apiVersion
   }