Commit 94ac490: document VLM

Wauplin committed Sep 20, 2024
1 parent e0d39b7 commit 94ac490

Showing 2 changed files with 81 additions and 12 deletions.
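The `messages` annotation is relaxed from `List[Dict[str, str]]` to `List[Dict]` throughout both clients because, for vision-language models, the `content` field of a user message may itself be a list of typed parts rather than a plain string. A minimal sketch of the two shapes (values illustrative):

```py
# A text-only message still fits Dict[str, str] ...
text_message = {"role": "user", "content": "What is the capital of France?"}

# ... but a multimodal message nests a list of typed parts under "content",
# so the flat Dict[str, str] annotation no longer holds.
multimodal_message = {
    "role": "user",
    "content": [
        {"type": "image_url", "image_url": {"url": "https://example.com/cat.jpg"}},
        {"type": "text", "text": "What is in this image?"},
    ],
}
```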
46 changes: 40 additions & 6 deletions src/huggingface_hub/inference/_client.py
@@ -453,7 +453,7 @@ def automatic_speech_recognition(
@overload
def chat_completion( # type: ignore
self,
messages: List[Dict[str, str]],
messages: List[Dict],
*,
model: Optional[str] = None,
stream: Literal[False] = False,
@@ -478,7 +478,7 @@ def chat_completion( # type: ignore
@overload
def chat_completion( # type: ignore
self,
messages: List[Dict[str, str]],
messages: List[Dict],
*,
model: Optional[str] = None,
stream: Literal[True] = True,
@@ -503,7 +503,7 @@ def chat_completion( # type: ignore
@overload
def chat_completion(
self,
messages: List[Dict[str, str]],
messages: List[Dict],
*,
model: Optional[str] = None,
stream: bool = False,
@@ -527,7 +527,7 @@ def chat_completion

def chat_completion(
self,
messages: List[Dict[str, str]],
messages: List[Dict],
*,
model: Optional[str] = None,
stream: bool = False,
@@ -562,7 +562,7 @@ def chat_completion(
</Tip>
Args:
messages (List[Union[`SystemMessage`, `UserMessage`, `AssistantMessage`]]):
messages (List of [`ChatCompletionInputMessage`]):
Conversation history consisting of roles and content pairs.
model (`str`, *optional*):
The model to use for chat-completion. Can be a model ID hosted on the Hugging Face Hub or a URL to a deployed
@@ -665,7 +665,7 @@ def chat_completion(
)
```
Example (stream=True):
Example using streaming:
```py
>>> from huggingface_hub import InferenceClient
>>> messages = [{"role": "user", "content": "What is the capital of France?"}]
@@ -703,6 +703,40 @@ def chat_completion(
print(chunk.choices[0].delta.content)
```
Example using Image + Text as input:
```py
>>> import base64
>>> from huggingface_hub import InferenceClient
# provide a remote URL
>>> image_url = "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
# or a base64-encoded image
>>> image_path = "/path/to/image.jpeg"
>>> with open(image_path, "rb") as f:
...     base64_image = base64.b64encode(f.read()).decode("utf-8")
>>> image_url = f"data:image/jpeg;base64,{base64_image}"
>>> client = InferenceClient("mistralai/Mistral-Nemo-Instruct-2407")
>>> output = client.chat.completions.create(
...     messages=[
...         {
...             "role": "user",
...             "content": [
...                 {
...                     "type": "image_url",
...                     "image_url": {"url": image_url},
...                 },
...                 {
...                     "type": "text",
...                     "text": "Describe this image in one sentence.",
...                 },
...             ],
...         },
...     ],
... )
>>> output
A determined figure of Lady Liberty stands tall, holding a torch aloft, atop a pedestal on an island.
```
Example using tools:
```py
>>> client = InferenceClient("meta-llama/Meta-Llama-3-70B-Instruct")
```
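Both new examples inline a local image as a base64 data URL before passing it under `"image_url"`. A small helper capturing that pattern (the function name and MIME-type handling are illustrative, not part of the diff):

```py
import base64
import mimetypes


def to_data_url(image_path: str) -> str:
    """Encode a local image file as a data URL usable in an "image_url" content part."""
    mime_type = mimetypes.guess_type(image_path)[0] or "image/jpeg"
    with open(image_path, "rb") as f:
        encoded = base64.b64encode(f.read()).decode("utf-8")
    return f"data:{mime_type};base64,{encoded}"


# e.g. {"type": "image_url", "image_url": {"url": to_data_url("/path/to/image.jpeg")}}
```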
47 changes: 41 additions & 6 deletions src/huggingface_hub/inference/_generated/_async_client.py
@@ -488,7 +488,7 @@ async def automatic_speech_recognition(
@overload
async def chat_completion( # type: ignore
self,
messages: List[Dict[str, str]],
messages: List[Dict],
*,
model: Optional[str] = None,
stream: Literal[False] = False,
@@ -513,7 +513,7 @@ async def chat_completion( # type: ignore
@overload
async def chat_completion( # type: ignore
self,
messages: List[Dict[str, str]],
messages: List[Dict],
*,
model: Optional[str] = None,
stream: Literal[True] = True,
@@ -538,7 +538,7 @@ async def chat_completion( # type: ignore
@overload
async def chat_completion(
self,
messages: List[Dict[str, str]],
messages: List[Dict],
*,
model: Optional[str] = None,
stream: bool = False,
@@ -562,7 +562,7 @@ async def chat_completion(

async def chat_completion(
self,
messages: List[Dict[str, str]],
messages: List[Dict],
*,
model: Optional[str] = None,
stream: bool = False,
@@ -597,7 +597,7 @@ async def chat_completion(
</Tip>
Args:
messages (List[Union[`SystemMessage`, `UserMessage`, `AssistantMessage`]]):
messages (List of [`ChatCompletionInputMessage`]):
Conversation history consisting of roles and content pairs.
model (`str`, *optional*):
The model to use for chat-completion. Can be a model ID hosted on the Hugging Face Hub or a URL to a deployed
@@ -701,7 +701,7 @@ async def chat_completion(
)
```
Example (stream=True):
Example using streaming:
```py
# Must be run in an async context
>>> from huggingface_hub import AsyncInferenceClient
@@ -741,6 +741,41 @@ async def chat_completion(
print(chunk.choices[0].delta.content)
```
Example using Image + Text as input:
```py
# Must be run in an async context
>>> import base64
>>> from huggingface_hub import AsyncInferenceClient
# provide a remote URL
>>> image_url = "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
# or a base64-encoded image
>>> image_path = "/path/to/image.jpeg"
>>> with open(image_path, "rb") as f:
...     base64_image = base64.b64encode(f.read()).decode("utf-8")
>>> image_url = f"data:image/jpeg;base64,{base64_image}"
>>> client = AsyncInferenceClient("mistralai/Mistral-Nemo-Instruct-2407")
>>> output = await client.chat.completions.create(
...     messages=[
...         {
...             "role": "user",
...             "content": [
...                 {
...                     "type": "image_url",
...                     "image_url": {"url": image_url},
...                 },
...                 {
...                     "type": "text",
...                     "text": "Describe this image in one sentence.",
...                 },
...             ],
...         },
...     ],
... )
>>> output
A determined figure of Lady Liberty stands tall, holding a torch aloft, atop a pedestal on an island.
```
Example using tools:
```py
# Must be run in an async context
```
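The streaming and image-input examples above can also be combined. A sketch with the async client (the model ID is reused from the example above; any chat model with vision support should work, and `chat_completion(..., stream=True)` follows the streaming example already shown):

```py
# Must be run in an async context
import asyncio

from huggingface_hub import AsyncInferenceClient


async def main():
    client = AsyncInferenceClient("mistralai/Mistral-Nemo-Instruct-2407")
    messages = [
        {
            "role": "user",
            "content": [
                {"type": "image_url", "image_url": {"url": "https://example.com/photo.jpg"}},
                {"type": "text", "text": "Describe this image in one sentence."},
            ],
        }
    ]
    # stream=True yields chunks as they are generated
    async for chunk in await client.chat_completion(messages, max_tokens=100, stream=True):
        print(chunk.choices[0].delta.content)


asyncio.run(main())
```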
