updated docs
vedpatwardhan committed Sep 3, 2024
1 parent 64ecce5 commit 9780f9b
Showing 9 changed files with 198 additions and 141 deletions.
6 changes: 3 additions & 3 deletions api-reference/llm_queries/chat_completions.mdx
@@ -182,8 +182,8 @@ curl --request POST \
"response_format": "{ \"type\": \"json_mode\"}",
"seed": 11,
"stream_options": [
true,
"include_usage"
"include_usage",
true
],
"top_p": 0.5,
"tool_choice": "{\"type\": \"function\", \"function\": {\"name\": \"my_function\"}}",
@@ -203,7 +203,7 @@ url = "https://api.unify.ai/v0/chat/completions"

headers = {"Authorization": "Bearer <token>"}

json_input = {"messages": [{"content": "Tell me a joke", "role": "user"}], "model": "gpt-4o-mini@openai", "max_tokens": 1024, "stop": ["The End.", " is the answer."], "stream": False, "temperature": 0.9, "frequency_penalty": 1.5, "logit_bias": {"0": 10, "1": -75, "2": 90}, "logprobs": False, "top_logprobs": 15, "n": 15, "presence_penalty": -1.1, "response_format": "{ \"type\": \"json_mode\"}", "seed": 11, "stream_options": [True, "include_usage"], "top_p": 0.5, "tool_choice": "{\"type\": \"function\", \"function\": {\"name\": \"my_function\"}}", "parallel_tool_calls": True, "user": "some_user", "signature": "python", "use_custom_keys": True, "tags": True, "drop_params": True}
json_input = {"messages": [{"content": "Tell me a joke", "role": "user"}], "model": "gpt-4o-mini@openai", "max_tokens": 1024, "stop": ["The End.", " is the answer."], "stream": False, "temperature": 0.9, "frequency_penalty": 1.5, "logit_bias": {"0": 10, "1": -75, "2": 90}, "logprobs": False, "top_logprobs": 15, "n": 15, "presence_penalty": -1.1, "response_format": "{ \"type\": \"json_mode\"}", "seed": 11, "stream_options": ["include_usage", True], "top_p": 0.5, "tool_choice": "{\"type\": \"function\", \"function\": {\"name\": \"my_function\"}}", "parallel_tool_calls": True, "user": "some_user", "signature": "python", "use_custom_keys": True, "tags": True, "drop_params": True}

response = requests.request("POST", url, json=json_input, headers=headers)
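
Note that `stream_options` only takes effect when `stream` is true. Below is a hedged sketch of a streaming variant of the request above; it assumes the endpoint emits server-sent-event style lines when streaming, as OpenAI-compatible APIs typically do.

```python
import requests

url = "https://api.unify.ai/v0/chat/completions"
headers = {"Authorization": "Bearer <token>"}

json_input = {
    "messages": [{"content": "Tell me a joke", "role": "user"}],
    "model": "gpt-4o-mini@openai",
    "stream": True,
    "stream_options": ["include_usage", True],
}

# stream=True keeps the HTTP connection open so chunks can be read as they arrive.
with requests.post(url, json=json_input, headers=headers, stream=True) as response:
    for line in response.iter_lines():
        if line:
            print(line.decode("utf-8"))  # each non-empty line is one streamed chunk
```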

4 changes: 2 additions & 2 deletions api-reference/openapi.json
@@ -3389,8 +3389,8 @@
"title": "Stream Options",
"description": "Options for streaming response. Only set this when you set `stream: true`.",
"example": [
true,
"include_usage"
"include_usage",
true
]
},
"top_p": {
5 changes: 3 additions & 2 deletions python/queries/chat.mdx
@@ -37,8 +37,9 @@ Initializes the ChatBot object.

- `provider` - Name of the provider. If None, endpoint must be provided.

- `api_key` - API key for accessing the Unify API. If None, it attempts to retrieve the API key from the
environment variable UNIFY_KEY. Defaults to None.
- `api_key` - API key for accessing the Unify API. If None, it attempts to
retrieve the API key from the environment variable UNIFY_KEY.
Defaults to None.
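
For illustration, a minimal instantiation sketch under the conventions above (it assumes `ChatBot` is importable from the package root and accepts the `model@provider` endpoint string used elsewhere in these docs; the endpoint value is hypothetical):

```python
from unify import ChatBot  # assumes the class is exported at the package root

# api_key=None falls back to the UNIFY_KEY environment variable, as described above.
bot = ChatBot(endpoint="gpt-4o-mini@openai", api_key=None)
```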


**Raises**:
152 changes: 87 additions & 65 deletions python/queries/clients.mdx
@@ -59,100 +59,120 @@ Generate content using the Unify API.

- `system_prompt` - An optional string containing the system prompt.

- `messages` - A list of messages comprising the conversation so far. If provided, user_prompt must be None.
- `messages` - A list of messages comprising the conversation so far.
If provided, user_prompt must be None.

- `max_tokens` - The maximum number of tokens that can be generated in the chat completion.
The total length of input tokens and generated tokens is limited by the model's context length.
Defaults to the provider's default max_tokens when the value is None.
- `max_tokens` - The maximum number of tokens that can be generated in the chat
completion. The total length of input tokens and generated tokens is limited
by the model's context length. Defaults to the provider's default max_tokens
when the value is None.

- `stop` - Up to 4 sequences where the API will stop generating further tokens.

- `stream` - If True, generates content as a stream. If False, generates content as a single response.
Defaults to False.
- `stream` - If True, generates content as a stream. If False, generates content
as a single response. Defaults to False.

- `temperature` - What sampling temperature to use, between 0 and 2.
Higher values like 0.8 will make the output more random,
while lower values like 0.2 will make it more focused and deterministic.
It is generally recommended to alter this or top_p, but not both.
Defaults to the provider's default temperature when the value is None.

- `frequency_penalty` - Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing
frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.
- `frequency_penalty` - Number between -2.0 and 2.0. Positive values penalize new
tokens based on their existing frequency in the text so far, decreasing the
model's likelihood to repeat the same line verbatim.

- `logit_bias` - Modify the likelihood of specified tokens appearing in the completion.
Accepts a JSON object that maps tokens (specified by their token ID in the tokenizer) to an associated bias
value from -100 to 100. Mathematically, the bias is added to the logits generated by the model prior to
sampling. The exact effect will vary per model, but values between -1 and 1 should decrease or increase
likelihood of selection; values like -100 or 100 should result in a ban or exclusive selection of the
relevant token.
- `logit_bias` - Modify the likelihood of specified tokens appearing in the
completion. Accepts a JSON object that maps tokens (specified by their token
ID in the tokenizer) to an associated bias value from -100 to 100.
Mathematically, the bias is added to the logits generated by the model prior
to sampling. The exact effect will vary per model, but values between -1 and
1 should decrease or increase likelihood of selection; values like -100 or
100 should result in a ban or exclusive selection of the relevant token.

- `logprobs` - Whether to return log probabilities of the output tokens or not. If true, returns the log
probabilities of each output token returned in the content of message.
- `logprobs` - Whether to return log probabilities of the output tokens or not.
If true, returns the log probabilities of each output token returned in the
content of message.

- `top_logprobs` - An integer between 0 and 20 specifying the number of most likely tokens to return at each
token position, each with an associated log probability. logprobs must be set to true if this parameter
is used.
- `top_logprobs` - An integer between 0 and 20 specifying the number of most
likely tokens to return at each token position, each with an associated log
probability. logprobs must be set to true if this parameter is used.

- `n` - How many chat completion choices to generate for each input message. Note that you will be charged based
on the number of generated tokens across all of the choices. Keep n as 1 to minimize costs.
- `n` - How many chat completion choices to generate for each input message. Note
that you will be charged based on the number of generated tokens across all
of the choices. Keep n as 1 to minimize costs.

- `presence_penalty` - Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they
appear in the text so far, increasing the model's likelihood to talk about new topics.
- `presence_penalty` - Number between -2.0 and 2.0. Positive values penalize new
tokens based on whether they appear in the text so far, increasing the
model's likelihood to talk about new topics.

- `response_format` - An object specifying the format that the model must output.
Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured Outputs which ensures the
model will match your supplied JSON schema. Learn more in the Structured Outputs guide.
Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the message the model generates is
valid JSON.

- `seed` - If specified, a best effort attempt is made to sample deterministically, such that
repeated requests with the same seed and parameters should return the same result. Determinism is not
guaranteed, and you should refer to the system_fingerprint response parameter to monitor changes in the
backend.

- `stream_options` - Options for streaming response. Only set this when you set stream: true.

- `top_p` - An alternative to sampling with temperature, called nucleus sampling, where the
model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens
comprising the top 10% probability mass are considered. Generally recommended to alter this or temperature,
but not both.

- `tools` - A list of tools the model may call. Currently, only
functions are supported as a tool. Use this to provide a list of functions the model may generate JSON
inputs for. A max of 128 functions are supported.
Setting to `{ "type": "json_schema", "json_schema": {...} }` enables
Structured Outputs which ensures the model will match your supplied JSON
schema. Learn more in the Structured Outputs guide. Setting to
`{ "type": "json_object" }` enables JSON mode, which ensures the message the
model generates is valid JSON.

- `seed` - If specified, a best effort attempt is made to sample
deterministically, such that repeated requests with the same seed and
parameters should return the same result. Determinism is not guaranteed, and
you should refer to the system_fingerprint response parameter to monitor
changes in the backend.

- `stream_options` - Options for streaming response. Only set this when you set
`stream: true`.

- `top_p` - An alternative to sampling with temperature, called nucleus sampling,
where the model considers the results of the tokens with top_p probability
mass. So 0.1 means only the tokens comprising the top 10% probability mass
are considered. Generally recommended to alter this or temperature, but not
both.

- `tools` - A list of tools the model may call. Currently, only functions are
supported as a tool. Use this to provide a list of functions the model may
generate JSON inputs for. A max of 128 functions are supported.

- `tool_choice` - Controls which (if any) tool is called by the
model. none means the model will not call any tool and instead generates a message. auto means the model can
pick between generating a message or calling one or more tools. required means the model must call one or
more tools. Specifying a particular tool via `{"type": "function", "function": {"name": "my_function"}}`
forces the model to call that tool.
none is the default when no tools are present. auto is the default if tools are present.
model. none means the model will not call any tool and instead generates a
message. auto means the model can pick between generating a message or
calling one or more tools. required means the model must call one or more
tools. Specifying a particular tool via
- ``{"type"` - "function", "function": {"name": "my_function"}}` forces the model
to call that tool.
none is the default when no tools are present. auto is the default if tools
are present.

- `parallel_tool_calls` - Whether to enable parallel function calling during tool use.
- `parallel_tool_calls` - Whether to enable parallel function calling during tool
use.

- `use_custom_keys` - Whether to use custom API keys or our unified API keys with the backend provider.
- `use_custom_keys` - Whether to use custom API keys or our unified API keys
with the backend provider.

- `tags` - Arbitrary number of tags to classify this API query as needed. Helpful for
generally grouping queries across tasks and users, for logging purposes.
- `tags` - Arbitrary number of tags to classify this API query as needed. Helpful
for generally grouping queries across tasks and users, for logging purposes.

- `message_content_only` - If True, only return the message content
chat_completion.choices[0].message.content.strip(" ") from the OpenAI return.
Otherwise, the full response chat_completion is returned.
chat_completion.choices[0].message.content.strip(" ") from the OpenAI
return. Otherwise, the full response chat_completion is returned.
Defaults to True.

- `cache` - If True, then the arguments will be stored in a local cache file, and any future calls with
identical arguments will read from the cache instead of running the LLM query. This can help to
save costs and also debug multi-step LLM applications, while keeping early steps fixed.
- `cache` - If True, then the arguments will be stored in a local cache file, and
any future calls with identical arguments will read from the cache instead
of running the LLM query. This can help to save costs and also debug
multi-step LLM applications, while keeping early steps fixed.
This argument only has any effect when stream=False.

- `extra_headers` - Additional "passthrough" headers for the request which are provider-specific, and are not
part of the OpenAI standard. They are handled by the provider-specific API.
- `extra_headers` - Additional "passthrough" headers for the request which are
provider-specific, and are not part of the OpenAI standard. They are handled
by the provider-specific API.

- `extra_query` - Additional "passthrough" query parameters for the request which are provider-specific, and are
not part of the OpenAI standard. They are handled by the provider-specific API.
- `extra_query` - Additional "passthrough" query parameters for the request which
are provider-specific, and are not part of the OpenAI standard. They are
handled by the provider-specific API.

- `kwargs` - Additional "passthrough" JSON properties for the body of the request, which are provider-specific,
and are not part of the OpenAI standard. They will be handled by the provider-specific API.
- `kwargs` - Additional "passthrough" JSON properties for the body of the
request, which are provider-specific, and are not part of the OpenAI
standard. They will be handled by the provider-specific API.
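
The parameters above map directly onto keyword arguments of `generate`. Below is a minimal sketch with hypothetical values; it assumes the `Unify` client accepts the `model@provider` endpoint string used elsewhere in these docs and the `generate` signature described here.

```python
import os
from unify import Unify  # assumes the client is exported at the package root

# api_key falls back to the UNIFY_KEY environment variable when omitted.
client = Unify("gpt-4o-mini@openai", api_key=os.environ.get("UNIFY_KEY"))

# Non-streaming call combining a few of the parameters documented above.
joke = client.generate(
    user_prompt="Tell me a joke",
    system_prompt="You are a concise assistant.",
    max_tokens=256,
    temperature=0.7,
    stop=["The End."],
    stream=False,
)
# message_content_only defaults to True, so the return value is just the text.
print(joke)
```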


**Returns**:
@@ -328,7 +348,8 @@ Get the remaining credits left on your account.
class Unify(Client)
```

Class for interacting with the Unify chat completions endpoint in a synchronous manner.
Class for interacting with the Unify chat completions endpoint in a synchronous
manner.

<a id="queries.clients.AsyncUnify"></a>

@@ -338,6 +359,7 @@ Class for interacting with the Unify chat completions endpoint in a synchronous
class AsyncUnify(Client)
```

Class for interacting with the Unify chat completions endpoint in a synchronous manner.
Class for interacting with the Unify chat completions endpoint in an asynchronous
manner.
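
As a hedged illustration of the difference between the two clients (assuming `AsyncUnify` mirrors the `Unify` interface with awaitable methods, as the shared `Client` base suggests):

```python
import asyncio
from unify import AsyncUnify  # assumes the async client is exported at the package root

async def main():
    # Same endpoint string format as the synchronous client.
    client = AsyncUnify("gpt-4o-mini@openai")
    # generate is awaited here; parameters mirror the synchronous docstring above.
    reply = await client.generate(user_prompt="Tell me a joke", max_tokens=128)
    print(reply)

asyncio.run(main())
```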

<a id="queries.chat"></a>