From a3c9479cb509407fe98900ccdaa705e1da5f3486 Mon Sep 17 00:00:00 2001 From: JosselinSomervilleRoberts Date: Fri, 3 Nov 2023 16:54:21 -0700 Subject: [PATCH] Added almost all model deployments (except palm, neurips and simple) [NOT TESTED] --- .../benchmark/model_deployment_registry.py | 5 + src/helm/config/model_deployments.yaml | 1163 ++++++++++++++++- src/helm/config/model_metadatas.yaml | 16 +- 3 files changed, 1169 insertions(+), 15 deletions(-) diff --git a/src/helm/benchmark/model_deployment_registry.py b/src/helm/benchmark/model_deployment_registry.py index 2c478e23d5..fadd44be65 100644 --- a/src/helm/benchmark/model_deployment_registry.py +++ b/src/helm/benchmark/model_deployment_registry.py @@ -63,6 +63,11 @@ class ModelDeployment: # If unset, defaults to the same value as max_sequence_length. max_request_length: Optional[int] = None + # The max combined length of the model input and output tokens. + # Some models (like Anthropic/Claude and Megatron) have a specific limit on sequence length + max_tokens. + # If unset, defaults to INT_MAX (i.e. no limit). + max_sequence_and_generated_tokens_length: Optional[int] = None + @property def host_group(self) -> str: """ diff --git a/src/helm/config/model_deployments.yaml b/src/helm/config/model_deployments.yaml index 026bcd064d..61cd20e0a0 100644 --- a/src/helm/config/model_deployments.yaml +++ b/src/helm/config/model_deployments.yaml @@ -13,6 +13,7 @@ model_deployments: - name: ai21/j1-jumbo # DEPRECATED model_name: ai21/j1-jumbo tokenizer_name: ai21/j1 + max_sequence_length: 2047 client_spec: class_name: "helm.proxy.clients.ai21_client.AI21Client" args: {} @@ -26,6 +27,7 @@ model_deployments: - name: ai21/j1-large # DEPRECATED model_name: ai21/j1-large tokenizer_name: ai21/j1 + max_sequence_length: 2047 client_spec: class_name: "helm.proxy.clients.ai21_client.AI21Client" args: {} @@ -39,6 +41,7 @@ model_deployments: - name: ai21/j1-grande # DEPRECATED model_name: ai21/j1-grande tokenizer_name: ai21/j1 + max_sequence_length: 2047 client_spec: class_name: "helm.proxy.clients.ai21_client.AI21Client" args: {} @@ -52,6 +55,7 @@ model_deployments: - name: ai21/j1-grande-v2-beta # DEPRECATED model_name: ai21/j1-grande-v2-beta tokenizer_name: ai21/j1 + max_sequence_length: 2047 client_spec: class_name: "helm.proxy.clients.ai21_client.AI21Client" args: {} @@ -65,6 +69,7 @@ model_deployments: - name: ai21/j2-jumbo model_name: ai21/j2-jumbo tokenizer_name: ai21/j1 + max_sequence_length: 6000 client_spec: class_name: "helm.proxy.clients.ai21_client.AI21Client" args: {} @@ -78,6 +83,7 @@ model_deployments: - name: ai21/j2-large model_name: ai21/j2-large tokenizer_name: ai21/j1 + max_sequence_length: 2047 client_spec: class_name: "helm.proxy.clients.ai21_client.AI21Client" args: {} @@ -91,6 +97,7 @@ model_deployments: - name: ai21/j2-grande model_name: ai21/j2-grande tokenizer_name: ai21/j1 + max_sequence_length: 2047 client_spec: class_name: "helm.proxy.clients.ai21_client.AI21Client" args: {} @@ -108,6 +115,7 @@ model_deployments: - name: AlephAlpha/luminous-base model_name: AlephAlpha/luminous-base tokenizer_name: AlephAlpha/luminous-base + max_sequence_length: 2048 client_spec: class_name: "helm.proxy.clients.aleph_alpha_client.AlephAlphaClient" args: {} @@ -118,6 +126,7 @@ model_deployments: - name: AlephAlpha/luminous-extended model_name: AlephAlpha/luminous-extended tokenizer_name: AlephAlpha/luminous-extended + max_sequence_length: 2048 client_spec: class_name: "helm.proxy.clients.aleph_alpha_client.AlephAlphaClient" args: {} @@ -128,6 +137,7 @@
model_deployments: - name: AlephAlpha/luminous-supreme model_name: AlephAlpha/luminous-supreme tokenizer_name: AlephAlpha/luminous-supreme + max_sequence_length: 2048 client_spec: class_name: "helm.proxy.clients.aleph_alpha_client.AlephAlphaClient" args: {} @@ -144,6 +154,8 @@ model_deployments: - name: anthropic/claude-v1.3 model_name: anthropic/claude-v1.3 tokenizer_name: anthropic/claude + max_sequence_length: 8000 + max_sequence_and_generated_tokens_length: 9016 client_spec: class_name: "helm.proxy.clients.anthropic_client.AnthropicClient" args: {} @@ -154,6 +166,8 @@ model_deployments: - name: anthropic/claude-instant-v1 model_name: anthropic/claude-instant-v1 tokenizer_name: anthropic/claude + max_sequence_length: 8000 + max_sequence_and_generated_tokens_length: 9016 client_spec: class_name: "helm.proxy.clients.anthropic_client.AnthropicClient" args: {} @@ -164,6 +178,8 @@ model_deployments: - name: anthropic/claude-2.0 model_name: anthropic/claude-2.0 tokenizer_name: anthropic/claude + max_sequence_length: 8000 + max_sequence_and_generated_tokens_length: 9016 client_spec: class_name: "helm.proxy.clients.anthropic_client.AnthropicClient" args: {} @@ -174,6 +190,7 @@ model_deployments: - name: anthropic/stanford-online-all-v4-s3 # No longer served: "Connection to remote was lost." model_name: anthropic/stanford-online-all-v4-s3 tokenizer_name: huggingface/gpt2 + max_sequence_length: 8192 client_spec: class_name: "helm.proxy.clients.anthropic_client.AnthropicLegacyClient" args: {} @@ -188,6 +205,8 @@ model_deployments: - name: cohere/xlarge-20220609 model_name: cohere/xlarge-20220609 tokenizer_name: cohere/cohere + max_sequence_length: 2047 + max_request_length: 2048 client_spec: class_name: "helm.proxy.clients.cohere_client.CohereClient" args: {} @@ -198,6 +217,8 @@ model_deployments: - name: cohere/large-20220720 model_name: cohere/large-20220720 tokenizer_name: cohere/cohere + max_sequence_length: 2047 + max_request_length: 2048 client_spec: class_name: "helm.proxy.clients.cohere_client.CohereClient" args: {} @@ -208,6 +229,8 @@ model_deployments: - name: cohere/medium-20220720 model_name: cohere/medium-20220720 tokenizer_name: cohere/cohere + max_sequence_length: 2047 + max_request_length: 2048 client_spec: class_name: "helm.proxy.clients.cohere_client.CohereClient" args: {} @@ -218,6 +241,8 @@ model_deployments: - name: cohere/small-20220720 model_name: cohere/small-20220720 tokenizer_name: cohere/cohere + max_sequence_length: 2047 + max_request_length: 2048 client_spec: class_name: "helm.proxy.clients.cohere_client.CohereClient" args: {} @@ -228,6 +253,8 @@ model_deployments: - name: cohere/xlarge-20221108 model_name: cohere/xlarge-20221108 tokenizer_name: cohere/cohere + max_sequence_length: 2047 + max_request_length: 2048 client_spec: class_name: "helm.proxy.clients.cohere_client.CohereClient" args: {} @@ -238,6 +265,8 @@ model_deployments: - name: cohere/medium-20221108 model_name: cohere/medium-20221108 tokenizer_name: cohere/cohere + max_sequence_length: 2047 + max_request_length: 2048 client_spec: class_name: "helm.proxy.clients.cohere_client.CohereClient" args: {} @@ -248,6 +277,8 @@ model_deployments: - name: cohere/command-medium-beta model_name: cohere/command-medium-beta tokenizer_name: cohere/cohere + max_sequence_length: 2019 + max_request_length: 2020 client_spec: class_name: "helm.proxy.clients.cohere_client.CohereClient" args: {} @@ -258,6 +289,8 @@ model_deployments: - name: cohere/command-xlarge-beta model_name: cohere/command-xlarge-beta tokenizer_name:
cohere/cohere + max_sequence_length: 2019 + max_request_length: 2020 client_spec: class_name: "helm.proxy.clients.cohere_client.CohereClient" args: {} @@ -268,6 +301,8 @@ model_deployments: - name: cohere/command model_name: cohere/command tokenizer_name: cohere/cohere + max_sequence_length: 2019 # TODO: verify this + max_request_length: 2020 # TODO: verify this client_spec: class_name: "helm.proxy.clients.cohere_client.CohereClient" args: {} @@ -278,6 +313,8 @@ model_deployments: - name: cohere/command-light model_name: cohere/command-light tokenizer_name: cohere/cohere + max_sequence_length: 2019 # TODO: verify this + max_request_length: 2020 # TODO: verify this client_spec: class_name: "helm.proxy.clients.cohere_client.CohereClient" args: {} @@ -288,7 +325,39 @@ model_deployments: + # =========== Gooseai =========== # + + # ---------- EleutherAI ---------- # + - name: gooseai/gpt-neo-20b + model_name: eleutherai/gpt-neo-20b + tokenizer_name: EleutherAI/gpt-neox-20b + max_sequence_length: 2048 + max_request_length: 2049 + client_spec: + class_name: "helm.proxy.clients.goose_ai_client.GooseAIClient" + args: {} + window_service_spec: + class_name: "helm.benchmark.window_services.gptneox_window_service.GPTNeoXWindowService" + args: {} + + - name: gooseai/gpt-j-6b + model_name: eleutherai/gpt-j-6b + tokenizer_name: EleutherAI/gpt-j-6B + max_sequence_length: 2048 + max_request_length: 2049 + client_spec: + class_name: "helm.proxy.clients.goose_ai_client.GooseAIClient" + args: {} + window_service_spec: + class_name: "helm.benchmark.window_services.gptj_window_service.GPTJWindowService" + args: {} + # =============================== # + + + # =========== HuggingFace =========== # + + # ---------- Bigcode ---------- # - name: huggingface/santacoder model_name: bigcode/santacoder tokenizer_name: bigcode/santacoder @@ -308,14 +377,473 @@ model_deployments: window_service_spec: class_name: "helm.benchmark.window_services.starcoder_window_service.StarCoderWindowService" args: {} + + # ---------- EleutherAI ---------- # + - name: huggingface/gpt-j-6b + model_name: eleutherai/gpt-j-6b + tokenizer_name: EleutherAI/gpt-j-6B + max_sequence_length: 2048 + max_request_length: 2049 + client_spec: + class_name: "helm.proxy.clients.huggingface_client.HuggingFaceClient" + args: {} + window_service_spec: + class_name: "helm.benchmark.window_services.gptj_window_service.GPTJWindowService" + args: {} + + # ---------- OpenAI (HuggingFace) ---------- # + - name: huggingface/gpt2 + model_name: huggingface/gpt2 + tokenizer_name: huggingface/gpt2 + max_sequence_length: 1024 + max_request_length: 1025 + client_spec: + class_name: "helm.proxy.clients.huggingface_client.HuggingFaceClient" + args: {} + window_service_spec: + class_name: "helm.benchmark.window_services.gpt2_window_service.GPT2WindowService" + args: {} + # =================================== # + + + + # ========== HuggingFaceM4 ========== # + - name: HuggingFaceM4/idefics-9b + model_name: HuggingFaceM4/idefics-9b + tokenizer_name: HuggingFaceM4/idefics-9b + max_sequence_length: 2048 + client_spec: + class_name: "helm.proxy.clients.vision_language.idefics_client.IDEFICSClient" + args: {} + window_service_spec: + class_name: "helm.benchmark.window_services.huggingface_window_service.HuggingFaceWindowService" + args: {} + + - name: HuggingFaceM4/idefics-9b-instruct + model_name: HuggingFaceM4/idefics-9b-instruct + tokenizer_name: HuggingFaceM4/idefics-9b-instruct + max_sequence_length: 2048 + client_spec: + class_name: 
"helm.proxy.clients.vision_language.idefics_client.IDEFICSClient" + args: {} + window_service_spec: + class_name: "helm.benchmark.window_services.huggingface_window_service.HuggingFaceWindowService" + args: {} + + - name: HuggingFaceM4/idefics-80b + model_name: HuggingFaceM4/idefics-80b + tokenizer_name: HuggingFaceM4/idefics-80b + max_sequence_length: 2048 + client_spec: + class_name: "helm.proxy.clients.vision_language.idefics_client.IDEFICSClient" + args: {} + window_service_spec: + class_name: "helm.benchmark.window_services.huggingface_window_service.HuggingFaceWindowService" + args: {} + + - name: HuggingFaceM4/idefics-80b-instruct + model_name: HuggingFaceM4/idefics-80b-instruct + tokenizer_name: HuggingFaceM4/idefics-80b-instruct + max_sequence_length: 2048 + client_spec: + class_name: "helm.proxy.clients.vision_language.idefics_client.IDEFICSClient" + args: {} + window_service_spec: + class_name: "helm.benchmark.window_services.huggingface_window_service.HuggingFaceWindowService" + args: {} # =================================== # + # ========== Microsoft ========== # + - name: microsoft/TNLGv2_530B + model_name: microsoft/TNLGv2_530B + tokenizer_name: huggingface/gpt2 + max_sequence_length: 2047 + max_request_length: 2048 + client_spec: + class_name: "helm.proxy.clients.microsoft_client.MicrosoftClient" + args: {} + window_service_spec: + class_name: "helm.benchmark.window_services.mt_nlg_window_service.MTNLGWindowService" + args: {} + + - name: microsoft/TNLGv2_7B + model_name: microsoft/TNLGv2_7B + tokenizer_name: huggingface/gpt2 + max_sequence_length: 2047 + max_request_length: 2048 + client_spec: + class_name: "helm.proxy.clients.microsoft_client.MicrosoftClient" + args: {} + window_service_spec: + class_name: "helm.benchmark.window_services.mt_nlg_window_service.MTNLGWindowService" + args: {} + # =============================== # + + + + # ========== Nvidia ========== # + - name: nvidia/megatron-gpt2 + model_name: nvidia/megatron-gpt2 + tokenizer_name: huggingface/gpt2 + max_sequence_length: 1024 + client_spec: + class_name: "helm.proxy.clients.megatron_client.MegatronClient" + args: {} + window_service_spec: + class_name: "helm.benchmark.window_services.megatron_window_service.MegatronWindowService" + args: {} + # ============================ # + + + + # ========== OpenAI ========== # + + # ----- GPT 3 Models ----- # + # The list of models can be found here: https://beta.openai.com/docs/engines/gpt-3 + # DEPRECATED: Announced on July 06 2023 that these models will be shut down on January 04 2024. 
+ + - name: openai/davinci # DEPRECATED + model_name: openai/davinci + tokenizer_name: huggingface/gpt2 + max_sequence_length: 2048 + max_request_length: 2049 + client_spec: + class_name: "helm.proxy.clients.openai_client.OpenAIClient" + args: {} + window_service_spec: + class_name: "helm.benchmark.window_services.openai_window_service.OpenAIWindowService" + args: {} + + - name: openai/curie # DEPRECATED + model_name: openai/curie + tokenizer_name: huggingface/gpt2 + max_sequence_length: 2048 + max_request_length: 2049 + client_spec: + class_name: "helm.proxy.clients.openai_client.OpenAIClient" + args: {} + window_service_spec: + class_name: "helm.benchmark.window_services.openai_window_service.OpenAIWindowService" + args: {} + + - name: openai/babbage # DEPRECATED + model_name: openai/babbage + tokenizer_name: huggingface/gpt2 + max_sequence_length: 2048 + max_request_length: 2049 + client_spec: + class_name: "helm.proxy.clients.openai_client.OpenAIClient" + args: {} + window_service_spec: + class_name: "helm.benchmark.window_services.openai_window_service.OpenAIWindowService" + args: {} + + - name: openai/ada # DEPRECATED + model_name: openai/ada + tokenizer_name: huggingface/gpt2 + max_sequence_length: 2048 + max_request_length: 2049 + client_spec: + class_name: "helm.proxy.clients.openai_client.OpenAIClient" + args: {} + window_service_spec: + class_name: "helm.benchmark.window_services.openai_window_service.OpenAIWindowService" + args: {} + + - name: openai/text-davinci-003 # DEPRECATED + model_name: openai/text-davinci-003 + tokenizer_name: huggingface/gpt2 + max_sequence_length: 4000 + max_request_length: 4001 + client_spec: + class_name: "helm.proxy.clients.openai_client.OpenAIClient" + args: {} + window_service_spec: + class_name: "helm.benchmark.window_services.wider_openai_window_service.WiderOpenAIWindowService" + args: {} + + - name: openai/text-davinci-002 # DEPRECATED + model_name: openai/text-davinci-002 + tokenizer_name: huggingface/gpt2 + max_sequence_length: 4000 + max_request_length: 4001 + client_spec: + class_name: "helm.proxy.clients.openai_client.OpenAIClient" + args: {} + window_service_spec: + class_name: "helm.benchmark.window_services.wider_openai_window_service.WiderOpenAIWindowService" + args: {} + + - name: openai/text-davinci-001 # DEPRECATED + model_name: openai/text-davinci-001 + tokenizer_name: huggingface/gpt2 + max_sequence_length: 2048 + max_request_length: 2049 + client_spec: + class_name: "helm.proxy.clients.openai_client.OpenAIClient" + args: {} + window_service_spec: + class_name: "helm.benchmark.window_services.openai_window_service.OpenAIWindowService" + args: {} + + - name: openai/text-curie-001 # DEPRECATED + model_name: openai/text-curie-001 + tokenizer_name: huggingface/gpt2 + max_sequence_length: 2048 + max_request_length: 2049 + client_spec: + class_name: "helm.proxy.clients.openai_client.OpenAIClient" + args: {} + window_service_spec: + class_name: "helm.benchmark.window_services.openai_window_service.OpenAIWindowService" + args: {} + + - name: openai/text-babbage-001 # DEPRECATED + model_name: openai/text-babbage-001 + tokenizer_name: huggingface/gpt2 + max_sequence_length: 2048 + max_request_length: 2049 + client_spec: + class_name: "helm.proxy.clients.openai_client.OpenAIClient" + args: {} + window_service_spec: + class_name: "helm.benchmark.window_services.openai_window_service.OpenAIWindowService" + args: {} + + - name: openai/text-ada-001 # DEPRECATED + model_name: openai/text-ada-001 + tokenizer_name: huggingface/gpt2 + 
max_sequence_length: 2048 + max_request_length: 2049 + client_spec: + class_name: "helm.proxy.clients.openai_client.OpenAIClient" + args: {} + window_service_spec: + class_name: "helm.benchmark.window_services.openai_window_service.OpenAIWindowService" + args: {} + + + # ----- GPT 3.5 Turbo Models ----- # + # ChatGPT: https://openai.com/blog/chatgpt + + # The claimed sequence length is 4096, but as of 2023-03-07, the empirical usable + # sequence length is smaller at 4087 with one user input message and one assistant + # output message because ChatGPT uses special tokens for message roles and boundaries. + # We use a rounded-down sequence length of 4000 to account for these special tokens. + - name: openai/gpt-3.5-turbo-0301 + model_name: openai/gpt-3.5-turbo-0301 + tokenizer_name: openai/cl100k_base + max_sequence_length: 4000 + max_request_length: 4001 + client_spec: + class_name: "helm.proxy.clients.openai_client.OpenAIClient" + args: {} + window_service_spec: + class_name: "helm.benchmark.window_services.wider_openai_window_service.GPTTurboWindowService" + args: {} + + # The claimed sequence length is 4096, but as of 2023-03-07, the empirical usable + # sequence length is smaller at 4087 with one user input message and one assistant + # output message because ChatGPT uses special tokens for message roles and boundaries. + # We use a rounded-down sequence length of 4000 to account for these special tokens. + - name: openai/gpt-3.5-turbo-0613 + model_name: openai/gpt-3.5-turbo-0613 + tokenizer_name: openai/cl100k_base + max_sequence_length: 4000 + max_request_length: 4001 + client_spec: + class_name: "helm.proxy.clients.openai_client.OpenAIClient" + args: {} + window_service_spec: + class_name: "helm.benchmark.window_services.wider_openai_window_service.GPTTurboWindowService" + args: {} + + # Claimed length is 16,384; we round down to 16,000 for the same reasons as explained + # in the openai/gpt-3.5-turbo-0613 comment + - name: openai/gpt-3.5-turbo-16k-0613 + model_name: openai/gpt-3.5-turbo-16k-0613 + tokenizer_name: openai/cl100k_base + max_sequence_length: 16000 + max_request_length: 16001 + client_spec: + class_name: "helm.proxy.clients.openai_client.OpenAIClient" + args: {} + window_service_spec: + class_name: "helm.benchmark.window_services.wider_openai_window_service.GPTTurbo16KWindowService" + args: {} + + + # ----- GPT 4 Models ----- # + + - name: openai/gpt-4-0314 + model_name: openai/gpt-4-0314 + tokenizer_name: openai/cl100k_base + max_sequence_length: 8192 + max_request_length: 8193 + client_spec: + class_name: "helm.proxy.clients.openai_client.OpenAIClient" + args: {} + window_service_spec: + class_name: "helm.benchmark.window_services.wider_openai_window_service.GPT4WindowService" + args: {} + + - name: openai/gpt-4-32k-0314 + model_name: openai/gpt-4-32k-0314 + tokenizer_name: openai/cl100k_base + max_sequence_length: 32768 + max_request_length: 32769 + client_spec: + class_name: "helm.proxy.clients.openai_client.OpenAIClient" + args: {} + window_service_spec: + class_name: "helm.benchmark.window_services.wider_openai_window_service.GPT432KWindowService" + args: {} + + - name: openai/gpt-4-0613 + model_name: openai/gpt-4-0613 + tokenizer_name: openai/cl100k_base + max_sequence_length: 8192 + max_request_length: 8193 + client_spec: + class_name: "helm.proxy.clients.openai_client.OpenAIClient" + args: {} + window_service_spec: + class_name: "helm.benchmark.window_services.wider_openai_window_service.GPT4WindowService" + args: {} + + - name: openai/gpt-4-32k-0613 + 
model_name: openai/gpt-4-32k-0613 + tokenizer_name: openai/cl100k_base + max_sequence_length: 32768 + max_request_length: 32769 + client_spec: + class_name: "helm.proxy.clients.openai_client.OpenAIClient" + args: {} + window_service_spec: + class_name: "helm.benchmark.window_services.wider_openai_window_service.GPT432KWindowService" + args: {} + + + # ----- Codex Models ----- # + # DEPRECATED: Codex models were shut down on March 23 2023. + + - name: openai/code-davinci-002 # DEPRECATED + model_name: openai/code-davinci-002 + tokenizer_name: huggingface/gpt2 + max_sequence_length: 4000 + max_request_length: 4001 + client_spec: + class_name: "helm.proxy.clients.openai_client.OpenAIClient" + args: {} + window_service_spec: + class_name: "helm.benchmark.window_services.wider_openai_window_service.WiderOpenAIWindowService" + args: {} + + - name: openai/code-davinci-001 # DEPRECATED + model_name: openai/code-davinci-001 + tokenizer_name: huggingface/gpt2 + max_sequence_length: 2048 + max_request_length: 2049 + client_spec: + class_name: "helm.proxy.clients.openai_client.OpenAIClient" + args: {} + window_service_spec: + class_name: "helm.benchmark.window_services.openai_window_service.OpenAIWindowService" + args: {} + + - name: openai/code-cushman-001 # DEPRECATED + model_name: openai/code-cushman-001 + tokenizer_name: huggingface/gpt2 + max_sequence_length: 2048 + max_request_length: 2049 + client_spec: + class_name: "helm.proxy.clients.openai_client.OpenAIClient" + args: {} + window_service_spec: + class_name: "helm.benchmark.window_services.openai_window_service.OpenAIWindowService" + args: {} + + + # ----- Text Similarity Models ----- # + # OpenAI similarity embedding models: https://beta.openai.com/docs/guides/embeddings + # The number of parameters is guessed based on the number of parameters of the + # corresponding GPT-3 model. + # DEPRECATED: Announced on July 06 2023 that first generation embeddings models + # will be shut down on January 04 2024.
+ + - name: openai/text-similarity-davinci-001 # DEPRECATED + model_name: openai/text-similarity-davinci-001 + tokenizer_name: huggingface/gpt2 + max_sequence_length: 2048 + max_request_length: 2049 + client_spec: + class_name: "helm.proxy.clients.openai_client.OpenAIClient" + args: {} + window_service_spec: + class_name: "helm.benchmark.window_services.openai_window_service.OpenAIWindowService" + args: {} + + - name: openai/text-similarity-curie-001 # DEPRECATED + model_name: openai/text-similarity-curie-001 + tokenizer_name: huggingface/gpt2 + max_sequence_length: 2048 + max_request_length: 2049 + client_spec: + class_name: "helm.proxy.clients.openai_client.OpenAIClient" + args: {} + window_service_spec: + class_name: "helm.benchmark.window_services.openai_window_service.OpenAIWindowService" + args: {} + + - name: openai/text-similarity-babbage-001 # DEPRECATED + model_name: openai/text-similarity-babbage-001 + tokenizer_name: huggingface/gpt2 + max_sequence_length: 2048 + max_request_length: 2049 + client_spec: + class_name: "helm.proxy.clients.openai_client.OpenAIClient" + args: {} + window_service_spec: + class_name: "helm.benchmark.window_services.openai_window_service.OpenAIWindowService" + args: {} + + - name: openai/text-similarity-ada-001 # DEPRECATED + model_name: openai/text-similarity-ada-001 + tokenizer_name: huggingface/gpt2 + max_sequence_length: 2048 + max_request_length: 2049 + client_spec: + class_name: "helm.proxy.clients.openai_client.OpenAIClient" + args: {} + window_service_spec: + class_name: "helm.benchmark.window_services.openai_window_service.OpenAIWindowService" + args: {} + + - name: openai/text-embedding-ada-002 # DEPRECATED + model_name: openai/text-embedding-ada-002 + tokenizer_name: huggingface/gpt2 + max_sequence_length: 2048 + max_request_length: 2049 + client_spec: + class_name: "helm.proxy.clients.openai_client.OpenAIClient" + args: {} + window_service_spec: + class_name: "helm.benchmark.window_services.openai_window_service.OpenAIWindowService" + args: {} + # ============================ # + + + # =========== Together =========== # - - name: together/bloom # Removed from together: "bloom is not a supported model" + + # ---------- BigScience ---------- # + - name: together/bloom # DEPRECATED: Removed from together model_name: bigscience/bloom tokenizer_name: bigscience/bloom + max_sequence_length: 2048 + max_request_length: 2049 client_spec: class_name: "helm.proxy.clients.together_client.TogetherClient" args: {} @@ -323,13 +851,642 @@ model_deployments: class_name: "helm.benchmark.window_services.bloom_window_service.BloomWindowService" args: {} - - name: together/t0pp # Removed from together: "t0pp is not a supported model" + - name: together/t0pp # DEPRECATED: Removed from together model_name: bigscience/t0pp tokenizer_name: bigscience/T0pp + max_sequence_length: 1024 client_spec: class_name: "helm.proxy.clients.together_client.TogetherClient" args: {} window_service_spec: class_name: "helm.benchmark.window_services.t0pp_window_service.T0ppWindowService" args: {} - # ================================ # \ No newline at end of file + + # ---------- Databricks ---------- # + - name: together/dolly-v2-3b + model_name: databricks/dolly-v2-3b + tokenizer_name: EleutherAI/gpt-neox-20b + max_sequence_length: 2048 + max_request_length: 2049 + client_spec: + class_name: "helm.proxy.clients.together_client.TogetherClient" + args: {} + window_service_spec: + class_name: "helm.benchmark.window_services.gptneox_window_service.GPTNeoXWindowService" + args: {} + + - 
name: together/dolly-v2-7b + model_name: databricks/dolly-v2-7b + tokenizer_name: EleutherAI/gpt-neox-20b + max_sequence_length: 2048 + max_request_length: 2049 + client_spec: + class_name: "helm.proxy.clients.together_client.TogetherClient" + args: {} + window_service_spec: + class_name: "helm.benchmark.window_services.gptneox_window_service.GPTNeoXWindowService" + args: {} + + - name: together/dolly-v2-12b + model_name: databricks/dolly-v2-12b + tokenizer_name: EleutherAI/gpt-neox-20b + max_sequence_length: 2048 + max_request_length: 2049 + client_spec: + class_name: "helm.proxy.clients.together_client.TogetherClient" + args: {} + window_service_spec: + class_name: "helm.benchmark.window_services.gptneox_window_service.GPTNeoXWindowService" + args: {} + + # ---------- EleutherAI ---------- # + - name: together/gpt-j-6b + model_name: eleutherai/gpt-j-6b + tokenizer_name: EleutherAI/gpt-j-6B + max_sequence_length: 2048 + max_request_length: 2049 + client_spec: + class_name: "helm.proxy.clients.together_client.TogetherClient" + args: {} + window_service_spec: + class_name: "helm.benchmark.window_services.gptj_window_service.GPTJWindowService" + args: {} + + - name: together/gpt-neox-20b + model_name: eleutherai/gpt-neox-20b + tokenizer_name: EleutherAI/gpt-neox-20b + max_sequence_length: 2048 + max_request_length: 2049 + client_spec: + class_name: "helm.proxy.clients.together_client.TogetherClient" + args: {} + window_service_spec: + class_name: "helm.benchmark.window_services.gptneox_window_service.GPTNeoXWindowService" + args: {} + + - name: together/pythia-1b-v0 + model_name: eleutherai/pythia-1b-v0 + tokenizer_name: EleutherAI/gpt-neox-20b + max_sequence_length: 2048 + max_request_length: 2049 + client_spec: + class_name: "helm.proxy.clients.together_client.TogetherClient" + args: {} + window_service_spec: + class_name: "helm.benchmark.window_services.gptneox_window_service.GPTNeoXWindowService" + args: {} + + - name: together/pythia-2.8b-v0 + model_name: eleutherai/pythia-2.8b-v0 + tokenizer_name: EleutherAI/gpt-neox-20b + max_sequence_length: 2048 + max_request_length: 2049 + client_spec: + class_name: "helm.proxy.clients.together_client.TogetherClient" + args: {} + window_service_spec: + class_name: "helm.benchmark.window_services.gptneox_window_service.GPTNeoXWindowService" + args: {} + + - name: together/pythia-6.9b + model_name: eleutherai/pythia-6.9b + tokenizer_name: EleutherAI/gpt-neox-20b + max_sequence_length: 2048 + max_request_length: 2049 + client_spec: + class_name: "helm.proxy.clients.together_client.TogetherClient" + args: {} + window_service_spec: + class_name: "helm.benchmark.window_services.gptneox_window_service.GPTNeoXWindowService" + args: {} + + - name: together/pythia-12b-v0 + model_name: eleutherai/pythia-12b-v0 + tokenizer_name: EleutherAI/gpt-neox-20b + max_sequence_length: 2048 + max_request_length: 2049 + client_spec: + class_name: "helm.proxy.clients.together_client.TogetherClient" + args: {} + window_service_spec: + class_name: "helm.benchmark.window_services.gptneox_window_service.GPTNeoXWindowService" + args: {} + + # ---------- Google ---------- # + - name: together/t5-11b + model_name: google/t5-11b + tokenizer_name: google/t5-11b + max_sequence_length: 511 + client_spec: + class_name: "helm.proxy.clients.together_client.TogetherClient" + args: {} + window_service_spec: + class_name: "helm.benchmark.window_services.t511b_window_service.T511bWindowService" + args: {} + + - name: together/flan-t5-xxl + model_name: google/flan-t5-xxl + tokenizer_name: 
google/flan-t5-xxl + max_sequence_length: 511 + client_spec: + class_name: "helm.proxy.clients.together_client.TogetherClient" + args: {} + window_service_spec: + class_name: "helm.benchmark.window_services.flan_t5_window_service.FlanT5WindowService" + args: {} + + - name: together/ul2 + model_name: google/ul2 + tokenizer_name: google/ul2 + max_sequence_length: 511 + client_spec: + class_name: "helm.proxy.clients.together_client.TogetherClient" + args: {} + window_service_spec: + class_name: "helm.benchmark.window_services.ul2_window_service.UL2WindowService" + args: {} + + # ---------- HazyResearch ---------- # + - name: together/h3-2.7b + model_name: hazyresearch/h3-2.7b + tokenizer_name: huggingface/gpt2 + max_sequence_length: 1024 + max_request_length: 1025 + client_spec: + class_name: "helm.proxy.clients.together_client.TogetherClient" + args: {} + window_service_spec: + class_name: "helm.benchmark.window_services.gpt2_window_service.GPT2WindowService" + args: {} + + # ---------- Lmsys ---------- # + - name: together/vicuna-7b-v1.3 + model_name: lmsys/vicuna-7b-v1.3 + tokenizer_name: hf-internal-testing/llama-tokenizer + max_sequence_length: 2048 + client_spec: + class_name: "helm.proxy.clients.together_client.TogetherClient" + args: {} + window_service_spec: + class_name: "helm.benchmark.window_services.llama_window_service.LlamaWindowService" + args: {} + + - name: together/vicuna-13b-v1.3 + model_name: lmsys/vicuna-13b-v1.3 + tokenizer_name: hf-internal-testing/llama-tokenizer + max_sequence_length: 2048 + client_spec: + class_name: "helm.proxy.clients.together_client.TogetherClient" + args: {} + window_service_spec: + class_name: "helm.benchmark.window_services.llama_window_service.LlamaWindowService" + args: {} + + # ---------- Meta ---------- # + - name: together/llama-7b + model_name: meta/llama-7b + tokenizer_name: hf-internal-testing/llama-tokenizer + max_sequence_length: 2048 + client_spec: + class_name: "helm.proxy.clients.together_client.TogetherClient" + args: {} + window_service_spec: + class_name: "helm.benchmark.window_services.llama_window_service.LlamaWindowService" + args: {} + + - name: together/llama-13b + model_name: meta/llama-13b + tokenizer_name: hf-internal-testing/llama-tokenizer + max_sequence_length: 2048 + client_spec: + class_name: "helm.proxy.clients.together_client.TogetherClient" + args: {} + window_service_spec: + class_name: "helm.benchmark.window_services.llama_window_service.LlamaWindowService" + args: {} + + - name: together/llama-30b + model_name: meta/llama-30b + tokenizer_name: hf-internal-testing/llama-tokenizer + max_sequence_length: 2048 + client_spec: + class_name: "helm.proxy.clients.together_client.TogetherClient" + args: {} + window_service_spec: + class_name: "helm.benchmark.window_services.llama_window_service.LlamaWindowService" + args: {} + + - name: together/llama-65b + model_name: meta/llama-65b + tokenizer_name: hf-internal-testing/llama-tokenizer + max_sequence_length: 2048 + client_spec: + class_name: "helm.proxy.clients.together_client.TogetherClient" + args: {} + window_service_spec: + class_name: "helm.benchmark.window_services.llama_window_service.LlamaWindowService" + args: {} + + - name: together/llama-2-7b + model_name: meta/llama-2-7b + tokenizer_name: meta-llama/Llama-2-7b-hf + max_sequence_length: 4096 + max_request_length: 1000000000000000019884624838656 + client_spec: + class_name: "helm.proxy.clients.together_client.TogetherClient" + args: {} + window_service_spec: + class_name: 
"helm.benchmark.window_services.llama_window_service.Llama2WindowService" + args: {} + + - name: together/llama-2-13b + model_name: meta/llama-2-13b + tokenizer_name: meta-llama/Llama-2-7b-hf + max_sequence_length: 4096 + max_request_length: 1000000000000000019884624838656 + client_spec: + class_name: "helm.proxy.clients.together_client.TogetherClient" + args: {} + window_service_spec: + class_name: "helm.benchmark.window_services.llama_window_service.Llama2WindowService" + args: {} + + - name: together/llama-2-70b + model_name: meta/llama-2-70b + tokenizer_name: meta-llama/Llama-2-7b-hf + max_sequence_length: 4096 + max_request_length: 1000000000000000019884624838656 + client_spec: + class_name: "helm.proxy.clients.together_client.TogetherClient" + args: {} + window_service_spec: + class_name: "helm.benchmark.window_services.llama_window_service.Llama2WindowService" + args: {} + + - name: together/opt-175b + model_name: meta/opt-175b + tokenizer_name: facebook/opt-66b + max_sequence_length: 2048 + max_request_length: 2049 + client_spec: + class_name: "helm.proxy.clients.together_client.TogetherClient" + args: {} + window_service_spec: + class_name: "helm.benchmark.window_services.opt_window_service.OPTWindowService" + args: {} + + - name: together/opt-66b + model_name: meta/opt-66b + tokenizer_name: facebook/opt-66b + max_sequence_length: 2048 + max_request_length: 2049 + client_spec: + class_name: "helm.proxy.clients.together_client.TogetherClient" + args: {} + window_service_spec: + class_name: "helm.benchmark.window_services.opt_window_service.OPTWindowService" + args: {} + + - name: together/opt-6.7b + model_name: meta/opt-6.7b + tokenizer_name: facebook/opt-66b + max_sequence_length: 2048 + max_request_length: 2049 + client_spec: + class_name: "helm.proxy.clients.together_client.TogetherClient" + args: {} + window_service_spec: + class_name: "helm.benchmark.window_services.opt_window_service.OPTWindowService" + args: {} + + - name: together/opt-1.3b + model_name: meta/opt-1.3b + tokenizer_name: facebook/opt-66b + max_sequence_length: 2048 + max_request_length: 2049 + client_spec: + class_name: "helm.proxy.clients.together_client.TogetherClient" + args: {} + window_service_spec: + class_name: "helm.benchmark.window_services.opt_window_service.OPTWindowService" + args: {} + + # ----------- MistralAI ----------- # + - name: together/mistral-7b-v0.1 + model_name: mistralai/mistral-7b-v0.1 + tokenizer_name: mistralai/Mistral-7B-v0.1 + max_sequence_length: 1000000000000000019884624838656 + client_spec: + class_name: "helm.proxy.clients.together_client.TogetherClient" + args: {} + window_service_spec: + class_name: "helm.benchmark.window_services.huggingface_window_service.HuggingFaceWindowService" + args: {} + + # ----------- MosaicML ----------- # + - name: together/mpt-7b + model_name: mosaicml/mpt-7b + tokenizer_name: EleutherAI/gpt-neox-20b + max_sequence_length: 2048 + max_request_length: 2049 + client_spec: + class_name: "helm.proxy.clients.together_client.TogetherClient" + args: {} + window_service_spec: + class_name: "helm.benchmark.window_services.gptneox_window_service.GPTNeoXWindowService" + args: {} + + - name: together/mpt-instruct-7b + model_name: mosaicml/mpt-instruct-7b + tokenizer_name: EleutherAI/gpt-neox-20b + max_sequence_length: 2048 + max_request_length: 2049 + client_spec: + class_name: "helm.proxy.clients.together_client.TogetherClient" + args: {} + window_service_spec: + class_name: "helm.benchmark.window_services.gptneox_window_service.GPTNeoXWindowService" + 
args: {} + + - name: together/mpt-30b + model_name: mosaicml/mpt-30b + tokenizer_name: EleutherAI/gpt-neox-20b + max_sequence_length: 2048 + max_request_length: 2049 + client_spec: + class_name: "helm.proxy.clients.together_client.TogetherClient" + args: {} + window_service_spec: + class_name: "helm.benchmark.window_services.gptneox_window_service.GPTNeoXWindowService" + args: {} + + - name: together/mpt-instruct-30b + model_name: mosaicml/mpt-instruct-30b + tokenizer_name: EleutherAI/gpt-neox-20b + max_sequence_length: 2048 + max_request_length: 2049 + client_spec: + class_name: "helm.proxy.clients.together_client.TogetherClient" + args: {} + window_service_spec: + class_name: "helm.benchmark.window_services.gptneox_window_service.GPTNeoXWindowService" + args: {} + + # ----------- StabilityAI ----------- # + - name: together/stablelm-base-alpha-3b + model_name: stabilityai/stablelm-base-alpha-3b + tokenizer_name: EleutherAI/gpt-neox-20b + max_sequence_length: 4096 + max_request_length: 4097 + client_spec: + class_name: "helm.proxy.clients.together_client.TogetherClient" + args: {} + window_service_spec: + class_name: "helm.benchmark.window_services.gptneox_window_service.StableLMAlphaWindowService" + args: {} + + - name: together/stablelm-base-alpha-7b + model_name: stabilityai/stablelm-base-alpha-7b + tokenizer_name: EleutherAI/gpt-neox-20b + max_sequence_length: 4096 + max_request_length: 4097 + client_spec: + class_name: "helm.proxy.clients.together_client.TogetherClient" + args: {} + window_service_spec: + class_name: "helm.benchmark.window_services.gptneox_window_service.StableLMAlphaWindowService" + args: {} + + # ----------- Stanford ----------- # + - name: together/alpaca-7b + model_name: stanford/alpaca-7b + tokenizer_name: hf-internal-testing/llama-tokenizer + max_sequence_length: 2048 + client_spec: + class_name: "helm.proxy.clients.together_client.TogetherClient" + args: {} + window_service_spec: + class_name: "helm.benchmark.window_services.llama_window_service.LlamaWindowService" + args: {} + + # ----------- Tiiuae ----------- # + - name: together/falcon-7b + model_name: tiiuae/falcon-7b + tokenizer_name: tiiuae/falcon-7b + max_sequence_length: 2048 + client_spec: + class_name: "helm.proxy.clients.together_client.TogetherClient" + args: {} + window_service_spec: + class_name: "helm.benchmark.window_services.huggingface_window_service.HuggingFaceWindowService" + args: {} + + - name: together/falcon-7b-instruct + model_name: tiiuae/falcon-7b-instruct + tokenizer_name: tiiuae/falcon-7b + max_sequence_length: 2048 + client_spec: + class_name: "helm.proxy.clients.together_client.TogetherClient" + args: {} + window_service_spec: + class_name: "helm.benchmark.window_services.huggingface_window_service.HuggingFaceWindowService" + args: {} + + - name: together/falcon-40b + model_name: tiiuae/falcon-40b + tokenizer_name: tiiuae/falcon-7b + max_sequence_length: 2048 + client_spec: + class_name: "helm.proxy.clients.together_client.TogetherClient" + args: {} + window_service_spec: + class_name: "helm.benchmark.window_services.huggingface_window_service.HuggingFaceWindowService" + args: {} + + - name: together/falcon-40b-instruct + model_name: tiiuae/falcon-40b-instruct + tokenizer_name: tiiuae/falcon-7b + max_sequence_length: 2048 + client_spec: + class_name: "helm.proxy.clients.together_client.TogetherClient" + args: {} + window_service_spec: + class_name: "helm.benchmark.window_services.huggingface_window_service.HuggingFaceWindowService" + args: {} + + # ---------- Together ---------- 
# + # These are models fine-tuned by Together (and not simply hosted by Together). + - name: together/gpt-jt-6b-v1 + model_name: together/gpt-jt-6b-v1 + tokenizer_name: EleutherAI/gpt-j-6B + max_sequence_length: 2048 + max_request_length: 2049 + client_spec: + class_name: "helm.proxy.clients.together_client.TogetherClient" + args: {} + window_service_spec: + class_name: "helm.benchmark.window_services.gptj_window_service.GPTJWindowService" + args: {} + + - name: together/gpt-neoxt-chat-base-20b + model_name: together/gpt-neoxt-chat-base-20b + tokenizer_name: EleutherAI/gpt-neox-20b + max_sequence_length: 2048 + max_request_length: 2049 + client_spec: + class_name: "helm.proxy.clients.together_client.TogetherClient" + args: {} + window_service_spec: + class_name: "helm.benchmark.window_services.gptneox_window_service.GPTNeoXWindowService" + args: {} + + - name: together/redpajama-incite-base-3b-v1 + model_name: together/redpajama-incite-base-3b-v1 + tokenizer_name: EleutherAI/gpt-neox-20b + max_sequence_length: 2048 + max_request_length: 2049 + client_spec: + class_name: "helm.proxy.clients.together_client.TogetherClient" + args: {} + window_service_spec: + class_name: "helm.benchmark.window_services.gptneox_window_service.GPTNeoXWindowService" + args: {} + + - name: together/redpajama-incite-instruct-3b-v1 + model_name: together/redpajama-incite-instruct-3b-v1 + tokenizer_name: EleutherAI/gpt-neox-20b + max_sequence_length: 2048 + max_request_length: 2049 + client_spec: + class_name: "helm.proxy.clients.together_client.TogetherClient" + args: {} + window_service_spec: + class_name: "helm.benchmark.window_services.gptneox_window_service.GPTNeoXWindowService" + args: {} + + - name: together/redpajama-incite-base-7b + model_name: together/redpajama-incite-base-7b + tokenizer_name: EleutherAI/gpt-neox-20b + max_sequence_length: 2048 + max_request_length: 2049 + client_spec: + class_name: "helm.proxy.clients.together_client.TogetherClient" + args: {} + window_service_spec: + class_name: "helm.benchmark.window_services.gptneox_window_service.GPTNeoXWindowService" + args: {} + + - name: together/redpajama-incite-instruct-7b + model_name: together/redpajama-incite-instruct-7b + tokenizer_name: EleutherAI/gpt-neox-20b + max_sequence_length: 2048 + max_request_length: 2049 + client_spec: + class_name: "helm.proxy.clients.together_client.TogetherClient" + args: {} + window_service_spec: + class_name: "helm.benchmark.window_services.gptneox_window_service.GPTNeoXWindowService" + args: {} + + # ---------- Tsinghua ---------- # + - name: together/glm + model_name: tsinghua/glm + tokenizer_name: TsinghuaKEG/ice + max_sequence_length: 2048 + max_request_length: 2049 + client_spec: + class_name: "helm.proxy.clients.together_client.TogetherClient" + args: {} + window_service_spec: + class_name: "helm.benchmark.window_services.ice_window_service.ICEWindowService" + args: {} + + # ---------- Yandex ---------- # + - name: together/yalm + model_name: yandex/yalm + tokenizer_name: Yandex/yalm + max_sequence_length: 2048 + max_request_length: 2049 + client_spec: + class_name: "helm.proxy.clients.together_client.TogetherClient" + args: {} + window_service_spec: + class_name: "helm.benchmark.window_services.yalm_window_service.YaLMWindowService" + args: {} + # ================================ # + + + + # ========== Writer ========== # + - name: writer/palmyra-base + model_name: writer/palmyra-base + tokenizer_name: huggingface/gpt2 + max_sequence_length: 2048 + max_sequence_and_generated_tokens_length: 2048 + 
client_spec: + class_name: "helm.proxy.clients.palmyra_client.PalmyraClient" + args: {} + window_service_spec: + class_name: "helm.benchmark.window_services.palmyra_window_service.PalmyraWindowService" + args: {} + + - name: writer/palmyra-large + model_name: writer/palmyra-large + tokenizer_name: huggingface/gpt2 + max_sequence_length: 2048 + max_sequence_and_generated_tokens_length: 2048 + client_spec: + class_name: "helm.proxy.clients.palmyra_client.PalmyraClient" + args: {} + window_service_spec: + class_name: "helm.benchmark.window_services.palmyra_window_service.PalmyraWindowService" + args: {} + + - name: writer/palmyra-instruct-30 + model_name: writer/palmyra-instruct-30 + tokenizer_name: huggingface/gpt2 + max_sequence_length: 2048 + max_sequence_and_generated_tokens_length: 2048 + client_spec: + class_name: "helm.proxy.clients.palmyra_client.PalmyraClient" + args: {} + window_service_spec: + class_name: "helm.benchmark.window_services.palmyra_window_service.PalmyraWindowService" + args: {} + + - name: writer/palmyra-e + model_name: writer/palmyra-e + tokenizer_name: huggingface/gpt2 + max_sequence_length: 2048 + max_sequence_and_generated_tokens_length: 2048 + client_spec: + class_name: "helm.proxy.clients.palmyra_client.PalmyraClient" + args: {} + window_service_spec: + class_name: "helm.benchmark.window_services.palmyra_window_service.PalmyraWindowService" + args: {} + + - name: writer/silk-road + model_name: writer/silk-road + tokenizer_name: huggingface/gpt2 + max_sequence_length: 8192 + max_sequence_and_generated_tokens_length: 8192 + client_spec: + class_name: "helm.proxy.clients.palmyra_client.PalmyraClient" + args: {} + window_service_spec: + class_name: "helm.benchmark.window_services.palmyra_window_service.LongerPalmyraWindowService" + args: {} + + - name: writer/palmyra-x + model_name: writer/palmyra-x + tokenizer_name: huggingface/gpt2 + max_sequence_length: 8192 + max_sequence_and_generated_tokens_length: 8192 + client_spec: + class_name: "helm.proxy.clients.palmyra_client.PalmyraClient" + args: {} + window_service_spec: + class_name: "helm.benchmark.window_services.palmyra_window_service.LongerPalmyraWindowService" + args: {} + # ============================ # \ No newline at end of file diff --git a/src/helm/config/model_metadatas.yaml b/src/helm/config/model_metadatas.yaml index 722ba7ab76..332b5ab0e1 100644 --- a/src/helm/config/model_metadatas.yaml +++ b/src/helm/config/model_metadatas.yaml @@ -414,7 +414,7 @@ models: # =========== EleutherAI =========== # - - name: eleutherai/gpt-j-6b + - name: eleutherai/gpt-j-6b # Served by GooseAi, HuggingFace and Together. display_name: GPT-J (6B) description: GPT-J (6B parameters) autoregressive language model trained on The Pile ([details](https://arankomatsuzaki.wordpress.com/2021/06/04/gpt-j/)). creator_organization_name: EleutherAI @@ -424,7 +424,7 @@ models: # TODO: The BUGGY_TEMP_0_TAG is a deployment related tag (Together). tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, BUGGY_TEMP_0_TAG] - - name: eleutherai/gpt-neox-20b + - name: eleutherai/gpt-neox-20b # Served by GooseAi and Together. display_name: GPT-NeoX (20B) description: GPT-NeoX (20B parameters) autoregressive language model trained on The Pile ([paper](https://arxiv.org/pdf/2204.06745.pdf)). 
creator_organization_name: EleutherAI @@ -810,7 +810,7 @@ models: # =========== OpenAI =========== # # ----- GPT 3 Models ----- # - # The list fo models can be found here: https://beta.openai.com/docs/engines/gpt-3 + # The list of models can be found here: https://beta.openai.com/docs/engines/gpt-3 # DEPRECATED: Announced on July 06 2023 that these models will be shut down on January 04 2024. - name: openai/davinci # DEPRECATED @@ -908,11 +908,7 @@ models: # ----- GPT 3.5 Turbo Models ----- # # ChatGPT: https://openai.com/blog/chatgpt - - # The claimed sequence length is 4096, but as of 2023-03-07, the empirical usable - # sequence length is smaller at 4087 with one user input message and one assistant - # output message because ChatGPT uses special tokens for message roles and boundaries. - # We use a rounded-down sequence length of 4000 to account for these special tokens. + - name: openai/gpt-3.5-turbo-0301 display_name: gpt-3.5-turbo-0301 description: Sibling model of text-davinci-003 is optimized for chat but works well for traditional completions tasks as well. Snapshot from 2023-03-01. @@ -921,10 +917,6 @@ models: release_date: 2023-03-01 tags: [TEXT_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] - # The claimed sequence length is 4096, but as of 2023-03-07, the empirical usable - # sequence length is smaller at 4087 with one user input message and one assistant - # output message because ChatGPT uses special tokens for message roles and boundaries. - # We use a rounded-down sequence length of 4000 to account for these special tokens. - name: openai/gpt-3.5-turbo-0613 display_name: gpt-3.5-turbo-0613 description: Sibling model of text-davinci-003 is optimized for chat but works well for traditional completions tasks as well. Snapshot from 2023-06-13.
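
For context on the new registry field, here is a minimal sketch of how `max_sequence_and_generated_tokens_length` is meant to combine with `max_sequence_length` when capping a request. This is illustrative only, not HELM's actual window-service code: `DeploymentLimits`, `effective_max_tokens`, and `INT_MAX` are assumed names, and only the Claude numbers (8000 / 9016) come from the entries in this patch.

```python
from dataclasses import dataclass
from typing import Optional

INT_MAX = 2**31 - 1  # stand-in for "no limit", per the registry docstring

@dataclass
class DeploymentLimits:
    # Maximum tokens the prompt alone may occupy.
    max_sequence_length: int
    # Maximum tokens of prompt + generated output combined; None means no limit.
    max_sequence_and_generated_tokens_length: Optional[int] = None

def effective_max_tokens(limits: DeploymentLimits, prompt_tokens: int, requested_max_tokens: int) -> int:
    """Cap the requested generation length so prompt + output stays within the combined limit."""
    if prompt_tokens > limits.max_sequence_length:
        raise ValueError("Prompt must already fit within max_sequence_length.")
    combined_cap = (
        limits.max_sequence_and_generated_tokens_length
        if limits.max_sequence_and_generated_tokens_length is not None
        else INT_MAX
    )
    return min(requested_max_tokens, combined_cap - prompt_tokens)

# Example with the anthropic/claude-v1.3 values above (8000 / 9016):
claude = DeploymentLimits(
    max_sequence_length=8000,
    max_sequence_and_generated_tokens_length=9016,
)
print(effective_max_tokens(claude, prompt_tokens=8000, requested_max_tokens=2048))  # -> 1016
```

Under this scheme a full-length 8000-token Claude prompt still leaves 1016 tokens for generation, whereas the Writer/Palmyra entries (2048 / 2048 and 8192 / 8192) leave no generation headroom once the prompt fills the whole window.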