diff --git a/src/helm/benchmark/model_deployment_registry.py b/src/helm/benchmark/model_deployment_registry.py index a9daf4f2fa..a3b4720362 100644 --- a/src/helm/benchmark/model_deployment_registry.py +++ b/src/helm/benchmark/model_deployment_registry.py @@ -1,7 +1,6 @@ import os from typing import Dict, Optional, List from dataclasses import dataclass -from datetime import date import cattrs import yaml @@ -10,10 +9,7 @@ from helm.common.object_spec import ObjectSpec from helm.benchmark.model_metadata_registry import ( ModelMetadata, - register_model_metadata, get_model_metadata, - TEXT_MODEL_TAG, - FULL_FUNCTIONALITY_TEXT_MODEL_TAG, ) @@ -100,7 +96,7 @@ class ModelDeployments: # ===================== REGISTRATION FUNCTIONS ==================== # def register_model_deployment(model_deployment: ModelDeployment) -> None: - hlog(f"Registered model deployment {model_deployment.name}") + # hlog(f"Registered model deployment {model_deployment.name}") DEPLOYMENT_NAME_TO_MODEL_DEPLOYMENT[model_deployment.name] = model_deployment ALL_MODEL_DEPLOYMENTS.append(model_deployment) @@ -114,21 +110,7 @@ def register_model_deployment(model_deployment: ModelDeployment) -> None: model_metadata.deployment_names = [] model_metadata.deployment_names.append(model_deployment.name) except ValueError: - # No model metadata exists for this model name. - # Register a default model metadata. 
-            model_metadata = ModelMetadata(
-                name=model_name,
-                display_name=model_name,
-                description="",
-                access="limited",
-                num_parameters=-1,
-                release_date=date.today(),
-                creator_organization_name="unknown",
-                tags=[TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG],
-                deployment_names=[model_deployment.name],
-            )
-            register_model_metadata(model_metadata)
-            hlog(f"Registered default metadata for model {model_name}")
+            raise ValueError(f"Model deployment {model_deployment.name} has no corresponding model metadata")
 
 
 def register_model_deployments_from_path(path: str) -> None:
@@ -157,6 +139,44 @@ def get_model_deployment(name: str) -> ModelDeployment:
     return deployment
 
 
+# TODO: Remove when we no longer want to offer backwards compatibility for model names
+# that are now moved to model deployments (PR #1903).
+def get_deployment_name_from_model_arg(model_arg: str) -> str:
+    """Resolve a ``model`` argument to the name of a registered model deployment.
+
+    A value that already names a registered deployment is returned unchanged.
+    Otherwise the value is treated as a (deprecated) model name: the first
+    registered deployment whose ``model_name`` equals it is chosen, and a
+    deprecation notice listing every candidate is logged.
+
+    Args:
+        model_arg: A model deployment name, or a model name kept for backwards compatibility.
+
+    Returns:
+        The name of the model deployment to use.
+
+    Raises:
+        ValueError: If ``model_arg`` matches neither a deployment name nor the model name of any deployment.
+    """
+    if model_arg in DEPLOYMENT_NAME_TO_MODEL_DEPLOYMENT:
+        return model_arg
+
+    # One pass over the registry serves as both the membership test and the candidate list.
+    available_deployments: List[str] = [
+        deployment.name for deployment in ALL_MODEL_DEPLOYMENTS if deployment.model_name == model_arg
+    ]
+    if not available_deployments:
+        raise ValueError(f"Model deployment {model_arg} not found")
+
+    hlog("WARNING: Model name is deprecated. Please use the model deployment name instead.")
+    hlog(f"Available model deployments for model {model_arg}: {available_deployments}")
+    # Index 0 is the first match in ALL_MODEL_DEPLOYMENTS order.
+    chosen_deployment: str = available_deployments[0]
+    hlog(f"Choosing {chosen_deployment} (the first one) as the default model deployment for model {model_arg}")
+    hlog("If you want to use a different model deployment, please specify it explicitly.")
+    return chosen_deployment
+
+
 def get_model_deployments_by_host_group(host_group: str) -> List[str]:
     """
     Gets models by host group.
diff --git a/src/helm/benchmark/model_metadata_registry.py b/src/helm/benchmark/model_metadata_registry.py index 644a78cf62..9c91704971 100644 --- a/src/helm/benchmark/model_metadata_registry.py +++ b/src/helm/benchmark/model_metadata_registry.py @@ -6,8 +6,6 @@ import dacite import yaml -from helm.common.hierarchical_logger import hlog - # Different modalities TEXT_MODEL_TAG: str = "TEXT_MODEL_TAG" @@ -140,7 +138,7 @@ def register_model_metadata_from_path(path: str) -> None: def register_model_metadata(model_metadata: ModelMetadata) -> None: """Register a single model configuration.""" - hlog(f"Registered model metadata {model_metadata.name}") + # hlog(f"Registered model metadata {model_metadata.name}") ALL_MODELS_METADATA.append(model_metadata) MODEL_NAME_TO_MODEL_METADATA[model_metadata.name] = model_metadata diff --git a/src/helm/benchmark/run_specs.py b/src/helm/benchmark/run_specs.py index 51975ca4d6..da3eb14705 100644 --- a/src/helm/benchmark/run_specs.py +++ b/src/helm/benchmark/run_specs.py @@ -50,6 +50,7 @@ from helm.benchmark.model_deployment_registry import ( ModelDeployment, get_model_deployment, + get_deployment_name_from_model_arg, ) from helm.benchmark.model_metadata_registry import ( ModelMetadata, @@ -2544,7 +2545,8 @@ def construct_run_specs(spec: ObjectSpec) -> List[RunSpec]: # All users should be using the model_deployment keyword argument instead. 
# TODO: Remove this once we've migrated all the configs if args.get("model", None) is not None and args.get("model_deployment", None) is None: - args.update({"model_deployment": args["model"]}) + model_deployment: str = get_deployment_name_from_model_arg(args["model"]) + args.update({"model_deployment": model_deployment}) args.pop("model") # Peel off the run expanders (e.g., model) diff --git a/src/helm/config/model_deployments.yaml b/src/helm/config/model_deployments.yaml index c0658fa89a..753971cd2c 100644 --- a/src/helm/config/model_deployments.yaml +++ b/src/helm/config/model_deployments.yaml @@ -333,7 +333,7 @@ model_deployments: # ---------- EleutherAI ---------- # - name: gooseai/gpt-neo-20b - model_name: eleutherai/gpt-neo-20b + model_name: eleutherai/gpt-neox-20b tokenizer_name: EleutherAI/gpt-neox-20b max_sequence_length: 2048 max_request_length: 2049 @@ -395,9 +395,9 @@ model_deployments: class_name: "helm.benchmark.window_services.gptj_window_service.GPTJWindowService" args: {} - # ---------- OpenAI (HuggingFace) ---------- # + # ---------- OpenAI ---------- # - name: huggingface/gpt2 - model_name: huggingface/gpt2 + model_name: openai/gpt2 tokenizer_name: huggingface/gpt2 max_sequence_length: 1024 max_request_length: 1025 @@ -413,7 +413,7 @@ model_deployments: # ========== HuggingFaceM4 ========== # - name: HuggingFaceM4/idefics-9b - model_name: HuggingFaceM4/idefics-9b + model_name: huggingface/idefics-9b tokenizer_name: HuggingFaceM4/idefics-9b max_sequence_length: 2048 client_spec: @@ -424,7 +424,7 @@ model_deployments: args: {} - name: HuggingFaceM4/idefics-9b-instruct - model_name: HuggingFaceM4/idefics-9b-instruct + model_name: huggingface/idefics-9b-instruct tokenizer_name: HuggingFaceM4/idefics-9b-instruct max_sequence_length: 2048 client_spec: @@ -435,7 +435,7 @@ model_deployments: args: {} - name: HuggingFaceM4/idefics-80b - model_name: HuggingFaceM4/idefics-80b + model_name: huggingface/idefics-80b tokenizer_name: HuggingFaceM4/idefics-80b 
max_sequence_length: 2048 client_spec: @@ -446,7 +446,7 @@ model_deployments: args: {} - name: HuggingFaceM4/idefics-80b-instruct - model_name: HuggingFaceM4/idefics-80b-instruct + model_name: huggingface/idefics-80b-instruct tokenizer_name: HuggingFaceM4/idefics-80b-instruct max_sequence_length: 2048 client_spec: diff --git a/src/helm/config/model_metadatas.yaml b/src/helm/config/model_metadatas.yaml index 332b5ab0e1..3d6bdd6c40 100644 --- a/src/helm/config/model_metadatas.yaml +++ b/src/helm/config/model_metadatas.yaml @@ -525,6 +525,46 @@ models: + # =========== HuggingFace =========== # + - name: huggingface/idefics-9b + display_name: IDEFICS (9B) + description: IDEFICS (9B parameters) is an open-source model based on DeepMind's Flamingo. ([blog](https://huggingface.co/blog/idefics)) + creator_organization_name: HuggingFace + access: open + num_parameters: 9000000000 + release_date: 2023-08-22 + tags: [VISION_LANGUAGE_MODEL_TAG] + + - name: huggingface/idefics-9b-instruct + display_name: IDEFICS instruct (9B) + description: IDEFICS instruct (9B parameters) is an open-source model based on DeepMind's Flamingo. ([blog](https://huggingface.co/blog/idefics)) + creator_organization_name: HuggingFace + access: open + num_parameters: 9000000000 + release_date: 2023-08-22 + tags: [VISION_LANGUAGE_MODEL_TAG] + + - name: huggingface/idefics-80b + display_name: IDEFICS (80B) + description: IDEFICS (80B parameters) is an open-source model based on DeepMind's Flamingo. ([blog](https://huggingface.co/blog/idefics)) + creator_organization_name: HuggingFace + access: open + num_parameters: 80000000000 + release_date: 2023-08-22 + tags: [VISION_LANGUAGE_MODEL_TAG] + + - name: huggingface/idefics-80b-instruct + display_name: IDEFICS instruct (80B) + description: IDEFICS instruct (80B parameters) is an open-source model based on DeepMind's Flamingo. 
([blog](https://huggingface.co/blog/idefics)) + creator_organization_name: HuggingFace + access: open + num_parameters: 80000000000 + release_date: 2023-08-22 + tags: [VISION_LANGUAGE_MODEL_TAG] + # =================================== # + + + # =========== Lightning AI =========== # - name: lightningai/lit-gpt display_name: Lit-GPT @@ -809,6 +849,19 @@ models: # =========== OpenAI =========== # + # ----- GPT 2 Models ----- # + # Not served by OpenAI, instead served by HuggingFace. + + - name: openai/gpt2 + display_name: GPT-2 (1.5B) + description: GPT-2 (1.5B parameters) is a transformer model trained on a large corpus of English text in a self-supervised fashion ([paper](https://cdn.openai.com/better-language-models/language_models_are_unsupervised_multitask_learners.pdf)). + creator_organization_name: OpenAI + access: open + num_parameters: 1500000000 + release_date: 2019-02-14 + tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG] + + # ----- GPT 3 Models ----- # # The list of models can be found here: https://beta.openai.com/docs/engines/gpt-3 # DEPRECATED: Announced on July 06 2023 that these models will be shut down on January 04 2024. @@ -1298,7 +1351,7 @@ models: # ========== Yandex ========== # - - name: together/yalm + - name: yandex/yalm display_name: YaLM (100B) description: YaLM (100B parameters) is an autoregressive language model trained on English and Russian text ([GitHub](https://github.com/yandex/YaLM-100B)). creator_organization_name: Yandex