Configurable models for NeurIPS Efficiency Challenge (#1861)
yifanmai authored Oct 3, 2023
1 parent d3a0cc1 commit 10a27a6
Showing 11 changed files with 294 additions and 54 deletions.
57 changes: 49 additions & 8 deletions src/helm/benchmark/model_deployment_registry.py
@@ -1,3 +1,4 @@
import os
from typing import Dict, Optional, List
from dataclasses import dataclass

@@ -6,32 +7,52 @@

from helm.common.hierarchical_logger import hlog
from helm.common.object_spec import ObjectSpec
from helm.proxy.models import ALL_MODELS, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, MODEL_NAME_TO_MODEL, TEXT_MODEL_TAG, Model


MODEL_DEPLOYMENTS_FILE = "model_deployments.yaml"


class ClientSpec(ObjectSpec):
pass


class WindowServiceSpec(ObjectSpec):
pass


@dataclass(frozen=True)
class ModelDeployment:
"""A model deployment is an accessible instance of this model (e.g. a hosted endpoint).
A model can have model deployments."""
A model can have multiple model deployments."""

name: str
"""Name of the model deployment."""

model_name: str
"""Name of the model that this model deployment is for."""

client_spec: ClientSpec
"""Specification for instantiating the client for this model deployment."""

max_sequence_length: Optional[int]
"""Maximum equence length for this model deployment."""
model_name: Optional[str] = None
"""Name of the model that this model deployment is for.
If unset, defaults to the same value as `name`."""

tokenizer_name: Optional[str]
"""Tokenizer for this model deployment."""
tokenizer_name: Optional[str] = None
"""Tokenizer for this model deployment.
If unset, auto-inferred by the WindowService."""

window_service_spec: Optional[WindowServiceSpec] = None
"""Specification for instantiating the window service for this model deplooyment"""

max_sequence_length: Optional[int] = None
"""Maximum sequence length for this model deployment."""

max_request_length: Optional[int] = None
"""Maximum request length for this model deployment.
If unset, defaults to the same value as max_sequence_length."""


@dataclass(frozen=True)
@@ -49,8 +70,28 @@ def register_model_deployments_from_path(path: str) -> None:
raw = yaml.safe_load(f)
model_deployments: ModelDeployments = cattrs.structure(raw, ModelDeployments)
for model_deployment in model_deployments.model_deployments:
hlog(f"Registered model deployment {model_deployment.name}")
_name_to_model_deployment[model_deployment.name] = model_deployment

# Auto-register a model with this name if none exists
model_name = model_deployment.model_name or model_deployment.name
if model_name not in MODEL_NAME_TO_MODEL:
model = Model(
group="none",
name=model_name,
tags=[TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG],
)
MODEL_NAME_TO_MODEL[model_name] = model
ALL_MODELS.append(model)
hlog(f"Registered default metadata for model {model_name}")


def maybe_register_model_deployments_from_base_path(base_path: str) -> None:
"""Register model deployments from prod_env/model_deployments.yaml"""
path = os.path.join(base_path, MODEL_DEPLOYMENTS_FILE)
if os.path.exists(path):
register_model_deployments_from_path(path)


def get_model_deployment(name: str) -> Optional[ModelDeployment]:
return _name_to_model_deployment.get(name)
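
For orientation, the registry above reads prod_env/model_deployments.yaml and structures it with cattrs into the ModelDeployment dataclass. Below is a minimal sketch of such a file, parsed standalone with PyYAML; only the field names come from the dataclass, while the deployment name, tokenizer name, and client class path are illustrative assumptions (the class path in particular is a placeholder, not a class shipped in this commit).

import yaml

# Hypothetical prod_env/model_deployments.yaml content. Field names mirror the
# ModelDeployment dataclass above; the names and the client class path are
# placeholders for illustration only.
EXAMPLE_MODEL_DEPLOYMENTS_YAML = """
model_deployments:
  - name: my-org/my-model
    tokenizer_name: my-org/my-tokenizer
    max_sequence_length: 2048
    client_spec:
      class_name: my_package.my_client.MyClient  # placeholder client class
"""

# yaml.safe_load yields the nested dict that register_model_deployments_from_path
# would pass to cattrs.structure(raw, ModelDeployments).
raw = yaml.safe_load(EXAMPLE_MODEL_DEPLOYMENTS_YAML)
for deployment in raw["model_deployments"]:
    print(deployment["name"], "->", deployment["client_spec"]["class_name"])
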
11 changes: 11 additions & 0 deletions src/helm/benchmark/model_metadata_registry.py
@@ -1,3 +1,4 @@
import os
from typing import Optional, List
from dataclasses import dataclass, field
from datetime import date
@@ -8,6 +9,9 @@
from helm.proxy.models import ALL_MODELS, MODEL_NAME_TO_MODEL, Model


MODEL_METADATA_FILE = "model_metadata.yaml"


@dataclass(frozen=True)
class ModelMetadata:
name: str
@@ -58,3 +62,10 @@ def register_model_metadata_from_path(path: str) -> None:
)
MODEL_NAME_TO_MODEL[model_metadata.name] = model
ALL_MODELS.append(model)


def maybe_register_model_metadata_from_base_path(base_path: str) -> None:
"""Register model metadata from prod_env/model_metadata.yaml"""
path = os.path.join(base_path, MODEL_METADATA_FILE)
if os.path.exists(path):
register_model_metadata_from_path(path)
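
A possible call site for these base-path helpers, assuming the helm package is importable and prod_env/ is the local configuration directory; the wiring below is a sketch of how a runner might pick up both files, not code from this commit.

from helm.benchmark.model_metadata_registry import maybe_register_model_metadata_from_base_path
from helm.benchmark.model_deployment_registry import maybe_register_model_deployments_from_base_path

# Both helpers are no-ops when their YAML file is missing, so calling them
# unconditionally at startup is safe (assumed usage; see note above).
BASE_PATH = "prod_env"
maybe_register_model_metadata_from_base_path(BASE_PATH)
maybe_register_model_deployments_from_base_path(BASE_PATH)
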
8 changes: 7 additions & 1 deletion src/helm/benchmark/run_specs.py
@@ -2516,7 +2516,13 @@ def construct_run_specs(spec: ObjectSpec) -> List[RunSpec]:
]

def alter_run_spec(run_spec: RunSpec) -> RunSpec:
model = get_model(run_spec.adapter_spec.model)
try:
model = get_model(run_spec.adapter_spec.model)
except ValueError:
# Models registered from configs cannot have expanders applied to them,
# because the models will not have been registered yet at this point.
# TODO: Figure out a cleaner way to deal with this.
return run_spec
# For models that strip newlines, when we're generating, we need to set
# the delimiter to be '###' so we stop properly.
if NO_NEWLINES_TAG in model.tags and run_spec.adapter_spec.method in (
57 changes: 57 additions & 0 deletions src/helm/benchmark/tokenizer_config_registry.py
@@ -0,0 +1,57 @@
import os
from typing import Dict, Optional, List
from dataclasses import dataclass

import cattrs
import yaml

from helm.common.hierarchical_logger import hlog
from helm.common.object_spec import ObjectSpec


TOKENIZER_CONFIGS_FILE = "tokenizer_configs.yaml"


class TokenizerSpec(ObjectSpec):
pass


@dataclass(frozen=True)
class TokenizerConfig:
"""Configuration for a tokenizer."""

name: str
"""Name of the tokenizer."""

tokenizer_spec: TokenizerSpec
"""Specification for instantiating the client for this tokenizer."""

# TODO: Add `end_of_text_token` and `prefix_token`


@dataclass(frozen=True)
class TokenizerConfigs:
tokenizer_configs: List[TokenizerConfig]


_name_to_tokenizer_config: Dict[str, TokenizerConfig] = {}


def register_tokenizer_configs_from_path(path: str) -> None:
global _name_to_tokenizer_config
hlog(f"Reading tokenizer configs from {path}...")
with open(path, "r") as f:
raw = yaml.safe_load(f)
tokenizer_configs: TokenizerConfigs = cattrs.structure(raw, TokenizerConfigs)
for tokenizer_config in tokenizer_configs.tokenizer_configs:
_name_to_tokenizer_config[tokenizer_config.name] = tokenizer_config


def maybe_register_tokenizer_configs_from_base_path(base_path: str) -> None:
path = os.path.join(base_path, TOKENIZER_CONFIGS_FILE)
if os.path.exists(path):
register_tokenizer_configs_from_path(path)


def get_tokenizer_config(name: str) -> Optional[TokenizerConfig]:
return _name_to_tokenizer_config.get(name)
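
By analogy with the model deployments file, here is a hypothetical tokenizer_configs.yaml entry that register_tokenizer_configs_from_path could read; the field names come from the TokenizerConfig dataclass above, while the tokenizer name and class path are placeholders.

import yaml

# Hypothetical tokenizer_configs.yaml content (placeholder names and class path).
EXAMPLE_TOKENIZER_CONFIGS_YAML = """
tokenizer_configs:
  - name: my-org/my-tokenizer
    tokenizer_spec:
      class_name: my_package.my_tokenizer.MyTokenizer  # placeholder tokenizer class
"""

raw = yaml.safe_load(EXAMPLE_TOKENIZER_CONFIGS_YAML)
print(raw["tokenizer_configs"][0]["name"])
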
39 changes: 39 additions & 0 deletions src/helm/benchmark/window_services/default_window_service.py
@@ -0,0 +1,39 @@
from typing import Optional
from .local_window_service import LocalWindowService
from .tokenizer_service import TokenizerService


class DefaultWindowService(LocalWindowService):
def __init__(
self,
service: TokenizerService,
tokenizer_name: str,
max_sequence_length: int,
max_request_length: Optional[int] = None,
):
super().__init__(service)
self._tokenizer_name = tokenizer_name
self._max_sequence_length = max_sequence_length
self._max_request_length = max_request_length

@property
def max_sequence_length(self) -> int:
return self._max_sequence_length

@property
def max_request_length(self) -> int:
return self._max_request_length or self._max_sequence_length

@property
def end_of_text_token(self) -> str:
# TODO: Support this
return ""

@property
def tokenizer_name(self) -> str:
return self._tokenizer_name

@property
def prefix_token(self) -> str:
# TODO: Support this
return ""
12 changes: 8 additions & 4 deletions src/helm/benchmark/window_services/huggingface_window_service.py
@@ -2,15 +2,18 @@
from helm.proxy.clients.huggingface_tokenizer import HuggingFaceTokenizers
from .local_window_service import LocalWindowService
from .tokenizer_service import TokenizerService
from helm.proxy.clients.huggingface_client import HuggingFaceModelConfig


class HuggingFaceWindowService(LocalWindowService):
def __init__(
self, service: TokenizerService, model_config: HuggingFaceModelConfig, max_sequence_length: Optional[int] = None
self,
service: TokenizerService,
tokenizer_name: str,
max_sequence_length: Optional[int] = None,
max_request_length: Optional[int] = None,
):
super().__init__(service)
self._tokenizer_name = model_config.model_id
self._tokenizer_name = tokenizer_name
tokenizer = HuggingFaceTokenizers.get_tokenizer(self._tokenizer_name)
self._prefix_token = tokenizer.bos_token
self._end_of_text_token = tokenizer.eos_token
@@ -22,6 +25,7 @@ def __init__(
self._max_sequence_length = max_sequence_length
else:
self._max_sequence_length = tokenizer.model_max_length
self._max_request_length = max_request_length

@property
def max_sequence_length(self) -> int:
@@ -31,7 +35,7 @@ def max_sequence_length(self) -> int:
@property
def max_request_length(self) -> int:
"""Return the max request length of this tokenizer."""
return self.max_sequence_length
return self._max_request_length or self._max_sequence_length

@property
def end_of_text_token(self) -> str:
9 changes: 2 additions & 7 deletions src/helm/benchmark/window_services/llama_window_service.py
@@ -1,4 +1,3 @@
from helm.proxy.clients.huggingface_client import HuggingFaceHubModelConfig
from helm.benchmark.window_services.huggingface_window_service import HuggingFaceWindowService
from helm.benchmark.window_services.tokenizer_service import TokenizerService

@@ -7,10 +6,7 @@ class LlamaWindowService(HuggingFaceWindowService):
def __init__(self, service: TokenizerService):
# Tokenizer name hf-internal-testing/llama-tokenizer is taken from:
# https://huggingface.co/docs/transformers/main/en/model_doc/llama#transformers.LlamaTokenizerFast.example
model_config = HuggingFaceHubModelConfig(
namespace="hf-internal-testing", model_name="llama-tokenizer", revision=None
)
super().__init__(service, model_config)
super().__init__(service, "hf-internal-testing/llama-tokenizer")


class Llama2WindowService(HuggingFaceWindowService):
@@ -25,8 +21,7 @@ class Llama2WindowService(HuggingFaceWindowService):
# meta-llama/Llama-2-70b-hf is not a local folder and is not a valid model identifier listed on
# 'https://huggingface.co/models'
def __init__(self, service: TokenizerService):
model_config = HuggingFaceHubModelConfig(namespace="meta-llama", model_name="Llama-2-7b-hf", revision=None)
super().__init__(service, model_config)
super().__init__(service, "meta-llama/Llama-2-7b-hf")

@property
def max_sequence_length(self) -> int:
45 changes: 28 additions & 17 deletions src/helm/benchmark/window_services/window_service_factory.py
@@ -1,5 +1,4 @@
from helm.benchmark.model_deployment_registry import get_model_deployment
from helm.proxy.clients.huggingface_model_registry import HuggingFaceHubModelConfig
from helm.benchmark.model_deployment_registry import WindowServiceSpec, get_model_deployment
from helm.proxy.models import (
get_model,
get_model_names_with_tag,
@@ -20,6 +19,7 @@
from helm.benchmark.window_services.tokenizer_service import TokenizerService
from helm.proxy.clients.huggingface_client import get_huggingface_model_config
from helm.proxy.clients.remote_model_registry import get_remote_model
from helm.common.object_spec import create_object, inject_object_spec_args


class WindowServiceFactory:
@@ -40,25 +40,39 @@ def get_window_service(model_name: str, service: TokenizerService) -> WindowServ
# TODO: Migrate all window services to use model deployments
model_deployment = get_model_deployment(model_name)
if model_deployment:
# TODO: Allow tokenizer name auto-inference in some cases.
if not model_deployment.tokenizer_name:
raise Exception("Tokenizer name must be set on model deplyment")
tokenizer_name = model_deployment.tokenizer_name
# Only use HuggingFaceWindowService for now.
# TODO: Allow using other window services.
window_service = HuggingFaceWindowService(
service=service,
model_config=HuggingFaceHubModelConfig.from_string(tokenizer_name),
max_sequence_length=model_deployment.max_sequence_length,
# If the model deployment specifies a WindowServiceSpec, instantiate it.
window_service_spec: WindowServiceSpec
if model_deployment.window_service_spec:
window_service_spec = model_deployment.window_service_spec
else:
window_service_spec = WindowServiceSpec(
class_name="helm.benchmark.window_services.default_window_service.DefaultWindowService", args={}
)
# Perform dependency injection to fill in remaining arguments.
# Dependency injection is needed here for these reasons:
#
# 1. Different window services have different parameters. Dependency injection provides arguments
# that match the parameters of the window services.
# 2. Some arguments, such as the tokenizer service, are not static data objects that can be
# in the user's configuration file. Instead, they have to be constructed dynamically at runtime.
window_service_spec = inject_object_spec_args(
window_service_spec,
{
"service": service,
"tokenizer_name": model_deployment.tokenizer_name,
"max_sequence_length": model_deployment.max_sequence_length,
"max_request_length": model_deployment.max_request_length,
},
)
window_service = create_object(window_service_spec)
elif get_remote_model(model_name):
window_service = get_remote_window_service(service, model_name)
elif organization == "neurips":
from helm.benchmark.window_services.http_model_window_service import HTTPModelWindowServce

window_service = HTTPModelWindowServce(service)
elif huggingface_model_config:
window_service = HuggingFaceWindowService(service=service, model_config=huggingface_model_config)
window_service = HuggingFaceWindowService(service=service, tokenizer_name=huggingface_model_config.model_id)
elif organization == "openai":
from helm.benchmark.window_services.openai_window_service import OpenAIWindowService
from helm.benchmark.window_services.wider_openai_window_service import (
@@ -189,10 +203,7 @@ def get_window_service(model_name: str, service: TokenizerService) -> WindowServ
"tiiuae/falcon-40b",
"tiiuae/falcon-40b-instruct",
]:
window_service = HuggingFaceWindowService(
service=service,
model_config=HuggingFaceHubModelConfig(namespace="tiiuae", model_name="falcon-7b", revision=None),
)
window_service = HuggingFaceWindowService(service=service, tokenizer_name="tiiuae/falcon-7b")
elif model_name in [
"stabilityai/stablelm-base-alpha-3b",
"stabilityai/stablelm-base-alpha-7b",
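
To make the dependency-injection comment in get_window_service concrete: the idea is that inject_object_spec_args fills in only those runtime arguments (tokenizer service, tokenizer name, length limits) that the target window service's constructor actually accepts, and create_object then imports and instantiates the class. The helpers below are simplified stand-ins written for illustration under that assumption; they are not HELM's helm.common.object_spec implementations.

import importlib
import inspect
from dataclasses import dataclass, field, replace
from typing import Any, Dict

# Simplified stand-ins for the object-spec helpers used above (illustrative only).
@dataclass(frozen=True)
class ObjectSpec:
    class_name: str  # fully qualified "module.Class" path
    args: Dict[str, Any] = field(default_factory=dict)

def _resolve(class_name: str) -> type:
    module_name, _, cls_name = class_name.rpartition(".")
    return getattr(importlib.import_module(module_name), cls_name)

def inject_object_spec_args(spec: ObjectSpec, injected: Dict[str, Any]) -> ObjectSpec:
    """Return a copy of the spec with injected values added for parameters the
    target constructor accepts and the spec does not already provide."""
    params = inspect.signature(_resolve(spec.class_name).__init__).parameters
    extra = {k: v for k, v in injected.items() if k in params and k not in spec.args}
    return replace(spec, args={**spec.args, **extra})

def create_object(spec: ObjectSpec) -> Any:
    return _resolve(spec.class_name)(**spec.args)

# Toy stand-in for a window service, to show the flow end to end.
class ToyWindowService:
    def __init__(self, tokenizer_name: str, max_sequence_length: int):
        self.tokenizer_name = tokenizer_name
        self.max_sequence_length = max_sequence_length

spec = ObjectSpec(class_name=f"{__name__}.ToyWindowService")
spec = inject_object_spec_args(
    spec,
    {"tokenizer_name": "toy-tokenizer", "max_sequence_length": 2048, "service": None},
)
ws = create_object(spec)  # "service" is dropped: ToyWindowService does not accept it
print(ws.tokenizer_name, ws.max_sequence_length)
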