Merge branch 'model_gauntlet' of github.com:mosaicml/llm-foundry into model_gauntlet
codestar12 committed Jun 29, 2023
2 parents 743518b + 90ff7dd commit 2ccb445
Showing 2 changed files with 49 additions and 47 deletions.
mcli/mcli-hf-eval.yaml: 80 changes (40 additions, 40 deletions)
@@ -14,7 +14,7 @@ command: |
 run_name: all-eval
 gpu_num: 8
 # gpu_type:
-# cluster: # replace with your cluster here!
+# cluster: # replace with your cluster here!
 
 image: mosaicml/llm-foundry:2.0.1_cu118-latest

@@ -97,48 +97,48 @@ parameters:
   #     device: cpu
   #     pretrained: true
   #     use_auth_token: false
-  -
-    model_name: mosaicml/mpt-7b
-    # Tokenizer
-    tokenizer:
-      name: EleutherAI/gpt-neox-20b
-      kwargs:
-        model_max_length: ${max_seq_len}
+  # -
+  #   model_name: mosaicml/mpt-7b
+  #   # Tokenizer
+  #   tokenizer:
+  #     name: EleutherAI/gpt-neox-20b
+  #     kwargs:
+  #       model_max_length: ${max_seq_len}
 
-    model:
-      name: hf_causal_lm
-      pretrained_model_name_or_path: mosaicml/mpt-7b
-      device: cpu
-      pretrained: true
-      use_auth_token: false
-  -
-    model_name: mosaicml/mpt-7b-chat
-    # Tokenizer
-    tokenizer:
-      name: mosaicml/mpt-7b-chat
-      kwargs:
-        model_max_length: ${max_seq_len}
+  #   model:
+  #     name: hf_causal_lm
+  #     pretrained_model_name_or_path: mosaicml/mpt-7b
+  #     device: cpu
+  #     pretrained: true
+  #     use_auth_token: false
+  # -
+  #   model_name: mosaicml/mpt-7b-chat
+  #   # Tokenizer
+  #   tokenizer:
+  #     name: mosaicml/mpt-7b-chat
+  #     kwargs:
+  #       model_max_length: ${max_seq_len}
 
-    model:
-      name: hf_causal_lm
-      pretrained_model_name_or_path: mosaicml/mpt-7b-chat
-      device: cpu
-      pretrained: true
-      use_auth_token: false
-  -
-    model_name: mosaicml/mpt-7b-instruct
-    # Tokenizer
-    tokenizer:
-      name: EleutherAI/gpt-neox-20b
-      kwargs:
-        model_max_length: ${max_seq_len}
+  #   model:
+  #     name: hf_causal_lm
+  #     pretrained_model_name_or_path: mosaicml/mpt-7b-chat
+  #     device: cpu
+  #     pretrained: true
+  #     use_auth_token: false
+  # -
+  #   model_name: mosaicml/mpt-7b-instruct
+  #   # Tokenizer
+  #   tokenizer:
+  #     name: EleutherAI/gpt-neox-20b
+  #     kwargs:
+  #       model_max_length: ${max_seq_len}
 
-    model:
-      name: hf_causal_lm
-      pretrained_model_name_or_path: mosaicml/mpt-7b-instruct
-      device: cpu
-      pretrained: true
-      use_auth_token: false
+  #   model:
+  #     name: hf_causal_lm
+  #     pretrained_model_name_or_path: mosaicml/mpt-7b-instruct
+  #     device: cpu
+  #     pretrained: true
+  #     use_auth_token: false
   # -
   #   model_name: tiiuae/falcon-7b
   #   # Tokenizer
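
Note on the yaml change: each list entry pairs a model_name and tokenizer with an hf_causal_lm model sub-config, and the diff only toggles which entries are commented out. A minimal sketch (not the repo's code) of how an eval script can consume such a list, assuming the entries sit under a models key and that omegaconf is installed:

```python
from omegaconf import OmegaConf as om

# mcli materializes the `parameters` block as a yaml file for the run,
# so the eval script would see the `models` list at the top level.
cfg = om.load('/mnt/config/parameters.yaml')  # path is an assumption
for model_cfg in cfg.models:
    # each entry carries its own tokenizer and hf_causal_lm settings
    print(f'would evaluate {model_cfg.model_name} '
          f'({model_cfg.model.pretrained_model_name_or_path})')
```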
scripts/eval/eval.py: 16 changes (9 additions, 7 deletions)
@@ -37,7 +37,7 @@ def load_model(model_cfg, tokenizer, num_retries):
     )
 
 
-def evaluate_model(model_cfg, run_name):
+def evaluate_model(model_cfg, run_name, model_gauntlet_df):
     print(f'Evaluating model: {model_cfg.model_name}', flush=True)
     # Build tokenizer and model
     tokenizer = build_tokenizer(model_cfg.tokenizer)
@@ -66,7 +66,7 @@ def evaluate_model(model_cfg, run_name):

     if model_gauntlet_df is None and model_gauntlet is not None:
         model_gauntlet_df = pd.DataFrame(columns=['model_name', 'average'] +
-                                         [t.name for t in model_gauntlet.tasks])
+                                         [t.name for t in model_gauntlet.categories])
 
     in_memory_logger = InMemoryLogger()  # track metrics in the in_memory_logger
     loggers: List[LoggerDestination] = [
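
The tasks to categories rename here tracks the gauntlet object's attribute; the DataFrame it initializes has one column per gauntlet category plus model_name and average. A self-contained sketch with stand-in category names (the real names come from the gauntlet yaml):

```python
import pandas as pd

# stand-ins for [t.name for t in model_gauntlet.categories]
category_names = ['world_knowledge', 'commonsense_reasoning', 'reading_comprehension']

model_gauntlet_df = pd.DataFrame(columns=['model_name', 'average'] + category_names)
print(list(model_gauntlet_df.columns))
# ['model_name', 'average', 'world_knowledge', 'commonsense_reasoning', 'reading_comprehension']
```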
@@ -103,7 +103,7 @@ def evaluate_model(model_cfg, run_name):
     b = time.time()
     print(f'Ran {model_cfg.model_name} eval in: {b-a} seconds')
     return (in_memory_logger, logger_keys, model_gauntlet_callback,
-            model_gauntlet)
+            model_gauntlet, model_gauntlet_df)
 
 
 def main(cfg):
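
Returning model_gauntlet_df from evaluate_model, together with the parameter added to the signature in the first hunk, turns the DataFrame into an accumulator that main() threads through one call per model. A toy sketch of that pattern (evaluate_model_stub is hypothetical, not the repo's function):

```python
import pandas as pd

def evaluate_model_stub(model_name, df):
    # stand-in for evaluate_model: create the accumulator on first use,
    # append one result row, and hand the DataFrame back to the caller
    if df is None:
        df = pd.DataFrame(columns=['model_name', 'average'])
    row = {'model_name': model_name, 'average': 0.0}  # placeholder score
    return pd.concat([df, pd.DataFrame([row])], ignore_index=True)

df = None
for name in ['mosaicml/mpt-7b', 'mosaicml/mpt-7b-chat']:
    df = evaluate_model_stub(name, df)
print(df)
```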
@@ -120,17 +120,19 @@ def main(cfg):

         try:
             (in_memory_logger, logger_keys, model_gauntlet_callback,
-             model_gauntlet) = evaluate_model(model_cfg, cfg.run_name)
+             model_gauntlet,
+             model_gauntlet_df) = evaluate_model(model_cfg, cfg.run_name,
+                                                 model_gauntlet_df)
 
             composite_scores = model_gauntlet_callback.eval_end(
                 None, in_memory_logger)
 
             benchmark_to_taxonomy = {}
-            for t in model_gauntlet.tasks:
+            for t in model_gauntlet.categories:
                 for b in t.benchmarks:
                     benchmark_to_taxonomy[b.name] = t.name
 
-            [t.name for t in model_gauntlet.tasks]
 
             model_results = calculate_markdown_results(logger_keys,
                                                        in_memory_logger.data,
                                                        benchmark_to_taxonomy,
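
benchmark_to_taxonomy maps each benchmark to the category it belongs to, which is what calculate_markdown_results uses to group rows; the stray [t.name for t in model_gauntlet.tasks] line was a no-op expression that still referenced the old attribute name. A sketch of the mapping with hypothetical category objects (names chosen to resemble the gauntlet, not read from it):

```python
from types import SimpleNamespace

# hypothetical stand-ins for model_gauntlet.categories
categories = [
    SimpleNamespace(name='world_knowledge',
                    benchmarks=[SimpleNamespace(name='jeopardy'),
                                SimpleNamespace(name='triviaqa')]),
    SimpleNamespace(name='commonsense_reasoning',
                    benchmarks=[SimpleNamespace(name='piqa')]),
]

benchmark_to_taxonomy = {}
for t in categories:
    for b in t.benchmarks:
        benchmark_to_taxonomy[b.name] = t.name

print(benchmark_to_taxonomy)
# {'jeopardy': 'world_knowledge', 'triviaqa': 'world_knowledge', 'piqa': 'commonsense_reasoning'}
```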
@@ -146,7 +148,7 @@ def main(cfg):

             row.update({
                 t.name: composite_scores[f'metrics/model_gauntlet/{t.name}']
-                for t in model_gauntlet.tasks
+                for t in model_gauntlet.categories
             })
             row.update({
                 'average': composite_scores[f'metrics/model_gauntlet/average']
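
Each composite score is looked up under metrics/model_gauntlet/<category>, so the row's keys line up exactly with the DataFrame columns created in evaluate_model. A hedged, self-contained sketch of assembling one such row (scores and names are invented):

```python
import pandas as pd

# invented scores, shaped like the f'metrics/model_gauntlet/{t.name}' keys above
composite_scores = {
    'metrics/model_gauntlet/world_knowledge': 0.41,
    'metrics/model_gauntlet/commonsense_reasoning': 0.38,
    'metrics/model_gauntlet/average': 0.395,
}
category_names = ['world_knowledge', 'commonsense_reasoning']

row = {'model_name': 'mosaicml/mpt-7b'}
row.update({
    name: composite_scores[f'metrics/model_gauntlet/{name}']
    for name in category_names
})
row.update({'average': composite_scores['metrics/model_gauntlet/average']})

model_gauntlet_df = pd.DataFrame([row],
                                 columns=['model_name', 'average'] + category_names)
print(model_gauntlet_df)
```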