diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 684eaafd5d..06ed1d8581 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -29,10 +29,12 @@ jobs: key: pip-${{ hashFiles('requirements.txt') }}-${{ matrix.python-version }} restore-keys: | pip- - - run: pip install -e . - - run: helm-run -h - - run: helm-summarize -h - - run: echo "Finished installation." + - run: python3 -m pip install --upgrade build + - run: python3 -m build + - run: python3 -m pip install dist/crfm_helm-*.whl + - run: helm-run --run-specs simple1:model=simple/model1 --max-eval-instances 10 --suite test + - run: helm-summarize --suite test + - run: helm-server --help test: name: Tests diff --git a/.gitignore b/.gitignore index 3f3d0b4f16..cb3d5237f8 100644 --- a/.gitignore +++ b/.gitignore @@ -44,3 +44,5 @@ notes.otl # Miscellaneous .nfs* + +node_modules diff --git a/MANIFEST.in b/MANIFEST.in index 72af21c530..ba863069aa 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,3 +1,4 @@ recursive-include src/helm/proxy/clients/ *.sp recursive-include src/helm/benchmark/ *.json recursive-include src/helm/benchmark/static/ *.css *.html *.js *.png *.yaml +recursive-include src/helm/config/ *.yaml diff --git a/docs/mkdocs_macros.py b/docs/mkdocs_macros.py index ba78537f86..914113eb22 100644 --- a/docs/mkdocs_macros.py +++ b/docs/mkdocs_macros.py @@ -2,7 +2,7 @@ from dataclasses import dataclass, field from typing import Optional, List -from helm.benchmark.presentation.schema import read_schema, ModelField +from helm.benchmark.presentation.schema import read_schema, SCHEMA_CLASSIC_YAML_FILENAME, ModelField from helm.benchmark.run_expander import RUN_EXPANDERS from helm.proxy.models import ALL_MODELS, Model @@ -27,7 +27,8 @@ def from_model_field_and_model_object(model_field: ModelField, model_object: Opt def define_env(env): @env.macro def models_by_organization(): - schema = read_schema() + # TODO: make this customizable + schema = read_schema(SCHEMA_CLASSIC_YAML_FILENAME) result = defaultdict(list) # Create dict name -> madel_object (ALL_MODELS) diff --git a/docs/tutorial.md b/docs/tutorial.md index cc103e11f6..14ac4bdd37 100644 --- a/docs/tutorial.md +++ b/docs/tutorial.md @@ -63,7 +63,7 @@ This reads the pre-existing files in `benchmark_output/runs/v1/` that were writt - `groups.json` contains a serialized list of `Table`, each containing information about groups in a group category. - `groups_metadata.json` contains a list of all the groups along with a human-readable description and a taxonomy. -Additionally, for each group and group-relavent metric, it will output a pair of files: `benchmark_output/runs/v1/groups/latex/_.tex` and `benchmark_output/runs/v1/groups/latex/_.json`. These files contain the statistics for that metric from each run within the group. +Additionally, for each group and group-relavent metric, it will output a pair of files: `benchmark_output/runs/v1/groups/latex/_.tex` and `benchmark_output/runs/v1/groups/json/_.json`. These files contain the statistics for that metric from each run within the group.