Skip to content

Commit

Permalink
Merge branch 'main' of github.com:stanford-crfm/helm into DecodingTrust
Browse files Browse the repository at this point in the history
  • Loading branch information
danielz02 committed Dec 20, 2023
2 parents d4e1695 + e3cf155 commit bbfae71
Show file tree
Hide file tree
Showing 130 changed files with 4,578 additions and 2,411 deletions.
10 changes: 6 additions & 4 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,12 @@ jobs:
key: pip-${{ hashFiles('requirements.txt') }}-${{ matrix.python-version }}
restore-keys: |
pip-
- run: pip install -e .
- run: helm-run -h
- run: helm-summarize -h
- run: echo "Finished installation."
- run: python3 -m pip install --upgrade build
- run: python3 -m build
- run: python3 -m pip install dist/crfm_helm-*.whl
- run: helm-run --run-specs simple1:model=simple/model1 --max-eval-instances 10 --suite test
- run: helm-summarize --suite test
- run: helm-server --help

test:
name: Tests
Expand Down
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -44,3 +44,5 @@ notes.otl

# Miscellaneous
.nfs*

node_modules
1 change: 1 addition & 0 deletions MANIFEST.in
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
recursive-include src/helm/proxy/clients/ *.sp
recursive-include src/helm/benchmark/ *.json
recursive-include src/helm/benchmark/static/ *.css *.html *.js *.png *.yaml
recursive-include src/helm/config/ *.yaml
5 changes: 3 additions & 2 deletions docs/mkdocs_macros.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from dataclasses import dataclass, field
from typing import Optional, List

from helm.benchmark.presentation.schema import read_schema, ModelField
from helm.benchmark.presentation.schema import read_schema, SCHEMA_CLASSIC_YAML_FILENAME, ModelField
from helm.benchmark.run_expander import RUN_EXPANDERS
from helm.proxy.models import ALL_MODELS, Model

Expand All @@ -27,7 +27,8 @@ def from_model_field_and_model_object(model_field: ModelField, model_object: Opt
def define_env(env):
@env.macro
def models_by_organization():
schema = read_schema()
# TODO: make this customizable
schema = read_schema(SCHEMA_CLASSIC_YAML_FILENAME)
result = defaultdict(list)

# Create dict name -> model_object (ALL_MODELS)
Expand Down
2 changes: 1 addition & 1 deletion docs/tutorial.md
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ This reads the pre-existing files in `benchmark_output/runs/v1/` that were writt
- `groups.json` contains a serialized list of `Table`, each containing information about groups in a group category.
- `groups_metadata.json` contains a list of all the groups along with a human-readable description and a taxonomy.

Additionally, for each group and group-relavent metric, it will output a pair of files: `benchmark_output/runs/v1/groups/latex/<group_name>_<metric_name>.tex` and `benchmark_output/runs/v1/groups/latex/<group_name>_<metric_name>.json`. These files contain the statistics for that metric from each run within the group.
Additionally, for each group and group-relevant metric, it will output a pair of files: `benchmark_output/runs/v1/groups/latex/<group_name>_<metric_name>.tex` and `benchmark_output/runs/v1/groups/json/<group_name>_<metric_name>.json`. These files contain the statistics for that metric from each run within the group.

<!--
# TODO(#1441): Enable plots
Expand Down
2 changes: 2 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ google-api-core==2.10.1
google-api-python-client==2.64.0
google-auth==2.12.0
google-auth-httplib2==0.1.0
google-cloud-aiplatform==1.36.4
googleapis-common-protos==1.56.4
greenlet==1.1.3
gunicorn==20.1.0
Expand All @@ -67,6 +68,7 @@ jmespath==1.0.1
joblib==1.2.0
kiwisolver==1.4.4
langcodes==3.3.0
langdetect==1.0.9
llvmlite==0.39.1
lxml==4.9.1
Mako==1.2.3
Expand Down
4 changes: 2 additions & 2 deletions scripts/cache/copy_cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
import os

from sqlitedict import SqliteDict
from helm.common.cache import _MongoKeyValueStore
from helm.common.mongo_key_value_store import MongoKeyValueStore
from helm.common.hierarchical_logger import hlog, htrack
from typing import Optional

Expand Down Expand Up @@ -60,7 +60,7 @@ def copy_cache(
hlog(f"Opening Sqlite cache {cache_path}")
with SqliteDict(cache_path) as source_cache:
hlog(f"Copying to MongoDB {mongo_host}")
with _MongoKeyValueStore(mongo_host, collection_name=organization) as target_cache:
with MongoKeyValueStore(mongo_host, collection_name=organization) as target_cache:
for key, value in source_cache.items():
if not dry_run and (not range_start or num_items >= range_start):
try:
Expand Down
5 changes: 1 addition & 4 deletions scripts/cache/fix_anthropic_cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
from helm.common.hierarchical_logger import hlog, htrack
from helm.proxy.clients.anthropic_client import AnthropicLegacyClient
from helm.proxy.retry import get_retry_decorator
from helm.proxy.tokenizers.huggingface_tokenizer import HuggingFaceTokenizer


"""
Expand Down Expand Up @@ -48,9 +47,7 @@ def add_logprobs(mongo_uri: str, credentials_path: str, dry_run: bool):
api_key: str = credentials["anthropicApiKey"]

cache_config = MongoCacheConfig(mongo_uri, collection_name="anthropic")
client = AnthropicLegacyClient(
api_key=api_key, tokenizer=HuggingFaceTokenizer(cache_config), cache_config=cache_config
)
client = AnthropicLegacyClient(api_key=api_key, cache_config=cache_config)

with create_key_value_store(cache_config) as cache:
for i, (request, response) in enumerate(cache.get_all()):
Expand Down
6 changes: 4 additions & 2 deletions scripts/compute_request_limits.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,15 +124,16 @@ def figure_out_max_prompt_length(


def figure_out_max_prompt_length_plus_tokens(
client: Any, # Client,
client: AutoClient,
auto_tokenizer: AutoTokenizer,
model_deployment_name: str,
model_name: str,
tokenizer_name: str,
max_prompt_length: int,
prefix: str = "",
suffix: str = "",
) -> int:
tokenizer = client._get_tokenizer(tokenizer_name)
tokenizer = auto_tokenizer._get_tokenizer(tokenizer_name)
lower_bound = 1
upper_bound = 2 * max_prompt_length + 1

Expand Down Expand Up @@ -387,6 +388,7 @@ def main():
print("========== Figure out max_prompt_length_plus_tokens ==========")
max_prompt_length_plus_tokens: int = figure_out_max_prompt_length_plus_tokens(
client,
auto_tokenizer,
args.model_deployment_name,
args.model_name,
args.tokenizer_name,
Expand Down
2 changes: 1 addition & 1 deletion scripts/offline_eval/export_requests.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@
MongoCacheConfig,
SqliteCacheConfig,
create_key_value_store,
request_to_key,
)
from helm.common.key_value_store import request_to_key
from helm.common.hierarchical_logger import hlog, htrack, htrack_block
from helm.proxy.clients.google_client import GoogleClient
from helm.proxy.clients.together_client import TogetherClient
Expand Down
2 changes: 1 addition & 1 deletion scripts/offline_eval/import_results.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@
MongoCacheConfig,
SqliteCacheConfig,
create_key_value_store,
request_to_key,
)
from helm.common.key_value_store import request_to_key
from helm.common.hierarchical_logger import hlog, htrack
from .export_requests import SUPPORTED_ORGS

Expand Down
10 changes: 8 additions & 2 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,6 @@ install_requires=
# Keep sqlitedict version at 1.7.0.
sqlitedict~=1.7.0
bottle~=0.12.23
# TODO: Remove these from common
pymongo~=4.2.0

# Basic Scenarios
datasets~=2.5.2
Expand Down Expand Up @@ -103,6 +101,9 @@ images =
accelerate~=0.23.0 # For the newer versions of Transformers
pillow~=9.4.0

mongo =
pymongo~=4.2.0

# Model extras
aleph-alpha =
aleph-alpha-client~=2.14.0
Expand All @@ -116,6 +117,9 @@ openai =
openai~=0.27.8
tiktoken~=0.3.3

google =
google-cloud-aiplatform~=1.36.4

tsinghua =
icetk~=0.0.4

Expand All @@ -125,6 +129,7 @@ yandex =
models =
crfm-helm[aleph-alpha]
crfm-helm[anthropic]
crfm-helm[google]
crfm-helm[openai]
crfm-helm[tsinghua]
crfm-helm[yandex]
Expand All @@ -141,6 +146,7 @@ all =
crfm-helm[cleva]
crfm-helm[images]
crfm-helm[models]
crfm-helm[mongo]

# Development only
# Do not include in all
Expand Down
32 changes: 6 additions & 26 deletions src/helm-frontend/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,15 @@ This app makes use of [React](https://react.dev/) + [TypeScript](https://www.typ
### Installation

```bash
npm Install
yarn install
```

### Develop

This will open a development server

```bash
npm run dev
yarn dev
```

You will also want to start `helm-server` locally as well. In the `src/helm` directory run the following
Expand All @@ -27,13 +27,13 @@ helm-server
### Testing

```
npm run test
yarn test
```

### Build

```bash
npm run build
yarn build
```

### Deployment
Expand All @@ -45,33 +45,13 @@ You can rename the build directory to the desired release name and upload it to
### Linting

```bash
npm run lint
yarn lint
```

### Formatting

If you don't have prettier configured in your IDE or Node environment, you will have to run the following before committing, in order to pass tests.

```bash
npm run format
```

### Environment Variables

Requires the following environment variables for development and deployment. In development these can be placed in a `.env.local` file with the following:

```
# The default location of local `helm-server`
VITE_HELM_BENCHMARKS_ENDPOINT="http://localhost:8000/"
# The suites available based on local runs
VITE_HELM_BENCHMARKS_SUITE="v1"
```

This can instead be pointed to the public HELM data to avoid needing to run `helm-server` locally.

```
# Example
VITE_HELM_BENCHMARKS_ENDPOINT="https://storage.googleapis.com/crfm-helm-public/"
# Change to current version
VITE_HELM_BENCHMARKS_SUITE="v0.2.3"
yarn format
```
4 changes: 2 additions & 2 deletions src/helm-frontend/public/config.js
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
window.BENCHMARK_OUTPUT_BASE_URL =
"https://storage.googleapis.com/crfm-helm-public/";
"https://storage.googleapis.com/crfm-helm-public/lite/";
window.SUITE = null;
window.RELEASE = "v0.4.0";
window.RELEASE = "v1.0.0";
4 changes: 2 additions & 2 deletions src/helm-frontend/src/App.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -7,17 +7,17 @@ import Groups from "@/routes/Groups";
import Group from "@/routes/Group";
import Runs from "@/routes/Runs";
import Run from "@/routes/Run";
import Landing from "@/routes/Landing";
import Leaderboard from "@/routes/Leaderboard";
import Landing from "@/routes/Landing";

export default function App() {
return (
<Router>
<Routes>
<Route path={`/`} element={<Layout />}>
<Route index element={<Landing />} />
<Route path="models" element={<Models />} />
<Route path="leaderboard" element={<Leaderboard />} />
<Route path="models" element={<Models />} />
<Route path="scenarios" element={<Scenarios />} />
<Route path="groups" element={<Groups />} />
<Route path="groups/:groupName" element={<Group />} />
Expand Down
Binary file modified src/helm-frontend/src/assets/logos/together.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
2 changes: 1 addition & 1 deletion src/helm-frontend/src/components/Footer.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ export default function Footer() {
const version = getBenchmarkRelease();
return (
<div className="bottom-0 right-0 p-4 bg-white-800 text-black text-right">
<p>Release: {version}</p>
<p>Release {version}</p>
</div>
);
}
73 changes: 40 additions & 33 deletions src/helm-frontend/src/components/Hero.tsx
Original file line number Diff line number Diff line change
@@ -1,43 +1,50 @@
import helmHero from "@/assets/helmhero.png";
import { Link } from "react-router-dom";
import MiniLeaderboard from "./MiniLeaderboard";

export default function Hero() {
return (
<div className="flex px-6 py-14">
{/* Left side content */}
<div className="flex-1 p-4 flex flex-col justify-center">
{" "}
{/* Added flex and justify-center */}
<div className="flex justify-start">
<h1 className="text-5xl mb-4 mx-4 mt-2">
<strong>
A holistic framework for evaluating foundation models.
</strong>
</h1>
</div>
<div className="flex justify-start mt-6 ml-4">
<Link to="leaderboard">
<button className="px-6 btn btn-grey rounded-md">
<body>Leaderboard</body>
</button>
</Link>
<Link to="https://github.com/stanford-crfm/helm" className="ml-4">
{" "}
{/* Added margin-left for spacing */}
<button className="px-6 btn btn-grey rounded-md">Github</button>
</Link>
</div>
<div className="flex flex-col px-4 sm:px-6 py-100 sm:py-10 sm:mb-96 md:mb-96 lg:mb-0 xl:mb-0 2xl:mb-0">
{/* Text section */}
<div className="flex flex-col text-center mb-10 justify-start">
<h1 className="text-3xl sm:text-4xl mb-3 sm:mb-4 mx-2 mt-2">
<strong>
A holistic framework for evaluating foundation models.
</strong>
</h1>
</div>

{/* Right side image */}
<div className="w-1/3 mx-4">
{" "}
{/* Added mx-4 for horizontal margin */}
<img
src={helmHero}
alt="HELM Hero"
className="object-cover w-full h-full"
/>
{/* Container for Image and Leaderboard */}
<div
className="flex flex-col md:flex-col lg:flex-row lg:justify-center"
style={{ height: "525px", transform: "scale(0.9)" }} // Reduced height by 10%
>
{/* Image section */}
<div className="w-full lg:w-1/2 flex justify-center mb-4 lg:mb-0 h-full py-10">
<img
src={helmHero}
alt="HELM Hero"
className="object-cover h-full" // Stretched to full height
style={{ maxWidth: "100%" }}
/>
</div>

{/* Leaderboard section */}
<div className="w-full lg:w-1/2 flex justify-center h-full py-10">
<div
className="py-2 pb-6 rounded-3xl bg-gray-100 h-full" // Stretched to full height
style={{ maxWidth: "100%" }}
>
<MiniLeaderboard></MiniLeaderboard>
<div className="flex justify-end">
<Link to="leaderboard">
<button className="px-4 mx-3 mt-1 btn bg-white rounded-md">
<span>See More</span>
</button>
</Link>
</div>
</div>
</div>
</div>
</div>
);
Expand Down
Loading

0 comments on commit bbfae71

Please sign in to comment.