Merge branch 'main' of github.com:stanford-crfm/helm into DecodingTrust

AI-secure · Dec 20, 2023 · bbfae71 · bbfae71
2 parents d4e1695 + e3cf155
commit bbfae71
Show file tree

Hide file tree

Showing 130 changed files with 4,578 additions and 2,411 deletions.
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
@@ -29,10 +29,12 @@ jobs:
           key: pip-${{ hashFiles('requirements.txt') }}-${{ matrix.python-version }}
           restore-keys: |
             pip-
-      - run: pip install -e .
-      - run: helm-run -h
-      - run: helm-summarize -h
-      - run: echo "Finished installation."
+      - run: python3 -m pip install --upgrade build
+      - run: python3 -m build
+      - run: python3 -m pip install dist/crfm_helm-*.whl
+      - run: helm-run --run-specs simple1:model=simple/model1 --max-eval-instances 10 --suite test
+      - run: helm-summarize --suite test
+      - run: helm-server --help
 
   test:
     name: Tests

diff --git a/.gitignore b/.gitignore
@@ -44,3 +44,5 @@ notes.otl
 
 # Miscellaneous
 .nfs*
+
+node_modules
diff --git a/MANIFEST.in b/MANIFEST.in
@@ -1,3 +1,4 @@
 recursive-include src/helm/proxy/clients/ *.sp
 recursive-include src/helm/benchmark/ *.json
 recursive-include src/helm/benchmark/static/ *.css *.html *.js *.png *.yaml
+recursive-include src/helm/config/ *.yaml
diff --git a/docs/mkdocs_macros.py b/docs/mkdocs_macros.py
@@ -2,7 +2,7 @@
 from dataclasses import dataclass, field
 from typing import Optional, List
 
-from helm.benchmark.presentation.schema import read_schema, ModelField
+from helm.benchmark.presentation.schema import read_schema, SCHEMA_CLASSIC_YAML_FILENAME, ModelField
 from helm.benchmark.run_expander import RUN_EXPANDERS
 from helm.proxy.models import ALL_MODELS, Model
 
@@ -27,7 +27,8 @@ def from_model_field_and_model_object(model_field: ModelField, model_object: Opt
 def define_env(env):
     @env.macro
     def models_by_organization():
-        schema = read_schema()
+        # TODO: make this customizable
+        schema = read_schema(SCHEMA_CLASSIC_YAML_FILENAME)
         result = defaultdict(list)
 
         # Create dict name -> model_object (ALL_MODELS)

diff --git a/docs/tutorial.md b/docs/tutorial.md
@@ -63,7 +63,7 @@ This reads the pre-existing files in `benchmark_output/runs/v1/` that were writt
 - `groups.json` contains a serialized list of `Table`, each containing information about groups in a group category.
 - `groups_metadata.json` contains a list of all the groups along with a human-readable description and a taxonomy.
 
-Additionally, for each group and group-relavent metric, it will output a pair of files: `benchmark_output/runs/v1/groups/latex/<group_name>_<metric_name>.tex` and `benchmark_output/runs/v1/groups/latex/<group_name>_<metric_name>.json`. These files contain the statistics for that metric from each run within the group.
+Additionally, for each group and group-relevant metric, it will output a pair of files: `benchmark_output/runs/v1/groups/latex/<group_name>_<metric_name>.tex` and `benchmark_output/runs/v1/groups/json/<group_name>_<metric_name>.json`. These files contain the statistics for that metric from each run within the group.
 
 <!--
 # TODO(#1441): Enable plots

diff --git a/requirements.txt b/requirements.txt
@@ -50,6 +50,7 @@ google-api-core==2.10.1
 google-api-python-client==2.64.0
 google-auth==2.12.0
 google-auth-httplib2==0.1.0
+google-cloud-aiplatform==1.36.4
 googleapis-common-protos==1.56.4
 greenlet==1.1.3
 gunicorn==20.1.0
@@ -67,6 +68,7 @@ jmespath==1.0.1
 joblib==1.2.0
 kiwisolver==1.4.4
 langcodes==3.3.0
+langdetect==1.0.9
 llvmlite==0.39.1
 lxml==4.9.1
 Mako==1.2.3

diff --git a/scripts/cache/copy_cache.py b/scripts/cache/copy_cache.py
@@ -14,7 +14,7 @@
 import os
 
 from sqlitedict import SqliteDict
-from helm.common.cache import _MongoKeyValueStore
+from helm.common.mongo_key_value_store import MongoKeyValueStore
 from helm.common.hierarchical_logger import hlog, htrack
 from typing import Optional
 
@@ -60,7 +60,7 @@ def copy_cache(
     hlog(f"Opening Sqlite cache {cache_path}")
     with SqliteDict(cache_path) as source_cache:
         hlog(f"Copying to MongoDB {mongo_host}")
-        with _MongoKeyValueStore(mongo_host, collection_name=organization) as target_cache:
+        with MongoKeyValueStore(mongo_host, collection_name=organization) as target_cache:
             for key, value in source_cache.items():
                 if not dry_run and (not range_start or num_items >= range_start):
                     try:

diff --git a/scripts/cache/fix_anthropic_cache.py b/scripts/cache/fix_anthropic_cache.py
@@ -8,7 +8,6 @@
 from helm.common.hierarchical_logger import hlog, htrack
 from helm.proxy.clients.anthropic_client import AnthropicLegacyClient
 from helm.proxy.retry import get_retry_decorator
-from helm.proxy.tokenizers.huggingface_tokenizer import HuggingFaceTokenizer
 
 
 """
@@ -48,9 +47,7 @@ def add_logprobs(mongo_uri: str, credentials_path: str, dry_run: bool):
         api_key: str = credentials["anthropicApiKey"]
 
     cache_config = MongoCacheConfig(mongo_uri, collection_name="anthropic")
-    client = AnthropicLegacyClient(
-        api_key=api_key, tokenizer=HuggingFaceTokenizer(cache_config), cache_config=cache_config
-    )
+    client = AnthropicLegacyClient(api_key=api_key, cache_config=cache_config)
 
     with create_key_value_store(cache_config) as cache:
         for i, (request, response) in enumerate(cache.get_all()):

diff --git a/scripts/compute_request_limits.py b/scripts/compute_request_limits.py
@@ -124,15 +124,16 @@ def figure_out_max_prompt_length(
 
 
 def figure_out_max_prompt_length_plus_tokens(
-    client: Any,  # Client,
+    client: AutoClient,
+    auto_tokenizer: AutoTokenizer,
     model_deployment_name: str,
     model_name: str,
     tokenizer_name: str,
     max_prompt_length: int,
     prefix: str = "",
     suffix: str = "",
 ) -> int:
-    tokenizer = client._get_tokenizer(tokenizer_name)
+    tokenizer = auto_tokenizer._get_tokenizer(tokenizer_name)
     lower_bound = 1
     upper_bound = 2 * max_prompt_length + 1
 
@@ -387,6 +388,7 @@ def main():
     print("========== Figure out max_prompt_length_plus_tokens ==========")
     max_prompt_length_plus_tokens: int = figure_out_max_prompt_length_plus_tokens(
         client,
+        auto_tokenizer,
         args.model_deployment_name,
         args.model_name,
         args.tokenizer_name,

diff --git a/scripts/offline_eval/export_requests.py b/scripts/offline_eval/export_requests.py
@@ -12,8 +12,8 @@
     MongoCacheConfig,
     SqliteCacheConfig,
     create_key_value_store,
-    request_to_key,
 )
+from helm.common.key_value_store import request_to_key
 from helm.common.hierarchical_logger import hlog, htrack, htrack_block
 from helm.proxy.clients.google_client import GoogleClient
 from helm.proxy.clients.together_client import TogetherClient

diff --git a/scripts/offline_eval/import_results.py b/scripts/offline_eval/import_results.py
@@ -9,8 +9,8 @@
     MongoCacheConfig,
     SqliteCacheConfig,
     create_key_value_store,
-    request_to_key,
 )
+from helm.common.key_value_store import request_to_key
 from helm.common.hierarchical_logger import hlog, htrack
 from .export_requests import SUPPORTED_ORGS
 

diff --git a/setup.cfg b/setup.cfg
@@ -38,8 +38,6 @@ install_requires=
     # Keep sqlitedict version at 1.7.0.
     sqlitedict~=1.7.0
     bottle~=0.12.23
-    # TODO: Remove these from common
-    pymongo~=4.2.0
 
     # Basic Scenarios
     datasets~=2.5.2
@@ -103,6 +101,9 @@ images =
     accelerate~=0.23.0  # For the newer versions of Transformers
     pillow~=9.4.0
 
+mongo =
+    pymongo~=4.2.0
+
 # Model extras
 aleph-alpha =
     aleph-alpha-client~=2.14.0
@@ -116,6 +117,9 @@ openai =
     openai~=0.27.8
     tiktoken~=0.3.3
 
+google =
+    google-cloud-aiplatform~=1.36.4
+
 tsinghua =
     icetk~=0.0.4
 
@@ -125,6 +129,7 @@ yandex =
 models =
     crfm-helm[aleph-alpha]
     crfm-helm[anthropic]
+    crfm-helm[google]
     crfm-helm[openai]
     crfm-helm[tsinghua]
     crfm-helm[yandex]
@@ -141,6 +146,7 @@ all =
     crfm-helm[cleva]
     crfm-helm[images]
     crfm-helm[models]
+    crfm-helm[mongo]
 
 # Development only
 # Do not include in all

diff --git a/src/helm-frontend/README.md b/src/helm-frontend/README.md
@@ -7,15 +7,15 @@ This app makes use of [React](https://react.dev/) + [TypeScript](https://www.typ
 ### Installation
 
 ```bash
-npm Install
+yarn install
 ```
 
 ### Develop
 
 This will open a development server
 
 ```bash
-npm run dev
+yarn dev
 ```
 
 You will also want to start `helm-server` locally as well. In the `src/helm` directory run the following
@@ -27,13 +27,13 @@ helm-server
 ### Testing
 
 ```
-npm run test
+yarn test
 ```
 
 ### Build
 
 ```bash
-npm run build
+yarn build
 ```
 
 ### Deployment
@@ -45,33 +45,13 @@ You can rename the build directory to the desired release name and upload it to
 ### Linting
 
 ```bash
-npm run lint
+yarn lint
 ```
 
 ### Formatting
 
 If you don't have prettier configured in your IDE or Node environment, you will have to run the following before committing, in order to pass tests.
 
 ```bash
-npm run format
-```
-
-### Environment Variables
-
-Requires the following environment variables for development and deployment. In development these can be placed in a `.env.local` file with the following:
-
-```
-# The default location of local `helm-server`
-VITE_HELM_BENCHMARKS_ENDPOINT="http://localhost:8000/"
-# The suites available based on local runs
-VITE_HELM_BENCHMARKS_SUITE="v1"
-```
-
-This can instead be pointed to the public HELM data to avoid needing to run `helm-server` locally.
-
-```
-# Example
-VITE_HELM_BENCHMARKS_ENDPOINT="https://storage.googleapis.com/crfm-helm-public/"
-# Change to current version
-VITE_HELM_BENCHMARKS_SUITE="v0.2.3"
+yarn format
 ```
diff --git a/src/helm-frontend/public/config.js b/src/helm-frontend/public/config.js
@@ -1,4 +1,4 @@
 window.BENCHMARK_OUTPUT_BASE_URL =
-	"https://storage.googleapis.com/crfm-helm-public/";
+  "https://storage.googleapis.com/crfm-helm-public/lite/";
 window.SUITE = null;
-window.RELEASE = "v0.4.0";
+window.RELEASE = "v1.0.0";
diff --git a/src/helm-frontend/src/App.tsx b/src/helm-frontend/src/App.tsx
@@ -7,17 +7,17 @@ import Groups from "@/routes/Groups";
 import Group from "@/routes/Group";
 import Runs from "@/routes/Runs";
 import Run from "@/routes/Run";
-import Landing from "@/routes/Landing";
 import Leaderboard from "@/routes/Leaderboard";
+import Landing from "@/routes/Landing";
 
 export default function App() {
   return (
     <Router>
       <Routes>
         <Route path={`/`} element={<Layout />}>
           <Route index element={<Landing />} />
-          <Route path="models" element={<Models />} />
           <Route path="leaderboard" element={<Leaderboard />} />
+          <Route path="models" element={<Models />} />
           <Route path="scenarios" element={<Scenarios />} />
           <Route path="groups" element={<Groups />} />
           <Route path="groups/:groupName" element={<Group />} />

diff --git a/src/helm-frontend/src/assets/logos/together.png b/src/helm-frontend/src/assets/logos/together.png
diff --git a/src/helm-frontend/src/components/Footer.tsx b/src/helm-frontend/src/components/Footer.tsx
@@ -5,7 +5,7 @@ export default function Footer() {
   const version = getBenchmarkRelease();
   return (
     <div className="bottom-0 right-0 p-4 bg-white-800 text-black text-right">
-      <p>Release: {version}</p>
+      <p>Release {version}</p>
     </div>
   );
 }
diff --git a/src/helm-frontend/src/components/Hero.tsx b/src/helm-frontend/src/components/Hero.tsx
@@ -1,43 +1,50 @@
 import helmHero from "@/assets/helmhero.png";
 import { Link } from "react-router-dom";
+import MiniLeaderboard from "./MiniLeaderboard";
 
 export default function Hero() {
   return (
-    <div className="flex px-6 py-14">
-      {/* Left side content */}
-      <div className="flex-1 p-4 flex flex-col justify-center">
-        {" "}
-        {/* Added flex and justify-center */}
-        <div className="flex justify-start">
-          <h1 className="text-5xl mb-4 mx-4 mt-2">
-            <strong>
-              A holistic framework for evaluating foundation models.
-            </strong>
-          </h1>
-        </div>
-        <div className="flex justify-start mt-6 ml-4">
-          <Link to="leaderboard">
-            <button className="px-6 btn btn-grey rounded-md">
-              <body>Leaderboard</body>
-            </button>
-          </Link>
-          <Link to="https://github.com/stanford-crfm/helm" className="ml-4">
-            {" "}
-            {/* Added margin-left for spacing */}
-            <button className="px-6 btn btn-grey rounded-md">Github</button>
-          </Link>
-        </div>
+    <div className="flex flex-col px-4 sm:px-6 py-100 sm:py-10 sm:mb-96 md:mb-96 lg:mb-0 xl:mb-0 2xl:mb-0">
+      {/* Text section */}
+      <div className="flex flex-col text-center mb-10 justify-start">
+        <h1 className="text-3xl sm:text-4xl mb-3 sm:mb-4 mx-2 mt-2">
+          <strong>
+            A holistic framework for evaluating foundation models.
+          </strong>
+        </h1>
       </div>
 
-      {/* Right side image */}
-      <div className="w-1/3 mx-4">
-        {" "}
-        {/* Added mx-4 for horizontal margin */}
-        <img
-          src={helmHero}
-          alt="HELM Hero"
-          className="object-cover w-full h-full"
-        />
+      {/* Container for Image and Leaderboard */}
+      <div
+        className="flex flex-col md:flex-col lg:flex-row lg:justify-center"
+        style={{ height: "525px", transform: "scale(0.9)" }} // Reduced height by 10%
+      >
+        {/* Image section */}
+        <div className="w-full lg:w-1/2 flex justify-center mb-4 lg:mb-0 h-full py-10">
+          <img
+            src={helmHero}
+            alt="HELM Hero"
+            className="object-cover h-full" // Stretched to full height
+            style={{ maxWidth: "100%" }}
+          />
+        </div>
+
+        {/* Leaderboard section */}
+        <div className="w-full lg:w-1/2 flex justify-center h-full py-10">
+          <div
+            className="py-2 pb-6 rounded-3xl bg-gray-100 h-full" // Stretched to full height
+            style={{ maxWidth: "100%" }}
+          >
+            <MiniLeaderboard></MiniLeaderboard>
+            <div className="flex justify-end">
+              <Link to="leaderboard">
+                <button className="px-4 mx-3 mt-1 btn bg-white rounded-md">
+                  <span>See More</span>
+                </button>
+              </Link>
+            </div>
+          </div>
+        </div>
       </div>
     </div>
   );