merge

EleutherAI · Feb 24, 2024 · 29d60d5 · 29d60d5
2 parents a9bd129 + f78e2da
commit 29d60d5
Show file tree

Hide file tree

Showing 4 changed files with 65 additions and 26 deletions.
diff --git a/docs/interface.md b/docs/interface.md
@@ -48,6 +48,8 @@ This mode supports a number of command-line arguments, the details of which can
 
 * `--seed`: Set seed for python's random, numpy and torch.  Accepts a comma-separated list of 3 values for python's random, numpy, and torch seeds, respectively, or a single integer to set the same seed for all three.  The values are either an integer or 'None' to not set the seed. Default is `0,1234,1234` (for backward compatibility).  E.g. `--seed 0,None,8` sets `random.seed(0)` and `torch.manual_seed(8)`. Here numpy's seed is not set since the second value is `None`.  E.g, `--seed 42` sets all three seeds to 42.
 
+* `--wandb_args`: Enables logging of evaluation runs to Weights & Biases and accepts arguments passed to `wandb.init`, such as `project` and `job_type`. See the full list [here](https://docs.wandb.ai/ref/python/init).
+
 ## External Library Usage
 
 We also support using the library's external API for use within model training loops or other scripts.

diff --git a/lm_eval/evaluator.py b/lm_eval/evaluator.py
@@ -19,10 +19,10 @@
     print_writeout,
     run_task_tests,
 )
+from lm_eval.logging_utils import add_env_info, get_git_commit_hash
 from lm_eval.tasks import TaskManager, get_task_dict
 from lm_eval.utils import (
     eval_logger,
-    get_git_commit_hash,
     positional_deprecated,
     simple_parse_args_string,
 )
@@ -233,6 +233,7 @@ def simple_evaluate(
             "gen_kwargs": gen_kwargs,
         }
         results["git_hash"] = get_git_commit_hash()
+        add_env_info(results)  # additional environment info to results
         return results
     else:
         return None

diff --git a/lm_eval/logging_utils.py b/lm_eval/logging_utils.py
@@ -1,14 +1,19 @@
 import copy
 import json
 import logging
+import os
 import re
-from typing import Any, Dict, List, Literal, Tuple, Union
+import subprocess
+from pathlib import Path
+from typing import Any, Dict, List, Literal, Optional, Tuple, Union
 
 import numpy as np
 import pandas as pd
 from packaging.version import Version
+from torch.utils.collect_env import get_pretty_env_info
+from transformers import __version__ as trans_version
 
-from lm_eval import utils
+from lm_eval.utils import simple_parse_args_string
 
 
 logger = logging.getLogger(__name__)
@@ -85,9 +90,7 @@ def __init__(self, args: Any) -> None:
             results (Dict[str, Any]): The results dictionary.
             args (Any): Arguments for configuration.
         """
-        self.wandb_args: Dict[str, Any] = utils.simple_parse_args_string(
-            args.wandb_args
-        )
+        self.wandb_args: Dict[str, Any] = simple_parse_args_string(args.wandb_args)
 
         # initialize a W&B run
         if wandb.run is None:
@@ -384,3 +387,55 @@ def log_eval_samples(self, samples: Dict[str, List[Dict[str, Any]]]) -> None:
                 self._log_samples_as_artifact(eval_preds, task_name)
 
             self.run.log({f"{group}_eval_results": grouped_df})
+
+
+def get_commit_from_path(repo_path: Path) -> Optional[str]:
+    git_folder = Path(repo_path, ".git")
+    if git_folder.is_file():
+        git_folder = Path(
+            git_folder.parent,
+            git_folder.read_text(encoding="utf-8").split("\n")[0].split(" ")[-1],
+        )
+    if Path(git_folder, "HEAD").exists():
+        head_name = (
+            Path(git_folder, "HEAD")
+            .read_text(encoding="utf-8")
+            .split("\n")[0]
+            .split(" ")[-1]
+        )
+        head_ref = Path(git_folder, head_name)
+        git_hash = head_ref.read_text(encoding="utf-8").replace("\n", "")
+    else:
+        git_hash = None
+    return git_hash
+
+
+def get_git_commit_hash():
+    """
+    Gets the git commit hash of your current repo (if it exists).
+    Source: https://github.com/EleutherAI/gpt-neox/blob/b608043be541602170bfcfb8ec9bf85e8a0799e0/megatron/neox_arguments/neox_args.py#L42
+    """
+    try:
+        git_hash = subprocess.check_output(["git", "describe", "--always"]).strip()
+        git_hash = git_hash.decode()
+    except (subprocess.CalledProcessError, FileNotFoundError):
+        # FileNotFoundError occurs when git not installed on system
+        git_hash = get_commit_from_path(os.getcwd())  # git hash of repo if exists
+    return git_hash
+
+
+def add_env_info(storage: Dict[str, Any]):
+    try:
+        pretty_env_info = get_pretty_env_info()
+    except Exception as err:
+        pretty_env_info = str(err)
+    transformers_version = trans_version
+    upper_dir_commit = get_commit_from_path(
+        Path(os.getcwd(), "..")
+    )  # git hash of upper repo if exists
+    added_info = {
+        "pretty_env_info": pretty_env_info,
+        "transformers_version": transformers_version,
+        "upper_git_hash": upper_dir_commit,  # in case this repo is submodule
+    }
+    storage.update(added_info)
diff --git a/lm_eval/utils.py b/lm_eval/utils.py
@@ -6,13 +6,8 @@
 import logging
 import os
 import re
-import subprocess
 from itertools import islice
-from typing import (
-    Any,
-    Callable,
-    List,
-)
+from typing import Any, Callable, List
 
 import yaml
 from jinja2 import BaseLoader, Environment, StrictUndefined
@@ -288,20 +283,6 @@ def _wrapper(*args, **kwargs):
     return _wrapper
 
 
-def get_git_commit_hash():
-    """
-    Gets the git commit hash of your current repo (if it exists).
-    Source: https://github.com/EleutherAI/gpt-neox/blob/b608043be541602170bfcfb8ec9bf85e8a0799e0/megatron/neox_arguments/neox_args.py#L42
-    """
-    try:
-        git_hash = subprocess.check_output(["git", "describe", "--always"]).strip()
-        git_hash = git_hash.decode()
-    except subprocess.CalledProcessError or FileNotFoundError:
-        # FileNotFoundError occurs when git not installed on system
-        git_hash = None
-    return git_hash
-
-
 def ignore_constructor(loader, node):
     return node