Skip to content

Commit

Permalink
merge
Browse files Browse the repository at this point in the history
  • Loading branch information
baberabb committed Feb 24, 2024
2 parents a9bd129 + f78e2da commit 29d60d5
Show file tree
Hide file tree
Showing 4 changed files with 65 additions and 26 deletions.
2 changes: 2 additions & 0 deletions docs/interface.md
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,8 @@ This mode supports a number of command-line arguments, the details of which can

* `--seed`: Set the seed for Python's random, NumPy, and torch. Accepts a comma-separated list of 3 values for Python's random, NumPy, and torch seeds, respectively, or a single integer to set the same seed for all three. Each value is either an integer or `None` (to leave that seed unset). Default is `0,1234,1234` (for backward compatibility). E.g., `--seed 0,None,8` sets `random.seed(0)` and `torch.manual_seed(8)`; NumPy's seed is not set because the second value is `None`. E.g., `--seed 42` sets all three seeds to 42.

* `--wandb_args`: Enables logging of evaluation runs to Weights and Biases. Accepts the arguments passed to `wandb.init`, such as `project` and `job_type`. The full list is available [here](https://docs.wandb.ai/ref/python/init).

## External Library Usage

We also support using the library's external API for use within model training loops or other scripts.
Expand Down
3 changes: 2 additions & 1 deletion lm_eval/evaluator.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,10 @@
print_writeout,
run_task_tests,
)
from lm_eval.logging_utils import add_env_info, get_git_commit_hash
from lm_eval.tasks import TaskManager, get_task_dict
from lm_eval.utils import (
eval_logger,
get_git_commit_hash,
positional_deprecated,
simple_parse_args_string,
)
Expand Down Expand Up @@ -233,6 +233,7 @@ def simple_evaluate(
"gen_kwargs": gen_kwargs,
}
results["git_hash"] = get_git_commit_hash()
add_env_info(results) # additional environment info to results
return results
else:
return None
Expand Down
65 changes: 60 additions & 5 deletions lm_eval/logging_utils.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,19 @@
import copy
import json
import logging
import os
import re
from typing import Any, Dict, List, Literal, Tuple, Union
import subprocess
from pathlib import Path
from typing import Any, Dict, List, Literal, Optional, Tuple, Union

import numpy as np
import pandas as pd
from packaging.version import Version
from torch.utils.collect_env import get_pretty_env_info
from transformers import __version__ as trans_version

from lm_eval import utils
from lm_eval.utils import simple_parse_args_string


logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -85,9 +90,7 @@ def __init__(self, args: Any) -> None:
results (Dict[str, Any]): The results dictionary.
args (Any): Arguments for configuration.
"""
self.wandb_args: Dict[str, Any] = utils.simple_parse_args_string(
args.wandb_args
)
self.wandb_args: Dict[str, Any] = simple_parse_args_string(args.wandb_args)

# initialize a W&B run
if wandb.run is None:
Expand Down Expand Up @@ -384,3 +387,55 @@ def log_eval_samples(self, samples: Dict[str, List[Dict[str, Any]]]) -> None:
self._log_samples_as_artifact(eval_preds, task_name)

self.run.log({f"{group}_eval_results": grouped_df})


def get_commit_from_path(repo_path: Union[Path, str]) -> Optional[str]:
    """Read the checked-out commit hash from a repository's ``.git`` data.

    This does not invoke the ``git`` executable; it only reads files on disk.

    Args:
        repo_path: Directory expected to contain a ``.git`` entry. ``.git``
            may be a directory, or a pointer file whose first line ends with
            the path of the real git directory (resolved relative to the
            pointer file's parent).

    Returns:
        The commit hash as a string, or ``None`` when there is no ``HEAD``
        file, the referenced ref cannot be resolved, or the git metadata
        cannot be read.
    """
    git_folder = Path(repo_path, ".git")
    try:
        if git_folder.is_file():
            # `.git` is a pointer file: follow it to the real git directory.
            pointer = git_folder.read_text(encoding="utf-8").split("\n")[0]
            git_folder = Path(git_folder.parent, pointer.split(" ")[-1])

        head_file = Path(git_folder, "HEAD")
        if not head_file.exists():
            return None

        head_line = head_file.read_text(encoding="utf-8").split("\n")[0].strip()
        if not head_line.startswith("ref:"):
            # Detached HEAD: the file holds the commit hash itself, not a
            # ref name, so there is no ref file to look up.
            return head_line or None

        ref_name = head_line[len("ref:"):].strip()
        ref_file = Path(git_folder, ref_name)
        if ref_file.is_file():
            return ref_file.read_text(encoding="utf-8").replace("\n", "")

        # The loose ref file is absent; the ref may have been packed into
        # `packed-refs`, whose entries are "<hash> <refname>" lines.
        packed_refs = Path(git_folder, "packed-refs")
        if packed_refs.is_file():
            for line in packed_refs.read_text(encoding="utf-8").splitlines():
                fields = line.split()
                if len(fields) == 2 and fields[1] == ref_name:
                    return fields[0]
    except (OSError, UnicodeDecodeError):
        # Best-effort lookup: unreadable metadata means "no hash available".
        return None
    return None


def get_git_commit_hash():
    """
    Return an identifier for the commit checked out in the current repo.

    Runs ``git describe --always`` and returns its stripped, decoded output.
    If the command exits with an error, or the ``git`` executable cannot be
    found, falls back to ``get_commit_from_path`` on the current working
    directory.
    Source: https://github.com/EleutherAI/gpt-neox/blob/b608043be541602170bfcfb8ec9bf85e8a0799e0/megatron/neox_arguments/neox_args.py#L42
    """
    describe_cmd = ["git", "describe", "--always"]
    try:
        described = subprocess.check_output(describe_cmd).strip().decode()
    except (subprocess.CalledProcessError, FileNotFoundError):
        # FileNotFoundError is raised when git is not installed on the system.
        return get_commit_from_path(os.getcwd())
    return described


def add_env_info(storage: Dict[str, Any]):
    """Add environment details to ``storage`` in place.

    Three keys are written:

    * ``pretty_env_info``: the output of ``get_pretty_env_info()``, or the
      string form of the exception if that call fails.
    * ``transformers_version``: the imported ``transformers`` version.
    * ``upper_git_hash``: the result of ``get_commit_from_path`` for the
      parent of the current working directory (useful when this repo is a
      submodule of another one).

    Args:
        storage: Dictionary to update, e.g. an evaluation results dict.
    """
    try:
        env_report = get_pretty_env_info()
    except Exception as err:
        # Best-effort: record why the report is unavailable instead of failing.
        env_report = str(err)

    parent_dir = Path(os.getcwd(), "..")
    storage.update(
        {
            "pretty_env_info": env_report,
            "transformers_version": trans_version,
            # in case this repo is a submodule of an enclosing repo
            "upper_git_hash": get_commit_from_path(parent_dir),
        }
    )
21 changes: 1 addition & 20 deletions lm_eval/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,8 @@
import logging
import os
import re
import subprocess
from itertools import islice
from typing import (
Any,
Callable,
List,
)
from typing import Any, Callable, List

import yaml
from jinja2 import BaseLoader, Environment, StrictUndefined
Expand Down Expand Up @@ -288,20 +283,6 @@ def _wrapper(*args, **kwargs):
return _wrapper


def get_git_commit_hash():
    """
    Gets the git commit hash of your current repo (if it exists).

    Runs ``git describe --always`` and returns its stripped, decoded output.
    Returns ``None`` when the command exits with an error or when the ``git``
    executable cannot be found.
    Source: https://github.com/EleutherAI/gpt-neox/blob/b608043be541602170bfcfb8ec9bf85e8a0799e0/megatron/neox_arguments/neox_args.py#L42
    """
    try:
        git_hash = subprocess.check_output(["git", "describe", "--always"]).strip()
        git_hash = git_hash.decode()
    # A tuple is required here: `A or B` evaluates to just `A`, which would
    # leave FileNotFoundError uncaught.
    except (subprocess.CalledProcessError, FileNotFoundError):
        # FileNotFoundError occurs when git not installed on system
        git_hash = None
    return git_hash


def ignore_constructor(loader, node):
return node

Expand Down

0 comments on commit 29d60d5

Please sign in to comment.