Skip to content

Commit

Permalink
add expandable segments var (#775)
Browse files Browse the repository at this point in the history
* adding new notebook for using fairchem models with NEBs without CatTSunami enumeration (#764)

* adding new notebook for using fairchem models with NEBs

* adding md tutorials

* blocking code cells that aren't needed or take too long

* add expandable segments var

* add note

---------

Co-authored-by: Brook Wander <[email protected]>
Co-authored-by: Muhammed Shuaibi <[email protected]>
  • Loading branch information
3 people committed Jul 22, 2024
1 parent 0a6f62f commit bceb1a5
Showing 1 changed file with 13 additions and 0 deletions.
13 changes: 13 additions & 0 deletions src/fairchem/core/common/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,12 @@
from torch.nn.modules.module import _IncompatibleKeys


# Environment variables applied to this process by setup_env_vars().
DEFAULT_ENV_VARS = {
    # Expandable segments is a CUDA caching-allocator feature that reduces
    # memory fragmentation under frequently changing allocation sizes
    # (e.g. variable batch sizes).
    # See https://pytorch.org/docs/stable/notes/cuda.html.
    "PYTORCH_CUDA_ALLOC_CONF": "expandable_segments:True",
}

# copied from https://stackoverflow.com/questions/33490870/parsing-yaml-in-python-detect-duplicated-keys
# prevents loading YAMLS where keys have been overwritten
class UniqueKeyLoader(yaml.SafeLoader):
Expand Down Expand Up @@ -953,6 +959,12 @@ def check_traj_files(batch, traj_dir) -> bool:
return all(fl.exists() for fl in traj_files)


def setup_env_vars() -> None:
for k, v in DEFAULT_ENV_VARS.items():
os.environ[k] = v
logging.info(f"Setting env {k}={v}")


@contextmanager
def new_trainer_context(*, config: dict[str, Any], distributed: bool = False):
from fairchem.core.common import distutils, gp_utils
Expand All @@ -969,6 +981,7 @@ class _TrainingContext:
trainer: BaseTrainer

setup_logging()
setup_env_vars()
original_config = config
config = copy.deepcopy(original_config)

Expand Down

0 comments on commit bceb1a5

Please sign in to comment.