From b082511496b2946c88b868fe8840d4502a771467 Mon Sep 17 00:00:00 2001 From: Cheng Li Date: Tue, 27 Feb 2024 09:30:59 -0800 Subject: [PATCH] add oom observer callback (#932) * add oom observer callback * fix format --- llmfoundry/utils/builders.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/llmfoundry/utils/builders.py b/llmfoundry/utils/builders.py index 9f799cb547..b4d4143006 100644 --- a/llmfoundry/utils/builders.py +++ b/llmfoundry/utils/builders.py @@ -12,8 +12,9 @@ import torch from composer import algorithms from composer.callbacks import (EarlyStopper, Generate, LRMonitor, - MemoryMonitor, MemorySnapshot, OptimizerMonitor, - RuntimeEstimator, SpeedMonitor) + MemoryMonitor, MemorySnapshot, OOMObserver, + OptimizerMonitor, RuntimeEstimator, + SpeedMonitor) from composer.core import Algorithm, Callback, Evaluator from composer.datasets.in_context_learning_evaluation import \ get_icl_task_dataloader @@ -165,6 +166,8 @@ def build_callback( return LRMonitor() elif name == 'memory_monitor': return MemoryMonitor() + elif name == 'oom_observer': + return OOMObserver(**kwargs) elif name == 'memory_snapshot': return MemorySnapshot(**kwargs) elif name == 'speed_monitor':