From fea4a88002639bec3498e9a592d4e1a35814db9b Mon Sep 17 00:00:00 2001 From: Irene Dea Date: Wed, 11 Sep 2024 06:30:09 -0700 Subject: [PATCH] Remove tensor option for _global_exception_occurred (#3611) Co-authored-by: Mihir Patel --- composer/loggers/mlflow_logger.py | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/composer/loggers/mlflow_logger.py b/composer/loggers/mlflow_logger.py index 660c315c8e..3da777a0ec 100644 --- a/composer/loggers/mlflow_logger.py +++ b/composer/loggers/mlflow_logger.py @@ -312,10 +312,7 @@ def init(self, state: State, logger: Logger) -> None: if self.run_name is None: self.run_name = state.run_name - if hasattr(state, 'device'): - self._global_exception_occurred = state.device.tensor_to_device(torch.tensor([0], dtype=torch.uint8),) - else: - self._global_exception_occurred = 0 + self._global_exception_occurred = 0 # Store the Composer run name in the MLFlow run tags so it can be retrieved for autoresume self.tags['run_name'] = os.environ.get('RUN_NAME', state.run_name) @@ -615,10 +612,7 @@ def post_close(self): if hasattr(self, 'monitor_process'): # Check if there is an uncaught exception, which means `post_close()` is triggered # due to program crash. - if isinstance(self._global_exception_occurred, torch.Tensor): - finish_with_exception = (self._global_exception_occurred == 1).item() - else: - finish_with_exception = (self._global_exception_occurred == 1) + finish_with_exception = self._global_exception_occurred == 1 if finish_with_exception: self.monitor_process.crash() return