Skip to content

Commit

Permalink
add more logs
Browse files (browse the repository at this point in the history)
  • Loading branch information
mvpatel2000 committed Sep 23, 2024
1 parent a8d0e0e commit 40cd087
Show file tree
Hide file tree
Showing 2 changed files with 4 additions and 1 deletion.
3 changes: 3 additions & 0 deletions composer/trainer/trainer.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -1902,13 +1902,16 @@ def __init__(
log.info('No previous autoresume checkpoint found')
# Actually load the checkpoint from potentially updated arguments
if load_path is not None:
log.info(f'Loading checkpoint from {load_path}')
if load_object_store is None:
load_object_store = maybe_create_object_store_from_uri(load_path)
log.debug(f'Created object store from load path: {load_object_store}')
if isinstance(load_object_store, WandBLogger):
import wandb
if wandb.run is None:
load_object_store.init(self.state, self.logger)
_, _, parsed_load_path = parse_uri(load_path)
log.debug(f'Parsed load path: {parsed_load_path}')

self._rng_state = checkpoint.load_checkpoint(
state=self.state,
Expand Down
2 changes: 1 addition & 1 deletion composer/utils/checkpoint.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -514,6 +514,7 @@ def load_checkpoint(
:attr:`load_weights_only` is not None. Otherwise, None.
"""
path = partial_format(path, run_name=state.run_name)
log.debug(f'Loading checkpoint from formatted path: {path}')

if state.fsdp_sharded_state_dict_enabled:
rng_state_dicts = load_sharded_checkpoint(
Expand All @@ -530,7 +531,6 @@ def load_checkpoint(
)
else:
# Download the checkpoint to the node-local folder
log.debug('Loading checkpoint at %s', path)
# Each node gets one unique folder to store checkpoints that is shared amongst all local ranks in that node.
# If fsdp sharded state_dicts is enabled then EVERY rank gets a unique checkpoint folder.
needs_unique_checkpoint_folder = state.fsdp_sharded_state_dict_enabled or dist.get_local_rank() == 0
Expand Down

0 comments on commit 40cd087

Please sign in to comment.