Skip to content

Commit

Permalink
Merge branch 'dev' into fix-autoresume-oom
Browse files (browse the repository at this point in the history)
  • Loading branch information
bigning authored Apr 10, 2024
2 parents 379969b + 2a262b4 commit 2b0dfc3
Showing 1 changed file with 3 additions and 8 deletions.
11 changes: 3 additions & 8 deletions composer/trainer/mosaic_fsdp_utils.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -177,16 +177,11 @@ def _get_process_group(pg, process_group_cache=None):
return process_group_cache[ranks]

log.info(f'Instantiating custom process groups with {ranks=} on rank={dist.get_global_rank()}.')

ranks_per_subgroup_list = list(set(dist.all_gather_object(ranks)))
(
current_group,
_subgroups,
) = distributed.distributed_c10d.new_subgroups_by_enumeration(ranks_per_subgroup_list)
process_group = distributed.new_group(ranks)

if process_group_cache is not None:
process_group_cache[ranks] = current_group
return current_group
process_group_cache[ranks] = process_group
return process_group


def _set_custom_fsdp_module_kwargs(module_kwargs: Dict, process_group_cache: Dict[Tuple[int], Any]) -> Dict:
Expand Down

0 comments on commit 2b0dfc3

Please sign in to comment.