Skip to content

Commit

Permalink
remove barrier prints
Browse files Browse the repository at this point in the history
  • Loading branch information
FabianIsensee committed Jan 9, 2024
1 parent 8cb4084 commit d4a9817
Showing 1 changed file with 0 additions and 2 deletions.
2 changes: 0 additions & 2 deletions nnunetv2/training/nnUNetTrainer/nnUNetTrainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -1151,7 +1151,6 @@ def perform_actual_validation(self, save_probabilities: bool = False):
_, val_keys = self.do_split()
if self.is_ddp:
last_barrier_at_idx = len(val_keys) // dist.get_world_size() - 1
print(f'last barrier at idx {last_barrier_at_idx}')

val_keys = val_keys[self.local_rank:: dist.get_world_size()]
# we cannot just have barriers all over the place because the number of keys each GPU receives can be
Expand Down Expand Up @@ -1240,7 +1239,6 @@ def perform_actual_validation(self, save_probabilities: bool = False):
))
# if we don't barrier from time to time we will get nccl timeouts for large datasets. Yuck.

Check failure on line 1240 in nnunetv2/training/nnUNetTrainer/nnUNetTrainer.py

View workflow job for this annotation

GitHub Actions / Check for spelling errors

datsets ==> datasets
if self.is_ddp and i < last_barrier_at_idx and (i + 1) % 20 == 0:
print(f'index {i}. Barrier rank {self.local_rank}')
dist.barrier()

_ = [r.get() for r in results]
Expand Down

0 comments on commit d4a9817

Please sign in to comment.