
add check_overflow even when no loss scale is enabled
MayDomine committed Jul 8, 2024
1 parent 88601be commit 053eee5
Showing 1 changed file with 6 additions and 0 deletions.
bmtrain/optim/optim_manager.py: 6 additions & 0 deletions
@@ -136,6 +136,12 @@ def step(self):
                 self.zero_grad()
                 return
         for optimizer, lr_scheduler in zip(self.optimizers, self.lr_schedulers):
+            try:
+                check_overflow(optimizer.param_groups)
+            except OverflowError:
+                has_overflow = True
+                print_rank("Gradient overflow, change scale from %lf to %lf" % (self.loss_scale, self.loss_scale / self.loss_scale_factor))
+                break
             if hasattr(optimizer, "_bmtrain_optimizer") and optimizer._bmtrain_optimizer:
                 optimizer.step(scale=self.loss_scale)
             else:
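
For readers unfamiliar with the helper, below is a minimal sketch of the kind of check the new try/except relies on. It assumes check_overflow simply raises OverflowError when any gradient in the given param groups contains inf or NaN; BMTrain's actual implementation may differ (for example, by aggregating the check across ranks).

    import torch

    def check_overflow(param_groups):
        # Hypothetical illustration of the helper called in the diff above:
        # walk every parameter's gradient and flag non-finite values.
        for group in param_groups:
            for param in group["params"]:
                grad = param.grad
                if grad is not None and not torch.isfinite(grad).all():
                    # Mirrors the behaviour the try/except in step() expects.
                    raise OverflowError("gradient contains inf or NaN")

Running the check inside the optimizer loop means an overflow is caught (and the update skipped via break) before optimizer.step() applies the bad gradients, even when the loss scaler itself is disabled, which matches the intent of the commit message.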
