Skip to content

Commit

Permalink
enabling dampening of the train_completion with admin control
Browse files Browse the repository at this point in the history
  • Loading branch information
brandon-edwards committed Oct 26, 2024
1 parent b5b6fa4 commit 4e2aae1
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 1 deletion.
7 changes: 7 additions & 0 deletions examples/fl_post/fl/mlcube/workspace/training_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,19 @@ aggregator :
min: 10 # 10 seconds
max: 86400 # one day
value: 86400 # one day
train_completion_dampener: # train_completed -> (train_completed)**(train_completion_dampener)
admin_settable: True
min: 1e-2 # shifts nonzero completion rates much closer to 1.0
max: 1.0 # leaves completion rates as is
value: 1.0

aggregated_model_validation:
val_cutoff_time:
admin_settable: True
min: 10 # 10 seconds
max: 86400 # one day
value: 86400 # one day
weights_alpha: *weights_alpha


collaborator :
Expand Down
10 changes: 9 additions & 1 deletion examples/fl_post/fl/project/src/runner_nnunetv1.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ def write_tensors_into_checkpoint(self, tensor_dict, with_opt_vars):
return epoch


def train(self, col_name, round_num, input_tensor_dict, epochs, val_cutoff_time, train_cutoff_time, **kwargs):
def train(self, col_name, round_num, input_tensor_dict, epochs, val_cutoff_time, train_cutoff_time, train_completion_dampener, **kwargs):
# TODO: Figure out the right name to use for this method and the default assigner
"""Perform training for a specified number of epochs."""

Expand All @@ -169,6 +169,14 @@ def train(self, col_name, round_num, input_tensor_dict, epochs, val_cutoff_time,
val_epoch=True,
train_epoch=True)

# dampen the train_completion
"""
train_completion_dampener takes values in the range (0, 1]. Values near 0.0 shift all nonzero train_completion
rates nearer to 1.0, so the trained model update weighting during aggregation stays closer to the plain data size
weighting. A value of 1.0 leaves the train_completion rate unchanged.
Specifically, update_weight = train_data_size / train_completed**train_completion_dampener
"""
train_completed = train_completed**train_completion_dampener

# update amount of task completed
self.task_completed['train'] = train_completed
self.task_completed['locally_tuned_model_validation'] = val_completed
Expand Down

0 comments on commit 4e2aae1

Please sign in to comment.