From 4e2aae1e4c4ae1a9f073aeb6aa28252266a9bc75 Mon Sep 17 00:00:00 2001 From: "Edwards, Brandon" Date: Fri, 25 Oct 2024 17:53:59 -0700 Subject: [PATCH] enabling dampening of the train_completion with admin control --- .../fl_post/fl/mlcube/workspace/training_config.yaml | 7 +++++++ examples/fl_post/fl/project/src/runner_nnunetv1.py | 10 +++++++++- 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/examples/fl_post/fl/mlcube/workspace/training_config.yaml b/examples/fl_post/fl/mlcube/workspace/training_config.yaml index cb1bbf476..bd9daf629 100644 --- a/examples/fl_post/fl/mlcube/workspace/training_config.yaml +++ b/examples/fl_post/fl/mlcube/workspace/training_config.yaml @@ -24,12 +24,19 @@ aggregator : min: 10 # 10 seconds max: 86400 # one day value: 86400 # one day + train_completion_dampener: # train_completed -> (train_completed)**(train_completion_dampener) + admin_settable: True + min: 1e-2 # shifts non 0.0 completion rates much closer to 1.0 + max: 1.0 # leaves completion rates as is + value: 1.0 + aggregated_model_validation: val_cutoff_time: admin_settable: True min: 10 # 10 seconds max: 86400 # one day value: 86400 # one day + weights_alpha: *weights_alpha collaborator : diff --git a/examples/fl_post/fl/project/src/runner_nnunetv1.py b/examples/fl_post/fl/project/src/runner_nnunetv1.py index db84b0acd..96cb257cc 100644 --- a/examples/fl_post/fl/project/src/runner_nnunetv1.py +++ b/examples/fl_post/fl/project/src/runner_nnunetv1.py @@ -143,7 +143,7 @@ def write_tensors_into_checkpoint(self, tensor_dict, with_opt_vars): return epoch - def train(self, col_name, round_num, input_tensor_dict, epochs, val_cutoff_time, train_cutoff_time, **kwargs): + def train(self, col_name, round_num, input_tensor_dict, epochs, val_cutoff_time, train_cutoff_time, train_completion_dampener, **kwargs): # TODO: Figure out the right name to use for this method and the default assigner """Perform training for a specified number of epochs.""" @@ -169,6 +169,14 @@ def 
train(self, col_name, round_num, input_tensor_dict, epochs, val_cutoff_time, val_epoch=True, train_epoch=True) + # dampen the train_completion + """ + Values lie in the range (0, 1]; values near 0.0 shift all non-zero train_completion rates much closer to 1.0, thus keeping the + trained model update weighting during aggregation closer to the plain data size weighting. + Specifically, update_weight = train_data_size / train_completed**train_completion_dampener + """ + train_completed = train_completed**train_completion_dampener + # update amount of task completed + self.task_completed['train'] = train_completed + self.task_completed['locally_tuned_model_validation'] = val_completed