From 0039fd544ed11b2a424a226df36f1036950a2a13 Mon Sep 17 00:00:00 2001
From: nikosChalk
Date: Fri, 22 Jul 2022 19:32:24 +0300
Subject: [PATCH] shuffling of run table

---
 .../Config/Models/RunTableModel.py            | 20 ++++-
 .../Experiment/ExperimentController.py        | 22 +++--
 test-standalone/core/shuffling/Crasher.py     | 26 ++++++
 .../core/shuffling/RunnerConfig.py            | 90 +++++++++++++++++++
 test-standalone/core/shuffling/Validator.py   | 26 ++++++
 test-standalone/runner.sh                     |  1 +
 .../Config/Models/test_FactorModel.py         |  4 +-
 .../Config/Models/test_RunTableModel.py       | 32 ++++++-
 8 files changed, 208 insertions(+), 13 deletions(-)
 create mode 100644 test-standalone/core/shuffling/Crasher.py
 create mode 100644 test-standalone/core/shuffling/RunnerConfig.py
 create mode 100644 test-standalone/core/shuffling/Validator.py

diff --git a/experiment-runner/ConfigValidator/Config/Models/RunTableModel.py b/experiment-runner/ConfigValidator/Config/Models/RunTableModel.py
index 0a7ffc3e..dd1a24c1 100644
--- a/experiment-runner/ConfigValidator/Config/Models/RunTableModel.py
+++ b/experiment-runner/ConfigValidator/Config/Models/RunTableModel.py
@@ -1,6 +1,8 @@
 import itertools
+import random
 from typing import Dict, List, Tuple
 
+from ConfigValidator.CustomErrors.BaseError import BaseError
 from ExtendedTyping.Typing import SupportsStr
 from ProgressManager.RunTable.Models.RunProgress import RunProgress
 from ConfigValidator.Config.Models.FactorModel import FactorModel
@@ -10,16 +12,24 @@ class RunTableModel:
     def __init__(self,
                  factors: List[FactorModel],
                  exclude_variations: List[Dict[FactorModel, List[SupportsStr]]] = None,
-                 data_columns: List[str] = None
+                 data_columns: List[str] = None,
+                 shuffle: bool = False
                  ):
         if exclude_variations is None:
             exclude_variations = {}
         if data_columns is None:
             data_columns = []
-        # TODO: Prevent duplicate factors with the same name
+
+        if len(set([factor.factor_name for factor in factors])) != len(factors):
+            raise BaseError("Duplicate factor name detected!")
+
+        if len(set(data_columns)) != len(data_columns):
+            raise BaseError("Duplicate data column detected!")
+
         self.__factors = factors
         self.__exclude_variations = exclude_variations
         self.__data_columns = data_columns
+        self.__shuffle = shuffle
 
     def get_factors(self) -> List[FactorModel]:
         return self.__factors
@@ -72,8 +82,10 @@ def __filter_list(full_list: List[Tuple]):
             row_list.insert(1, RunProgress.TODO)  # __done
             if self.__data_columns:
-                for data_column in self.__data_columns:
+                for _ in self.__data_columns:
                     row_list.append(" ")
-
             experiment_run_table.append(dict(zip(column_names, row_list)))
+
+        if self.__shuffle:
+            random.shuffle(experiment_run_table)
 
         return experiment_run_table
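Usage sketch (not part of the patch): with the constructor changes above, duplicate factor names or data columns now fail fast with a BaseError, and shuffle=True randomizes the order of the generated variations. Assuming generate_experiment_run_table() is the generation entry point, as the tests later in this patch suggest, the new parameter would be exercised like this:

    from ConfigValidator.Config.Models.FactorModel import FactorModel
    from ConfigValidator.Config.Models.RunTableModel import RunTableModel
    from ConfigValidator.CustomErrors.BaseError import BaseError

    model = RunTableModel(
        factors=[
            FactorModel("example_factor1", ["level1", "level2", "level3"]),
            FactorModel("example_factor2", [True, False]),
        ],
        data_columns=['avg_cpu'],
        shuffle=True  # new: randomize the run order
    )
    run_table = model.generate_experiment_run_table()  # 3 x 2 = 6 rows, shuffled in place

    try:
        # Two factors sharing the name "f" (hypothetical) trip the new validation
        RunTableModel(factors=[FactorModel("f", [1, 2]), FactorModel("f", [3, 4])])
    except BaseError:
        pass  # "Duplicate factor name detected!"

Note that __run_id is assigned during generation, before random.shuffle runs, so a shuffled table carries its ids out of numeric order; the restart logic below relies on exactly that to restore the persisted order.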
diff --git a/experiment-runner/ExperimentOrchestrator/Experiment/ExperimentController.py b/experiment-runner/ExperimentOrchestrator/Experiment/ExperimentController.py
index a2a9590c..73130afa 100644
--- a/experiment-runner/ExperimentOrchestrator/Experiment/ExperimentController.py
+++ b/experiment-runner/ExperimentOrchestrator/Experiment/ExperimentController.py
@@ -73,22 +73,32 @@ def __init__(self, config: RunnerConfig, metadata: Metadata):
                 self.json_data_manager.write_metadata(self.metadata)
                 self.restarted = True
 
+                assert(len(existing_run_table) == len(self.run_table))
+
+                # Re-order the generated run table to match the already existing one
+                tmp_run_table = []
+                for existing_var in existing_run_table:
+                    for generated_var in self.run_table:
+                        if existing_var['__run_id'] == generated_var['__run_id']:
+                            tmp_run_table.append(generated_var)
+                            break
+                self.run_table = tmp_run_table
+                for existing_var, generated_var in zip(existing_run_table, self.run_table):
+                    assert(existing_var['__run_id'] == generated_var['__run_id'])
 
                 # Fill in the run_table.
                 # Note that the stored run_table has only a str() representation of the factor treatment levels.
                 # The generated one can have arbitrary python objects.
-                for i, variation in enumerate(existing_run_table):
-                    upd_variation = self.run_table[i]  # variation that will be updated
-                    assert (i == int(variation['__run_id'][4:]))
-                    assert (i == int(upd_variation['__run_id'][4:]))
+                for existing_var, generated_var in zip(existing_run_table, self.run_table):
+                    assert (existing_var['__run_id'] == generated_var['__run_id'])
 
                     for k in map(lambda factor: factor.factor_name,
                                  self.config.run_table_model.get_factors()):  # treatment levels remain the same
-                        assert (str(upd_variation[k]) == str(variation[k]))
+                        assert (str(generated_var[k]) == str(existing_var[k]))
 
                     for k in set(self.config.run_table_model.get_data_columns()).union(
                             ['__done']):  # update data columns and __done column
-                        upd_variation[k] = variation[k]
+                        generated_var[k] = existing_var[k]
 
                 output.console_log_WARNING(">> WARNING << -- Experiment is restarted!")
 
         if not self.restarted:
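On a restart, RunTableModel regenerates the table with a fresh random shuffle, so the in-memory order no longer matches the run_table.csv persisted by the first invocation; the block above therefore re-orders the generated variations by __run_id. The nested scan is O(n²) in the number of runs, and the asserts then guarantee a one-to-one match. For large run tables, an equivalent O(n) formulation (a sketch, using only names already in scope above) would index the generated variations first:

    # Index the freshly generated variations by their run id, then emit them
    # in the order recorded by the persisted (possibly shuffled) run table.
    # A missing id raises KeyError here instead of silently truncating.
    generated_by_id = {var['__run_id']: var for var in self.run_table}
    self.run_table = [generated_by_id[var['__run_id']] for var in existing_run_table]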
diff --git a/test-standalone/core/shuffling/Crasher.py b/test-standalone/core/shuffling/Crasher.py
new file mode 100644
index 00000000..5e759392
--- /dev/null
+++ b/test-standalone/core/shuffling/Crasher.py
@@ -0,0 +1,26 @@
+
+from copy import deepcopy
+import shutil
+
+from ConfigValidator.Config.RunnerConfig import RunnerConfig as OriginalRunnerConfig
+from ProgressManager.Output.CSVOutputManager import CSVOutputManager
+from ProgressManager.RunTable.Models.RunProgress import RunProgress
+
+import TestUtilities
+
+# Simulate a crash: after a fully successful experiment, mark two runs as TODO
+# again and reset their data, so the next invocation has to recover them.
+if __name__ == '__main__':
+    TEST_DIR = TestUtilities.get_test_dir(__file__)
+
+    config_file = TestUtilities.load_and_get_config_file_as_module(TEST_DIR)
+    RunnerConfig: OriginalRunnerConfig = config_file.RunnerConfig
+
+    csv_data_manager = CSVOutputManager(RunnerConfig.results_output_path / RunnerConfig.name)
+    run_table = csv_data_manager.read_run_table()
+
+    # keep old successful run table for comparison in the validator
+    shutil.move(csv_data_manager._experiment_path / 'run_table.csv', csv_data_manager._experiment_path / 'run_table.old.csv')
+
+    for row in run_table:
+        if row['__run_id'] in ['run_2', 'run_5']:
+            row['__done'] = RunProgress.TODO
+            row['avg_cpu'] = 0
+    csv_data_manager.write_run_table(run_table)
diff --git a/test-standalone/core/shuffling/RunnerConfig.py b/test-standalone/core/shuffling/RunnerConfig.py
new file mode 100644
index 00000000..16095eb1
--- /dev/null
+++ b/test-standalone/core/shuffling/RunnerConfig.py
@@ -0,0 +1,90 @@
+from EventManager.Models.RunnerEvents import RunnerEvents
+from EventManager.EventSubscriptionController import EventSubscriptionController
+from ConfigValidator.Config.Models.RunTableModel import RunTableModel
+from ConfigValidator.Config.Models.FactorModel import FactorModel
+from ConfigValidator.Config.Models.RunnerContext import RunnerContext
+from ConfigValidator.Config.Models.OperationType import OperationType
+from ExtendedTyping.Typing import SupportsStr
+from ProgressManager.Output.OutputProcedure import OutputProcedure as output
+
+from typing import Dict, List, Any, Optional
+from pathlib import Path
+from os.path import dirname, realpath
+
+'''
+Test Description:
+
+Test functionality for shuffling
+ * When recovering from a crash, the order of the run table should remain the same
+'''
+
+class RunnerConfig:
+    ROOT_DIR = Path(dirname(realpath(__file__)))
+
+    # ================================ USER SPECIFIC CONFIG ================================
+    name: str = "new_runner_experiment"
+    results_output_path: Path = ROOT_DIR / 'experiments'
+    operation_type: OperationType = OperationType.AUTO
+    time_between_runs_in_ms: int = 100
+
+    def __init__(self):
+        """Executes immediately after program start, on config load"""
+
+        EventSubscriptionController.subscribe_to_multiple_events([
+            (RunnerEvents.BEFORE_EXPERIMENT, self.before_experiment),
+            (RunnerEvents.BEFORE_RUN       , self.before_run       ),
+            (RunnerEvents.START_RUN        , self.start_run        ),
+            (RunnerEvents.START_MEASUREMENT, self.start_measurement),
+            (RunnerEvents.INTERACT         , self.interact         ),
+            (RunnerEvents.STOP_MEASUREMENT , self.stop_measurement ),
+            (RunnerEvents.STOP_RUN         , self.stop_run         ),
+            (RunnerEvents.POPULATE_RUN_DATA, self.populate_run_data),
+            (RunnerEvents.AFTER_EXPERIMENT , self.after_experiment )
+        ])
+        self.run_table_model = None  # Initialized later
+
+        output.console_log("Custom config loaded")
+
+    def create_run_table_model(self) -> RunTableModel:
+        factor1 = FactorModel("example_factor1", ["level1", "level2", "level3"])
+        factor2 = FactorModel("example_factor2", [True, False])
+        self.run_table_model = RunTableModel(
+            factors=[factor1, factor2],
+            data_columns=['avg_cpu', 'avg_mem'],
+            shuffle=True
+        )
+        return self.run_table_model
+
+    def before_experiment(self) -> None:
+        output.console_log("Config.before_experiment() called!")
+
+    def before_run(self) -> None:
+        output.console_log("Config.before_run() called!")
+
+    def start_run(self, context: RunnerContext) -> None:
+        output.console_log("Config.start_run() called!")
+
+    def start_measurement(self, context: RunnerContext) -> None:
+        output.console_log("Config.start_measurement() called!")
+
+    def interact(self, context: RunnerContext) -> None:
+        output.console_log("Config.interact() called!")
+
+    def stop_measurement(self, context: RunnerContext) -> None:
+        output.console_log("Config.stop_measurement called!")
+
+    def stop_run(self, context: RunnerContext) -> None:
+        output.console_log("Config.stop_run() called!")
+
+    def populate_run_data(self, context: RunnerContext) -> Optional[Dict[str, SupportsStr]]:
+        output.console_log("Config.populate_run_data() called!")
+        return {
+            'avg_cpu': 13,
+            'avg_mem': 18.1
+        }
+
+    def after_experiment(self) -> None:
+        output.console_log("Config.after_experiment() called!")
+
+    # ================================ DO NOT ALTER BELOW THIS LINE ================================
+    experiment_path: Path = None
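Since the shuffle goes through the module-level random.shuffle, a config like the one above can pin the shuffled order for reproducible debugging by seeding the global RNG before the table is generated. A minimal sketch (the seed value is illustrative, and generate_experiment_run_table() is assumed to be the generation entry point):

    import random

    from ConfigValidator.Config.Models.FactorModel import FactorModel
    from ConfigValidator.Config.Models.RunTableModel import RunTableModel

    random.seed(42)  # illustrative: fix the global RNG state
    model = RunTableModel(
        factors=[FactorModel("example_factor1", ["level1", "level2", "level3"]),
                 FactorModel("example_factor2", [True, False])],
        shuffle=True
    )
    # Identical shuffled order on every invocation that starts from the same seed.
    run_table = model.generate_experiment_run_table()

The crash-recovery test here deliberately does not rely on this: recovery reads the persisted run_table.csv and re-orders by __run_id rather than reproducing the shuffle.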
diff --git a/test-standalone/core/shuffling/Validator.py b/test-standalone/core/shuffling/Validator.py
new file mode 100644
index 00000000..70696c6c
--- /dev/null
+++ b/test-standalone/core/shuffling/Validator.py
@@ -0,0 +1,26 @@
+
+import csv
+
+from ConfigValidator.Config.RunnerConfig import RunnerConfig as OriginalRunnerConfig
+from ProgressManager.Output.CSVOutputManager import CSVOutputManager
+from ProgressManager.RunTable.Models.RunProgress import RunProgress
+
+import TestUtilities
+
+if __name__ == '__main__':
+    TEST_DIR = TestUtilities.get_test_dir(__file__)
+
+    config_file = TestUtilities.load_and_get_config_file_as_module(TEST_DIR)
+    RunnerConfig: OriginalRunnerConfig = config_file.RunnerConfig
+
+    with open(RunnerConfig.results_output_path / RunnerConfig.name / 'run_table.old.csv') as f:
+        old = f.read()  # run table as it was before the simulated crash
+    with open(RunnerConfig.results_output_path / RunnerConfig.name / 'run_table.csv') as f:
+        new = f.read()
+    assert old == new  # recovery must preserve the (shuffled) run order exactly
+
+    csv_data_manager = CSVOutputManager(RunnerConfig.results_output_path / RunnerConfig.name)
+    run_table = csv_data_manager.read_run_table()
+    for row in run_table:
+        assert row['__done'] == RunProgress.DONE.name
+        assert int(row['avg_cpu']) == 13
diff --git a/test-standalone/runner.sh b/test-standalone/runner.sh
index cc6a6191..e394ed09 100755
--- a/test-standalone/runner.sh
+++ b/test-standalone/runner.sh
@@ -15,6 +15,7 @@ fi
 set -e
 
 tests=( # TODO: gather_tests recursively
+    "${PROJECT_DIR}/test-standalone/core/shuffling"
     "${PROJECT_DIR}/test-standalone/core/arbitrary-objects"
     "${PROJECT_DIR}/test-standalone/plugins/CodecarbonWrapper/individual"
     "${PROJECT_DIR}/test-standalone/plugins/CodecarbonWrapper/combined"
diff --git a/test/ConfigValidator/Config/Models/test_FactorModel.py b/test/ConfigValidator/Config/Models/test_FactorModel.py
index 7e4a85f9..7e34298c 100644
--- a/test/ConfigValidator/Config/Models/test_FactorModel.py
+++ b/test/ConfigValidator/Config/Models/test_FactorModel.py
@@ -7,8 +7,8 @@ class TestFactorModelUniqueness(unittest.TestCase):
 
     def test_uniqueness(self):
         try:
-            factorModel = FactorModel('example_factore', [1, 2, 1])
-            self.assertFalse()
+            factorModel = FactorModel('example_factor', [1, 2, 1])
+            self.fail()
         except BaseError:
             pass
diff --git a/test/ConfigValidator/Config/Models/test_RunTableModel.py b/test/ConfigValidator/Config/Models/test_RunTableModel.py
index 8367df0c..691f89b3 100644
--- a/test/ConfigValidator/Config/Models/test_RunTableModel.py
+++ b/test/ConfigValidator/Config/Models/test_RunTableModel.py
@@ -3,9 +3,38 @@
 
 from ConfigValidator.Config.Models.FactorModel import FactorModel
 from ConfigValidator.Config.Models.RunTableModel import RunTableModel
+from ConfigValidator.CustomErrors.BaseError import BaseError
 from ProgressManager.RunTable.Models.RunProgress import RunProgress
 
 
+class TestRunTableModelDuplicateNames(unittest.TestCase):
+
+    def test_duplicate_factor_names(self):
+        try:
+            RunTableModel(
+                factors=[
+                    FactorModel("example_factor1", ['example_treatment1', 'example_treatment2', 'example_treatment3']),
+                    FactorModel("example_factor1", [True, False]),
+                ]
+            )
+            self.fail()
+        except BaseError:
+            pass
+
+    def test_duplicate_data_columns(self):
+        try:
+            RunTableModel(
+                factors=[
+                    FactorModel("example_factor1", ['example_treatment1', 'example_treatment2', 'example_treatment3']),
+                    FactorModel("example_factor2", [True, False]),
+                ],
+                data_columns=['data_col1', 'data_col2', 'data_col1']
+            )
+            self.fail()
+        except BaseError:
+            pass
+
+
 class TestRunTableModelSimple(unittest.TestCase):
     def setUp(self):
         self.runTableModel = RunTableModel(
@@ -13,7 +42,8 @@ def setUp(self):
                 FactorModel("example_factor1", ['example_treatment1', 'example_treatment2', 'example_treatment3']),
                 FactorModel("example_factor2", [True, False]),
             ],
-            data_columns=['avg_cpu', 'avg_mem']
+            data_columns=['avg_cpu', 'avg_mem'],
+            shuffle=True
         )
 
     def test_generate_experiment_run_table(self):
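One caveat for the change to setUp above: with shuffle=True, any expectation in test_generate_experiment_run_table that encodes a fixed row order becomes flaky. The test body is not shown in this patch, but an order-insensitive formulation would look like this sketch:

    def test_generate_experiment_run_table(self):
        run_table = self.runTableModel.generate_experiment_run_table()
        self.assertEqual(len(run_table), 6)  # 3 treatments x 2 levels, no exclusions
        # Compare the factor-level combinations as a set, ignoring shuffle order.
        actual = {(row['example_factor1'], row['example_factor2']) for row in run_table}
        expected = {(treatment, flag)
                    for treatment in ['example_treatment1', 'example_treatment2', 'example_treatment3']
                    for flag in [True, False]}
        self.assertEqual(actual, expected)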