Merge branch 'main' into atemerev/pypi-publish
atemerev authored Mar 11, 2024
2 parents 135ccef + 7232c5b commit 9b2742b
Showing 7 changed files with 105 additions and 16 deletions.
15 changes: 11 additions & 4 deletions docs/architecture.rst
@@ -396,8 +396,6 @@ Dry Run Memory Load Balancing

The dry run mode also provides a memory load balancing feature. It helps balance the memory usage
across the ranks of the simulation, so that the user is less likely to run into out-of-memory errors.
At the moment the implementation is still WIP so the user can save the allocation data to a file
but cannot import to use it in a real simulation.

The workflow of the memory load balancing is as follows: for each cell in the circuit we have an
estimate of both the memory load of the cell itself, based on its METype, and the number of synapses
@@ -413,8 +411,17 @@ and fast. The algorithm is as follows:

The user can specify the number of ranks to target using the `--num-target-ranks` flag in the CLI of neurodamus.
The default value is 40. The allocation dictionary, containing the assignment of gids to ranks for each population,
is then saved to the `allocation.pkl.gz` file in a pickled gzipped format. In the near future users will be able to
import this data in any following simulation in order to improve the memory balance.
is then saved to the `allocation.pkl.gz` file in a pickled gzipped format.

Now that the `allocation.pkl.gz` has been generated, the user can load it in the main simulation and use it to
load balance the simulation, by passing the `--lb-mode=Memory` flag in the CLI of neurodamus. During the execution
Neurodamus checks whether the number of ranks used in the simulation matches the number of ranks used in the
dry run. If the numbers differ, the user is prompted to run a new dry run with the new number of ranks.
If they match, the allocation dictionary is loaded and used to load balance the simulation.

This way the exact gids that were assigned to each rank in the dry run are assigned to the same ranks in the
actual simulation, helping to avoid out-of-memory errors.
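
For illustration, a minimal sketch of the two-step workflow through the Python API, mirroring the integration
test added further down in this diff. The config file name is illustrative, and the `dry_run` keyword is an
assumption that CLI flags map one-to-one onto `Neurodamus` constructor options (as `lb_mode` does in the test):

from neurodamus import Neurodamus

# Step 1: dry run; writes allocation.pkl.gz with the gid-to-rank assignment
nd = Neurodamus("simulation_sonata.json", dry_run=True)
nd.run()

# Step 2: real run; reuses the dry-run allocation for memory load balancing
nd = Neurodamus("simulation_sonata.json", lb_mode="Memory")
nd.run()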

Development
------------
16 changes: 15 additions & 1 deletion neurodamus/cell_distributor.py
@@ -201,11 +201,15 @@ def load_nodes(self, load_balancer=None, *, _loader=None, loader_opts=None):
loader_f = (lambda *args: _loader(*args, **loader_opts)) if loader_opts else _loader

logging.info("Reading Nodes (METype) info from '%s'", conf.CellLibraryFile)
if load_balancer and load_balancer.population != self._target_spec.population:
if load_balancer and \
hasattr(load_balancer, 'population') and \
load_balancer.population != self._target_spec.population:
log_verbose("Load balance object doesn't apply to '%s'", self._target_spec.population)
load_balancer = None
if not load_balancer or SimConfig.dry_run:
gidvec, me_infos, *cell_counts = self._load_nodes(loader_f)
elif load_balancer and SimConfig.loadbal_mode == LoadBalanceMode.Memory:
gidvec, me_infos, *cell_counts = self._load_nodes_balance_mem(loader_f, load_balancer)
else:
gidvec, me_infos, *cell_counts = self._load_nodes_balance(loader_f, load_balancer)
self._local_nodes.add_gids(gidvec, me_infos)
@@ -244,6 +248,16 @@ def _load_nodes_balance(self, loader_f, load_balancer):
gidvec, me_infos, full_size = loader_f(self._circuit_conf, all_gids)
return gidvec, me_infos, total_cells, full_size

def _load_nodes_balance_mem(self, loader_f, load_balancer):
targetspec: TargetSpec = self._target_spec

population = targetspec.population
all_gids = load_balancer[population][MPI.rank]
logging.debug("Loading %d cells in rank %d", len(all_gids), MPI.rank)
total_cells = len(all_gids)
gidvec, me_infos, full_size = loader_f(self._circuit_conf, all_gids)
return gidvec, me_infos, total_cells, full_size
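
For reference, the `load_balancer` object consumed here is the allocation dictionary produced by the dry run
(as returned by `import_allocation_stats`): a mapping of population name to rank id to the gids assigned to
that rank. A toy sketch using the values from the integration test below (after import the gid containers are
numpy arrays; plain lists are shown for readability):

# population -> {rank: gids}; this is what load_balancer[population][MPI.rank] indexes into
allocation = {
    "NodeA": {0: [1, 2, 3]},
    "NodeB": {0: [1, 2]},
}
gids_for_rank0 = allocation["NodeA"][0]  # [1, 2, 3]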

# -
def finalize(self, **opts):
"""Instantiates cells and initializes the network in the simulator.
5 changes: 3 additions & 2 deletions neurodamus/commands.py
@@ -41,13 +41,15 @@ def neurodamus(args=None):
--modelbuilding-steps=<number>
Set the number of ModelBuildingSteps for the CoreNeuron sim
--experimental-stims Shall use only Python stimuli? [default: False]
--lb-mode=[RoundRobin, WholeCell, MultiSplit]
--lb-mode=[RoundRobin, WholeCell, MultiSplit, Memory]
The Load Balance mode.
- RoundRobin: Disable load balancing. Good for quick simulations
- WholeCell: Does a first pass to compute load balancing and
redistributes cells so that CPU load is similar among ranks
- MultiSplit: Allows splitting cells into pieces for distribution.
WARNING: This mode is incompatible with CoreNeuron
- Memory: Load balance based on memory usage. By default, it uses
the "allocation.pkl.gz" file to load a pre-computed load balance
--save=<PATH> Path to create a save point to enable resume.
--save-time=<TIME> The simulation time [ms] to save the state. (Default: At the end)
--restore=<PATH> Restore and resume simulation from a save point on disk
@@ -56,7 +58,6 @@
--model-stats Show model stats in CoreNEURON simulations [default: False]
--dry-run Dry-run simulation to estimate memory usage [default: False]
--num-target-ranks=<number> Number of ranks to target for dry-run load balancing
[default: 40]
"""
options = docopt_sanitize(docopt(neurodamus.__doc__, args))
config_file = options.pop("ConfigFile")
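
As a usage sketch (not part of this diff), the entry point can also be driven programmatically by handing an
argument list to docopt; the config file name is illustrative:

from neurodamus.commands import neurodamus

# Equivalent to running: neurodamus simulation_sonata.json --lb-mode=Memory
neurodamus(["simulation_sonata.json", "--lb-mode=Memory"])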
8 changes: 5 additions & 3 deletions neurodamus/core/configuration.py
@@ -75,7 +75,7 @@ class CliOptions(ConfigT):
model_stats = False
simulator = None
dry_run = False
num_target_ranks = 40
num_target_ranks = None

# Restricted Functionality support, mostly for testing

@@ -136,6 +136,7 @@ class LoadBalanceMode(Enum):
RoundRobin = 0
WholeCell = 1
MultiSplit = 2
Memory = 3

@classmethod
def parse(cls, lb_mode):
@@ -149,7 +150,8 @@ def parse(cls, lb_mode):
"roundrobin": cls.RoundRobin,
"wholecell": cls.WholeCell,
"loadbalance": cls.MultiSplit,
"multisplit": cls.MultiSplit
"multisplit": cls.MultiSplit,
"memory": cls.Memory
}
lb_mode_enum = _modes.get(lb_mode.lower())
if lb_mode_enum is None:
@@ -234,7 +236,7 @@ class _SimConfig(object):
spike_location = "soma"
spike_threshold = -30
dry_run = False
num_target_ranks = 40
num_target_ranks = None

_validators = []
_requisitors = []
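
A minimal usage sketch of the new mode resolution, based on the `_modes` mapping above (matching is
case-insensitive):

from neurodamus.core.configuration import LoadBalanceMode

assert LoadBalanceMode.parse("Memory") is LoadBalanceMode.Memory
assert LoadBalanceMode.parse("roundrobin") is LoadBalanceMode.RoundRobin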
19 changes: 18 additions & 1 deletion neurodamus/node.py
@@ -32,6 +32,7 @@
from .utils import compat
from .utils.logging import log_stage, log_verbose, log_all
from .utils.memory import DryRunStats, trim_memory, pool_shrink, free_event_queues, print_mem_usage
from .utils.memory import import_allocation_stats
from .utils.timeit import TimerManager, timeit
from .core.coreneuron_configuration import CoreConfig, CompartmentMapping
from .io.sonata_config import ConnectionTypes
@@ -371,6 +372,22 @@ def compute_load_balance(self):
lb_mode = LoadBalance.select_lb_mode(SimConfig, self._run_conf, target)
if lb_mode == LoadBalanceMode.RoundRobin:
return None
elif lb_mode == LoadBalanceMode.Memory:
logging.info("Load Balancing ENABLED. Mode: Memory")
alloc = import_allocation_stats("allocation.pkl.gz")
for pop, ranks in alloc.items():
for rank, gids in ranks.items():
logging.debug(f"Population: {pop}, Rank: {rank}, Number of GIDs: {len(gids)}")
if MPI.rank == 0:
unique_ranks = set(rank for pop in alloc.values() for rank in pop.keys())
logging.debug("Unique ranks in allocation file: %s", len(unique_ranks))
if MPI.size != len(unique_ranks):
raise ConfigurationError(
"The number of ranks in the allocation file is different from the number "
"of ranks in the current run. The allocation file was created with a "
"different number of ranks."
)
return alloc

# Build load balancer as per requested options
data_src = circuit.CircuitPath
@@ -1772,7 +1789,7 @@ def run(self):
log_stage("============= DRY RUN (SKIP SIMULATION) =============")
self._dry_run_stats.display_total()
self._dry_run_stats.display_node_suggestions()
ranks = int(SimConfig.num_target_ranks)
ranks = self._dry_run_stats.get_num_target_ranks(SimConfig.num_target_ranks)
self._dry_run_stats.collect_all_mpi()
self._dry_run_stats.distribute_cells(ranks)
return
30 changes: 25 additions & 5 deletions neurodamus/utils/memory.py
@@ -201,14 +201,21 @@ def export_allocation_stats(rank_allocation, filename):


@run_only_rank0
def import_allocation_stats(filename):
def import_allocation_stats(filename) -> dict:
"""
Import allocation dictionary from serialized pickle file.
"""
def convert_to_standard_types(obj):
"""Converts an object containing defaultdicts of Vectors to standard Python types."""
result = {}
for node, vectors in obj.items():
result[node] = {key: np.array(vector) for key, vector in vectors.items()}
return result

with open(filename, 'rb') as f:
compressed_data = f.read()

return pickle.loads(gzip.decompress(compressed_data))
return convert_to_standard_types(pickle.loads(gzip.decompress(compressed_data)))
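
A small round-trip sketch mirroring the integration test below; it assumes `export_allocation_stats` accepts
any picklable mapping (in practice the allocation comes from `DryRunStats.distribute_cells()`), and the file
name matches the default used by the Memory load-balance mode:

from neurodamus.utils.memory import export_allocation_stats, import_allocation_stats

rank_allocation = {"NodeA": {0: [1, 2, 3]}, "NodeB": {0: [1, 2]}}  # toy allocation
export_allocation_stats(rank_allocation, "allocation.pkl.gz")
alloc = import_allocation_stats("allocation.pkl.gz")  # values come back as numpy arrays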


@run_only_rank0
@@ -246,6 +253,7 @@ def __init__(self) -> None:
self.metype_gids = {}
self.metype_counts = Counter()
self.synapse_counts = Counter()
self.suggested_nodes = 0
_, _, self.base_memory, _ = get_task_level_mem_usage()

@run_only_rank0
@@ -390,16 +398,28 @@ def display_node_suggestions(self):
if node_total_memory is None:
logging.warning("Unable to get the total memory available on the current node.")
return
suggested_nodes = self.suggest_nodes(0.3)
self.suggested_nodes = self.suggest_nodes(0.3)
logging.info(f"Based on the memory available on the current node, "
f"it is suggested to use at least {suggested_nodes} node(s).")
f"it is suggested to use at least {self.suggested_nodes} node(s).")
logging.info("This is just a suggestion and the actual number of nodes "
"needed to run the simulation may be different.")
logging.info(f"The calculation was based on a total memory available of "
f"{pretty_printing_memory_mb(node_total_memory)} on the current node.")
logging.info("Please remember that it is suggested to use the same class of nodes "
"for both the dryrun and the actual simulation.")

@run_only_rank0
def get_num_target_ranks(self, num_ranks):
"""
Return the number of ranks to target for dry-run load balancing
"""
if num_ranks is None:
logging.info("No number of ranks specified. Using suggested number of nodes.")
logging.info("Detected number of physical cores: %d", psutil.cpu_count(logical=False))
return self.suggested_nodes * psutil.cpu_count(logical=False)
else:
return int(num_ranks)
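
As a worked example (numbers purely illustrative): if the dry run suggested 2 nodes and the current node has
36 physical cores, omitting `--num-target-ranks` makes the dry run distribute cells over 2 * 36 = 72 target
ranks; passing `--num-target-ranks=40` restores the previous fixed behaviour.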

@run_only_rank0
def distribute_cells(self, num_ranks, batch_size=10) -> (dict, dict):
"""
@@ -418,7 +438,7 @@ def distribute_cells(self, num_ranks, batch_size=10) -> (dict, dict):
rank_memory (dict): A dictionary where keys are rank IDs
and values are the total memory load on each rank.
"""
logging.debug("Distributing cells across %d ranks", num_ranks)
logging.info("Distributing cells across %d ranks", num_ranks)

self.validate_inputs_distribute(num_ranks, batch_size)

@@ -46,3 +46,31 @@ def test_dry_run_workflow(USECASE3):
}

assert rank_allocation_standard == expected_items

# Test that the allocation works and can be saved and loaded
# and generate allocation.pkl.gz for 1 rank
rank_allocation, _ = nd._dry_run_stats.distribute_cells(1)
export_allocation_stats(rank_allocation, USECASE3 / "allocation.pkl.gz")
rank_allocation = import_allocation_stats(USECASE3 / "allocation.pkl.gz")
rank_allocation_standard = convert_to_standard_types(rank_allocation)

expected_items = {
'NodeA': {0: [1, 2, 3]},
'NodeB': {0: [1, 2]}
}

assert rank_allocation_standard == expected_items


def test_memory_load_balance_workflow(USECASE3):
"""
Test that the memory load balance works
"""

from neurodamus import Neurodamus
nd = Neurodamus(
str(USECASE3 / "simulation_sonata.json"),
lb_mode="Memory",
)

nd.run()
