Removed unnecessary comments, reworked export to file with gzip compression
BlueBrain · Feb 8, 2024 · 824051d · 824051d
1 parent 5a155bd
commit 824051d
Show file tree

Hide file tree

Showing 3 changed files with 10 additions and 13 deletions.
diff --git a/docs/architecture.rst b/docs/architecture.rst
@@ -413,7 +413,7 @@ and fast. The algorithm is as follows:
 
 The user can specify the number of ranks to target using the `--num-target-ranks` flag in the CLI of neurodamus.
 The default value is 40. The allocation dictionary, containing the assignment of gids to ranks per each population,
-is then saved to the `allocation.bin` file in a pickled format. In the near future users will be able to
+is then saved to the `allocation.gz` file in a pickled gzipped format. In the near future users will be able to
 import this data in any following simulation in order to improve the memory balance.
 
 Development

diff --git a/neurodamus/utils/memory.py b/neurodamus/utils/memory.py
@@ -12,6 +12,7 @@
 import multiprocessing
 import heapq
 import pickle
+import gzip
 
 from ..core import MPI, NeurodamusCore as Nd, run_only_rank0
 from .compat import Vector
@@ -194,8 +195,9 @@ def export_allocation_stats(rank_allocation, filename):
     """
     Export allocation dictionary to serialized pickle file.
     """
+    compressed_data = gzip.compress(pickle.dumps(rank_allocation))
     with open(filename, 'wb') as f:
-        pickle.dump(rank_allocation, f)
+        f.write(compressed_data)
 
 
 @run_only_rank0
@@ -204,7 +206,9 @@ def import_allocation_stats(filename):
     Import allocation dictionary from serialized pickle file.
     """
     with open(filename, 'rb') as f:
-        return pickle.load(f)
+        compressed_data = f.read()
+
+    return pickle.loads(gzip.decompress(compressed_data))
 
 
 @run_only_rank0
@@ -234,7 +238,7 @@ def get_memory_usage(cls, count, synapse_type):
 
 class DryRunStats:
     _MEMORY_USAGE_FILENAME = "cell_memory_usage.json"
-    _ALLOCATION_FILENAME = "allocation.bin"
+    _ALLOCATION_FILENAME = "allocation.gz"
 
     def __init__(self) -> None:
         self.metype_memory = {}
@@ -416,7 +420,6 @@ def distribute_cells(self, num_ranks, batch_size=10) -> (dict, dict):
         """
         logging.debug("Distributing cells across %d ranks", num_ranks)
 
-        # Check inputs
         self.validate_inputs_distribute(num_ranks, batch_size)
 
         # Multiply the average number of synapses per cell by 2.0
@@ -434,7 +437,6 @@ def generate_cells(metype_gids):
                 for gid in gids:
                     yield gid, memory_usage
 
-        # Initialize structures
         ranks = [(0, i) for i in range(num_ranks)]  # (total_memory, rank_id)
         heapq.heapify(ranks)
         all_allocation = {}
@@ -444,13 +446,10 @@ def assign_cells_to_rank(rank_allocation, rank_memory, batch, batch_memory):
             total_memory, rank_id = heapq.heappop(ranks)
             logging.debug("Assigning batch to rank %d", rank_id)
             rank_allocation[rank_id].extend(batch)
-            # Update the total memory load of the rank
             total_memory += batch_memory
             rank_memory[rank_id] = total_memory
-            # Update total memory and re-add to the heap
             heapq.heappush(ranks, (total_memory, rank_id))
 
-        # Start distributing cells across ranks
         for pop, metype_gids in self.metype_gids.items():
             logging.info("Distributing cells of population %s", pop)
             rank_allocation = defaultdict(Vector)
@@ -466,14 +465,12 @@ def assign_cells_to_rank(rank_allocation, rank_memory, batch, batch_memory):
                     batch = []
                     batch_memory = 0
 
-            # Assign any remaining cells in the last, potentially incomplete batch
             if batch:
                 assign_cells_to_rank(rank_allocation, rank_memory, batch, batch_memory)
 
             all_allocation[pop] = rank_allocation
             all_memory[pop] = rank_memory
 
-        # Print and export allocation stats
         print_allocation_stats(all_allocation, all_memory)
         export_allocation_stats(all_allocation, self._ALLOCATION_FILENAME)
 

diff --git a/tests/integration-e2e/test_dry_run_worflow.py b/tests/integration-e2e/test_dry_run_worflow.py
@@ -36,8 +36,8 @@ def test_dry_run_workflow(USECASE3):
 
     # Test that the allocation works and can be saved and loaded
     rank_allocation, _ = nd._dry_run_stats.distribute_cells(2)
-    export_allocation_stats(rank_allocation, USECASE3 / "allocation.bin")
-    rank_allocation = import_allocation_stats(USECASE3 / "allocation.bin")
+    export_allocation_stats(rank_allocation, USECASE3 / "allocation.gz")
+    rank_allocation = import_allocation_stats(USECASE3 / "allocation.gz")
     rank_allocation_standard = convert_to_standard_types(rank_allocation)
 
     expected_items = {