Merge branch 'main' into atemerev/pypi-publish
atemerev authored Mar 11, 2024
2 parents 135ccef + 7232c5b commit 9b2742b
Showing 7 changed files with 105 additions and 16 deletions.
15 changes: 11 additions & 4 deletions docs/architecture.rst
@@ -396,8 +396,6 @@ Dry Run Memory Load Balancing

The dry run mode also provides a memory load balancing feature. It helps balance the memory usage
across the ranks of the simulation, so that the user is less likely to run into out-of-memory errors.
At the moment the implementation is still WIP so the user can save the allocation data to a file
but cannot import to use it in a real simulation.

The workflow of the memory load balancing is as follows: for each cell in the circuit we have an
estimate of both the memory load of the cell itself, based on its METype, and the number of synapses
@@ -413,8 +411,17 @@ and fast. The algorithm is as follows:

The user can specify the number of ranks to target using the `--num-target-ranks` flag in the CLI of neurodamus.
The default value is 40. The allocation dictionary, containing the assignment of gids to ranks for each population,
is then saved to the `allocation.pkl.gz` file in a pickled gzipped format. In the near future users will be able to
import this data in any following simulation in order to improve the memory balance.
is then saved to the `allocation.pkl.gz` file in a pickled gzipped format.

Now that the `allocation.pkl.gz` has been generated, the user can load it in the main simulation and use it to
load balance the simulation, by passing the `--lb-mode=Memory` flag in the CLI of neurodamus. During the execution
Neurodamus checks whether the number of ranks used in the simulation matches the number of ranks used in the
dry run. If the numbers differ, the user is prompted to run a new dry run with the new number of ranks.
If they match, the allocation dictionary is loaded and used to load balance the simulation.

This way the exact gids that were assigned to each rank in the dry run are assigned to the same ranks in the
actual simulation, helping to avoid out-of-memory errors.
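
For illustration, a minimal sketch of the two-step workflow through the Python API, mirroring the integration
test added further down in this diff. The config file name is illustrative, and the `dry_run` keyword is an
assumption that CLI flags map one-to-one onto `Neurodamus` constructor options (as `lb_mode` does in the test):

from neurodamus import Neurodamus

# Step 1: dry run; writes allocation.pkl.gz with the gid-to-rank assignment
nd = Neurodamus("simulation_sonata.json", dry_run=True)
nd.run()

# Step 2: real run; reuses the dry-run allocation for memory load balancing
nd = Neurodamus("simulation_sonata.json", lb_mode="Memory")
nd.run()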

Development
------------
16 changes: 15 additions & 1 deletion neurodamus/cell_distributor.py
@@ -201,11 +201,15 @@ def load_nodes(self, load_balancer=None, *, _loader=None, loader_opts=None):
loader_f = (lambda *args: _loader(*args, **loader_opts)) if loader_opts else _loader

logging.info("Reading Nodes (METype) info from '%s'", conf.CellLibraryFile)
if load_balancer and load_balancer.population != self._target_spec.population:
if load_balancer and \
hasattr(load_balancer, 'population') and \
load_balancer.population != self._target_spec.population:
log_verbose("Load balance object doesn't apply to '%s'", self._target_spec.population)
load_balancer = None
if not load_balancer or SimConfig.dry_run:
gidvec, me_infos, *cell_counts = self._load_nodes(loader_f)
elif load_balancer and SimConfig.loadbal_mode == LoadBalanceMode.Memory:
gidvec, me_infos, *cell_counts = self._load_nodes_balance_mem(loader_f, load_balancer)
else:
gidvec, me_infos, *cell_counts = self._load_nodes_balance(loader_f, load_balancer)
self._local_nodes.add_gids(gidvec, me_infos)
@@ -244,6 +248,16 @@ def _load_nodes_balance(self, loader_f, load_balancer):
gidvec, me_infos, full_size = loader_f(self._circuit_conf, all_gids)
return gidvec, me_infos, total_cells, full_size

def _load_nodes_balance_mem(self, loader_f, load_balancer):
targetspec: TargetSpec = self._target_spec

population = targetspec.population
all_gids = load_balancer[population][MPI.rank]
logging.debug("Loading %d cells in rank %d", len(all_gids), MPI.rank)
total_cells = len(all_gids)
gidvec, me_infos, full_size = loader_f(self._circuit_conf, all_gids)
return gidvec, me_infos, total_cells, full_size
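
For reference, the `load_balancer` object consumed here is the allocation dictionary produced by the dry run
(as returned by `import_allocation_stats`): a mapping of population name to rank id to the gids assigned to
that rank. A toy sketch using the values from the integration test below (after import the gid containers are
numpy arrays; plain lists are shown for readability):

# population -> {rank: gids}; this is what load_balancer[population][MPI.rank] indexes into
allocation = {
    "NodeA": {0: [1, 2, 3]},
    "NodeB": {0: [1, 2]},
}
gids_for_rank0 = allocation["NodeA"][0]  # [1, 2, 3]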

# -
def finalize(self, **opts):
"""Instantiates cells and initializes the network in the simulator.
5 changes: 3 additions & 2 deletions neurodamus/commands.py
@@ -41,13 +41,15 @@ def neurodamus(args=None):
--modelbuilding-steps=<number>
Set the number of ModelBuildingSteps for the CoreNeuron sim
--experimental-stims Shall use only Python stimuli? [default: False]
--lb-mode=[RoundRobin, WholeCell, MultiSplit]
--lb-mode=[RoundRobin, WholeCell, MultiSplit, Memory]
The Load Balance mode.
- RoundRobin: Disable load balancing. Good for quick simulations
- WholeCell: Does a first pass to compute load balancing and
redistributes cells so that CPU load is similar among ranks
- MultiSplit: Allows splitting cells into pieces for distribution.
WARNING: This mode is incompatible with CoreNeuron
- Memory: Load balance based on memory usage. By default, it uses
the "allocation.pkl.gz" file to load a pre-computed load balance
--save=<PATH> Path to create a save point to enable resume.
--save-time=<TIME> The simulation time [ms] to save the state. (Default: At the end)
--restore=<PATH> Restore and resume simulation from a save point on disk
@@ -56,7 +58,6 @@
--model-stats Show model stats in CoreNEURON simulations [default: False]
--dry-run Dry-run simulation to estimate memory usage [default: False]
--num-target-ranks=<number> Number of ranks to target for dry-run load balancing
[default: 40]
"""
options = docopt_sanitize(docopt(neurodamus.__doc__, args))
config_file = options.pop("ConfigFile")
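
As a usage sketch (not part of this diff), the entry point can also be driven programmatically by handing an
argument list to docopt; the config file name is illustrative:

from neurodamus.commands import neurodamus

# Equivalent to running: neurodamus simulation_sonata.json --lb-mode=Memory
neurodamus(["simulation_sonata.json", "--lb-mode=Memory"])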
8 changes: 5 additions & 3 deletions neurodamus/core/configuration.py
@@ -75,7 +75,7 @@ class CliOptions(ConfigT):
model_stats = False
simulator = None
dry_run = False
num_target_ranks = 40
num_target_ranks = None

# Restricted Functionality support, mostly for testing

@@ -136,6 +136,7 @@ class LoadBalanceMode(Enum):
RoundRobin = 0
WholeCell = 1
MultiSplit = 2
Memory = 3

@classmethod
def parse(cls, lb_mode):
@@ -149,7 +150,8 @@ def parse(cls, lb_mode):
"roundrobin": cls.RoundRobin,
"wholecell": cls.WholeCell,
"loadbalance": cls.MultiSplit,
"multisplit": cls.MultiSplit
"multisplit": cls.MultiSplit,
"memory": cls.Memory
}
lb_mode_enum = _modes.get(lb_mode.lower())
if lb_mode_enum is None:
@@ -234,7 +236,7 @@ class _SimConfig(object):
spike_location = "soma"
spike_threshold = -30
dry_run = False
num_target_ranks = 40
num_target_ranks = None

_validators = []
_requisitors = []
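
A minimal usage sketch of the new mode resolution, based on the `_modes` mapping above (matching is
case-insensitive):

from neurodamus.core.configuration import LoadBalanceMode

assert LoadBalanceMode.parse("Memory") is LoadBalanceMode.Memory
assert LoadBalanceMode.parse("roundrobin") is LoadBalanceMode.RoundRobin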
19 changes: 18 additions & 1 deletion neurodamus/node.py
@@ -32,6 +32,7 @@
from .utils import compat
from .utils.logging import log_stage, log_verbose, log_all
from .utils.memory import DryRunStats, trim_memory, pool_shrink, free_event_queues, print_mem_usage
from .utils.memory import import_allocation_stats
from .utils.timeit import TimerManager, timeit
from .core.coreneuron_configuration import CoreConfig, CompartmentMapping
from .io.sonata_config import ConnectionTypes
@@ -371,6 +372,22 @@ def compute_load_balance(self):
lb_mode = LoadBalance.select_lb_mode(SimConfig, self._run_conf, target)
if lb_mode == LoadBalanceMode.RoundRobin:
return None
elif lb_mode == LoadBalanceMode.Memory:
logging.info("Load Balancing ENABLED. Mode: Memory")
alloc = import_allocation_stats("allocation.pkl.gz")
for pop, ranks in alloc.items():
for rank, gids in ranks.items():
logging.debug(f"Population: {pop}, Rank: {rank}, Number of GIDs: {len(gids)}")
if MPI.rank == 0:
unique_ranks = set(rank for pop in alloc.values() for rank in pop.keys())
logging.debug("Unique ranks in allocation file: %s", len(unique_ranks))
if MPI.size != len(unique_ranks):
raise ConfigurationError(
"The number of ranks in the allocation file is different from the number "
"of ranks in the current run. The allocation file was created with a "
"different number of ranks."
)
return alloc

# Build load balancer as per requested options
data_src = circuit.CircuitPath
@@ -1772,7 +1789,7 @@ def run(self):
log_stage("============= DRY RUN (SKIP SIMULATION) =============")
self._dry_run_stats.display_total()
self._dry_run_stats.display_node_suggestions()
ranks = int(SimConfig.num_target_ranks)
ranks = self._dry_run_stats.get_num_target_ranks(SimConfig.num_target_ranks)
self._dry_run_stats.collect_all_mpi()
self._dry_run_stats.distribute_cells(ranks)
return
30 changes: 25 additions & 5 deletions neurodamus/utils/memory.py
@@ -201,14 +201,21 @@ def export_allocation_stats(rank_allocation, filename):


@run_only_rank0
def import_allocation_stats(filename):
def import_allocation_stats(filename) -> dict:
"""
Import allocation dictionary from serialized pickle file.
"""
def convert_to_standard_types(obj):
"""Converts an object containing defaultdicts of Vectors to standard Python types."""
result = {}
for node, vectors in obj.items():
result[node] = {key: np.array(vector) for key, vector in vectors.items()}
return result

with open(filename, 'rb') as f:
compressed_data = f.read()

return pickle.loads(gzip.decompress(compressed_data))
return convert_to_standard_types(pickle.loads(gzip.decompress(compressed_data)))
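
A small round-trip sketch mirroring the integration test below; it assumes `export_allocation_stats` accepts
any picklable mapping (in practice the allocation comes from `DryRunStats.distribute_cells()`), and the file
name matches the default used by the Memory load-balance mode:

from neurodamus.utils.memory import export_allocation_stats, import_allocation_stats

rank_allocation = {"NodeA": {0: [1, 2, 3]}, "NodeB": {0: [1, 2]}}  # toy allocation
export_allocation_stats(rank_allocation, "allocation.pkl.gz")
alloc = import_allocation_stats("allocation.pkl.gz")  # values come back as numpy arrays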


@run_only_rank0
@@ -246,6 +253,7 @@ def __init__(self) -> None:
self.metype_gids = {}
self.metype_counts = Counter()
self.synapse_counts = Counter()
self.suggested_nodes = 0
_, _, self.base_memory, _ = get_task_level_mem_usage()

@run_only_rank0
@@ -390,16 +398,28 @@ def display_node_suggestions(self):
if node_total_memory is None:
logging.warning("Unable to get the total memory available on the current node.")
return
suggested_nodes = self.suggest_nodes(0.3)
self.suggested_nodes = self.suggest_nodes(0.3)
logging.info(f"Based on the memory available on the current node, "
f"it is suggested to use at least {suggested_nodes} node(s).")
f"it is suggested to use at least {self.suggested_nodes} node(s).")
logging.info("This is just a suggestion and the actual number of nodes "
"needed to run the simulation may be different.")
logging.info(f"The calculation was based on a total memory available of "
f"{pretty_printing_memory_mb(node_total_memory)} on the current node.")
logging.info("Please remember that it is suggested to use the same class of nodes "
"for both the dryrun and the actual simulation.")

@run_only_rank0
def get_num_target_ranks(self, num_ranks):
"""
Return the number of ranks to target for dry-run load balancing
"""
if num_ranks is None:
logging.info("No number of ranks specified. Using suggested number of nodes.")
logging.info("Detected number of physical cores: %d", psutil.cpu_count(logical=False))
return self.suggested_nodes * psutil.cpu_count(logical=False)
else:
return int(num_ranks)
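
As a worked example (numbers purely illustrative): if the dry run suggested 2 nodes and the current node has
36 physical cores, omitting `--num-target-ranks` makes the dry run distribute cells over 2 * 36 = 72 target
ranks; passing `--num-target-ranks=40` restores the previous fixed behaviour.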

@run_only_rank0
def distribute_cells(self, num_ranks, batch_size=10) -> (dict, dict):
"""
@@ -418,7 +438,7 @@ def distribute_cells(self, num_ranks, batch_size=10) -> (dict, dict):
rank_memory (dict): A dictionary where keys are rank IDs
and values are the total memory load on each rank.
"""
logging.debug("Distributing cells across %d ranks", num_ranks)
logging.info("Distributing cells across %d ranks", num_ranks)

self.validate_inputs_distribute(num_ranks, batch_size)

@@ -46,3 +46,31 @@ def test_dry_run_workflow(USECASE3):
}

assert rank_allocation_standard == expected_items

# Test that the allocation works and can be saved and loaded
# and generate allocation.pkl.gz for 1 rank
rank_allocation, _ = nd._dry_run_stats.distribute_cells(1)
export_allocation_stats(rank_allocation, USECASE3 / "allocation.pkl.gz")
rank_allocation = import_allocation_stats(USECASE3 / "allocation.pkl.gz")
rank_allocation_standard = convert_to_standard_types(rank_allocation)

expected_items = {
'NodeA': {0: [1, 2, 3]},
'NodeB': {0: [1, 2]}
}

assert rank_allocation_standard == expected_items


def test_memory_load_balance_workflow(USECASE3):
"""
Test that the memory load balance works
"""

from neurodamus import Neurodamus
nd = Neurodamus(
str(USECASE3 / "simulation_sonata.json"),
lb_mode="Memory",
)

nd.run()
