added CLI options for prospective hosts, implemented mechanism to take into account synapses in load balancing
st4rl3ss committed Jan 12, 2024
1 parent 589cd88 commit e514246
Showing 5 changed files with 36 additions and 6 deletions.
1 change: 1 addition & 0 deletions neurodamus/commands.py
@@ -55,6 +55,7 @@ def neurodamus(args=None):
         --enable-shm=[ON, OFF]  Enables the use of /dev/shm for coreneuron_input [default: ON]
         --model-stats           Show model stats in CoreNEURON simulations [default: False]
         --dry-run               Dry-run simulation to estimate memory usage [default: False]
+        --prosp-hosts=<number>  Number of prospective hosts for dry-run load balancing [default: 40]
     """
     options = docopt_sanitize(docopt(neurodamus.__doc__, args))
     config_file = options.pop("ConfigFile")
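For context, a minimal sketch (not part of the commit) of how docopt resolves the new flag. The usage string is a trimmed stand-in for the real neurodamus docstring and the config file name is hypothetical; docopt hands option values back as strings, which is consistent with the int() cast applied later in node.py.

from docopt import docopt

USAGE = """
Usage:
    neurodamus <ConfigFile> [options]

Options:
    --dry-run                Dry-run simulation to estimate memory usage [default: False]
    --prosp-hosts=<number>   Number of prospective hosts for dry-run load balancing [default: 40]
"""

# Explicit override: the value comes back as the string "128"
opts = docopt(USAGE, argv=["simulation_config.json", "--prosp-hosts=128"])
print(opts["--prosp-hosts"])            # "128"

# No flag given: docopt falls back to the [default: 40] annotation
opts = docopt(USAGE, argv=["simulation_config.json"])
print(opts["--prosp-hosts"])            # "40"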
4 changes: 3 additions & 1 deletion neurodamus/connection_manager.py
@@ -796,8 +796,10 @@ def _get_conn_stats(self, dst_target):
             # Extrapolation
             logging.debug("Cells samples / total: %d / %s", sampled_gids_count, me_gids_count)
             me_estimated_sum = sum(metype_estimate.values())
+            average_syns_per_cell = me_estimated_sum / me_gids_count
+            self._dry_run_stats.average_syns_per_cell[metype] = average_syns_per_cell
             log_all(VERBOSE_LOGLEVEL, "%s: Average syns/cell: %.1f, Estimated total: %d ",
-                    metype, me_estimated_sum / me_gids_count, me_estimated_sum)
+                    metype, average_syns_per_cell, me_estimated_sum)
             local_counter.update(metype_estimate)

         return local_counter
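For illustration, a simplified, self-contained version of the bookkeeping this hunk adds (names and numbers are hypothetical, not the neurodamus API): synapse counts measured on a sample of cells per METype are extrapolated to the whole target, and the resulting per-cell average is stored for the load balancer.

sampled_syn_sums = {"typeA": 11_500, "typeB": 3_200}   # synapses counted on the sampled cells
sampled_gids_count = {"typeA": 10, "typeB": 10}        # how many cells were sampled
me_gids_count = {"typeA": 2_400, "typeB": 310}         # cells of that METype in the full target

average_syns_per_cell = {}
for metype, syn_sum in sampled_syn_sums.items():
    # Extrapolate the sampled count to the full population, then average per cell
    estimated_total = syn_sum / sampled_gids_count[metype] * me_gids_count[metype]
    average_syns_per_cell[metype] = estimated_total / me_gids_count[metype]

print(average_syns_per_cell)   # {'typeA': 1150.0, 'typeB': 320.0}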
3 changes: 3 additions & 0 deletions neurodamus/core/configuration.py
@@ -76,6 +76,7 @@ class CliOptions(ConfigT):
     model_stats = False
     simulator = None
     dry_run = False
+    prosp_hosts = 40

     # Restricted Functionality support, mostly for testing

@@ -235,6 +236,7 @@ class _SimConfig(object):
     spike_location = "soma"
     spike_threshold = -30
     dry_run = False
+    prosp_hosts = 40

     _validators = []
     _requisitors = []
@@ -274,6 +276,7 @@ def init(cls, config_file, cli_options):
         cls.modifications = compat.Map(cls._config_parser.parsedModifications or {})
         cls.cli_options = CliOptions(**(cli_options or {}))
         cls.dry_run = cls.cli_options.dry_run
+        cls.prosp_hosts = cls.cli_options.prosp_hosts
         # change simulator by request before validator and init hoc config
         if cls.cli_options.simulator:
             cls._parsed_run["Simulator"] = cls.cli_options.simulator
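A toy version of the propagation pattern these three hunks implement (stand-in classes, not the real ConfigT/_SimConfig machinery): the option gets a class-level default of 40, the parsed CLI value overrides it, and init() copies it onto the global simulation config where node.py reads it.

class CliOptions:
    dry_run = False
    prosp_hosts = 40                      # mirrors the [default: 40] in commands.py

    def __init__(self, **opts):
        for key, value in opts.items():   # parsed CLI values override the class defaults
            setattr(self, key, value)


class SimConfig:
    dry_run = False
    prosp_hosts = 40

    @classmethod
    def init(cls, cli_options=None):
        cls.cli_options = CliOptions(**(cli_options or {}))
        cls.dry_run = cls.cli_options.dry_run
        cls.prosp_hosts = cls.cli_options.prosp_hosts


SimConfig.init({"dry_run": True, "prosp_hosts": "128"})
print(SimConfig.prosp_hosts)   # "128" -- possibly still a string, hence the int() cast in node.py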
15 changes: 12 additions & 3 deletions neurodamus/node.py
@@ -1959,10 +1959,19 @@ def run(self):
             from .utils.memory import distribute_cells
             self._dry_run_stats.display_total()
             self._dry_run_stats.display_node_suggestions()
-            ranks = 40
+            ranks = int(SimConfig.prosp_hosts)
             self._dry_run_stats.collect_all_mpi()
             allocation, total_memory_per_rank = distribute_cells(self._dry_run_stats, ranks)
-            print("Allocation: ", allocation)
-            print("Total memory per rank: ", total_memory_per_rank)
+            # TODO: split this print into a separate function and make it available
+            # only when logging is on DEBUG level
+            if MPI.rank == 0:
+                print("Allocation: ", allocation)
+                print("Total memory per rank: ", total_memory_per_rank)
+                import statistics
+                values = list(total_memory_per_rank.values())
+                print("Mean: ", statistics.mean(values))
+                print("Median: ", statistics.median(values))
+                print("Stdev: ", statistics.stdev(values))
             return
         if not SimConfig.simulate_model:
             self.sim_init()
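The TODO in this hunk notes that the summary should move into its own function and only appear at DEBUG level. One possible shape, purely illustrative (not part of the commit), using only the standard library; the MPI rank-0 guard would stay at the call site.

import logging
import statistics


def log_allocation_summary(allocation, total_memory_per_rank):
    """Log the dry-run allocation and per-rank memory statistics."""
    if not logging.getLogger().isEnabledFor(logging.DEBUG):
        return                                       # skip entirely unless DEBUG is enabled
    values = list(total_memory_per_rank.values())
    logging.debug("Allocation: %s", allocation)
    logging.debug("Total memory per rank: %s", total_memory_per_rank)
    logging.debug("Mean: %s", statistics.mean(values))
    logging.debug("Median: %s", statistics.median(values))
    logging.debug("Stdev: %s", statistics.stdev(values))


# Hypothetical numbers: gids assigned per rank and estimated memory (kB) per rank
logging.basicConfig(level=logging.DEBUG)
log_allocation_summary({0: [1, 3], 1: [2, 4, 5]}, {0: 4900.0, 1: 3910.0})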
19 changes: 17 additions & 2 deletions neurodamus/utils/memory.py
@@ -160,8 +160,9 @@ def pretty_printing_memory_mb(memory_mb):
     else:
         return "%.2lf PB" % (memory_mb / 1024 ** 3)

+
 @run_only_rank0
-def distribute_cells(dry_run_stats, num_ranks):
+def distribute_cells(dry_run_stats, num_ranks) -> (dict, dict):
     """
     Distributes cells across ranks based on their memory load.
@@ -180,10 +181,21 @@ def distribute_cells(dry_run_stats, num_ranks):
     """
     # Check inputs
     assert set(dry_run_stats.metype_gids.keys()) == set(dry_run_stats.metype_memory.keys())
+    average_syns_keys = set(dry_run_stats.average_syns_per_cell.keys())
+    metype_memory_keys = set(dry_run_stats.metype_memory.keys())
+    assert average_syns_keys == metype_memory_keys
     assert num_ranks > 0, "num_ranks must be a positive integer"

+    # Multiply the average number of synapses per cell by 2.0
+    # This is done since the biggest memory load for a synapse is 2.0 kB and at this point in the
+    # code we have lost the information on whether they are excitatory or inhibitory
+    # so we just take the biggest value to be safe. (the difference between the two is minimal)
+    average_syns_mem_per_cell = {k: v * 2.0 for k, v in dry_run_stats.average_syns_per_cell.items()}
+
     # Prepare a list of tuples (cell_id, memory_load)
-    cells = [(gid, dry_run_stats.metype_memory[cell_type])
+    # We sum the memory load of the cell type and the average number of synapses per cell
+    cells = [(gid, dry_run_stats.metype_memory[cell_type] +
+              average_syns_mem_per_cell[cell_type])
              for cell_type, gids in dry_run_stats.metype_gids.items() for gid in gids]
     # Distribute cells with higher memory load first
     cells.sort(key=lambda x: x[1], reverse=True)
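Putting the pieces together, a standalone sketch of the weighting plus the heaviest-first distribution. It is simplified and partly assumed: the real distribute_cells() does return the allocation and the per-rank memory (as used in node.py), but the min-heap used here to pick the least-loaded rank is only an illustration. Memory is in kB and reuses the averages from the earlier sketch.

import heapq

metype_memory = {"typeA": 150.0, "typeB": 90.0}            # hypothetical per-cell memory, kB
average_syns_per_cell = {"typeA": 1150.0, "typeB": 320.0}  # as extrapolated in _get_conn_stats
metype_gids = {"typeA": [1, 2, 3], "typeB": [4, 5]}

# 2.0 kB per synapse: the safe upper bound, since exc/inh identity is no longer known here
average_syns_mem_per_cell = {k: v * 2.0 for k, v in average_syns_per_cell.items()}

cells = [(gid, metype_memory[t] + average_syns_mem_per_cell[t])
         for t, gids in metype_gids.items() for gid in gids]
cells.sort(key=lambda x: x[1], reverse=True)               # heaviest cells first

num_ranks = 2
heap = [(0.0, rank) for rank in range(num_ranks)]          # (assigned memory, rank id)
heapq.heapify(heap)
allocation = {rank: [] for rank in range(num_ranks)}
for gid, load in cells:
    assigned, rank = heapq.heappop(heap)                   # currently least-loaded rank
    allocation[rank].append(gid)
    heapq.heappush(heap, (assigned + load, rank))

total_memory_per_rank = {rank: mem for mem, rank in sorted(heap, key=lambda x: x[1])}
print(allocation)              # {0: [1, 3], 1: [2, 4, 5]}
print(total_memory_per_rank)   # {0: 4900.0, 1: 3910.0}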
@@ -234,6 +246,7 @@ class DryRunStats:

     def __init__(self) -> None:
         self.metype_memory = {}
+        self.average_syns_per_cell = {}
         self.metype_counts = Counter()
         self.synapse_counts = Counter()
         _, _, self.base_memory, _ = get_task_level_mem_usage()
@@ -265,6 +278,8 @@ def collect_all_mpi(self):
         # We combine memory dict via update(). That means if a previous circuit computed
         # cells for the same METype (hopefully unlikely!) the last estimate prevails.
         self.metype_memory = MPI.py_reduce(self.metype_memory, {}, lambda x, y: x.update(y))
+        self.average_syns_per_cell = MPI.py_reduce(self.average_syns_per_cell, {},
+                                                   lambda x, y: x.update(y))
         self.metype_counts = self.metype_counts  # Cell counts is complete in every rank

     @run_only_rank0
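A toy, MPI-free illustration of the merge semantics described in the comment above (hypothetical per-rank values; py_reduce is neurodamus's own MPI wrapper and is not reproduced here): the per-rank dictionaries are folded together with dict.update(), so if two ranks ever report the same METype, the estimate merged last prevails.

from functools import reduce


def merge(acc, part):
    acc.update(part)     # same effect as the update()-based lambda above
    return acc


per_rank_averages = [
    {"typeA": 1150.0},
    {"typeB": 320.0},
    {"typeA": 1180.0},   # duplicate METype key: this later estimate wins
]
combined = reduce(merge, per_rank_averages, {})
print(combined)          # {'typeA': 1180.0, 'typeB': 320.0}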
