From 7a02361e6cb0959cd6a039e11998a2d0e2f0f544 Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Sun, 31 Dec 2023 10:54:06 -0600 Subject: [PATCH 1/5] Add mlx to ares install --- ci/hermes/packages/hermes/package.py | 2 +- ci/hermes/packages/hermes_shm/package.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ci/hermes/packages/hermes/package.py b/ci/hermes/packages/hermes/package.py index a21ec63b6..983bf3bdb 100644 --- a/ci/hermes/packages/hermes/package.py +++ b/ci/hermes/packages/hermes/package.py @@ -47,7 +47,7 @@ class Hermes(CMakePackage): depends_on('libaio') depends_on('doxygen') # @1.9.3 depends_on('boost@1.7: +context +fiber +filesystem +system +atomic +chrono +serialization +signals +pic +regex') - depends_on('libfabric fabrics=sockets,tcp,udp,verbs', + depends_on('libfabric fabrics=sockets,tcp,udp,verbs,mlx', when='+ares') depends_on('libfabric fabrics=verbs', when='+only_verbs') diff --git a/ci/hermes/packages/hermes_shm/package.py b/ci/hermes/packages/hermes_shm/package.py index 1749fb515..73ed89a2a 100644 --- a/ci/hermes/packages/hermes_shm/package.py +++ b/ci/hermes/packages/hermes_shm/package.py @@ -26,7 +26,7 @@ class HermesShm(CMakePackage): depends_on('libaio') depends_on('doxygen') # @1.9.3 depends_on('boost@1.7: +context +fiber +filesystem +system +atomic +chrono +serialization +signals +pic +regex') - depends_on('libfabric fabrics=sockets,tcp,udp,verbs', + depends_on('libfabric fabrics=sockets,tcp,udp,verbs,mlx', when='+ares') depends_on('libfabric fabrics=verbs', when='+only_verbs') From cf68b917570796f396f09b822ac6ea001b7fc47c Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Mon, 1 Jan 2024 02:52:55 -0600 Subject: [PATCH 2/5] Add rxm to fabric --- ci/hermes/packages/hermes/package.py | 2 +- ci/hermes/packages/hermes_shm/package.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ci/hermes/packages/hermes/package.py b/ci/hermes/packages/hermes/package.py index 983bf3bdb..2533343e2 100644 --- a/ci/hermes/packages/hermes/package.py +++ b/ci/hermes/packages/hermes/package.py @@ -47,7 +47,7 @@ class Hermes(CMakePackage): depends_on('libaio') depends_on('doxygen') # @1.9.3 depends_on('boost@1.7: +context +fiber +filesystem +system +atomic +chrono +serialization +signals +pic +regex') - depends_on('libfabric fabrics=sockets,tcp,udp,verbs,mlx', + depends_on('libfabric fabrics=sockets,tcp,udp,verbs,mlx,rxm,rxd,shm', when='+ares') depends_on('libfabric fabrics=verbs', when='+only_verbs') diff --git a/ci/hermes/packages/hermes_shm/package.py b/ci/hermes/packages/hermes_shm/package.py index 73ed89a2a..12eda214e 100644 --- a/ci/hermes/packages/hermes_shm/package.py +++ b/ci/hermes/packages/hermes_shm/package.py @@ -26,7 +26,7 @@ class HermesShm(CMakePackage): depends_on('libaio') depends_on('doxygen') # @1.9.3 depends_on('boost@1.7: +context +fiber +filesystem +system +atomic +chrono +serialization +signals +pic +regex') - depends_on('libfabric fabrics=sockets,tcp,udp,verbs,mlx', + depends_on('libfabric fabrics=fabrics=sockets,tcp,udp,verbs,mlx,rxm', when='+ares') depends_on('libfabric fabrics=verbs', when='+only_verbs') From f614e184680799cadda6b9b510f590700536330d Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Mon, 1 Jan 2024 02:54:23 -0600 Subject: [PATCH 3/5] Add rxd,shm to fabrics --- ci/hermes/packages/hermes_shm/package.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/hermes/packages/hermes_shm/package.py b/ci/hermes/packages/hermes_shm/package.py index 12eda214e..2eb69bab2 100644 --- a/ci/hermes/packages/hermes_shm/package.py +++ b/ci/hermes/packages/hermes_shm/package.py @@ -26,7 +26,7 @@ class HermesShm(CMakePackage): depends_on('libaio') depends_on('doxygen') # @1.9.3 depends_on('boost@1.7: +context +fiber +filesystem +system +atomic +chrono +serialization +signals +pic +regex') - depends_on('libfabric fabrics=fabrics=sockets,tcp,udp,verbs,mlx,rxm', + depends_on('libfabric fabrics=fabrics=sockets,tcp,udp,verbs,mlx,rxm,rxd,shm', when='+ares') depends_on('libfabric fabrics=verbs', when='+only_verbs') From e9c0e8a5c2e238857e50a1c2dc21a0e8aa66c6f7 Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Mon, 1 Jan 2024 03:01:47 -0600 Subject: [PATCH 4/5] Change variants --- ci/hermes/packages/hermes_shm/package.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/hermes/packages/hermes_shm/package.py b/ci/hermes/packages/hermes_shm/package.py index 2eb69bab2..6e352b574 100644 --- a/ci/hermes/packages/hermes_shm/package.py +++ b/ci/hermes/packages/hermes_shm/package.py @@ -26,7 +26,7 @@ class HermesShm(CMakePackage): depends_on('libaio') depends_on('doxygen') # @1.9.3 depends_on('boost@1.7: +context +fiber +filesystem +system +atomic +chrono +serialization +signals +pic +regex') - depends_on('libfabric fabrics=fabrics=sockets,tcp,udp,verbs,mlx,rxm,rxd,shm', + depends_on('libfabric fabrics=sockets,tcp,udp,verbs,mlx,rxm,rxd,shm', when='+ares') depends_on('libfabric fabrics=verbs', when='+only_verbs') From 7105ab3ca23c488d75c7bf42279b5cafd26b4101 Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Mon, 8 Jan 2024 11:48:23 -0600 Subject: [PATCH 5/5] Add adios variant to hermes --- ci/hermes/packages/hermes/package.py | 1 + hrun/include/hrun/work_orchestrator/worker.h | 96 ++++++++++---------- 2 files changed, 49 insertions(+), 48 deletions(-) diff --git a/ci/hermes/packages/hermes/package.py b/ci/hermes/packages/hermes/package.py index 2533343e2..d6abae443 100644 --- a/ci/hermes/packages/hermes/package.py +++ b/ci/hermes/packages/hermes/package.py @@ -38,6 +38,7 @@ class Hermes(CMakePackage): variant('only_verbs', default=False, description='Only verbs') variant('debug', default=False, description='Build shared libraries') variant('zmq', default=False, description='Build ZeroMQ tests') + variant('adios', default=False, description='Build Adios tests') depends_on('mochi-thallium~cereal@0.10.1') depends_on('catch2@3.0.1') diff --git a/hrun/include/hrun/work_orchestrator/worker.h b/hrun/include/hrun/work_orchestrator/worker.h index f0e0ac9a0..e7c1a68c9 100644 --- a/hrun/include/hrun/work_orchestrator/worker.h +++ b/hrun/include/hrun/work_orchestrator/worker.h @@ -302,6 +302,23 @@ class Worker { } } + /** Allocate a stack for a task */ + void* AllocateStack() { + void *stack; + if (!stacks_.pop(stack).IsNull()) { + return stack; + } + return malloc(stack_size_); + } + + /** Free a stack */ + void FreeStack(void *stack) { + if(!stacks_.emplace(stack).IsNull()) { + return; + } + stacks_.Resize(stacks_.size() + num_stacks_); + } + /**=============================================================== * Run tasks * =============================================================== */ @@ -357,54 +374,6 @@ class Worker { } } - /** Print all queues */ - void PrintQueues(bool no_long_run = false) { - for (std::unique_ptr &worker : HRUN_WORK_ORCHESTRATOR->workers_) { - for (WorkEntry &work_entry : worker->work_queue_) { - Lane *&lane = work_entry.lane_; - LaneData *entry; - int off = 0; - while (!lane->peek(entry, off).IsNull()) { - Task *task = HRUN_CLIENT->GetMainPointer(entry->p_); - TaskState *exec = HRUN_TASK_REGISTRY->GetTaskState(task->task_state_); - bool is_remote = task->domain_id_.IsRemote(HRUN_RPC->GetNumHosts(), - HRUN_CLIENT->node_id_); - if (no_long_run && task->IsLongRunning()) { - off += 1; - continue; - } - HILOG(kInfo, - "(node {}, worker {}) Task {} state {}, method {}, is remote: {}, long_running: {}", - HRUN_CLIENT->node_id_, - worker->id_, - task->task_node_, - exec->name_, - task->method_, - is_remote, - task->IsLongRunning()); - off += 1; - } - } - } - } - - /** Allocate a stack for a task */ - void* AllocateStack() { - void *stack; - if (!stacks_.pop(stack).IsNull()) { - return stack; - } - return malloc(stack_size_); - } - - /** Free a stack */ - void FreeStack(void *stack) { - if(!stacks_.emplace(stack).IsNull()) { - return; - } - stacks_.Resize(stacks_.size() + num_stacks_); - } - /** Run an iteration over a particular queue */ HSHM_ALWAYS_INLINE void PollGrouped(WorkEntry &work_entry, bool flushing) { @@ -609,6 +578,37 @@ class Worker { off += 1; } } + + /** Print all queues */ + void PrintQueues(bool no_long_run = false) { + for (std::unique_ptr &worker : HRUN_WORK_ORCHESTRATOR->workers_) { + for (WorkEntry &work_entry : worker->work_queue_) { + Lane *&lane = work_entry.lane_; + LaneData *entry; + int off = 0; + while (!lane->peek(entry, off).IsNull()) { + Task *task = HRUN_CLIENT->GetMainPointer(entry->p_); + TaskState *exec = HRUN_TASK_REGISTRY->GetTaskState(task->task_state_); + bool is_remote = task->domain_id_.IsRemote(HRUN_RPC->GetNumHosts(), + HRUN_CLIENT->node_id_); + if (no_long_run && task->IsLongRunning()) { + off += 1; + continue; + } + HILOG(kInfo, + "(node {}, worker {}) Task {} state {}, method {}, is remote: {}, long_running: {}", + HRUN_CLIENT->node_id_, + worker->id_, + task->task_node_, + exec->name_, + task->method_, + is_remote, + task->IsLongRunning()); + off += 1; + } + } + } + } }; } // namespace hrun