Skip to content

Commit

Permalink
Merge pull request #639 from lukemartinlogan/dev
Browse files Browse the repository at this point in the history
Make Metadata snapshot task stop segfaulting and return correct number of values
  • Loading branch information
lukemartinlogan authored Oct 31, 2023
2 parents a3055ee + 8e5a53b commit 15d1b6c
Show file tree
Hide file tree
Showing 43 changed files with 576 additions and 302 deletions.
28 changes: 22 additions & 6 deletions ci/hermes/packages/hermes/package.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ class Hermes(CMakePackage):
version('master',
branch='master', submodules=True)
version('dev', branch='dev', submodules=True)
version('priv', branch='dev',
github='https://github.com/lukemartinlogan/hermes.git', submodules=True)
version("1.0.5-beta", sha256="1f3ba51a8beda4bc1314d6541b800de1525f5e233a6f498fcde6dc43562ddcb7")
version("1.0.0-beta", sha256="301084cced32aa00532ab4bebd638c31b0512c881ffab20bf5da4b7739defac2")
version("0.9.9-beta", sha256="d2e0025a9bd7a3f05d3ab608c727ed15d86ed30cf582549fe996875daf6cb649")
Expand All @@ -26,33 +28,47 @@ class Hermes(CMakePackage):
version("0.4.0-beta", sha256="06020836e203b2f680bea24007dc73760dfb977eb61e442b795b264f0267c16b")
version("0.3.0-beta...v0.4.0-beta", sha256="7729b115598277adcab019dee24e5276698fb595066bca758bfa59dc8d51c5a4")

depends_on('hermes_shm')

# Common across hermes_shm and hermes
variant('mpiio', default=True, description='Enable MPI I/O adapter')
variant('stdio', default=True, description='Enable STDIO adapter')
variant('vfd', default=False, description='Enable HDF5 VFD')
variant('ares', default=False, description='Enable full libfabric install')
variant('only_verbs', default=False, description='Only verbs')
variant('vfd', default=False, description='Enable HDF5 VFD')
variant('debug', default=False, description='Build shared libraries')
variant('zmq', default=False, description='Build ZeroMQ tests')

depends_on('hermes_shm')
depends_on('[email protected]')
depends_on('[email protected]')
depends_on('[email protected]')
depends_on('mpi')
depends_on('cereal')
depends_on('yaml-cpp')
depends_on('[email protected]')
depends_on('[email protected]: +context +fiber +filesystem +system +atomic +chrono +serialization +signals +pic')
depends_on('libfabric fabrics=sockets,tcp,udp,rxm,rxd,verbs',
depends_on('[email protected]: +context +fiber +filesystem +system +atomic +chrono +serialization +signals +pic +regex')
depends_on('libfabric fabrics=sockets,tcp,udp,verbs',
when='+ares')
depends_on('libfabric fabrics=verbs',
when='+only_verbs')
depends_on('libzmq', '+zmq')
depends_on('[email protected]', when='+vfd')

def cmake_args(self):
    """Return the CMake cache definitions implied by the active variants.

    The build type follows the ``debug`` variant (Debug when enabled,
    Release otherwise). The ``mpiio``, ``stdio`` and ``vfd`` variants each
    switch on the matching ``HERMES_ENABLE_*`` option.

    :return: list of ``-D<VAR>=<VALUE>`` strings.
    """
    spec = self.spec
    args = []
    if '+debug' in spec:
        args.append('-DCMAKE_BUILD_TYPE=Debug')
    else:
        args.append('-DCMAKE_BUILD_TYPE=Release')
    if '+mpiio' in spec:
        args.append('-DHERMES_ENABLE_MPIIO_ADAPTER=ON')
        # Report which MPI implementation appears in the spec.
        if 'openmpi' in spec:
            args.append('-DHERMES_OPENMPI=ON')
        elif 'mpich' in spec:
            args.append('-DHERMES_MPICH=ON')
    # Each option needs the -D prefix; without it CMake does not treat the
    # argument as a cache definition and the adapter is never enabled.
    if '+stdio' in spec:
        args.append('-DHERMES_ENABLE_STDIO_ADAPTER=ON')
    if '+vfd' in spec:
        args.append('-DHERMES_ENABLE_VFD=ON')
    return args

def set_include(self, env, path):
Expand Down
2 changes: 1 addition & 1 deletion codegen/refresh_methods
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ def refresh_methods(TASK_ROOT):
method_name = method_enum_name.replace('k', '', 1)
task_name = method_name + "Task"
lines += [f' case Method::{method_enum_name}: {{',
f' HRUN_CLIENT->DelTask(reinterpret_cast<{task_name} *>(task));',
f' HRUN_CLIENT->DelTask<{task_name}>(reinterpret_cast<{task_name} *>(task));',
f' break;',
f' }}']
lines += [' }']
Expand Down
11 changes: 3 additions & 8 deletions config/hermes_server_default.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -76,12 +76,6 @@ devices:
is_shared_device: true
borg_capacity_thresh: [ 0.0, 1.0 ]

# Define the maximum amount of memory Hermes can use for non-buffering tasks.
# This includes metadata management and memory allocations.
# This memory will not be preallocated. If you are unsure, use 0, which
# allows any amount of memory.
max_memory: 0g

### Define properties of the BORG
buffer_organizer:
# The number of threads used in the background organization of internal Hermes buffers.
Expand Down Expand Up @@ -146,7 +140,7 @@ work_orchestrator:
### Queue Manager settings
queue_manager:
# The default depth of allocated queues
queue_depth: 256
queue_depth: 100000
# The maximum number of lanes per queue
max_lanes: 16
# The maximum number of queues
Expand All @@ -158,6 +152,7 @@ queue_manager:
# The size of the shared memory region to allocate for general data structures
shm_size: 0g
# The size of the shared memory to allocate for data buffers
data_shm_size: 4g

### Define properties of RPCs
rpc:
Expand All @@ -183,7 +178,7 @@ rpc:
port: 8080

# The number of handler threads for each RPC server.
num_threads: 4
num_threads: 32

### Task Registry
task_registry: [
Expand Down
21 changes: 11 additions & 10 deletions hermes_adapters/filesystem/filesystem.cc
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,8 @@ void Filesystem::Open(AdapterStat &stat, File &f, const std::string &path) {
// Update file position pointer
if (stat.hflags_.Any(HERMES_FS_APPEND)) {
stat.st_ptr_ = std::numeric_limits<size_t>::max();
} else {
stat.st_ptr_ = 0;
}
// Allocate internal hermes data
auto stat_ptr = std::make_shared<AdapterStat>(stat);
Expand All @@ -92,7 +94,6 @@ size_t Filesystem::Write(File &f, AdapterStat &stat, const void *ptr,
(void) f;
hapi::Bucket &bkt = stat.bkt_id_;
std::string filename = bkt.GetName();

bool is_append = stat.st_ptr_ == std::numeric_limits<size_t>::max();

HILOG(kDebug, "Write called for filename: {}"
Expand Down Expand Up @@ -244,7 +245,7 @@ size_t Filesystem::GetSize(File &f, AdapterStat &stat) {
}
}

off_t Filesystem::Seek(File &f, AdapterStat &stat,
size_t Filesystem::Seek(File &f, AdapterStat &stat,
SeekMode whence, off64_t offset) {
auto mdm = HERMES_FS_METADATA_MANAGER;
switch (whence) {
Expand Down Expand Up @@ -281,7 +282,7 @@ off_t Filesystem::Seek(File &f, AdapterStat &stat,
return offset;
}

off_t Filesystem::Tell(File &f, AdapterStat &stat) {
size_t Filesystem::Tell(File &f, AdapterStat &stat) {
(void) f;
if (stat.st_ptr_ != std::numeric_limits<size_t>::max()) {
return stat.st_ptr_;
Expand Down Expand Up @@ -360,28 +361,28 @@ int Filesystem::Remove(const std::string &pathname) {
/**
 * Write \a total_size bytes from \a ptr at the file's current position.
 *
 * Takes the position from stat.st_ptr_ and forwards to the Write overload
 * that accepts an explicit offset.
 */
size_t Filesystem::Write(File &f, AdapterStat &stat, const void *ptr,
                         size_t total_size, IoStatus &io_status,
                         FsIoOptions opts) {
  // Declared once, as size_t: st_ptr_ can hold
  // std::numeric_limits<size_t>::max() (set on append-mode open), which a
  // signed off_t cannot represent.
  size_t off = stat.st_ptr_;
  return Write(f, stat, ptr, off, total_size, io_status, opts);
}

/**
 * Read \a total_size bytes into \a ptr from the file's current position.
 *
 * Takes the position from stat.st_ptr_ and forwards to the Read overload
 * that accepts an explicit offset.
 */
size_t Filesystem::Read(File &f, AdapterStat &stat, void *ptr,
                        size_t total_size,
                        IoStatus &io_status, FsIoOptions opts) {
  // Declared once, as size_t: st_ptr_ can hold
  // std::numeric_limits<size_t>::max() (set on append-mode open), which a
  // signed off_t cannot represent.
  size_t off = stat.st_ptr_;
  return Read(f, stat, ptr, off, total_size, io_status, opts);
}

/**
 * Start a write of \a total_size bytes from \a ptr at the file's current
 * position, tagged with \a req_id.
 *
 * Takes the position from stat.st_ptr_ and forwards to the AWrite overload
 * that accepts an explicit offset, returning the Task it produces.
 */
Task* Filesystem::AWrite(File &f, AdapterStat &stat, const void *ptr,
                         size_t total_size, size_t req_id,
                         IoStatus &io_status, FsIoOptions opts) {
  // Declared once, as size_t: st_ptr_ can hold
  // std::numeric_limits<size_t>::max() (set on append-mode open), which a
  // signed off_t cannot represent.
  size_t off = stat.st_ptr_;
  return AWrite(f, stat, ptr, off, total_size, req_id, io_status, opts);
}

/**
 * Start a read of \a total_size bytes into \a ptr from the file's current
 * position, tagged with \a req_id.
 *
 * Takes the position from stat.st_ptr_ and forwards to the ARead overload
 * that accepts an explicit offset, returning the Task it produces.
 */
Task* Filesystem::ARead(File &f, AdapterStat &stat, void *ptr,
                        size_t total_size, size_t req_id,
                        IoStatus &io_status, FsIoOptions opts) {
  // Declared once, as size_t: st_ptr_ can hold
  // std::numeric_limits<size_t>::max() (set on append-mode open), which a
  // signed off_t cannot represent.
  size_t off = stat.st_ptr_;
  return ARead(f, stat, ptr, off, total_size, req_id, io_status, opts);
}

Expand Down Expand Up @@ -499,8 +500,8 @@ Task* Filesystem::ARead(File &f, bool &stat_exists, void *ptr,
return ARead(f, *stat, ptr, off, total_size, req_id, io_status, opts);
}

off_t Filesystem::Seek(File &f, bool &stat_exists,
SeekMode whence, off_t offset) {
size_t Filesystem::Seek(File &f, bool &stat_exists,
SeekMode whence, size_t offset) {
auto mdm = HERMES_FS_METADATA_MANAGER;
auto stat = mdm->Find(f);
if (!stat) {
Expand All @@ -522,7 +523,7 @@ size_t Filesystem::GetSize(File &f, bool &stat_exists) {
return GetSize(f, *stat);
}

off_t Filesystem::Tell(File &f, bool &stat_exists) {
size_t Filesystem::Tell(File &f, bool &stat_exists) {
auto mdm = HERMES_FS_METADATA_MANAGER;
auto stat = mdm->Find(f);
if (!stat) {
Expand Down
8 changes: 4 additions & 4 deletions hermes_adapters/filesystem/filesystem.h
Original file line number Diff line number Diff line change
Expand Up @@ -83,11 +83,11 @@ class Filesystem {
/** wait for request IDs in \a req_id vector */
void Wait(std::vector<uint64_t> &req_id, std::vector<size_t> &ret);
/** seek */
off_t Seek(File &f, AdapterStat &stat, SeekMode whence, off64_t offset);
size_t Seek(File &f, AdapterStat &stat, SeekMode whence, off64_t offset);
/** file size */
size_t GetSize(File &f, AdapterStat &stat);
/** tell */
off_t Tell(File &f, AdapterStat &stat);
size_t Tell(File &f, AdapterStat &stat);
/** sync */
int Sync(File &f, AdapterStat &stat);
/** truncate */
Expand Down Expand Up @@ -153,11 +153,11 @@ class Filesystem {
size_t total_size, size_t req_id, IoStatus &io_status,
FsIoOptions opts);
/** seek */
off_t Seek(File &f, bool &stat_exists, SeekMode whence, off_t offset);
size_t Seek(File &f, bool &stat_exists, SeekMode whence, size_t offset);
/** file sizes */
size_t GetSize(File &f, bool &stat_exists);
/** tell */
off_t Tell(File &f, bool &stat_exists);
size_t Tell(File &f, bool &stat_exists);
/** sync */
int Sync(File &f, bool &stat_exists);
/** truncate */
Expand Down
5 changes: 3 additions & 2 deletions hrun/config/hrun_server_default.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ work_orchestrator:
### Queue Manager settings
queue_manager:
# The default depth of allocated queues
queue_depth: 256
queue_depth: 100000
# The maximum number of lanes per queue
max_lanes: 16
# The maximum number of queues
Expand All @@ -18,6 +18,7 @@ queue_manager:
# The size of the shared memory region to allocate for general data structures
shm_size: 0g
# The size of the shared memory to allocate for data buffers
data_shm_size: 4g

### Define properties of RPCs
rpc:
Expand All @@ -43,7 +44,7 @@ rpc:
port: 8080

# The number of handler threads for each RPC server.
num_threads: 4
num_threads: 32

### Task Registry
task_registry: [
Expand Down
Loading

0 comments on commit 15d1b6c

Please sign in to comment.