Skip to content

Commit

Permalink
MAINT: PR 784 revisions
Browse files Browse the repository at this point in the history
* make `log_get_dxt_record()` more efficient by reading
directly from the C-contiguous segment buffer into
NumPy structured arrays (via `np.frombuffer`)

* simplify the changes to `test_dxt_records()`
  • Loading branch information
tylerjereddy committed Sep 19, 2022
1 parent 9fa66b8 commit 3d3af72
Show file tree
Hide file tree
Showing 2 changed files with 39 additions and 78 deletions.
37 changes: 8 additions & 29 deletions darshan-util/pydarshan/darshan/backend/cffi_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -559,37 +559,16 @@ def log_get_dxt_record(log, mod_name, reads=True, writes=True, dtype='dict'):
rec['write_count'] = wcnt
rec['read_count'] = rcnt

rec['write_segments'] = []
rec['read_segments'] = []


size_of = ffi.sizeof("struct dxt_file_record")
segments = ffi.cast("struct segment_info *", buf[0] + size_of )
arr_write = np.recarray(wcnt, dtype=[("offset", int),
("length", int),
("start_time", float),
("end_time", float)])
arr_read = np.recarray(rcnt, dtype=[("offset", int),
("length", int),
("start_time", float),
("end_time", float)])

for i in range(wcnt):
arr_write[i, ...] = (segments[i].offset,
segments[i].length,
segments[i].start_time,
segments[i].end_time)

for k in range(rcnt):
i = k + wcnt
arr_read[k, ...] = (segments[i].offset,
segments[i].length,
segments[i].start_time,
segments[i].end_time)


rec['write_segments'] = arr_write
rec['read_segments'] = arr_read
segments_buf = ffi.buffer(segments, (rcnt + wcnt) * 64 * 4)
segment_arr = np.frombuffer(buffer=segments_buf,
dtype=[("offset", int),
("length", int),
("start_time", float),
("end_time", float)])
rec['write_segments'] = segment_arr[:wcnt]
rec['read_segments'] = segment_arr[wcnt: rcnt + wcnt]
if dtype == "pandas":
rec['read_segments'] = pd.DataFrame(rec['read_segments'])
rec['write_segments'] = pd.DataFrame(rec['write_segments'])
Expand Down
80 changes: 31 additions & 49 deletions darshan-util/pydarshan/darshan/tests/test_moddxt.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,70 +18,52 @@
'hostname': 'sn176.localdomain',
'write_count': 1,
'read_count': 0,
'write_segments': [{'offset': 0,
'length': 40,
'start_time': 0.10337884305045009,
'end_time': 0.10338771319948137}],
'read_segments': []}),
'write_segments': np.array([(0,
40,
0.10337884305045009,
0.10338771319948137)],
dtype=[("offset", int),
("length", int),
("start_time", float),
("end_time", float)]),
'read_segments': np.array([],
dtype=[("offset", int),
("length", int),
("start_time", float),
("end_time", float)])}),
('DXT_MPIIO', {'id': 9457796068806373448,
'rank': 0,
'hostname': 'sn176.localdomain',
'write_count': 1,
'read_count': 0,
'write_segments': [{'offset': 0,
'length': 4000,
'start_time': 0.10368914622813463,
'end_time': 0.1053433942142874}],
'read_segments': []})])
'write_segments': np.array([(0,
4000,
0.10368914622813463,
0.1053433942142874)],
dtype=[("offset", int),
("length", int),
("start_time", float),
("end_time", float)]),
'read_segments': np.array([],
dtype=[("offset", int),
("length", int),
("start_time", float),
("end_time", float)])})])
def test_dxt_records(logfile, mod, expected_dict):
# regression guard for DXT records values
# regression guard for DXT records values;
# write_segments and read_segments are now NumPy
# structured arrays, to save considerable memory
# per gh-779
# TODO: refactor for simplicity--we can probably
# just initialize the expected values via
# np.array() with the appropriate structured dtypes
expected_write_segs = np.recarray(1, dtype=[("offset", int),
("length", int),
("start_time", float),
("end_time", float)])
expected_read_segs = np.recarray(1, dtype=[("offset", int),
("length", int),
("start_time", float),
("end_time", float)])
if expected_dict["write_segments"]:
expected_write_segs.offset = expected_dict["write_segments"][0]["offset"]
expected_write_segs.length = expected_dict["write_segments"][0]["length"]
expected_write_segs.start_time = expected_dict["write_segments"][0]["start_time"]
expected_write_segs.end_time = expected_dict["write_segments"][0]["end_time"]
else:
expected_write_segs = np.recarray(0, dtype=[("offset", int),
("length", int),
("start_time", float),
("end_time", float)])
if expected_dict["read_segments"]:
expected_read_segs.offset = expected_dict["read_segments"][0]["offset"]
expected_read_segs.length = expected_dict["read_segments"][0]["length"]
expected_read_segs.start_time = expected_dict["read_segments"][0]["start_time"]
expected_read_segs.end_time = expected_dict["read_segments"][0]["end_time"]
else:
expected_read_segs = np.recarray(0, dtype=[("offset", int),
("length", int),
("start_time", float),
("end_time", float)])
expected_dict["write_segments"] = expected_write_segs
expected_dict["read_segments"] = expected_read_segs

logfile = get_log_path(logfile)
log = backend.log_open(logfile)
rec = backend.log_get_record(log, mod)
for key in expected_dict.keys():
if "segments" in key:
# careful, can't use assert_allclose directly
# on structured arrays, so compare field by field
assert_allclose(rec[key].offset, expected_dict[key].offset)
assert_allclose(rec[key].length, expected_dict[key].length)
assert_allclose(rec[key].start_time, expected_dict[key].start_time)
assert_allclose(rec[key].end_time, expected_dict[key].end_time)
assert_allclose(rec[key]["offset"], expected_dict[key]["offset"])
assert_allclose(rec[key]["length"], expected_dict[key]["length"])
assert_allclose(rec[key]["start_time"], expected_dict[key]["start_time"])
assert_allclose(rec[key]["end_time"], expected_dict[key]["end_time"])
else:
assert rec[key] == expected_dict[key]

0 comments on commit 3d3af72

Please sign in to comment.