# Patch summary (pydarshan DXT segment extraction and its regression test).
#
# cffi_backend.py, log_get_dxt_record():
#   The per-segment Python loops that filled two np.recarray objects are
#   replaced by one np.frombuffer() view over ffi.buffer(segments, nbytes).
#   Write segments are the first wcnt entries of that view and read segments
#   are the following rcnt entries.
#   ffi.buffer() takes its size in bytes, so nbytes is computed as
#   (rcnt + wcnt) * ffi.sizeof("struct segment_info"); a hard-coded
#   per-segment constant that overestimates the struct size would expose
#   memory past the last segment.
#   NOTE(review): np.frombuffer() does not copy, so both arrays alias the C
#   record buffer -- confirm that buffer outlives the returned record (its
#   release is not visible in this hunk).
#   NOTE(review): the structured dtype uses NumPy's default int/float widths
#   and has to match the layout of struct segment_info exactly -- presumably
#   four 64-bit fields; confirm against the C header.
#
# test_moddxt.py, test_dxt_records():
#   Expected segments are now written directly as structured np.array values
#   in the expected-value dictionaries that precede the test, and the
#   assertions index fields by name (rec[key]["offset"]) instead of using
#   attribute access.
#   NOTE(review): the retained comments still call the segments "recarrays";
#   that wording looks stale now that fields are accessed by name -- confirm.
diff --git a/darshan-util/pydarshan/darshan/backend/cffi_backend.py b/darshan-util/pydarshan/darshan/backend/cffi_backend.py index d4ffbcdbd..310000b39 100644 --- a/darshan-util/pydarshan/darshan/backend/cffi_backend.py +++ b/darshan-util/pydarshan/darshan/backend/cffi_backend.py @@ -559,37 +559,16 @@ def log_get_dxt_record(log, mod_name, reads=True, writes=True, dtype='dict'): rec['write_count'] = wcnt rec['read_count'] = rcnt - rec['write_segments'] = [] - rec['read_segments'] = [] - - size_of = ffi.sizeof("struct dxt_file_record") segments = ffi.cast("struct segment_info *", buf[0] + size_of ) - arr_write = np.recarray(wcnt, dtype=[("offset", int), - ("length", int), - ("start_time", float), - ("end_time", float)]) - arr_read = np.recarray(rcnt, dtype=[("offset", int), - ("length", int), - ("start_time", float), - ("end_time", float)]) - - for i in range(wcnt): - arr_write[i, ...] = (segments[i].offset, - segments[i].length, - segments[i].start_time, - segments[i].end_time) - - for k in range(rcnt): - i = k + wcnt - arr_read[k, ...] 
= (segments[i].offset, - segments[i].length, - segments[i].start_time, - segments[i].end_time) - - - rec['write_segments'] = arr_write - rec['read_segments'] = arr_read + segments_buf = ffi.buffer(segments, (rcnt + wcnt) * ffi.sizeof("struct segment_info")) + segment_arr = np.frombuffer(buffer=segments_buf, + dtype=[("offset", int), + ("length", int), + ("start_time", float), + ("end_time", float)]) + rec['write_segments'] = segment_arr[:wcnt] + rec['read_segments'] = segment_arr[wcnt: rcnt + wcnt] if dtype == "pandas": rec['read_segments'] = pd.DataFrame(rec['read_segments']) rec['write_segments'] = pd.DataFrame(rec['write_segments']) diff --git a/darshan-util/pydarshan/darshan/tests/test_moddxt.py b/darshan-util/pydarshan/darshan/tests/test_moddxt.py index 53532a7be..1cdd7bd80 100644 --- a/darshan-util/pydarshan/darshan/tests/test_moddxt.py +++ b/darshan-util/pydarshan/darshan/tests/test_moddxt.py @@ -18,60 +18,42 @@ 'hostname': 'sn176.localdomain', 'write_count': 1, 'read_count': 0, - 'write_segments': [{'offset': 0, - 'length': 40, - 'start_time': 0.10337884305045009, - 'end_time': 0.10338771319948137}], - 'read_segments': []}), + 'write_segments': np.array([(0, + 40, + 0.10337884305045009, + 0.10338771319948137)], + dtype=[("offset", int), + ("length", int), + ("start_time", float), + ("end_time", float)]), + 'read_segments': np.array([], + dtype=[("offset", int), + ("length", int), + ("start_time", float), + ("end_time", float)])}), ('DXT_MPIIO', {'id': 9457796068806373448, 'rank': 0, 'hostname': 'sn176.localdomain', 'write_count': 1, 'read_count': 0, - 'write_segments': [{'offset': 0, - 'length': 4000, - 'start_time': 0.10368914622813463, - 'end_time': 0.1053433942142874}], - 'read_segments': []})]) + 'write_segments': np.array([(0, + 4000, + 0.10368914622813463, + 0.1053433942142874)], + dtype=[("offset", int), + ("length", int), + ("start_time", float), + ("end_time", float)]), + 'read_segments': np.array([], + dtype=[("offset", int), + ("length", int), + ("start_time", float), + 
("end_time", float)])})]) def test_dxt_records(logfile, mod, expected_dict): - # regression guard for DXT records values + # regression guard for DXT records values; # write_segments and read_segments are now NumPy # recarrays, to save considerable memory # per gh-779 - # TODO: refactor for simplicity--we can probably - # just initialize the expected values via - # np.array() with the appropriate structured dtypes - expected_write_segs = np.recarray(1, dtype=[("offset", int), - ("length", int), - ("start_time", float), - ("end_time", float)]) - expected_read_segs = np.recarray(1, dtype=[("offset", int), - ("length", int), - ("start_time", float), - ("end_time", float)]) - if expected_dict["write_segments"]: - expected_write_segs.offset = expected_dict["write_segments"][0]["offset"] - expected_write_segs.length = expected_dict["write_segments"][0]["length"] - expected_write_segs.start_time = expected_dict["write_segments"][0]["start_time"] - expected_write_segs.end_time = expected_dict["write_segments"][0]["end_time"] - else: - expected_write_segs = np.recarray(0, dtype=[("offset", int), - ("length", int), - ("start_time", float), - ("end_time", float)]) - if expected_dict["read_segments"]: - expected_read_segs.offset = expected_dict["read_segments"][0]["offset"] - expected_read_segs.length = expected_dict["read_segments"][0]["length"] - expected_read_segs.start_time = expected_dict["read_segments"][0]["start_time"] - expected_read_segs.end_time = expected_dict["read_segments"][0]["end_time"] - else: - expected_read_segs = np.recarray(0, dtype=[("offset", int), - ("length", int), - ("start_time", float), - ("end_time", float)]) - expected_dict["write_segments"] = expected_write_segs - expected_dict["read_segments"] = expected_read_segs - logfile = get_log_path(logfile) log = backend.log_open(logfile) rec = backend.log_get_record(log, mod) @@ -79,9 +61,9 @@ def test_dxt_records(logfile, mod, expected_dict): if "segments" in key: # careful, can't use assert_allclose 
directly # on recarrays - assert_allclose(rec[key].offset, expected_dict[key].offset) - assert_allclose(rec[key].length, expected_dict[key].length) - assert_allclose(rec[key].start_time, expected_dict[key].start_time) - assert_allclose(rec[key].end_time, expected_dict[key].end_time) + assert_allclose(rec[key]["offset"], expected_dict[key]["offset"]) + assert_allclose(rec[key]["length"], expected_dict[key]["length"]) + assert_allclose(rec[key]["start_time"], expected_dict[key]["start_time"]) + assert_allclose(rec[key]["end_time"], expected_dict[key]["end_time"]) else: assert rec[key] == expected_dict[key]