Skip to content

Commit 3d3af72

Browse files
committed
MAINT: PR 784 revisions
* more efficient `log_get_dxt_record()` by reading directly from the C-contiguous segment buffer into NumPy recarrays
* simplify the changes to `test_dxt_records()`
1 parent 9fa66b8 commit 3d3af72

File tree

2 files changed

+39
-78
lines changed

2 files changed

+39
-78
lines changed

darshan-util/pydarshan/darshan/backend/cffi_backend.py

Lines changed: 8 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -559,37 +559,16 @@ def log_get_dxt_record(log, mod_name, reads=True, writes=True, dtype='dict'):
559559
rec['write_count'] = wcnt
560560
rec['read_count'] = rcnt
561561

562-
rec['write_segments'] = []
563-
rec['read_segments'] = []
564-
565-
566562
size_of = ffi.sizeof("struct dxt_file_record")
567563
segments = ffi.cast("struct segment_info *", buf[0] + size_of )
568-
arr_write = np.recarray(wcnt, dtype=[("offset", int),
569-
("length", int),
570-
("start_time", float),
571-
("end_time", float)])
572-
arr_read = np.recarray(rcnt, dtype=[("offset", int),
573-
("length", int),
574-
("start_time", float),
575-
("end_time", float)])
576-
577-
for i in range(wcnt):
578-
arr_write[i, ...] = (segments[i].offset,
579-
segments[i].length,
580-
segments[i].start_time,
581-
segments[i].end_time)
582-
583-
for k in range(rcnt):
584-
i = k + wcnt
585-
arr_read[k, ...] = (segments[i].offset,
586-
segments[i].length,
587-
segments[i].start_time,
588-
segments[i].end_time)
589-
590-
591-
rec['write_segments'] = arr_write
592-
rec['read_segments'] = arr_read
564+
segments_buf = ffi.buffer(segments, (rcnt + wcnt) * 64 * 4)
565+
segment_arr = np.frombuffer(buffer=segments_buf,
566+
dtype=[("offset", int),
567+
("length", int),
568+
("start_time", float),
569+
("end_time", float)])
570+
rec['write_segments'] = segment_arr[:wcnt]
571+
rec['read_segments'] = segment_arr[wcnt: rcnt + wcnt]
593572
if dtype == "pandas":
594573
rec['read_segments'] = pd.DataFrame(rec['read_segments'])
595574
rec['write_segments'] = pd.DataFrame(rec['write_segments'])

darshan-util/pydarshan/darshan/tests/test_moddxt.py

Lines changed: 31 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -18,70 +18,52 @@
1818
'hostname': 'sn176.localdomain',
1919
'write_count': 1,
2020
'read_count': 0,
21-
'write_segments': [{'offset': 0,
22-
'length': 40,
23-
'start_time': 0.10337884305045009,
24-
'end_time': 0.10338771319948137}],
25-
'read_segments': []}),
21+
'write_segments': np.array([(0,
22+
40,
23+
0.10337884305045009,
24+
0.10338771319948137)],
25+
dtype=[("offset", int),
26+
("length", int),
27+
("start_time", float),
28+
("end_time", float)]),
29+
'read_segments': np.array([],
30+
dtype=[("offset", int),
31+
("length", int),
32+
("start_time", float),
33+
("end_time", float)])}),
2634
('DXT_MPIIO', {'id': 9457796068806373448,
2735
'rank': 0,
2836
'hostname': 'sn176.localdomain',
2937
'write_count': 1,
3038
'read_count': 0,
31-
'write_segments': [{'offset': 0,
32-
'length': 4000,
33-
'start_time': 0.10368914622813463,
34-
'end_time': 0.1053433942142874}],
35-
'read_segments': []})])
39+
'write_segments': np.array([(0,
40+
4000,
41+
0.10368914622813463,
42+
0.1053433942142874)],
43+
dtype=[("offset", int),
44+
("length", int),
45+
("start_time", float),
46+
("end_time", float)]),
47+
'read_segments': np.array([],
48+
dtype=[("offset", int),
49+
("length", int),
50+
("start_time", float),
51+
("end_time", float)])})])
3652
def test_dxt_records(logfile, mod, expected_dict):
37-
# regression guard for DXT records values
53+
# regression guard for DXT records values;
3854
# write_segments and read_segments are now NumPy
3955
# recarrays, to save considerable memory
4056
# per gh-779
41-
# TODO: refactor for simplicity--we can probably
42-
# just initialize the expected values via
43-
# np.array() with the appropriate structured dtypes
44-
expected_write_segs = np.recarray(1, dtype=[("offset", int),
45-
("length", int),
46-
("start_time", float),
47-
("end_time", float)])
48-
expected_read_segs = np.recarray(1, dtype=[("offset", int),
49-
("length", int),
50-
("start_time", float),
51-
("end_time", float)])
52-
if expected_dict["write_segments"]:
53-
expected_write_segs.offset = expected_dict["write_segments"][0]["offset"]
54-
expected_write_segs.length = expected_dict["write_segments"][0]["length"]
55-
expected_write_segs.start_time = expected_dict["write_segments"][0]["start_time"]
56-
expected_write_segs.end_time = expected_dict["write_segments"][0]["end_time"]
57-
else:
58-
expected_write_segs = np.recarray(0, dtype=[("offset", int),
59-
("length", int),
60-
("start_time", float),
61-
("end_time", float)])
62-
if expected_dict["read_segments"]:
63-
expected_read_segs.offset = expected_dict["read_segments"][0]["offset"]
64-
expected_read_segs.length = expected_dict["read_segments"][0]["length"]
65-
expected_read_segs.start_time = expected_dict["read_segments"][0]["start_time"]
66-
expected_read_segs.end_time = expected_dict["read_segments"][0]["end_time"]
67-
else:
68-
expected_read_segs = np.recarray(0, dtype=[("offset", int),
69-
("length", int),
70-
("start_time", float),
71-
("end_time", float)])
72-
expected_dict["write_segments"] = expected_write_segs
73-
expected_dict["read_segments"] = expected_read_segs
74-
7557
logfile = get_log_path(logfile)
7658
log = backend.log_open(logfile)
7759
rec = backend.log_get_record(log, mod)
7860
for key in expected_dict.keys():
7961
if "segments" in key:
8062
# careful, can't use assert_allclose directly
8163
# on recarrays
82-
assert_allclose(rec[key].offset, expected_dict[key].offset)
83-
assert_allclose(rec[key].length, expected_dict[key].length)
84-
assert_allclose(rec[key].start_time, expected_dict[key].start_time)
85-
assert_allclose(rec[key].end_time, expected_dict[key].end_time)
64+
assert_allclose(rec[key]["offset"], expected_dict[key]["offset"])
65+
assert_allclose(rec[key]["length"], expected_dict[key]["length"])
66+
assert_allclose(rec[key]["start_time"], expected_dict[key]["start_time"])
67+
assert_allclose(rec[key]["end_time"], expected_dict[key]["end_time"])
8668
else:
8769
assert rec[key] == expected_dict[key]

0 commit comments

Comments
 (0)