Skip to content

Commit ea97550

Browse files
authored
i#7480: Move syscall injection after all pre-syscall event markers (#7483)
Moves injection point for syscall traces to the point after all syscall related markers added by the pre-syscall callback, but before the ones added by the post-syscall callback. Specifically, moves injection to after these markers: `TRACE_MARKER_TYPE_MAYBE_BLOCKING_SYSCALL`, the syscall function tracing related ones including `TRACE_MARKER_TYPE_FUNC_ID` and `TRACE_MARKER_TYPE_FUNC_ARG`, and the scheduling related ones added by raw2trace sub-classes based on the syscall function tracing ones including `TRACE_MARKER_TYPE_SYSCALL_SCHEDULE`, `TRACE_MARKER_TYPE_SYSCALL_UNSCHEDULE`, `TRACE_MARKER_TYPE_SYSCALL_ARG_TIMEOUT`, and `TRACE_MARKER_TYPE_DIRECT_THREAD_SWITCH`. These markers are not present for all syscalls, but for only the ones that are considered non-blocking by raw2trace, and the ones that are specified in `-record_syscall`. This change is made for both static injection in raw2trace, and dynamic injection in the drmemtrace scheduler. Adds a view test that shows how the syscall trace is dynamically injected on a deterministic pure assembly test app that includes maybe-blocking syscalls and syscalls traced using `-record_syscall`. Updates the mock syscall sequence file to include dummy sequences for the syscalls in the pure asm app. Updates the `burst_syscall_inject` test which verifies static injection. Updates one of the syscalls in `burst_syscall_inject` to `SYS_membarrier` which is identified as a maybe blocking syscall by raw2trace, and add the other one to `-record_syscall`. Updates the `scheduler_unit_test` and `raw2trace_unit_test` to show injection in various different cases. Adds more trace invariant checks to the drmemtrace invariant checker to ensure the location of injected records is correct. Also adds tests in `invariant_checker_unit_test` and an invariant checker run on the pure-asm app dynamically injected trace. Disallows empty templates for any kernel sequence in the drmemtrace scheduler. Verifies some more view tool output for the existing `allasm-record-syscall` test, which lets a reader see the non-injected version. Also manually tested on a large internal trace that the new static and dynamic injection logic generates traces without any invariant error. Issue: #7480, #7157, #6495
1 parent f993308 commit ea97550

22 files changed

+1021
-158
lines changed

clients/drcachesim/scheduler/scheduler_impl.cpp

Lines changed: 133 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -278,6 +278,13 @@ scheduler_impl_tmpl_t<memref_t, reader_t>::record_type_is_instr_boundary(
278278
return record_type_is_instr(record);
279279
}
280280

281+
template <>
282+
bool
283+
scheduler_impl_tmpl_t<memref_t, reader_t>::record_type_is_thread_exit(memref_t record)
284+
{
285+
return record.exit.type == TRACE_TYPE_THREAD_EXIT;
286+
}
287+
281288
template <>
282289
bool
283290
scheduler_impl_tmpl_t<memref_t, reader_t>::record_type_is_marker(
@@ -556,6 +563,14 @@ scheduler_impl_tmpl_t<trace_entry_t, record_reader_t>::record_type_is_instr_boun
556563
!record_reader_t::record_is_pre_instr(&prev_record);
557564
}
558565

566+
template <>
567+
bool
568+
scheduler_impl_tmpl_t<trace_entry_t, record_reader_t>::record_type_is_thread_exit(
569+
trace_entry_t record)
570+
{
571+
return record.type == TRACE_TYPE_THREAD_EXIT;
572+
}
573+
559574
template <>
560575
bool
561576
scheduler_impl_tmpl_t<trace_entry_t, record_reader_t>::record_type_set_marker_value(
@@ -1623,6 +1638,10 @@ scheduler_impl_tmpl_t<RecordType, ReaderType>::read_kernel_sequences(
16231638
error_string_ += sequence_type + " marker values mismatched";
16241639
return sched_type_t::STATUS_ERROR_INVALID_PARAMETER;
16251640
}
1641+
if (sequence[sequence_key].empty()) {
1642+
error_string_ += sequence_type + " sequence empty";
1643+
return sched_type_t::STATUS_ERROR_INVALID_PARAMETER;
1644+
}
16261645
VPRINT(this, 1, "Read %zu kernel %s records for key %d\n",
16271646
sequence[sequence_key].size(), sequence_type.c_str(), sequence_key);
16281647
sequence_key = INVALID_SEQ_KEY;
@@ -1791,6 +1810,83 @@ scheduler_impl_tmpl_t<RecordType, ReaderType>::get_initial_input_content(
17911810
return sched_type_t::STATUS_SUCCESS;
17921811
}
17931812

1813+
template <typename RecordType, typename ReaderType>
1814+
typename scheduler_tmpl_t<RecordType, ReaderType>::stream_status_t
1815+
scheduler_impl_tmpl_t<RecordType, ReaderType>::inject_pending_syscall_sequence(
1816+
output_ordinal_t output, input_info_t *input, RecordType &record)
1817+
{
1818+
assert(!input->in_syscall_injection);
1819+
assert(input->to_inject_syscall != input_info_t::INJECT_NONE);
1820+
if (!record_type_is_invalid(record)) {
1821+
// May be invalid if we're at input eof, in which case we do not need to
1822+
// save it.
1823+
input->queue.push_front(record);
1824+
}
1825+
int syscall_num = input->to_inject_syscall;
1826+
input->to_inject_syscall = input_info_t::INJECT_NONE;
1827+
assert(syscall_sequence_.find(syscall_num) != syscall_sequence_.end());
1828+
stream_status_t res = inject_kernel_sequence(syscall_sequence_[syscall_num], input);
1829+
if (res != stream_status_t::STATUS_OK)
1830+
return res;
1831+
++outputs_[output]
1832+
.stats[memtrace_stream_t::SCHED_STAT_KERNEL_SYSCALL_SEQUENCE_INJECTIONS];
1833+
VPRINT(this, 3, "Inserted %zu syscall records for syscall %d to %d.%d\n",
1834+
syscall_sequence_[syscall_num].size(), syscall_num, input->workload,
1835+
input->index);
1836+
1837+
// Return the first injected record.
1838+
assert(!input->queue.empty());
1839+
record = input->queue.front();
1840+
input->queue.pop_front();
1841+
input->cur_from_queue = true;
1842+
input->in_syscall_injection = true;
1843+
return stream_status_t::STATUS_OK;
1844+
}
1845+
1846+
template <typename RecordType, typename ReaderType>
1847+
typename scheduler_tmpl_t<RecordType, ReaderType>::stream_status_t
1848+
scheduler_impl_tmpl_t<RecordType, ReaderType>::maybe_inject_pending_syscall_sequence(
1849+
output_ordinal_t output, input_info_t *input, RecordType &record)
1850+
{
1851+
if (input->to_inject_syscall == input_info_t::INJECT_NONE)
1852+
return stream_status_t::STATUS_OK;
1853+
1854+
trace_marker_type_t marker_type;
1855+
uintptr_t marker_value_unused;
1856+
uintptr_t timestamp_unused;
1857+
bool is_marker = record_type_is_marker(record, marker_type, marker_value_unused);
1858+
bool is_injection_point = false;
1859+
if (
1860+
// For syscalls not specified in -record_syscall, which do not have
1861+
// the func_id-func_retval markers.
1862+
record_type_is_timestamp(record, timestamp_unused) ||
1863+
// For syscalls that did not have a post-event because the trace ended.
1864+
record_type_is_thread_exit(record) ||
1865+
// For syscalls interrupted by a signal and did not have a post-syscall
1866+
// event.
1867+
(is_marker && marker_type == TRACE_MARKER_TYPE_KERNEL_EVENT)) {
1868+
is_injection_point = true;
1869+
} else if (is_marker && marker_type == TRACE_MARKER_TYPE_FUNC_ID) {
1870+
if (!input->saw_first_func_id_marker_after_syscall) {
1871+
// XXX i#7482: If we allow recording zero args for syscalls in
1872+
// -record_syscall, we would need to update this logic.
1873+
input->saw_first_func_id_marker_after_syscall = true;
1874+
} else {
1875+
// For syscalls specified in -record_syscall, for which we inject
1876+
// after the func_id-func_arg markers (if any) but before the
1877+
// func_id-func_retval markers.
1878+
is_injection_point = true;
1879+
}
1880+
}
1881+
if (is_injection_point) {
1882+
stream_status_t res = inject_pending_syscall_sequence(output, input, record);
1883+
if (res != stream_status_t::STATUS_OK)
1884+
return res;
1885+
input->saw_first_func_id_marker_after_syscall = false;
1886+
}
1887+
return stream_status_t::STATUS_OK;
1888+
}
1889+
17941890
template <typename RecordType, typename ReaderType>
17951891
typename scheduler_tmpl_t<RecordType, ReaderType>::stream_status_t
17961892
scheduler_impl_tmpl_t<RecordType, ReaderType>::inject_kernel_sequence(
@@ -1804,8 +1900,7 @@ scheduler_impl_tmpl_t<RecordType, ReaderType>::inject_kernel_sequence(
18041900
// not affect input stream ordinals.
18051901
// XXX: These will appear before the top headers of a new thread which is slightly
18061902
// odd to have regular records with the new tid before the top headers.
1807-
if (sequence.empty())
1808-
return stream_status_t::STATUS_EOF;
1903+
assert(!sequence.empty());
18091904
bool saw_any_instr = false;
18101905
bool set_branch_target_marker = false;
18111906
trace_marker_type_t marker_type;
@@ -2072,6 +2167,10 @@ scheduler_impl_tmpl_t<RecordType, ReaderType>::get_input_record_ordinal(
20722167
// own counts for every input and just ignore the input stream's tracking.
20732168
ord -= inputs_[index].queue.size() + (inputs_[index].cur_from_queue ? 1 : 0);
20742169
}
2170+
if (inputs_[index].in_syscall_injection) {
2171+
// We readahead by one record when injecting syscalls.
2172+
--ord;
2173+
}
20752174
return ord;
20762175
}
20772176

@@ -2732,6 +2831,7 @@ scheduler_impl_tmpl_t<RecordType, ReaderType>::next_record(output_ordinal_t outp
27322831
input_info_t *&input,
27332832
uint64_t cur_time)
27342833
{
2834+
record = create_invalid_record();
27352835
// We do not enforce a globally increasing time to avoid the synchronization cost; we
27362836
// do return an error on a time smaller than an input's current start time when we
27372837
// check for quantum end.
@@ -2830,7 +2930,14 @@ scheduler_impl_tmpl_t<RecordType, ReaderType>::next_record(output_ordinal_t outp
28302930
} else {
28312931
input->needs_advance = true;
28322932
}
2833-
if (input->at_eof || *input->reader == *input->reader_end) {
2933+
bool input_at_eof = input->at_eof || *input->reader == *input->reader_end;
2934+
if (input_at_eof && input->to_inject_syscall != input_info_t::INJECT_NONE) {
2935+
// The input's at eof but we have a syscall trace yet to be injected.
2936+
stream_status_t res =
2937+
inject_pending_syscall_sequence(output, input, record);
2938+
if (res != stream_status_t::STATUS_OK)
2939+
return res;
2940+
} else if (input_at_eof) {
28342941
if (!input->at_eof) {
28352942
stream_status_t status = mark_input_eof(*input);
28362943
if (status != sched_type_t::STATUS_OK)
@@ -2853,6 +2960,22 @@ scheduler_impl_tmpl_t<RecordType, ReaderType>::next_record(output_ordinal_t outp
28532960
record = **input->reader;
28542961
}
28552962
}
2963+
2964+
stream_status_t res =
2965+
maybe_inject_pending_syscall_sequence(output, input, record);
2966+
if (res != stream_status_t::STATUS_OK)
2967+
return res;
2968+
2969+
// Check whether all syscall injected records have been passed along
2970+
// to the caller.
2971+
trace_marker_type_t marker_type;
2972+
uintptr_t marker_value_unused;
2973+
if (input->in_syscall_injection &&
2974+
record_type_is_marker(outputs_[output].last_record, marker_type,
2975+
marker_value_unused) &&
2976+
marker_type == TRACE_MARKER_TYPE_SYSCALL_TRACE_END) {
2977+
input->in_syscall_injection = false;
2978+
}
28562979
VPRINT(this, 5,
28572980
"next_record[%d]: candidate record from %d (@%" PRId64 "): ", output,
28582981
input->index, get_instr_ordinal(*input));
@@ -2868,8 +2991,8 @@ scheduler_impl_tmpl_t<RecordType, ReaderType>::next_record(output_ordinal_t outp
28682991
bool preempt = false;
28692992
uint64_t blocked_time = 0;
28702993
uint64_t prev_time_in_quantum = input->prev_time_in_quantum;
2871-
stream_status_t res = check_for_input_switch(
2872-
output, record, input, cur_time, need_new_input, preempt, blocked_time);
2994+
res = check_for_input_switch(output, record, input, cur_time, need_new_input,
2995+
preempt, blocked_time);
28732996
if (res != sched_type_t::STATUS_OK && res != sched_type_t::STATUS_SKIPPED)
28742997
return res;
28752998
if (need_new_input) {
@@ -2988,18 +3111,11 @@ scheduler_impl_tmpl_t<RecordType, ReaderType>::finalize_next_record(
29883111
marker_type == TRACE_MARKER_TYPE_SYSCALL &&
29893112
syscall_sequence_.find(static_cast<int>(marker_value)) !=
29903113
syscall_sequence_.end()) {
2991-
int syscall_num = static_cast<int>(marker_value);
2992-
stream_status_t res =
2993-
inject_kernel_sequence(syscall_sequence_[syscall_num], input);
2994-
if (res == stream_status_t::STATUS_OK) {
2995-
++outputs_[output].stats
2996-
[memtrace_stream_t::SCHED_STAT_KERNEL_SYSCALL_SEQUENCE_INJECTIONS];
2997-
VPRINT(this, 3, "Inserted %zu syscall records for syscall %d to %d.%d\n",
2998-
syscall_sequence_[syscall_num].size(), syscall_num, input->workload,
2999-
input->index);
3000-
} else if (res != stream_status_t::STATUS_EOF) {
3001-
return res;
3002-
}
3114+
assert(!input->in_syscall_injection);
3115+
// The actual injection of the syscall trace happens later at the intended
3116+
// point between the syscall function tracing markers.
3117+
input->to_inject_syscall = static_cast<int>(marker_value);
3118+
input->saw_first_func_id_marker_after_syscall = false;
30033119
} else if (record_type_is_instr(record, &instr_pc, &instr_size)) {
30043120
input->last_pc_fallthrough = instr_pc + instr_size;
30053121
}

clients/drcachesim/scheduler/scheduler_impl.h

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -198,6 +198,8 @@ template <typename RecordType, typename ReaderType> class scheduler_impl_tmpl_t
198198
std::deque<RecordType> queue;
199199
bool cur_from_queue;
200200
addr_t last_pc_fallthrough = 0;
201+
// Whether we're in the middle of returning injected syscall records.
202+
bool in_syscall_injection = false;
201203

202204
std::set<output_ordinal_t> binding;
203205
int priority = 0;
@@ -262,6 +264,11 @@ template <typename RecordType, typename ReaderType> class scheduler_impl_tmpl_t
262264
// Causes the next unscheduled entry to abort.
263265
bool skip_next_unscheduled = false;
264266
uint64_t last_run_time = 0;
267+
int to_inject_syscall = INJECT_NONE;
268+
bool saw_first_func_id_marker_after_syscall = false;
269+
270+
// Sentinel value for to_inject_syscall.
271+
static constexpr int INJECT_NONE = -1;
265272
};
266273

267274
// XXX i#6831: Should this live entirely inside the dynamic subclass?
@@ -874,6 +881,9 @@ template <typename RecordType, typename ReaderType> class scheduler_impl_tmpl_t
874881
bool
875882
record_type_is_instr_boundary(RecordType record, RecordType prev_record);
876883

884+
bool
885+
record_type_is_thread_exit(RecordType record);
886+
877887
// Creates the marker we insert between regions of interest.
878888
RecordType
879889
create_region_separator_marker(memref_tid_t tid, uintptr_t value);
@@ -893,9 +903,23 @@ template <typename RecordType, typename ReaderType> class scheduler_impl_tmpl_t
893903
void
894904
update_next_record(output_ordinal_t output, RecordType &record);
895905

906+
// Performs the actual injection of the kernel sequence.
896907
stream_status_t
897908
inject_kernel_sequence(std::vector<RecordType> &sequence, input_info_t *input);
898909

910+
// Performs the actual injection of a kernel syscall sequence, using
911+
// inject_kernel_sequence as helper.
912+
stream_status_t
913+
inject_pending_syscall_sequence(output_ordinal_t output, input_info_t *input,
914+
RecordType &record);
915+
916+
// Checks whether we're at a suitable injection point for a yet to be injected
917+
// syscall sequence, and performs the injection using
918+
// inject_pending_syscall_sequence as helper.
919+
stream_status_t
920+
maybe_inject_pending_syscall_sequence(output_ordinal_t output, input_info_t *input,
921+
RecordType &record);
922+
899923
// Actions that must be taken only when we know for sure that the given record
900924
// is going to be the next record for some output stream.
901925
stream_status_t

clients/drcachesim/tests/allasm-repstr-basic-counts.templatex

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -11,16 +11,16 @@ Adios world!
1111
---- <application exited with code 0> ----
1212
Basic counts tool results:
1313
Total counts:
14-
103 total \(fetched\) instructions
15-
31 total unique \(fetched\) instructions
14+
110 total \(fetched\) instructions
15+
38 total unique \(fetched\) instructions
1616
4 total non-fetched instructions
1717
0 total prefetches
1818
7 total data loads
1919
5 total data stores
2020
0 total icache flushes
2121
0 total dcache flushes
2222
1 total threads
23-
48 total timestamp \+ cpuid markers
23+
56 total timestamp \+ cpuid markers
2424
0 total idle markers
2525
0 total wait markers
2626
0 total kernel transfer markers
@@ -30,20 +30,20 @@ Total counts:
3030
0 total function return value markers
3131
0 total physical address \+ virtual address marker pairs
3232
0 total physical address unavailable markers
33-
12 total system call number markers
33+
14 total system call number markers
3434
0 total blocking system call markers
3535
4 total other markers
36-
107 total encodings
36+
114 total encodings
3737
Thread [0-9]* counts:
38-
103 \(fetched\) instructions
39-
31 unique \(fetched\) instructions
38+
110 \(fetched\) instructions
39+
38 unique \(fetched\) instructions
4040
4 non-fetched instructions
4141
0 prefetches
4242
7 data loads
4343
5 data stores
4444
0 icache flushes
4545
0 dcache flushes
46-
48 timestamp \+ cpuid markers
46+
56 timestamp \+ cpuid markers
4747
0 idle markers
4848
0 wait markers
4949
0 kernel transfer markers
@@ -53,7 +53,7 @@ Thread [0-9]* counts:
5353
0 function return value markers
5454
0 physical address \+ virtual address marker pairs
5555
0 physical address unavailable markers
56-
12 system call number markers
56+
14 system call number markers
5757
0 blocking system call markers
5858
4 other markers
59-
107 encodings
59+
114 encodings

clients/drcachesim/tests/allasm_repstr.asm

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/* **********************************************************
2-
* Copyright (c) 2021-2023 Google, Inc. All rights reserved.
2+
* Copyright (c) 2021-2025 Google, Inc. All rights reserved.
33
* **********************************************************/
44

55
/*
@@ -53,6 +53,17 @@ _start:
5353
cld
5454
rep movsb
5555

56+
// Make a getpid syscall, which is a non-blocking one.
57+
mov eax, 39 // SYS_getpid
58+
syscall
59+
60+
// Make a membarrier syscall, which is a blocking one.
61+
mov rdi, 0 // MEMBARRIER_CMD_QUERY
62+
mov rsi, 0 // flags
63+
mov rdx, 0 // cpuid
64+
mov eax, 324 // SYS_membarrier
65+
syscall
66+
5667
// Test page-spanning accesses.
5768
lea rcx, page_str
5869
// Somehow the GNU assembler 2.38 is adding the load size (4 here) to

0 commit comments

Comments
 (0)