Skip to content

Commit

Permalink
i#5505 kernel tracing: Add syscall instr encodings (#6479)
Browse files Browse the repository at this point in the history
Adds encodings for kernel system call instructions to the trace in
raw2trace. Kernel system call traces are decoded using libipt, which also
provides the instruction encodings. We add support to drir_t to copy
these encodings into a new buffer; the stored encoding for an instruction
is re-used for all dynamic instances of that instr, even across multiple
system call traces.

Fixes taken/not-taken detection for conditional branches in the syscall
trace.

Adds support in the syscall_mix tool to also report the number of kernel
traces seen for each system call. To achieve this, adds the sysnum as the
value of the system call trace start and end markers.

Ran all Intel-PT tests locally:

```
$ ctest -VV -R 'SUDO'
...
The following tests passed:
	code_api|client.drpttracer_SUDO-test
	code_api|tool.drcachesim.phys_SUDO # not really PT. Just included because of ctest -R.
	code_api|tool.drcachesim.phys-threads_SUDO # not really PT. Just included because of ctest -R.
	code_api|tool.drcacheoff.phys_SUDO # not really PT. Just included because of ctest -R.
	code_api|tool.drcacheoff.kernel.simple_SUDO
	code_api|tool.drcacheoff.kernel.opcode-mix_SUDO
	code_api|tool.drcacheoff.kernel.syscall-mix_SUDO

100% tests passed, 0 tests failed out of 7

```

Found some flakiness due to #6486 in local runs of the kernel sudo
tests, which will be addressed separately.

Issue: #5505
  • Loading branch information
abhinav92003 authored Dec 1, 2023
1 parent 30031e0 commit b9441b3
Show file tree
Hide file tree
Showing 20 changed files with 224 additions and 78 deletions.
6 changes: 4 additions & 2 deletions clients/drcachesim/common/trace_entry.h
Original file line number Diff line number Diff line change
Expand Up @@ -531,12 +531,14 @@ typedef enum {
TRACE_MARKER_TYPE_MAYBE_BLOCKING_SYSCALL,

/**
* Indicates a point in the trace where a syscall's kernel trace starts.
* Indicates a point in the trace where a syscall's kernel trace starts. The value
* of the marker is set to the syscall number.
*/
TRACE_MARKER_TYPE_SYSCALL_TRACE_START,

/**
* Indicates a point in the trace where a syscall's trace end.
* Indicates a point in the trace where a syscall's trace ends. The value of the
* marker is set to the syscall number.
*/
TRACE_MARKER_TYPE_SYSCALL_TRACE_END,

Expand Down
73 changes: 72 additions & 1 deletion clients/drcachesim/drpt2trace/drir.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,10 @@
#include "dr_api.h"
#include "utils.h"

#include <cstring>
#include <memory>
#include <unordered_map>

namespace dynamorio {
namespace drmemtrace {

Expand All @@ -62,8 +66,10 @@ class drir_t {
}
}

// Appends the given instr to the internal ilist, and records (replaces if
// one already exists) the given encoding for the orig_pc.
void
append(instr_t *instr)
append(instr_t *instr, app_pc orig_pc, int instr_length, uint8_t *encoding)
{
ASSERT(drcontext_ != nullptr, "drir_t: invalid drcontext_");
ASSERT(ilist_ != nullptr, "drir_t: invalid ilist_");
Expand All @@ -72,23 +78,88 @@ class drir_t {
return;
}
instrlist_append(ilist_, instr);
record_encoding(orig_pc, instr_length, encoding);
}

// Returns the opaque pointer to the dcontext_t used to construct this
// object. This is the same context that clear_ilist() passes to
// instrlist_clear().
void *
get_drcontext()
{
return drcontext_;
}

// Returns the instrlist_t of instrs accumulated so far through append().
// The list is emptied by clear_ilist().
instrlist_t *
get_ilist()
{
return ilist_;
}

// Clears the instrs accumulated in the ilist. Note that this does
// not clear the encodings accumulated: only the ilist is touched here, so
// the result of get_decode_pc() for an already-recorded orig_pc is unchanged
// by this call.
void
clear_ilist()
{
instrlist_clear(drcontext_, ilist_);
}

// Returns the address of the encoding recorded for the given orig_pc, or
// nullptr if no encoding has been recorded for it.
// Encodings are persisted across clear_ilist() calls, so we will
// return the same decode_pc for the same orig_pc unless a different encoding
// is later recorded for the same orig_pc.
app_pc
get_decode_pc(app_pc orig_pc)
{
    // Look the key up once and reuse the iterator, instead of find() followed
    // by operator[], which hashes orig_pc a second time and is an inserting
    // accessor that does not belong on a read-only path.
    auto it = decode_pc_.find(orig_pc);
    if (it == decode_pc_.end()) {
        return nullptr;
    }
    return it->second.first;
}

private:
void *drcontext_;
instrlist_t *ilist_;
#define SYSCALL_PT_ENCODING_BUF_SIZE (1024 * 1024)
// For each original app pc key, this stores a pair value: the first
// element is the address where the encoding is stored for the instruction
// at that app pc, the second element is the length of the encoding.
std::unordered_map<app_pc, std::pair<app_pc, int>> decode_pc_;
// A vector of buffers of size SYSCALL_PT_ENCODING_BUF_SIZE. Each buffer
// stores some encoded instructions back-to-back. Note that each element
// in the buffer is a single byte, so one instr's encoding occupies possibly
// multiple consecutive elements.
// We allocate new memory to store kernel instruction encodings in
// increments of SYSCALL_PT_ENCODING_BUF_SIZE. We do not treat this like a
// cache and clear previously stored encodings because we want to ensure
// decode_pc uniqueness to callers of get_decode_pc.
std::vector<std::unique_ptr<uint8_t[]>> instr_encodings_;
// Next available offset into instr_encodings_.back().
size_t next_encoding_offset_ = 0;

// Stores a private copy of the instr_len-byte encoding and maps orig_pc to
// that copy. If orig_pc already maps to a byte-identical encoding of the same
// length, nothing is stored. If the encoding differs, a new copy is appended
// and the mapping for orig_pc is replaced; the old bytes stay in their buffer.
void
record_encoding(app_pc orig_pc, int instr_len, uint8_t *encoding)
{
    const auto existing = decode_pc_.find(orig_pc);
    if (existing != decode_pc_.end()) {
        const auto &stored = existing->second;
        // Re-verify the bytes rather than trusting the pc alone, just in case
        // the kernel is doing JIT and the instruction at this pc has changed.
        const bool unchanged = stored.second == instr_len &&
            memcmp(stored.first, encoding, stored.second) == 0;
        if (unchanged) {
            return;
        }
    }
    // Start a fresh buffer if there is none yet or if the current one cannot
    // hold this encoding. Earlier buffers are kept so that previously recorded
    // encodings stay at their addresses.
    const bool need_new_buffer = instr_encodings_.empty() ||
        next_encoding_offset_ + instr_len >= SYSCALL_PT_ENCODING_BUF_SIZE;
    if (need_new_buffer) {
        instr_encodings_.emplace_back(new uint8_t[SYSCALL_PT_ENCODING_BUF_SIZE]);
        next_encoding_offset_ = 0;
    }
    uint8_t *cur_buffer = instr_encodings_.back().get();
    app_pc stored_at = cur_buffer + next_encoding_offset_;
    memcpy(stored_at, encoding, instr_len);
    decode_pc_[orig_pc] = std::make_pair(stored_at, instr_len);
    next_encoding_offset_ += instr_len;
}
};

} // namespace drmemtrace
Expand Down
7 changes: 4 additions & 3 deletions clients/drcachesim/drpt2trace/drpt2trace.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -461,7 +461,8 @@ main(int argc, const char *argv[])

uint8_t *pt_data = pt_raw_buffer.data();
size_t pt_data_size = pt_raw_buffer.size();
pt2ir_convert_status_t status = ptconverter->convert(pt_data, pt_data_size, drir);
pt2ir_convert_status_t status =
ptconverter->convert(pt_data, pt_data_size, &drir);
if (status != PT2IR_CONV_SUCCESS) {
std::cerr << CLIENT_NAME << ": failed to convert PT raw trace to DR IR."
<< "[error status: " << status << "]" << std::endl;
Expand Down Expand Up @@ -521,7 +522,7 @@ main(int argc, const char *argv[])

/* Convert the PT Data to DR IR. */
pt2ir_convert_status_t status =
ptconverter->convert(pt_data, pt_data_size, drir);
ptconverter->convert(pt_data, pt_data_size, &drir);
if (status != PT2IR_CONV_SUCCESS) {
std::cerr << CLIENT_NAME << ": failed to convert PT raw trace to DR IR."
<< "[error status: " << status << "]" << std::endl;
Expand All @@ -542,7 +543,7 @@ main(int argc, const char *argv[])
/* Convert the DR IR to trace entries. */
std::vector<trace_entry_t> entries;
ir2trace_convert_status_t ir2trace_convert_status =
ir2trace_t::convert(drir, entries);
ir2trace_t::convert(&drir, entries);
if (ir2trace_convert_status != IR2TRACE_CONV_SUCCESS) {
std::cerr << CLIENT_NAME << ": failed to convert DR IR to trace entries."
<< "[error status: " << ir2trace_convert_status << "]" << std::endl;
Expand Down
23 changes: 16 additions & 7 deletions clients/drcachesim/drpt2trace/ir2trace.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,16 +55,19 @@ namespace drmemtrace {
#define ERRMSG_HEADER "[drpt2ir] "

ir2trace_convert_status_t
ir2trace_t::convert(DR_PARAM_IN drir_t &drir,
ir2trace_t::convert(DR_PARAM_IN drir_t *drir,
DR_PARAM_INOUT std::vector<trace_entry_t> &trace,
DR_PARAM_IN int verbosity)
{
if (drir.get_ilist() == NULL) {
if (drir == nullptr || drir->get_ilist() == NULL) {
return IR2TRACE_CONV_ERROR_INVALID_PARAMETER;
}
instr_t *instr = instrlist_first(drir.get_ilist());
instr_t *instr = instrlist_first(drir->get_ilist());
bool prev_was_repstr = false;
while (instr != NULL) {
trace_entry_t entry = {};
entry.size = instr_length(GLOBAL_DCONTEXT, instr);
entry.addr = reinterpret_cast<uintptr_t>(instr_get_app_pc(instr));

if (!trace.empty() && trace.back().type == TRACE_TYPE_INSTR_CONDITIONAL_JUMP) {
if (instr_get_prev(instr) == nullptr ||
Expand All @@ -87,6 +90,7 @@ ir2trace_t::convert(DR_PARAM_IN drir_t &drir,
*/
entry.type = TRACE_TYPE_INSTR;
if (instr_opcode_valid(instr)) {
bool cur_is_repstr = false;
if (instr_is_call_direct(instr)) {
entry.type = TRACE_TYPE_INSTR_DIRECT_CALL;
} else if (instr_is_call_indirect(instr)) {
Expand All @@ -103,15 +107,20 @@ ir2trace_t::convert(DR_PARAM_IN drir_t &drir,
} else if (instr_get_opcode(instr) == OP_sysenter) {
entry.type = TRACE_TYPE_INSTR_SYSENTER;
} else if (instr_is_rep_string_op(instr)) {
entry.type = TRACE_TYPE_INSTR_MAYBE_FETCH;
cur_is_repstr = true;
if (prev_was_repstr) {
entry.type = TRACE_TYPE_INSTR_MAYBE_FETCH;
} else {
prev_was_repstr = true;
}
}
if (!cur_is_repstr) {
prev_was_repstr = false;
}
} else {
VPRINT(1, "Trying to convert an invalid instruction.\n");
}

entry.size = instr_length(GLOBAL_DCONTEXT, instr);
entry.addr = (uintptr_t)instr_get_app_pc(instr);

trace.push_back(entry);

instr = instr_get_next(instr);
Expand Down
2 changes: 1 addition & 1 deletion clients/drcachesim/drpt2trace/ir2trace.h
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ class ir2trace_t {
* error code.
*/
static ir2trace_convert_status_t
convert(DR_PARAM_IN drir_t &drir, DR_PARAM_INOUT std::vector<trace_entry_t> &trace,
convert(DR_PARAM_IN drir_t *drir, DR_PARAM_INOUT std::vector<trace_entry_t> &trace,
DR_PARAM_IN int verbosity = 0);
};

Expand Down
26 changes: 8 additions & 18 deletions clients/drcachesim/drpt2trace/pt2ir.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -257,13 +257,13 @@ pt2ir_t::init(DR_PARAM_IN pt2ir_config_t &pt2ir_config, DR_PARAM_IN int verbosit

pt2ir_convert_status_t
pt2ir_t::convert(DR_PARAM_IN const uint8_t *pt_data, DR_PARAM_IN size_t pt_data_size,
DR_PARAM_INOUT drir_t &drir)
DR_PARAM_INOUT drir_t *drir)
{
if (!pt2ir_initialized_) {
return PT2IR_CONV_ERROR_NOT_INITIALIZED;
}

if (pt_data == nullptr || pt_data_size <= 0) {
if (pt_data == nullptr || pt_data_size <= 0 || drir == nullptr) {
return PT2IR_CONV_ERROR_INVALID_INPUT;
}

Expand Down Expand Up @@ -379,24 +379,14 @@ pt2ir_t::convert(DR_PARAM_IN const uint8_t *pt_data, DR_PARAM_IN size_t pt_data_
}

/* Use drdecode to decode insn(pt_insn) to instr_t. */
instr_t *instr = instr_create(drir.get_drcontext());
instr_init(drir.get_drcontext(), instr);
instr_t *instr = instr_create(drir->get_drcontext());
instr_init(drir->get_drcontext(), instr);
instr_set_isa_mode(instr,
insn.mode == ptem_32bit ? DR_ISA_IA32 : DR_ISA_AMD64);
bool instr_valid = false;
if (decode(drir.get_drcontext(), insn.raw, instr) != nullptr)
instr_valid = true;
instr_set_translation(instr, (app_pc)insn.ip);
instr_allocate_raw_bits(drir.get_drcontext(), instr, insn.size);
if (!instr_valid) {
/* The decode() function will not correctly identify the raw bits for
* invalid instruction. So we need to set the raw bits of instr manually.
*/
instr_free_raw_bits(drir.get_drcontext(), instr);
instr_set_raw_bits(instr, insn.raw, insn.size);
instr_allocate_raw_bits(drir.get_drcontext(), instr, insn.size);
app_pc instr_ip = reinterpret_cast<app_pc>(insn.ip);
if (decode_from_copy(drir->get_drcontext(), insn.raw, instr_ip, instr) ==
nullptr) {
#ifdef DEBUG

/* Print the invalid instruction's PC and raw bytes in DEBUG builds. */
if (verbosity_ >= 1) {
fprintf(stderr,
Expand All @@ -409,7 +399,7 @@ pt2ir_t::convert(DR_PARAM_IN const uint8_t *pt_data, DR_PARAM_IN size_t pt_data_
}
#endif
}
drir.append(instr);
drir->append(instr, instr_ip, insn.size, insn.raw);
}
}
return PT2IR_CONV_SUCCESS;
Expand Down
2 changes: 1 addition & 1 deletion clients/drcachesim/drpt2trace/pt2ir.h
Original file line number Diff line number Diff line change
Expand Up @@ -365,7 +365,7 @@ class pt2ir_t {
*/
pt2ir_convert_status_t
convert(DR_PARAM_IN const uint8_t *pt_data, DR_PARAM_IN size_t pt_data_size,
DR_PARAM_INOUT drir_t &drir);
DR_PARAM_INOUT drir_t *drir);

private:
/* Diagnose converting errors and output diagnostic results.
Expand Down
14 changes: 0 additions & 14 deletions clients/drcachesim/drpt2trace/test_simple.expect

This file was deleted.

14 changes: 14 additions & 0 deletions clients/drcachesim/drpt2trace/test_simple.templatex
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
TAG 0x0000000000000000
\+0 L3 .* mov \$0x00000001 -> %eax
\+5 L3 .* mov \$0x00000001 -> %edi
\+10 L3 .* \$0x0000000000402000 -> %rsi
.*
\+20 L3 .* mov \$0x0000000e -> %edx
\+25 L3 .* syscall -> %rcx %r11
\+27 L3 .* mov \$0x0000003c -> %eax
\+32 L3 .* mov \$0x00000000 -> %edi
\+37 L3 .* syscall -> %rcx %r11
END 0x0000000000000000
.*
Number of Instructions: 8
Number of Trace Entries: 8
3 changes: 1 addition & 2 deletions clients/drcachesim/reader/reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -321,8 +321,7 @@ reader_t::process_input_entry()
version_ = cur_ref_.marker.marker_value;
else if (cur_ref_.marker.marker_type == TRACE_MARKER_TYPE_FILETYPE) {
filetype_ = cur_ref_.marker.marker_value;
if (TESTANY(OFFLINE_FILE_TYPE_ENCODINGS, filetype_) &&
!TESTANY(OFFLINE_FILE_TYPE_KERNEL_SYSCALLS, filetype_)) {
if (TESTANY(OFFLINE_FILE_TYPE_ENCODINGS, filetype_)) {
expect_no_encodings_ = false;
}
} else if (cur_ref_.marker.marker_type == TRACE_MARKER_TYPE_CACHE_LINE_SIZE)
Expand Down
6 changes: 6 additions & 0 deletions clients/drcachesim/tests/offline-kernel-opcode-mix.templatex
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
Hello, world!
Opcode mix tool results:
.*: total executed instructions
.*
.*: .*clac
.*
6 changes: 6 additions & 0 deletions clients/drcachesim/tests/offline-kernel-syscall-mix.templatex
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
Hello, world!
Syscall mix tool results:
syscall count : syscall_num
.*
syscall trace count : syscall_num
.*
2 changes: 1 addition & 1 deletion clients/drcachesim/tests/offline-syscall-mix.templatex
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
Hello, world!
Syscall mix tool results:
count : syscall_num
syscall count : syscall_num
( *[1-9][0-9]* : *[0-9]*.*)+
2 changes: 1 addition & 1 deletion clients/drcachesim/tests/syscall-mix.templatex
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
Hello, world!
---- <application exited with code 0> ----
Syscall mix tool results:
count : syscall_num
syscall count : syscall_num
( *[1-9][0-9]* : *[0-9]*.*)+
Loading

0 comments on commit b9441b3

Please sign in to comment.