Skip to content

Commit

Permalink
Use dynamic cs_ins.bytes array for long insns. Breaking API change.
Browse files Browse the repository at this point in the history
This allows supporting architectures with very long instruction words.
  • Loading branch information
numas13 committed May 30, 2024
1 parent 40ca3de commit 39538b4
Show file tree
Hide file tree
Showing 7 changed files with 80 additions and 23 deletions.
20 changes: 18 additions & 2 deletions bindings/python/capstone/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -489,13 +489,29 @@ class _cs_detail(ctypes.Structure):
('arch', _cs_arch),
)

def smallvec(ty, cap):
    """Build a ctypes union type mirroring a C "small vector" of ``ty``.

    The union overlays an inline array of ``cap`` elements (``arr``) with a
    pointer to out-of-line storage (``ptr``). Which member is valid is decided
    by the element count, which is stored outside the union.

    :param ty: ctypes element type (e.g. ``ctypes.c_ubyte``).
    :param cap: number of elements that fit in the inline array.
    :return: the generated ``ctypes.Union`` subclass. Calling an instance with
        the element count returns a ctypes array holding the elements.
    """
    class cls(ctypes.Union):
        _fields_ = (
            ('arr', ty * cap),
            ('ptr', ctypes.POINTER(ty))
        )
        capacity = cap

        def __call__(self, size):
            """Return a ctypes array view of the stored elements.

            For ``size <= capacity`` this is the inline array (``capacity``
            elements long); otherwise it is a ``size``-element array that
            aliases the memory ``ptr`` points to.
            """
            if size > self.capacity:
                # Do not hand out the bare pointer: through the buffer
                # protocol a ctypes pointer exposes its own bytes (the
                # address), so bytearray(ptr) would not yield the data.
                return ctypes.cast(self.ptr, ctypes.POINTER(ty * size)).contents
            return self.arr

    return cls

class _cs_insn(ctypes.Structure):
_fields_ = (
('id', ctypes.c_uint),
('alias_id', ctypes.c_uint64),
('address', ctypes.c_uint64),
('size', ctypes.c_uint16),
('bytes', ctypes.c_ubyte * 24),
('bytes', smallvec(ctypes.c_ubyte, 16)),
('mnemonic', ctypes.c_char * 32),
('op_str', ctypes.c_char * 160),
('is_alias', ctypes.c_bool),
Expand Down Expand Up @@ -704,7 +720,7 @@ def size(self):
# return instruction's machine bytes (which should have @size bytes).
@property
def bytes(self):
return bytearray(self._raw.bytes)[:self._raw.size]
return bytearray(self._raw.bytes(self._raw.size))[:self._raw.size]

# return instruction's mnemonic.
@property
Expand Down
6 changes: 5 additions & 1 deletion bindings/python/pyx/ccapstone.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,16 @@ cdef extern from "<capstone/capstone.h>":
ctypedef struct cs_detail:
pass

# Storage for an instruction's machine bytes: either the inline 16-byte
# array `arr` or a pointer `ptr` to a separate buffer. The union carries no
# tag of its own; users pick the member based on the instruction size.
ctypedef union cs_insn_bytes:
uint8_t arr[16]
uint8_t *ptr

ctypedef struct cs_insn:
unsigned int id
uint64_t alias_id;
uint64_t address
uint16_t size
uint8_t bytes[24]
cs_insn_bytes bytes
char mnemonic[32]
char op_str[160]
bool is_alias;
Expand Down
6 changes: 5 additions & 1 deletion bindings/python/pyx/ccapstone.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,11 @@ cdef class CsInsn(object):
# return instruction's machine bytes (which should have @size bytes).
@property
def bytes(self):
return bytearray(self._raw.bytes[:self._raw.size])
if self._raw.size > 16:
raw = self._raw.bytes.ptr
else:
raw = self._raw.bytes.arr
return bytearray(raw[:self._raw.size])

# return instruction's mnemonic.
@property
Expand Down
53 changes: 39 additions & 14 deletions cs.c
Original file line number Diff line number Diff line change
Expand Up @@ -822,6 +822,33 @@ static void fixup_asm_string(char *asm_str) {
asm_str[k] = '\0';
}

/// Release the heap resources owned by @insn and leave it in an empty state.
///
/// The instruction bytes are heap-allocated only when insn->size exceeds the
/// inline capacity of insn->bytes.arr; in that case insn->bytes.ptr is freed.
/// insn->detail is freed when it is set. The cs_insn object itself is not
/// freed.
///
/// On return insn->size is 0 and insn->detail is NULL, so releasing the same
/// instruction again, or resizing it afterwards, cannot free memory twice.
static void cs_insn_free(cs_insn *insn) {
	if (insn->size > sizeof(insn->bytes.arr)) {
		cs_mem_free(insn->bytes.ptr);
	}
	// size == 0 selects the inline array, so the stale pointer is never used again
	insn->size = 0;

	if (insn->detail) {
		cs_mem_free(insn->detail);
		insn->detail = NULL;
	}
}

/// Set the byte count of @insn and make sure storage for @size bytes exists.
///
/// Any heap buffer held for the previous size is released first. Sizes that
/// fit in insn->bytes.arr use the inline array; larger sizes get a freshly
/// allocated buffer in insn->bytes.ptr. The byte contents are not copied or
/// initialised here.
///
/// @return true on success; false if the allocation failed, in which case
///         insn->size is set to 0.
static bool cs_insn_set_size(cs_insn *insn, uint16_t size) {
	const size_t inline_cap = sizeof(insn->bytes.arr);

	// TODO: reuse buffer?
	if (insn->size > inline_cap) {
		// previous bytes lived on the heap
		cs_mem_free(insn->bytes.ptr);
	}

	if (size <= inline_cap) {
		// inline array is large enough, nothing to allocate
		insn->size = size;
		return true;
	}

	insn->bytes.ptr = cs_mem_malloc(size);
	if (insn->bytes.ptr == NULL) {
		insn->size = 0;
		return false;
	}

	insn->size = size;
	return true;
}

// fill insn with mnemonic & operands info
static void fill_insn(struct cs_struct *handle, cs_insn *insn, char *buffer, MCInst *mci,
PostPrinter_t postprinter, const uint8_t *code)
Expand All @@ -830,13 +857,10 @@ static void fill_insn(struct cs_struct *handle, cs_insn *insn, char *buffer, MCI
char *sp, *mnem;
#endif
fixup_asm_string(buffer);
uint16_t copy_size = MIN(sizeof(insn->bytes), insn->size);

// fill the instruction bytes.
// we might skip some redundant bytes in front in the case of X86
memcpy(insn->bytes, code + insn->size - copy_size, copy_size);
memcpy(CS_INSN_BYTES(insn), code, insn->size);
insn->op_str[0] = '\0';
insn->size = copy_size;

// alias instruction might have ID saved in OpcodePub
if (MCInst_getOpcodePub(mci))
Expand Down Expand Up @@ -1113,10 +1137,7 @@ cs_buffer * CAPSTONE_API cs_buffer_new(size_t capacity) {
CAPSTONE_EXPORT
void CAPSTONE_API cs_buffer_free(cs_buffer *buffer) {
for (size_t i = 0; i < buffer->capacity; ++i) {
// can be allocated in cs_disasm()
if (buffer->insn[i].detail) {
cs_mem_free(buffer->insn[i].detail);
}
cs_insn_free(&buffer->insn[i]);
}
cs_mem_free(buffer->insn);
cs_mem_free(buffer);
Expand All @@ -1141,9 +1162,7 @@ bool CAPSTONE_API cs_buffer_reserve_exact(cs_buffer *buffer, size_t required) {
}

for (size_t i = required; i < buffer->capacity; ++i) {
if (buffer->insn[i].detail) {
cs_mem_free(buffer->insn[i].detail);
}
cs_insn_free(&buffer->insn[i]);
}
}

Expand Down Expand Up @@ -1277,7 +1296,10 @@ size_t CAPSTONE_API cs_disasm(csh ud, const uint8_t *code, size_t code_size,
SStream ss;
SStream_Init(&ss);

insn->size = insn_size;
if (!cs_insn_set_size(insn, insn_size)) {
handle->errnum = CS_ERR_MEM;
return 0;
}

// map internal instruction opcode to public insn ID
handle->insn_id(handle, insn, mci.Opcode);
Expand Down Expand Up @@ -1320,8 +1342,11 @@ size_t CAPSTONE_API cs_disasm(csh ud, const uint8_t *code, size_t code_size,
// we have to skip some amount of data, depending on arch & mode
insn->id = 0; // invalid ID for this "data" instruction
insn->address = address;
insn->size = (uint16_t) skipdata_bytes;
memcpy(insn->bytes, code, skipdata_bytes);
if (!cs_insn_set_size(insn, (uint16_t) skipdata_bytes)) {
handle->errnum = CS_ERR_MEM;
return 0;
}
memcpy(CS_INSN_BYTES(insn), code, skipdata_bytes);
#ifdef CAPSTONE_DIET
insn->mnemonic[0] = '\0';
insn->op_str[0] = '\0';
Expand Down
3 changes: 2 additions & 1 deletion cstool/cstool.c
Original file line number Diff line number Diff line change
Expand Up @@ -653,13 +653,14 @@ int main(int argc, char **argv)
if (count > 0) {
cs_insn *insn = buffer->insn;
for (i = 0; i < count; i++) {
uint8_t *bytes = CS_INSN_BYTES(&insn[i]);
int j;

printf("%2"PRIx64" ", insn[i].address);
for (j = 0; j < insn[i].size; j++) {
if (j > 0)
putchar(' ');
printf("%02x", insn[i].bytes[j]);
printf("%02x", bytes[j]);
}
// Align instruction when it varies in size.
// ex: x86, s390x or compressed riscv
Expand Down
13 changes: 10 additions & 3 deletions include/capstone/capstone.h
Original file line number Diff line number Diff line change
Expand Up @@ -450,9 +450,12 @@ typedef struct cs_insn {
/// This information is available even when CS_OPT_DETAIL = CS_OPT_OFF
uint16_t size;

/// Machine bytes of this instruction, with number of bytes indicated by @size above
/// This information is available even when CS_OPT_DETAIL = CS_OPT_OFF
uint8_t bytes[24];
/// Use CS_INSN_BYTES() to access instruction bytes.
union cs_insn_bytes {
// NOTE: Size is selected based on the length of x86 instructions.
uint8_t arr[16];
uint8_t *ptr;
} bytes;

/// Ascii text of instruction mnemonic
/// This information is available even when CS_OPT_DETAIL = CS_OPT_OFF
Expand Down Expand Up @@ -481,6 +484,10 @@ typedef struct cs_insn {
cs_detail *detail;
} cs_insn;

/// Machine bytes of this instruction, with number of bytes indicated by @size above
/// This information is available even when CS_OPT_DETAIL = CS_OPT_OFF
///
/// Evaluates to (INSN)->bytes.arr when (INSN)->size fits in the inline array,
/// and to (INSN)->bytes.ptr otherwise.
/// NOTE: INSN is expanded more than once; pass an expression without side effects.
#define CS_INSN_BYTES(INSN) \
((INSN)->size > sizeof((INSN)->bytes.arr) ? (INSN)->bytes.ptr : (INSN)->bytes.arr)

/// Calculate the offset of a disassembled instruction in its buffer, given its position
/// in its array of disassembled insn
Expand Down
2 changes: 1 addition & 1 deletion tests/test_m680x.c
Original file line number Diff line number Diff line change
Expand Up @@ -361,7 +361,7 @@ static void test()
for (j = 0; j < count; j++) {
int slen;
printf("0x%04x: ", (uint16_t)insn[j].address);
print_string_hex_short(insn[j].bytes,
print_string_hex_short(CS_INSN_BYTES(&insn[j]),
insn[j].size);
printf("%.*s", 1 + ((5 - insn[j].size) * 2),
nine_spaces);
Expand Down

0 comments on commit 39538b4

Please sign in to comment.