diff --git a/bindings/python/capstone/__init__.py b/bindings/python/capstone/__init__.py
index 3a782a4af4..b0a9d3689b 100755
--- a/bindings/python/capstone/__init__.py
+++ b/bindings/python/capstone/__init__.py
@@ -489,13 +489,34 @@ class _cs_detail(ctypes.Structure):
         ('arch', _cs_arch),
     )
 
+def smallvec(ty, cap):
+    """Build a ctypes union laid out as `union { ty arr[cap]; ty *ptr; }`.
+
+    Calling an instance with the element count returns the inline array
+    when the count fits in `cap`, and the heap pointer otherwise.
+    """
+    class cls(ctypes.Union):
+        _fields_ = (
+            ('arr', ty * cap),
+            ('ptr', ctypes.POINTER(ty)),
+        )
+        capacity = cap
+
+        def __call__(self, size):
+            # Same selection rule as the C macro CS_INSN_BYTES().
+            if size > self.capacity:
+                return self.ptr
+            return self.arr
+
+    return cls
+
 class _cs_insn(ctypes.Structure):
     _fields_ = (
         ('id', ctypes.c_uint),
         ('alias_id', ctypes.c_uint64),
         ('address', ctypes.c_uint64),
         ('size', ctypes.c_uint16),
-        ('bytes', ctypes.c_ubyte * 24),
+        ('bytes', smallvec(ctypes.c_ubyte, 16)),
         ('mnemonic', ctypes.c_char * 32),
         ('op_str', ctypes.c_char * 160),
         ('is_alias', ctypes.c_bool),
@@ -704,7 +725,9 @@ def size(self):
     # return instruction's machine bytes (which should have @size bytes).
     @property
     def bytes(self):
-        return bytearray(self._raw.bytes)[:self._raw.size]
+        # Slice before converting: bytearray() applied to a ctypes pointer
+        # does not read the memory it points to, slicing the pointer does.
+        return bytearray(self._raw.bytes(self._raw.size)[:self._raw.size])
 
     # return instruction's mnemonic.
@property diff --git a/bindings/python/pyx/ccapstone.pxd b/bindings/python/pyx/ccapstone.pxd index 4970c45a4b..b9ed8f3c14 100644 --- a/bindings/python/pyx/ccapstone.pxd +++ b/bindings/python/pyx/ccapstone.pxd @@ -16,12 +16,16 @@ cdef extern from "": ctypedef struct cs_detail: pass + ctypedef union cs_insn_bytes: + uint8_t arr[16] + uint8_t *ptr + ctypedef struct cs_insn: unsigned int id uint64_t alias_id; uint64_t address uint16_t size - uint8_t bytes[24] + cs_insn_bytes bytes char mnemonic[32] char op_str[160] bool is_alias; diff --git a/bindings/python/pyx/ccapstone.pyx b/bindings/python/pyx/ccapstone.pyx index 3079ae8424..3b34dd99ef 100644 --- a/bindings/python/pyx/ccapstone.pyx +++ b/bindings/python/pyx/ccapstone.pyx @@ -108,7 +108,11 @@ cdef class CsInsn(object): # return instruction's machine bytes (which should have @size bytes). @property def bytes(self): - return bytearray(self._raw.bytes[:self._raw.size]) + if self._raw.size > 16: + raw = self._raw.bytes.ptr + else: + raw = self._raw.bytes.arr + return bytearray(raw[:self._raw.size]) # return instruction's mnemonic. @property diff --git a/cs.c b/cs.c index d65675f5e1..962d47c888 100644 --- a/cs.c +++ b/cs.c @@ -822,6 +822,33 @@ static void fixup_asm_string(char *asm_str) { asm_str[k] = '\0'; } +static void cs_insn_free(cs_insn *insn) { + if (insn->size > sizeof(insn->bytes.arr)) { + cs_mem_free(insn->bytes.ptr); + } + if (insn->detail) { + cs_mem_free(insn->detail); + } +} + +static bool cs_insn_set_size(cs_insn *insn, uint16_t size) { + // TODO: reuse buffer? 
+ if (insn->size > sizeof(insn->bytes.arr)) { + // free old buffer + cs_mem_free(insn->bytes.ptr); + } + if (size > sizeof(insn->bytes.arr)) { + // allocate new buffer + insn->bytes.ptr = cs_mem_malloc(size); + if (!insn->bytes.ptr) { + insn->size = 0; + return false; + } + } + insn->size = size; + return true; +} + // fill insn with mnemonic & operands info static void fill_insn(struct cs_struct *handle, cs_insn *insn, char *buffer, MCInst *mci, PostPrinter_t postprinter, const uint8_t *code) @@ -830,13 +857,10 @@ static void fill_insn(struct cs_struct *handle, cs_insn *insn, char *buffer, MCI char *sp, *mnem; #endif fixup_asm_string(buffer); - uint16_t copy_size = MIN(sizeof(insn->bytes), insn->size); // fill the instruction bytes. - // we might skip some redundant bytes in front in the case of X86 - memcpy(insn->bytes, code + insn->size - copy_size, copy_size); + memcpy(CS_INSN_BYTES(insn), code, insn->size); insn->op_str[0] = '\0'; - insn->size = copy_size; // alias instruction might have ID saved in OpcodePub if (MCInst_getOpcodePub(mci)) @@ -1113,10 +1137,7 @@ cs_buffer * CAPSTONE_API cs_buffer_new(size_t capacity) { CAPSTONE_EXPORT void CAPSTONE_API cs_buffer_free(cs_buffer *buffer) { for (size_t i = 0; i < buffer->capacity; ++i) { - // can be allocated in cs_disasm() - if (buffer->insn[i].detail) { - cs_mem_free(buffer->insn[i].detail); - } + cs_insn_free(&buffer->insn[i]); } cs_mem_free(buffer->insn); cs_mem_free(buffer); @@ -1141,9 +1162,7 @@ bool CAPSTONE_API cs_buffer_reserve_exact(cs_buffer *buffer, size_t required) { } for (size_t i = required; i < buffer->capacity; ++i) { - if (buffer->insn[i].detail) { - cs_mem_free(buffer->insn[i].detail); - } + cs_insn_free(&buffer->insn[i]); } } @@ -1277,7 +1296,10 @@ size_t CAPSTONE_API cs_disasm(csh ud, const uint8_t *code, size_t code_size, SStream ss; SStream_Init(&ss); - insn->size = insn_size; + if (!cs_insn_set_size(insn, insn_size)) { + handle->errnum = CS_ERR_MEM; + return 0; + } // map internal 
instruction opcode to public insn ID handle->insn_id(handle, insn, mci.Opcode); @@ -1320,8 +1342,11 @@ size_t CAPSTONE_API cs_disasm(csh ud, const uint8_t *code, size_t code_size, // we have to skip some amount of data, depending on arch & mode insn->id = 0; // invalid ID for this "data" instruction insn->address = address; - insn->size = (uint16_t) skipdata_bytes; - memcpy(insn->bytes, code, skipdata_bytes); + if (!cs_insn_set_size(insn, (uint16_t) skipdata_bytes)) { + handle->errnum = CS_ERR_MEM; + return 0; + } + memcpy(CS_INSN_BYTES(insn), code, skipdata_bytes); #ifdef CAPSTONE_DIET insn->mnemonic[0] = '\0'; insn->op_str[0] = '\0'; diff --git a/cstool/cstool.c b/cstool/cstool.c index 51399673ec..9472e215f6 100644 --- a/cstool/cstool.c +++ b/cstool/cstool.c @@ -653,13 +653,14 @@ int main(int argc, char **argv) if (count > 0) { cs_insn *insn = buffer->insn; for (i = 0; i < count; i++) { + uint8_t *bytes = CS_INSN_BYTES(&insn[i]); int j; printf("%2"PRIx64" ", insn[i].address); for (j = 0; j < insn[i].size; j++) { if (j > 0) putchar(' '); - printf("%02x", insn[i].bytes[j]); + printf("%02x", bytes[j]); } // Align instruction when it varies in size. // ex: x86, s390x or compressed riscv diff --git a/include/capstone/capstone.h b/include/capstone/capstone.h index 7d4cc28308..d091590c0f 100644 --- a/include/capstone/capstone.h +++ b/include/capstone/capstone.h @@ -450,9 +450,12 @@ typedef struct cs_insn { /// This information is available even when CS_OPT_DETAIL = CS_OPT_OFF uint16_t size; - /// Machine bytes of this instruction, with number of bytes indicated by @size above - /// This information is available even when CS_OPT_DETAIL = CS_OPT_OFF - uint8_t bytes[24]; + /// Use CS_INSN_BYTES() to access instruction bytes. + union cs_insn_bytes { + // NOTE: Size is selected based on the length of x86 instructions. 
+ uint8_t arr[16]; + uint8_t *ptr; + } bytes; /// Ascii text of instruction mnemonic /// This information is available even when CS_OPT_DETAIL = CS_OPT_OFF @@ -481,6 +484,10 @@ typedef struct cs_insn { cs_detail *detail; } cs_insn; +/// Machine bytes of this instruction, with number of bytes indicated by @size above +/// This information is available even when CS_OPT_DETAIL = CS_OPT_OFF +#define CS_INSN_BYTES(INSN) \ + ((INSN)->size > sizeof((INSN)->bytes.arr) ? (INSN)->bytes.ptr : (INSN)->bytes.arr) /// Calculate the offset of a disassembled instruction in its buffer, given its position /// in its array of disassembled insn diff --git a/tests/test_m680x.c b/tests/test_m680x.c index 4cfb450266..a7b15f99c9 100644 --- a/tests/test_m680x.c +++ b/tests/test_m680x.c @@ -361,7 +361,7 @@ static void test() for (j = 0; j < count; j++) { int slen; printf("0x%04x: ", (uint16_t)insn[j].address); - print_string_hex_short(insn[j].bytes, + print_string_hex_short(CS_INSN_BYTES(&insn[j]), insn[j].size); printf("%.*s", 1 + ((5 - insn[j].size) * 2), nine_spaces);