From 4ca1e2a1dafbd2294f8ec3db3e029195117afdf9 Mon Sep 17 00:00:00 2001 From: Denis Drakhnia Date: Thu, 30 May 2024 08:50:31 +0300 Subject: [PATCH] Use dynamic cs_insn.bytes array for long insns. Breaking API change. This allows supporting architectures with very long instruction words. --- cs.c | 41 ++++++++++++++++++++++++++++++------- cstool/cstool.c | 3 ++- include/capstone/capstone.h | 16 ++++++++++++--- tests/test_m680x.c | 2 +- 4 files changed, 50 insertions(+), 12 deletions(-) diff --git a/cs.c b/cs.c index d65675f5e1..4875b70ab3 100644 --- a/cs.c +++ b/cs.c @@ -822,6 +822,24 @@ static void fixup_asm_string(char *asm_str) { asm_str[k] = '\0'; } +static bool cs_insn_set_size(cs_insn *insn, uint16_t size) { + // TODO: reuse buffer? + if (insn->size > sizeof(insn->bytes_arr)) { + // free old buffer + cs_mem_free(insn->bytes_ptr); + } + if (size > sizeof(insn->bytes_arr)) { + // allocate new buffer + insn->bytes_ptr = cs_mem_malloc(size); + if (!insn->bytes_ptr) { + insn->size = 0; + return false; + } + } + insn->size = size; + return true; +} + // fill insn with mnemonic & operands info static void fill_insn(struct cs_struct *handle, cs_insn *insn, char *buffer, MCInst *mci, PostPrinter_t postprinter, const uint8_t *code) @@ -830,13 +848,10 @@ static void fill_insn(struct cs_struct *handle, cs_insn *insn, char *buffer, MCI char *sp, *mnem; #endif fixup_asm_string(buffer); - uint16_t copy_size = MIN(sizeof(insn->bytes), insn->size); // fill the instruction bytes. 
- // we might skip some redundant bytes in front in the case of X86 - memcpy(insn->bytes, code + insn->size - copy_size, copy_size); + memcpy(CS_INSN_BYTES(insn), code, insn->size); insn->op_str[0] = '\0'; - insn->size = copy_size; // alias instruction might have ID saved in OpcodePub if (MCInst_getOpcodePub(mci)) @@ -1114,6 +1129,9 @@ CAPSTONE_EXPORT void CAPSTONE_API cs_buffer_free(cs_buffer *buffer) { for (size_t i = 0; i < buffer->capacity; ++i) { // can be allocated in cs_disasm() + if (buffer->insn[i].size > sizeof(buffer->insn[i].bytes_arr)) { + cs_mem_free(buffer->insn[i].bytes_ptr); + } if (buffer->insn[i].detail) { cs_mem_free(buffer->insn[i].detail); } @@ -1141,6 +1159,9 @@ bool CAPSTONE_API cs_buffer_reserve_exact(cs_buffer *buffer, size_t required) { } for (size_t i = required; i < buffer->capacity; ++i) { + if (buffer->insn[i].size > sizeof(buffer->insn[i].bytes_arr)) { + cs_mem_free(buffer->insn[i].bytes_ptr); + } if (buffer->insn[i].detail) { cs_mem_free(buffer->insn[i].detail); } @@ -1277,7 +1298,10 @@ size_t CAPSTONE_API cs_disasm(csh ud, const uint8_t *code, size_t code_size, SStream ss; SStream_Init(&ss); - insn->size = insn_size; + if (!cs_insn_set_size(insn, insn_size)) { + handle->errnum = CS_ERR_MEM; + return 0; + } // map internal instruction opcode to public insn ID handle->insn_id(handle, insn, mci.Opcode); @@ -1320,8 +1344,11 @@ size_t CAPSTONE_API cs_disasm(csh ud, const uint8_t *code, size_t code_size, // we have to skip some amount of data, depending on arch & mode insn->id = 0; // invalid ID for this "data" instruction insn->address = address; - insn->size = (uint16_t) skipdata_bytes; - memcpy(insn->bytes, code, skipdata_bytes); + if (!cs_insn_set_size(insn, (uint16_t) skipdata_bytes)) { + handle->errnum = CS_ERR_MEM; + return 0; + } + memcpy(CS_INSN_BYTES(insn), code, skipdata_bytes); #ifdef CAPSTONE_DIET insn->mnemonic[0] = '\0'; insn->op_str[0] = '\0'; diff --git a/cstool/cstool.c b/cstool/cstool.c index 51399673ec..9472e215f6 
100644 --- a/cstool/cstool.c +++ b/cstool/cstool.c @@ -653,13 +653,14 @@ int main(int argc, char **argv) if (count > 0) { cs_insn *insn = buffer->insn; for (i = 0; i < count; i++) { + uint8_t *bytes = CS_INSN_BYTES(&insn[i]); int j; printf("%2"PRIx64" ", insn[i].address); for (j = 0; j < insn[i].size; j++) { if (j > 0) putchar(' '); - printf("%02x", insn[i].bytes[j]); + printf("%02x", bytes[j]); } // Align instruction when it varies in size. // ex: x86, s390x or compressed riscv diff --git a/include/capstone/capstone.h b/include/capstone/capstone.h index 7d4cc28308..168ae8d76f 100644 --- a/include/capstone/capstone.h +++ b/include/capstone/capstone.h @@ -450,9 +450,15 @@ typedef struct cs_insn { /// This information is available even when CS_OPT_DETAIL = CS_OPT_OFF uint16_t size; - /// Machine bytes of this instruction, with number of bytes indicated by @size above - /// This information is available even when CS_OPT_DETAIL = CS_OPT_OFF - uint8_t bytes[24]; + union { + // NOTE: Size of bytes_arr is selected based on the length of x86 instructions. + + /// Use CS_INSN_BYTES() to access instruction bytes. + uint8_t bytes_arr[16]; + + /// Use CS_INSN_BYTES() to access instruction bytes. + uint8_t *bytes_ptr; + }; /// Ascii text of instruction mnemonic /// This information is available even when CS_OPT_DETAIL = CS_OPT_OFF @@ -481,6 +487,10 @@ typedef struct cs_insn { cs_detail *detail; } cs_insn; +/// Machine bytes of this instruction, with number of bytes indicated by @size above +/// This information is available even when CS_OPT_DETAIL = CS_OPT_OFF +#define CS_INSN_BYTES(INSN) \ + ((INSN)->size <= sizeof((INSN)->bytes_arr) ? 
(INSN)->bytes_arr : (INSN)->bytes_ptr) /// Calculate the offset of a disassembled instruction in its buffer, given its position /// in its array of disassembled insn diff --git a/tests/test_m680x.c b/tests/test_m680x.c index 4cfb450266..a7b15f99c9 100644 --- a/tests/test_m680x.c +++ b/tests/test_m680x.c @@ -361,7 +361,7 @@ static void test() for (j = 0; j < count; j++) { int slen; printf("0x%04x: ", (uint16_t)insn[j].address); - print_string_hex_short(insn[j].bytes, + print_string_hex_short(CS_INSN_BYTES(&insn[j]), insn[j].size); printf("%.*s", 1 + ((5 - insn[j].size) * 2), nine_spaces);