Skip to content

Commit

Permalink
Use dynamic cs_insn.bytes array for long insns. Breaking API change.
Browse files Browse the repository at this point in the history
This allows supporting architectures with very long instruction words.
  • Loading branch information
numas13 committed May 30, 2024
1 parent 40ca3de commit 4ca1e2a
Show file tree
Hide file tree
Showing 4 changed files with 50 additions and 12 deletions.
41 changes: 34 additions & 7 deletions cs.c
Original file line number Diff line number Diff line change
Expand Up @@ -822,6 +822,24 @@ static void fixup_asm_string(char *asm_str) {
asm_str[k] = '\0';
}

/// Record @size as the byte length of @insn and make sure storage for that many
/// bytes is available.
///
/// A size that fits in insn->bytes_arr needs no allocation. A larger size gets a
/// buffer from cs_mem_malloc(), stored in insn->bytes_ptr.
///
/// @return true on success; false when the allocation fails, in which case
/// insn->size is reset to 0.
static bool cs_insn_set_size(cs_insn *insn, uint16_t size)
{
	const size_t inline_capacity = sizeof(insn->bytes_arr);

	// TODO: reuse buffer?
	// The previous size tells whether bytes_ptr currently refers to an
	// allocated buffer; if so, release it before anything else.
	if (insn->size > inline_capacity)
		cs_mem_free(insn->bytes_ptr);

	// Short instruction: the inline array is enough.
	if (size <= inline_capacity) {
		insn->size = size;
		return true;
	}

	// Long instruction: needs its own buffer.
	insn->bytes_ptr = cs_mem_malloc(size);
	if (!insn->bytes_ptr) {
		// Size 0 keeps later "size > sizeof(bytes_arr)" checks from
		// treating bytes_ptr as an allocated buffer.
		insn->size = 0;
		return false;
	}

	insn->size = size;
	return true;
}

// fill insn with mnemonic & operands info
static void fill_insn(struct cs_struct *handle, cs_insn *insn, char *buffer, MCInst *mci,
PostPrinter_t postprinter, const uint8_t *code)
Expand All @@ -830,13 +848,10 @@ static void fill_insn(struct cs_struct *handle, cs_insn *insn, char *buffer, MCI
char *sp, *mnem;
#endif
fixup_asm_string(buffer);
uint16_t copy_size = MIN(sizeof(insn->bytes), insn->size);

// fill the instruction bytes.
// we might skip some redundant bytes in front in the case of X86
memcpy(insn->bytes, code + insn->size - copy_size, copy_size);
memcpy(CS_INSN_BYTES(insn), code, insn->size);
insn->op_str[0] = '\0';
insn->size = copy_size;

// alias instruction might have ID saved in OpcodePub
if (MCInst_getOpcodePub(mci))
Expand Down Expand Up @@ -1114,6 +1129,9 @@ CAPSTONE_EXPORT
void CAPSTONE_API cs_buffer_free(cs_buffer *buffer) {
for (size_t i = 0; i < buffer->capacity; ++i) {
// can be allocated in cs_disasm()
if (buffer->insn[i].size > sizeof(buffer->insn[i].bytes_arr)) {
cs_mem_free(buffer->insn[i].bytes_ptr);
}
if (buffer->insn[i].detail) {
cs_mem_free(buffer->insn[i].detail);
}
Expand Down Expand Up @@ -1141,6 +1159,9 @@ bool CAPSTONE_API cs_buffer_reserve_exact(cs_buffer *buffer, size_t required) {
}

for (size_t i = required; i < buffer->capacity; ++i) {
if (buffer->insn[i].size > sizeof(buffer->insn[i].bytes_arr)) {
cs_mem_free(buffer->insn[i].bytes_ptr);
}
if (buffer->insn[i].detail) {
cs_mem_free(buffer->insn[i].detail);
}
Expand Down Expand Up @@ -1277,7 +1298,10 @@ size_t CAPSTONE_API cs_disasm(csh ud, const uint8_t *code, size_t code_size,
SStream ss;
SStream_Init(&ss);

insn->size = insn_size;
if (!cs_insn_set_size(insn, insn_size)) {
handle->errnum = CS_ERR_MEM;
return 0;
}

// map internal instruction opcode to public insn ID
handle->insn_id(handle, insn, mci.Opcode);
Expand Down Expand Up @@ -1320,8 +1344,11 @@ size_t CAPSTONE_API cs_disasm(csh ud, const uint8_t *code, size_t code_size,
// we have to skip some amount of data, depending on arch & mode
insn->id = 0; // invalid ID for this "data" instruction
insn->address = address;
insn->size = (uint16_t) skipdata_bytes;
memcpy(insn->bytes, code, skipdata_bytes);
if (!cs_insn_set_size(insn, (uint16_t) skipdata_bytes)) {
handle->errnum = CS_ERR_MEM;
return 0;
}
memcpy(CS_INSN_BYTES(insn), code, skipdata_bytes);
#ifdef CAPSTONE_DIET
insn->mnemonic[0] = '\0';
insn->op_str[0] = '\0';
Expand Down
3 changes: 2 additions & 1 deletion cstool/cstool.c
Original file line number Diff line number Diff line change
Expand Up @@ -653,13 +653,14 @@ int main(int argc, char **argv)
if (count > 0) {
cs_insn *insn = buffer->insn;
for (i = 0; i < count; i++) {
uint8_t *bytes = CS_INSN_BYTES(&insn[i]);
int j;

printf("%2"PRIx64" ", insn[i].address);
for (j = 0; j < insn[i].size; j++) {
if (j > 0)
putchar(' ');
printf("%02x", insn[i].bytes[j]);
printf("%02x", bytes[j]);
}
// Align instruction when it varies in size.
// ex: x86, s390x or compressed riscv
Expand Down
16 changes: 13 additions & 3 deletions include/capstone/capstone.h
Original file line number Diff line number Diff line change
Expand Up @@ -450,9 +450,15 @@ typedef struct cs_insn {
/// This information is available even when CS_OPT_DETAIL = CS_OPT_OFF
uint16_t size;

/// Machine bytes of this instruction, with number of bytes indicated by @size above
/// This information is available even when CS_OPT_DETAIL = CS_OPT_OFF
uint8_t bytes[24];
union {
// NOTE: Size of bytes_arr is selected based on the length of x86 instructions.

/// Use CS_INSN_BYTES() to access instruction bytes.
uint8_t bytes_arr[16];

/// Use CS_INSN_BYTES() to access instruction bytes.
uint8_t *bytes_ptr;
};

/// Ascii text of instruction mnemonic
/// This information is available even when CS_OPT_DETAIL = CS_OPT_OFF
Expand Down Expand Up @@ -481,6 +487,10 @@ typedef struct cs_insn {
cs_detail *detail;
} cs_insn;

/// Pointer to the machine bytes of the instruction INSN points to; the number of
/// valid bytes is given by its @size field.
/// When @size exceeds the capacity of bytes_arr the bytes are read through
/// bytes_ptr, otherwise they are stored inline in bytes_arr.
/// INSN is expanded more than once, so pass an expression without side effects.
/// This information is available even when CS_OPT_DETAIL = CS_OPT_OFF
#define CS_INSN_BYTES(INSN) \
	((INSN)->size > sizeof((INSN)->bytes_arr) ? (INSN)->bytes_ptr : (INSN)->bytes_arr)

/// Calculate the offset of a disassembled instruction in its buffer, given its position
/// in its array of disassembled insn
Expand Down
2 changes: 1 addition & 1 deletion tests/test_m680x.c
Original file line number Diff line number Diff line change
Expand Up @@ -361,7 +361,7 @@ static void test()
for (j = 0; j < count; j++) {
int slen;
printf("0x%04x: ", (uint16_t)insn[j].address);
print_string_hex_short(insn[j].bytes,
print_string_hex_short(CS_INSN_BYTES(&insn[j]),
insn[j].size);
printf("%.*s", 1 + ((5 - insn[j].size) * 2),
nine_spaces);
Expand Down

0 comments on commit 4ca1e2a

Please sign in to comment.