Skip to content

Commit

Permalink
Add cs_buffer. Breaking API change.
Browse files Browse the repository at this point in the history
Remove cs_malloc() and cs_free().

This API change unifies the disassembly process. Previously there were two
separate functions: one to disassemble a single instruction at a time in a
loop, and one to disassemble many instructions into a dynamically allocated
buffer. This commit introduces a user-allocatable buffer that can be used in
both situations with one function.

cs_disasm_iter() is a tiny wrapper around cs_disasm().

Updating the use of cs_disasm_iter():

    // old api
    cs_insn *insn = cs_malloc(handle);
    while (cs_disasm_iter(handle, &code, &code_size, &ip, insn)) {
        disassembled_instructions += 1;
    }
    cs_free(insn);

Must be changed to:

    // new api
    cs_buffer *buffer = cs_buffer_new(1); // create buffer with 1 element
    while (cs_disasm_iter(handle, &code, &code_size, &ip, buffer)) {
        cs_insn *insn = &buffer->insn[0]; // get first insn in a buffer
        disassembled_instructions += 1;
    }
    cs_buffer_free(buffer); // free buffer

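Updating the use of cs_disasm() is straightforward: create a buffer with
cs_buffer_new(0), pass it to cs_disasm() in place of the old cs_insn **
argument, read the instructions from buffer->insn, and release the buffer
with cs_buffer_free() instead of cs_free().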
  • Loading branch information
numas13 committed May 26, 2024
1 parent a8c0998 commit 40ca3de
Show file tree
Hide file tree
Showing 37 changed files with 531 additions and 568 deletions.
123 changes: 70 additions & 53 deletions bindings/python/capstone/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -503,6 +503,13 @@ class _cs_insn(ctypes.Structure):
('detail', ctypes.POINTER(_cs_detail)),
)

class _cs_buffer(ctypes.Structure):
# ctypes mirror of the C `cs_buffer` struct. Instances are obtained from
# cs_buffer_new(), passed to cs_disasm()/cs_disasm_iter(), and released with
# cs_buffer_free(). Field order and types follow the `cs_buffer` declaration
# in ccapstone.pxd (cs_insn *insn; size_t capacity; size_t count).
_fields_ = (
# Pointer to the instruction array; callers index it as insn[i].
('insn', ctypes.POINTER(_cs_insn)),
# NOTE(review): presumably the number of allocated instruction slots
# (the argument given to cs_buffer_new) -- confirm against capstone.h.
('capacity', ctypes.c_size_t),
# NOTE(review): presumably the number of valid entries in `insn` after a
# disassembly call -- confirm against capstone.h.
('count', ctypes.c_size_t),
)

# callback for SKIPDATA option
CS_SKIPDATA_CALLBACK = ctypes.CFUNCTYPE(ctypes.c_size_t, ctypes.POINTER(ctypes.c_char), ctypes.c_size_t, ctypes.c_size_t, ctypes.c_void_p)

Expand All @@ -525,11 +532,12 @@ def _setup_prototype(lib, fname, restype, *argtypes):
getattr(lib, fname).argtypes = argtypes

_setup_prototype(_cs, "cs_open", ctypes.c_int, ctypes.c_uint, ctypes.c_uint, ctypes.POINTER(ctypes.c_size_t))
_setup_prototype(_cs, "cs_buffer_new", ctypes.POINTER(_cs_buffer), ctypes.c_size_t)
_setup_prototype(_cs, "cs_buffer_free", None, ctypes.POINTER(_cs_buffer))
_setup_prototype(_cs, "cs_disasm", ctypes.c_size_t, ctypes.c_size_t, ctypes.POINTER(ctypes.c_char), ctypes.c_size_t, \
ctypes.c_uint64, ctypes.c_size_t, ctypes.POINTER(ctypes.POINTER(_cs_insn)))
ctypes.c_uint64, ctypes.c_size_t, ctypes.POINTER(_cs_buffer))
_setup_prototype(_cs, "cs_disasm_iter", ctypes.c_bool, ctypes.c_size_t, ctypes.POINTER(ctypes.POINTER(ctypes.c_char)), ctypes.POINTER(ctypes.c_size_t), \
ctypes.POINTER(ctypes.c_uint64), ctypes.POINTER(_cs_insn))
_setup_prototype(_cs, "cs_free", None, ctypes.c_void_p, ctypes.c_size_t)
ctypes.POINTER(ctypes.c_uint64), ctypes.POINTER(_cs_buffer))
_setup_prototype(_cs, "cs_close", ctypes.c_int, ctypes.POINTER(ctypes.c_size_t))
_setup_prototype(_cs, "cs_reg_name", ctypes.c_char_p, ctypes.c_size_t, ctypes.c_uint)
_setup_prototype(_cs, "cs_insn_name", ctypes.c_char_p, ctypes.c_size_t, ctypes.c_uint)
Expand Down Expand Up @@ -599,20 +607,21 @@ def cs_disasm_quick(arch, mode, code, offset, count=0):
if status != CS_ERR_OK:
raise CsError(status)

all_insn = ctypes.POINTER(_cs_insn)()
res = _cs.cs_disasm(csh, code, len(code), offset, count, ctypes.byref(all_insn))
if res > 0:
try:
buffer = _cs.cs_buffer_new(0)
try:
res = _cs.cs_disasm(csh, code, len(code), offset, count, buffer)
all_insn = buffer.contents.insn
if res > 0:
for i in range(res):
yield CsInsn(_dummy_cs(csh, arch), all_insn[i])
finally:
_cs.cs_free(all_insn, res)
else:
status = _cs.cs_errno(csh)
if status != CS_ERR_OK:
raise CsError(status)
return
yield
else:
status = _cs.cs_errno(csh)
if status != CS_ERR_OK:
raise CsError(status)
return
yield
finally:
_cs.cs_buffer_free(buffer)

status = _cs.cs_close(ctypes.byref(csh))
if status != CS_ERR_OK:
Expand All @@ -639,21 +648,22 @@ def cs_disasm_lite(arch, mode, code, offset, count=0):
if status != CS_ERR_OK:
raise CsError(status)

all_insn = ctypes.POINTER(_cs_insn)()
res = _cs.cs_disasm(csh, code, len(code), offset, count, ctypes.byref(all_insn))
if res > 0:
try:
buffer = _cs.cs_buffer_new(0)
res = _cs.cs_disasm(csh, code, len(code), offset, count, buffer)
all_insn = buffer.contents.insn
try:
if res > 0:
for i in range(res):
insn = all_insn[i]
yield (insn.address, insn.size, insn.mnemonic.decode('ascii'), insn.op_str.decode('ascii'))
finally:
_cs.cs_free(all_insn, res)
else:
status = _cs.cs_errno(csh)
if status != CS_ERR_OK:
raise CsError(status)
return
yield
else:
status = _cs.cs_errno(csh)
if status != CS_ERR_OK:
raise CsError(status)
return
yield
finally:
_cs.cs_buffer_free(buffer)

status = _cs.cs_close(ctypes.byref(csh))
if status != CS_ERR_OK:
Expand Down Expand Up @@ -1214,7 +1224,6 @@ def group_name(self, group_id, default=None):

# Disassemble binary & return disassembled instructions in CsInsn objects
def disasm(self, code, offset, count=0):
all_insn = ctypes.POINTER(_cs_insn)()
'''if not _python2:
print(code)
code = code.encode()
Expand All @@ -1226,19 +1235,21 @@ def disasm(self, code, offset, count=0):
code = ctypes.byref(ctypes.c_char.from_buffer(view))
elif not isinstance(code, bytes):
code = view.tobytes()
res = _cs.cs_disasm(self.csh, code, size, offset, count, ctypes.byref(all_insn))
if res > 0:
try:
buffer = _cs.cs_buffer_new(0)
res = _cs.cs_disasm(self.csh, code, size, offset, count, buffer)
all_insn = buffer.contents.insn
try:
if res > 0:
for i in range(res):
yield CsInsn(self, all_insn[i])
finally:
_cs.cs_free(all_insn, res)
else:
status = _cs.cs_errno(self.csh)
if status != CS_ERR_OK:
raise CsError(status)
return
yield
else:
status = _cs.cs_errno(self.csh)
if status != CS_ERR_OK:
raise CsError(status)
return
yield
finally:
_cs.cs_buffer_free(buffer)

# This function matches the cs_disasm_iter implementation which
# *should* be much faster via the C API due to pre-allocating
Expand All @@ -1264,8 +1275,13 @@ def disasm_iter(self, code, offset):
# the typical auto conversion, so we have to cast it here.
code = ctypes.cast(code, ctypes.POINTER(ctypes.c_char))
address = ctypes.c_uint64(offset)
while _cs.cs_disasm_iter(self.csh, ctypes.byref(code), ctypes.byref(size), ctypes.byref(address), ctypes.byref(insn)):
yield (insn.address, insn.size, insn.mnemonic.decode('ascii'), insn.op_str.decode('ascii'))
buffer = _cs.cs_buffer_new(0)
try:
while _cs.cs_disasm_iter(self.csh, ctypes.byref(code), ctypes.byref(size), ctypes.byref(address), buffer):
insn = buffer.contents.insn[0]
yield (insn.address, insn.size, insn.mnemonic.decode('ascii'), insn.op_str.decode('ascii'))
finally:
_cs.cs_buffer_free(buffer)

# Light function to disassemble binary. This is about 20% faster than disasm() because
# unlike disasm(), disasm_lite() only return tuples of (address, size, mnemonic, op_str),
Expand All @@ -1275,28 +1291,29 @@ def disasm_lite(self, code, offset, count=0):
# Diet engine cannot provide @mnemonic & @op_str
raise CsError(CS_ERR_DIET)

all_insn = ctypes.POINTER(_cs_insn)()
size = len(code)
# Pass a bytearray by reference
view = memoryview(code)
if not view.readonly:
code = ctypes.byref(ctypes.c_char.from_buffer(view))
elif not isinstance(code, bytes):
code = view.tobytes()
res = _cs.cs_disasm(self.csh, code, size, offset, count, ctypes.byref(all_insn))
if res > 0:
try:
buffer = _cs.cs_buffer_new(0)
res = _cs.cs_disasm(self.csh, code, size, offset, count, buffer)
all_insn = buffer.contents.insn
try:
if res > 0:
for i in range(res):
insn = all_insn[i]
yield (insn.address, insn.size, insn.mnemonic.decode('ascii'), insn.op_str.decode('ascii'))
finally:
_cs.cs_free(all_insn, res)
else:
status = _cs.cs_errno(self.csh)
if status != CS_ERR_OK:
raise CsError(status)
return
yield
else:
status = _cs.cs_errno(self.csh)
if status != CS_ERR_OK:
raise CsError(status)
return
yield
finally:
_cs.cs_buffer_free(buffer)


# print out debugging info
Expand Down
17 changes: 16 additions & 1 deletion bindings/python/pyx/ccapstone.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,11 @@ cdef extern from "<capstone/capstone.h>":
bool usesAliasDetails;
cs_detail *detail

ctypedef struct cs_buffer:
cs_insn *insn
size_t capacity;
size_t count;

ctypedef enum cs_err:
pass

Expand All @@ -44,11 +49,21 @@ cdef extern from "<capstone/capstone.h>":

cs_err cs_errno(csh handle)

cs_buffer * cs_buffer_new(size_t capacity)

void cs_buffer_free(cs_buffer *buffer)

void cs_buffer_clear(cs_buffer *buffer)

bool cs_buffer_reserve_exact(cs_buffer *buffer, size_t capacity)

bool cs_buffer_reserve(cs_buffer *buffer, size_t additional)

size_t cs_disasm(csh handle,
const uint8_t *code, size_t code_size,
uint64_t address,
size_t count,
cs_insn **insn)
cs_buffer *buffer)

cs_err cs_option(csh handle, cs_opt_type type, size_t value)

Expand Down
16 changes: 8 additions & 8 deletions bindings/python/pyx/ccapstone.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -311,11 +311,11 @@ cdef class Cs(object):

# Disassemble binary & return disassembled instructions in CsInsn objects
def disasm(self, code, addr, count=0):
cdef cc.cs_insn *allinsn

cdef res = cc.cs_disasm(self._csh, code, len(code), addr, count, &allinsn)
buffer = cc.cs_buffer_new(0)
cdef res = cc.cs_disasm(self._csh, code, len(code), addr, count, buffer)
detail = self._cs.detail
arch = self._cs.arch
allinsn = buffer.insn

try:
for i from 0 <= i < res:
Expand All @@ -328,28 +328,28 @@ cdef class Cs(object):
dummy._csh = self._csh
yield dummy
finally:
cc.cs_free(allinsn, res)
cc.cs_buffer_free(buffer)


# Light function to disassemble binary. This is about 20% faster than disasm() because
# unlike disasm(), disasm_lite() only return tuples of (address, size, mnemonic, op_str),
# rather than CsInsn objects.
def disasm_lite(self, code, addr, count=0):
# TODO: don't need detail, so we might turn off detail, then turn on again when done
cdef cc.cs_insn *allinsn

if _diet:
# Diet engine cannot provide @mnemonic & @op_str
raise CsError(capstone.CS_ERR_DIET)

cdef res = cc.cs_disasm(self._csh, code, len(code), addr, count, &allinsn)
buffer = cc.cs_buffer_new(0)
cdef res = cc.cs_disasm(self._csh, code, len(code), addr, count, buffer)

try:
for i from 0 <= i < res:
insn = allinsn[i]
insn = buffer.insn[i]
yield (insn.address, insn.size, insn.mnemonic, insn.op_str)
finally:
cc.cs_free(allinsn, res)
cc.cs_buffer_free(buffer)


# print out debugging info
Expand Down
Loading

0 comments on commit 40ca3de

Please sign in to comment.