Add cs_buffer. Breaking API change.

Remove cs_malloc() and cs_free(). This API change unifies the disassembly process. Previously, there were two separate functions: one to disassemble a single instruction at a time in a loop, and one to disassemble many instructions into a dynamic buffer. This commit introduces a user-allocatable buffer that can be used in both situations with one function. cs_disasm_iter() is a tiny wrapper around cs_disasm().

Updating the use of cs_disasm_iter():

    // old api
    cs_insn *insn = cs_malloc(handle);
    while (cs_disasm_iter(handle, &code, &code_size, &ip, insn)) {
        disassembled_instructions += 1;
    }
    cs_free(insn);

Must be changed to:

    // new api
    cs_buffer *buffer = cs_buffer_new(1); // create buffer with 1 element
    while (cs_disasm_iter(handle, &code, &code_size, &ip, buffer)) {
        cs_insn *insn = &buffer->insn[0]; // get first insn in a buffer
        disassembled_instructions += 1;
    }
    cs_buffer_free(buffer); // free buffer

Updating the use of cs_disasm() is straightforward: just use cs_buffer_new(0) to create a buffer and pass it to cs_disasm().
capstone-engine · May 26, 2024 · 40ca3de · 40ca3de
1 parent a8c0998
commit 40ca3de
Show file tree

Hide file tree

Showing 37 changed files with 531 additions and 568 deletions.
diff --git a/bindings/python/capstone/__init__.py b/bindings/python/capstone/__init__.py
@@ -503,6 +503,13 @@ class _cs_insn(ctypes.Structure):
         ('detail', ctypes.POINTER(_cs_detail)),
     )
 
+class _cs_buffer(ctypes.Structure):
+    _fields_ = (
+        ('insn', ctypes.POINTER(_cs_insn)),
+        ('capacity', ctypes.c_size_t),
+        ('count', ctypes.c_size_t),
+    )
+
 # callback for SKIPDATA option
 CS_SKIPDATA_CALLBACK = ctypes.CFUNCTYPE(ctypes.c_size_t, ctypes.POINTER(ctypes.c_char), ctypes.c_size_t, ctypes.c_size_t, ctypes.c_void_p)
 
@@ -525,11 +532,12 @@ def _setup_prototype(lib, fname, restype, *argtypes):
     getattr(lib, fname).argtypes = argtypes
 
 _setup_prototype(_cs, "cs_open", ctypes.c_int, ctypes.c_uint, ctypes.c_uint, ctypes.POINTER(ctypes.c_size_t))
+_setup_prototype(_cs, "cs_buffer_new", ctypes.POINTER(_cs_buffer), ctypes.c_size_t)
+_setup_prototype(_cs, "cs_buffer_free", None, ctypes.POINTER(_cs_buffer))
 _setup_prototype(_cs, "cs_disasm", ctypes.c_size_t, ctypes.c_size_t, ctypes.POINTER(ctypes.c_char), ctypes.c_size_t, \
-        ctypes.c_uint64, ctypes.c_size_t, ctypes.POINTER(ctypes.POINTER(_cs_insn)))
+        ctypes.c_uint64, ctypes.c_size_t, ctypes.POINTER(_cs_buffer))
 _setup_prototype(_cs, "cs_disasm_iter", ctypes.c_bool, ctypes.c_size_t, ctypes.POINTER(ctypes.POINTER(ctypes.c_char)), ctypes.POINTER(ctypes.c_size_t), \
-                 ctypes.POINTER(ctypes.c_uint64), ctypes.POINTER(_cs_insn))
-_setup_prototype(_cs, "cs_free", None, ctypes.c_void_p, ctypes.c_size_t)
+                         ctypes.POINTER(ctypes.c_uint64), ctypes.POINTER(_cs_buffer))
 _setup_prototype(_cs, "cs_close", ctypes.c_int, ctypes.POINTER(ctypes.c_size_t))
 _setup_prototype(_cs, "cs_reg_name", ctypes.c_char_p, ctypes.c_size_t, ctypes.c_uint)
 _setup_prototype(_cs, "cs_insn_name", ctypes.c_char_p, ctypes.c_size_t, ctypes.c_uint)
@@ -599,20 +607,21 @@ def cs_disasm_quick(arch, mode, code, offset, count=0):
     if status != CS_ERR_OK:
         raise CsError(status)
 
-    all_insn = ctypes.POINTER(_cs_insn)()
-    res = _cs.cs_disasm(csh, code, len(code), offset, count, ctypes.byref(all_insn))
-    if res > 0:
-        try:
+    buffer = _cs.cs_buffer_new(0)
+    try:
+        res = _cs.cs_disasm(csh, code, len(code), offset, count, buffer)
+        all_insn = buffer.contents.insn
+        if res > 0:
             for i in range(res):
                 yield CsInsn(_dummy_cs(csh, arch), all_insn[i])
-        finally:
-            _cs.cs_free(all_insn, res)
-    else:
-        status = _cs.cs_errno(csh)
-        if status != CS_ERR_OK:
-            raise CsError(status)
-        return
-        yield
+        else:
+            status = _cs.cs_errno(csh)
+            if status != CS_ERR_OK:
+                raise CsError(status)
+            return
+            yield
+    finally:
+        _cs.cs_buffer_free(buffer)
 
     status = _cs.cs_close(ctypes.byref(csh))
     if status != CS_ERR_OK:
@@ -639,21 +648,22 @@ def cs_disasm_lite(arch, mode, code, offset, count=0):
     if status != CS_ERR_OK:
         raise CsError(status)
 
-    all_insn = ctypes.POINTER(_cs_insn)()
-    res = _cs.cs_disasm(csh, code, len(code), offset, count, ctypes.byref(all_insn))
-    if res > 0:
-        try:
+    buffer = _cs.cs_buffer_new(0)
+    res = _cs.cs_disasm(csh, code, len(code), offset, count, buffer)
+    all_insn = buffer.contents.insn
+    try:
+        if res > 0:
             for i in range(res):
                 insn = all_insn[i]
                 yield (insn.address, insn.size, insn.mnemonic.decode('ascii'), insn.op_str.decode('ascii'))
-        finally:
-            _cs.cs_free(all_insn, res)
-    else:
-        status = _cs.cs_errno(csh)
-        if status != CS_ERR_OK:
-            raise CsError(status)
-        return
-        yield
+        else:
+            status = _cs.cs_errno(csh)
+            if status != CS_ERR_OK:
+                raise CsError(status)
+            return
+            yield
+    finally:
+        _cs.cs_buffer_free(buffer)
 
     status = _cs.cs_close(ctypes.byref(csh))
     if status != CS_ERR_OK:
@@ -1214,7 +1224,6 @@ def group_name(self, group_id, default=None):
 
     # Disassemble binary & return disassembled instructions in CsInsn objects
     def disasm(self, code, offset, count=0):
-        all_insn = ctypes.POINTER(_cs_insn)()
         '''if not _python2:
             print(code)
             code = code.encode()
@@ -1226,19 +1235,21 @@ def disasm(self, code, offset, count=0):
             code = ctypes.byref(ctypes.c_char.from_buffer(view))
         elif not isinstance(code, bytes):
             code = view.tobytes()
-        res = _cs.cs_disasm(self.csh, code, size, offset, count, ctypes.byref(all_insn))
-        if res > 0:
-            try:
+        buffer = _cs.cs_buffer_new(0)
+        res = _cs.cs_disasm(self.csh, code, size, offset, count, buffer)
+        all_insn = buffer.contents.insn
+        try:
+            if res > 0:
                 for i in range(res):
                     yield CsInsn(self, all_insn[i])
-            finally:
-                _cs.cs_free(all_insn, res)
-        else:
-            status = _cs.cs_errno(self.csh)
-            if status != CS_ERR_OK:
-                raise CsError(status)
-            return
-            yield
+            else:
+                status = _cs.cs_errno(self.csh)
+                if status != CS_ERR_OK:
+                    raise CsError(status)
+                return
+                yield
+        finally:
+            _cs.cs_buffer_free(buffer)
 
     # This function matches the cs_disasm_iter implementation which
     # *should* be much faster via the C API due to pre-allocating
@@ -1264,8 +1275,13 @@ def disasm_iter(self, code, offset):
         # the typical auto conversion, so we have to cast it here.
         code = ctypes.cast(code, ctypes.POINTER(ctypes.c_char))
         address = ctypes.c_uint64(offset)
-        while _cs.cs_disasm_iter(self.csh, ctypes.byref(code), ctypes.byref(size), ctypes.byref(address), ctypes.byref(insn)):
-            yield (insn.address, insn.size, insn.mnemonic.decode('ascii'), insn.op_str.decode('ascii'))
+        buffer = _cs.cs_buffer_new(0)
+        try:
+            while _cs.cs_disasm_iter(self.csh, ctypes.byref(code), ctypes.byref(size), ctypes.byref(address), buffer):
+                insn = buffer.contents.insn[0]
+                yield (insn.address, insn.size, insn.mnemonic.decode('ascii'), insn.op_str.decode('ascii'))
+        finally:
+            _cs.cs_buffer_free(buffer)
 
     # Light function to disassemble binary. This is about 20% faster than disasm() because
     # unlike disasm(), disasm_lite() only return tuples of (address, size, mnemonic, op_str),
@@ -1275,28 +1291,29 @@ def disasm_lite(self, code, offset, count=0):
             # Diet engine cannot provide @mnemonic & @op_str
             raise CsError(CS_ERR_DIET)
 
-        all_insn = ctypes.POINTER(_cs_insn)()
         size = len(code)
         # Pass a bytearray by reference
         view = memoryview(code)
         if not view.readonly:
             code = ctypes.byref(ctypes.c_char.from_buffer(view))
         elif not isinstance(code, bytes):
             code = view.tobytes()
-        res = _cs.cs_disasm(self.csh, code, size, offset, count, ctypes.byref(all_insn))
-        if res > 0:
-            try:
+        buffer = _cs.cs_buffer_new(0)
+        res = _cs.cs_disasm(self.csh, code, size, offset, count, buffer)
+        all_insn = buffer.contents.insn
+        try:
+            if res > 0:
                 for i in range(res):
                     insn = all_insn[i]
                     yield (insn.address, insn.size, insn.mnemonic.decode('ascii'), insn.op_str.decode('ascii'))
-            finally:
-                _cs.cs_free(all_insn, res)
-        else:
-            status = _cs.cs_errno(self.csh)
-            if status != CS_ERR_OK:
-                raise CsError(status)
-            return
-            yield
+            else:
+                status = _cs.cs_errno(self.csh)
+                if status != CS_ERR_OK:
+                    raise CsError(status)
+                return
+                yield
+        finally:
+            _cs.cs_buffer_free(buffer)
 
 
 # print out debugging info

diff --git a/bindings/python/pyx/ccapstone.pxd b/bindings/python/pyx/ccapstone.pxd
@@ -28,6 +28,11 @@ cdef extern from "<capstone/capstone.h>":
         bool usesAliasDetails;
         cs_detail *detail
 
+    ctypedef struct cs_buffer:
+        cs_insn *insn
+        size_t capacity;
+        size_t count;
+
     ctypedef enum cs_err:
         pass
 
@@ -44,11 +49,21 @@ cdef extern from "<capstone/capstone.h>":
 
     cs_err cs_errno(csh handle)
 
+    cs_buffer * cs_buffer_new(size_t capacity)
+
+    void cs_buffer_free(cs_buffer *buffer)
+
+    void cs_buffer_clear(cs_buffer *buffer)
+
+    bool cs_buffer_reserve_exact(cs_buffer *buffer, size_t capacity)
+
+    bool cs_buffer_reserve(cs_buffer *buffer, size_t additional)
+
     size_t cs_disasm(csh handle,
         const uint8_t *code, size_t code_size,
         uint64_t address,
         size_t count,
-        cs_insn **insn)
+        cs_buffer *buffer)
 
     cs_err cs_option(csh handle, cs_opt_type type, size_t value)
 

diff --git a/bindings/python/pyx/ccapstone.pyx b/bindings/python/pyx/ccapstone.pyx
@@ -311,11 +311,11 @@ cdef class Cs(object):
 
     # Disassemble binary & return disassembled instructions in CsInsn objects
     def disasm(self, code, addr, count=0):
-        cdef cc.cs_insn *allinsn
-
-        cdef res = cc.cs_disasm(self._csh, code, len(code), addr, count, &allinsn)
+        buffer = cc.cs_buffer_new(0)
+        cdef res = cc.cs_disasm(self._csh, code, len(code), addr, count, buffer)
         detail = self._cs.detail
         arch = self._cs.arch
+        allinsn = buffer.insn
 
         try:
             for i from 0 <= i < res:
@@ -328,28 +328,28 @@ cdef class Cs(object):
                 dummy._csh = self._csh
                 yield dummy
         finally:
-            cc.cs_free(allinsn, res)
+            cc.cs_buffer_free(buffer)
 
 
     # Light function to disassemble binary. This is about 20% faster than disasm() because
     # unlike disasm(), disasm_lite() only return tuples of (address, size, mnemonic, op_str),
     # rather than CsInsn objects.
     def disasm_lite(self, code, addr, count=0):
         # TODO: don't need detail, so we might turn off detail, then turn on again when done
-        cdef cc.cs_insn *allinsn
 
         if _diet:
             # Diet engine cannot provide @mnemonic & @op_str
             raise CsError(capstone.CS_ERR_DIET)
 
-        cdef res = cc.cs_disasm(self._csh, code, len(code), addr, count, &allinsn)
+        buffer = cc.cs_buffer_new(0)
+        cdef res = cc.cs_disasm(self._csh, code, len(code), addr, count, buffer)
 
         try:
             for i from 0 <= i < res:
-                insn = allinsn[i]
+                insn = buffer.insn[i]
                 yield (insn.address, insn.size, insn.mnemonic, insn.op_str)
         finally:
-            cc.cs_free(allinsn, res)
+            cc.cs_buffer_free(buffer)
 
 
 # print out debugging info