diff --git a/Mapping.c b/Mapping.c index e21b7de30d..ca74082650 100644 --- a/Mapping.c +++ b/Mapping.c @@ -219,6 +219,18 @@ void map_groups(MCInst *MI, const insn_map *imap) #endif // CAPSTONE_DIET } +/// Copies the opcode encoding info from @imap to @MI->flat_insn. +void map_opcode_encoding(MCInst *MI, const insn_map *imap) +{ +#ifndef CAPSTONE_DIET + if (!MI->flat_insn->detail) + return; + + unsigned Opcode = MCInst_getOpcode(MI); + MI->flat_insn->detail->opcode_encoding = imap[Opcode].opcode_encoding; +#endif +} + // Search for the CS instruction id for the given @MC_Opcode in @imap. // return -1 if none is found. unsigned int find_cs_id(unsigned MC_Opcode, const insn_map *imap, @@ -298,6 +310,18 @@ const cs_ac_type mapping_get_op_access(MCInst *MI, unsigned OpNum, return access; } +const cs_operand_encoding +mapping_get_op_encoding(MCInst *MI, unsigned OpNum, + const map_insn_ops *insn_ops_map, size_t map_size) +{ + assert(MI); + assert(MI->Opcode < map_size); + assert(OpNum < sizeof(insn_ops_map[MI->Opcode].ops) / + sizeof(insn_ops_map[MI->Opcode].ops[0])); + + return insn_ops_map[MI->Opcode].ops[OpNum].encoding; +} + /// Returns the operand at detail->arch.operands[op_count + offset] /// Or NULL if detail is not set. #define DEFINE_get_detail_op(arch, ARCH) \ diff --git a/Mapping.h b/Mapping.h index 1bed623d0b..f2d98296c4 100644 --- a/Mapping.h +++ b/Mapping.h @@ -31,6 +31,7 @@ typedef struct insn_map { union { ppc_suppl_info ppc; } suppl_info; // Supplementary information for each instruction. + cs_opcode_encoding opcode_encoding; // the opcode encoding info of the instruction #endif } insn_map; @@ -51,6 +52,7 @@ typedef struct { uint8_t /* cs_data_type */ dtypes[MAX_NO_DATA_TYPES]; ///< List of op types. 
Terminated by ///< CS_DATA_TYPE_LAST + cs_operand_encoding encoding; ///< The encoding of the operand } mapping_op; #define MAX_NO_INSN_MAP_OPS 16 @@ -71,6 +73,10 @@ const cs_ac_type mapping_get_op_access(MCInst *MI, unsigned OpNum, const map_insn_ops *insn_ops_map, size_t map_size); +const cs_operand_encoding +mapping_get_op_encoding(MCInst *MI, unsigned OpNum, + const map_insn_ops *insn_ops_map, size_t map_size); + /// Macro for easier access of operand types from the map. /// Assumes the istruction operands map is called "insn_operands" /// Only usable by `auto-sync` archs! @@ -86,6 +92,10 @@ const cs_ac_type mapping_get_op_access(MCInst *MI, unsigned OpNum, sizeof(insn_operands) / \ sizeof(insn_operands[0])) +#define map_get_op_encoding(MI, OpNum) \ + mapping_get_op_encoding(MI, OpNum, insn_operands, \ + sizeof(insn_operands) / sizeof(insn_operands[0])) + ///< Map for ids to their string typedef struct name_map { unsigned int id; @@ -112,6 +122,8 @@ void add_group(MCInst *MI, unsigned /* arch_group */ group); void map_groups(MCInst *MI, const insn_map *imap); +void map_opcode_encoding(MCInst *MI, const insn_map *imap); + void map_cs_id(MCInst *MI, const insn_map *imap, unsigned int imap_size); #define DECL_get_detail_op(arch, ARCH) \ @@ -206,4 +218,4 @@ bool map_use_alias_details(const MCInst *MI); void map_set_alias_id(MCInst *MI, const SStream *O, const name_map *alias_mnem_id_map, int map_size); -#endif // CS_MAPPING_H \ No newline at end of file +#endif // CS_MAPPING_H diff --git a/arch/ARM/ARMMapping.c b/arch/ARM/ARMMapping.c index 24f044d353..106df88333 100644 --- a/arch/ARM/ARMMapping.c +++ b/arch/ARM/ARMMapping.c @@ -2,13 +2,14 @@ /* By Nguyen Anh Quynh , 2013-2019 */ /* Rot127 , 2022-2023 */ +#include #ifdef CAPSTONE_HAS_ARM #include #include -#include "capstone/arm.h" #include "capstone/capstone.h" +#include "capstone/arm.h" #include "../../Mapping.h" #include "../../MCDisassembler.h" @@ -574,6 +575,28 @@ static void ARM_post_index_detection(MCInst 
*MI) ARM_dec_op_count(MI); } +/// Gets the encoding of the register in the reglist. +/// Most of the times it's entirely based on the bit position of the register in the reglist since for example +/// bit 3 represents r3 and bit 4 r4 and so on. But there are some exceptions (for thumb instructions mainly) +static void ARM_add_reglist_reg_encoding(MCInst *MI, arm_reg reg) +{ + cs_operand_encoding *encoding = &ARM_get_detail_op(MI, 0)->encoding; + encoding->operand_pieces_count = 1; + encoding->sizes[0] = 1; + unsigned Opcode = MCInst_getOpcode(MI); + // Since r0-r12 are placed next to each other on the arm_reg enum we subtract + // value of r0 to get the respective bit num of the register. + // lr, pc and sp are exceptions + unsigned RegBitNum = reg == ARM_REG_R13 ? 13 : + reg >= ARM_REG_R0 ? reg - ARM_REG_R0 : + reg + 1; + if ((Opcode == ARM_tPOP && RegBitNum == 15) || + (Opcode == ARM_tPUSH && RegBitNum == 14)) + encoding->indexes[0] = 8; + else + encoding->indexes[0] = RegBitNum; +} + /// Decodes the asm string for a given instruction /// and fills the detail information about the instruction and its operands. 
void ARM_printer(MCInst *MI, SStream *O, void * /* MCRegisterInfo* */ info) @@ -721,6 +744,7 @@ void ARM_set_instr_map_data(MCInst *MI) map_implicit_writes(MI, arm_insns); ARM_check_updates_flags(MI); map_groups(MI, arm_insns); + map_opcode_encoding(MI, arm_insns); } bool ARM_getInstruction(csh handle, const uint8_t *code, size_t code_len, @@ -973,6 +997,7 @@ static void add_cs_detail_general(MCInst *MI, arm_op_group op_group, if ((unsigned)CC == 15 && op_group == ARM_OP_GROUP_PredicateOperand) { ARM_get_detail(MI)->cc = ARMCC_UNDEF; + return; } if (CC == ARMCC_HS && @@ -1028,19 +1053,37 @@ static void add_cs_detail_general(MCInst *MI, arm_op_group op_group, ARM_set_detail_op_imm(MI, OpNum, ARM_OP_CIMM, MCInst_getOpVal(MI, OpNum)); break; - case ARM_OP_GROUP_AddrMode6Operand: + case ARM_OP_GROUP_AddrMode6Operand: { if (!doing_mem(MI)) ARM_set_mem_access(MI, true); ARM_set_detail_op_mem(MI, OpNum, false, 0, 0, MCInst_getOpVal(MI, OpNum)); - ARM_get_detail_op(MI, 0)->mem.align = - MCInst_getOpVal(MI, OpNum + 1) << 3; + if (MCInst_getOpVal(MI, OpNum + 1)) { + ARM_get_detail_op(MI, 0)->mem.align = + MCInst_getOpVal(MI, OpNum + 1) << 3; + } + // Adding the operand that might be missing, if not missing this whole + // thing will be overriden anyways + cs_operand_encoding *encoding = &ARM_get_detail_op(MI, 0)->encoding; + encoding->indexes[encoding->operand_pieces_count] = 0; + encoding->sizes[encoding->operand_pieces_count++] = 4; + ARM_get_detail_op(MI, 0)->mem.format = ARM_MEM_FMT_REG_ALIGN_REG; ARM_set_mem_access(MI, false); break; + } case ARM_OP_GROUP_AddrMode6OffsetOperand: { arm_reg reg = MCInst_getOpVal(MI, OpNum); if (reg != 0) { - ARM_set_detail_op_mem_offset(MI, OpNum, reg, false); + ARM_set_detail_op_mem_offset(MI, OpNum, reg, false, + ARM_MEM_FMT_REG_ALIGN_REG); + } + // Align & index register are treated as one due to an issue with the + // .td files so this should fix it + cs_operand_encoding *encoding = &ARM_get_detail_op(MI, -1)->encoding; + if 
(encoding->sizes[encoding->operand_pieces_count - 1] != 4) { + encoding->sizes[encoding->operand_pieces_count - 1] -= 4; + encoding->indexes[encoding->operand_pieces_count] = 0; + encoding->sizes[encoding->operand_pieces_count++] = 4; } break; } @@ -1049,6 +1092,7 @@ static void add_cs_detail_general(MCInst *MI, arm_op_group op_group, ARM_set_mem_access(MI, true); ARM_set_detail_op_mem(MI, OpNum, false, 0, 0, MCInst_getOpVal(MI, OpNum)); + ARM_get_detail_op(MI, 0)->mem.format = ARM_MEM_FMT_REG; ARM_set_mem_access(MI, false); break; case ARM_OP_GROUP_SBitModifierOperand: { @@ -1140,14 +1184,13 @@ static void add_cs_detail_general(MCInst *MI, arm_op_group op_group, // But the MappingInsnOps.inc has only a single entry for the whole // list. So all registers in the list share those attributes. unsigned access = map_get_op_access(MI, OpNum); - for (unsigned i = OpNum, e = MCInst_getNumOperands(MI); i != e; - ++i) { - unsigned Reg = - MCOperand_getReg(MCInst_getOperand(MI, i)); + for (unsigned i = OpNum, e = MCInst_getNumOperands(MI); i != e; ++i) { + unsigned Reg = MCOperand_getReg(MCInst_getOperand(MI, i)); ARM_get_detail_op(MI, 0)->type = ARM_OP_REG; ARM_get_detail_op(MI, 0)->reg = Reg; ARM_get_detail_op(MI, 0)->access = access; + ARM_add_reglist_reg_encoding(MI, Reg); ARM_inc_op_count(MI); } break; @@ -1213,7 +1256,7 @@ static void add_cs_detail_general(MCInst *MI, arm_op_group op_group, if (TheReg && MClassSysReg_isInRequiredFeatures( TheReg, ARM_FeatureDSP)) { ARM_set_detail_op_sysop( - MI, TheReg->sysreg.mclasssysreg, + MI, OpNum, TheReg->sysreg.mclasssysreg, ARM_OP_SYSREG, IsOutReg, Mask, SYSm); return; @@ -1228,7 +1271,7 @@ static void add_cs_detail_general(MCInst *MI, arm_op_group op_group, SYSm); if (TheReg) { ARM_set_detail_op_sysop( - MI, TheReg->sysreg.mclasssysreg, + MI, OpNum, TheReg->sysreg.mclasssysreg, ARM_OP_SYSREG, IsOutReg, Mask, SYSm); return; @@ -1239,7 +1282,7 @@ static void add_cs_detail_general(MCInst *MI, arm_op_group op_group, SYSm); if 
(TheReg) { ARM_set_detail_op_sysop( - MI, TheReg->sysreg.mclasssysreg, + MI, OpNum, TheReg->sysreg.mclasssysreg, ARM_OP_SYSREG, IsOutReg, Mask, SYSm); return; } @@ -1247,7 +1290,7 @@ static void add_cs_detail_general(MCInst *MI, arm_op_group op_group, if (detail_is_set(MI)) MCOperand_CreateImm0(MI, SYSm); - ARM_set_detail_op_sysop(MI, SYSm, ARM_OP_SYSREG, + ARM_set_detail_op_sysop(MI, OpNum, SYSm, ARM_OP_SYSREG, IsOutReg, Mask, SYSm); return; @@ -1258,20 +1301,20 @@ static void add_cs_detail_general(MCInst *MI, arm_op_group op_group, default: assert(0 && "Unexpected mask value!"); case 4: - ARM_set_detail_op_sysop(MI, + ARM_set_detail_op_sysop(MI, OpNum, ARM_MCLASSSYSREG_APSR_G, ARM_OP_SYSREG, IsOutReg, Mask, UINT16_MAX); return; case 8: ARM_set_detail_op_sysop( - MI, ARM_MCLASSSYSREG_APSR_NZCVQ, + MI, OpNum, ARM_MCLASSSYSREG_APSR_NZCVQ, ARM_OP_SYSREG, IsOutReg, Mask, UINT16_MAX); return; case 12: ARM_set_detail_op_sysop( - MI, ARM_MCLASSSYSREG_APSR_NZCVQG, + MI, OpNum, ARM_MCLASSSYSREG_APSR_NZCVQG, ARM_OP_SYSREG, IsOutReg, Mask, UINT16_MAX); return; @@ -1293,7 +1336,7 @@ static void add_cs_detail_general(MCInst *MI, arm_op_group op_group, field += SpecRegRBit ? ARM_FIELD_SPSR_C : ARM_FIELD_CPSR_C; - ARM_set_detail_op_sysop(MI, field, + ARM_set_detail_op_sysop(MI, OpNum, field, SpecRegRBit ? ARM_OP_SPSR : ARM_OP_CPSR, IsOutReg, Mask, UINT16_MAX); @@ -1369,22 +1412,24 @@ static void add_cs_detail_general(MCInst *MI, arm_op_group op_group, break; case ARM_OP_GROUP_PostIdxRegOperand: { bool sub = MCInst_getOpVal(MI, OpNum + 1) ? 
false : true; - ARM_set_detail_op_mem_offset(MI, OpNum, - MCInst_getOpVal(MI, OpNum), sub); + ARM_set_detail_op_mem_offset(MI, OpNum, MCInst_getOpVal(MI, OpNum), sub, + ARM_MEM_FMT_U_REG_REG); ARM_get_detail(MI)->post_index = true; break; } case ARM_OP_GROUP_PostIdxImm8Operand: { unsigned Imm = MCInst_getOpVal(MI, OpNum); bool sub = !(Imm & 256); - ARM_set_detail_op_mem_offset(MI, OpNum, (Imm & 0xff), sub); + ARM_set_detail_op_mem_offset(MI, OpNum, (Imm & 0xff), sub, + ARM_MEM_FMT_U_REG_IMM2); ARM_get_detail(MI)->post_index = true; break; } case ARM_OP_GROUP_PostIdxImm8s4Operand: { unsigned Imm = MCInst_getOpVal(MI, OpNum); bool sub = !(Imm & 256); - ARM_set_detail_op_mem_offset(MI, OpNum, (Imm & 0xff) << 2, sub); + ARM_set_detail_op_mem_offset(MI, OpNum, (Imm & 0xff) << 2, sub, + ARM_MEM_FMT_U_REG_IMM); ARM_get_detail(MI)->post_index = true; break; } @@ -1394,7 +1439,8 @@ static void add_cs_detail_general(MCInst *MI, arm_op_group op_group, ARM_set_detail_op_mem(MI, OpNum, false, 0, 0, MCInst_getOpVal(MI, OpNum)); ARM_set_detail_op_mem(MI, OpNum + 1, true, 0, 0, - MCInst_getOpVal(MI, OpNum + 1)); + MCInst_getOpVal(MI, OpNum + 1)); + ARM_get_detail_op(MI, 0)->mem.format = ARM_MEM_FMT_REG_REG; if (op_group == ARM_OP_GROUP_AddrModeTBH) { ARM_get_detail_op(MI, 0)->shift.type = ARM_SFT_LSL; ARM_get_detail_op(MI, 0)->shift.value = 1; @@ -1410,7 +1456,8 @@ static void add_cs_detail_general(MCInst *MI, arm_op_group op_group, ARM_set_mem_access(MI, true); ARM_set_detail_op_mem(MI, OpNum, false, 0, 0, - MCInst_getOpVal(MI, OpNum)); + MCInst_getOpVal(MI, OpNum)); + ARM_get_detail_op(MI, 0)->mem.format = ARM_MEM_FMT_U_REG_SHIFT_REG; unsigned int imm3 = MCInst_getOpVal(MI, OpNum + 2); unsigned ShOff = ARM_AM_getAM2Offset(imm3); ARM_AM_AddrOpc subtracted = ARM_AM_getAM2Op(imm3); @@ -1435,16 +1482,22 @@ static void add_cs_detail_general(MCInst *MI, arm_op_group op_group, case ARM_OP_GROUP_AddrMode2OffsetOperand: { uint64_t imm2 = MCInst_getOpVal(MI, OpNum + 1); ARM_AM_AddrOpc 
subtracted = ARM_AM_getAM2Op(imm2); + arm_mem_format format; if (!MCInst_getOpVal(MI, OpNum)) { + format = (map_get_op_type(MI, OpNum + 1) & ~CS_OP_MEM) == CS_OP_REG + ? ARM_MEM_FMT_U_REG_SHIFT_REG + : ARM_MEM_FMT_U_REG_IMM; ARM_set_detail_op_mem_offset(MI, OpNum + 1, - ARM_AM_getAM2Offset(imm2), - subtracted == ARM_AM_sub); + ARM_AM_getAM2Offset(imm2), + subtracted == ARM_AM_sub, format); ARM_get_detail(MI)->post_index = true; return; } - ARM_set_detail_op_mem_offset(MI, OpNum, - MCInst_getOpVal(MI, OpNum), - subtracted == ARM_AM_sub); + format = (map_get_op_type(MI, OpNum) & ~CS_OP_MEM) == CS_OP_REG + ? ARM_MEM_FMT_U_REG_SHIFT_REG + : ARM_MEM_FMT_U_REG_IMM; + ARM_set_detail_op_mem_offset(MI, OpNum, MCInst_getOpVal(MI, OpNum), + subtracted == ARM_AM_sub, format); ARM_get_detail(MI)->post_index = true; add_cs_detail_RegImmShift(MI, ARM_AM_getAM2ShiftOpc(imm2), ARM_AM_getAM2Offset(imm2)); @@ -1453,20 +1506,40 @@ static void add_cs_detail_general(MCInst *MI, arm_op_group op_group, case ARM_OP_GROUP_AddrMode3OffsetOperand: { MCOperand *MO1 = MCInst_getOperand(MI, OpNum); MCOperand *MO2 = MCInst_getOperand(MI, OpNum + 1); - ARM_AM_AddrOpc subtracted = - ARM_AM_getAM3Op(MCOperand_getImm(MO2)); - if (MCOperand_getReg(MO1)) { - ARM_set_detail_op_mem_offset(MI, OpNum, - MCInst_getOpVal(MI, OpNum), - subtracted == ARM_AM_sub); - ARM_get_detail(MI)->post_index = true; - return; + ARM_AM_AddrOpc subtracted = ARM_AM_getAM3Op(MCOperand_getImm(MO2)); + uint8_t offset = 0; + if (MCOperand_getReg(MO1)) + ARM_set_detail_op_mem_offset(MI, OpNum, MCInst_getOpVal(MI, OpNum), + subtracted == ARM_AM_sub, ARM_MEM_FMT_NONE); + else { + offset = 1; + ARM_set_detail_op_mem_offset( + MI, OpNum + 1, + ARM_AM_getAM3Offset(MCInst_getOpVal(MI, OpNum + 1)), + subtracted == ARM_AM_sub, ARM_MEM_FMT_NONE); + } + if (!doing_mem(MI)) + ARM_dec_op_count(MI); + switch (map_get_op_type(MI, OpNum + offset) & ~CS_OP_MEM) { + case CS_OP_REG: { + cs_operand_encoding *encoding = &ARM_get_detail_op(MI, 
0)->encoding; + // Remove the extra operand piece that was mapped (index register + // has only one) + --(encoding->operand_pieces_count); + encoding->indexes[encoding->operand_pieces_count - 1] = + encoding->indexes[encoding->operand_pieces_count]; + encoding->sizes[encoding->operand_pieces_count - 1] = + encoding->sizes[encoding->operand_pieces_count]; + ARM_get_detail_op(MI, 0)->mem.format = ARM_MEM_FMT_U_REG_REG; + break; + } + case CS_OP_IMM: + ARM_get_detail_op(MI, 0)->mem.format = ARM_MEM_FMT_U_REG_IMM2; + break; } - ARM_set_detail_op_mem_offset( - MI, OpNum + 1, - ARM_AM_getAM3Offset(MCInst_getOpVal(MI, OpNum + 1)), - subtracted == ARM_AM_sub); ARM_get_detail(MI)->post_index = true; + if (!doing_mem(MI)) + ARM_inc_op_count(MI); break; } case ARM_OP_GROUP_ThumbAddrModeSPOperand: @@ -1502,6 +1575,10 @@ static void add_cs_detail_general(MCInst *MI, arm_op_group op_group, ARM_set_detail_op_mem(MI, OpNum + 1, false, 0, 0, ImmOffs * Scale); } + if (op_group == ARM_OP_GROUP_ThumbAddrModeSPOperand) + ARM_get_detail_op(MI, 0)->mem.format = ARM_MEM_FMT_IMM; + else + ARM_get_detail_op(MI, 0)->mem.format = ARM_MEM_FMT_IMM_REG; ARM_set_mem_access(MI, false); break; } @@ -1518,18 +1595,26 @@ static void add_cs_detail_general(MCInst *MI, arm_op_group op_group, if (RegNum) ARM_set_detail_op_mem(MI, OpNum + 1, true, 0, 0, RegNum); + ARM_get_detail_op(MI, 0)->mem.format = ARM_MEM_FMT_IREG_BREG; ARM_set_mem_access(MI, false); break; } case ARM_OP_GROUP_T2AddrModeImm8OffsetOperand: case ARM_OP_GROUP_T2AddrModeImm8s4OffsetOperand: { int32_t OffImm = MCInst_getOpVal(MI, OpNum); + // If it's only a thumb2 instruction that follows the first group in + // this case then register goes first and U flag follows + arm_mem_format format = + op_group == ARM_OP_GROUP_T2AddrModeImm8s4OffsetOperand || + get_detail(MI)->groups_count != 1 + ? 
ARM_MEM_FMT_U_REG_IMM + : ARM_MEM_FMT_REG_U_IMM; if (OffImm == INT32_MIN) - ARM_set_detail_op_mem_offset(MI, OpNum, 0, false); + ARM_set_detail_op_mem_offset(MI, OpNum, 0, false, format); else { bool sub = OffImm < 0; OffImm = OffImm < 0 ? OffImm * -1 : OffImm; - ARM_set_detail_op_mem_offset(MI, OpNum, OffImm, sub); + ARM_set_detail_op_mem_offset(MI, OpNum, OffImm, sub, format); } ARM_get_detail(MI)->post_index = true; break; @@ -1542,6 +1627,7 @@ static void add_cs_detail_general(MCInst *MI, arm_op_group op_group, MCInst_getOpVal(MI, OpNum)); ARM_set_detail_op_mem(MI, OpNum + 1, true, 0, 0, MCInst_getOpVal(MI, OpNum + 1)); + ARM_get_detail_op(MI, 0)->mem.format = ARM_MEM_FMT_REG_SHIFT_REG; unsigned ShAmt = MCInst_getOpVal(MI, OpNum + 2); if (ShAmt) { ARM_get_detail_op(MI, 0)->shift.type = ARM_SFT_LSL; @@ -1558,6 +1644,7 @@ static void add_cs_detail_general(MCInst *MI, arm_op_group op_group, if (Imm) ARM_set_detail_op_mem(MI, OpNum + 1, false, 0, 0, Imm * 4); + ARM_get_detail_op(MI, 0)->mem.format = ARM_MEM_FMT_REG_IMM; ARM_set_mem_access(MI, false); break; case ARM_OP_GROUP_PKHLSLShiftImm: { @@ -1646,11 +1733,16 @@ static void add_cs_detail_general(MCInst *MI, arm_op_group op_group, int32_t OffImm = MCInst_getOpVal(MI, OpNum); if (OffImm == INT32_MIN) OffImm = 0; + cs_operand_encoding *encoding = &ARM_get_detail_op(MI, 0)->encoding; + *encoding = map_get_op_encoding(MI, OpNum); ARM_get_detail_op(MI, 0)->type = ARM_OP_MEM; ARM_get_detail_op(MI, 0)->mem.base = ARM_REG_PC; ARM_get_detail_op(MI, 0)->mem.index = ARM_REG_INVALID; ARM_get_detail_op(MI, 0)->mem.scale = 1; ARM_get_detail_op(MI, 0)->mem.disp = OffImm; + ARM_get_detail_op(MI, 0)->mem.format = + encoding->operand_pieces_count == 1 ? 
ARM_MEM_FMT_IMM : + ARM_MEM_FMT_U_REG_IMM; ARM_get_detail_op(MI, 0)->access = CS_AC_READ; ARM_inc_op_count(MI); break; @@ -1660,7 +1752,7 @@ static void add_cs_detail_general(MCInst *MI, arm_op_group op_group, const ARMBankedReg_BankedReg *TheReg = ARMBankedReg_lookupBankedRegByEncoding(Banked); bool IsOutReg = OpNum == 0; - ARM_set_detail_op_sysop(MI, TheReg->sysreg.bankedreg, + ARM_set_detail_op_sysop(MI, OpNum, TheReg->sysreg.bankedreg, ARM_OP_BANKEDREG, IsOutReg, UINT8_MAX, TheReg->Encoding & 0xf); // Bit[4:0] are SYSm @@ -1675,6 +1767,7 @@ static void add_cs_detail_general(MCInst *MI, arm_op_group op_group, ARM_get_detail_op(MI, 0)->type = ARM_OP_SETEND; ARM_get_detail_op(MI, 0)->setend = ARM_SETEND_LE; } + ARM_get_detail_op(MI, 0)->encoding = map_get_op_encoding(MI, OpNum); ARM_inc_op_count(MI); break; } @@ -1721,7 +1814,15 @@ static void add_cs_detail_template_1(MCInst *MI, arm_op_group op_group, ARM_set_detail_op_mem(MI, OpNum + 1, false, 0, 0, Imm); if (AlwaysPrintImm0) map_add_implicit_write(MI, MCInst_getOpVal(MI, OpNum)); - + const cs_operand_encoding *encoding = + &ARM_get_detail_op(MI, 0)->encoding; + // If less than 3 operands then it's just reg and imm, otherwise if + // first operand is of size greater than 1 then it definitely isn't the + // U bit so it starts with the base register, otherwise it's U bit first + ARM_get_detail_op(MI, 0)->mem.format = + encoding->operand_pieces_count < 3 ? ARM_MEM_FMT_REG_IMM + : encoding->sizes[0] != 1 ? 
ARM_MEM_FMT_REG_U_IMM + : ARM_MEM_FMT_U_REG_IMM; ARM_set_mem_access(MI, false); break; } @@ -1752,9 +1853,16 @@ static void add_cs_detail_template_1(MCInst *MI, arm_op_group op_group, if (MCOperand_getReg(MO2)) { ARM_set_detail_op_mem(MI, OpNum + 1, true, 0, 0, - MCInst_getOpVal(MI, OpNum + 1)); - ARM_get_detail_op(MI, 0)->subtracted = Sign == - ARM_AM_sub; + MCInst_getOpVal(MI, OpNum + 1)); + ARM_get_detail_op(MI, 0)->subtracted = Sign == ARM_AM_sub; + cs_operand_encoding *encoding = &ARM_get_detail_op(MI, 0)->encoding; + // Since it's a register then it's only in one piece (not two), so + // we remove one + // clang-format off + --(encoding->operand_pieces_count); + encoding->indexes[encoding->operand_pieces_count - 1] = 0; + // clang-format on + ARM_get_detail_op(MI, 0)->mem.format = ARM_MEM_FMT_U_REG_REG; ARM_set_mem_access(MI, false); break; } @@ -1767,6 +1875,7 @@ static void add_cs_detail_template_1(MCInst *MI, arm_op_group op_group, ARM_get_detail_op(MI, 0)->subtracted = Sign == ARM_AM_sub; } + ARM_get_detail_op(MI, 0)->mem.format = ARM_MEM_FMT_U_REG_IMM2; ARM_set_mem_access(MI, false); break; } @@ -1784,7 +1893,9 @@ static void add_cs_detail_template_1(MCInst *MI, arm_op_group op_group, Op->mem.index = ARM_REG_INVALID; Op->mem.scale = 1; Op->mem.disp = 0; + Op->mem.format = ARM_MEM_FMT_U_REG_IMM; Op->access = CS_AC_READ; + Op->encoding = map_get_op_encoding(MI, OpNum); ARM_AM_AddrOpc SubFlag = ARM_AM_getAM5Op(MCInst_getOpVal(MI, OpNum + 1)); @@ -1815,6 +1926,7 @@ static void add_cs_detail_template_1(MCInst *MI, arm_op_group op_group, if (Shift > 0) { add_cs_detail_RegImmShift(MI, ARM_AM_uxtw, Shift); } + ARM_get_detail_op(MI, 0)->mem.format = ARM_MEM_FMT_REG_REG; ARM_set_mem_access(MI, false); break; } @@ -1973,6 +2085,7 @@ void ARM_set_detail_op_reg(MCInst *MI, unsigned OpNum, arm_reg Reg) ARM_get_detail_op(MI, 0)->type = ARM_OP_REG; ARM_get_detail_op(MI, 0)->reg = Reg; ARM_get_detail_op(MI, 0)->access = map_get_op_access(MI, OpNum); + 
ARM_get_detail_op(MI, 0)->encoding = map_get_op_encoding(MI, OpNum); ARM_inc_op_count(MI); } @@ -1991,12 +2104,13 @@ void ARM_set_detail_op_imm(MCInst *MI, unsigned OpNum, arm_op_type ImmType, ARM_get_detail_op(MI, 0)->type = ImmType; ARM_get_detail_op(MI, 0)->imm = Imm; ARM_get_detail_op(MI, 0)->access = map_get_op_access(MI, OpNum); + ARM_get_detail_op(MI, 0)->encoding = map_get_op_encoding(MI, OpNum); ARM_inc_op_count(MI); } /// Adds the operand as to the previously added memory operand. void ARM_set_detail_op_mem_offset(MCInst *MI, unsigned OpNum, uint64_t Val, - bool subtracted) + bool subtracted, arm_mem_format Format) { assert(map_get_op_type(MI, OpNum) & CS_OP_MEM); @@ -2013,6 +2127,7 @@ void ARM_set_detail_op_mem_offset(MCInst *MI, unsigned OpNum, uint64_t Val, else assert(0 && "Memory type incorrect."); ARM_get_detail_op(MI, 0)->subtracted = subtracted; + ARM_get_detail_op(MI, 0)->mem.format = Format; if (!doing_mem(MI)) ARM_inc_op_count(MI); @@ -2070,6 +2185,23 @@ void ARM_set_detail_op_mem(MCInst *MI, unsigned OpNum, bool is_index_reg, ARM_get_detail_op(MI, 0)->type = ARM_OP_MEM; ARM_get_detail_op(MI, 0)->access = map_get_op_access(MI, OpNum); + cs_operand_encoding *encoding = &ARM_get_detail_op(MI, 0)->encoding; + cs_operand_encoding new_encoding = map_get_op_encoding(MI, OpNum); + + // If the operand's encoding already contains some pieces and those pieces aren't + // the same as the ones we just looked up then we merge the rest of the encoding. 
+ if (encoding->operand_pieces_count && + encoding->indexes[0] != new_encoding.indexes[0]) { + for (uint8_t i = 0; i != new_encoding.operand_pieces_count; + ++i) { + encoding->indexes[encoding->operand_pieces_count] = + new_encoding.indexes[i]; + encoding->sizes[encoding->operand_pieces_count++] = + new_encoding.sizes[i]; + } + return; + } + *encoding = new_encoding; } /// Sets the neon_lane in the previous operand to the value of @@ -2087,7 +2219,7 @@ void ARM_set_detail_op_neon_lane(MCInst *MI, unsigned OpNum) /// Adds a System Register and increments op_count by one. /// @type ARM_OP_SYSREG, ARM_OP_BANKEDREG, ARM_OP_SYSM... /// @p Mask is the MSR mask or UINT8_MAX if not set. -void ARM_set_detail_op_sysop(MCInst *MI, int Val, arm_op_type type, +void ARM_set_detail_op_sysop(MCInst *MI, unsigned OpNum, int Val, arm_op_type type, bool IsOutReg, uint8_t Mask, uint16_t Sysm) { if (!detail_is_set(MI)) @@ -2112,6 +2244,8 @@ void ARM_set_detail_op_sysop(MCInst *MI, int Val, arm_op_type type, ARM_get_detail_op(MI, 0)->sysop.sysm = Sysm; ARM_get_detail_op(MI, 0)->sysop.msr_mask = Mask; ARM_get_detail_op(MI, 0)->access = IsOutReg ? 
CS_AC_WRITE : CS_AC_READ; + if (OpNum != UINT32_MAX) + ARM_get_detail_op(MI, 0)->encoding = map_get_op_encoding(MI, OpNum); ARM_inc_op_count(MI); } @@ -2123,6 +2257,7 @@ void ARM_set_detail_op_float(MCInst *MI, unsigned OpNum, uint64_t Imm) return; ARM_get_detail_op(MI, 0)->type = ARM_OP_FP; ARM_get_detail_op(MI, 0)->fp = ARM_AM_getFPImmFloat(Imm); + ARM_get_detail_op(MI, 0)->encoding = map_get_op_encoding(MI, OpNum); ARM_inc_op_count(MI); } diff --git a/arch/ARM/ARMMapping.h b/arch/ARM/ARMMapping.h index 045530302c..e153c7cc73 100644 --- a/arch/ARM/ARMMapping.h +++ b/arch/ARM/ARMMapping.h @@ -68,7 +68,7 @@ void ARM_insert_detail_op_reg_at(MCInst *MI, unsigned index, arm_reg Reg, void ARM_insert_detail_op_imm_at(MCInst *MI, unsigned index, int64_t Val, cs_ac_type access); void ARM_set_detail_op_reg(MCInst *MI, unsigned OpNum, arm_reg Reg); -void ARM_set_detail_op_sysop(MCInst *MI, int SysReg, arm_op_type type, +void ARM_set_detail_op_sysop(MCInst *MI, unsigned OpNum, int SysReg, arm_op_type type, bool IsOutReg, uint8_t Mask, uint16_t Sysm); void ARM_set_detail_op_imm(MCInst *MI, unsigned OpNum, arm_op_type ImmType, int64_t Imm); @@ -76,7 +76,7 @@ void ARM_set_detail_op_float(MCInst *MI, unsigned OpNum, uint64_t Imm); void ARM_set_detail_op_mem(MCInst *MI, unsigned OpNum, bool is_index_reg, int scale, int lshift, uint64_t Val); void ARM_set_detail_op_mem_offset(MCInst *MI, unsigned OpNum, uint64_t Val, - bool subtracted); + bool subtracted, arm_mem_format Format); void ARM_set_detail_op_neon_lane(MCInst *MI, unsigned OpNum); void ARM_check_updates_flags(MCInst *MI); diff --git a/include/capstone/arm.h b/include/capstone/arm.h index dfc69944cf..fdfe0d1118 100644 --- a/include/capstone/arm.h +++ b/include/capstone/arm.h @@ -44,70 +44,47 @@ typedef enum CondCodes { inline static ARMCC_CondCodes ARMCC_getOppositeCondition(ARMCC_CondCodes CC) { - switch (CC) { - default: - // llvm_unreachable("Unknown condition code"); - assert(0); - case ARMCC_EQ: - return ARMCC_NE; - 
case ARMCC_NE: - return ARMCC_EQ; - case ARMCC_HS: - return ARMCC_LO; - case ARMCC_LO: - return ARMCC_HS; - case ARMCC_MI: - return ARMCC_PL; - case ARMCC_PL: - return ARMCC_MI; - case ARMCC_VS: - return ARMCC_VC; - case ARMCC_VC: - return ARMCC_VS; - case ARMCC_HI: - return ARMCC_LS; - case ARMCC_LS: - return ARMCC_HI; - case ARMCC_GE: - return ARMCC_LT; - case ARMCC_LT: - return ARMCC_GE; - case ARMCC_GT: - return ARMCC_LE; - case ARMCC_LE: - return ARMCC_GT; - } + // clang-format off + switch (CC) { + default: assert(0); + case ARMCC_EQ: return ARMCC_NE; + case ARMCC_NE: return ARMCC_EQ; + case ARMCC_HS: return ARMCC_LO; + case ARMCC_LO: return ARMCC_HS; + case ARMCC_MI: return ARMCC_PL; + case ARMCC_PL: return ARMCC_MI; + case ARMCC_VS: return ARMCC_VC; + case ARMCC_VC: return ARMCC_VS; + case ARMCC_HI: return ARMCC_LS; + case ARMCC_LS: return ARMCC_HI; + case ARMCC_GE: return ARMCC_LT; + case ARMCC_LT: return ARMCC_GE; + case ARMCC_GT: return ARMCC_LE; + case ARMCC_LE: return ARMCC_GT; + } + // clang-format on } -/// getSwappedCondition - assume the flags are set by MI(a,b), return +/// ARMCC_getSwappedCondition - assume the flags are set by MI(a,b), return /// the condition code if we modify the instructions such that flags are /// set by MI(b,a). 
inline static ARMCC_CondCodes ARMCC_getSwappedCondition(ARMCC_CondCodes CC) { - switch (CC) { - default: - return ARMCC_AL; - case ARMCC_EQ: - return ARMCC_EQ; - case ARMCC_NE: - return ARMCC_NE; - case ARMCC_HS: - return ARMCC_LS; - case ARMCC_LO: - return ARMCC_HI; - case ARMCC_HI: - return ARMCC_LO; - case ARMCC_LS: - return ARMCC_HS; - case ARMCC_GE: - return ARMCC_LE; - case ARMCC_LT: - return ARMCC_GT; - case ARMCC_GT: - return ARMCC_LT; - case ARMCC_LE: - return ARMCC_GE; - } + // clang-format off + switch (CC) { + default: return ARMCC_AL; + case ARMCC_EQ: return ARMCC_EQ; + case ARMCC_NE: return ARMCC_NE; + case ARMCC_HS: return ARMCC_LS; + case ARMCC_LO: return ARMCC_HI; + case ARMCC_HI: return ARMCC_LO; + case ARMCC_LS: return ARMCC_HS; + case ARMCC_GE: return ARMCC_LE; + case ARMCC_LT: return ARMCC_GT; + case ARMCC_GT: return ARMCC_LT; + case ARMCC_LE: return ARMCC_GE; + } + // clang-format on } typedef enum VPTCodes { @@ -159,93 +136,64 @@ inline static const char *ARMVPTPredToString(ARMVCC_VPTCodes CC) return ""; } -inline static unsigned ARMVectorCondCodeFromString(const char CC) +inline static ARMVCC_VPTCodes ARMVectorCondCodeFromString(const char CC) { - switch (CC) { - default: - return ~0U; - case 't': - return ARMVCC_Then; - case 'e': - return ARMVCC_Else; - } + switch (CC) { + default: + return (ARMVCC_VPTCodes)~0U; + case 't': + return ARMVCC_Then; + case 'e': + return ARMVCC_Else; + } } inline static const char *ARMCondCodeToString(ARMCC_CondCodes CC) { - switch (CC) { - default: - assert(0 && "Unknown condition code"); - case ARMCC_EQ: - return "eq"; - case ARMCC_NE: - return "ne"; - case ARMCC_HS: - return "hs"; - case ARMCC_LO: - return "lo"; - case ARMCC_MI: - return "mi"; - case ARMCC_PL: - return "pl"; - case ARMCC_VS: - return "vs"; - case ARMCC_VC: - return "vc"; - case ARMCC_HI: - return "hi"; - case ARMCC_LS: - return "ls"; - case ARMCC_GE: - return "ge"; - case ARMCC_LT: - return "lt"; - case ARMCC_GT: - return "gt"; - case 
ARMCC_LE: - return "le"; - case ARMCC_AL: - return "al"; - } + // clang-format off + switch (CC) { + default: assert(0 && "Unknown condition code"); + case ARMCC_EQ: return "eq"; + case ARMCC_NE: return "ne"; + case ARMCC_HS: return "hs"; + case ARMCC_LO: return "lo"; + case ARMCC_MI: return "mi"; + case ARMCC_PL: return "pl"; + case ARMCC_VS: return "vs"; + case ARMCC_VC: return "vc"; + case ARMCC_HI: return "hi"; + case ARMCC_LS: return "ls"; + case ARMCC_GE: return "ge"; + case ARMCC_LT: return "lt"; + case ARMCC_GT: return "gt"; + case ARMCC_LE: return "le"; + case ARMCC_AL: return "al"; + } + // clang-format on } -inline static unsigned ARMCondCodeFromString(const char *CC) +inline static ARMCC_CondCodes ARMCondCodeFromString(const char *CC) { - if (strcmp("eq", CC) == 0) - return ARMCC_EQ; - else if (strcmp("ne", CC) == 0) - return ARMCC_NE; - else if (strcmp("hs", CC) == 0) - return ARMCC_HS; - else if (strcmp("cs", CC) == 0) - return ARMCC_HS; - else if (strcmp("lo", CC) == 0) - return ARMCC_LO; - else if (strcmp("cc", CC) == 0) - return ARMCC_LO; - else if (strcmp("mi", CC) == 0) - return ARMCC_MI; - else if (strcmp("pl", CC) == 0) - return ARMCC_PL; - else if (strcmp("vs", CC) == 0) - return ARMCC_VS; - else if (strcmp("vc", CC) == 0) - return ARMCC_VC; - else if (strcmp("hi", CC) == 0) - return ARMCC_HI; - else if (strcmp("ls", CC) == 0) - return ARMCC_LS; - else if (strcmp("ge", CC) == 0) - return ARMCC_GE; - else if (strcmp("lt", CC) == 0) - return ARMCC_LT; - else if (strcmp("gt", CC) == 0) - return ARMCC_GT; - else if (strcmp("le", CC) == 0) - return ARMCC_LE; - else if (strcmp("al", CC) == 0) - return ARMCC_AL; - return (~0U); + // clang-format off + if (!strcmp("eq", CC)) return ARMCC_EQ; + if (!strcmp("ne", CC)) return ARMCC_NE; + if (!strcmp("hs", CC)) return ARMCC_HS; + if (!strcmp("cs", CC)) return ARMCC_HS; + if (!strcmp("lo", CC)) return ARMCC_LO; + if (!strcmp("cc", CC)) return ARMCC_LO; + if (!strcmp("mi", CC)) return ARMCC_MI; + if 
(!strcmp("pl", CC)) return ARMCC_PL; + if (!strcmp("vs", CC)) return ARMCC_VS; + if (!strcmp("vc", CC)) return ARMCC_VC; + if (!strcmp("hi", CC)) return ARMCC_HI; + if (!strcmp("ls", CC)) return ARMCC_LS; + if (!strcmp("ge", CC)) return ARMCC_GE; + if (!strcmp("lt", CC)) return ARMCC_LT; + if (!strcmp("gt", CC)) return ARMCC_GT; + if (!strcmp("le", CC)) return ARMCC_LE; + if (!strcmp("al", CC)) return ARMCC_AL; + // clang-format on + return (ARMCC_CondCodes)(~0U); } /// ARM shift type @@ -430,6 +378,46 @@ typedef enum arm_op_type { ARM_OP_MEM = CS_OP_MEM, ///< Memory operand } arm_op_type; +inline static const char *ARMOpTypeToString(arm_op_type type) +{ + // clang-format off + switch (type) { + default: assert(0 && "Unknown operand type"); + case ARM_OP_INVALID: return "ARM_OP_INVALID"; + case ARM_OP_REG: return "ARM_OP_REG"; + case ARM_OP_IMM: return "ARM_OP_IMM"; + case ARM_OP_FP: return "ARM_OP_FP"; + case ARM_OP_PRED: return "ARM_OP_PRED"; + case ARM_OP_CIMM: return "ARM_OP_CIMM"; + case ARM_OP_PIMM: return "ARM_OP_PIMM"; + case ARM_OP_SETEND: return "ARM_OP_SETEND"; + case ARM_OP_SYSREG: return "ARM_OP_SYSREG"; + case ARM_OP_VPRED_R: return "ARM_OP_VPRED_R"; + case ARM_OP_VPRED_N: return "ARM_OP_VPRED_N"; + case ARM_OP_MEM: return "ARM_OP_MEM"; + } + // clang-format on +} + +inline static arm_op_type ARMOpTypeFromString(const char *type) +{ + // clang-format off + if (!strcmp("ARM_OP_INVALID", type)) return ARM_OP_INVALID; + if (!strcmp("ARM_OP_REG", type)) return ARM_OP_REG; + if (!strcmp("ARM_OP_IMM", type)) return ARM_OP_IMM; + if (!strcmp("ARM_OP_FP", type)) return ARM_OP_FP; + if (!strcmp("ARM_OP_PRED", type)) return ARM_OP_PRED; + if (!strcmp("ARM_OP_CIMM", type)) return ARM_OP_CIMM; + if (!strcmp("ARM_OP_PIMM", type)) return ARM_OP_PIMM; + if (!strcmp("ARM_OP_SETEND", type)) return ARM_OP_SETEND; + if (!strcmp("ARM_OP_SYSREG", type)) return ARM_OP_SYSREG; + if (!strcmp("ARM_OP_VPRED_R", type)) return ARM_OP_VPRED_R; + if (!strcmp("ARM_OP_VPRED_N", 
type)) return ARM_OP_VPRED_N; + if (!strcmp("ARM_OP_MEM", type)) return ARM_OP_MEM; + // clang-format on + return (arm_op_type)(~0U); +} + /// Operand type for SETEND instruction typedef enum arm_setend_type { ARM_SETEND_INVALID = 0, ///< Uninitialized. @@ -437,6 +425,29 @@ typedef enum arm_setend_type { ARM_SETEND_LE, ///< LE operand } arm_setend_type; +inline static const char *ARMSetEndTypeToString(arm_setend_type se) +{ + // clang-format off + switch (se) { + default: assert(0 && "Unknown setend type"); + case ARM_SETEND_INVALID: return "ARM_SETEND_INVALID"; + case ARM_SETEND_BE: return "ARM_SETEND_BE"; + case ARM_SETEND_LE: return "ARM_SETEND_LE"; + } + // clang-format on +} + +inline static arm_setend_type ARMSetEndTypeFromString(const char *se) +{ + // clang-format off + if (!strcmp("ARM_SETEND_INVALID", se)) return ARM_SETEND_INVALID; + if (!strcmp("ARM_SETEND_BE", se)) return ARM_SETEND_BE; + if (!strcmp("ARM_SETEND_LE", se)) return ARM_SETEND_LE; + // clang-format on + assert(0 && "Unknown setend type string"); + return (arm_setend_type)(~0U); +} + typedef enum arm_cpsmode_type { ARM_CPSMODE_INVALID = 0, ARM_CPSMODE_IE = 2, @@ -835,6 +846,67 @@ typedef enum arm_reg { ARM_REG_IP = ARM_REG_R12, } arm_reg; +/// The format a memory operand follows +typedef enum arm_mem_format { + ARM_MEM_FMT_NONE, + ARM_MEM_FMT_U_REG_IMM, ///< U flag, base register, immediate + ARM_MEM_FMT_U_REG_REG, ///< U flag, base register, index (or offset) register + ARM_MEM_FMT_U_REG_IMM2, ///< U flag, base register, two immediate offsets (that form the full immediate) + ARM_MEM_FMT_REG_U_IMM, ///< base register, U flag, immediate offset + ARM_MEM_FMT_IMM_REG, ///< immediate offset first then base register + ARM_MEM_FMT_REG_IMM, ///< base register, immediate offset + ARM_MEM_FMT_IREG_BREG, ///< index register, base register (in this order) + ARM_MEM_FMT_REG_ALIGN_REG, ///< base register, alignment, offset register + ARM_MEM_FMT_U_REG_SHIFT_REG, ///< U flag, base register, shift value 
(5 bits) with shift type (2 bits), index register + ARM_MEM_FMT_REG_SHIFT_REG, ///< base register, shift, index register + ARM_MEM_FMT_REG_REG, ///< base register, index register + ARM_MEM_FMT_REG, ///< only base register (no offset) + ARM_MEM_FMT_IMM, ///< only immediate offset (implies that the base register is not encoded) + ARM_MEM_FMT_INVALID = 0xffff, +} arm_mem_format; + +inline static const char *ARMMemFormatToString(arm_mem_format format) +{ + switch (format) { + default: + assert(0 && "Unknown memory operand format"); + return "invalid"; + case ARM_MEM_FMT_NONE: return "ARM_MEM_NONE"; + case ARM_MEM_FMT_U_REG_IMM: return "ARM_MEM_U_REG_IMM"; + case ARM_MEM_FMT_U_REG_REG: return "ARM_MEM_U_REG_REG"; + case ARM_MEM_FMT_U_REG_IMM2: return "ARM_MEM_U_REG_IMM2"; + case ARM_MEM_FMT_REG_U_IMM: return "ARM_MEM_REG_U_IMM"; + case ARM_MEM_FMT_IMM_REG: return "ARM_MEM_IMM_REG"; + case ARM_MEM_FMT_REG_IMM: return "ARM_MEM_REG_IMM"; + case ARM_MEM_FMT_IREG_BREG: return "ARM_MEM_IREG_BREG"; + case ARM_MEM_FMT_REG_ALIGN_REG: return "ARM_MEM_REG_ALIGN_REG"; + case ARM_MEM_FMT_U_REG_SHIFT_REG: return "ARM_MEM_U_REG_SHIFT_REG"; + case ARM_MEM_FMT_REG_SHIFT_REG: return "ARM_MEM_REG_SHIFT_REG"; + case ARM_MEM_FMT_REG_REG: return "ARM_MEM_REG_REG"; + case ARM_MEM_FMT_REG: return "ARM_MEM_REG"; + case ARM_MEM_FMT_IMM: return "ARM_MEM_IMM"; + } +} + +inline static arm_mem_format ARMMemFormatFromString(const char *format) +{ + if (!strcmp("ARM_MEM_NONE", format)) return ARM_MEM_FMT_NONE; + if (!strcmp("ARM_MEM_U_REG_IMM", format)) return ARM_MEM_FMT_U_REG_IMM; + if (!strcmp("ARM_MEM_U_REG_REG", format)) return ARM_MEM_FMT_U_REG_REG; + if (!strcmp("ARM_MEM_U_REG_IMM2", format)) return ARM_MEM_FMT_U_REG_IMM2; + if (!strcmp("ARM_MEM_REG_U_IMM", format)) return ARM_MEM_FMT_REG_U_IMM; + if (!strcmp("ARM_MEM_IMM_REG", format)) return ARM_MEM_FMT_IMM_REG; + if (!strcmp("ARM_MEM_REG_IMM", format)) return ARM_MEM_FMT_REG_IMM; + if (!strcmp("ARM_MEM_IREG_BREG", format)) return 
ARM_MEM_FMT_IREG_BREG; + if (!strcmp("ARM_MEM_REG_ALIGN_REG", format)) return ARM_MEM_FMT_REG_ALIGN_REG; + if (!strcmp("ARM_MEM_U_REG_SHIFT_REG", format)) return ARM_MEM_FMT_U_REG_SHIFT_REG; + if (!strcmp("ARM_MEM_REG_SHIFT_REG", format)) return ARM_MEM_FMT_REG_SHIFT_REG; + if (!strcmp("ARM_MEM_REG_REG", format)) return ARM_MEM_FMT_REG_REG; + if (!strcmp("ARM_MEM_REG", format)) return ARM_MEM_FMT_REG; + if (!strcmp("ARM_MEM_IMM", format)) return ARM_MEM_FMT_IMM; + return ARM_MEM_FMT_INVALID; +} + /// Instruction's operand referring to memory /// This is associated with ARM_OP_MEM operand type above typedef struct arm_op_mem { @@ -846,6 +918,7 @@ typedef struct arm_op_mem { /// NOTE: this value can also be fetched via operand.shift.value int lshift; unsigned align; ///< Alignment of base register. 0 If not set. + arm_mem_format format; ///< The format of the memory operand } arm_op_mem; typedef struct { @@ -888,6 +961,8 @@ typedef struct cs_arm_op { /// Neon lane index for NEON instructions (or -1 if irrelevant) int8_t neon_lane; + + cs_operand_encoding encoding; ///< The encoding of the operand. } cs_arm_op; #define MAX_ARM_OPS 36 diff --git a/include/capstone/capstone.h b/include/capstone/capstone.h index afd11f444e..504fa84b26 100644 --- a/include/capstone/capstone.h +++ b/include/capstone/capstone.h @@ -352,6 +352,39 @@ typedef struct cs_opt_skipdata { void *user_data; } cs_opt_skipdata; +#define MAX_NUM_OP_ENC_ITEMS 8 +#define MAX_NUM_OPC_BITS 64 + +/// Provides information about an operand's encoding in the instruction +typedef struct cs_operand_encoding { + /// Specifies how many pieces that form the full operand are encoded in the + /// instruction separately. For example if count is 2 it means a few bits of + /// this operand are in one location and the rest on another. If it's 0 then + /// the operand is NOT encoded anywhere in the instruction. + uint8_t operand_pieces_count; + /// The bit positions of each piece that form the full operand in order. 
If + /// there is only one piece then there is only one index as well. Likewise + /// if there are 4 pieces, there are 4 indexes and so on. + uint8_t indexes[MAX_NUM_OP_ENC_ITEMS]; + /// The bit widths of each piece that form the full operand in order. If + /// there is only one piece then there is only one size as well. Likewise if + /// there are 4 pieces, there are 4 sizes and so on. + uint8_t sizes[MAX_NUM_OP_ENC_ITEMS]; +} cs_operand_encoding; + +/// Provides information about an operand's opcode in the instruction +typedef struct cs_opcode_encoding { + /// Contains all the bits (in order) that form the full opcode. + /// Note that each bit is NOT necessarily next to each other in the + /// instruction bytes. (see below) + uint64_t bits; + /// As mentioned above, since the opcode bits may not be next to each other + /// this array comes to the rescue by providing the location of each bit + /// individually. + uint8_t indexes[MAX_NUM_OPC_BITS]; + uint8_t + bit_count; ///< Specifies the number of bits that form the full opcode. +} cs_opcode_encoding; #include "arm.h" #include "aarch64.h" @@ -396,6 +429,8 @@ typedef struct cs_detail { bool writeback; ///< Instruction has writeback operands. + cs_opcode_encoding opcode_encoding; ///< The encoding of the opcode. 
(If bit count is 0 then no info is provided) + /// Architecture-specific instruction info union { cs_x86 x86; ///< X86 architecture, including 16-bit, 32-bit & 64-bit mode @@ -475,7 +510,6 @@ typedef struct cs_insn { cs_detail *detail; } cs_insn; - /// Calculate the offset of a disassembled instruction in its buffer, given its position /// in its array of disassembled insn /// NOTE: this macro works with position (>=1), not index diff --git a/suite/auto-sync/Updater/CppTranslator/Differ.py b/suite/auto-sync/Updater/CppTranslator/Differ.py index b3dd44f778..332c4213c3 100755 --- a/suite/auto-sync/Updater/CppTranslator/Differ.py +++ b/suite/auto-sync/Updater/CppTranslator/Differ.py @@ -137,9 +137,9 @@ class Differ: ts_cpp_lang: Language = None parser: Parser = None - translated_files: [Path] - diff_dest_files: [Path] = list() - old_files: [Path] + translated_files: list[Path] + diff_dest_files: list[Path] = list() + old_files: list[Path] conf_arch: dict conf_general: dict tree: Tree = None