diff --git a/librz/arch/fcn.c b/librz/arch/fcn.c index a27f119c408..4017afb2f11 100644 --- a/librz/arch/fcn.c +++ b/librz/arch/fcn.c @@ -538,6 +538,17 @@ static inline void set_bb_branches(RZ_OUT RzAnalysisBlock *bb, const ut64 jump, bb->fail = fail; } +/** + * \brief Peeks into the memory at the jump address \p jmp_addr. + * Returns true if rz_analysis_is_prelude() finds a function prelude in the 32 bytes read from there. + * False otherwise. The result of the read itself is not checked; the buffer is zero-initialized. + */ +static inline bool jumps_to_prelude(RzAnalysis *analysis, ut64 jmp_addr) { + ut8 buf[32] = { 0 }; + (void)analysis->iob.read_at(analysis->iob.io, jmp_addr, (ut8 *)buf, sizeof(buf)); + return rz_analysis_is_prelude(analysis, buf, sizeof(buf)); +} + /** * \brief Analyses the given task item \p item for branches. * @@ -1092,26 +1103,21 @@ static RzAnalysisBBEndCause run_basic_block_analysis(RzAnalysisTaskItem *item, R if (!overlapped) { set_bb_branches(bb, op.jump, UT64_MAX); } + if (jumps_to_prelude(analysis, op.jump) || op.type & RZ_ANALYSIS_OP_TYPE_TAIL) { + rz_analysis_xrefs_set(analysis, op.addr, op.jump, RZ_ANALYSIS_XREF_TYPE_CALL); + if (is_hexagon) { + // After the jump should always follow a dealloc instruction. + // It is not included in the block, if we do RET_END here. 
+ break; + } + gotoBeach(RZ_ANALYSIS_RET_END); + } + rz_analysis_task_item_new(analysis, tasks, fcn, NULL, op.jump, sp); if (continue_after_jump && (is_hexagon || (is_dalvik && op.cond == RZ_TYPE_COND_EXCEPTION))) { rz_analysis_task_item_new(analysis, tasks, fcn, NULL, op.addr + op.size, sp); gotoBeach(RZ_ANALYSIS_RET_BRANCH); } - int tc = analysis->opt.tailcall; - if (tc) { - int diff = op.jump - op.addr; - if (tc < 0) { - ut8 buf[32]; - (void)analysis->iob.read_at(analysis->iob.io, op.jump, (ut8 *)buf, sizeof(buf)); - if (rz_analysis_is_prelude(analysis, buf, sizeof(buf))) { - rz_analysis_task_item_new(analysis, tasks, fcn, NULL, op.jump, sp); - } - } else if (RZ_ABS(diff) > tc) { - (void)rz_analysis_xrefs_set(analysis, op.addr, op.jump, RZ_ANALYSIS_XREF_TYPE_CALL); - rz_analysis_task_item_new(analysis, tasks, fcn, NULL, op.jump, sp); - gotoBeach(RZ_ANALYSIS_RET_END); - } - } goto beach; break; case RZ_ANALYSIS_OP_TYPE_SUB: diff --git a/librz/arch/isa/hexagon/hexagon_arch.c b/librz/arch/isa/hexagon/hexagon_arch.c index c548344ab99..12321a8778a 100644 --- a/librz/arch/isa/hexagon/hexagon_arch.c +++ b/librz/arch/isa/hexagon/hexagon_arch.c @@ -9,6 +9,8 @@ // Do not edit. Repository of code generator: // https://github.com/rizinorg/rz-hexagon +#include +#include #include #include #include @@ -1074,6 +1076,51 @@ static void copy_asm_ana_ops(const HexState *state, RZ_BORROW HexReversedOpcode } } +/** + * \brief Checks if the packet \p pkt has a jump and deallocframe instructions. + * This indicates it is a tail call. + * It sets the relevant flags accordingly. + * + * \param pkt The instruction packet to check. 
+ */
+RZ_IPI void hexagon_pkt_mark_tail_calls(HexPkt *pkt) {
+	rz_return_if_fail(pkt);
+	if (!pkt->last_instr_present || rz_list_length(pkt->bin) < 2) {
+		// Incomplete packet, or too short to hold both jump and deallocframe.
+		return;
+	}
+	// deallocframe is a store/load instruction and can only inhabit slot 0 and 1.
+	// Packets are encoded from slot 3 down to slot 0, so (assuming pkt->bin is
+	// in address order) it sits near the END of the list. Its list index depends
+	// on the packet length, hence the whole packet is scanned.
+	bool has_dealloc = false;
+	bool has_jump = false;
+	RzListIter *it;
+	HexInsnContainer *hic;
+	HexInsnContainer *last = NULL;
+	rz_list_foreach (pkt->bin, it, hic) {
+		has_dealloc |= hic->identifier == HEX_INS_L2_DEALLOCFRAME;
+		has_jump |= hic->identifier == HEX_INS_J2_JUMP;
+		last = hic;
+	}
+	if (!has_dealloc || !has_jump) {
+		return;
+	}
+	// Every instruction of the packet is part of the tail call.
+	rz_list_foreach (pkt->bin, it, hic) {
+		hic->ana_op.type |= RZ_ANALYSIS_OP_TYPE_TAIL;
+	}
+	// The last instruction of the packet ends the block.
+	last->ana_op.eob = true;
+	// This is nonsense, and we can only hope it doesn't
+	// break anything. The instruction is not a return instruction.
+	// But there is currently no other way to signal the
+	// block analysis that the function ends here.
+	// eob (end of block) is ignored.
+	// So until RzArch is done, there is no other way.
+	last->ana_op.type = RZ_ANALYSIS_OP_TYPE_TAIL | RZ_ANALYSIS_OP_TYPE_RET;
+}
+
 /**
  * \brief Reverses a given opcode and copies the result into one of the rizin structs in rz_reverse
  * if \p copy_result is set.
diff --git a/librz/arch/isa/hexagon/hexagon_arch.h b/librz/arch/isa/hexagon/hexagon_arch.h index e29c4b53d79..a41bf2c14d3 100644 --- a/librz/arch/isa/hexagon/hexagon_arch.h +++ b/librz/arch/isa/hexagon/hexagon_arch.h @@ -79,4 +79,5 @@ RZ_API void hex_move_insn_container(RZ_OUT HexInsnContainer *dest, const HexInsn RZ_API HexPkt *hex_get_pkt(RZ_BORROW HexState *state, const ut32 addr); RZ_API HexInsnContainer *hex_get_hic_at_addr(HexState *state, const ut32 addr); RZ_API const HexOp hex_nreg_to_op(const HexInsnPktBundle *bundle, const char isa_id); -#endif \ No newline at end of file +RZ_IPI void hexagon_pkt_mark_tail_calls(HexPkt *pkt); +#endif diff --git a/librz/arch/isa/hexagon/hexagon_disas.c b/librz/arch/isa/hexagon/hexagon_disas.c index e23bc5e00d2..b8c452450ae 100644 --- a/librz/arch/isa/hexagon/hexagon_disas.c +++ b/librz/arch/isa/hexagon/hexagon_disas.c @@ -34293,5 +34293,6 @@ int hexagon_disasm_instruction(HexState *state, const ut32 hi_u32, RZ_INOUT HexI snprintf(hic->bin.insn->text_infix, sizeof(hic->bin.insn->text_infix), "invalid"); } hex_set_hic_text(hic); + hexagon_pkt_mark_tail_calls(pkt); return 4; } diff --git a/librz/core/cconfig.c b/librz/core/cconfig.c index 916670c2289..9665c4679a6 100644 --- a/librz/core/cconfig.c +++ b/librz/core/cconfig.c @@ -139,12 +139,6 @@ static bool cb_analysis_jmpretpoline(void *user, void *data) { core->analysis->opt.retpoline = node->i_value; return true; } -static bool cb_analysis_jmptailcall(void *user, void *data) { - RzCore *core = (RzCore *)user; - RzConfigNode *node = (RzConfigNode *)data; - core->analysis->opt.tailcall = node->i_value; - return true; -} static bool cb_analysis_armthumb(void *user, void *data) { RzCore *core = (RzCore *)user; @@ -3003,7 +2997,6 @@ RZ_API int rz_core_config_init(RzCore *core) { NULL); SETI("analysis.timeout", 0, "Stop analyzing after a couple of seconds"); SETCB("analysis.jmp.retpoline", "true", &cb_analysis_jmpretpoline, "Analyze retpolines, may be slower if not needed"); - 
SETICB("analysis.jmp.tailcall", 0, &cb_analysis_jmptailcall, "Consume a branch as a call if delta is big"); SETCB("analysis.armthumb", "false", &cb_analysis_armthumb, "aae computes arm/thumb changes (lot of false positives ahead)"); SETCB("analysis.jmp.after", "true", &cb_analysis_afterjmp, "Continue analysis after jmp/ujmp"); diff --git a/librz/include/rz_analysis.h b/librz/include/rz_analysis.h index b4915444984..3d326ac92f4 100644 --- a/librz/include/rz_analysis.h +++ b/librz/include/rz_analysis.h @@ -257,7 +257,7 @@ typedef enum { // XXX: this definition is plain wrong. use enum or empower bits #define RZ_ANALYSIS_OP_TYPE_MASK 0x8000ffff -#define RZ_ANALYSIS_OP_HINT_MASK 0xf0000000 +#define RZ_ANALYSIS_OP_HINT_MASK 0xff000000 typedef enum { RZ_ANALYSIS_OP_TYPE_COND = 0x80000000, // TODO must be moved to prefix? // TODO: MOVE TO PREFIX .. it is used by analysis_java.. must be updated @@ -266,6 +266,7 @@ typedef enum { RZ_ANALYSIS_OP_TYPE_REG = 0x10000000, // operand is a register RZ_ANALYSIS_OP_TYPE_IND = 0x08000000, // operand is indirect RZ_ANALYSIS_OP_TYPE_SIMD = 0x04000000, // SIMD + RZ_ANALYSIS_OP_TYPE_TAIL = 0x02000000, ///< Part of a tail call. This effectively marks the end of a sub-routine. 
RZ_ANALYSIS_OP_TYPE_NULL = 0, RZ_ANALYSIS_OP_TYPE_JMP = 1, /* mandatory jump */ RZ_ANALYSIS_OP_TYPE_UJMP = 2, /* unknown jump (register or so) */ @@ -432,7 +433,6 @@ typedef struct rz_analysis_options_t { bool pushret; // analyze push+ret as jmp bool armthumb; // bool delay; - int tailcall; bool retpoline; } RzAnalysisOptions; diff --git a/test/db/analysis/hexagon b/test/db/analysis/hexagon index 0d0ea55a067..98f655c1f2a 100644 --- a/test/db/analysis/hexagon +++ b/test/db/analysis/hexagon @@ -1434,3 +1434,29 @@ offset - 0 1 2 3 4 5 3400 0008 3400 0000 3400 0000 7cff ffff EOF RUN + +NAME=hexagon tail calls +FILE=bins/elf/analysis/hexagon-hello-loop +CMDS=< 0x00008f20 / R1:0 = combine(R18,R19) +| 0x00008f24 | R3:2 = combine(R16,R17) +| 0x00008f28 | immext(##0xd980) +| 0x00008f2c \ R4 = ##obj._Mbstate +| 0x00008f30 [ R17:16 = memd(R29+#0x8) ; R19:18 = memd(R29+#0x0) +| 0x00008f34 / jump sym._Mbtowcx +\ 0x00008f38 \ LR:FP = deallocframe(FP):raw +EOF +RUN diff --git a/test/prj/v12-types-empty.rzdb b/test/prj/v12-types-empty.rzdb index fb5f437104a..88c269c1cb3 100644 --- a/test/prj/v12-types-empty.rzdb +++ b/test/prj/v12-types-empty.rzdb @@ -206,7 +206,6 @@ analysis.jmp.indir=false analysis.jmp.mid=true analysis.jmp.ref=true analysis.jmp.retpoline=true -analysis.jmp.tailcall=0 analysis.jmp.tbl=true analysis.jmp.tblmax=512 analysis.jmp.tblmaxoffset=0x00001000 diff --git a/test/prj/v2-typelink-callables.rzdb b/test/prj/v2-typelink-callables.rzdb index 99e482fc16e..071bcbb5492 100644 --- a/test/prj/v2-typelink-callables.rzdb +++ b/test/prj/v2-typelink-callables.rzdb @@ -2887,7 +2887,6 @@ analysis.jmp.indir=false analysis.jmp.mid=true analysis.jmp.ref=true analysis.jmp.retpoline=true -analysis.jmp.tailcall=0 analysis.jmp.tbl=true analysis.jmp.tblmax=512 analysis.limits=false diff --git a/test/prj/v2-types-empty.rzdb b/test/prj/v2-types-empty.rzdb index efe527a6985..21c1ad5a76a 100644 --- a/test/prj/v2-types-empty.rzdb +++ b/test/prj/v2-types-empty.rzdb @@ -206,7 +206,6 @@ 
analysis.jmp.indir=false analysis.jmp.mid=true analysis.jmp.ref=true analysis.jmp.retpoline=true -analysis.jmp.tailcall=0 analysis.jmp.tbl=true analysis.jmp.tblmax=512 analysis.limits=false