From fcf4f7182ffb16b32a13643c8d5c7b9d7e3a3990 Mon Sep 17 00:00:00 2001 From: Rot127 <45763064+Rot127@users.noreply.github.com> Date: Fri, 9 Feb 2024 14:23:36 +0000 Subject: [PATCH] Add different types of RzGraphNodeInfos. (#4189) * Add different types of GraphNodeInfos. This adds support for multiple graph node infos. It removes the assumption that information of a graph node must be some strings and an offset. * Increase buffer for printing graph node body * Don't hardcode maximum label size * Add graph node info for icfg * Add node type info to CFG. * Access address members properly for iCFG and CFG nodes * Update test. Function names are no longer saved. * Fix reachable double free of label * Split graph node type from its sub-type to make it less confusing. * Add integration tests for iCFG nodes. * Fix, add mising buffer read * Recognize more calls * Add test for iCFG generation and its node details. --- librz/analysis/fcn.c | 19 ++ librz/core/agraph.c | 33 +++- librz/core/cgraph.c | 141 ++++++++++++--- librz/include/rz_analysis.h | 1 + librz/include/rz_util/rz_graph_drawable.h | 73 +++++++- librz/util/graph_drawable.c | 209 ++++++++++++++++++---- test/db/cmd/cmd_graph | 134 +++++++------- test/integration/test_analysis_graph.c | 124 ++++++++++++- test/unit/test_analysis_class_graph.c | 18 +- 9 files changed, 605 insertions(+), 147 deletions(-) diff --git a/librz/analysis/fcn.c b/librz/analysis/fcn.c index e5c8567a0aa..f15feb921e9 100644 --- a/librz/analysis/fcn.c +++ b/librz/analysis/fcn.c @@ -3,6 +3,7 @@ // SPDX-FileCopyrightText: 2010-2021 pancake // SPDX-License-Identifier: LGPL-3.0-only +#include #include #include #include @@ -2692,3 +2693,21 @@ RZ_API RZ_OWN RzCallable *rz_analysis_function_derive_type(RzAnalysis *analysis, } return callable; } + +/** + * \brief Determines if the given function is a memory allocating function (malloc, calloc etc.). 
+ * + * The current methods of detection (tested in order): + * - Name matches regex ".*\.([mc]|(re))?alloc.*" + * + * \param fcn The function to test. + * + * \return true If the function \p fcn is considered a memory allocating. + * \return false Otherwise. + */ +RZ_API bool rz_analysis_function_is_malloc(const RzAnalysisFunction *fcn) { + rz_return_val_if_fail(fcn, false); + // TODO We need more metrics here. Just the name is pretty naive. + // E.g. we should compare it to signatures and other characterisitics. + return rz_regex_contains(".*\\.([mc]|(re))?alloc.*", fcn->name, RZ_REGEX_ZERO_TERMINATED, RZ_REGEX_EXTENDED, RZ_REGEX_DEFAULT); +} diff --git a/librz/core/agraph.c b/librz/core/agraph.c index 7894d6e5ff2..58598704836 100644 --- a/librz/core/agraph.c +++ b/librz/core/agraph.c @@ -3727,11 +3727,36 @@ RZ_API void rz_agraph_set_title(RzAGraph *g, const char *title) { */ RZ_API RZ_BORROW RzANode *rz_agraph_add_node_from_node_info(RZ_NONNULL const RzAGraph *g, RZ_NONNULL const RzGraphNodeInfo *info) { rz_return_val_if_fail(g && info, NULL); - RzANode *an = rz_agraph_add_node(g, info->title, info->body); - if (!an) { - return NULL; + RzANode *an = NULL; + char title[20] = { 0 }; + switch (info->type) { + default: + RZ_LOG_ERROR("Node type %d not handled.\n", info->type); + break; + case RZ_GRAPH_NODE_TYPE_DEFAULT: + an = rz_agraph_add_node(g, info->def.title, info->def.body); + if (!an) { + return NULL; + } + an->offset = info->def.offset; + break; + case RZ_GRAPH_NODE_TYPE_CFG: + rz_strf(title, "0x%" PFMT64x, info->cfg.address); + an = rz_agraph_add_node(g, title, ""); + if (!an) { + return NULL; + } + an->offset = info->cfg.address; + break; + case RZ_GRAPH_NODE_TYPE_ICFG: + rz_strf(title, "0x%" PFMT64x, info->icfg.address); + an = rz_agraph_add_node(g, title, ""); + if (!an) { + return NULL; + } + an->offset = info->icfg.address; + break; } - an->offset = info->offset; return an; } diff --git a/librz/core/cgraph.c b/librz/core/cgraph.c index 
4282e3b0033..1615f0d4d29 100644 --- a/librz/core/cgraph.c +++ b/librz/core/cgraph.c @@ -792,6 +792,25 @@ RZ_API RZ_OWN RzGraph /**/ *rz_core_graph_il(RZ_NONNULL RzCor return graph; } +static RzGraphNode *rz_graph_add_node_info_icfg(RzGraph /**/ *graph, const RzAnalysisFunction *fcn) { + rz_return_val_if_fail(graph, NULL); + RzGraphNodeInfo *data = NULL; + if (rz_analysis_function_is_malloc(fcn)) { + data = rz_graph_create_node_info_icfg(fcn->addr, RZ_GRAPH_NODE_TYPE_ICFG, RZ_GRAPH_NODE_SUBTYPE_ICFG_MALLOC); + } else { + data = rz_graph_create_node_info_icfg(fcn->addr, RZ_GRAPH_NODE_TYPE_ICFG, RZ_GRAPH_NODE_SUBTYPE_NONE); + } + if (!data) { + rz_warn_if_reached(); + return NULL; + } + RzGraphNode *node = rz_graph_add_nodef(graph, data, rz_graph_free_node_info); + if (!node) { + rz_graph_free_node_info(data); + } + return node; +} + /** * \brief Returns the graph node of a given \p fcn. If the function * is not yet added as node to the graph, it adds it to the graph and returns its reference. 
@@ -812,7 +831,7 @@ static RZ_OWN RzGraphNode *get_graph_node_of_fcn(RZ_BORROW RzGraph /*addr, rz_list_length(rz_graph_get_nodes(icfg))); - return rz_graph_add_node_info(icfg, fcn->name, NULL, fcn->addr); + return rz_graph_add_node_info_icfg(icfg, fcn); } /** @@ -886,7 +905,14 @@ static inline bool is_leaf_op(const RzAnalysisOp *op) { } static inline bool is_call(const RzAnalysisOp *op) { - return (op->type & RZ_ANALYSIS_OP_TYPE_MASK) == RZ_ANALYSIS_OP_TYPE_CALL; + _RzAnalysisOpType type = (op->type & RZ_ANALYSIS_OP_TYPE_MASK); + return type == RZ_ANALYSIS_OP_TYPE_CALL || + type == RZ_ANALYSIS_OP_TYPE_UCALL || + type == RZ_ANALYSIS_OP_TYPE_RCALL || + type == RZ_ANALYSIS_OP_TYPE_ICALL || + type == RZ_ANALYSIS_OP_TYPE_IRCALL || + type == RZ_ANALYSIS_OP_TYPE_CCALL || + type == RZ_ANALYSIS_OP_TYPE_UCCALL; } static inline bool is_uncond_jump(const RzAnalysisOp *op) { @@ -894,25 +920,72 @@ static inline bool is_uncond_jump(const RzAnalysisOp *op) { !((op->type & RZ_ANALYSIS_OP_HINT_MASK) & RZ_ANALYSIS_OP_TYPE_COND); } +static inline bool is_return(const RzAnalysisOp *op) { + return (op->type & RZ_ANALYSIS_OP_TYPE_MASK) == RZ_ANALYSIS_OP_TYPE_RET; +} + +static inline bool is_cond(const RzAnalysisOp *op) { + return (op->type & RZ_ANALYSIS_OP_HINT_MASK) == RZ_ANALYSIS_OP_TYPE_COND; +} + static inline bool ignore_next_instr(const RzAnalysisOp *op) { // Ignore if: return is_uncond_jump(op) || (op->fail != UT64_MAX && !is_call(op)); // Except calls, everything which has set fail } +static RzGraphNodeSubType get_cfg_node_flags(const RzAnalysisOp *op) { + rz_return_val_if_fail(op, RZ_GRAPH_NODE_SUBTYPE_NONE); + RzGraphNodeSubType subtype = RZ_GRAPH_NODE_SUBTYPE_NONE; + if (is_call(op)) { + subtype |= RZ_GRAPH_NODE_SUBTYPE_CFG_CALL; + } + if (is_return(op)) { + subtype |= RZ_GRAPH_NODE_SUBTYPE_CFG_RETURN; + } + if (is_cond(op)) { + subtype |= RZ_GRAPH_NODE_SUBTYPE_CFG_COND; + } + return subtype; +} + +static RzGraphNode *add_node_info_cfg(RzGraph /**/ *cfg, const RzAnalysisOp 
*op, bool is_entry) { + rz_return_val_if_fail(cfg, NULL); + RzGraphNodeSubType subtype = get_cfg_node_flags(op); + if (is_entry) { + subtype |= RZ_GRAPH_NODE_SUBTYPE_CFG_ENTRY; + } + ut64 call_target = is_call(op) ? op->jump : UT64_MAX; + RzGraphNodeInfo *data = rz_graph_create_node_info_cfg(op->addr, call_target, RZ_GRAPH_NODE_TYPE_CFG, subtype); + if (!data) { + return NULL; + } + RzGraphNode *node = rz_graph_add_nodef(cfg, data, rz_graph_free_node_info); + if (!node) { + rz_graph_free_node_info(data); + } + return node; +} + /** * \brief Add an edge to the graph and update \p to_visit vector and the \p nodes_visited hash table. * * \param graph The graph to work on. * \param to_visit The vector with addresses to visit. * \param nodes_visited The hash table holding already visited addresses and their node indices in the graph. - * \param from The parent node. - * \param to The target node of the edge. + * \param op_from The RzAnalysisOp the edge originates from. + * \param op_to The RzAnalysisOp the edge goes to. * * \return true On success. * \return false On failure. 
*/ -static bool add_edge_to_cfg(RZ_NONNULL RzGraph /**/ *graph, RZ_NONNULL RzVector /**/ *to_visit, RZ_NONNULL HtUU *nodes_visited, ut64 from, ut64 to) { - rz_return_val_if_fail(graph && to_visit && nodes_visited, -1); +static bool add_edge_to_cfg(RZ_NONNULL RzGraph /**/ *graph, + RZ_NONNULL RzVector /**/ *to_visit, + RZ_NONNULL HtUU *nodes_visited, + const RzAnalysisOp *op_from, + const RzAnalysisOp *op_to) { + rz_return_val_if_fail(graph && to_visit && nodes_visited && op_from && op_to, -1); + ut64 from = op_from->addr; + ut64 to = op_to->addr; bool visited = false; ut64 from_idx = ht_uu_find(nodes_visited, from, &visited); if (!visited && from != to) { @@ -920,7 +993,7 @@ static bool add_edge_to_cfg(RZ_NONNULL RzGraph /**/ *graph, R return false; } - RzGraphNode *to_node = rz_graph_add_node_info(graph, rz_str_newf("0x%" PFMT64x, to), NULL, to); + RzGraphNode *to_node = add_node_info_cfg(graph, op_to, false); if (!to_node) { RZ_LOG_ERROR("Could not add node at 0x%" PFMT64x "\n", to); return false; @@ -963,7 +1036,15 @@ RZ_API RZ_OWN RzGraph /**/ *rz_core_graph_cfg(RZ_NONNULL RzCo RzVector *to_visit = rz_vector_new(sizeof(ut64), NULL, NULL); // Add entry node - RzGraphNode *entry = rz_graph_add_node_info(graph, rz_str_newf("0x%" PFMT64x, addr), NULL, addr); + ut8 buf[64] = { 0 }; + if (rz_io_nread_at(core->io, addr, buf, sizeof(buf)) < 0) { + RZ_LOG_ERROR("Could not generate CFG at 0x%" PFMT64x ". 
rz_io_nread_at() failed at 0x%" PFMT64x ".\n", addr, addr); + goto error; + } + RzAnalysisOp curr_op = { 0 }; + RzAnalysisOp target_op = { 0 }; + int disas_bytes = rz_analysis_op(core->analysis, &curr_op, addr, buf, sizeof(buf), RZ_ANALYSIS_OP_MASK_DISASM); + RzGraphNode *entry = add_node_info_cfg(graph, &curr_op, true); ht_uu_insert(nodes_visited, addr, entry->idx); rz_vector_push(to_visit, &addr); @@ -971,42 +1052,59 @@ RZ_API RZ_OWN RzGraph /**/ *rz_core_graph_cfg(RZ_NONNULL RzCo ut64 cur_addr = 0; rz_vector_pop(to_visit, &cur_addr); - ut8 buf[64] = { 0 }; if (rz_io_nread_at(core->io, cur_addr, buf, sizeof(buf)) < 0) { RZ_LOG_ERROR("Could not generate CFG at 0x%" PFMT64x ". rz_io_nread_at() failed at 0x%" PFMT64x ".\n", addr, cur_addr); goto error; } - RzAnalysisOp op = { 0 }; - int disas_bytes = rz_analysis_op(core->analysis, &op, cur_addr, buf, sizeof(buf), RZ_ANALYSIS_OP_MASK_DISASM); - if (disas_bytes <= 0 || is_leaf_op(&op)) { + disas_bytes = rz_analysis_op(core->analysis, &curr_op, cur_addr, buf, sizeof(buf), RZ_ANALYSIS_OP_MASK_DISASM); + if (disas_bytes <= 0 || is_leaf_op(&curr_op)) { // A leaf. It was added before to the graph by the parent node. 
- rz_analysis_op_fini(&op); + rz_analysis_op_fini(&curr_op); continue; } - if (op.jump != UT64_MAX && !is_call(&op)) { - if (!add_edge_to_cfg(graph, to_visit, nodes_visited, cur_addr, op.jump)) { + if (curr_op.jump != UT64_MAX && !is_call(&curr_op)) { + if (rz_analysis_op(core->analysis, &target_op, curr_op.jump, buf, sizeof(buf), RZ_ANALYSIS_OP_MASK_DISASM) <= 0) { + rz_analysis_op_fini(&target_op); goto error; } + if (!add_edge_to_cfg(graph, to_visit, nodes_visited, &curr_op, &target_op)) { + goto error; + } + rz_analysis_op_fini(&target_op); } - if (op.fail != UT64_MAX && !is_call(&op)) { - if (!add_edge_to_cfg(graph, to_visit, nodes_visited, cur_addr, op.fail)) { + if (curr_op.fail != UT64_MAX && !is_call(&curr_op)) { + if (rz_analysis_op(core->analysis, &target_op, curr_op.fail, buf, sizeof(buf), RZ_ANALYSIS_OP_MASK_DISASM) <= 0) { + rz_analysis_op_fini(&target_op); + goto error; + } + if (!add_edge_to_cfg(graph, to_visit, nodes_visited, &curr_op, &target_op)) { goto error; } + rz_analysis_op_fini(&target_op); } - if (ignore_next_instr(&op)) { - rz_analysis_op_fini(&op); + if (ignore_next_instr(&curr_op)) { + rz_analysis_op_fini(&curr_op); continue; } // Add next instruction ut64 next_addr = cur_addr + disas_bytes; - if (!add_edge_to_cfg(graph, to_visit, nodes_visited, cur_addr, next_addr)) { + if (rz_io_nread_at(core->io, next_addr, buf, sizeof(buf)) < 0) { + RZ_LOG_ERROR("Could not generate CFG at 0x%" PFMT64x ". 
rz_io_nread_at() failed at 0x%" PFMT64x ".\n", addr, next_addr); + goto error; + } + if (rz_analysis_op(core->analysis, &target_op, next_addr, buf, sizeof(buf), RZ_ANALYSIS_OP_MASK_DISASM) <= 0) { + rz_analysis_op_fini(&target_op); + goto error; + } + if (!add_edge_to_cfg(graph, to_visit, nodes_visited, &curr_op, &target_op)) { goto error; } - rz_analysis_op_fini(&op); + rz_analysis_op_fini(&target_op); + rz_analysis_op_fini(&curr_op); } fini: @@ -1015,6 +1113,7 @@ RZ_API RZ_OWN RzGraph /**/ *rz_core_graph_cfg(RZ_NONNULL RzCo return graph; error: + rz_warn_if_reached(); rz_graph_free(graph); graph = NULL; goto fini; diff --git a/librz/include/rz_analysis.h b/librz/include/rz_analysis.h index 5875cc08a9b..8be013fc7dc 100644 --- a/librz/include/rz_analysis.h +++ b/librz/include/rz_analysis.h @@ -2323,6 +2323,7 @@ RZ_API RZ_OWN RzPVector /**/ *rz_analysis_function_vars(RZ_NONN RZ_API RZ_BORROW RzAnalysisVar *rz_analysis_function_get_arg_idx(RZ_NONNULL RzAnalysis *analysis, RZ_NONNULL RzAnalysisFunction *f, size_t index); RZ_API RZ_OWN RzList /**/ *rz_analysis_types_from_fcn(RzAnalysis *analysis, RzAnalysisFunction *fcn); RZ_API RZ_OWN RzCallable *rz_analysis_function_derive_type(RzAnalysis *analysis, RzAnalysisFunction *f); +RZ_API bool rz_analysis_function_is_malloc(const RzAnalysisFunction *fcn); /* PDB */ RZ_API RzType *rz_type_db_pdb_parse(const RzTypeDB *typedb, RzPdbTpiStream *stream, RzPdbTpiType *type); diff --git a/librz/include/rz_util/rz_graph_drawable.h b/librz/include/rz_util/rz_graph_drawable.h index 4b55527e1ad..8f7ffc32184 100644 --- a/librz/include/rz_util/rz_graph_drawable.h +++ b/librz/include/rz_util/rz_graph_drawable.h @@ -9,25 +9,80 @@ extern "C" { #endif -/** - * @brief Generic drawable graph node. - * - * Provides minimal information to draw something without output format specific details.
- */ -typedef struct rz_analysis_graph_node_info_t { +#define RZ_GRAPH_NODE_TYPE_GROUP_MASK 0xff000000 + +typedef enum { + RZ_GRAPH_NODE_TYPE_NONE = 0, ///< No type for this node specified. + RZ_GRAPH_NODE_TYPE_DEFAULT, ///< Node contains a title string, a body string and an abstract offset value. + RZ_GRAPH_NODE_TYPE_CFG, ///< Node is part of a control flow graph of a procedure. + RZ_GRAPH_NODE_TYPE_ICFG, ///< Node is part of an inter-procedural control flow graph. +} RzGraphNodeType; + +typedef enum { + RZ_GRAPH_NODE_SUBTYPE_NONE = 0, ///< No details given to this node. + RZ_GRAPH_NODE_SUBTYPE_CFG_ENTRY = 1 << 0, ///< Entry node of the procedure CFG. + RZ_GRAPH_NODE_SUBTYPE_CFG_CALL = 1 << 1, ///< A node which calls another procedure. + RZ_GRAPH_NODE_SUBTYPE_CFG_RETURN = 1 << 2, ///< A return node of the procedure. + RZ_GRAPH_NODE_SUBTYPE_CFG_EXIT = 1 << 3, ///< A node which exits the program (procedure does not return). + RZ_GRAPH_NODE_SUBTYPE_CFG_COND = 1 << 4, ///< A conditional instruction node. + RZ_GRAPH_NODE_SUBTYPE_ICFG_MALLOC = 1 << 5, ///< Node represents a memory allocating procedure. +} RzGraphNodeSubType; + +typedef struct { char *title; char *body; /** - * @brief Optional offset for the object corresponding to node. + * \brief Optional offset for the object corresponding to node. * * Interactive output modes can use it to provide actions like seeking to * this position or modify the object. */ ut64 offset; +} RzGraphNodeInfoDataDefault; + +typedef struct { + /** + * \brief Address of the node. + */ + ut64 address; + /** + * \brief Address of called procedure, if the node subtype has RZ_GRAPH_NODE_SUBTYPE_CFG_CALL set. + * It is set to UT64_MAX if invalid. + */ + ut64 call_address; +} RzGraphNodeInfoDataCFG; + +typedef struct { + /** + * \brief Address of the node. + */ + ut64 address; + bool is_malloc; ///< Flag set if this node is a memory allocating function. +} RzGraphNodeInfoDataICFG; + +/** + * \brief Generic drawable graph node.
+ * + * Provides minimal information to draw something without output format specific details. + */ +typedef struct rz_analysis_graph_node_info_t { + /** + * \brief Optional flags which describe the node further. + */ + RzGraphNodeType type; + RzGraphNodeSubType subtype; + union { + RzGraphNodeInfoDataDefault def; + RzGraphNodeInfoDataCFG cfg; + RzGraphNodeInfoDataICFG icfg; + }; } RzGraphNodeInfo; -RZ_API void rz_graph_free_node_info(void *ptr); -RZ_API RzGraphNodeInfo *rz_graph_create_node_info(const char *title, const char *body, ut64 offset); +RZ_API RZ_OWN RzGraphNodeInfo *rz_graph_get_node_info_data(RZ_BORROW void *data); +RZ_API void rz_graph_free_node_info(RZ_NULLABLE void *ptr); +RZ_API RzGraphNodeInfo *rz_graph_create_node_info_default(const char *title, const char *body, ut64 offset); +RZ_API RzGraphNodeInfo *rz_graph_create_node_info_icfg(ut64 address, RzGraphNodeType type, RzGraphNodeSubType subtype); +RZ_API RzGraphNodeInfo *rz_graph_create_node_info_cfg(ut64 address, ut64 call_target_addr, RzGraphNodeType type, RzGraphNodeSubType subtype); RZ_API RzGraphNode *rz_graph_add_node_info(RzGraph /**/ *graph, const char *title, const char *body, ut64 offset); /** diff --git a/librz/util/graph_drawable.c b/librz/util/graph_drawable.c index 29094882448..8c27c9af540 100644 --- a/librz/util/graph_drawable.c +++ b/librz/util/graph_drawable.c @@ -5,29 +5,115 @@ #include #include -RZ_API void rz_graph_free_node_info(void *ptr) { +/** + * \brief Casts the given graph node data pointer to a + * RzGraphNodeInfo pointer and makes some plausibility tests on the data. + * + * \param data The data pointer from a graph node. + * + * \return A pointer to the graph node info struct or NULL in case of failure. 
+ */ +RZ_API RZ_OWN RzGraphNodeInfo *rz_graph_get_node_info_data(RZ_BORROW void *data) { + rz_return_val_if_fail(data, NULL); + RzGraphNodeInfo *info = data; + switch (info->type) { + default: + RZ_LOG_ERROR("Unhandled graph node info type %" PFMT32d "\n", info->type); + return NULL; + case RZ_GRAPH_NODE_TYPE_DEFAULT: + case RZ_GRAPH_NODE_TYPE_CFG: + case RZ_GRAPH_NODE_TYPE_ICFG: + break; + } + return info; +} + +RZ_API void rz_graph_free_node_info(RZ_NULLABLE void *ptr) { if (!ptr) { return; } RzGraphNodeInfo *info = ptr; - free(info->body); - free(info->title); + switch (info->type) { + default: + RZ_LOG_WARN("Not handled RzGraphNodeInfoType\n"); + break; + case RZ_GRAPH_NODE_TYPE_CFG: + case RZ_GRAPH_NODE_TYPE_ICFG: + break; + case RZ_GRAPH_NODE_TYPE_DEFAULT: + free(info->def.body); + free(info->def.title); + break; + } free(info); } -RZ_API RzGraphNodeInfo *rz_graph_create_node_info(const char *title, const char *body, ut64 offset) { +/** + * \brief Initializes a node info struct of a default node (title, body and offset). + * + * \param title The title describing the node. + * \param body The body text describing the node. + * \param offset A numeric offset of this node. 0 if invalid. + * + * \return The initialized RzGraphNodeInfo or NULL in case of failure. + */ +RZ_API RzGraphNodeInfo *rz_graph_create_node_info_default(const char *title, const char *body, ut64 offset) { + RzGraphNodeInfo *data = RZ_NEW0(RzGraphNodeInfo); + if (!data) { + return NULL; + } + data->type = RZ_GRAPH_NODE_TYPE_DEFAULT; + data->subtype = RZ_GRAPH_NODE_SUBTYPE_NONE; + data->def.title = RZ_STR_DUP(title); + data->def.body = RZ_STR_DUP(body); + data->def.offset = offset; + return data; +} + +/** + * \brief Initializes a node info struct of a CFG node. + * + * \param address The address of the instruction this node represents. + * \param call_target_addr The address of the procedure called, if this node is a call. + * \param subtype Additional flags which describe the node.
+ * + * \return The initialized RzGraphNodeInfo or NULL in case of failure. + */ +RZ_API RzGraphNodeInfo *rz_graph_create_node_info_cfg(ut64 address, ut64 call_target_addr, RzGraphNodeType type, RzGraphNodeSubType subtype) { RzGraphNodeInfo *data = RZ_NEW0(RzGraphNodeInfo); - if (data) { - data->title = RZ_STR_DUP(title); - data->body = RZ_STR_DUP(body); - data->offset = offset; + if (!data) { + return NULL; } + data->type = RZ_GRAPH_NODE_TYPE_CFG; + data->subtype = subtype; + data->cfg.address = address; + data->cfg.call_address = call_target_addr; + return data; +} + +/** + * \brief Initializes a node info struct of an iCFG node. + * + * \param address The address of the procedure this node represents. + * \param subtype Additional flags which describe the node. + * + * \return The initialized RzGraphNodeInfo or NULL in case of failure. + */ +RZ_API RzGraphNodeInfo *rz_graph_create_node_info_icfg(ut64 address, RzGraphNodeType type, RzGraphNodeSubType subtype) { + RzGraphNodeInfo *data = RZ_NEW0(RzGraphNodeInfo); + if (!data) { + return NULL; + } + data->type = RZ_GRAPH_NODE_TYPE_ICFG; + data->subtype = subtype; + data->icfg.address = address; + data->icfg.is_malloc = subtype & RZ_GRAPH_NODE_SUBTYPE_ICFG_MALLOC; return data; } RZ_API RzGraphNode *rz_graph_add_node_info(RzGraph /**/ *graph, const char *title, const char *body, ut64 offset) { rz_return_val_if_fail(graph, NULL); - RzGraphNodeInfo *data = rz_graph_create_node_info(title, body, offset); + RzGraphNodeInfo *data = rz_graph_create_node_info_default(title, body, offset); if (!data) { return NULL; } @@ -38,6 +124,13 @@ RZ_API RzGraphNode *rz_graph_add_node_info(RzGraph /**/ *grap return node; } +/** + * \brief Prints the given RzGraph as dot graph. + * + * \param graph The graph to print. + * \param node_properties Node property string, added to the dot graph header. + * \param edge_properties Edge property string, added to the dot graph header.
+ */ RZ_API RZ_OWN char *rz_graph_drawable_to_dot(RZ_NONNULL RzGraph /**/ *graph, RZ_NULLABLE const char *node_properties, RZ_NULLABLE const char *edge_properties) { rz_return_val_if_fail(graph, NULL); @@ -56,16 +149,34 @@ RZ_API RZ_OWN char *rz_graph_drawable_to_dot(RZ_NONNULL RzGraph /*data; - char *body = print_node->body; + char *url; + char *label; - if (!body || !*body) { - rz_strbuf_appendf(&buf, "%d [URL=\"%s\", color=\"lightgray\", label=\"%s\"]\n", - node->idx, print_node->title, print_node->title); - } else { - rz_str_replace_ch(body, '\"', '\'', true); - rz_strbuf_appendf(&buf, "%d [URL=\"%s\", color=\"lightgray\", label=\"%s\\n%s\"]\n", - node->idx, print_node->title, print_node->title, body); + switch (print_node->type) { + default: + RZ_LOG_ERROR("Unhandled node type. Graph node either doesn't support dot graph printing or it isn't implemented.\n"); + return NULL; + case RZ_GRAPH_NODE_TYPE_CFG: + label = rz_str_newf("0x%" PFMT64x, print_node->cfg.address); + url = label; + break; + case RZ_GRAPH_NODE_TYPE_ICFG: + label = rz_str_newf("0x%" PFMT64x, print_node->icfg.address); + url = label; + break; + case RZ_GRAPH_NODE_TYPE_DEFAULT: + url = print_node->def.title; + if (print_node->def.body && print_node->def.body[0]) { + rz_str_replace_ch(print_node->def.body, '\"', '\'', true); + label = rz_str_newf("%s\\n%s", print_node->def.title, print_node->def.body); + } else { + label = rz_str_dup(print_node->def.title); + } } + + rz_strbuf_appendf(&buf, "%d [URL=\"%s\", color=\"lightgray\", label=\"%s\"]\n", + node->idx, url, label); + free(label); rz_list_foreach (node->out_nodes, itt, target) { rz_strbuf_appendf(&buf, "%d -> %d\n", node->idx, target->idx); } @@ -91,14 +202,28 @@ RZ_API void rz_graph_drawable_to_json(RZ_NONNULL RzGraph /**/ RzGraphNodeInfo *print_node = (RzGraphNodeInfo *)node->data; pj_o(pj); pj_kn(pj, "id", node->idx); - if (print_node->title) { - pj_ks(pj, "title", print_node->title); - } - if (print_node->body) { - pj_ks(pj, "body", 
print_node->body); - } - if (use_offset) { - pj_kn(pj, "offset", print_node->offset); + if (print_node->type == RZ_GRAPH_NODE_TYPE_DEFAULT) { + if (print_node->def.title) { + pj_ks(pj, "title", print_node->def.title); + } + if (print_node->def.body) { + pj_ks(pj, "body", print_node->def.body); + } + if (use_offset) { + pj_kn(pj, "offset", print_node->def.offset); + } + } else if (print_node->type == RZ_GRAPH_NODE_TYPE_ICFG) { + pj_kn(pj, "address", print_node->icfg.address); + pj_kb(pj, "is_malloc", print_node->subtype & RZ_GRAPH_NODE_SUBTYPE_ICFG_MALLOC); + } else if (print_node->type == RZ_GRAPH_NODE_TYPE_CFG) { + pj_kn(pj, "address", print_node->cfg.address); + pj_kb(pj, "is_call", print_node->subtype & RZ_GRAPH_NODE_SUBTYPE_CFG_CALL); + if (print_node->subtype & RZ_GRAPH_NODE_SUBTYPE_CFG_CALL && print_node->cfg.call_address != UT64_MAX) { + pj_kn(pj, "call_address", print_node->cfg.call_address); + } + pj_kb(pj, "is_entry", print_node->subtype & RZ_GRAPH_NODE_SUBTYPE_CFG_ENTRY); + pj_kb(pj, "is_exit", print_node->subtype & RZ_GRAPH_NODE_SUBTYPE_CFG_EXIT); + pj_kb(pj, "is_return", print_node->subtype & RZ_GRAPH_NODE_SUBTYPE_CFG_RETURN); } pj_k(pj, "out_nodes"); pj_a(pj); @@ -151,23 +276,23 @@ RZ_API RZ_OWN char *rz_graph_drawable_to_cmd(RZ_NONNULL RzGraph /*nodes, it, node) { RzGraphNodeInfo *print_node = node->data; - if (RZ_STR_ISNOTEMPTY(print_node->body)) { - ut32 len = strlen(print_node->body); - if (len > 0 && print_node->body[len - 1] == '\n') { + if (RZ_STR_ISNOTEMPTY(print_node->def.body)) { + ut32 len = strlen(print_node->def.body); + if (len > 0 && print_node->def.body[len - 1] == '\n') { len--; } - char *body = rz_base64_encode_dyn((const ut8 *)print_node->body, len); - rz_strbuf_appendf(sb, "agn \"%s\" base64:%s\n", print_node->title, body); + char *body = rz_base64_encode_dyn((const ut8 *)print_node->def.body, len); + rz_strbuf_appendf(sb, "agn \"%s\" base64:%s\n", print_node->def.title, body); free(body); } else { - rz_strbuf_appendf(sb, "agn \"%s\"\n",
print_node->title); + rz_strbuf_appendf(sb, "agn \"%s\"\n", print_node->def.title); } } rz_list_foreach (graph->nodes, it, node) { RzGraphNodeInfo *print_node = node->data; rz_list_foreach (node->out_nodes, edge_it, target) { RzGraphNodeInfo *to = target->data; - rz_strbuf_appendf(sb, "age \"%s\" \"%s\"\n", print_node->title, to->title); + rz_strbuf_appendf(sb, "age \"%s\" \"%s\"\n", print_node->def.title, to->def.title); } } return rz_strbuf_drain(sb); @@ -189,13 +314,31 @@ RZ_API RZ_OWN char *rz_graph_drawable_to_gml(RZ_NONNULL RzGraph /*nodes, it, graphNode) { RzGraphNodeInfo *print_node = graphNode->data; + + switch (print_node->type) { + default: + RZ_LOG_ERROR("Unhandled node type. Graph node either doesn't support dot graph printing or it isn't implemented.\n"); + return NULL; + case RZ_GRAPH_NODE_TYPE_CFG: + label = rz_strf(tmp, "0x%" PFMT64x, print_node->cfg.address); + break; + case RZ_GRAPH_NODE_TYPE_ICFG: + label = rz_strf(tmp, "0x%" PFMT64x, print_node->icfg.address); + break; + case RZ_GRAPH_NODE_TYPE_DEFAULT: + label = print_node->def.title; + break; + } + rz_strbuf_appendf(sb, " node [\n" " id %d\n" " label \"%s\"\n" " ]\n", - graphNode->idx, print_node->title); + graphNode->idx, label); } RzListIter *edge_it; rz_list_foreach (graph->nodes, it, graphNode) { diff --git a/test/db/cmd/cmd_graph b/test/db/cmd/cmd_graph index b340252a8f9..865a2ce08ae 100644 --- a/test/db/cmd/cmd_graph +++ b/test/db/cmd/cmd_graph @@ -362,73 +362,73 @@ EOF EXPECT=< #include "../unit/minunit.h" +#include static inline RzGraph *graph_by_function_name(RzCore *core, RzCoreGraphType t, const char *name) { RzAnalysisFunction *f = rz_analysis_get_function_byname(core->analysis, name); @@ -122,7 +123,7 @@ bool test_analysis_graph_more() { RzGraphNodeInfo *ni = n->data; mu_assert_notnull(ni, "graph node info"); - mu_assert_streq(ni->title, "0x8048be4", "graph node"); + mu_assert_streq(ni->def.title, "0x8048be4", "graph node"); const RzList *list = rz_graph_get_neighbours(g, n); 
mu_assert_notnull(list, "node neighbours"); @@ -139,7 +140,7 @@ bool test_analysis_graph_more() { ni = n->data; mu_assert_notnull(ni, "graph node info"); - mu_assert_streq(ni->title, "main", "graph node"); + mu_assert_streq(ni->def.title, "main", "graph node"); list = rz_graph_get_neighbours(g, n); mu_assert_notnull(list, "node neighbours"); @@ -162,7 +163,7 @@ bool test_analysis_graph_more() { ni = n->data; mu_assert_notnull(ni, "graph node info"); - mu_assert_streq(ni->title, "sym.main", "graph node"); + mu_assert_streq(ni->def.title, "sym.main", "graph node"); list = rz_graph_get_neighbours(g, n); mu_assert_notnull(list, "node neighbours"); @@ -179,7 +180,7 @@ bool test_analysis_graph_more() { ni = n->data; mu_assert_notnull(ni, "graph node info"); - mu_assert_streq(ni->title, "0x08048a3e", "graph node"); + mu_assert_streq(ni->def.title, "0x08048a3e", "graph node"); list = rz_graph_get_neighbours(g, n); mu_assert_notnull(list, "node neighbours"); @@ -191,9 +192,124 @@ bool test_analysis_graph_more() { mu_end; } +bool test_analysis_graph_icfg() { + // Open the file + RzCore *core = rz_core_new(); + mu_assert_notnull(core, "new RzCore instance"); + const char *fpath = "bins/elf/analysis/x86_icfg_malloc_test"; + mu_assert_true(rz_core_file_open_load(core, fpath, 0, RZ_PERM_R, false), "load file"); + + // Analyse the file + rz_core_analysis_all(core); + rz_core_analysis_everything(core, false, "esil"); + rz_core_analysis_flag_every_function(core); + + RzGraph *g = rz_core_graph_icfg(core); + mu_assert_eq(g->n_nodes, 13, "data graph node count"); + mu_assert_eq(g->n_edges, 6, "data graph edge count"); + + // Testing the node content is a little annoying. The nodes + // are indexed by their position in the list. + // Although in case of a CFG and iCFG it would be better to + // have them indexed by their address in the binary. + // But the current graph implementation (list and not hashmap based) + // doesn't support this. 
+ // So, if this test breaks due to some changes in the analysis, + // make sure the order of the nodes did not change + // (because they might have been added in different order). + RzGraphNodeInfo *info = rz_graph_get_node_info_data(rz_graph_get_node(g, 7)->data); + mu_assert_eq(info->type, RZ_GRAPH_NODE_TYPE_ICFG, "info type"); + mu_assert_eq(info->icfg.address, 0x1159, "info address"); + mu_assert_false(info->icfg.is_malloc, "info is_malloc"); + + info = rz_graph_get_node_info_data(rz_graph_get_node(g, 8)->data); + mu_assert_eq(info->type, RZ_GRAPH_NODE_TYPE_ICFG, "info type"); + mu_assert_eq(info->icfg.address, 0x1040, "info address"); + mu_assert_true(info->icfg.is_malloc, "info is_malloc"); + + info = rz_graph_get_node_info_data(rz_graph_get_node(g, 9)->data); + mu_assert_eq(info->type, RZ_GRAPH_NODE_TYPE_ICFG, "info type"); + mu_assert_eq(info->icfg.address, 0x1030, "info address"); + mu_assert_true(info->icfg.is_malloc, "info is_malloc"); + + info = rz_graph_get_node_info_data(rz_graph_get_node(g, 10)->data); + mu_assert_eq(info->type, RZ_GRAPH_NODE_TYPE_ICFG, "info type"); + mu_assert_eq(info->icfg.address, 0x1050, "info address"); + mu_assert_true(info->icfg.is_malloc, "info is_malloc"); + + rz_graph_free(g); + + // Close the file + rz_core_free(core); + mu_end; +} + +bool test_analysis_graph_cfg() { + // Open the file + RzCore *core = rz_core_new(); + mu_assert_notnull(core, "new RzCore instance"); + const char *fpath = "bins/elf/analysis/x86_cfg_node_details_test"; + mu_assert_true(rz_core_file_open_load(core, fpath, 0, RZ_PERM_R, false), "load file"); + + // Analyse the file + rz_core_analysis_all(core); + rz_core_analysis_everything(core, false, "esil"); + rz_core_analysis_flag_every_function(core); + + RzGraph *g = rz_core_graph_cfg(core, 0x117a); // main() + mu_assert_eq(g->n_nodes, 26, "data graph node count"); + mu_assert_eq(g->n_edges, 25, "data graph edge count"); + + // Testing the node content is a little annoying.
The nodes + // are indexed by their position in the list. + // Although in case of a CFG and iCFG it would be better to + // have them indexed by their address in the binary. + // But the current graph implementation (list and not hashmap based) + // doesn't support this. + // So, if this test breaks due to some changes in the analysis, + // make sure the order of the nodes did not change + // (because they might have been added in different order). + RzGraphNodeInfo *info = rz_graph_get_node_info_data(rz_graph_get_node(g, 0)->data); + mu_assert_eq(info->type, RZ_GRAPH_NODE_TYPE_CFG, "info type"); + mu_assert_eq(info->subtype, RZ_GRAPH_NODE_SUBTYPE_CFG_ENTRY, "info subtype"); + mu_assert_eq(info->cfg.address, 0x117a, "info address"); + mu_assert_eq(info->cfg.call_address, UT64_MAX, "info call address"); + + info = rz_graph_get_node_info_data(rz_graph_get_node(g, 3)->data); + mu_assert_eq(info->type, RZ_GRAPH_NODE_TYPE_CFG, "info type"); + mu_assert_eq(info->subtype, RZ_GRAPH_NODE_SUBTYPE_CFG_CALL, "info subtype"); + mu_assert_eq(info->cfg.address, 0x1182, "info address"); + mu_assert_eq(info->cfg.call_address, 0x1050, "info call address"); + + info = rz_graph_get_node_info_data(rz_graph_get_node(g, 10)->data); + mu_assert_eq(info->type, RZ_GRAPH_NODE_TYPE_CFG, "info type"); + mu_assert_eq(info->subtype, RZ_GRAPH_NODE_SUBTYPE_CFG_COND, "info subtype"); + mu_assert_eq(info->cfg.address, 0x11a7, "info address"); + mu_assert_eq(info->cfg.call_address, UT64_MAX, "info call address"); + + info = rz_graph_get_node_info_data(rz_graph_get_node(g, 24)->data); + mu_assert_eq(info->type, RZ_GRAPH_NODE_TYPE_CFG, "info type"); + mu_assert_eq(info->subtype, RZ_GRAPH_NODE_SUBTYPE_CFG_CALL, "info subtype"); + mu_assert_eq(info->cfg.address, 0x11cd, "info address"); + mu_assert_eq(info->cfg.call_address, UT64_MAX, "info call address"); + + info = rz_graph_get_node_info_data(rz_graph_get_node(g, 18)->data); + mu_assert_eq(info->type, RZ_GRAPH_NODE_TYPE_CFG, "info type"); + 
mu_assert_eq(info->subtype, RZ_GRAPH_NODE_SUBTYPE_CFG_RETURN, "info subtype"); + mu_assert_eq(info->cfg.address, 0x11d3, "info address"); + mu_assert_eq(info->cfg.call_address, UT64_MAX, "info call address"); + + rz_graph_free(g); + + // Close the file + rz_core_free(core); + mu_end; +} int all_tests() { mu_run_test(test_analysis_graph); mu_run_test(test_analysis_graph_more); + mu_run_test(test_analysis_graph_icfg); + mu_run_test(test_analysis_graph_cfg); return tests_passed != tests_run; } diff --git a/test/unit/test_analysis_class_graph.c b/test/unit/test_analysis_class_graph.c index 44738fcd549..c192816f657 100644 --- a/test/unit/test_analysis_class_graph.c +++ b/test/unit/test_analysis_class_graph.c @@ -28,7 +28,7 @@ bool test_inherit_graph_creation() { RzGraphNodeInfo *info = node->data; switch (i++) { case 0: - mu_assert_streq(info->title, "A", "Wrong node name"); + mu_assert_streq(info->def.title, "A", "Wrong node name"); mu_assert_eq(node->out_nodes->length, 2, "Wrong node out-nodes"); { RzListIter *iter; @@ -38,17 +38,17 @@ bool test_inherit_graph_creation() { RzGraphNodeInfo *info = out_node->data; switch (i++) { case 0: - mu_assert_streq(info->title, "B", "Wrong node name"); + mu_assert_streq(info->def.title, "B", "Wrong node name"); break; case 1: - mu_assert_streq(info->title, "C", "Wrong node name"); + mu_assert_streq(info->def.title, "C", "Wrong node name"); break; } } } break; case 1: - mu_assert_streq(info->title, "B", "Wrong node name"); + mu_assert_streq(info->def.title, "B", "Wrong node name"); mu_assert_eq(node->out_nodes->length, 1, "Wrong node out-nodes"); mu_assert_eq(node->in_nodes->length, 1, "Wrong node in-nodes"); { @@ -59,14 +59,14 @@ bool test_inherit_graph_creation() { RzGraphNodeInfo *info = out_node->data; switch (i++) { case 0: - mu_assert_streq(info->title, "D", "Wrong node name"); + mu_assert_streq(info->def.title, "D", "Wrong node name"); break; } } } break; case 2: - mu_assert_streq(info->title, "C", "Wrong node name"); + 
mu_assert_streq(info->def.title, "C", "Wrong node name"); mu_assert_eq(node->out_nodes->length, 1, "Wrong node out-nodes"); mu_assert_eq(node->in_nodes->length, 1, "Wrong node in-nodes"); { @@ -77,14 +77,14 @@ bool test_inherit_graph_creation() { RzGraphNodeInfo *info = out_node->data; switch (i++) { case 0: - mu_assert_streq(info->title, "D", "Wrong node name"); + mu_assert_streq(info->def.title, "D", "Wrong node name"); break; } } } break; case 3: - mu_assert_streq(info->title, "D", "Wrong node name"); + mu_assert_streq(info->def.title, "D", "Wrong node name"); mu_assert_eq(node->in_nodes->length, 2, "Wrong node in-nodes"); break; default: @@ -101,4 +101,4 @@ int all_tests() { return tests_passed != tests_run; } -mu_main(all_tests) \ No newline at end of file +mu_main(all_tests)