Skip to content

Commit

Permalink
Add different types of RzGraphNodeInfos. (#4189)
Browse files Browse the repository at this point in the history
* Add different types of GraphNodeInfos.

This adds support for multiple graph node infos.
It removes the assumption that information of
a graph node must be some strings and an offset.

* Increase buffer for printing graph node body

* Don't hardcode maximum label size

* Add graph node info for icfg

* Add node type info to CFG.

* Access address members properly for iCFG and CFG nodes

* Update test. Function names are no longer saved.

* Fix reachable double free of label

* Split graph node type from its sub-type to make it less confusing.

* Add integration tests for iCFG nodes.

* Fix, add missing buffer read

* Recognize more calls

* Add test for iCFG generation and its node details.
  • Loading branch information
Rot127 authored Feb 9, 2024
1 parent 24ecc7e commit fcf4f71
Show file tree
Hide file tree
Showing 9 changed files with 605 additions and 147 deletions.
19 changes: 19 additions & 0 deletions librz/analysis/fcn.c
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
// SPDX-FileCopyrightText: 2010-2021 pancake <[email protected]>
// SPDX-License-Identifier: LGPL-3.0-only

#include <rz_util/rz_regex.h>
#include <rz_analysis.h>
#include <rz_parse.h>
#include <rz_util.h>
Expand Down Expand Up @@ -2692,3 +2693,21 @@ RZ_API RZ_OWN RzCallable *rz_analysis_function_derive_type(RzAnalysis *analysis,
}
return callable;
}

/**
 * \brief Checks whether the given function looks like a memory allocating function (malloc, calloc etc.).
 *
 * Detection methods currently applied (tested in order):
 * - The function name matches the regex ".*\.([mc]|(re))?alloc.*"
 *
 * \param fcn The function to test.
 *
 * \return true If the function \p fcn is considered a memory allocating function.
 * \return false Otherwise.
 */
RZ_API bool rz_analysis_function_is_malloc(const RzAnalysisFunction *fcn) {
	rz_return_val_if_fail(fcn, false);
	// TODO: Matching on the name alone is pretty naive. More metrics are needed,
	// e.g. a comparison against signatures and other characteristics.
	const char *alloc_name_pattern = ".*\\.([mc]|(re))?alloc.*";
	const bool name_matches = rz_regex_contains(alloc_name_pattern, fcn->name, RZ_REGEX_ZERO_TERMINATED, RZ_REGEX_EXTENDED, RZ_REGEX_DEFAULT);
	return name_matches;
}
33 changes: 29 additions & 4 deletions librz/core/agraph.c
Original file line number Diff line number Diff line change
Expand Up @@ -3727,11 +3727,36 @@ RZ_API void rz_agraph_set_title(RzAGraph *g, const char *title) {
*/
RZ_API RZ_BORROW RzANode *rz_agraph_add_node_from_node_info(RZ_NONNULL const RzAGraph *g, RZ_NONNULL const RzGraphNodeInfo *info) {
	rz_return_val_if_fail(g && info, NULL);
	// Stays NULL for unhandled node types, so the caller gets NULL back.
	RzANode *an = NULL;
	// Holds "0x" + up to 16 hex digits of a 64-bit address + terminating NUL.
	char title[20] = { 0 };
	switch (info->type) {
	default:
		RZ_LOG_ERROR("Node type %d not handled.\n", info->type);
		break;
	case RZ_GRAPH_NODE_TYPE_DEFAULT:
		// Default nodes carry their own title, body and offset.
		an = rz_agraph_add_node(g, info->def.title, info->def.body);
		if (!an) {
			return NULL;
		}
		an->offset = info->def.offset;
		break;
	case RZ_GRAPH_NODE_TYPE_CFG:
		// CFG nodes only carry an address: it becomes the title, the body stays empty.
		rz_strf(title, "0x%" PFMT64x, info->cfg.address);
		an = rz_agraph_add_node(g, title, "");
		if (!an) {
			return NULL;
		}
		an->offset = info->cfg.address;
		break;
	case RZ_GRAPH_NODE_TYPE_ICFG:
		// iCFG nodes are handled like CFG nodes, but read the address from the icfg member.
		rz_strf(title, "0x%" PFMT64x, info->icfg.address);
		an = rz_agraph_add_node(g, title, "");
		if (!an) {
			return NULL;
		}
		an->offset = info->icfg.address;
		break;
	}
	return an;
}

Expand Down
141 changes: 120 additions & 21 deletions librz/core/cgraph.c
Original file line number Diff line number Diff line change
Expand Up @@ -792,6 +792,25 @@ RZ_API RZ_OWN RzGraph /*<RzGraphNodeInfo *>*/ *rz_core_graph_il(RZ_NONNULL RzCor
return graph;
}

/**
 * \brief Creates an iCFG node info for \p fcn and adds it as a new node to \p graph.
 *
 * The node info is created with the function address and the type RZ_GRAPH_NODE_TYPE_ICFG.
 * Its sub-type is RZ_GRAPH_NODE_SUBTYPE_ICFG_MALLOC if rz_analysis_function_is_malloc()
 * returns true for \p fcn, and RZ_GRAPH_NODE_SUBTYPE_NONE otherwise.
 *
 * \param graph The graph to add the node to.
 * \param fcn The function the node represents.
 *
 * \return The added graph node, or NULL if an argument is NULL,
 * the node info could not be created or the node could not be added.
 */
static RzGraphNode *rz_graph_add_node_info_icfg(RzGraph /*<RzGraphNodeInfo *>*/ *graph, const RzAnalysisFunction *fcn) {
	// fcn->addr is dereferenced below, so fcn must be validated as well.
	rz_return_val_if_fail(graph && fcn, NULL);
	RzGraphNodeInfo *data = NULL;
	if (rz_analysis_function_is_malloc(fcn)) {
		data = rz_graph_create_node_info_icfg(fcn->addr, RZ_GRAPH_NODE_TYPE_ICFG, RZ_GRAPH_NODE_SUBTYPE_ICFG_MALLOC);
	} else {
		data = rz_graph_create_node_info_icfg(fcn->addr, RZ_GRAPH_NODE_TYPE_ICFG, RZ_GRAPH_NODE_SUBTYPE_NONE);
	}
	if (!data) {
		rz_warn_if_reached();
		return NULL;
	}
	RzGraphNode *node = rz_graph_add_nodef(graph, data, rz_graph_free_node_info);
	if (!node) {
		// No node was returned to take the node info, so release it here.
		rz_graph_free_node_info(data);
	}
	return node;
}

/**
* \brief Returns the graph node of a given \p fcn. If the function
* is not yet added as node to the graph, it adds it to the graph and returns its reference.
Expand All @@ -812,7 +831,7 @@ static RZ_OWN RzGraphNode *get_graph_node_of_fcn(RZ_BORROW RzGraph /*<RzGraphNod
return rz_graph_get_node(icfg, i);
}
ht_uu_insert(graph_idx, fcn->addr, rz_list_length(rz_graph_get_nodes(icfg)));
return rz_graph_add_node_info(icfg, fcn->name, NULL, fcn->addr);
return rz_graph_add_node_info_icfg(icfg, fcn);
}

/**
Expand Down Expand Up @@ -886,41 +905,95 @@ static inline bool is_leaf_op(const RzAnalysisOp *op) {
}

/**
 * \brief Tests whether \p op is a call instruction.
 *
 * The op type is masked with RZ_ANALYSIS_OP_TYPE_MASK and compared against
 * the CALL, UCALL, RCALL, ICALL, IRCALL, CCALL and UCCALL op types.
 *
 * \param op The analysis op to test. Must not be NULL.
 *
 * \return true If the masked op type equals one of the call types listed above.
 * \return false Otherwise.
 */
static inline bool is_call(const RzAnalysisOp *op) {
	_RzAnalysisOpType type = (op->type & RZ_ANALYSIS_OP_TYPE_MASK);
	return type == RZ_ANALYSIS_OP_TYPE_CALL ||
		type == RZ_ANALYSIS_OP_TYPE_UCALL ||
		type == RZ_ANALYSIS_OP_TYPE_RCALL ||
		type == RZ_ANALYSIS_OP_TYPE_ICALL ||
		type == RZ_ANALYSIS_OP_TYPE_IRCALL ||
		type == RZ_ANALYSIS_OP_TYPE_CCALL ||
		type == RZ_ANALYSIS_OP_TYPE_UCCALL;
}

/**
 * \brief Tests whether \p op is a jump without the conditional hint set.
 *
 * \param op The analysis op to test. Must not be NULL.
 *
 * \return true If the masked op type is RZ_ANALYSIS_OP_TYPE_JMP and
 * the RZ_ANALYSIS_OP_TYPE_COND bit is not set in the hint bits.
 * \return false Otherwise.
 */
static inline bool is_uncond_jump(const RzAnalysisOp *op) {
	if ((op->type & RZ_ANALYSIS_OP_TYPE_MASK) != RZ_ANALYSIS_OP_TYPE_JMP) {
		return false;
	}
	return ((op->type & RZ_ANALYSIS_OP_HINT_MASK) & RZ_ANALYSIS_OP_TYPE_COND) == 0;
}

/**
 * \brief Tests whether the masked type of \p op equals RZ_ANALYSIS_OP_TYPE_RET.
 *
 * \param op The analysis op to test. Must not be NULL.
 */
static inline bool is_return(const RzAnalysisOp *op) {
	const bool type_is_ret = (op->type & RZ_ANALYSIS_OP_TYPE_MASK) == RZ_ANALYSIS_OP_TYPE_RET;
	return type_is_ret;
}

/**
 * \brief Tests whether the hint bits of \p op (selected with RZ_ANALYSIS_OP_HINT_MASK)
 * are exactly RZ_ANALYSIS_OP_TYPE_COND.
 *
 * \param op The analysis op to test. Must not be NULL.
 */
static inline bool is_cond(const RzAnalysisOp *op) {
	const bool hint_is_cond = (op->type & RZ_ANALYSIS_OP_HINT_MASK) == RZ_ANALYSIS_OP_TYPE_COND;
	return hint_is_cond;
}

/**
 * \brief Decides whether the instruction following \p op should be ignored.
 *
 * \param op The analysis op to test. Must not be NULL.
 *
 * \return true If \p op is an unconditional jump, or if it has a fail address set and is not a call.
 * \return false Otherwise.
 */
static inline bool ignore_next_instr(const RzAnalysisOp *op) {
	if (is_uncond_jump(op)) {
		return true;
	}
	// Except calls, everything which has set fail
	return op->fail != UT64_MAX && !is_call(op);
}

/**
 * \brief Builds the CFG node sub-type flags for \p op.
 *
 * Starting from RZ_GRAPH_NODE_SUBTYPE_NONE, the CALL, RETURN and COND sub-type flags
 * are OR-ed in when is_call(), is_return() and is_cond() report true for \p op.
 *
 * \param op The analysis op to derive the flags from.
 *
 * \return The combined sub-type flags. RZ_GRAPH_NODE_SUBTYPE_NONE if \p op is NULL.
 */
static RzGraphNodeSubType get_cfg_node_flags(const RzAnalysisOp *op) {
	rz_return_val_if_fail(op, RZ_GRAPH_NODE_SUBTYPE_NONE);
	RzGraphNodeSubType flags = RZ_GRAPH_NODE_SUBTYPE_NONE;
	// OR-ing in NONE again is a no-op, because flags was initialized with it.
	flags |= is_call(op) ? RZ_GRAPH_NODE_SUBTYPE_CFG_CALL : RZ_GRAPH_NODE_SUBTYPE_NONE;
	flags |= is_return(op) ? RZ_GRAPH_NODE_SUBTYPE_CFG_RETURN : RZ_GRAPH_NODE_SUBTYPE_NONE;
	flags |= is_cond(op) ? RZ_GRAPH_NODE_SUBTYPE_CFG_COND : RZ_GRAPH_NODE_SUBTYPE_NONE;
	return flags;
}

/**
 * \brief Creates a CFG node info for \p op and adds it as a new node to \p cfg.
 *
 * The sub-type flags come from get_cfg_node_flags(), plus RZ_GRAPH_NODE_SUBTYPE_CFG_ENTRY
 * if \p is_entry is set. The call target of the node info is the jump address of \p op
 * if it is a call, and UT64_MAX otherwise.
 *
 * \param cfg The graph to add the node to.
 * \param op The analysis op the node represents.
 * \param is_entry True if the node is the entry node of the CFG.
 *
 * \return The added graph node, or NULL if an argument is NULL,
 * the node info could not be created or the node could not be added.
 */
static RzGraphNode *add_node_info_cfg(RzGraph /*<RzGraphNodeInfo *>*/ *cfg, const RzAnalysisOp *op, bool is_entry) {
	// op is dereferenced below (is_call(), op->jump, op->addr), so it must be validated as well.
	rz_return_val_if_fail(cfg && op, NULL);
	RzGraphNodeSubType subtype = get_cfg_node_flags(op);
	if (is_entry) {
		subtype |= RZ_GRAPH_NODE_SUBTYPE_CFG_ENTRY;
	}
	ut64 call_target = is_call(op) ? op->jump : UT64_MAX;
	RzGraphNodeInfo *data = rz_graph_create_node_info_cfg(op->addr, call_target, RZ_GRAPH_NODE_TYPE_CFG, subtype);
	if (!data) {
		return NULL;
	}
	RzGraphNode *node = rz_graph_add_nodef(cfg, data, rz_graph_free_node_info);
	if (!node) {
		// No node was returned to take the node info, so release it here.
		rz_graph_free_node_info(data);
	}
	return node;
}

/**
* \brief Add an edge to the graph and update \p to_visit vector and the \p nodes_visited hash table.
*
* \param graph The graph to work on.
* \param to_visit The vector with addresses to visit.
* \param nodes_visited The hash table holding already visited addresses and their node indices in the graph.
* \param from The parent node.
* \param to The target node of the edge.
* \param op_from The RzAnalysisOp the edge originates from.
* \param op_to The RzAnalysisOp the edge goes to.
*
* \return true On success.
* \return false On failure.
*/
static bool add_edge_to_cfg(RZ_NONNULL RzGraph /*<RzGraphNodeInfo *>*/ *graph, RZ_NONNULL RzVector /*<ut64>*/ *to_visit, RZ_NONNULL HtUU *nodes_visited, ut64 from, ut64 to) {
rz_return_val_if_fail(graph && to_visit && nodes_visited, -1);
static bool add_edge_to_cfg(RZ_NONNULL RzGraph /*<RzGraphNodeInfo *>*/ *graph,
RZ_NONNULL RzVector /*<ut64>*/ *to_visit,
RZ_NONNULL HtUU *nodes_visited,
const RzAnalysisOp *op_from,
const RzAnalysisOp *op_to) {
rz_return_val_if_fail(graph && to_visit && nodes_visited && op_from && op_to, -1);
ut64 from = op_from->addr;
ut64 to = op_to->addr;
bool visited = false;
ut64 from_idx = ht_uu_find(nodes_visited, from, &visited);
if (!visited && from != to) {
RZ_LOG_ERROR("'from' node should have been added before. 0x%" PFMT64x " -> 0x%" PFMT64x "\n", from, to);
return false;
}

RzGraphNode *to_node = rz_graph_add_node_info(graph, rz_str_newf("0x%" PFMT64x, to), NULL, to);
RzGraphNode *to_node = add_node_info_cfg(graph, op_to, false);
if (!to_node) {
RZ_LOG_ERROR("Could not add node at 0x%" PFMT64x "\n", to);
return false;
Expand Down Expand Up @@ -963,50 +1036,75 @@ RZ_API RZ_OWN RzGraph /*<RzGraphNodeInfo *>*/ *rz_core_graph_cfg(RZ_NONNULL RzCo
RzVector *to_visit = rz_vector_new(sizeof(ut64), NULL, NULL);

// Add entry node
RzGraphNode *entry = rz_graph_add_node_info(graph, rz_str_newf("0x%" PFMT64x, addr), NULL, addr);
ut8 buf[64] = { 0 };
if (rz_io_nread_at(core->io, addr, buf, sizeof(buf)) < 0) {
RZ_LOG_ERROR("Could not generate CFG at 0x%" PFMT64x ". rz_io_nread_at() failed at 0x%" PFMT64x ".\n", addr, addr);
goto error;
}
RzAnalysisOp curr_op = { 0 };
RzAnalysisOp target_op = { 0 };
int disas_bytes = rz_analysis_op(core->analysis, &curr_op, addr, buf, sizeof(buf), RZ_ANALYSIS_OP_MASK_DISASM);
RzGraphNode *entry = add_node_info_cfg(graph, &curr_op, true);
ht_uu_insert(nodes_visited, addr, entry->idx);
rz_vector_push(to_visit, &addr);

while (rz_vector_len(to_visit) > 0) {
ut64 cur_addr = 0;
rz_vector_pop(to_visit, &cur_addr);

ut8 buf[64] = { 0 };
if (rz_io_nread_at(core->io, cur_addr, buf, sizeof(buf)) < 0) {
RZ_LOG_ERROR("Could not generate CFG at 0x%" PFMT64x ". rz_io_nread_at() failed at 0x%" PFMT64x ".\n", addr, cur_addr);
goto error;
}

RzAnalysisOp op = { 0 };
int disas_bytes = rz_analysis_op(core->analysis, &op, cur_addr, buf, sizeof(buf), RZ_ANALYSIS_OP_MASK_DISASM);
if (disas_bytes <= 0 || is_leaf_op(&op)) {
disas_bytes = rz_analysis_op(core->analysis, &curr_op, cur_addr, buf, sizeof(buf), RZ_ANALYSIS_OP_MASK_DISASM);
if (disas_bytes <= 0 || is_leaf_op(&curr_op)) {
// A leaf. It was added before to the graph by the parent node.
rz_analysis_op_fini(&op);
rz_analysis_op_fini(&curr_op);
continue;
}

if (op.jump != UT64_MAX && !is_call(&op)) {
if (!add_edge_to_cfg(graph, to_visit, nodes_visited, cur_addr, op.jump)) {
if (curr_op.jump != UT64_MAX && !is_call(&curr_op)) {
if (rz_analysis_op(core->analysis, &target_op, curr_op.jump, buf, sizeof(buf), RZ_ANALYSIS_OP_MASK_DISASM) <= 0) {
rz_analysis_op_fini(&target_op);
goto error;
}
if (!add_edge_to_cfg(graph, to_visit, nodes_visited, &curr_op, &target_op)) {
goto error;
}
rz_analysis_op_fini(&target_op);
}
if (op.fail != UT64_MAX && !is_call(&op)) {
if (!add_edge_to_cfg(graph, to_visit, nodes_visited, cur_addr, op.fail)) {
if (curr_op.fail != UT64_MAX && !is_call(&curr_op)) {
if (rz_analysis_op(core->analysis, &target_op, curr_op.fail, buf, sizeof(buf), RZ_ANALYSIS_OP_MASK_DISASM) <= 0) {
rz_analysis_op_fini(&target_op);
goto error;
}
if (!add_edge_to_cfg(graph, to_visit, nodes_visited, &curr_op, &target_op)) {
goto error;
}
rz_analysis_op_fini(&target_op);
}

if (ignore_next_instr(&op)) {
rz_analysis_op_fini(&op);
if (ignore_next_instr(&curr_op)) {
rz_analysis_op_fini(&curr_op);
continue;
}

// Add next instruction
ut64 next_addr = cur_addr + disas_bytes;
if (!add_edge_to_cfg(graph, to_visit, nodes_visited, cur_addr, next_addr)) {
if (rz_io_nread_at(core->io, next_addr, buf, sizeof(buf)) < 0) {
RZ_LOG_ERROR("Could not generate CFG at 0x%" PFMT64x ". rz_io_nread_at() failed at 0x%" PFMT64x ".\n", addr, cur_addr);
goto error;
}
if (rz_analysis_op(core->analysis, &target_op, next_addr, buf, sizeof(buf), RZ_ANALYSIS_OP_MASK_DISASM) <= 0) {
rz_analysis_op_fini(&target_op);
goto error;
}
if (!add_edge_to_cfg(graph, to_visit, nodes_visited, &curr_op, &target_op)) {
goto error;
}
rz_analysis_op_fini(&op);
rz_analysis_op_fini(&target_op);
rz_analysis_op_fini(&curr_op);
}

fini:
Expand All @@ -1015,6 +1113,7 @@ RZ_API RZ_OWN RzGraph /*<RzGraphNodeInfo *>*/ *rz_core_graph_cfg(RZ_NONNULL RzCo
return graph;

error:
rz_warn_if_reached();
rz_graph_free(graph);
graph = NULL;
goto fini;
Expand Down
1 change: 1 addition & 0 deletions librz/include/rz_analysis.h
Original file line number Diff line number Diff line change
Expand Up @@ -2323,6 +2323,7 @@ RZ_API RZ_OWN RzPVector /*<RzAnalysisVar *>*/ *rz_analysis_function_vars(RZ_NONN
RZ_API RZ_BORROW RzAnalysisVar *rz_analysis_function_get_arg_idx(RZ_NONNULL RzAnalysis *analysis, RZ_NONNULL RzAnalysisFunction *f, size_t index);
RZ_API RZ_OWN RzList /*<RzType *>*/ *rz_analysis_types_from_fcn(RzAnalysis *analysis, RzAnalysisFunction *fcn);
RZ_API RZ_OWN RzCallable *rz_analysis_function_derive_type(RzAnalysis *analysis, RzAnalysisFunction *f);
RZ_API bool rz_analysis_function_is_malloc(const RzAnalysisFunction *fcn);

/* PDB */
RZ_API RzType *rz_type_db_pdb_parse(const RzTypeDB *typedb, RzPdbTpiStream *stream, RzPdbTpiType *type);
Expand Down
Loading

0 comments on commit fcf4f71

Please sign in to comment.