Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactoring rizin type system: introduce type scope. #4464

Draft
wants to merge 3 commits into
base: dev
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 37 additions & 14 deletions librz/arch/dwarf_process.c
Original file line number Diff line number Diff line change
Expand Up @@ -662,6 +662,12 @@ static bool RzBaseType_eq(const RzBaseType *a, const RzBaseType *b) {
if (a == NULL || b == NULL) {
return a == NULL && b == NULL;
}
printf("CU(a): %s", a->scope.cu_name);
printf("CU(b): %s", b->scope.cu_name);
if (a->scope.cu_name && b->scope.cu_name && RZ_STR_NE(a->scope.cu_name, b->scope.cu_name)) {
printf("types '%s' and '%s' are not equal\n", a->name, b->name);
return false;
}
return a->kind == b->kind && a->attrs == b->attrs && RZ_STR_EQ(a->name, b->name);
}

Expand Down Expand Up @@ -708,6 +714,11 @@ static RzBaseType *RzBaseType_from_die(DwContext *ctx, const RzBinDwarfDie *die)
return NULL;
}

if (ctx->unit->name) {
printf("storing cu_name: '%s'\n", ctx->unit->name);
btype->scope.cu_name = rz_str_dup(ctx->unit->name);
}

RzBinDwarfAttr *attr = NULL;
rz_vector_foreach (&die->attrs, attr) {
switch (attr->at) {
Expand Down Expand Up @@ -1721,6 +1732,8 @@ static RzBinDwarfDie *die_end(RzBinDwarfCompUnit *unit) {
return (RzBinDwarfDie *)((char *)vec->a + vec->elem_size * vec->len);
}

static bool store_base_type(void *u, const char *k, const void *v);

/**
* \brief Parses type and function information out of DWARF entries
* and stores them to analysis->debug_info
Expand All @@ -1742,7 +1755,12 @@ RZ_API void rz_analysis_dwarf_preprocess_info(
.unit = NULL,
};
RzBinDwarfCompUnit *unit;
RzAnalysisDebugInfo *debug_info = analysis->debug_info;
rz_vector_foreach (&dw->info->units, unit) {
debug_info->type_by_offset = ht_up_new(NULL, (HtUPFreeValue)rz_type_free);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's a bit awkward here.

Copy link
Contributor Author

@rockrid3r rockrid3r Apr 29, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Kind of. But there is no need to keep it around until rizin shuts down.

  • it is not used anywhere except dwarf_process.c.
  • we have copy of all types in typedb->types.

So it just takes up RAM for nothing.
The same goes for debug_info->base_type_by_offset and debug_info->base_types_by_name.

debug_info->base_type_by_offset = ht_up_new(NULL, (HtUPFreeValue)rz_type_base_type_free);
debug_info->base_types_by_name = ht_sp_new(HT_STR_DUP, NULL, (HtSPFreeValue)rz_pvector_free); // borrows RzBaseType pointers from base_type_by_offset

if (rz_vector_empty(&unit->dies)) {
continue;
}
Expand All @@ -1753,6 +1771,12 @@ RZ_API void rz_analysis_dwarf_preprocess_info(

die_parse(&ctx, die);
}

ht_sp_foreach(analysis->debug_info->base_types_by_name, store_base_type, (void *)analysis);

ht_up_free(debug_info->type_by_offset);
ht_up_free(debug_info->base_type_by_offset);
ht_sp_free(debug_info->base_types_by_name);
}
ht_up_free(ctx.str_escaped);
}
Expand All @@ -1764,26 +1788,32 @@ RZ_API void rz_analysis_dwarf_preprocess_info(
b = temp; \
} while (0)

static inline void update_base_type(const RzTypeDB *typedb, RzBaseType *type) {
/**
* \brief
*/
static inline void update_base_type(const RzTypeDB *typedb, RZ_BORROW RzBaseType *type) {
RzBaseType *t = rz_type_db_get_base_type(typedb, type->name);
if (t && t == type) {
return;
}
rz_type_db_update_base_type(typedb, rz_base_type_clone(type));
}

static void db_save_renamed(RzTypeDB *db, RzBaseType *b, char *name) {
static void db_save_renamed(RzTypeDB *db, RZ_BORROW RzBaseType *b, RZ_OWN char *name) {
if (!name) {
rz_warn_if_reached();
return;
}
RzBaseType *t = rz_type_db_get_base_type(db, b->name);
if (t == b) {
free(t->name);
t->name = name;
return;
}
free(b->name);
b->name = name;
rz_type_db_update_base_type(db, b);
RzBaseType *newb = rz_base_type_clone(b);
free(newb->name);
newb->name = name;
rz_type_db_update_base_type(db, newb);
}

static bool store_base_type(void *u, const char *k, const void *v) {
Expand All @@ -1804,7 +1834,7 @@ static bool store_base_type(void *u, const char *k, const void *v) {
}
if (a->kind != RZ_BASE_TYPE_KIND_TYPEDEF) {
update_base_type(analysis->typedb, a);
db_save_renamed(analysis->typedb, rz_base_type_clone(b), rz_str_newf("%s_0", name));
db_save_renamed(analysis->typedb, b, rz_str_newf("%s_0", name));
goto beach;
}
if (a->type->kind != RZ_TYPE_KIND_IDENTIFIER) {
Expand All @@ -1820,7 +1850,7 @@ static bool store_base_type(void *u, const char *k, const void *v) {
a->type->identifier.name = rz_str_dup(newname);
update_base_type(analysis->typedb, a);
}
db_save_renamed(analysis->typedb, rz_base_type_clone(b), newname);
db_save_renamed(analysis->typedb, b, newname);
} else {
RZ_LOG_WARN("BaseType: same name [%s] type count is more than 3\n", name);
}
Expand All @@ -1847,7 +1877,6 @@ static bool store_callable(void *u, ut64 k, const void *v) {
RZ_API void rz_analysis_dwarf_process_info(RzAnalysis *analysis, RzBinDWARF *dw) {
	rz_return_if_fail(analysis && dw);
	// Preprocessing passes each unit's base types to store_base_type() and then
	// frees base_types_by_name, so that table must not be walked again here.
	rz_analysis_dwarf_preprocess_info(analysis, dw);
	ht_up_foreach(analysis->debug_info->callable_by_offset, store_callable, (void *)analysis);
}

Expand Down Expand Up @@ -2072,10 +2101,7 @@ RZ_API RzAnalysisDebugInfo *rz_analysis_debug_info_new() {
debug_info->function_by_offset = ht_up_new(NULL, (HtUPFreeValue)function_free);
debug_info->function_by_addr = ht_up_new(NULL, NULL);
debug_info->variable_by_offset = ht_up_new(NULL, NULL);
debug_info->type_by_offset = ht_up_new(NULL, (HtUPFreeValue)rz_type_free);
debug_info->callable_by_offset = ht_up_new(NULL, (HtUPFreeValue)rz_type_callable_free);
debug_info->base_type_by_offset = ht_up_new(NULL, (HtUPFreeValue)rz_type_base_type_free);
debug_info->base_types_by_name = ht_sp_new(HT_STR_DUP, NULL, (HtSPFreeValue)rz_pvector_free);
debug_info->visited = set_u_new();
return debug_info;
}
Expand All @@ -2091,10 +2117,7 @@ RZ_API void rz_analysis_debug_info_free(RzAnalysisDebugInfo *debuginfo) {
ht_up_free(debuginfo->function_by_offset);
ht_up_free(debuginfo->function_by_addr);
ht_up_free(debuginfo->variable_by_offset);
ht_up_free(debuginfo->type_by_offset);
ht_up_free(debuginfo->callable_by_offset);
ht_up_free(debuginfo->base_type_by_offset);
ht_sp_free(debuginfo->base_types_by_name);
rz_bin_dwarf_free(debuginfo->dw);
set_u_free(debuginfo->visited);
free(debuginfo);
Expand Down
2 changes: 1 addition & 1 deletion librz/include/rz_analysis.h
Original file line number Diff line number Diff line change
Expand Up @@ -454,7 +454,7 @@ typedef struct {
HtUP /*<ut64, RzCallable *>*/ *callable_by_offset; ///< Store all callables parsed from DWARF by DIE offset
HtUP /*<ut64, RzType *>*/ *type_by_offset; ///< Store all RzType parsed from DWARF by DIE offset
HtUP /*<ut64, RzBaseType *>*/ *base_type_by_offset; ///< Store all RzBaseType parsed from DWARF by DIE offset
HtSP /*<const char*, RzPVector<const RzBaseType *>>*/ *base_types_by_name; ///< Store all RzBaseType parsed from DWARF by DIE offset
HtSP /*<const char*, RzPVector<const RzBaseType *>>*/ *base_types_by_name; ///< Store all RzBaseType parsed from DWARF by DIE name
DWARF_RegisterMapping dwarf_register_mapping; ///< Store the mapping function between DWARF registers number and register name in current architecture
RzBinDWARF *dw; ///< Holds ownership of RzBinDwarf, avoid releasing it prematurely
SetU *visited;
Expand Down
11 changes: 9 additions & 2 deletions librz/include/rz_type.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ typedef struct rz_type_parser_t RzTypeParser;

typedef struct rz_type_db_t {
void *user;
HtSP /*<char *, RzBaseType *>*/ *types; //< name -> base type
HtSP /*<char *, RzPVector<RzBaseType *>*>*/ *types; //< name -> vector<base type>
HtSS /*<char *, char *>*/ *formats; //< name -> `pf` format
HtSP /*<char *, RzCallable *>*/ *callables; //< name -> RzCallable (function type)
RzTypeTarget *target;
Expand All @@ -39,6 +39,11 @@ typedef struct rz_type_db_t {
RzIOBind iob; // for RzIO in formats
} RzTypeDB;


/**
 * \brief Scope a base type belongs to
 *
 * Base types sharing a name are told apart by comparing this scope
 * (see rz_type_base_type_same_scope()).
 */
typedef struct rz_type_scope_t {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add Doxygen

char *cu_name; ///< Name of the compilation unit the type was parsed from; set with rz_str_dup() from the DWARF unit name. NOTE(review): presumably NULL when the unit has no name - confirm the type is zero-initialized
} RzTypeScope;

// All types in RzTypeDB module are either concrete,
// "base" types that are types already having the
// concrete size and memory layout
Expand Down Expand Up @@ -114,6 +119,7 @@ typedef struct rz_base_type_t {
ut64 size; // size of the whole type in bits
RzBaseTypeKind kind;
RzTypeAttribute attrs;
RzTypeScope scope;
union {
RzBaseTypeStruct struct_data;
RzBaseTypeEnum enum_data;
Expand Down Expand Up @@ -267,14 +273,15 @@ RZ_API bool rz_base_type_clone_into(
RZ_NONNULL RZ_BORROW RZ_IN RzBaseType *src);
RZ_API RZ_OWN RzBaseType *rz_base_type_clone(RZ_NULLABLE RZ_BORROW RzBaseType *b);
RZ_API void rz_type_base_type_free(RzBaseType *type);
RZ_API bool rz_type_base_type_same_scope(const RzBaseType *a, const RzBaseType *b);
RZ_API RZ_OWN RzBaseType *rz_type_base_type_new(RzBaseTypeKind kind);
RZ_API RZ_BORROW const char *rz_type_base_type_kind_as_string(RzBaseTypeKind kind);

RZ_API void rz_type_base_enum_case_free(void *e, void *user);
RZ_API void rz_type_base_struct_member_free(void *e, void *user);
RZ_API void rz_type_base_union_member_free(void *e, void *user);

RZ_API RZ_BORROW RzBaseType *rz_type_db_get_base_type(const RzTypeDB *typedb, RZ_NONNULL const char *name);
RZ_API RZ_BORROW RzPVector /*<RzBaseType*>*/ *rz_type_db_get_base_type(const RzTypeDB *typedb, RZ_NONNULL const char *name);
RZ_API RZ_BORROW RzBaseType *rz_type_db_get_compound_type(const RzTypeDB *typedb, RZ_NONNULL const char *name);
RZ_API bool rz_type_db_save_base_type(const RzTypeDB *typedb, RzBaseType *type);
RZ_API bool rz_type_db_update_base_type(const RzTypeDB *typedb, RzBaseType *type);
Expand Down
105 changes: 83 additions & 22 deletions librz/type/base.c
Original file line number Diff line number Diff line change
Expand Up @@ -54,15 +54,15 @@ RZ_API RZ_BORROW const char *rz_type_base_type_kind_as_string(RzBaseTypeKind kin
* \param typedb Type Database instance
* \param name Name of the RzBaseType
*/
RZ_API RZ_BORROW RzBaseType *rz_type_db_get_base_type(const RzTypeDB *typedb, RZ_NONNULL const char *name) {
RZ_API RZ_BORROW RzPVector /*<RzBaseType>*/ *rz_type_db_get_base_type(const RzTypeDB *typedb, RZ_NONNULL const char *name) {
rz_return_val_if_fail(typedb && name, NULL);

bool found = false;
RzBaseType *btype = ht_sp_find(typedb->types, name, &found);
if (!found || !btype) {
RzPVector /*<RzBaseType*>*/ *btypes = ht_sp_find(typedb->types, name, &found);
if (!found || !btypes) {
return NULL;
}
return btype;
return btypes;
}

/**
Expand All @@ -71,9 +71,23 @@ RZ_API RZ_BORROW RzBaseType *rz_type_db_get_base_type(const RzTypeDB *typedb, RZ
* \param typedb Type Database instance
* \param type RzBaseType to remove
*/
RZ_API bool rz_type_db_delete_base_type(RzTypeDB *typedb, RZ_NONNULL RzBaseType *btype) {
	rz_return_val_if_fail(typedb && btype && btype->name, false);
	bool found = false;
	RzPVector /*<RzBaseType *>*/ *btypes = ht_sp_find(typedb->types, btype->name, &found);
	if (!found || !btypes) {
		return false;
	}
	RzBaseType *removed = NULL;
	size_t idx = 0;
	void **it;
	rz_pvector_foreach (btypes, it) {
		if (rz_type_base_type_same_scope(*it, btype)) {
			// rz_pvector_remove_at() hands the element back without freeing it.
			removed = rz_pvector_remove_at(btypes, idx);
			break;
		}
		++idx;
	}
	if (!removed) {
		// The name exists, but not in the scope of `btype`.
		return false;
	}
	if (rz_pvector_empty(btypes)) {
		// Drop the now-empty vector together with its key. This has to happen
		// before `removed` is freed because `removed` may be `btype`, whose
		// name is used as the key here.
		ht_sp_delete(typedb->types, btype->name);
	}
	rz_type_base_type_free(removed);
	return true;
}

Expand All @@ -84,9 +98,13 @@ struct list_kind {

/**
 * \brief Callback appending every base type of the requested kind to a list
 *
 * \param user struct list_kind carrying the wanted kind and the output list
 * \param k Key of the visited entry (unused)
 * \param v Vector of base types stored under the key
 * \return Always true
 */
static bool base_type_kind_collect_cb(void *user, RZ_UNUSED const char *k, const void *v) {
	struct list_kind *l = user;
	RzPVector /*<RzBaseType *>*/ *btypes = (RzPVector *)v;
	if (!btypes) {
		return true;
	}
	void **it;
	rz_pvector_foreach (btypes, it) {
		RzBaseType *btype = *it;
		if (l->kind == btype->kind) {
			// The list only borrows the type; the vector keeps owning it.
			rz_list_append(l->types, btype);
		}
	}
	return true;
}
Expand All @@ -108,7 +126,13 @@ RZ_API RZ_OWN RzList /*<RzBaseType *>*/ *rz_type_db_get_base_types_of_kind(const
/**
 * \brief Callback appending every base type stored under a key to a list
 *
 * \param user RzList receiving the borrowed RzBaseType pointers
 * \param k Key of the visited entry (only checked for NULL)
 * \param v Vector of base types stored under the key
 * \return false if an argument is missing, true otherwise
 */
static bool base_type_collect_cb(void *user, RZ_UNUSED const char *k, const void *v) {
	rz_return_val_if_fail(user && k && v, false);
	RzList *l = user;
	RzPVector /*<RzBaseType *>*/ *btypes = (RzPVector *)v;

	// Append the elements, never the vector itself.
	void **it;
	rz_pvector_foreach (btypes, it) {
		rz_list_append(l, *it);
	}

	return true;
}

Expand Down Expand Up @@ -163,6 +187,7 @@ RZ_API bool rz_base_type_clone_into(
rz_mem_copy(dst, sizeof(RzBaseType), src, sizeof(RzBaseType));
dst->name = rz_str_dup(src->name);
dst->type = src->type ? rz_type_clone(src->type) : NULL;
dst->scope.cu_name = rz_str_dup(src->scope.cu_name);

switch (src->kind) {
case RZ_BASE_TYPE_KIND_ENUM:
Expand Down Expand Up @@ -261,17 +286,34 @@ RZ_API RZ_OWN RzBaseType *rz_type_base_type_new(RzBaseTypeKind kind) {
return type;
}

/**
 * \brief Checks whether two base types belong to the same scope
 *
 * The scopes are compared by the compilation unit name stored in
 * RzBaseType.scope.
 *
 * \param a First base type
 * \param b Second base type
 * \return true if both compilation unit names compare equal with RZ_STR_EQ,
 *         false otherwise or if an argument is NULL
 */
RZ_API bool rz_type_base_type_same_scope(const RzBaseType *a, const RzBaseType *b) {
	rz_return_val_if_fail(a && b, false);
	return RZ_STR_EQ(a->scope.cu_name, b->scope.cu_name);
}

/**
 * \brief Saves RzBaseType into the Types DB. Frees the type if fails.
 *
 * Saving fails when a base type with the same name and the same scope is
 * already stored, or when the storage for it cannot be allocated.
 *
 * \param typedb Type Database instance
 * \param btype RzBaseType to save; ownership is taken whether or not saving succeeds
 * \return true if the type was stored, false otherwise
 */
RZ_API bool rz_type_db_save_base_type(const RzTypeDB *typedb, RzBaseType *btype) {
	rz_return_val_if_fail(typedb && btype && btype->name, false);
	bool found = false;
	RzPVector /*<RzBaseType *>*/ *btypes = ht_sp_find(typedb->types, btype->name, &found);
	if (!found) {
		// First type of this name: build the vector completely before publishing it.
		btypes = rz_pvector_new((RzPVectorFree)rz_type_base_type_free);
		if (!btypes || !rz_pvector_push(btypes, btype)) {
			rz_pvector_free(btypes);
			rz_type_base_type_free(btype);
			return false;
		}
		if (!ht_sp_insert(typedb->types, btype->name, btypes)) {
			// The vector already owns `btype`, so freeing it releases both.
			rz_pvector_free(btypes);
			return false;
		}
		return true;
	}
	void **it;
	rz_pvector_foreach (btypes, it) {
		RzBaseType *btype_it = *it;
		if (rz_type_base_type_same_scope(btype, btype_it)) { // same name & same scope => btypes are same
			rz_type_base_type_free(btype);
			return false;
		}
	}
	if (!rz_pvector_push(btypes, btype)) {
		rz_type_base_type_free(btype);
		return false;
	}
	return true;
}
Expand All @@ -282,12 +324,31 @@ RZ_API bool rz_type_db_save_base_type(const RzTypeDB *typedb, RzBaseType *type)
* \param typedb Type Database instance
* \param type RzBaseType to save
*/
RZ_API bool rz_type_db_update_base_type(const RzTypeDB *typedb, RzBaseType *type) {
rz_return_val_if_fail(typedb && type && type->name, false);
if (!ht_sp_update(typedb->types, type->name, (void *)type)) {
rz_type_base_type_free(type);
return false;
RZ_API bool rz_type_db_update_base_type(const RzTypeDB *typedb, RZ_OWN RzBaseType *btype) {
rz_return_val_if_fail(typedb && btype && btype->name, false);
bool found;
RzPVector /*<RzBaseType*>*/ *btypes = ht_sp_find(typedb->types, btype->name, &found);
if (!found) {
btypes = rz_pvector_new((void (*)(void*))rz_type_base_type_free);
ht_sp_insert(typedb->types, btype->name, btypes);
rz_pvector_push(btypes, btype);
} else {
void** it;
found = false;
rz_pvector_foreach(btypes, it) {
RzBaseType *btype_it = *it;
if (rz_type_base_type_same_scope(btype, btype_it)) {
*(RzBaseType**)it = btype; // replace vector element at this position
rz_type_base_type_free(btype_it);
found = true;
break;
}
}
if (!found) {
rz_pvector_push(btypes, btype);
}
}
// TODO: change doxygen OR free on failure indeed.
return true;
}

Expand Down
4 changes: 2 additions & 2 deletions librz/type/type.c
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ RZ_API RzTypeDB *rz_type_db_new() {
return NULL;
}
typedb->target->default_type = strdup("int");
typedb->types = ht_sp_new(HT_STR_DUP, NULL, (HtSPFreeValue)rz_type_base_type_free);
typedb->types = ht_sp_new(HT_STR_DUP, NULL, (HtSPFreeValue)rz_pvector_free);
if (!typedb->types) {
goto rz_type_db_new_fail;
}
Expand Down Expand Up @@ -82,7 +82,7 @@ RZ_API void rz_type_db_purge(RzTypeDB *typedb) {
ht_sp_free(typedb->callables);
typedb->callables = ht_sp_new(HT_STR_DUP, NULL, (HtSPFreeValue)rz_type_callable_free);
ht_sp_free(typedb->types);
typedb->types = ht_sp_new(HT_STR_DUP, NULL, (HtSPFreeValue)rz_type_base_type_free);
typedb->types = ht_sp_new(HT_STR_DUP, NULL, (HtSPFreeValue)rz_pvector_free);
rz_type_parser_free(typedb->parser);
typedb->parser = rz_type_parser_init(typedb->types, typedb->callables);
}
Expand Down
Loading