Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Recover and serialize types from high pcode #50

Merged
merged 7 commits into from
Oct 24, 2024
Merged
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
270 changes: 252 additions & 18 deletions scripts/ghidra/PatchestryDecompileFunctions.java
Original file line number Diff line number Diff line change
Expand Up @@ -26,16 +26,38 @@

import ghidra.program.model.listing.Program;

import ghidra.program.model.pcode.FunctionPrototype;
import ghidra.program.model.pcode.HighFunction;
import ghidra.program.model.pcode.HighParam;
import ghidra.program.model.pcode.HighVariable;
import ghidra.program.model.pcode.PcodeBlock;
import ghidra.program.model.pcode.PcodeBlockBasic;
import ghidra.program.model.pcode.PcodeOp;
import ghidra.program.model.pcode.SequenceNumber;
import ghidra.program.model.pcode.Varnode;

import ghidra.program.model.data.AbstractFloatDataType;
import ghidra.program.model.data.AbstractIntegerDataType;
import ghidra.program.model.data.Array;
import ghidra.program.model.data.BooleanDataType;
import ghidra.program.model.data.Composite;
import ghidra.program.model.data.CategoryPath;
import ghidra.program.model.data.DataType;
import ghidra.program.model.data.DataTypeComponent;
import ghidra.program.model.data.Enum;
kumarak marked this conversation as resolved.
Show resolved Hide resolved
import ghidra.program.model.data.FunctionDefinition;
import ghidra.program.model.data.ParameterDefinition;
import ghidra.program.model.data.Pointer;
import ghidra.program.model.data.Structure;
import ghidra.program.model.data.TypeDef;
import ghidra.program.model.data.Undefined;
import ghidra.program.model.data.Union;
import ghidra.program.model.data.VoidDataType;

import ghidra.program.model.symbol.ExternalManager;

import ghidra.util.UniversalID;

import com.google.gson.stream.JsonWriter;

import java.io.BufferedWriter;
Expand All @@ -48,13 +70,19 @@
import java.nio.file.Path;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Collections;
import java.util.Iterator;
import java.util.Set;
import java.util.TreeSet;

public class PatchestryDecompileFunctions extends GhidraScript {

// Timeout value passed to the decompiler for every function that gets
// decompiled. NOTE(review): presumably expressed in seconds, as expected by
// Ghidra's `decompileFunction` -- confirm against the Ghidra API.
static final int decompilation_timeout = 30;

// Maps a type id (the string produced by `label(DataType)`) to the data type
// it was computed from. Entries are added by `getTypeIds` and read back when
// the types are serialized.
private HashMap<String, DataType> type_map = new HashMap<>();

private class PcodeSerializer extends JsonWriter {
private String arch;
Expand All @@ -65,6 +93,7 @@ private class PcodeSerializer extends JsonWriter {
private List<Function> functions;
private int original_functions_size;
private Set<Address> seen_functions;
private Set<String> seen_types;
pgoodman marked this conversation as resolved.
Show resolved Hide resolved

public PcodeSerializer(java.io.BufferedWriter writer,
String arch_, FunctionManager fm_,
Expand All @@ -80,8 +109,20 @@ public PcodeSerializer(java.io.BufferedWriter writer,
this.functions = functions_;
this.original_functions_size = functions.size();
this.seen_functions = new TreeSet<>();
this.seen_types = new HashSet<>();
pgoodman marked this conversation as resolved.
Show resolved Hide resolved
}


private static String label(DataType type) throws Exception {
kumarak marked this conversation as resolved.
Show resolved Hide resolved
String name = type.getName();
CategoryPath category = type.getCategoryPath();
String concat_type = category.toString() + name + Integer.toString(type.getLength());
UniversalID uid = type.getUniversalID();
if (uid != null) {
kumarak marked this conversation as resolved.
Show resolved Hide resolved
return Integer.toHexString(concat_type.hashCode()) + Address.SEPARATOR + uid.toString();
}
return Integer.toHexString(concat_type.hashCode());
}

// Returns the textual form of an address, including its address space prefix.
private static String label(Address address) throws Exception {
    final boolean show_address_space_prefix = true;
    return address.toString(show_address_space_prefix);
}
Expand Down Expand Up @@ -156,6 +197,187 @@ private HighVariable lValueOf(Varnode node) throws Exception {
return var;
}

private String getTypeIds(DataType type) throws Exception {
kumarak marked this conversation as resolved.
Show resolved Hide resolved
String type_id = label(type);
DataType lookup_type = type_map.get(type_id);
if (lookup_type == null) {
type_map.put(type_id, type);
}
return type_id;
}

private void serializePointerType(Pointer data_type) throws Exception {
name("name").value(data_type.getDisplayName().replaceAll(" ", ""));
name("kind").value("pointer");
name("size").value(data_type.getLength());
DataType elem_type = data_type.getDataType();

// element data type can be null
kumarak marked this conversation as resolved.
Show resolved Hide resolved
if (elem_type != null) {
String elem_type_id = getTypeIds(elem_type);
kumarak marked this conversation as resolved.
Show resolved Hide resolved
name("element_type").value(elem_type_id);
seen_types.add(elem_type_id);
kumarak marked this conversation as resolved.
Show resolved Hide resolved
}
}

private void serializeTypedefType(TypeDef data_type) throws Exception {
name("name").value(data_type.getDisplayName().replaceAll(" ", ""));
name("kind").value("typedef");
name("size").value(data_type.getLength());
kumarak marked this conversation as resolved.
Show resolved Hide resolved
name("is_pointer").value(data_type.isPointer());
kumarak marked this conversation as resolved.
Show resolved Hide resolved

DataType base_type = data_type.getBaseDataType();
if (base_type != null) {
String base_type_id = getTypeIds(base_type);
seen_types.add(base_type_id);
name("base_type").value(base_type_id);
}
pgoodman marked this conversation as resolved.
Show resolved Hide resolved
}

private void serializeArrayType(Array arr) throws Exception {
name("name").value(arr.getDisplayName().replaceAll(" ", ""));
name("kind").value("array");
name("size").value(arr.getLength());
name("num_elements").value(arr.getNumElements());
pgoodman marked this conversation as resolved.
Show resolved Hide resolved
DataType elem_type = arr.getDataType();
if (elem_type != null) {
kumarak marked this conversation as resolved.
Show resolved Hide resolved
String elem_type_id = getTypeIds(elem_type);
seen_types.add(elem_type_id);
name("element_type").value(elem_type_id);
}
}

// Writes the three fields shared by types that carry no nested type
// references: display name with spaces removed, size, and the caller-supplied
// kind string.
private void serializeGenericType(DataType data_type, String kind) throws Exception {
    String display_name = data_type.getDisplayName().replaceAll(" ", "");
    name("name").value(display_name);
    name("size").value(data_type.getLength());
    name("kind").value(kind);
}

private void serializeCompositeType(Composite data_type, String kind) throws Exception {
name("name").value(data_type.getDisplayName().replaceAll(" ", ""));
name("kind").value(kind);
name("size").value(data_type.getLength());
name("num_fields").value(data_type.getNumComponents());
kumarak marked this conversation as resolved.
Show resolved Hide resolved
name("fields").beginArray();

for (int i = 0; i < data_type.getNumComponents(); i++) {
DataTypeComponent dtc = data_type.getComponent(i);
beginObject();

String comp_id = getTypeIds(dtc.getDataType());
seen_types.add(comp_id);
name("type").value(comp_id);
name("offset").value(dtc.getOffset());

if (dtc.getFieldName() != null) {
name("name").value(dtc.getFieldName().replaceAll(" ",""));
}
endObject();
}
endArray();
}

private void serializeFunctionDefinition(FunctionDefinition fd) throws Exception {
name("name").value(fd.getDisplayName());
name("kind").value("function");
String ret_type_id = getTypeIds(fd.getReturnType());
seen_types.add(ret_type_id);
name("return_type").value(ret_type_id);
name("has_varargs").value(fd.hasVarArgs());
kumarak marked this conversation as resolved.
Show resolved Hide resolved
ParameterDefinition[] arguments = fd.getArguments();
name("parameters").beginArray();
for (int i = 0; i < arguments.length; i++) {
beginObject();
name("name").value(arguments[i].getName());
kumarak marked this conversation as resolved.
Show resolved Hide resolved
name("size").value(arguments[i].getLength());
String param_type_id = getTypeIds(arguments[i].getDataType());
seen_types.add(param_type_id);
name("type").value(param_type_id);
endObject();
}
endArray();
}

// Writes the fields describing `data_type` by dispatching on its concrete
// kind. A null type is written as a JSON null. Types matching none of the
// known kinds only produce a log line via `println`; no fields are written
// for them.
//
// The order of the checks matters: the boolean check must come before the
// integer check, because Ghidra's `BooleanDataType` derives from
// `AbstractIntegerDataType`. With the integer check first, the "boolean"
// branch could never be taken and booleans were reported as "integer".
private void serialize(DataType data_type) throws Exception {
    if (data_type == null) {
        nullValue();
        return;
    }

    if (data_type instanceof Pointer) {
        serializePointerType((Pointer) data_type);
    } else if (data_type instanceof TypeDef) {
        serializeTypedefType((TypeDef) data_type);
    } else if (data_type instanceof Array) {
        serializeArrayType((Array) data_type);
    } else if (data_type instanceof Structure) {
        serializeCompositeType((Composite) data_type, "struct");
    } else if (data_type instanceof Union) {
        serializeCompositeType((Composite) data_type, "union");
    } else if (data_type instanceof BooleanDataType) {
        // Checked ahead of AbstractIntegerDataType; see the note above.
        serializeGenericType(data_type, "boolean");
    } else if (data_type instanceof AbstractIntegerDataType) {
        serializeGenericType(data_type, "integer");
    } else if (data_type instanceof AbstractFloatDataType) {
        serializeGenericType(data_type, "float");
    } else if (data_type instanceof Enum) {
        serializeGenericType(data_type, "enum");
    } else if (data_type instanceof VoidDataType) {
        serializeGenericType(data_type, "void");
    } else if (data_type instanceof Undefined || data_type.toString().contains("undefined")) {
        // Besides `Undefined` instances, any type whose string form mentions
        // "undefined" is treated as undefined as well.
        serializeGenericType(data_type, "undefined");
    } else if (data_type instanceof FunctionDefinition) {
        serializeFunctionDefinition((FunctionDefinition) data_type);
    } else {
        println("types: " + data_type.toString());
    }
}

private void serializeTypes(Set<String> type_ids) throws Exception {
Set<String> serialized_types = new HashSet<>(); // To keep track of already serialized types
kumarak marked this conversation as resolved.
Show resolved Hide resolved

do {
kumarak marked this conversation as resolved.
Show resolved Hide resolved
serialized_types.addAll(type_ids); // Add current typeIds to serialized set
seen_types.clear(); // Clear the pendingTypes for new iteration

for (String id : type_ids) {
name(id).beginObject();
DataType data_type = type_map.get(id);
serialize(data_type);
endObject();
}

seen_types.removeAll(serialized_types);
type_ids = new HashSet<>(seen_types);
} while (!seen_types.isEmpty());
println("Total serialized types: " + serialized_types.size());
}

private void serialize(FunctionPrototype proto) throws Exception {
if (proto == null) {
nullValue();
return;
}

name("parameters").beginArray();
for (int i = 0; i < proto.getNumParams(); i++) {
serialize(proto.getParam(i).getHighVariable());
kumarak marked this conversation as resolved.
Show resolved Hide resolved
}
endArray();
}

// Writes a high variable as an object holding its name and the id of its data
// type; a null variable is written as a JSON null. Looking up the type id
// also records the type in `type_map` (via `getTypeIds`).
private void serialize(HighVariable high_var) throws Exception {
    if (high_var != null) {
        beginObject();
        name("name").value(high_var.getName());
        name("type").value(getTypeIds(high_var.getDataType()));
        endObject();
    } else {
        nullValue();
    }
}

private void serialize(Varnode node) throws Exception {
if (node == null) {
assert false;
Expand All @@ -172,23 +394,18 @@ private void serialize(Varnode node) throws Exception {

beginObject();

if (node.isConstant()) {
name("type").value("const");
} else if (node.isUnique()) {
name("type").value("unique");
} else if (node.isRegister()) {
name("type").value("register");
} else if (node.isAddress()) {
name("type").value("ram");
} else if (node.getAddress().isStackAddress()) {
name("type").value("stack");
} else {
throw new Exception("Unknown Varnode kind: " + node.toString());
}

Address address = node.getAddress();
name("space").value(address.getAddressSpace().getName());
name("offset").value(node.getOffset());
name("size").value(node.getSize());

name("address").value(label(node.getAddress()));
HighVariable high_var = node.getHigh();
if (high_var != null) {
name("variable").beginObject();
name("name").value(high_var.getName());
name("type").value(getTypeIds(high_var.getDataType()));
endObject();
}
endObject();
}

Expand Down Expand Up @@ -331,6 +548,12 @@ private void serialize(HighFunction function, PcodeBlockBasic block) throws Exce
private void serialize(HighFunction high_function, Function function, boolean visit_pcode) throws Exception {

name("name").value(function.getName());
FunctionPrototype proto = high_function.getFunctionPrototype();
name("prototype").beginObject();
if (proto != null) {
serialize(proto);
}
endObject();

// If we have a high P-Code function, then serialize the blocks.
if (high_function != null) {
Expand Down Expand Up @@ -363,14 +586,25 @@ public void serialize() throws Exception {
continue;
}

DecompileResults res = ifc.decompileFunction(function, 30, null);
DecompileResults res = ifc.decompileFunction(function, decompilation_timeout, null);
HighFunction high_function = res.getHighFunction();

if (high_function == null) {
continue;
}

name(label(function_address)).beginObject();
serialize(high_function, function, i < original_functions_size);
endObject();
}

// Serialize Types
name("types").beginObject();
Set<String> seen_type_ids = new HashSet<>(type_map.keySet());
serializeTypes(seen_type_ids);
endObject();

endObject().endObject();
println("Type map size:" + type_map.size());
}
}

Expand Down