Skip to content

Commit

Permalink
added additional metadata.
Browse files — browse the repository at this point in the history
Signed-off-by: steven <[email protected]>
  • Loading branch information
steven-hh-ding committed Nov 8, 2018
1 parent a475255 commit 6c125d6
Show file tree
Hide file tree
Showing 8 changed files with 27 additions and 17 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -37,11 +37,12 @@ public class FunctionDataUnit implements Serializable {
public String functionName;
public String functionId;
public String startAddress;
public long blockSize;
public int blockSize;
public SrcFunction srcFunc;

public ArrayList<BlockDataUnit> nodes = new ArrayList<>();
public ArrayList<Link> links = new ArrayList<>();
public int codeSize;

public static class Link implements Serializable {
private static final long serialVersionUID = -3012737921524201819L;
Expand Down Expand Up @@ -152,7 +153,8 @@ public FunctionDataUnit(Function function, AsmLineNormalizer normalizer, boolean
this.functionId = Long.toString(function.functionId);
this.functionName = function.functionName;
this.startAddress = Long.toString(function.startingAddress);
this.blockSize = function.numBlocks;
this.blockSize = (int) function.numBlocks;
this.codeSize = (int) function.codeSize;
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ public class FunctionCloneEntryForWeb implements Serializable {
public String binaryId;
public String binaryName = StringResources.STR_EMPTY;
public int numBbs = 0;
public int codeSize = 0;
public double similarity = -1;
public FunctionDataUnit actualFunc = null;

Expand All @@ -57,6 +58,7 @@ public FunctionCloneEntryForWeb(FunctionCloneEntry entry) {
this.binaryName = entry.binaryName;
this.similarity = entry.similarity;
this.numBbs = (int) entry.numBbs;
this.codeSize = (int) entry.codeSize;

clonedParts = entry.clonedParts.stream() //
.map(set -> set//
Expand All @@ -77,6 +79,7 @@ public FunctionCloneEntryForWeb(FunctionDataUnit func, double similarity, int nu
this.binaryName = func.binaryName;

this.numBbs = numbbs;
this.codeSize = func.codeSize;

}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ public class FunctionCloneEntry implements Serializable, Comparable<FunctionClon
public long binaryId;
public String binaryName = StringResources.STR_EMPTY;
public double similarity = Double.MAX_VALUE;
public long codeSize;
public long startingEA;
public long numBbs = 0;

Expand All @@ -56,15 +57,17 @@ public FunctionCloneEntry(Function function, double similarity) {
this.functionName = function.functionName;
this.similarity = similarity;
this.numBbs = function.numBlocks;
this.codeSize = function.codeSize;
}

public FunctionCloneEntry(Block aBlk, double similarity, int bbcount) {
public FunctionCloneEntry(Block aBlk, double similarity) {
this.binaryId = aBlk.binaryId;
this.binaryName = aBlk.binaryName;
this.functionId = aBlk.functionId;
this.functionName = aBlk.functionName;
this.similarity = similarity;
this.numBbs = bbcount;
this.numBbs = aBlk.peerSize;
this.codeSize = aBlk.funcCodeSize;
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -208,7 +208,7 @@ public <E extends VecObject<T, K>> Tuple2<List<Tuple2<Long, E>>, JavaRDD<VecEntr
List<? extends E> objs, int topK, Function<List<T>, List<T>> filter) {
List<Tuple2<Long, E>> hid_tbid_l = this.index_bucket.collectHids(rid, objs, this::hash);
HashSet<Long> hids = hid_tbid_l.stream().map(tp -> tp._1).collect(Collectors.toCollection(HashSet::new));
logger.info("hids {}", hids.size());
// logger.info("hids {}", hids.size());
// hid->info
JavaRDD<VecEntry<T, K>> hid_info = this.index_deduplication.getVecEntryInfoAsRDD(rid, hids, false, filter);// .cache();
return new Tuple2<>(hid_tbid_l, hid_info);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -303,12 +303,8 @@ public static FunctionCloneEntry mergeSingles2(int funcLength, Iterable<Tuple3<B

}

FunctionCloneEntry entry = new FunctionCloneEntry();
Block block = subgraphs.get(0).stream().findAny().get().src;
entry.functionId = block.functionId;
entry.functionName = block.functionName;
entry.binaryId = block.binaryId;
entry.binaryName = block.binaryName;
FunctionCloneEntry entry = new FunctionCloneEntry(block, 0);

subgraphs.sort((g1, g2) -> Double.compare(g1.score, g2.score));
ArrayList<Subgraph2> picks = new ArrayList<>();
Expand Down Expand Up @@ -349,7 +345,8 @@ public static FunctionCloneEntry mergeSingles2(int funcLength, Iterable<Tuple3<B
(k, v) -> v == null ? lk.score * lk.tar.codesSize : Math.max(lk.score * lk.tar.codesSize, v)));
// entry.similarity = picks.stream().mapToDouble(g -> g.score).sum() * 1.0 /
// (Math.abs(funcLength));
entry.similarity = hashMap.values().stream().mapToDouble(v -> v).sum() / Math.abs(funcLength);
double sum = hashMap.values().stream().mapToDouble(v -> v).sum() * 2;
entry.similarity = sum / (Math.abs(funcLength) + Math.abs(entry.codeSize));
return entry;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,7 @@ public Function toFunction(FunctionSurrogate func) {
ofunc.srcName = func.srcName;
ofunc.blocks = new ArrayList<>();
ofunc.architecture = architecture;
ofunc.codeSize = func.blocks.stream().mapToLong(blk -> blk.asmLines().size()).sum();
func.blocks.forEach(blk -> ofunc.blockIds.add(blk.id));
func.blocks.forEach(blk -> {
Block oblk = new Block();
Expand All @@ -135,6 +136,7 @@ public Function toFunction(FunctionSurrogate func) {
oblk.callingBlocks = blk.call;
oblk.codes = new ArrayList<>(blk.asmLines());
oblk.codesSize = oblk.codes.size();
oblk.funcCodeSize = ofunc.codeSize;
oblk.functionId = func.id;
oblk.functionName = func.name;
oblk.peerSize = func.blocks.size();
Expand All @@ -144,13 +146,12 @@ public Function toFunction(FunctionSurrogate func) {
oblk.architecture = architecture;
ofunc.blocks.add(oblk);
});

ofunc.numBlocks = ofunc.blocks.size();
ofunc.comments = func.comments.stream()
.map(cmm -> {
String content = cmm.comment.replaceAll("(\r\n|\n)", "\n\n");
return new Comment(func.id, content, cmm.type, new Date().getTime(), "user_ida", cmm.offset);
})
.collect(Collectors.toList());
ofunc.comments = func.comments.stream().map(cmm -> {
String content = cmm.comment.replaceAll("(\r\n|\n)", "\n\n");
return new Comment(func.id, content, cmm.type, new Date().getTime(), "user_ida", cmm.offset);
}).collect(Collectors.toList());
return ofunc;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,8 @@ public class Block implements AsmFragment, Serializable {
public String blockName = StringResources.STR_EMPTY;

public long codesSize = -1;

public long funcCodeSize = -1;

@AsBytes
public List<List<String>> codes;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,8 @@ public class Function implements Serializable, Iterable<Block> {
public String binaryName = StringResources.STR_EMPTY;

public long numBlocks;

public long codeSize;

@AsBytes
public Set<Long> blockIds;
Expand Down

0 comments on commit 6c125d6

Please sign in to comment.