From 6c125d605c051d624f0caaedf19549b4e8c9f7fd Mon Sep 17 00:00:00 2001 From: steven Date: Thu, 8 Nov 2018 00:38:10 -0500 Subject: [PATCH] added additional metadata. Signed-off-by: steven --- .../sis/dmas/kam1n0/app/adata/FunctionDataUnit.java | 6 ++++-- .../app/clone/adata/FunctionCloneEntryForWeb.java | 3 +++ .../kam1n0/problem/clone/FunctionCloneEntry.java | 7 +++++-- .../problem/clone/detector/kam/index/ALSH.java | 2 +- .../detector/kam/utils/SubgraphBlocksImpl3.java | 9 +++------ .../framework/disassembly/BinarySurrogate.java | 13 +++++++------ .../sis/dmas/kam1n0/framework/storage/Block.java | 2 ++ .../sis/dmas/kam1n0/framework/storage/Function.java | 2 ++ 8 files changed, 27 insertions(+), 17 deletions(-) diff --git a/kam1n0/kam1n0-apps/src/main/java/ca/mcgill/sis/dmas/kam1n0/app/adata/FunctionDataUnit.java b/kam1n0/kam1n0-apps/src/main/java/ca/mcgill/sis/dmas/kam1n0/app/adata/FunctionDataUnit.java index 3a8bc375c..e7e0f0072 100644 --- a/kam1n0/kam1n0-apps/src/main/java/ca/mcgill/sis/dmas/kam1n0/app/adata/FunctionDataUnit.java +++ b/kam1n0/kam1n0-apps/src/main/java/ca/mcgill/sis/dmas/kam1n0/app/adata/FunctionDataUnit.java @@ -37,11 +37,12 @@ public class FunctionDataUnit implements Serializable { public String functionName; public String functionId; public String startAddress; - public long blockSize; + public int blockSize; public SrcFunction srcFunc; public ArrayList nodes = new ArrayList<>(); public ArrayList links = new ArrayList<>(); + public int codeSize; public static class Link implements Serializable { private static final long serialVersionUID = -3012737921524201819L; @@ -152,7 +153,8 @@ public FunctionDataUnit(Function function, AsmLineNormalizer normalizer, boolean this.functionId = Long.toString(function.functionId); this.functionName = function.functionName; this.startAddress = Long.toString(function.startingAddress); - this.blockSize = function.numBlocks; + this.blockSize = (int) function.numBlocks; + this.codeSize = (int) function.codeSize; } } diff --git a/kam1n0/kam1n0-apps/src/main/java/ca/mcgill/sis/dmas/kam1n0/app/clone/adata/FunctionCloneEntryForWeb.java b/kam1n0/kam1n0-apps/src/main/java/ca/mcgill/sis/dmas/kam1n0/app/clone/adata/FunctionCloneEntryForWeb.java index 78c83e431..2fd906891 100644 --- a/kam1n0/kam1n0-apps/src/main/java/ca/mcgill/sis/dmas/kam1n0/app/clone/adata/FunctionCloneEntryForWeb.java +++ b/kam1n0/kam1n0-apps/src/main/java/ca/mcgill/sis/dmas/kam1n0/app/clone/adata/FunctionCloneEntryForWeb.java @@ -32,6 +32,7 @@ public class FunctionCloneEntryForWeb implements Serializable { public String binaryId; public String binaryName = StringResources.STR_EMPTY; public int numBbs = 0; + public int codeSize = 0; public double similarity = -1; public FunctionDataUnit actualFunc = null; @@ -57,6 +58,7 @@ public FunctionCloneEntryForWeb(FunctionCloneEntry entry) { this.binaryName = entry.binaryName; this.similarity = entry.similarity; this.numBbs = (int) entry.numBbs; + this.codeSize = (int) entry.codeSize; clonedParts = entry.clonedParts.stream() // .map(set -> set// @@ -77,6 +79,7 @@ public FunctionCloneEntryForWeb(FunctionDataUnit func, double similarity, int nu this.binaryName = func.binaryName; this.numBbs = numbbs; + this.codeSize = func.codeSize; } diff --git a/kam1n0/kam1n0-clone/src/main/java/ca/mcgill/sis/dmas/kam1n0/problem/clone/FunctionCloneEntry.java b/kam1n0/kam1n0-clone/src/main/java/ca/mcgill/sis/dmas/kam1n0/problem/clone/FunctionCloneEntry.java index 46ad46e8b..fbb4eb3a5 100644 --- a/kam1n0/kam1n0-clone/src/main/java/ca/mcgill/sis/dmas/kam1n0/problem/clone/FunctionCloneEntry.java +++ b/kam1n0/kam1n0-clone/src/main/java/ca/mcgill/sis/dmas/kam1n0/problem/clone/FunctionCloneEntry.java @@ -38,6 +38,7 @@ public class FunctionCloneEntry implements Serializable, Comparable> Tuple2>, JavaRDD objs, int topK, Function, List> filter) { List> hid_tbid_l = this.index_bucket.collectHids(rid, objs, this::hash); HashSet hids = hid_tbid_l.stream().map(tp -> tp._1).collect(Collectors.toCollection(HashSet::new)); - logger.info("hids {}", hids.size()); + // logger.info("hids {}", hids.size()); // hid->info JavaRDD> hid_info = this.index_deduplication.getVecEntryInfoAsRDD(rid, hids, false, filter);// .cache(); return new Tuple2<>(hid_tbid_l, hid_info); diff --git a/kam1n0/kam1n0-clone/src/main/java/ca/mcgill/sis/dmas/kam1n0/problem/clone/detector/kam/utils/SubgraphBlocksImpl3.java b/kam1n0/kam1n0-clone/src/main/java/ca/mcgill/sis/dmas/kam1n0/problem/clone/detector/kam/utils/SubgraphBlocksImpl3.java index e66d6245d..1825950a0 100644 --- a/kam1n0/kam1n0-clone/src/main/java/ca/mcgill/sis/dmas/kam1n0/problem/clone/detector/kam/utils/SubgraphBlocksImpl3.java +++ b/kam1n0/kam1n0-clone/src/main/java/ca/mcgill/sis/dmas/kam1n0/problem/clone/detector/kam/utils/SubgraphBlocksImpl3.java @@ -303,12 +303,8 @@ public static FunctionCloneEntry mergeSingles2(int funcLength, Iterable Double.compare(g1.score, g2.score)); ArrayList picks = new ArrayList<>(); @@ -349,7 +345,8 @@ public static FunctionCloneEntry mergeSingles2(int funcLength, Iterable v == null ? lk.score * lk.tar.codesSize : Math.max(lk.score * lk.tar.codesSize, v))); // entry.similarity = picks.stream().mapToDouble(g -> g.score).sum() * 1.0 / // (Math.abs(funcLength)); - entry.similarity = hashMap.values().stream().mapToDouble(v -> v).sum() / Math.abs(funcLength); + double sum = hashMap.values().stream().mapToDouble(v -> v).sum() * 2; + entry.similarity = sum / (Math.abs(funcLength) + Math.abs(entry.codeSize)); return entry; } diff --git a/kam1n0/kam1n0-commons/src/main/java/ca/mcgill/sis/dmas/kam1n0/framework/disassembly/BinarySurrogate.java b/kam1n0/kam1n0-commons/src/main/java/ca/mcgill/sis/dmas/kam1n0/framework/disassembly/BinarySurrogate.java index 3221c3f52..5e85b2154 100644 --- a/kam1n0/kam1n0-commons/src/main/java/ca/mcgill/sis/dmas/kam1n0/framework/disassembly/BinarySurrogate.java +++ b/kam1n0/kam1n0-commons/src/main/java/ca/mcgill/sis/dmas/kam1n0/framework/disassembly/BinarySurrogate.java @@ -125,6 +125,7 @@ public Function toFunction(FunctionSurrogate func) { ofunc.srcName = func.srcName; ofunc.blocks = new ArrayList<>(); ofunc.architecture = architecture; + ofunc.codeSize = func.blocks.stream().mapToLong(blk -> blk.asmLines().size()).sum(); func.blocks.forEach(blk -> ofunc.blockIds.add(blk.id)); func.blocks.forEach(blk -> { Block oblk = new Block(); @@ -135,6 +136,7 @@ public Function toFunction(FunctionSurrogate func) { oblk.callingBlocks = blk.call; oblk.codes = new ArrayList<>(blk.asmLines()); oblk.codesSize = oblk.codes.size(); + oblk.funcCodeSize = ofunc.codeSize; oblk.functionId = func.id; oblk.functionName = func.name; oblk.peerSize = func.blocks.size(); @@ -144,13 +146,12 @@ public Function toFunction(FunctionSurrogate func) { oblk.architecture = architecture; ofunc.blocks.add(oblk); }); + ofunc.numBlocks = ofunc.blocks.size(); - ofunc.comments = func.comments.stream() - .map(cmm -> { - String content = cmm.comment.replaceAll("(\r\n|\n)", "\n\n"); - return new Comment(func.id, content, cmm.type, new Date().getTime(), "user_ida", cmm.offset); - }) - .collect(Collectors.toList()); + ofunc.comments = func.comments.stream().map(cmm -> { + String content = cmm.comment.replaceAll("(\r\n|\n)", "\n\n"); + return new Comment(func.id, content, cmm.type, new Date().getTime(), "user_ida", cmm.offset); + }).collect(Collectors.toList()); return ofunc; } diff --git a/kam1n0/kam1n0-commons/src/main/java/ca/mcgill/sis/dmas/kam1n0/framework/storage/Block.java b/kam1n0/kam1n0-commons/src/main/java/ca/mcgill/sis/dmas/kam1n0/framework/storage/Block.java index 70031d477..de4a4ed5b 100644 --- a/kam1n0/kam1n0-commons/src/main/java/ca/mcgill/sis/dmas/kam1n0/framework/storage/Block.java +++ b/kam1n0/kam1n0-commons/src/main/java/ca/mcgill/sis/dmas/kam1n0/framework/storage/Block.java @@ -63,6 +63,8 @@ public class Block implements AsmFragment, Serializable { public String blockName = StringResources.STR_EMPTY; public long codesSize = -1; + + public long funcCodeSize = -1; @AsBytes public List> codes; diff --git a/kam1n0/kam1n0-commons/src/main/java/ca/mcgill/sis/dmas/kam1n0/framework/storage/Function.java b/kam1n0/kam1n0-commons/src/main/java/ca/mcgill/sis/dmas/kam1n0/framework/storage/Function.java index ea83e22cc..55a32b516 100644 --- a/kam1n0/kam1n0-commons/src/main/java/ca/mcgill/sis/dmas/kam1n0/framework/storage/Function.java +++ b/kam1n0/kam1n0-commons/src/main/java/ca/mcgill/sis/dmas/kam1n0/framework/storage/Function.java @@ -55,6 +55,8 @@ public class Function implements Serializable, Iterable { public String binaryName = StringResources.STR_EMPTY; public long numBlocks; + + public long codeSize; @AsBytes public Set blockIds;