From f7dc8492642ccda48ad4b9bed30d1df0ffb0ae51 Mon Sep 17 00:00:00 2001 From: greninja Date: Wed, 28 Dec 2016 21:31:10 +0530 Subject: [PATCH] Features added referring to #15 --- conf/cuckooml.conf | 3 +++ modules/processing/cuckooml.py | 28 ++++++++++++++++++++++++++-- 2 files changed, 29 insertions(+), 2 deletions(-) diff --git a/conf/cuckooml.conf b/conf/cuckooml.conf index 71d65e46..c67c887a 100644 --- a/conf/cuckooml.conf +++ b/conf/cuckooml.conf @@ -38,3 +38,6 @@ compare_new_samples = true # Set folder for samples to be compared against clustering test_directory = sample_data/test + +# Do plotting? +plotting = true diff --git a/modules/processing/cuckooml.py b/modules/processing/cuckooml.py index 14912c24..d466aa67 100644 --- a/modules/processing/cuckooml.py +++ b/modules/processing/cuckooml.py @@ -15,11 +15,19 @@ from lib.cuckoo.common.constants import CUCKOO_ROOT from math import log +if Config("cuckooml").cuckooml.plotting: + try: + import matplotlib.pyplot as plt + import seaborn as sns + except ImportError, e: + print >> sys.stderr, "Plotting libraries \ + (matplotlib and seaborn) are not available." + print >> sys.stderr, e + + try: - import matplotlib.pyplot as plt import numpy as np import pandas as pd - import seaborn as sns from hdbscan import HDBSCAN from sklearn import metrics from sklearn.cluster import DBSCAN @@ -797,6 +805,14 @@ def filter_dataset(self, dataset=None, feature_coverage=0.1, def detect_abnormal_behaviour(self, count_dataset=None, figures=True): """Detect samples that behave significantly different than others.""" + + # Safety check for plotting + if not Config("cuckooml").cuckooml.plotting and figures: + print >> sys.stderr, "Warning:'plotting' flag disabled in conf/cuckooml.conf, \ + 'figures' flag will be overwritten." + figures = False + + if count_dataset is None: # Pull all count features count_features = self.feature_category(":count:") @@ -1133,6 +1149,14 @@ def performance_metric(clustering, labels, data, noise): def clustering_label_distribution(self, clustering, labels, plot=False): """Get statistics about number of ground truth labels per cluster.""" + + # Safety check for plotting + if not Config("cuckooml").cuckooml.plotting and plot: + print >> sys.stderr, "Warning:'plotting' flag disabled in conf/cuckooml.conf, \ + 'plot' flag will be overwritten." + plot = False + + cluster_ids = set(clustering["label"].tolist()) labels_ids = set(labels["label"].tolist()) cluster_distribution = {}