Skip to content

Commit

Permalink
Features added referring to #15
Browse files Browse the repository at this point in the history
  • Loading branch information
greninja committed Feb 18, 2017
1 parent 1c793b5 commit e840ae2
Show file tree
Hide file tree
Showing 3 changed files with 39 additions and 2 deletions.
Binary file added conf/.cuckoo.conf.swp
Binary file not shown.
3 changes: 3 additions & 0 deletions conf/cuckooml.conf
Original file line number Diff line number Diff line change
Expand Up @@ -38,3 +38,6 @@ compare_new_samples = true

# Directory containing the new samples to compare against the existing clustering
test_directory = sample_data/test

# Enable plotting functionality (requires the matplotlib and seaborn libraries)
plotting = true
38 changes: 36 additions & 2 deletions modules/processing/cuckooml.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,23 @@
from lib.cuckoo.common.constants import CUCKOO_ROOT
from math import log

# Optional plotting support.
#
# ``imported`` records whether the plotting libraries are usable. It stays
# True when plotting is disabled in conf/cuckooml.conf, because the methods
# that plot check the configuration flag separately before drawing.
# (A ``global`` statement is not needed here: this is already module scope.)
imported = True

if Config("cuckooml").cuckooml.plotting:
    try:
        import matplotlib.pyplot as plt
        import seaborn as sns
    except ImportError as e:
        # Plotting is optional: report the problem on stderr and carry on
        # with plotting switched off instead of failing the module import.
        # Adjacent string literals are used rather than a backslash
        # continuation inside the literal, so source indentation can never
        # leak into the message.
        sys.stderr.write("Plotting libraries "
                         "(matplotlib and seaborn) are not available.\n")
        sys.stderr.write("%s\n" % e)
        imported = False


try:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from hdbscan import HDBSCAN
from sklearn import metrics
from sklearn.cluster import DBSCAN
Expand Down Expand Up @@ -797,6 +809,17 @@ def filter_dataset(self, dataset=None, feature_coverage=0.1,

def detect_abnormal_behaviour(self, count_dataset=None, figures=True):
"""Detect samples that behave significantly different than others."""

# Safety check for plotting
if not imported:
figures = False
else:
if not Config("cuckooml").cuckooml.plotting and figures:
print >> sys.stderr, "Warning:'plotting' flag disabled in conf/cuckooml.conf, \
'figures' flag will be overwritten."
figures = False


if count_dataset is None:
# Pull all count features
count_features = self.feature_category(":count:")
Expand Down Expand Up @@ -1133,6 +1156,17 @@ def performance_metric(clustering, labels, data, noise):

def clustering_label_distribution(self, clustering, labels, plot=False):
"""Get statistics about number of ground truth labels per cluster."""

# Safety check for plotting
if not imported:
plot = False
else:
if not Config("cuckooml").cuckooml.plotting and plot:
print >> sys.stderr, "Warning:'plotting' flag disabled in conf/cuckooml.conf, \
'plot' flag will be overwritten."
plot = False


cluster_ids = set(clustering["label"].tolist())
labels_ids = set(labels["label"].tolist())
cluster_distribution = {}
Expand Down

0 comments on commit e840ae2

Please sign in to comment.