Skip to content

Commit

Permalink
Features added referring to #15
Browse files Browse the repository at this point in the history
  • Loading branch information
greninja committed Jan 15, 2017
1 parent 1c793b5 commit 709f52f
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 2 deletions.
3 changes: 3 additions & 0 deletions conf/cuckooml.conf
Original file line number Diff line number Diff line change
Expand Up @@ -38,3 +38,6 @@ compare_new_samples = true

# Set folder for samples to be compared against clustering
test_directory = sample_data/test

# Enable plotting? Requires the matplotlib and seaborn libraries.
plotting = true
28 changes: 26 additions & 2 deletions modules/processing/cuckooml.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,19 @@
from lib.cuckoo.common.constants import CUCKOO_ROOT
from math import log

# Import the plotting libraries only when the "plotting" option of the
# cuckooml configuration is enabled, so they stay optional dependencies.
if Config("cuckooml").cuckooml.plotting:
    try:
        import matplotlib.pyplot as plt
        import seaborn as sns
    except ImportError as e:
        # Report the problem on stderr instead of aborting the module import.
        # NOTE(review): this block does not bind `plt`/`sns` when the import
        # fails -- confirm that later plotting code copes with that.
        # Adjacent string literals are used instead of a backslash inside the
        # literal so that source indentation never leaks into the message.
        print >> sys.stderr, ("Plotting libraries "
                              "(matplotlib and seaborn) are not available.")
        print >> sys.stderr, e


try:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from hdbscan import HDBSCAN
from sklearn import metrics
from sklearn.cluster import DBSCAN
Expand Down Expand Up @@ -797,6 +805,14 @@ def filter_dataset(self, dataset=None, feature_coverage=0.1,

def detect_abnormal_behaviour(self, count_dataset=None, figures=True):
"""Detect samples that behave significantly different than others."""

# Safety check for plotting
if not Config("cuckooml").cuckooml.plotting and figures:
print >> sys.stderr, "Warning:'plotting' flag disabled in config file, \
'figures' flag will be overwritten."
figures = False


if count_dataset is None:
# Pull all count features
count_features = self.feature_category(":count:")
Expand Down Expand Up @@ -1133,6 +1149,14 @@ def performance_metric(clustering, labels, data, noise):

def clustering_label_distribution(self, clustering, labels, plot=False):
"""Get statistics about number of ground truth labels per cluster."""

# Safety check for plotting
if not Config("cuckooml").cuckooml.plotting and plot:
print >> sys.stderr, "Warning:'plotting' flag disabled in config file, \
'plot' flag will be overwritten."
plot = False


cluster_ids = set(clustering["label"].tolist())
labels_ids = set(labels["label"].tolist())
cluster_distribution = {}
Expand Down

0 comments on commit 709f52f

Please sign in to comment.