Skip to content

Commit

Permalink
Features added referring to #15
Browse files Browse the repository at this point in the history
  • Loading branch information
greninja committed Jan 2, 2017
1 parent 1c793b5 commit 0f28b18
Show file tree
Hide file tree
Showing 2 changed files with 28 additions and 2 deletions.
3 changes: 3 additions & 0 deletions conf/cuckooml.conf
Original file line number Diff line number Diff line change
Expand Up @@ -38,3 +38,6 @@ compare_new_samples = true

# Set folder for samples to be compared against clustering
test_directory = sample_data/test

# Enable plotting? When true, matplotlib and seaborn are imported to draw figures.
plotting = true
27 changes: 25 additions & 2 deletions modules/processing/cuckooml.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,18 @@
from lib.cuckoo.common.constants import CUCKOO_ROOT
from math import log

# The plotting libraries are optional: they are imported only when the
# "plotting" option of the "cuckooml" configuration is switched on.
if Config("cuckooml").cuckooml.plotting:
    try:
        import matplotlib.pyplot as plt
        import seaborn as sns
    except ImportError as e:
        # Report the failure on stderr and carry on; the error is not
        # re-raised, so loading of this module continues.
        print >> sys.stderr, "Some error while importing"
        print >> sys.stderr, e


try:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from hdbscan import HDBSCAN
from sklearn import metrics
from sklearn.cluster import DBSCAN
Expand Down Expand Up @@ -797,6 +804,14 @@ def filter_dataset(self, dataset=None, feature_coverage=0.1,

def detect_abnormal_behaviour(self, count_dataset=None, figures=True):
"""Detect samples that behave significantly different than others."""

# Safety check for plotting
if not Config("cuckooml").cuckooml.plotting and figures:
print >> sys.stderr, "Warning: 'plotting' and 'figures' do not match. \
Plotting modules might not be imported."
figures = False


if count_dataset is None:
# Pull all count features
count_features = self.feature_category(":count:")
Expand Down Expand Up @@ -1133,6 +1148,14 @@ def performance_metric(clustering, labels, data, noise):

def clustering_label_distribution(self, clustering, labels, plot=False):
"""Get statistics about number of ground truth labels per cluster."""

# Safety check for plotting
if not Config("cuckooml").cuckooml.plotting and plot:
print >> sys.stderr, "Warning: 'plotting' and 'plot' do not match.\
Plotting modules might not be imported."
plot = False


cluster_ids = set(clustering["label"].tolist())
labels_ids = set(labels["label"].tolist())
cluster_distribution = {}
Expand Down

0 comments on commit 0f28b18

Please sign in to comment.