From 9b47606b446a4d13249b135ee7ed45e4e3e11bee Mon Sep 17 00:00:00 2001
From: Marc Weitz <marc.weitz@onlinehome.de>
Date: Thu, 1 Mar 2018 12:38:33 +0100
Subject: [PATCH] fixes pylint errors and improves code quality (#146)

* uses pycodestyle since pep8 is deprecated

* fixes some pylint errors

* improves code quality
---
 pyndl/__init__.py        |  4 +--
 pyndl/activation.py      | 29 +++++++++-------
 pyndl/corpus.py          |  2 ++
 pyndl/count.py           |  1 -
 pyndl/ndl.py             | 15 ++++----
 pyndl/preprocess.py      | 70 ++++++++++++++++++++-----------------
 tests/conftest.py        |  4 ++-
 tests/test_activation.py | 22 ++++++------
 tests/test_count.py      |  2 +-
 tests/test_ndl.py        | 66 ++++++++++++++++-------------------
 tests/test_preprocess.py | 75 ++++++++++++++--------------------------
 tests/test_pyndl.py      |  5 ++-
 tox.ini                  |  7 ++--
 13 files changed, 144 insertions(+), 158 deletions(-)

diff --git a/pyndl/__init__.py b/pyndl/__init__.py
index 521043e..a7afcf4 100644
--- a/pyndl/__init__.py
+++ b/pyndl/__init__.py
@@ -61,10 +61,10 @@ def sysinfo():
               "CPU: {cpu_count}\n").format(s=uname, cpu_count=mp.cpu_count())
 
     if uname.sysname == "Linux":
-        names, *lines = os.popen("free -m").readlines()
+        _, *lines = os.popen("free -m").readlines()
         for identifier in ["Mem:", "Swap:"]:
             memory = [line for line in lines if identifier in line][0]
-            ix, total, used, *rest = memory.split()
+            _, total, used, *_ = memory.split()
             osinfo += "{} {}MiB/{}MiB\n".format(identifier, used, total)
 
     osinfo += "\n"
diff --git a/pyndl/activation.py b/pyndl/activation.py
index 8c27c75..a1844ac 100644
--- a/pyndl/activation.py
+++ b/pyndl/activation.py
@@ -16,6 +16,7 @@
 from . import ndl
 
 
+# pylint: disable=W0621
 def activation(events, weights, number_of_threads=1, remove_duplicates=None, ignore_missing_cues=False):
     """
     Estimate activations for given events in event file and outcome-cue weights.
@@ -60,18 +61,17 @@ def activation(events, weights, number_of_threads=1, remove_duplicates=None, ign
     if isinstance(events, str):
         events = ndl.events_from_file(events)
 
-    event_cues_list = (cues for cues, outcomes in events)
+    events = (cues for cues, outcomes in events)
     if remove_duplicates is None:
-        def enforce_no_duplicates(cues):
+        def check_no_duplicates(cues):
             if len(cues) != len(set(cues)):
-                raise ValueError('cues needs to be unique: "%s"; use '
-                                 'remove_duplicates=True' %
-                                 (' '.join(cues)))
+                raise ValueError('cues needs to be unique: "{}"; use '
+                                 'remove_duplicates=True'.format(' '.join(cues)))
             else:
                 return set(cues)
-        event_cues_list = (enforce_no_duplicates(cues) for cues in event_cues_list)
+        events = (check_no_duplicates(cues) for cues in events)
     elif remove_duplicates is True:
-        event_cues_list = (set(cues) for cues in event_cues_list)
+        events = (set(cues) for cues in events)
 
     if isinstance(weights, xr.DataArray):
         cues = weights.coords["cues"].values.tolist()
@@ -81,11 +81,13 @@ def enforce_no_duplicates(cues):
         cue_map = OrderedDict(((cue, ii) for ii, cue in enumerate(cues)))
         if ignore_missing_cues:
             event_cue_indices_list = (tuple(cue_map[cue] for cue in event_cues if cue in cues)
-                                      for event_cues in event_cues_list)
+                                      for event_cues in events)
         else:
             event_cue_indices_list = (tuple(cue_map[cue] for cue in event_cues)
-                                      for event_cues in event_cues_list)
-        activations = _activation_matrix(list(event_cue_indices_list), weights.values, number_of_threads)
+                                      for event_cues in events)
+        # pylint: disable=W0621
+        activations = _activation_matrix(list(event_cue_indices_list),
+                                         weights.values, number_of_threads)
         return xr.DataArray(activations,
                             coords={
                                 'outcomes': outcomes
@@ -93,11 +95,11 @@ def enforce_no_duplicates(cues):
                             dims=('outcomes', 'events'))
     elif isinstance(weights, dict):
         assert number_of_threads == 1, "Estimating activations with multiprocessing is not implemented for dicts."
-        activations = defaultdict(lambda: np.zeros(len(event_cues_list)))
-        event_cues_list = list(event_cues_list)
+        activations = defaultdict(lambda: np.zeros(len(events)))
+        events = list(events)
         for outcome, cue_dict in weights.items():
             _activations = activations[outcome]
-            for row, cues in enumerate(event_cues_list):
+            for row, cues in enumerate(events):
                 for cue in cues:
                     _activations[row] += cue_dict[cue]
         return activations
@@ -111,6 +113,7 @@ def _init_mp_activation_matrix(weights_, weights_shape_, activations_, activatio
     Initializes shared variables weights and activations.
 
     """
+    # pylint: disable=C0103, W0621, W0601
     global weights, activations
     weights = np.ctypeslib.as_array(weights_)
     weights.shape = weights_shape_
diff --git a/pyndl/corpus.py b/pyndl/corpus.py
index 0ad08fb..2ba3510 100644
--- a/pyndl/corpus.py
+++ b/pyndl/corpus.py
@@ -100,6 +100,8 @@ def read_clean_gzfile(gz_file_path, *, break_duration=2.0):
 
 
 class JobParseGz():
+    # pylint: disable=E0202,missing-docstring
+
     """
     Stores the persistent information over several jobs and exposes a job
     method that only takes the varying parts as one argument.
diff --git a/pyndl/count.py b/pyndl/count.py
index f9177ad..58b21d0 100644
--- a/pyndl/count.py
+++ b/pyndl/count.py
@@ -14,7 +14,6 @@
 import gzip
 import itertools
 import multiprocessing
-import os
 import sys
 
 
diff --git a/pyndl/ndl.py b/pyndl/ndl.py
index 841ee09..79139cc 100644
--- a/pyndl/ndl.py
+++ b/pyndl/ndl.py
@@ -203,7 +203,7 @@ def worker():
                 for partlist in part_lists:
                     working_queue.put(np.array(partlist, dtype=np.uint32))
 
-            for thread_id in range(number_of_threads):
+            for _ in range(number_of_threads):
                 thread = threading.Thread(target=worker)
                 thread.start()
                 threads.append(thread)
@@ -248,7 +248,7 @@ def _attributes(event_path, number_events, alpha, betas, lambda_, cpu_time,
     def _format(value):
         return '{0: <{width}}'.format(value, width=width)
 
-    if not type(alpha) in (float, int):
+    if not isinstance(alpha, (float, int)):
         alpha = 'varying'
 
     new_attrs = {'date': _format(time.strftime("%Y-%m-%d %H:%M:%S")),
@@ -278,7 +278,7 @@ def _format(value):
             if key in new_attrs:
                 new_val = new_attrs[key]
             else:
-                new_val = format_('')
+                new_val = ''
             new_attrs[key] = old_val + ' | ' + new_val
     return new_attrs
 
@@ -295,9 +295,12 @@ class WeightDict(defaultdict):
 
     """
 
+    # pylint: disable=W0613
     def __init__(self, *args, **kwargs):
         super().__init__(lambda: defaultdict(float))
 
+        self._attrs = OrderedDict()
+
         if 'attrs' in kwargs:
             self.attrs = kwargs['attrs']
         else:
@@ -391,9 +394,9 @@ def dict_ndl(events, alphas, betas, lambda_=1.0, *,
         attrs_to_update = weights_ini.attrs
         coords = weights_ini.coords
         weights = WeightDict()
-        for oi, outcome in enumerate(coords['outcomes'].values):
-            for ci, cue in enumerate(coords['cues'].values):
-                weights[outcome][cue] = weights_ini.item((oi, ci))
+        for outcome_index, outcome in enumerate(coords['outcomes'].values):
+            for cue_index, cue in enumerate(coords['cues'].values):
+                weights[outcome][cue] = weights_ini.item((outcome_index, cue_index))
     elif not isinstance(weights, defaultdict):
         raise ValueError('weights needs to be either defaultdict or None')
 
diff --git a/pyndl/preprocess.py b/pyndl/preprocess.py
index df0f70c..2d48022 100644
--- a/pyndl/preprocess.py
+++ b/pyndl/preprocess.py
@@ -73,6 +73,38 @@ def bandsample(population, sample_size=50000, *, cutoff=5, seed=None,
     return sample
 
 
+def ngrams_to_word(occurrences, n_chars, outfile, remove_duplicates=True):
+    """
+    Process the occurrences and write them to outfile.
+
+    Parameters
+    ----------
+    occurrences : sequence of (cues, outcomes) tuples
+        cues and outcomes are both strings where underscores and # are
+        special symbols.
+    n_chars : number of characters (e.g. 2 for bigrams, 3 for trigrams, ...)
+    outfile : file handle
+
+    remove_duplicates : bool
+        if True make cues and outcomes per event unique
+
+    """
+    for cues, outcomes in occurrences:
+        if cues and outcomes:
+            occurrence = cues + '_' + outcomes
+        else:  # take either
+            occurrence = cues + outcomes
+        phrase_string = "#" + re.sub("_", "#", occurrence) + "#"
+        ngrams = (phrase_string[i:(i + n_chars)] for i in
+                  range(len(phrase_string) - n_chars + 1))
+        if not ngrams or not occurrence:
+            continue
+        if remove_duplicates:
+            outfile.write("{}\t{}\n".format("_".join(set(ngrams)), occurrence))
+        else:
+            outfile.write("{}\t{}\n".format("_".join(ngrams), occurrence))
+
+
 def process_occurrences(occurrences, outfile, *,
                         cue_structure="trigrams_to_word", remove_duplicates=True):
     """
@@ -92,43 +124,17 @@ def process_occurrences(occurrences, outfile, *,
 
     """
     if cue_structure == "bigrams_to_word":
-        for cues, outcomes in occurrences:
-            if cues and outcomes:
-                occurrence = cues + '_' + outcomes
-            else:  # take either
-                occurrence = cues + outcomes
-            phrase_string = "#" + re.sub("_", "#", occurrence) + "#"
-            bigrams = (phrase_string[i:(i + 2)] for i in
-                       range(len(phrase_string) - 2 + 1))
-            if not bigrams or not occurrence:
-                continue
-            if remove_duplicates:
-                outfile.write("_".join(set(bigrams)) + "\t" + occurrence + "\n")
-            else:
-                outfile.write("_".join(bigrams) + "\t" + occurrence + "\n")
+        ngrams_to_word(occurrences, 2, outfile, remove_duplicates=remove_duplicates)
     elif cue_structure == "trigrams_to_word":
-        for cues, outcomes in occurrences:
-            if cues and outcomes:
-                occurrence = cues + '_' + outcomes
-            else:  # take either
-                occurrence = cues + outcomes
-            phrase_string = "#" + re.sub("_", "#", occurrence) + "#"
-            trigrams = (phrase_string[i:(i + 3)] for i in
-                        range(len(phrase_string) - 3 + 1))
-            if not trigrams or not occurrence:
-                continue
-            if remove_duplicates:
-                outfile.write("_".join(set(trigrams)) + "\t" + occurrence + "\n")
-            else:
-                outfile.write("_".join(trigrams) + "\t" + occurrence + "\n")
+        ngrams_to_word(occurrences, 3, outfile, remove_duplicates=remove_duplicates)
     elif cue_structure == "word_to_word":
         for cues, outcomes in occurrences:
             if not cues:
                 continue
             if remove_duplicates:
-                outfile.write("_".join(set(cues.split("_"))) + "\t" + outcomes + "\n")
+                outfile.write("{}\t{}\n".format("_".join(set(cues.split("_"))), outcomes))
             else:
-                outfile.write(cues + "\t" + outcomes + "\n")
+                outfile.write("{}\t{}\n".format(cues, outcomes))
     else:
         raise NotImplementedError('cue_structure=%s is not implemented yet.' % cue_structure)
 
@@ -761,9 +767,9 @@ def create_binary_event_files(event_file,
 
         def _error_callback(error):
             if isinstance(error, StopIteration):
-                msg, result = error.value
+                _, result = error.value
                 nonlocal number_events
-                number_events += result
+                number_events += result  # pylint: disable=undefined-variable
                 pool.close()
             else:
                 raise error
diff --git a/tests/conftest.py b/tests/conftest.py
index 289c0e2..cbabdc9 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -1,4 +1,3 @@
-
 '''
 Configuration for py.test-3.
 
@@ -6,5 +5,8 @@
 
 
 def pytest_addoption(parser):
+    """
+    adds custom option to the pytest parser
+    """
     parser.addoption("--runslow", action="store_true",
                      help="run slow tests")
diff --git a/tests/test_activation.py b/tests/test_activation.py
index 416fe37..b99b363 100644
--- a/tests/test_activation.py
+++ b/tests/test_activation.py
@@ -15,7 +15,7 @@
 from pyndl import ndl
 from pyndl.activation import activation
 
-slow = pytest.mark.skipif(not pytest.config.getoption("--runslow"),
+slow = pytest.mark.skipif(not pytest.config.getoption("--runslow"),  # pylint: disable=invalid-name
                           reason="need --runslow option to run")
 
 TEST_ROOT = os.path.join(os.path.pardir, os.path.dirname(__file__))
@@ -29,8 +29,8 @@
 
 def test_exceptions():
     with pytest.raises(ValueError) as e_info:
-        wm = ndl.dict_ndl(FILE_PATH_SIMPLE, ALPHA, BETAS, remove_duplicates=None)
-        activation(FILE_PATH_MULTIPLE_CUES, wm)
+        weights = ndl.dict_ndl(FILE_PATH_SIMPLE, ALPHA, BETAS, remove_duplicates=None)
+        activation(FILE_PATH_MULTIPLE_CUES, weights)
         assert e_info == 'cues or outcomes needs to be unique: cues "a a"; outcomes "A"; use remove_duplicates=True'
 
     with pytest.raises(ValueError) as e_info:
@@ -149,22 +149,22 @@ def test_activation_matrix_large():
     print("")
     print("Start setup...")
 
-    def time_test(func, of=""):
+    def time_test(func, of=""):  # pylint: disable=invalid-name
         def dec_func(*args, **kwargs):
             print("start test '{}'".format(of))
-            st = time.clock()
+            start = time.clock()
             res = func(*args, **kwargs)
-            et = time.clock()
+            end = time.clock()
             print("finished test '{}'".format(of))
-            print("  duration: {:.3f}s".format(et-st))
+            print("  duration: {:.3f}s".format(end - start))
             print("")
             return res
         return dec_func
 
-    n = 2000
-    n_cues = 10*n
-    n_outcomes = n
-    n_events = 10*n
+    nn = 2000
+    n_cues = 10*nn
+    n_outcomes = nn
+    n_events = 10*nn
     n_cues_per_event = 30
     weight_mat = np.random.rand(n_cues, n_outcomes)
     cues = ['c'+str(i) for i in range(n_cues)]
diff --git a/tests/test_count.py b/tests/test_count.py
index dcfe67a..173a952 100644
--- a/tests/test_count.py
+++ b/tests/test_count.py
@@ -33,7 +33,7 @@ def test_words_symbols():
 
 def test_save_load():
     file_name = os.path.join(TEST_ROOT, "temp/cues.tab")
-    n_events, cues, outcomes = count.cues_outcomes(EVENT_RESOURCE_FILE)
+    _, cues, _ = count.cues_outcomes(EVENT_RESOURCE_FILE)
     count.save_counter(cues, file_name)
     cues_loaded = count.load_counter(file_name)
     assert cues == cues_loaded
diff --git a/tests/test_ndl.py b/tests/test_ndl.py
index 86e43d1..ca9a98a 100644
--- a/tests/test_ndl.py
+++ b/tests/test_ndl.py
@@ -1,6 +1,7 @@
 #!/usr/bin/env python3
 
-# pylint: disable=C0111
+# pylint: disable=C0111, redefined-outer-name
+
 
 from collections import defaultdict, OrderedDict
 import os
@@ -8,15 +9,14 @@
 import tempfile
 import copy
 
-
-import pytest
 import numpy as np
 import xarray as xr
 import pandas as pd
+import pytest
 
 from pyndl import ndl, count
 
-slow = pytest.mark.skipif(not pytest.config.getoption("--runslow"),
+slow = pytest.mark.skipif(not pytest.config.getoption("--runslow"),  # pylint: disable=invalid-name
                           reason="need --runslow option to run")
 
 TEST_ROOT = os.path.join(os.path.pardir, os.path.dirname(__file__))
@@ -66,8 +66,8 @@ def result_continue_learning():
     part_1 = events_simple.head(CONTINUE_SPLIT_POINT)
     part_2 = events_simple.tail(len(events_simple) - CONTINUE_SPLIT_POINT)
 
-    assert len(part_1) > 0
-    assert len(part_2) > 0
+    assert len(part_1) > 0  # pylint: disable=len-as-condition
+    assert len(part_2) > 0  # pylint: disable=len-as-condition
 
     part_path_1 = os.path.join(TMP_PATH, "event_file_simple_1.tab.gz")
     part_path_2 = os.path.join(TMP_PATH, "event_file_simple_2.tab.gz")
@@ -81,11 +81,8 @@ def result_continue_learning():
 
     del events_simple, part_1, part_2
 
-    result_part = ndl.ndl(part_path_1,
-                          ALPHA, BETAS)
-
-    result = ndl.ndl(part_path_2, ALPHA, BETAS,
-                     weights=result_part)
+    result_part = ndl.ndl(part_path_1, ALPHA, BETAS)
+    result = ndl.ndl(part_path_2, ALPHA, BETAS, weights=result_part)
 
     return result
 
@@ -136,8 +133,8 @@ def test_continue_learning_dict():
     part_1 = events_simple.head(CONTINUE_SPLIT_POINT)
     part_2 = events_simple.tail(len(events_simple) - CONTINUE_SPLIT_POINT)
 
-    assert len(part_1) > 0
-    assert len(part_2) > 0
+    assert len(part_1) > 0  # pylint: disable=len-as-condition
+    assert len(part_2) > 0  # pylint: disable=len-as-condition
 
     part_path_1 = os.path.join(TMP_PATH, "event_file_simple_1.tab.gz")
     part_path_2 = os.path.join(TMP_PATH, "event_file_simple_2.tab.gz")
@@ -180,7 +177,7 @@ def test_continue_learning_dict_ndl_data_array(result_dict_ndl, result_dict_ndl_
                                             continue_from_data_array)
     print(continue_from_data_array)
     print('%.2f ratio unequal' % unequal_ratio)
-    assert len(unequal) == 0
+    assert len(unequal) == 0  # pylint: disable=len-as-condition
 
 
 def test_continue_learning(result_continue_learning, result_ndl_openmp):
@@ -194,7 +191,7 @@ def test_continue_learning(result_continue_learning, result_ndl_openmp):
                                             result_continue_learning,
                                             result_ndl_openmp)
     print('%.2f ratio unequal' % unequal_ratio)
-    assert len(unequal) == 0
+    assert len(unequal) == 0  # pylint: disable=len-as-condition
 
 
 def test_save_to_netcdf4(result_ndl_openmp):
@@ -234,14 +231,14 @@ def test_dict_ndl_vs_ndl_threading(result_dict_ndl, result_ndl_threading):
     unequal, unequal_ratio = compare_arrays(FILE_PATH_SIMPLE, result_dict_ndl,
                                             result_ndl_threading)
     print('%.2f ratio unequal' % unequal_ratio)
-    assert len(unequal) == 0
+    assert len(unequal) == 0  # pylint: disable=len-as-condition
 
 
 def test_dict_ndl_vs_dict_ndl_generator(result_dict_ndl, result_dict_ndl_generator):
     unequal, unequal_ratio = compare_arrays(FILE_PATH_SIMPLE, result_dict_ndl,
                                             result_dict_ndl_generator)
     print('%.2f ratio unequal' % unequal_ratio)
-    assert len(unequal) == 0
+    assert len(unequal) == 0  # pylint: disable=len-as-condition
 
 
 def test_dict_ndl_data_array_vs_ndl_threading(result_ndl_threading):
@@ -250,7 +247,7 @@ def test_dict_ndl_data_array_vs_ndl_threading(result_ndl_threading):
     unequal, unequal_ratio = compare_arrays(FILE_PATH_SIMPLE, result_dict_ndl,
                                             result_ndl_threading)
     print('%.2f ratio unequal' % unequal_ratio)
-    assert len(unequal) == 0
+    assert len(unequal) == 0  # pylint: disable=len-as-condition
 
 
 def test_ordering_of_temporary_event_files(result_dict_ndl):
@@ -270,7 +267,7 @@ def test_multiple_cues_dict_ndl_vs_ndl_threading():
     unequal, unequal_ratio = compare_arrays(FILE_PATH_MULTIPLE_CUES, result_dict_ndl,
                                             result_ndl_threading)
     print('%.2f ratio unequal' % unequal_ratio)
-    assert len(unequal) == 0
+    assert len(unequal) == 0  # pylint: disable=len-as-condition
 
 
 def test_dict_ndl_vs_ndl_openmp(result_dict_ndl, result_ndl_openmp):
@@ -278,7 +275,7 @@ def test_dict_ndl_vs_ndl_openmp(result_dict_ndl, result_ndl_openmp):
     unequal, unequal_ratio = compare_arrays(FILE_PATH_SIMPLE, result_dict_ndl,
                                             result_ndl_openmp)
     print('%.2f ratio unequal' % unequal_ratio)
-    assert len(unequal) == 0
+    assert len(unequal) == 0  # pylint: disable=len-as-condition
 
 
 def test_meta_data(result_dict_ndl, result_dict_ndl_data_array, result_ndl_openmp, result_ndl_threading):
@@ -327,7 +324,7 @@ def test_compare_weights_ndl2(result_dict_ndl):
     unequal, unequal_ratio = compare_arrays(FILE_PATH_SIMPLE, result_ndl2, result_dict_ndl)
     print(set(outcome for outcome, *_ in unequal))
     print('%.2f ratio unequal' % unequal_ratio)
-    assert len(unequal) == 0
+    assert len(unequal) == 0  # pylint: disable=len-as-condition
 
 
 def test_multiple_cues_dict_ndl_vs_ndl2():
@@ -362,7 +359,7 @@ def test_multiple_cues_dict_ndl_vs_ndl2():
     unequal, unequal_ratio = compare_arrays(FILE_PATH_MULTIPLE_CUES, result_ndl2, result_python)
     print(set(outcome for outcome, *_ in unequal))
     print('%.2f ratio unequal' % unequal_ratio)
-    assert len(unequal) == 0
+    assert len(unequal) == 0  # pylint: disable=len-as-condition
 
 
 def test_compare_weights_rescorla_vs_ndl2():
@@ -405,13 +402,12 @@ def test_compare_weights_rescorla_vs_ndl2():
 
     unequal, unequal_ratio = compare_arrays(FILE_PATH_SIMPLE, result_ndl2, result_rescorla)
     print('%.2f ratio unequal' % unequal_ratio)
-    assert len(unequal) == 0
+    assert len(unequal) == 0  # pylint: disable=len-as-condition
 
 
 @slow
 def test_compare_time_dict_inplace_parallel_thread():
     file_path = os.path.join(TEST_ROOT, 'resources/event_file_many_cues.tab.gz')
-    cue_map, outcome_map, all_outcomes = generate_mapping(file_path)
 
     result_dict_ndl, duration_not_parallel = clock(ndl.dict_ndl, (file_path, ALPHA, BETAS, LAMBDA_))
 
@@ -423,25 +419,25 @@ def test_compare_time_dict_inplace_parallel_thread():
 
     unequal, unequal_ratio = compare_arrays(file_path, result_thread_ndl, result_dict_ndl)
     print('%.2f ratio unequal' % unequal_ratio)
-    assert len(unequal) == 0
+    assert len(unequal) == 0  # pylint: disable=len-as-condition
 
     print('parallel: %.3e  dict: %.3e' % (duration_parallel, duration_not_parallel))
     assert duration_parallel < duration_not_parallel
 
 
 def test_slice_list():
-    l1 = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
+    lst = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
 
-    res = ndl.slice_list(l1, 2)
+    res = ndl.slice_list(lst, 2)
     assert res == [[0, 1], [2, 3], [4, 5], [6, 7], [8, 9]]
 
-    res2 = ndl.slice_list(l1, 3)
+    res2 = ndl.slice_list(lst, 3)
     assert res2 == [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9]]
 
 
-def clock(f, args, **kwargs):
+def clock(func, args, **kwargs):
     start = time.time()
-    result = f(*args, **kwargs)
+    result = func(*args, **kwargs)
     stop = time.time()
 
     duration = stop - start
@@ -450,11 +446,9 @@ def clock(f, args, **kwargs):
 
 
 def compare_arrays(file_path, arr1, arr2):
-    n_events, cues, outcomes = count.cues_outcomes(file_path)
-    cue_map, outcome_map, all_outcomes = generate_mapping(file_path)
+    _, cues, outcomes = count.cues_outcomes(file_path)
+    cue_map, outcome_map, _ = generate_mapping(file_path)
 
-    cue_indices = [cue_map[cue] for cue in cues]
-    outcome_indices = [outcome_map[outcome] for outcome in outcomes]
     unequal = list()
 
     for outcome in outcomes:
@@ -472,7 +466,7 @@ def compare_arrays(file_path, arr1, arr2):
                 else:
                     values.append(array[outcome][cue])
 
-            value1, value2 = values
+            value1, value2 = values  # pylint: disable=unbalanced-tuple-unpacking
             if not np.isclose(value1, value2, rtol=1e-02, atol=1e-05):
                 unequal.append((outcome, cue, value1, value2))
 
@@ -481,7 +475,7 @@ def compare_arrays(file_path, arr1, arr2):
 
 
 def generate_mapping(event_path):
-    n_events, cues, outcomes = count.cues_outcomes(event_path)
+    _, cues, outcomes = count.cues_outcomes(event_path)
     all_cues = list(cues.keys())
     all_outcomes = list(outcomes.keys())
     cue_map = OrderedDict(((cue, ii) for ii, cue in enumerate(all_cues)))
diff --git a/tests/test_preprocess.py b/tests/test_preprocess.py
index a4fce21..b4feefd 100644
--- a/tests/test_preprocess.py
+++ b/tests/test_preprocess.py
@@ -23,15 +23,15 @@
 
 def test_bandsample():
     resource_file = os.path.join(TEST_ROOT, "resources/event_file_trigrams_to_word.tab.gz")
-    n_events, cue_freq_map, outcome_freq_map = cues_outcomes(resource_file,
-                                                             number_of_processes=2)
+    _, _, outcome_freq_map = cues_outcomes(resource_file,
+                                           number_of_processes=2)
     outcome_freq_map_filtered = bandsample(outcome_freq_map, 50, cutoff=1, seed=None, verbose=False)
     assert len(outcome_freq_map_filtered) == 50
 
     reference_file = os.path.join(TEST_ROOT, 'reference/bandsampled_outcomes.tab')
     try:
         outcome_freq_map_filtered_reference = load_counter(reference_file)
-    except (FileNotFoundError):
+    except FileNotFoundError:
         temp_file = os.path.join(TEST_ROOT, 'temp/bandsampled_outcomes.tab')
         save_counter(outcome_freq_map_filtered, temp_file)
         raise
@@ -39,7 +39,7 @@ def test_bandsample():
     bandsample(outcome_freq_map, 50, cutoff=1, verbose=True)
 
 
-def test_create_event_file_bad_symbols():
+def test_bad_symbols():
     with pytest.raises(ValueError):
         create_event_file(RESOURCE_FILE, EVENT_FILE,
                           "abcd#")
@@ -50,21 +50,21 @@ def test_create_event_file_bad_symbols():
     assert not os.path.isfile(EVENT_FILE)
 
 
-def test_create_event_file_bad_event_context():
+def test_bad_event_context():
     with pytest.raises(NotImplementedError):
         create_event_file(RESOURCE_FILE, EVENT_FILE,
                           context_structure="UNREASONABLE")
     assert not os.path.isfile(EVENT_FILE)
 
 
-def test_create_event_file_bad_event_event():
+def test_bad_event_event():
     with pytest.raises(NotImplementedError):
         create_event_file(RESOURCE_FILE, EVENT_FILE,
                           event_structure="UNREASONABLE")
     assert not os.path.isfile(EVENT_FILE)
 
 
-def test_create_event_file_upper_case():
+def test_upper_case():
     event_file = os.path.join(TEST_ROOT, "temp/events_corpus_upper_case.tab.gz")
     create_event_file(RESOURCE_FILE, event_file,
                       context_structure="document",
@@ -73,7 +73,7 @@ def test_create_event_file_upper_case():
     os.remove(event_file)
 
 
-def test_create_event_file_trigrams_to_word():
+def test_trigrams_to_word():
     event_file = os.path.join(TEST_ROOT, "temp/event_file_trigrams_to_word.tab.gz")
     reference_file = os.path.join(TEST_ROOT, "reference/event_file_trigrams_to_word.tab.gz")
     create_event_file(RESOURCE_FILE, event_file,
@@ -85,7 +85,7 @@ def test_create_event_file_trigrams_to_word():
     os.remove(event_file)
 
 
-def test_create_event_file_trigrams_to_word_line_based():
+def test_trigrams_to_word_line_based():
     event_file = os.path.join(TEST_ROOT, "temp/event_file_trigrams_to_word_line_based.tab.gz")
     reference_file = os.path.join(TEST_ROOT, "reference/event_file_trigrams_to_word_line_based.tab.gz")
     create_event_file(RESOURCE_FILE, event_file,
@@ -96,7 +96,7 @@ def test_create_event_file_trigrams_to_word_line_based():
     os.remove(event_file)
 
 
-def test_create_event_file_bigrams_to_word():
+def test_bigrams_to_word():
     event_file = os.path.join(TEST_ROOT, "temp/event_file_bigrams_to_word.tab.gz")
     reference_file = os.path.join(TEST_ROOT, "reference/event_file_bigrams_to_word.tab.gz")
     create_event_file(RESOURCE_FILE, event_file,
@@ -109,7 +109,7 @@ def test_create_event_file_bigrams_to_word():
     os.remove(event_file)
 
 
-def test_create_event_file_word_to_word():
+def test_word_to_word():
     event_file = os.path.join(TEST_ROOT, "temp/event_file_word_to_word.tab.gz")
     reference_file = os.path.join(TEST_ROOT, "reference/event_file_word_to_word.tab.gz")
     create_event_file(RESOURCE_FILE, event_file,
@@ -121,7 +121,7 @@ def test_create_event_file_word_to_word():
     os.remove(event_file)
 
 
-def test_filter_event_file_bad_event_file():
+def test_bad_event_file():
     input_event_file = os.path.join(TEST_ROOT, "resources/event_file_trigrams_to_word_BAD.tab.gz")
     output_event_file = os.path.join(TEST_ROOT, "temp/event_file_BAD_output.tab.gz")
     with pytest.raises(ValueError):
@@ -135,19 +135,19 @@ def test_job_filter():
     job = JobFilter(keep_cues, keep_outcomes, None, None, None, None)
     line = '#of_alb_NEI_b_of#_XX\tterm_not_of\n'
     new_line = job.job(line)
-    assert(new_line == '#of_of#\tof\n')
+    assert new_line == '#of_of#\tof\n'
     # no cues
     line = 'alb_NEI_b_XX\tterm_not_of\n'
     new_line = job.job(line)
-    assert(new_line is None)
+    assert new_line is None
     # no outcomes
     line = '#of_alb_NEI_b_of#_XX\tterm_not\n'
     new_line = job.job(line)
-    assert(new_line == '#of_of#\t\n')
+    assert new_line == '#of_of#\t\n'
     # neither cues nor outcomes
     line = '#alb_NEI_b_XX\tterm_not\n'
     new_line = job.job(line)
-    assert(new_line is None)
+    assert new_line is None
     with pytest.raises(ValueError):
         bad_line = 'This is a bad line.'
         job.job(bad_line)
@@ -165,7 +165,7 @@ def test_filter_event_file():
                       keep_outcomes=outcomes,
                       number_of_processes=2,
                       verbose=True)
-    n_events, cue_freq_map, outcome_freq_map = cues_outcomes(output_event_file)
+    _, cue_freq_map, outcome_freq_map = cues_outcomes(output_event_file)
     cues_new = list(cue_freq_map)
     cues_new.sort()
     outcomes_new = list(outcome_freq_map)
@@ -219,13 +219,14 @@ def test_write_events():
         events = event_generator(event_bad_file, cue_id_map,
                                  outcome_id_map)
         # traverse generator
+        # pylint: disable=W0612
         for event in events:
             pass
 
 
 def test_byte_conversion():
-    a = 184729172
-    assert a == to_integer(to_bytes(a))
+    simple_int = 184729172
+    assert simple_int == to_integer(to_bytes(simple_int))
 
 
 def test_read_binary_file():
@@ -236,7 +237,7 @@ def test_read_binary_file():
     abs_binary_path = os.path.join(TEST_ROOT, binary_path)
     abs_binary_file_path = os.path.join(abs_binary_path, "events_0_0.dat")
 
-    n_events, cues, outcomes = cues_outcomes(abs_file_path)
+    _, cues, outcomes = cues_outcomes(abs_file_path)
     cue_id_map = OrderedDict(((cue, ii) for ii, cue in enumerate(cues.keys())))
     outcome_id_map = OrderedDict(((outcome, ii) for ii, outcome in enumerate(outcomes.keys())))
 
@@ -279,47 +280,21 @@ def test_preprocessing():
                       event_options=(3, ),
                       lower_case=True, verbose=True)
 
-    # read in cues and outcomes
-    n_events, cue_freq_map, outcome_freq_map = cues_outcomes(event_file,
-                                                             number_of_processes=2)
-    cues = list(cue_freq_map.keys())
-    cues.sort()
-    cue_id_map = {cue: ii for ii, cue in enumerate(cues)}
+    # read in outcomes
+    _, _, outcome_freq_map = cues_outcomes(event_file, number_of_processes=2)
 
     # reduce number of outcomes through bandsampling
     outcome_freq_map_filtered = bandsample(outcome_freq_map, 50, cutoff=1, seed=None)
     outcomes = list(outcome_freq_map_filtered.keys())
     outcomes.sort()
-    outcome_id_map = {outcome: nn for nn, outcome in enumerate(outcomes)}
 
     # filter outcomes by reduced number of outcomes
     event_file_filtered = event_file + ".filtered"
     filter_event_file(event_file, event_file_filtered, keep_outcomes=outcomes)
 
-    # TODO this is not working at the moment
-    # create binary event files
-    # path_name = event_file_filtered + ".events"
-    # create_binary_event_files(event_file_filtered, path_name, cue_id_map,
-    #                           outcome_id_map, sort_within_event=False,
-    #                           number_of_processes=2, events_per_file=1000,
-    #                           verbose=True)
-    # with pytest.raises(IOError):
-    #    create_binary_event_files(event_file_filtered, path_name, cue_id_map,
-    #                            outcome_id_map, sort_within_event=False,
-    #                            number_of_processes=2, events_per_file=1000,
-    #                            verbose=True)
-    # overwrite=True
-    # create_binary_event_files(event_file_filtered, path_name, cue_id_map,
-    #                        outcome_id_map, sort_within_event=False,
-    #                        number_of_processes=2, events_per_file=1000,
-    #                        overwrite=True, verbose=True)
-
     # clean everything
     os.remove(event_file)
     os.remove(event_file_filtered)
-    # for file_ in os.listdir(path_name):
-    #    os.remove(os.path.join(path_name, file_))
-    # os.rmdir(path_name)
 
 
 def compare_event_files(newfile, oldfile):
@@ -328,8 +303,8 @@ def compare_event_files(newfile, oldfile):
     with gzip.open(oldfile, "rt") as reference:
         lines_reference = reference.readlines()
     assert len(lines_new) == len(lines_reference)
-    for ii in range(len(lines_new)):
-        cues, outcomes = lines_new[ii].strip().split('\t')
+    for ii, line in enumerate(lines_new):
+        cues, outcomes = line.strip().split('\t')
         cues = sorted(cues.split('_'))
         outcomes = sorted(outcomes.split('_'))
         ref_cues, ref_outcomes = lines_reference[ii].strip().split('\t')
diff --git a/tests/test_pyndl.py b/tests/test_pyndl.py
index fc511d2..837e5e6 100644
--- a/tests/test_pyndl.py
+++ b/tests/test_pyndl.py
@@ -2,7 +2,6 @@
 
 # pylint: disable=C0111
 
-import sys
 import re
 from io import StringIO
 from contextlib import redirect_stdout
@@ -16,6 +15,6 @@ def test_sysinfo():
         pyndl.sysinfo()
     out = out.getvalue()
 
-    pattern = re.compile("[a-zA-Z0-9_\. ]*\n[\=]*\n+([a-zA-Z0-9_ ]*\n[\-]*\n"
-                         "([a-zA-Z0-9_ ]*: [a-zA-Z0-9_\.\-/ ]*\n+)+)+")
+    pattern = re.compile(r"[a-zA-Z0-9_\. ]*\n[\=]*\n+([a-zA-Z0-9_ ]*\n[\-]*\n"
+                         r"([a-zA-Z0-9_ ]*: [a-zA-Z0-9_\.\-/ ]*\n+)+)+")
     assert pattern.match(out)
diff --git a/tox.ini b/tox.ini
index af1a19a..54566e3 100644
--- a/tox.ini
+++ b/tox.ini
@@ -5,7 +5,8 @@ envlist = py{35,36}-test, checkstyle, documentation
 usedevelop = True
 whitelist_externals=/bin/rm
 deps =
-     pytest
+     test: pytest
+     -rrequirements.txt
 commands =
      py.test --doctest-glob "*.rst"
      rm doc/data/levent.tab.gz
@@ -40,7 +41,9 @@ commands =
      rm doc/data/levent.tab.gz
 
 [testenv:lint]
-deps = pylint>=1.7.1
+deps =
+     pylint>=1.7.1
+     pytest
 commands = pylint [] --ignore-patterns='.*\.so' --rcfile=setup.cfg -j 2 pyndl tests
 ignore_outcome = True