From 9b47606b446a4d13249b135ee7ed45e4e3e11bee Mon Sep 17 00:00:00 2001 From: Marc Weitz Date: Thu, 1 Mar 2018 12:38:33 +0100 Subject: [PATCH] fixes pylint errors and improves code quality (#146) * uses pycodestyle since pep8 is deprecated * fixes some pylint errors * improves code quality --- pyndl/__init__.py | 4 +-- pyndl/activation.py | 29 +++++++++------- pyndl/corpus.py | 2 ++ pyndl/count.py | 1 - pyndl/ndl.py | 15 ++++---- pyndl/preprocess.py | 70 ++++++++++++++++++++----------------- tests/conftest.py | 4 ++- tests/test_activation.py | 22 ++++++------ tests/test_count.py | 2 +- tests/test_ndl.py | 66 ++++++++++++++++------------------- tests/test_preprocess.py | 75 ++++++++++++++-------------------------- tests/test_pyndl.py | 5 ++- tox.ini | 7 ++-- 13 files changed, 144 insertions(+), 158 deletions(-) diff --git a/pyndl/__init__.py b/pyndl/__init__.py index 521043e..a7afcf4 100644 --- a/pyndl/__init__.py +++ b/pyndl/__init__.py @@ -61,10 +61,10 @@ def sysinfo(): "CPU: {cpu_count}\n").format(s=uname, cpu_count=mp.cpu_count()) if uname.sysname == "Linux": - names, *lines = os.popen("free -m").readlines() + _, *lines = os.popen("free -m").readlines() for identifier in ["Mem:", "Swap:"]: memory = [line for line in lines if identifier in line][0] - ix, total, used, *rest = memory.split() + _, total, used, *_ = memory.split() osinfo += "{} {}MiB/{}MiB\n".format(identifier, used, total) osinfo += "\n" diff --git a/pyndl/activation.py b/pyndl/activation.py index 8c27c75..a1844ac 100644 --- a/pyndl/activation.py +++ b/pyndl/activation.py @@ -16,6 +16,7 @@ from . import ndl +# pylint: disable=W0621 def activation(events, weights, number_of_threads=1, remove_duplicates=None, ignore_missing_cues=False): """ Estimate activations for given events in event file and outcome-cue weights. 
@@ -60,18 +61,17 @@ def activation(events, weights, number_of_threads=1, remove_duplicates=None, ign if isinstance(events, str): events = ndl.events_from_file(events) - event_cues_list = (cues for cues, outcomes in events) + events = (cues for cues, outcomes in events) if remove_duplicates is None: - def enforce_no_duplicates(cues): + def check_no_duplicates(cues): if len(cues) != len(set(cues)): - raise ValueError('cues needs to be unique: "%s"; use ' - 'remove_duplicates=True' % - (' '.join(cues))) + raise ValueError('cues needs to be unique: "{}"; use ' + 'remove_duplicates=True'.format(' '.join(cues))) else: return set(cues) - event_cues_list = (enforce_no_duplicates(cues) for cues in event_cues_list) + events = (check_no_duplicates(cues) for cues in events) elif remove_duplicates is True: - event_cues_list = (set(cues) for cues in event_cues_list) + events = (set(cues) for cues in events) if isinstance(weights, xr.DataArray): cues = weights.coords["cues"].values.tolist() @@ -81,11 +81,13 @@ def enforce_no_duplicates(cues): cue_map = OrderedDict(((cue, ii) for ii, cue in enumerate(cues))) if ignore_missing_cues: event_cue_indices_list = (tuple(cue_map[cue] for cue in event_cues if cue in cues) - for event_cues in event_cues_list) + for event_cues in events) else: event_cue_indices_list = (tuple(cue_map[cue] for cue in event_cues) - for event_cues in event_cues_list) - activations = _activation_matrix(list(event_cue_indices_list), weights.values, number_of_threads) + for event_cues in events) + # pylint: disable=W0621 + activations = _activation_matrix(list(event_cue_indices_list), + weights.values, number_of_threads) return xr.DataArray(activations, coords={ 'outcomes': outcomes @@ -93,11 +95,11 @@ def enforce_no_duplicates(cues): dims=('outcomes', 'events')) elif isinstance(weights, dict): assert number_of_threads == 1, "Estimating activations with multiprocessing is not implemented for dicts." 
- activations = defaultdict(lambda: np.zeros(len(event_cues_list))) - event_cues_list = list(event_cues_list) + activations = defaultdict(lambda: np.zeros(len(events))) + events = list(events) for outcome, cue_dict in weights.items(): _activations = activations[outcome] - for row, cues in enumerate(event_cues_list): + for row, cues in enumerate(events): for cue in cues: _activations[row] += cue_dict[cue] return activations @@ -111,6 +113,7 @@ def _init_mp_activation_matrix(weights_, weights_shape_, activations_, activatio Initializes shared variables weights and activations. """ + # pylint: disable=C0103, W0621, W0601 global weights, activations weights = np.ctypeslib.as_array(weights_) weights.shape = weights_shape_ diff --git a/pyndl/corpus.py b/pyndl/corpus.py index 0ad08fb..2ba3510 100644 --- a/pyndl/corpus.py +++ b/pyndl/corpus.py @@ -100,6 +100,8 @@ def read_clean_gzfile(gz_file_path, *, break_duration=2.0): class JobParseGz(): + # pylint: disable=E0202,missing-docstring + """ Stores the persistent information over several jobs and exposes a job method that only takes the varying parts as one argument. 
diff --git a/pyndl/count.py b/pyndl/count.py index f9177ad..58b21d0 100644 --- a/pyndl/count.py +++ b/pyndl/count.py @@ -14,7 +14,6 @@ import gzip import itertools import multiprocessing -import os import sys diff --git a/pyndl/ndl.py b/pyndl/ndl.py index 841ee09..79139cc 100644 --- a/pyndl/ndl.py +++ b/pyndl/ndl.py @@ -203,7 +203,7 @@ def worker(): for partlist in part_lists: working_queue.put(np.array(partlist, dtype=np.uint32)) - for thread_id in range(number_of_threads): + for _ in range(number_of_threads): thread = threading.Thread(target=worker) thread.start() threads.append(thread) @@ -248,7 +248,7 @@ def _attributes(event_path, number_events, alpha, betas, lambda_, cpu_time, def _format(value): return '{0: <{width}}'.format(value, width=width) - if not type(alpha) in (float, int): + if not isinstance(alpha, (float, int)): alpha = 'varying' new_attrs = {'date': _format(time.strftime("%Y-%m-%d %H:%M:%S")), @@ -278,7 +278,7 @@ def _format(value): if key in new_attrs: new_val = new_attrs[key] else: - new_val = format_('') + new_val = '' new_attrs[key] = old_val + ' | ' + new_val return new_attrs @@ -295,9 +295,12 @@ class WeightDict(defaultdict): """ + # pylint: disable=W0613 def __init__(self, *args, **kwargs): super().__init__(lambda: defaultdict(float)) + self._attrs = OrderedDict() + if 'attrs' in kwargs: self.attrs = kwargs['attrs'] else: @@ -391,9 +394,9 @@ def dict_ndl(events, alphas, betas, lambda_=1.0, *, attrs_to_update = weights_ini.attrs coords = weights_ini.coords weights = WeightDict() - for oi, outcome in enumerate(coords['outcomes'].values): - for ci, cue in enumerate(coords['cues'].values): - weights[outcome][cue] = weights_ini.item((oi, ci)) + for outcome_index, outcome in enumerate(coords['outcomes'].values): + for cue_index, cue in enumerate(coords['cues'].values): + weights[outcome][cue] = weights_ini.item((outcome_index, cue_index)) elif not isinstance(weights, defaultdict): raise ValueError('weights needs to be either defaultdict or 
None') diff --git a/pyndl/preprocess.py b/pyndl/preprocess.py index df0f70c..2d48022 100644 --- a/pyndl/preprocess.py +++ b/pyndl/preprocess.py @@ -73,6 +73,38 @@ def bandsample(population, sample_size=50000, *, cutoff=5, seed=None, return sample +def ngrams_to_word(occurrences, n_chars, outfile, remove_duplicates=True): + """ + Process the occurrences and write them to outfile. + + Parameters + ---------- + occurrences : sequence of (cues, outcomes) tuples + cues and outcomes are both strings where underscores and # are + special symbols. + n_chars : number of characters (e.g. 2 for bigrams, 3 for trigrams, ...) + outfile : file handle + + remove_duplicates : bool + if True make cues and outcomes per event unique + + """ + for cues, outcomes in occurrences: + if cues and outcomes: + occurrence = cues + '_' + outcomes + else: # take either + occurrence = cues + outcomes + phrase_string = "#" + re.sub("_", "#", occurrence) + "#" + ngrams = (phrase_string[i:(i + n_chars)] for i in + range(len(phrase_string) - n_chars + 1)) + if not ngrams or not occurrence: + continue + if remove_duplicates: + outfile.write("{}\t{}\n".format("_".join(set(ngrams)), occurrence)) + else: + outfile.write("{}\t{}\n".format("_".join(ngrams), occurrence)) + + def process_occurrences(occurrences, outfile, *, cue_structure="trigrams_to_word", remove_duplicates=True): """ @@ -92,43 +124,17 @@ def process_occurrences(occurrences, outfile, *, """ if cue_structure == "bigrams_to_word": - for cues, outcomes in occurrences: - if cues and outcomes: - occurrence = cues + '_' + outcomes - else: # take either - occurrence = cues + outcomes - phrase_string = "#" + re.sub("_", "#", occurrence) + "#" - bigrams = (phrase_string[i:(i + 2)] for i in - range(len(phrase_string) - 2 + 1)) - if not bigrams or not occurrence: - continue - if remove_duplicates: - outfile.write("_".join(set(bigrams)) + "\t" + occurrence + "\n") - else: - outfile.write("_".join(bigrams) + "\t" + occurrence + "\n") + 
ngrams_to_word(occurrences, 2, outfile, remove_duplicates=remove_duplicates) elif cue_structure == "trigrams_to_word": - for cues, outcomes in occurrences: - if cues and outcomes: - occurrence = cues + '_' + outcomes - else: # take either - occurrence = cues + outcomes - phrase_string = "#" + re.sub("_", "#", occurrence) + "#" - trigrams = (phrase_string[i:(i + 3)] for i in - range(len(phrase_string) - 3 + 1)) - if not trigrams or not occurrence: - continue - if remove_duplicates: - outfile.write("_".join(set(trigrams)) + "\t" + occurrence + "\n") - else: - outfile.write("_".join(trigrams) + "\t" + occurrence + "\n") + ngrams_to_word(occurrences, 3, outfile, remove_duplicates=remove_duplicates) elif cue_structure == "word_to_word": for cues, outcomes in occurrences: if not cues: continue if remove_duplicates: - outfile.write("_".join(set(cues.split("_"))) + "\t" + outcomes + "\n") + outfile.write("{}\t{}\n".format("_".join(set(cues.split("_"))), outcomes)) else: - outfile.write(cues + "\t" + outcomes + "\n") + outfile.write("{}\t{}\n".format(cues, outcomes)) else: raise NotImplementedError('cue_structure=%s is not implemented yet.' % cue_structure) @@ -761,9 +767,9 @@ def create_binary_event_files(event_file, def _error_callback(error): if isinstance(error, StopIteration): - msg, result = error.value + _, result = error.value nonlocal number_events - number_events += result + number_events += result # pylint: disable=undefined-variable pool.close() else: raise error diff --git a/tests/conftest.py b/tests/conftest.py index 289c0e2..cbabdc9 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,4 +1,3 @@ - ''' Configuration for py.test-3. 
@@ -6,5 +5,8 @@ def pytest_addoption(parser): + """ + adds custom option to the pytest parser + """ parser.addoption("--runslow", action="store_true", help="run slow tests") diff --git a/tests/test_activation.py b/tests/test_activation.py index 416fe37..b99b363 100644 --- a/tests/test_activation.py +++ b/tests/test_activation.py @@ -15,7 +15,7 @@ from pyndl import ndl from pyndl.activation import activation -slow = pytest.mark.skipif(not pytest.config.getoption("--runslow"), +slow = pytest.mark.skipif(not pytest.config.getoption("--runslow"), # pylint: disable=invalid-name reason="need --runslow option to run") TEST_ROOT = os.path.join(os.path.pardir, os.path.dirname(__file__)) @@ -29,8 +29,8 @@ def test_exceptions(): with pytest.raises(ValueError) as e_info: - wm = ndl.dict_ndl(FILE_PATH_SIMPLE, ALPHA, BETAS, remove_duplicates=None) - activation(FILE_PATH_MULTIPLE_CUES, wm) + weights = ndl.dict_ndl(FILE_PATH_SIMPLE, ALPHA, BETAS, remove_duplicates=None) + activation(FILE_PATH_MULTIPLE_CUES, weights) assert e_info == 'cues or outcomes needs to be unique: cues "a a"; outcomes "A"; use remove_duplicates=True' with pytest.raises(ValueError) as e_info: @@ -149,22 +149,22 @@ def test_activation_matrix_large(): print("") print("Start setup...") - def time_test(func, of=""): + def time_test(func, of=""): # pylint: disable=invalid-name def dec_func(*args, **kwargs): print("start test '{}'".format(of)) - st = time.clock() + start = time.clock() res = func(*args, **kwargs) - et = time.clock() + end = time.clock() print("finished test '{}'".format(of)) - print(" duration: {:.3f}s".format(et-st)) + print(" duration: {:.3f}s".format(end - start)) print("") return res return dec_func - n = 2000 - n_cues = 10*n - n_outcomes = n - n_events = 10*n + nn = 2000 + n_cues = 10*nn + n_outcomes = nn + n_events = 10*nn n_cues_per_event = 30 weight_mat = np.random.rand(n_cues, n_outcomes) cues = ['c'+str(i) for i in range(n_cues)] diff --git a/tests/test_count.py b/tests/test_count.py 
index dcfe67a..173a952 100644 --- a/tests/test_count.py +++ b/tests/test_count.py @@ -33,7 +33,7 @@ def test_words_symbols(): def test_save_load(): file_name = os.path.join(TEST_ROOT, "temp/cues.tab") - n_events, cues, outcomes = count.cues_outcomes(EVENT_RESOURCE_FILE) + _, cues, _ = count.cues_outcomes(EVENT_RESOURCE_FILE) count.save_counter(cues, file_name) cues_loaded = count.load_counter(file_name) assert cues == cues_loaded diff --git a/tests/test_ndl.py b/tests/test_ndl.py index 86e43d1..ca9a98a 100644 --- a/tests/test_ndl.py +++ b/tests/test_ndl.py @@ -1,6 +1,7 @@ #!/usr/bin/env python3 -# pylint: disable=C0111 +# pylint: disable=C0111, redefined-outer-name + from collections import defaultdict, OrderedDict import os @@ -8,15 +9,14 @@ import tempfile import copy - -import pytest import numpy as np import xarray as xr import pandas as pd +import pytest from pyndl import ndl, count -slow = pytest.mark.skipif(not pytest.config.getoption("--runslow"), +slow = pytest.mark.skipif(not pytest.config.getoption("--runslow"), # pylint: disable=invalid-name reason="need --runslow option to run") TEST_ROOT = os.path.join(os.path.pardir, os.path.dirname(__file__)) @@ -66,8 +66,8 @@ def result_continue_learning(): part_1 = events_simple.head(CONTINUE_SPLIT_POINT) part_2 = events_simple.tail(len(events_simple) - CONTINUE_SPLIT_POINT) - assert len(part_1) > 0 - assert len(part_2) > 0 + assert len(part_1) > 0 # pylint: disable=len-as-condition + assert len(part_2) > 0 # pylint: disable=len-as-condition part_path_1 = os.path.join(TMP_PATH, "event_file_simple_1.tab.gz") part_path_2 = os.path.join(TMP_PATH, "event_file_simple_2.tab.gz") @@ -81,11 +81,8 @@ def result_continue_learning(): del events_simple, part_1, part_2 - result_part = ndl.ndl(part_path_1, - ALPHA, BETAS) - - result = ndl.ndl(part_path_2, ALPHA, BETAS, - weights=result_part) + result_part = ndl.ndl(part_path_1, ALPHA, BETAS) + result = ndl.ndl(part_path_2, ALPHA, BETAS, weights=result_part) return result @@ 
-136,8 +133,8 @@ def test_continue_learning_dict(): part_1 = events_simple.head(CONTINUE_SPLIT_POINT) part_2 = events_simple.tail(len(events_simple) - CONTINUE_SPLIT_POINT) - assert len(part_1) > 0 - assert len(part_2) > 0 + assert len(part_1) > 0 # pylint: disable=len-as-condition + assert len(part_2) > 0 # pylint: disable=len-as-condition part_path_1 = os.path.join(TMP_PATH, "event_file_simple_1.tab.gz") part_path_2 = os.path.join(TMP_PATH, "event_file_simple_2.tab.gz") @@ -180,7 +177,7 @@ def test_continue_learning_dict_ndl_data_array(result_dict_ndl, result_dict_ndl_ continue_from_data_array) print(continue_from_data_array) print('%.2f ratio unequal' % unequal_ratio) - assert len(unequal) == 0 + assert len(unequal) == 0 # pylint: disable=len-as-condition def test_continue_learning(result_continue_learning, result_ndl_openmp): @@ -194,7 +191,7 @@ def test_continue_learning(result_continue_learning, result_ndl_openmp): result_continue_learning, result_ndl_openmp) print('%.2f ratio unequal' % unequal_ratio) - assert len(unequal) == 0 + assert len(unequal) == 0 # pylint: disable=len-as-condition def test_save_to_netcdf4(result_ndl_openmp): @@ -234,14 +231,14 @@ def test_dict_ndl_vs_ndl_threading(result_dict_ndl, result_ndl_threading): unequal, unequal_ratio = compare_arrays(FILE_PATH_SIMPLE, result_dict_ndl, result_ndl_threading) print('%.2f ratio unequal' % unequal_ratio) - assert len(unequal) == 0 + assert len(unequal) == 0 # pylint: disable=len-as-condition def test_dict_ndl_vs_dict_ndl_generator(result_dict_ndl, result_dict_ndl_generator): unequal, unequal_ratio = compare_arrays(FILE_PATH_SIMPLE, result_dict_ndl, result_dict_ndl_generator) print('%.2f ratio unequal' % unequal_ratio) - assert len(unequal) == 0 + assert len(unequal) == 0 # pylint: disable=len-as-condition def test_dict_ndl_data_array_vs_ndl_threading(result_ndl_threading): @@ -250,7 +247,7 @@ def test_dict_ndl_data_array_vs_ndl_threading(result_ndl_threading): unequal, unequal_ratio = 
compare_arrays(FILE_PATH_SIMPLE, result_dict_ndl, result_ndl_threading) print('%.2f ratio unequal' % unequal_ratio) - assert len(unequal) == 0 + assert len(unequal) == 0 # pylint: disable=len-as-condition def test_ordering_of_temporary_event_files(result_dict_ndl): @@ -270,7 +267,7 @@ def test_multiple_cues_dict_ndl_vs_ndl_threading(): unequal, unequal_ratio = compare_arrays(FILE_PATH_MULTIPLE_CUES, result_dict_ndl, result_ndl_threading) print('%.2f ratio unequal' % unequal_ratio) - assert len(unequal) == 0 + assert len(unequal) == 0 # pylint: disable=len-as-condition def test_dict_ndl_vs_ndl_openmp(result_dict_ndl, result_ndl_openmp): @@ -278,7 +275,7 @@ def test_dict_ndl_vs_ndl_openmp(result_dict_ndl, result_ndl_openmp): unequal, unequal_ratio = compare_arrays(FILE_PATH_SIMPLE, result_dict_ndl, result_ndl_openmp) print('%.2f ratio unequal' % unequal_ratio) - assert len(unequal) == 0 + assert len(unequal) == 0 # pylint: disable=len-as-condition def test_meta_data(result_dict_ndl, result_dict_ndl_data_array, result_ndl_openmp, result_ndl_threading): @@ -327,7 +324,7 @@ def test_compare_weights_ndl2(result_dict_ndl): unequal, unequal_ratio = compare_arrays(FILE_PATH_SIMPLE, result_ndl2, result_dict_ndl) print(set(outcome for outcome, *_ in unequal)) print('%.2f ratio unequal' % unequal_ratio) - assert len(unequal) == 0 + assert len(unequal) == 0 # pylint: disable=len-as-condition def test_multiple_cues_dict_ndl_vs_ndl2(): @@ -362,7 +359,7 @@ def test_multiple_cues_dict_ndl_vs_ndl2(): unequal, unequal_ratio = compare_arrays(FILE_PATH_MULTIPLE_CUES, result_ndl2, result_python) print(set(outcome for outcome, *_ in unequal)) print('%.2f ratio unequal' % unequal_ratio) - assert len(unequal) == 0 + assert len(unequal) == 0 # pylint: disable=len-as-condition def test_compare_weights_rescorla_vs_ndl2(): @@ -405,13 +402,12 @@ def test_compare_weights_rescorla_vs_ndl2(): unequal, unequal_ratio = compare_arrays(FILE_PATH_SIMPLE, result_ndl2, result_rescorla) print('%.2f ratio 
unequal' % unequal_ratio) - assert len(unequal) == 0 + assert len(unequal) == 0 # pylint: disable=len-as-condition @slow def test_compare_time_dict_inplace_parallel_thread(): file_path = os.path.join(TEST_ROOT, 'resources/event_file_many_cues.tab.gz') - cue_map, outcome_map, all_outcomes = generate_mapping(file_path) result_dict_ndl, duration_not_parallel = clock(ndl.dict_ndl, (file_path, ALPHA, BETAS, LAMBDA_)) @@ -423,25 +419,25 @@ def test_compare_time_dict_inplace_parallel_thread(): unequal, unequal_ratio = compare_arrays(file_path, result_thread_ndl, result_dict_ndl) print('%.2f ratio unequal' % unequal_ratio) - assert len(unequal) == 0 + assert len(unequal) == 0 # pylint: disable=len-as-condition print('parallel: %.3e dict: %.3e' % (duration_parallel, duration_not_parallel)) assert duration_parallel < duration_not_parallel def test_slice_list(): - l1 = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] + lst = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] - res = ndl.slice_list(l1, 2) + res = ndl.slice_list(lst, 2) assert res == [[0, 1], [2, 3], [4, 5], [6, 7], [8, 9]] - res2 = ndl.slice_list(l1, 3) + res2 = ndl.slice_list(lst, 3) assert res2 == [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9]] -def clock(f, args, **kwargs): +def clock(func, args, **kwargs): start = time.time() - result = f(*args, **kwargs) + result = func(*args, **kwargs) stop = time.time() duration = stop - start @@ -450,11 +446,9 @@ def clock(f, args, **kwargs): def compare_arrays(file_path, arr1, arr2): - n_events, cues, outcomes = count.cues_outcomes(file_path) - cue_map, outcome_map, all_outcomes = generate_mapping(file_path) + _, cues, outcomes = count.cues_outcomes(file_path) + cue_map, outcome_map, _ = generate_mapping(file_path) - cue_indices = [cue_map[cue] for cue in cues] - outcome_indices = [outcome_map[outcome] for outcome in outcomes] unequal = list() for outcome in outcomes: @@ -472,7 +466,7 @@ def compare_arrays(file_path, arr1, arr2): else: values.append(array[outcome][cue]) - value1, value2 = values + value1, value2 = 
values # pylint: disable=unbalanced-tuple-unpacking if not np.isclose(value1, value2, rtol=1e-02, atol=1e-05): unequal.append((outcome, cue, value1, value2)) @@ -481,7 +475,7 @@ def compare_arrays(file_path, arr1, arr2): def generate_mapping(event_path): - n_events, cues, outcomes = count.cues_outcomes(event_path) + _, cues, outcomes = count.cues_outcomes(event_path) all_cues = list(cues.keys()) all_outcomes = list(outcomes.keys()) cue_map = OrderedDict(((cue, ii) for ii, cue in enumerate(all_cues))) diff --git a/tests/test_preprocess.py b/tests/test_preprocess.py index a4fce21..b4feefd 100644 --- a/tests/test_preprocess.py +++ b/tests/test_preprocess.py @@ -23,15 +23,15 @@ def test_bandsample(): resource_file = os.path.join(TEST_ROOT, "resources/event_file_trigrams_to_word.tab.gz") - n_events, cue_freq_map, outcome_freq_map = cues_outcomes(resource_file, - number_of_processes=2) + _, _, outcome_freq_map = cues_outcomes(resource_file, + number_of_processes=2) outcome_freq_map_filtered = bandsample(outcome_freq_map, 50, cutoff=1, seed=None, verbose=False) assert len(outcome_freq_map_filtered) == 50 reference_file = os.path.join(TEST_ROOT, 'reference/bandsampled_outcomes.tab') try: outcome_freq_map_filtered_reference = load_counter(reference_file) - except (FileNotFoundError): + except FileNotFoundError: temp_file = os.path.join(TEST_ROOT, 'temp/bandsampled_outcomes.tab') save_counter(outcome_freq_map_filtered, temp_file) raise @@ -39,7 +39,7 @@ def test_bandsample(): bandsample(outcome_freq_map, 50, cutoff=1, verbose=True) -def test_create_event_file_bad_symbols(): +def test_bad_symbols(): with pytest.raises(ValueError): create_event_file(RESOURCE_FILE, EVENT_FILE, "abcd#") @@ -50,21 +50,21 @@ def test_create_event_file_bad_symbols(): assert not os.path.isfile(EVENT_FILE) -def test_create_event_file_bad_event_context(): +def test_bad_event_context(): with pytest.raises(NotImplementedError): create_event_file(RESOURCE_FILE, EVENT_FILE, 
context_structure="UNREASONABLE") assert not os.path.isfile(EVENT_FILE) -def test_create_event_file_bad_event_event(): +def test_bad_event_event(): with pytest.raises(NotImplementedError): create_event_file(RESOURCE_FILE, EVENT_FILE, event_structure="UNREASONABLE") assert not os.path.isfile(EVENT_FILE) -def test_create_event_file_upper_case(): +def test_upper_case(): event_file = os.path.join(TEST_ROOT, "temp/events_corpus_upper_case.tab.gz") create_event_file(RESOURCE_FILE, event_file, context_structure="document", @@ -73,7 +73,7 @@ def test_create_event_file_upper_case(): os.remove(event_file) -def test_create_event_file_trigrams_to_word(): +def test_trigrams_to_word(): event_file = os.path.join(TEST_ROOT, "temp/event_file_trigrams_to_word.tab.gz") reference_file = os.path.join(TEST_ROOT, "reference/event_file_trigrams_to_word.tab.gz") create_event_file(RESOURCE_FILE, event_file, @@ -85,7 +85,7 @@ def test_create_event_file_trigrams_to_word(): os.remove(event_file) -def test_create_event_file_trigrams_to_word_line_based(): +def test_trigrams_to_word_line_based(): event_file = os.path.join(TEST_ROOT, "temp/event_file_trigrams_to_word_line_based.tab.gz") reference_file = os.path.join(TEST_ROOT, "reference/event_file_trigrams_to_word_line_based.tab.gz") create_event_file(RESOURCE_FILE, event_file, @@ -96,7 +96,7 @@ def test_create_event_file_trigrams_to_word_line_based(): os.remove(event_file) -def test_create_event_file_bigrams_to_word(): +def test_bigrams_to_word(): event_file = os.path.join(TEST_ROOT, "temp/event_file_bigrams_to_word.tab.gz") reference_file = os.path.join(TEST_ROOT, "reference/event_file_bigrams_to_word.tab.gz") create_event_file(RESOURCE_FILE, event_file, @@ -109,7 +109,7 @@ def test_create_event_file_bigrams_to_word(): os.remove(event_file) -def test_create_event_file_word_to_word(): +def test_word_to_word(): event_file = os.path.join(TEST_ROOT, "temp/event_file_word_to_word.tab.gz") reference_file = os.path.join(TEST_ROOT, 
"reference/event_file_word_to_word.tab.gz") create_event_file(RESOURCE_FILE, event_file, @@ -121,7 +121,7 @@ def test_create_event_file_word_to_word(): os.remove(event_file) -def test_filter_event_file_bad_event_file(): +def test_bad_event_file(): input_event_file = os.path.join(TEST_ROOT, "resources/event_file_trigrams_to_word_BAD.tab.gz") output_event_file = os.path.join(TEST_ROOT, "temp/event_file_BAD_output.tab.gz") with pytest.raises(ValueError): @@ -135,19 +135,19 @@ def test_job_filter(): job = JobFilter(keep_cues, keep_outcomes, None, None, None, None) line = '#of_alb_NEI_b_of#_XX\tterm_not_of\n' new_line = job.job(line) - assert(new_line == '#of_of#\tof\n') + assert new_line == '#of_of#\tof\n' # no cues line = 'alb_NEI_b_XX\tterm_not_of\n' new_line = job.job(line) - assert(new_line is None) + assert new_line is None # no outcomes line = '#of_alb_NEI_b_of#_XX\tterm_not\n' new_line = job.job(line) - assert(new_line == '#of_of#\t\n') + assert new_line == '#of_of#\t\n' # neither cues nor outcomes line = '#alb_NEI_b_XX\tterm_not\n' new_line = job.job(line) - assert(new_line is None) + assert new_line is None with pytest.raises(ValueError): bad_line = 'This is a bad line.' 
job.job(bad_line) @@ -165,7 +165,7 @@ def test_filter_event_file(): keep_outcomes=outcomes, number_of_processes=2, verbose=True) - n_events, cue_freq_map, outcome_freq_map = cues_outcomes(output_event_file) + _, cue_freq_map, outcome_freq_map = cues_outcomes(output_event_file) cues_new = list(cue_freq_map) cues_new.sort() outcomes_new = list(outcome_freq_map) @@ -219,13 +219,14 @@ def test_write_events(): events = event_generator(event_bad_file, cue_id_map, outcome_id_map) # traverse generator + # pylint: disable=W0612 for event in events: pass def test_byte_conversion(): - a = 184729172 - assert a == to_integer(to_bytes(a)) + simple_int = 184729172 + assert simple_int == to_integer(to_bytes(simple_int)) def test_read_binary_file(): @@ -236,7 +237,7 @@ def test_read_binary_file(): abs_binary_path = os.path.join(TEST_ROOT, binary_path) abs_binary_file_path = os.path.join(abs_binary_path, "events_0_0.dat") - n_events, cues, outcomes = cues_outcomes(abs_file_path) + _, cues, outcomes = cues_outcomes(abs_file_path) cue_id_map = OrderedDict(((cue, ii) for ii, cue in enumerate(cues.keys()))) outcome_id_map = OrderedDict(((outcome, ii) for ii, outcome in enumerate(outcomes.keys()))) @@ -279,47 +280,21 @@ def test_preprocessing(): event_options=(3, ), lower_case=True, verbose=True) - # read in cues and outcomes - n_events, cue_freq_map, outcome_freq_map = cues_outcomes(event_file, - number_of_processes=2) - cues = list(cue_freq_map.keys()) - cues.sort() - cue_id_map = {cue: ii for ii, cue in enumerate(cues)} + # read in outcomes + _, _, outcome_freq_map = cues_outcomes(event_file, number_of_processes=2) # reduce number of outcomes through bandsampling outcome_freq_map_filtered = bandsample(outcome_freq_map, 50, cutoff=1, seed=None) outcomes = list(outcome_freq_map_filtered.keys()) outcomes.sort() - outcome_id_map = {outcome: nn for nn, outcome in enumerate(outcomes)} # filter outcomes by reduced number of outcomes event_file_filtered = event_file + ".filtered" 
filter_event_file(event_file, event_file_filtered, keep_outcomes=outcomes) - # TODO this is not working at the moment - # create binary event files - # path_name = event_file_filtered + ".events" - # create_binary_event_files(event_file_filtered, path_name, cue_id_map, - # outcome_id_map, sort_within_event=False, - # number_of_processes=2, events_per_file=1000, - # verbose=True) - # with pytest.raises(IOError): - # create_binary_event_files(event_file_filtered, path_name, cue_id_map, - # outcome_id_map, sort_within_event=False, - # number_of_processes=2, events_per_file=1000, - # verbose=True) - # overwrite=True - # create_binary_event_files(event_file_filtered, path_name, cue_id_map, - # outcome_id_map, sort_within_event=False, - # number_of_processes=2, events_per_file=1000, - # overwrite=True, verbose=True) - # clean everything os.remove(event_file) os.remove(event_file_filtered) - # for file_ in os.listdir(path_name): - # os.remove(os.path.join(path_name, file_)) - # os.rmdir(path_name) def compare_event_files(newfile, oldfile): @@ -328,8 +303,8 @@ def compare_event_files(newfile, oldfile): with gzip.open(oldfile, "rt") as reference: lines_reference = reference.readlines() assert len(lines_new) == len(lines_reference) - for ii in range(len(lines_new)): - cues, outcomes = lines_new[ii].strip().split('\t') + for ii, line in enumerate(lines_new): + cues, outcomes = line.strip().split('\t') cues = sorted(cues.split('_')) outcomes = sorted(outcomes.split('_')) ref_cues, ref_outcomes = lines_reference[ii].strip().split('\t') diff --git a/tests/test_pyndl.py b/tests/test_pyndl.py index fc511d2..837e5e6 100644 --- a/tests/test_pyndl.py +++ b/tests/test_pyndl.py @@ -2,7 +2,6 @@ # pylint: disable=C0111 -import sys import re from io import StringIO from contextlib import redirect_stdout @@ -16,6 +15,6 @@ def test_sysinfo(): pyndl.sysinfo() out = out.getvalue() - pattern = re.compile("[a-zA-Z0-9_\. 
]*\n[\=]*\n+([a-zA-Z0-9_ ]*\n[\-]*\n" - "([a-zA-Z0-9_ ]*: [a-zA-Z0-9_\.\-/ ]*\n+)+)+") + pattern = re.compile(r"[a-zA-Z0-9_\. ]*\n[\=]*\n+([a-zA-Z0-9_ ]*\n[\-]*\n" + r"([a-zA-Z0-9_ ]*: [a-zA-Z0-9_\.\-/ ]*\n+)+)+") assert pattern.match(out) diff --git a/tox.ini b/tox.ini index af1a19a..54566e3 100644 --- a/tox.ini +++ b/tox.ini @@ -5,7 +5,8 @@ envlist = py{35,36}-test, checkstyle, documentation usedevelop = True whitelist_externals=/bin/rm deps = - pytest + test: pytest + -rrequirements.txt commands = py.test --doctest-glob "*.rst" rm doc/data/levent.tab.gz @@ -40,7 +41,9 @@ commands = rm doc/data/levent.tab.gz [testenv:lint] -deps = pylint>=1.7.1 +deps = + pylint>=1.7.1 + pytest commands = pylint [] --ignore-patterns='.*\.so' --rcfile=setup.cfg -j 2 pyndl tests ignore_outcome = True