Commit 9b47606
fixes pylint errors and improves code quality (#146)
* uses pycodestyle since pep8 is deprecated

* fixes some pylint errors

* improves code quality
Trybnetic authored Mar 1, 2018
1 parent 27bb8c6 commit 9b47606
Showing 13 changed files with 144 additions and 158 deletions.
4 changes: 2 additions & 2 deletions pyndl/__init__.py
@@ -61,10 +61,10 @@ def sysinfo():
               "CPU: {cpu_count}\n").format(s=uname, cpu_count=mp.cpu_count())

     if uname.sysname == "Linux":
-        names, *lines = os.popen("free -m").readlines()
+        _, *lines = os.popen("free -m").readlines()
         for identifier in ["Mem:", "Swap:"]:
             memory = [line for line in lines if identifier in line][0]
-            ix, total, used, *rest = memory.split()
+            _, total, used, *_ = memory.split()
             osinfo += "{} {}MiB/{}MiB\n".format(identifier, used, total)

     osinfo += "\n"
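As a quick illustration of what this parsing does, a hedged, self-contained sketch; the free -m sample output below is made up:

    # Toy re-run of the parsing above; the sample output is invented.
    sample = ("              total        used        free\n"
              "Mem:          15936        4242       11694\n"
              "Swap:          2047           0        2047\n")
    _, *lines = sample.splitlines(keepends=True)  # drop the header row
    for identifier in ["Mem:", "Swap:"]:
        memory = [line for line in lines if identifier in line][0]
        _, total, used, *_ = memory.split()  # keep only total and used
        print("{} {}MiB/{}MiB".format(identifier, used, total))
    # Mem: 4242MiB/15936MiB
    # Swap: 0MiB/2047MiB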
29 changes: 16 additions & 13 deletions pyndl/activation.py
@@ -16,6 +16,7 @@
 from . import ndl


+# pylint: disable=W0621
 def activation(events, weights, number_of_threads=1, remove_duplicates=None, ignore_missing_cues=False):
     """
     Estimate activations for given events in event file and outcome-cue weights.
@@ -60,18 +61,17 @@ def activation(events, weights, number_of_threads=1, remove_duplicates=None, ignore_missing_cues=False):
     if isinstance(events, str):
         events = ndl.events_from_file(events)

-    event_cues_list = (cues for cues, outcomes in events)
+    events = (cues for cues, outcomes in events)
     if remove_duplicates is None:
-        def enforce_no_duplicates(cues):
+        def check_no_duplicates(cues):
             if len(cues) != len(set(cues)):
-                raise ValueError('cues needs to be unique: "%s"; use '
-                                 'remove_duplicates=True' %
-                                 (' '.join(cues)))
+                raise ValueError('cues needs to be unique: "{}"; use '
+                                 'remove_duplicates=True'.format(' '.join(cues)))
             else:
                 return set(cues)
-        event_cues_list = (enforce_no_duplicates(cues) for cues in event_cues_list)
+        events = (check_no_duplicates(cues) for cues in events)
     elif remove_duplicates is True:
-        event_cues_list = (set(cues) for cues in event_cues_list)
+        events = (set(cues) for cues in events)

     if isinstance(weights, xr.DataArray):
         cues = weights.coords["cues"].values.tolist()
@@ -81,23 +81,25 @@ def enforce_no_duplicates(cues):
         cue_map = OrderedDict(((cue, ii) for ii, cue in enumerate(cues)))
         if ignore_missing_cues:
             event_cue_indices_list = (tuple(cue_map[cue] for cue in event_cues if cue in cues)
-                                      for event_cues in event_cues_list)
+                                      for event_cues in events)
         else:
             event_cue_indices_list = (tuple(cue_map[cue] for cue in event_cues)
-                                      for event_cues in event_cues_list)
-        activations = _activation_matrix(list(event_cue_indices_list), weights.values, number_of_threads)
+                                      for event_cues in events)
+        # pylint: disable=W0621
+        activations = _activation_matrix(list(event_cue_indices_list),
+                                         weights.values, number_of_threads)
         return xr.DataArray(activations,
                             coords={
                                 'outcomes': outcomes
                             },
                             dims=('outcomes', 'events'))
     elif isinstance(weights, dict):
         assert number_of_threads == 1, "Estimating activations with multiprocessing is not implemented for dicts."
-        activations = defaultdict(lambda: np.zeros(len(event_cues_list)))
-        event_cues_list = list(event_cues_list)
+        activations = defaultdict(lambda: np.zeros(len(events)))
+        events = list(events)
         for outcome, cue_dict in weights.items():
             _activations = activations[outcome]
-            for row, cues in enumerate(event_cues_list):
+            for row, cues in enumerate(events):
                 for cue in cues:
                     _activations[row] += cue_dict[cue]
         return activations
@@ -111,6 +113,7 @@ def _init_mp_activation_matrix(weights_, weights_shape_, activations_, activatio
     Initializes shared variables weights and activations.
     """
+    # pylint: disable=C0103, W0621, W0601
     global weights, activations
     weights = np.ctypeslib.as_array(weights_)
     weights.shape = weights_shape_
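For orientation, a hedged usage sketch of the dict branch of activation() above; the toy event and weight values are invented:

    # Toy call into the dict branch of activation(); values are invented.
    from collections import defaultdict
    from pyndl.activation import activation

    # outcome -> {cue: weight}; defaultdict(float) returns 0.0 for unseen cues
    weights = {'plural': defaultdict(float, {'s#': 0.3, 'ss': 0.1})}
    events = [(['s#', 'ss'], ['plural'])]
    acts = activation(events, weights, number_of_threads=1)
    print(dict(acts))  # {'plural': array([0.4])}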
2 changes: 2 additions & 0 deletions pyndl/corpus.py
@@ -100,6 +100,8 @@ def read_clean_gzfile(gz_file_path, *, break_duration=2.0):


 class JobParseGz():
+    # pylint: disable=E0202,missing-docstring
+
     """
     Stores the persistent information over several jobs and exposes a job
     method that only takes the varying parts as one argument.
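For background on the disable above: pylint's E0202 (method-hidden) fires when an instance attribute assignment shadows a method of the same name. A minimal invented reproduction, not taken from corpus.py:

    # Minimal invented reproduction of pylint E0202 (method-hidden).
    class Job():
        def __init__(self):
            self.run = None  # attribute hides the method below -> E0202

        def run(self):
            pass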
1 change: 0 additions & 1 deletion pyndl/count.py
@@ -14,7 +14,6 @@
 import gzip
 import itertools
 import multiprocessing
-import os
 import sys

15 changes: 9 additions & 6 deletions pyndl/ndl.py
@@ -203,7 +203,7 @@ def worker():
         for partlist in part_lists:
             working_queue.put(np.array(partlist, dtype=np.uint32))

-        for thread_id in range(number_of_threads):
+        for _ in range(number_of_threads):
             thread = threading.Thread(target=worker)
             thread.start()
             threads.append(thread)
@@ -248,7 +248,7 @@ def _attributes(event_path, number_events, alpha, betas, lambda_, cpu_time,
     def _format(value):
         return '{0: <{width}}'.format(value, width=width)

-    if not type(alpha) in (float, int):
+    if not isinstance(alpha, (float, int)):
         alpha = 'varying'

     new_attrs = {'date': _format(time.strftime("%Y-%m-%d %H:%M:%S")),
@@ -278,7 +278,7 @@ def _format(value):
         if key in new_attrs:
             new_val = new_attrs[key]
         else:
-            new_val = format_('')
+            new_val = ''
         new_attrs[key] = old_val + ' | ' + new_val
     return new_attrs


Expand All @@ -295,9 +295,12 @@ class WeightDict(defaultdict):
"""

# pylint: disable=W0613
def __init__(self, *args, **kwargs):
super().__init__(lambda: defaultdict(float))

self._attrs = OrderedDict()

if 'attrs' in kwargs:
self.attrs = kwargs['attrs']
else:
@@ -391,9 +394,9 @@ def dict_ndl(events, alphas, betas, lambda_=1.0, *,
         attrs_to_update = weights_ini.attrs
         coords = weights_ini.coords
         weights = WeightDict()
-        for oi, outcome in enumerate(coords['outcomes'].values):
-            for ci, cue in enumerate(coords['cues'].values):
-                weights[outcome][cue] = weights_ini.item((oi, ci))
+        for outcome_index, outcome in enumerate(coords['outcomes'].values):
+            for cue_index, cue in enumerate(coords['cues'].values):
+                weights[outcome][cue] = weights_ini.item((outcome_index, cue_index))
     elif not isinstance(weights, defaultdict):
         raise ValueError('weights needs to be either defaultdict or None')

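A quick illustration of why pylint prefers the isinstance form used above; the MyFloat class is invented for the demo:

    # Why isinstance beats an exact type check: subclasses still match.
    class MyFloat(float):
        pass

    alpha = MyFloat(0.5)
    print(type(alpha) in (float, int))      # False: exact type match only
    print(isinstance(alpha, (float, int)))  # True: subclasses count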
70 changes: 38 additions & 32 deletions pyndl/preprocess.py
@@ -73,6 +73,38 @@ def bandsample(population, sample_size=50000, *, cutoff=5, seed=None,
     return sample


+def ngrams_to_word(occurrences, n_chars, outfile, remove_duplicates=True):
+    """
+    Process the occurrences and write them to outfile.
+
+    Parameters
+    ----------
+    occurrences : sequence of (cues, outcomes) tuples
+        cues and outcomes are both strings where underscores and # are
+        special symbols.
+    n_chars : number of characters (e.g. 2 for bigrams, 3 for trigrams, ...)
+    outfile : file handle
+    remove_duplicates : bool
+        if True make cues and outcomes per event unique
+
+    """
+    for cues, outcomes in occurrences:
+        if cues and outcomes:
+            occurrence = cues + '_' + outcomes
+        else:  # take either
+            occurrence = cues + outcomes
+        phrase_string = "#" + re.sub("_", "#", occurrence) + "#"
+        ngrams = (phrase_string[i:(i + n_chars)] for i in
+                  range(len(phrase_string) - n_chars + 1))
+        if not ngrams or not occurrence:
+            continue
+        if remove_duplicates:
+            outfile.write("{}\t{}\n".format("_".join(set(ngrams)), occurrence))
+        else:
+            outfile.write("{}\t{}\n".format("_".join(ngrams), occurrence))
+
+
 def process_occurrences(occurrences, outfile, *,
                         cue_structure="trigrams_to_word", remove_duplicates=True):
     """
@@ -92,43 +124,17 @@
     """
     if cue_structure == "bigrams_to_word":
-        for cues, outcomes in occurrences:
-            if cues and outcomes:
-                occurrence = cues + '_' + outcomes
-            else:  # take either
-                occurrence = cues + outcomes
-            phrase_string = "#" + re.sub("_", "#", occurrence) + "#"
-            bigrams = (phrase_string[i:(i + 2)] for i in
-                       range(len(phrase_string) - 2 + 1))
-            if not bigrams or not occurrence:
-                continue
-            if remove_duplicates:
-                outfile.write("_".join(set(bigrams)) + "\t" + occurrence + "\n")
-            else:
-                outfile.write("_".join(bigrams) + "\t" + occurrence + "\n")
+        ngrams_to_word(occurrences, 2, outfile, remove_duplicates=remove_duplicates)
     elif cue_structure == "trigrams_to_word":
-        for cues, outcomes in occurrences:
-            if cues and outcomes:
-                occurrence = cues + '_' + outcomes
-            else:  # take either
-                occurrence = cues + outcomes
-            phrase_string = "#" + re.sub("_", "#", occurrence) + "#"
-            trigrams = (phrase_string[i:(i + 3)] for i in
-                        range(len(phrase_string) - 3 + 1))
-            if not trigrams or not occurrence:
-                continue
-            if remove_duplicates:
-                outfile.write("_".join(set(trigrams)) + "\t" + occurrence + "\n")
-            else:
-                outfile.write("_".join(trigrams) + "\t" + occurrence + "\n")
+        ngrams_to_word(occurrences, 3, outfile, remove_duplicates=remove_duplicates)
     elif cue_structure == "word_to_word":
         for cues, outcomes in occurrences:
             if not cues:
                 continue
             if remove_duplicates:
-                outfile.write("_".join(set(cues.split("_"))) + "\t" + outcomes + "\n")
+                outfile.write("{}\t{}\n".format("_".join(set(cues.split("_"))), outcomes))
             else:
-                outfile.write(cues + "\t" + outcomes + "\n")
+                outfile.write("{}\t{}\n".format(cues, outcomes))
     else:
         raise NotImplementedError('cue_structure=%s is not implemented yet.' % cue_structure)
Expand Down Expand Up @@ -761,9 +767,9 @@ def create_binary_event_files(event_file,

def _error_callback(error):
if isinstance(error, StopIteration):
msg, result = error.value
_, result = error.value
nonlocal number_events
number_events += result
number_events += result # pylint: disable=undefined-variable
pool.close()
else:
raise error
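To make the shared helper concrete, a hedged toy run; io.StringIO stands in for the file handle, and the import assumes ngrams_to_word is importable from pyndl.preprocess after this change:

    # Toy run of the new ngrams_to_word helper; StringIO replaces a real file.
    import io
    from pyndl.preprocess import ngrams_to_word

    outfile = io.StringIO()
    # one event with cues "hans" and no outcomes -> occurrence is "hans"
    ngrams_to_word([("hans", "")], 3, outfile, remove_duplicates=False)
    print(repr(outfile.getvalue()))  # '#ha_han_ans_ns#\thans\n'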
4 changes: 3 additions & 1 deletion tests/conftest.py
@@ -1,10 +1,12 @@

 '''
 Configuration for py.test-3.
 '''


 def pytest_addoption(parser):
+    """
+    adds custom option to the pytest parser
+    """
     parser.addoption("--runslow", action="store_true",
                      help="run slow tests")
22 changes: 11 additions & 11 deletions tests/test_activation.py
@@ -15,7 +15,7 @@
 from pyndl import ndl
 from pyndl.activation import activation

-slow = pytest.mark.skipif(not pytest.config.getoption("--runslow"),
+slow = pytest.mark.skipif(not pytest.config.getoption("--runslow"),  # pylint: disable=invalid-name
                           reason="need --runslow option to run")

 TEST_ROOT = os.path.join(os.path.pardir, os.path.dirname(__file__))
@@ -29,8 +29,8 @@

 def test_exceptions():
     with pytest.raises(ValueError) as e_info:
-        wm = ndl.dict_ndl(FILE_PATH_SIMPLE, ALPHA, BETAS, remove_duplicates=None)
-        activation(FILE_PATH_MULTIPLE_CUES, wm)
+        weights = ndl.dict_ndl(FILE_PATH_SIMPLE, ALPHA, BETAS, remove_duplicates=None)
+        activation(FILE_PATH_MULTIPLE_CUES, weights)
         assert e_info == 'cues or outcomes needs to be unique: cues "a a"; outcomes "A"; use remove_duplicates=True'

     with pytest.raises(ValueError) as e_info:
with pytest.raises(ValueError) as e_info:
Expand Down Expand Up @@ -149,22 +149,22 @@ def test_activation_matrix_large():
print("")
print("Start setup...")

def time_test(func, of=""):
def time_test(func, of=""): # pylint: disable=invalid-name
def dec_func(*args, **kwargs):
print("start test '{}'".format(of))
st = time.clock()
start = time.clock()
res = func(*args, **kwargs)
et = time.clock()
end = time.clock()
print("finished test '{}'".format(of))
print(" duration: {:.3f}s".format(et-st))
print(" duration: {:.3f}s".format(end - start))
print("")
return res
return dec_func

n = 2000
n_cues = 10*n
n_outcomes = n
n_events = 10*n
nn = 2000
n_cues = 10*nn
n_outcomes = nn
n_events = 10*nn
n_cues_per_event = 30
weight_mat = np.random.rand(n_cues, n_outcomes)
cues = ['c'+str(i) for i in range(n_cues)]
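A hedged usage sketch of the time_test wrapper above (assuming it is in scope; slow_add is invented). Worth noting for a future pass: time.clock() is deprecated since Python 3.3 and removed in 3.8, so time.perf_counter() would be the forward-compatible choice here.

    # Hypothetical use of the time_test wrapper; slow_add is invented.
    import time

    def slow_add(a, b):
        time.sleep(0.1)
        return a + b

    timed_add = time_test(slow_add, of="slow_add")
    print(timed_add(1, 2))  # prints start/finish lines, the duration, then 3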
2 changes: 1 addition & 1 deletion tests/test_count.py
@@ -33,7 +33,7 @@ def test_words_symbols():

 def test_save_load():
     file_name = os.path.join(TEST_ROOT, "temp/cues.tab")
-    n_events, cues, outcomes = count.cues_outcomes(EVENT_RESOURCE_FILE)
+    _, cues, _ = count.cues_outcomes(EVENT_RESOURCE_FILE)
     count.save_counter(cues, file_name)
     cues_loaded = count.load_counter(file_name)
     assert cues == cues_loaded
(Diffs for the remaining 4 changed files are not shown.)
