Skip to content

Commit

Permalink
Merge pull request #87 from quantling/activation-checks
Browse files Browse the repository at this point in the history
make activation consistent
  • Loading branch information
Trybnetic authored Apr 19, 2017
2 parents 97efb77 + acd6b93 commit cffe67f
Show file tree
Hide file tree
Showing 10 changed files with 61 additions and 42 deletions.
4 changes: 2 additions & 2 deletions pyndl/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
__author__ = ('David-Elias Künstle, Lennard Schneider, '
'Konstantin Sering, Marc Weitz')
__author_email__ = '[email protected]'
__version__ = '0.2.6'
__version__ = '0.2.7'
__license__ = 'MIT'
__description__ = ('Naive discriminative learning implements learning and '
'classification models based on the Rescorla-Wagner '
Expand All @@ -28,6 +28,6 @@
:version: %s
:author: %s
:contact: %s
:date: 2017-04-11
:date: 2017-04-18
:copyright: %s
""" % (__description__, __version__, __author__, __author_email__, __license__)
28 changes: 15 additions & 13 deletions pyndl/activation.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@

def activation(event_list, weights, number_of_threads=1, remove_duplicates=None, ignore_missing_cues=False):
"""
Estimate activations for given events in event file and cue-outcome weights.
Estimate activations for given events in event file and outcome-cue weights.
Memory overhead for multiprocessing is one copy of weights
plus a copy of cues for each thread.
Expand All @@ -22,7 +22,7 @@ def activation(event_list, weights, number_of_threads=1, remove_duplicates=None,
event_list : generator or str
generates cues, outcomes pairs or the path to the event file
weights : xarray.DataArray or dict[dict[float]]
the xarray.DataArray needs to have the dimensions 'cues' and 'outcomes'
the xarray.DataArray needs to have the dimensions 'outcomes' and 'cues'
the dictionaries hold weight[outcome][cue].
number_of_threads : int
an integer giving the number of threads in which the job should
Expand All @@ -40,7 +40,7 @@ def activation(event_list, weights, number_of_threads=1, remove_duplicates=None,
Returns
-------
activations : xarray.DataArray
with dimensions 'events' and 'outcomes'. Contains coords for the outcomes.
with dimensions 'outcomes' and 'events'. Contains coords for the outcomes.
returned if weights is instance of xarray.DataArray
or
Expand Down Expand Up @@ -70,6 +70,8 @@ def enforce_no_duplicates(cues):
if isinstance(weights, xr.DataArray):
cues = weights.coords["cues"].values.tolist()
outcomes = weights.coords["outcomes"].values.tolist()
if not weights.values.shape == (len(outcomes), len(cues)):
raise ValueError('dimensions of weights are wrong. Probably you need to transpose the matrix')
cue_map = OrderedDict(((cue, ii) for ii, cue in enumerate(cues)))
if ignore_missing_cues:
event_cue_indices_list = (tuple(cue_map[cue] for cue in event_cues if cue in cues)
Expand All @@ -82,7 +84,7 @@ def enforce_no_duplicates(cues):
coords={
'outcomes': outcomes
},
dims=('events', 'outcomes'))
dims=('outcomes', 'events'))
elif isinstance(weights, dict):
assert number_of_threads == 1, "Estimating activations with multiprocessing is not implemented for dicts."
activations = defaultdict(lambda: np.zeros(len(event_cues_list)))
Expand Down Expand Up @@ -116,7 +118,7 @@ def _run_mp_activation_matrix(event_index, cue_indices):
Calculate activation for all outcomes of a single event.
"""
activations[event_index, :] = weights[cue_indices, :].sum(axis=0)
activations[:, event_index] = weights[:, cue_indices].sum(axis=1)


def _activation_matrix(indices_list, weights, number_of_threads):
Expand All @@ -128,25 +130,25 @@ def _activation_matrix(indices_list, weights, number_of_threads):
Parameters
----------
indices_list : list with iterables containing the indices of the cues in weight matrix.
weights : Weight matrix as 2d numpy.array with shape (cues, weights)
indices_list : list[int]
events as cue indices in weights
weights : numpy.array
weight matrix with shape (outcomes, cues)
number_of_threads : int
an integer giving the number of threads in which the job should be
executed
Returns
-------
activation_matrix : 2d numpy.array
activations for the events and all outcomes in the weights and
activation_matrix : numpy.array
estimated activations as matrix with shape (outcomes, events)
"""
assert number_of_threads >= 1, "Can't run with less than 1 thread"

activations_dim = (len(indices_list), weights.shape[1])
activations_dim = (weights.shape[0], len(indices_list))
if number_of_threads == 1:
activations = np.empty(activations_dim, dtype=np.float64)
for row, event_cues in enumerate(indices_list):
activations[row, :] = weights[event_cues, :].sum(axis=0)
activations[:, row] = weights[:, event_cues].sum(axis=1)
return activations
else:
shared_activations = mp.RawArray(ctypes.c_double, int(np.prod(activations_dim)))
Expand Down
24 changes: 18 additions & 6 deletions pyndl/ndl.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ def ndl(event_path, alpha, betas, lambda_=1.0, *,
Returns
-------
weights : xarray.DataArray
with dimensions 'cues' and 'outcomes'. You can lookup the weights
with dimensions 'outcomes' and 'cues'. You can lookup the weights
between a cue and an outcome with ``weights.loc[{'outcomes': outcome,
'cues': cue}]`` or ``weights.loc[outcome].loc[cue]``.
Expand Down Expand Up @@ -326,7 +326,7 @@ def dict_ndl(event_list, alphas, betas, lambda_=1.0, *,
or
weights : xarray.DataArray
with dimensions 'cues' and 'outcomes'. You can lookup the weights
with dimensions 'outcomes' and 'cues'. You can lookup the weights
between a cue and an outcome with ``weights.loc[{'outcomes': outcome,
'cues': cue}]`` or ``weights.loc[outcome].loc[cue]``.
Expand Down Expand Up @@ -409,10 +409,22 @@ def dict_ndl(event_list, alphas, betas, lambda_=1.0, *,
__name__ + "." + dict_ndl.__name__, attrs=attrs_to_update)

if make_data_array:
# post-processing
weights = pd.DataFrame(weights)
# weights.fillna(0.0, inplace=True) # TODO make sure to not remove real NaNs
weights = xr.DataArray(weights.T, dims=('outcomes', 'cues'), attrs=attrs)
outcomes = list(weights.keys())
cues = set()
for outcome in outcomes:
cues.update(set(weights[outcome].keys()))

cues = list(cues)

weights_dict = weights
shape = (len(outcomes), len(cues))
weights = xr.DataArray(np.zeros(shape), attrs=attrs,
coords={'outcomes': outcomes, 'cues': cues},
dims=('outcomes', 'cues'))

for outcome in outcomes:
for cue in cues:
weights.loc[{"outcomes": outcome, "cues": cue}] = weights_dict[outcome][cue]
else:
weights.attrs = attrs

Expand Down
8 changes: 4 additions & 4 deletions tests/reference/weights_event_file_multiple_cues_ndl2.csv
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
"","A","B","C"
"a",0.102295856023193,-0.000637584443199,0.00839070911745
"b",0.033396811752139,0.0566582680337533,0.0060580709494085
"c",-0.00220704388828405,0.0376010015902643,0.0471512345263585
"","A","B","C","D"
"a",0.0922958560231934,-0.000637584443199,0.00839070911745,0.01
"b",0.033396811752139,0.0566582680337533,0.0060580709494085,0
"c",-0.00220704388828405,0.0376010015902643,0.0471512345263585,0
8 changes: 4 additions & 4 deletions tests/reference/weights_event_file_simple.csv
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
"","A","B","C"
"a",0.0764228958019371,-0.0004679542492416,0.00913237656408
"b",0.0369453054439119,0.0383650579363088,0.0076940833315796
"c",-0.00148720933211834,0.0382611772343088,0.0472921134315796
"","A","B","C","D"
"a",0.0664228958019371,-0.0004679542492416,0.00913237656408,0.01
"b",0.0369453054439119,0.0383650579363088,0.0076940833315796,0
"c",-0.00148720933211834,0.0382611772343088,0.0472921134315796,0
8 changes: 4 additions & 4 deletions tests/reference/weights_event_file_simple_ndl2.csv
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
"","A","B","C"
"a",0.0764228958019371,-0.0004679542492416,0.00913237656408
"b",0.0369453054439119,0.0383650579363088,0.0076940833315796
"c",-0.00148720933211834,0.0382611772343088,0.0472921134315796
"","A","B","C","D"
"a",0.0664228958019371,-0.0004679542492416,0.00913237656408,0.01
"b",0.0369453054439119,0.0383650579363088,0.0076940833315796,0
"c",-0.00148720933211834,0.0382611772343088,0.0472921134315796,0
2 changes: 1 addition & 1 deletion tests/resources/event_file_multiple_cues.tab
Original file line number Diff line number Diff line change
Expand Up @@ -15,4 +15,4 @@ b_c_b B
b_c B
a A
a A
a A
a D
2 changes: 1 addition & 1 deletion tests/resources/event_file_simple.tab
Original file line number Diff line number Diff line change
Expand Up @@ -15,4 +15,4 @@ b_c B
b_c B
a A
a A
a A
a D
16 changes: 10 additions & 6 deletions tests/test_activation.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,16 +35,18 @@ def test_exceptions():


def test_activation_matrix():
weights = xr.DataArray(np.array([[0, 1], [1, 0], [0, 0]]),
weights = xr.DataArray(np.array([[0, 1, 0], [1, 0, 0]]),
coords={
'outcomes': ['o1', 'o2'],
'cues': ['c1', 'c2', 'c3']
},
dims=('cues', 'outcomes'))
dims=('outcomes', 'cues'))

events = [(['c1', 'c2', 'c3'], []),
(['c1', 'c3'], []),
(['c2'], []),
(['c1', 'c1'], [])]
reference_activations = np.array([[1, 1], [0, 1], [1, 0], [0, 1]])
reference_activations = np.array([[1, 0, 1, 0], [1, 1, 0, 1]])

with pytest.raises(ValueError):
activations = activation(events, weights, number_of_threads=1)
Expand All @@ -57,16 +59,18 @@ def test_activation_matrix():


def test_ignore_missing_cues():
weights = xr.DataArray(np.array([[0, 1], [1, 0], [0, 0]]),
weights = xr.DataArray(np.array([[0, 1, 0], [1, 0, 0]]),
coords={
'outcomes': ['o1', 'o2'],
'cues': ['c1', 'c2', 'c3']
},
dims=('cues', 'outcomes'))
dims=('outcomes', 'cues'))

events = [(['c1', 'c2', 'c3'], []),
(['c1', 'c3'], []),
(['c2', 'c4'], []),
(['c1', 'c1'], [])]
reference_activations = np.array([[1, 1], [0, 1], [1, 0], [0, 1]])
reference_activations = np.array([[1, 0, 1, 0], [1, 1, 0, 1]])

with pytest.raises(KeyError):
activations = activation(events, weights, number_of_threads=1,
Expand Down
3 changes: 2 additions & 1 deletion tests/test_ndl.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,7 @@ def test_continue_learning_dict_ndl_data_array(result_dict_ndl, result_dict_ndl_
unequal, unequal_ratio = compare_arrays(FILE_PATH_SIMPLE,
continue_from_dict,
continue_from_data_array)
print(continue_from_data_array)
print('%.2f ratio unequal' % unequal_ratio)
assert len(unequal) == 0

Expand Down Expand Up @@ -438,7 +439,7 @@ def compare_arrays(file_path, arr1, arr2):
cue_index = cue_map[cue]
values.append(array[outcome_index][cue_index])
elif isinstance(array, xr.DataArray):
values.append(array.loc[{'outcomes': outcome, 'cues': cue}])
values.append(array.loc[{'outcomes': outcome, 'cues': cue}].values)
elif isinstance(array, pd.DataFrame):
values.append(array.loc[outcome][cue])
else:
Expand Down

0 comments on commit cffe67f

Please sign in to comment.