Skip to content

Commit

Permalink
Merge pull request #87 from quantling/activation-checks
Browse files Browse the repository at this point in the history
make activation consistent
  • Loading branch information
Trybnetic authored Apr 19, 2017
2 parents 97efb77 + acd6b93 commit cffe67f
Show file tree
Hide file tree
Showing 10 changed files with 61 additions and 42 deletions.
4 changes: 2 additions & 2 deletions pyndl/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
__author__ = ('David-Elias Künstle, Lennard Schneider, '
'Konstantin Sering, Marc Weitz')
__author_email__ = '[email protected]'
__version__ = '0.2.6'
__version__ = '0.2.7'
__license__ = 'MIT'
__description__ = ('Naive discriminative learning implements learning and '
'classification models based on the Rescorla-Wagner '
Expand All @@ -28,6 +28,6 @@
:version: %s
:author: %s
:contact: %s
:date: 2017-04-11
:date: 2017-04-18
:copyright: %s
""" % (__description__, __version__, __author__, __author_email__, __license__)
28 changes: 15 additions & 13 deletions pyndl/activation.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@

def activation(event_list, weights, number_of_threads=1, remove_duplicates=None, ignore_missing_cues=False):
"""
Estimate activations for given events in event file and cue-outcome weights.
Estimate activations for given events in event file and outcome-cue weights.
Memory overhead for multiprocessing is one copy of weights
plus a copy of cues for each thread.
Expand All @@ -22,7 +22,7 @@ def activation(event_list, weights, number_of_threads=1, remove_duplicates=None,
event_list : generator or str
generates cues, outcomes pairs or the path to the event file
weights : xarray.DataArray or dict[dict[float]]
the xarray.DataArray needs to have the dimensions 'cues' and 'outcomes'
the xarray.DataArray needs to have the dimensions 'outcomes' and 'cues'
the dictionaries hold weight[outcome][cue].
number_of_threads : int
an integer giving the number of threads in which the job should
Expand All @@ -40,7 +40,7 @@ def activation(event_list, weights, number_of_threads=1, remove_duplicates=None,
Returns
-------
activations : xarray.DataArray
with dimensions 'events' and 'outcomes'. Contains coords for the outcomes.
with dimensions 'outcomes' and 'events'. Contains coords for the outcomes.
returned if weights is instance of xarray.DataArray
or
Expand Down Expand Up @@ -70,6 +70,8 @@ def enforce_no_duplicates(cues):
if isinstance(weights, xr.DataArray):
cues = weights.coords["cues"].values.tolist()
outcomes = weights.coords["outcomes"].values.tolist()
if not weights.values.shape == (len(outcomes), len(cues)):
raise ValueError('dimensions of weights are wrong. Probably you need to transpose the matrix')
cue_map = OrderedDict(((cue, ii) for ii, cue in enumerate(cues)))
if ignore_missing_cues:
event_cue_indices_list = (tuple(cue_map[cue] for cue in event_cues if cue in cues)
Expand All @@ -82,7 +84,7 @@ def enforce_no_duplicates(cues):
coords={
'outcomes': outcomes
},
dims=('events', 'outcomes'))
dims=('outcomes', 'events'))
elif isinstance(weights, dict):
assert number_of_threads == 1, "Estimating activations with multiprocessing is not implemented for dicts."
activations = defaultdict(lambda: np.zeros(len(event_cues_list)))
Expand Down Expand Up @@ -116,7 +118,7 @@ def _run_mp_activation_matrix(event_index, cue_indices):
Calculate activation for all outcomes of a single event.
"""
activations[event_index, :] = weights[cue_indices, :].sum(axis=0)
activations[:, event_index] = weights[:, cue_indices].sum(axis=1)


def _activation_matrix(indices_list, weights, number_of_threads):
Expand All @@ -128,25 +130,25 @@ def _activation_matrix(indices_list, weights, number_of_threads):
Parameters
----------
indices_list : list with iterables containing the indices of the cues in weight matrix.
weights : Weight matrix as 2d numpy.array with shape (cues, weights)
indices_list : list[int]
events as cue indices in weights
weights : numpy.array
weight matrix with shape (outcomes, cues)
number_of_threads : int
an integer giving the number of threads in which the job should be
executed
Returns
-------
activation_matrix : 2d numpy.array
activations for the events and all outcomes in the weights and
activation_matrix : numpy.array
estimated activations as matrix with shape (outcomes, events)
"""
assert number_of_threads >= 1, "Can't run with less than 1 thread"

activations_dim = (len(indices_list), weights.shape[1])
activations_dim = (weights.shape[0], len(indices_list))
if number_of_threads == 1:
activations = np.empty(activations_dim, dtype=np.float64)
for row, event_cues in enumerate(indices_list):
activations[row, :] = weights[event_cues, :].sum(axis=0)
activations[:, row] = weights[:, event_cues].sum(axis=1)
return activations
else:
shared_activations = mp.RawArray(ctypes.c_double, int(np.prod(activations_dim)))
Expand Down
24 changes: 18 additions & 6 deletions pyndl/ndl.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ def ndl(event_path, alpha, betas, lambda_=1.0, *,
Returns
-------
weights : xarray.DataArray
with dimensions 'cues' and 'outcomes'. You can lookup the weights
with dimensions 'outcomes' and 'cues'. You can lookup the weights
between a cue and an outcome with ``weights.loc[{'outcomes': outcome,
'cues': cue}]`` or ``weights.loc[outcome].loc[cue]``.
Expand Down Expand Up @@ -326,7 +326,7 @@ def dict_ndl(event_list, alphas, betas, lambda_=1.0, *,
or
weights : xarray.DataArray
with dimensions 'cues' and 'outcomes'. You can lookup the weights
with dimensions 'outcomes' and 'cues'. You can lookup the weights
between a cue and an outcome with ``weights.loc[{'outcomes': outcome,
'cues': cue}]`` or ``weights.loc[outcome].loc[cue]``.
Expand Down Expand Up @@ -409,10 +409,22 @@ def dict_ndl(event_list, alphas, betas, lambda_=1.0, *,
__name__ + "." + dict_ndl.__name__, attrs=attrs_to_update)

if make_data_array:
# post-processing
weights = pd.DataFrame(weights)
# weights.fillna(0.0, inplace=True) # TODO make sure to not remove real NaNs
weights = xr.DataArray(weights.T, dims=('outcomes', 'cues'), attrs=attrs)
outcomes = list(weights.keys())
cues = set()
for outcome in outcomes:
cues.update(set(weights[outcome].keys()))

cues = list(cues)

weights_dict = weights
shape = (len(outcomes), len(cues))
weights = xr.DataArray(np.zeros(shape), attrs=attrs,
coords={'outcomes': outcomes, 'cues': cues},
dims=('outcomes', 'cues'))

for outcome in outcomes:
for cue in cues:
weights.loc[{"outcomes": outcome, "cues": cue}] = weights_dict[outcome][cue]
else:
weights.attrs = attrs

Expand Down
8 changes: 4 additions & 4 deletions tests/reference/weights_event_file_multiple_cues_ndl2.csv
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
"","A","B","C"
"a",0.102295856023193,-0.000637584443199,0.00839070911745
"b",0.033396811752139,0.0566582680337533,0.0060580709494085
"c",-0.00220704388828405,0.0376010015902643,0.0471512345263585
"","A","B","C","D"
"a",0.0922958560231934,-0.000637584443199,0.00839070911745,0.01
"b",0.033396811752139,0.0566582680337533,0.0060580709494085,0
"c",-0.00220704388828405,0.0376010015902643,0.0471512345263585,0
8 changes: 4 additions & 4 deletions tests/reference/weights_event_file_simple.csv
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
"","A","B","C"
"a",0.0764228958019371,-0.0004679542492416,0.00913237656408
"b",0.0369453054439119,0.0383650579363088,0.0076940833315796
"c",-0.00148720933211834,0.0382611772343088,0.0472921134315796
"","A","B","C","D"
"a",0.0664228958019371,-0.0004679542492416,0.00913237656408,0.01
"b",0.0369453054439119,0.0383650579363088,0.0076940833315796,0
"c",-0.00148720933211834,0.0382611772343088,0.0472921134315796,0
8 changes: 4 additions & 4 deletions tests/reference/weights_event_file_simple_ndl2.csv
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
"","A","B","C"
"a",0.0764228958019371,-0.0004679542492416,0.00913237656408
"b",0.0369453054439119,0.0383650579363088,0.0076940833315796
"c",-0.00148720933211834,0.0382611772343088,0.0472921134315796
"","A","B","C","D"
"a",0.0664228958019371,-0.0004679542492416,0.00913237656408,0.01
"b",0.0369453054439119,0.0383650579363088,0.0076940833315796,0
"c",-0.00148720933211834,0.0382611772343088,0.0472921134315796,0
2 changes: 1 addition & 1 deletion tests/resources/event_file_multiple_cues.tab
Original file line number Diff line number Diff line change
Expand Up @@ -15,4 +15,4 @@ b_c_b B
b_c B
a A
a A
a A
a D
2 changes: 1 addition & 1 deletion tests/resources/event_file_simple.tab
Original file line number Diff line number Diff line change
Expand Up @@ -15,4 +15,4 @@ b_c B
b_c B
a A
a A
a A
a D
16 changes: 10 additions & 6 deletions tests/test_activation.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,16 +35,18 @@ def test_exceptions():


def test_activation_matrix():
weights = xr.DataArray(np.array([[0, 1], [1, 0], [0, 0]]),
weights = xr.DataArray(np.array([[0, 1, 0], [1, 0, 0]]),
coords={
'outcomes': ['o1', 'o2'],
'cues': ['c1', 'c2', 'c3']
},
dims=('cues', 'outcomes'))
dims=('outcomes', 'cues'))

events = [(['c1', 'c2', 'c3'], []),
(['c1', 'c3'], []),
(['c2'], []),
(['c1', 'c1'], [])]
reference_activations = np.array([[1, 1], [0, 1], [1, 0], [0, 1]])
reference_activations = np.array([[1, 0, 1, 0], [1, 1, 0, 1]])

with pytest.raises(ValueError):
activations = activation(events, weights, number_of_threads=1)
Expand All @@ -57,16 +59,18 @@ def test_activation_matrix():


def test_ignore_missing_cues():
weights = xr.DataArray(np.array([[0, 1], [1, 0], [0, 0]]),
weights = xr.DataArray(np.array([[0, 1, 0], [1, 0, 0]]),
coords={
'outcomes': ['o1', 'o2'],
'cues': ['c1', 'c2', 'c3']
},
dims=('cues', 'outcomes'))
dims=('outcomes', 'cues'))

events = [(['c1', 'c2', 'c3'], []),
(['c1', 'c3'], []),
(['c2', 'c4'], []),
(['c1', 'c1'], [])]
reference_activations = np.array([[1, 1], [0, 1], [1, 0], [0, 1]])
reference_activations = np.array([[1, 0, 1, 0], [1, 1, 0, 1]])

with pytest.raises(KeyError):
activations = activation(events, weights, number_of_threads=1,
Expand Down
3 changes: 2 additions & 1 deletion tests/test_ndl.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,7 @@ def test_continue_learning_dict_ndl_data_array(result_dict_ndl, result_dict_ndl_
unequal, unequal_ratio = compare_arrays(FILE_PATH_SIMPLE,
continue_from_dict,
continue_from_data_array)
print(continue_from_data_array)
print('%.2f ratio unequal' % unequal_ratio)
assert len(unequal) == 0

Expand Down Expand Up @@ -438,7 +439,7 @@ def compare_arrays(file_path, arr1, arr2):
cue_index = cue_map[cue]
values.append(array[outcome_index][cue_index])
elif isinstance(array, xr.DataArray):
values.append(array.loc[{'outcomes': outcome, 'cues': cue}])
values.append(array.loc[{'outcomes': outcome, 'cues': cue}].values)
elif isinstance(array, pd.DataFrame):
values.append(array.loc[outcome][cue])
else:
Expand Down

0 comments on commit cffe67f

Please sign in to comment.