diff --git a/doc/source/examples.rst b/doc/source/examples.rst
index fa40baa..9bd12a6 100644
--- a/doc/source/examples.rst
+++ b/doc/source/examples.rst
@@ -109,7 +109,7 @@ former one using `openMP `_ and therefore being
 expected to be much faster when analyzing larger data. Besides, you can set
 three technical arguments which we will not change here:
-1. ``number_of_threads`` (int) giving the number of threads in which the job
+1. ``n_jobs`` (int) giving the number of threads in which the job
    should be executed (default=2)
 2. ``sequence`` (int) giving the length of sublists generated from all outcomes
    (default=10)
diff --git a/pyndl/__init__.py b/pyndl/__init__.py
index b3f6594..42f28d6 100644
--- a/pyndl/__init__.py
+++ b/pyndl/__init__.py
@@ -11,20 +11,27 @@ import os
 import sys
 import multiprocessing as mp
-from pip._vendor import pkg_resources
+try:
+    from importlib.metadata import requires
+except ModuleNotFoundError: # python 3.7 and before
+    requires = None
+try:
+    from packaging.requirements import Requirement
+except ModuleNotFoundError: # this should only happen during setup phase
+    Requirement = None
 __author__ = ('Konstantin Sering, Marc Weitz, '
               'David-Elias Künstle, Lennard Schneider, '
               'Elnaz Shafaei-Bajestan')
 __author_email__ = 'konstantin.sering@uni-tuebingen.de'
-__version__ = '0.8.2'
+__version__ = '0.8.1'
 __license__ = 'MIT'
 __description__ = ('Naive discriminative learning implements learning and '
                    'classification models based on the Rescorla-Wagner '
                    'equations.')
 __classifiers__ = [
-    'Development Status :: 3 - Alpha',
+    'Development Status :: 4 - Beta',
     'Environment :: Console',
     'Intended Audience :: Science/Research',
     'License :: OSI Approved :: MIT License',
@@ -45,8 +52,9 @@ def sysinfo():
     """
     Prints system the dependency information
     """
-    pyndl = pkg_resources.working_set.by_key["pyndl"]
-    dependencies = [r.project_name for r in pyndl.requires()]
+    if requires:
+        dependencies = [Requirement(req).name for req in requires('pyndl')
+                        if not Requirement(req).marker]
     header = ("Pyndl Information\n"
               "=================\n\n")
@@ -78,7 +86,10 @@ def sysinfo():
     deps = ("Dependencies\n"
             "------------\n")
-    deps += "\n".join("{pkg.__name__}: {pkg.__version__}".format(pkg=__import__(dep))
-                      for dep in dependencies)
+    if requires:
+        deps += "\n".join("{pkg.__name__}: {pkg.__version__}".format(pkg=__import__(dep))
+                          for dep in dependencies)
+    else:
+        deps = 'You need Python 3.8 or higher to show dependencies.'
     print(header + general + osinfo + deps)
diff --git a/pyndl/activation.py b/pyndl/activation.py
index 5f0b52f..e546be0 100644
--- a/pyndl/activation.py
+++ b/pyndl/activation.py
@@ -9,6 +9,7 @@ import multiprocessing as mp
 import ctypes
 from collections import defaultdict, OrderedDict
+import warnings
 import numpy as np
 import xarray as xr
@@ -17,7 +18,8 @@
 # pylint: disable=W0621
-def activation(events, weights, number_of_threads=1, remove_duplicates=None, ignore_missing_cues=False):
+def activation(events, weights, *, n_jobs=1, number_of_threads=None,
+               remove_duplicates=None, ignore_missing_cues=False):
     """
     Estimate activations for given events in event file and outcome-cue weights.
@@ -31,7 +33,7 @@ def activation(events, weights, number_of_threads=1, remove_duplicates=None, ign
     weights : xarray.DataArray or dict[dict[float]]
         the xarray.DataArray needs to have the dimensions 'outcomes' and 'cues'
         the dictionaries hold weight[outcome][cue].
- number_of_threads : int + n_jobs : int a integer giving the number of threads in which the job should executed remove_duplicates : {None, True, False} @@ -58,6 +60,11 @@ def activation(events, weights, number_of_threads=1, remove_duplicates=None, ign returned if weights is instance of dict """ + if number_of_threads is not None: + warnings.warn("Parameter `number_of_threads` is renamed to `n_jobs`. The old name " + "will stop working with v0.9.0.", + DeprecationWarning, stacklevel=2) + n_jobs = number_of_threads if isinstance(events, str): events = io.events_from_file(events) @@ -87,14 +94,14 @@ def check_no_duplicates(cues): for event_cues in events) # pylint: disable=W0621 activations = _activation_matrix(list(event_cue_indices_list), - weights.values, number_of_threads) + weights.values, n_jobs) return xr.DataArray(activations, coords={ 'outcomes': outcomes }, dims=('outcomes', 'events')) elif isinstance(weights, dict): - assert number_of_threads == 1, "Estimating activations with multiprocessing is not implemented for dicts." + assert n_jobs == 1, "Estimating activations with multiprocessing is not implemented for dicts." activations = defaultdict(lambda: np.zeros(len(events))) events = list(events) for outcome, cue_dict in weights.items(): @@ -130,7 +137,7 @@ def _run_mp_activation_matrix(event_index, cue_indices): activations[:, event_index] = weights[:, cue_indices].sum(axis=1) -def _activation_matrix(indices_list, weights, number_of_threads): +def _activation_matrix(indices_list, weights, n_jobs): """ Estimate activation for indices in weights @@ -143,7 +150,7 @@ def _activation_matrix(indices_list, weights, number_of_threads): events as cue indices in weights weights : numpy.array weight matrix with shape (outcomes, cues) - number_of_threads : int + n_jobs : int Returns ------- @@ -151,10 +158,10 @@ def _activation_matrix(indices_list, weights, number_of_threads): estimated activations as matrix with shape (outcomes, events) """ - assert number_of_threads >= 1, "Can't run with less than 1 thread" + assert n_jobs >= 1, "Can't run with less than 1 thread" activations_dim = (weights.shape[0], len(indices_list)) - if number_of_threads == 1: + if n_jobs == 1: activations = np.empty(activations_dim, dtype=np.float64) for row, event_cues in enumerate(indices_list): activations[:, row] = weights[:, event_cues].sum(axis=1) @@ -164,7 +171,7 @@ def _activation_matrix(indices_list, weights, number_of_threads): weights = np.ascontiguousarray(weights) shared_weights = mp.sharedctypes.copy(np.ctypeslib.as_ctypes(np.float64(weights))) initargs = (shared_weights, weights.shape, shared_activations, activations_dim) - with mp.Pool(number_of_threads, initializer=_init_mp_activation_matrix, initargs=initargs) as pool: + with mp.Pool(n_jobs, initializer=_init_mp_activation_matrix, initargs=initargs) as pool: pool.starmap(_run_mp_activation_matrix, enumerate(indices_list)) activations = np.ctypeslib.as_array(shared_activations) activations.shape = activations_dim diff --git a/pyndl/count.py b/pyndl/count.py index 58b21d0..5e45b8e 100644 --- a/pyndl/count.py +++ b/pyndl/count.py @@ -10,11 +10,16 @@ """ # pylint: disable=redefined-outer-name, invalid-name -from collections import Counter +from collections import Counter, namedtuple import gzip import itertools import multiprocessing import sys +import warnings + + +CuesOutcomes = namedtuple('CuesOutcomes', 'n_events, cues, outcomes') +WordsSymbols = namedtuple('WordsSymbols', 'words, symbols') def _job_cues_outcomes(event_file_name, start, step, 
verbose=False): @@ -46,9 +51,9 @@ def _job_cues_outcomes(event_file_name, start, step, verbose=False): def cues_outcomes(event_file_name, - *, number_of_processes=2, verbose=False): + *, n_jobs=2, number_of_processes=None, verbose=False): """ - Counts cues and outcomes in event_file_name using number_of_processes + Counts cues and outcomes in event_file_name using n_jobs processes. Returns @@ -56,14 +61,19 @@ def cues_outcomes(event_file_name, (n_events, cues, outcomes) : (int, collections.Counter, collections.Counter) """ - with multiprocessing.Pool(number_of_processes) as pool: - step = number_of_processes + if number_of_processes is not None: + warnings.warn("Parameter `number_of_processes` is renamed to `n_jobs`. The old name " + "will stop working with v0.9.0.", + DeprecationWarning, stacklevel=2) + n_jobs = number_of_processes + with multiprocessing.Pool(n_jobs) as pool: + step = n_jobs results = pool.starmap(_job_cues_outcomes, ((event_file_name, start, step, verbose) - for start in range(number_of_processes))) + for start in range(n_jobs))) n_events = 0 cues = Counter() outcomes = Counter() @@ -75,7 +85,7 @@ def cues_outcomes(event_file_name, if verbose: print('\n...counting done.') - return n_events, cues, outcomes + return CuesOutcomes(n_events, cues, outcomes) def _job_words_symbols(corpus_file_name, start, step, lower_case=False, @@ -117,9 +127,9 @@ def _job_words_symbols(corpus_file_name, start, step, lower_case=False, def words_symbols(corpus_file_name, - *, number_of_processes=2, lower_case=False, verbose=False): + *, n_jobs=2, number_of_processes=None, lower_case=False, verbose=False): """ - Counts words and symbols in corpus_file_name using number_of_processes + Counts words and symbols in corpus_file_name using n_jobs processes. Returns @@ -127,15 +137,20 @@ def words_symbols(corpus_file_name, (words, symbols) : (collections.Counter, collections.Counter) """ - with multiprocessing.Pool(number_of_processes) as pool: - step = number_of_processes + if number_of_processes is not None: + warnings.warn("Parameter `number_of_processes` is renamed to `n_jobs`. The old name " + "will stop working with v0.9.0.", + DeprecationWarning, stacklevel=2) + n_jobs = number_of_processes + with multiprocessing.Pool(n_jobs) as pool: + step = n_jobs results = pool.starmap(_job_words_symbols, ((corpus_file_name, start, step, lower_case, verbose) for start in - range(number_of_processes))) + range(n_jobs))) words = Counter() symbols = Counter() for words_process, symbols_process in results: @@ -145,7 +160,7 @@ def words_symbols(corpus_file_name, if verbose: print('\n...counting done.') - return words, symbols + return WordsSymbols(words, symbols) def save_counter(counter, filename, *, header='key\tfreq\n'): diff --git a/pyndl/ndl.py b/pyndl/ndl.py index f051343..84c16b4 100644 --- a/pyndl/ndl.py +++ b/pyndl/ndl.py @@ -16,13 +16,14 @@ import threading import time import warnings +import types import cython import pandas as pd import numpy as np import xarray as xr -from . import __version__ +from . import __version__ as pyndl_version from . import count from . import preprocess from . import ndl_parallel @@ -40,12 +41,36 @@ warnings.simplefilter('always', DeprecationWarning) -def events_from_file(event_path): - warnings.warn("Usage of pyndl.ndl.events_from_file is depreceated and will " - "be removed in v0.6.0. 
Please use pyndl.io.events_from_file " - "instead.", - DeprecationWarning, stacklevel=2) - return io.events_from_file(event_path) +class WeightDict(defaultdict): + # pylint: disable=missing-docstring + + """ + Subclass of defaultdict to represent outcome-cue weights. + + Notes + ----- + Weight for each outcome-cue combination is 0 per default. + + """ + + # pylint: disable=W0613 + def __init__(self, *args, **kwargs): + super().__init__(lambda: defaultdict(float)) + + self._attrs = OrderedDict() + + if 'attrs' in kwargs: + self.attrs = kwargs['attrs'] + else: + self.attrs = {} + + @property + def attrs(self): + return self._attrs + + @attrs.setter + def attrs(self, attrs): + self._attrs = OrderedDict(attrs) def ndl(events, alpha, betas, lambda_=1.0, *, @@ -63,8 +88,8 @@ def ndl(events, alpha, betas, lambda_=1.0, *, Parameters ---------- - events : str - path to the event file + events : generator or str + generates cues, outcomes pairs or the path to the event file alpha : float saliency of all cues betas : (float, float) @@ -102,6 +127,13 @@ def ndl(events, alpha, betas, lambda_=1.0, *, """ + # Create temporary file if events is a generator + if isinstance(events, types.GeneratorType): + file_path = tempfile.NamedTemporaryFile().name + io.events_to_file(events, file_path) + events = file_path + del file_path + if number_of_threads is not None: warnings.warn("Parameter `number_of_threads` is renamed to `n_jobs`. The old name " "will stop working with v0.9.0.", @@ -124,7 +156,7 @@ def ndl(events, alpha, betas, lambda_=1.0, *, # preprocessing n_events, cues, outcomes = count.cues_outcomes(events, - number_of_processes=n_jobs, + n_jobs=n_jobs, verbose=verbose) cues = list(cues.keys()) outcomes = list(outcomes.keys()) @@ -167,12 +199,16 @@ def ndl(events, alpha, betas, lambda_=1.0, *, else: raise ValueError('weights need to be None or xarray.DataArray with method=%s' % method) + if any(length > 4294967295 for length in weights.shape): + raise ValueError("Neither number of cues nor outcomes shall exceed 4294967295 " + "for now. See https://github.com/quantling/pyndl/issues/169") + beta1, beta2 = betas with tempfile.TemporaryDirectory(prefix="pyndl", dir=temporary_directory) as binary_path: number_events = preprocess.create_binary_event_files(events, binary_path, cue_map, outcome_map, overwrite=True, - number_of_processes=n_jobs, + n_jobs=n_jobs, events_per_file=events_per_temporary_file, remove_duplicates=remove_duplicates, verbose=verbose) @@ -185,6 +221,8 @@ def ndl(events, alpha, betas, lambda_=1.0, *, if verbose: print('start learning...') # learning + if not weights.data.c_contiguous: + raise ValueError('weights has to be c_contiguous') if method == 'openmp': if not sys.platform.startswith('linux'): raise NotImplementedError("OpenMP is linux only at the moment." @@ -286,7 +324,7 @@ def _format(value): 'wall_time': _format(str(wall_time)), 'hostname': _format(socket.gethostname()), 'username': _format(getpass.getuser()), - 'pyndl': _format(__version__), + 'pyndl': _format(pyndl_version), 'numpy': _format(np.__version__), 'pandas': _format(pd.__version__), 'xarray': _format(xr.__version__), @@ -306,38 +344,6 @@ def _format(value): return new_attrs -class WeightDict(defaultdict): - # pylint: disable=missing-docstring - - """ - Subclass of defaultdict to represent outcome-cue weights. - - Notes - ----- - Weight for each outcome-cue combination is 0 per default. 
-
-    """
-
-    # pylint: disable=W0613
-    def __init__(self, *args, **kwargs):
-        super().__init__(lambda: defaultdict(float))
-
-        self._attrs = OrderedDict()
-
-        if 'attrs' in kwargs:
-            self.attrs = kwargs['attrs']
-        else:
-            self.attrs = {}
-
-    @property
-    def attrs(self):
-        return self._attrs
-
-    @attrs.setter
-    def attrs(self, attrs):
-        self._attrs = OrderedDict(attrs)
-
-
 def dict_ndl(events, alphas, betas, lambda_=1.0, *,
              weights=None, inplace=False, remove_duplicates=None,
             make_data_array=False, verbose=False):
@@ -472,24 +478,62 @@ def dict_ndl(events, alphas, betas, lambda_=1.0, *,
                         __name__ + "." + dict_ndl.__name__,
                         attrs=attrs_to_update)
     if make_data_array:
-        outcomes = list(weights.keys())
-        cues = set()
-        for outcome in outcomes:
-            cues.update(set(weights[outcome].keys()))
+        weights = data_array(weights, attrs=attrs)
+    else:
+        weights.attrs = attrs
-        cues = list(cues)
+    return weights
-        weights_dict = weights
-        shape = (len(outcomes), len(cues))
-        weights = xr.DataArray(np.zeros(shape), attrs=attrs,
-                               coords={'outcomes': outcomes, 'cues': cues},
-                               dims=('outcomes', 'cues'))
-        for outcome in outcomes:
-            for cue in cues:
-                weights.loc[{"outcomes": outcome, "cues": cue}] = weights_dict[outcome][cue]
-    else:
-        weights.attrs = attrs
+def data_array(weights, *, attrs=None):
+    """
+    Convert a dict of dicts of weights or a WeightDict into an xarray.DataArray.
+
+    Parameters
+    ----------
+    weights : dict of dicts of floats or WeightDict
+        the first dict has outcomes as keys and dicts as values
+        the second dict has cues as keys and weights as values
+        weights[outcome][cue] gives the weight between outcome and cue.
+        If a dict of dicts is given, attrs is required. If a WeightDict is
+        given, attrs is optional
+    attrs : dict
+        A dictionary of attributes
+
+    Returns
+    -------
+    weights : xarray.DataArray
+        with dimensions 'outcomes' and 'cues'. You can lookup the weights
+        between a cue and an outcome with ``weights.loc[{'outcomes': outcome,
+        'cues': cue}]`` or ``weights.loc[outcome].loc[cue]``.
+ """ + + if isinstance(weights, xr.DataArray) and weights.dims == ('outcomes', 'cues'): + return weights + + if attrs is None: + try: + attrs = weights.attrs + except AttributeError: + raise AttributeError("weights does not have attributes and no attrs " + "argument is given.") + + outcomes = list(weights.keys()) + cues = set() + for outcome in outcomes: + cues.update(set(weights[outcome].keys())) + + cues = list(cues) + + weights_dict = weights + shape = (len(outcomes), len(cues)) + weights = xr.DataArray(np.zeros(shape), attrs=attrs, + coords={'outcomes': outcomes, 'cues': cues}, + dims=('outcomes', 'cues')) + + for outcome in outcomes: + for cue in cues: + weights.loc[{"outcomes": outcome, "cues": cue}] = weights_dict[outcome][cue] return weights diff --git a/pyndl/ndl_openmp.pyx b/pyndl/ndl_openmp.pyx index 6b2c2d2..d6c3e50 100644 --- a/pyndl/ndl_openmp.pyx +++ b/pyndl/ndl_openmp.pyx @@ -24,7 +24,7 @@ def learn_inplace_binary_to_binary(binary_file_paths, np.ndarray[dtype_t, ndim=2] weights, np.ndarray[unsigned int, ndim=1] all_outcomes, unsigned int chunksize, - unsigned int number_of_threads): + unsigned int n_jobs): cdef unsigned int n_all_cues = weights.shape[1] # number of cues == columns cdef unsigned int* all_outcomes_ptr = all_outcomes.data @@ -45,7 +45,7 @@ def learn_inplace_binary_to_binary(binary_file_paths, number_parts = math.ceil( length_all_outcomes / chunksize) - with nogil, parallel(num_threads=number_of_threads): + with nogil, parallel(num_threads=n_jobs): for ii in prange(number_parts, schedule="dynamic", chunksize=1): start_val = ii * chunksize end_val = min(start_val + chunksize, length_all_outcomes) @@ -66,7 +66,7 @@ def learn_inplace_binary_to_real(binary_file_paths, np.ndarray[dtype_t, ndim=2] outcome_vectors, np.ndarray[dtype_t, ndim=2] weights, unsigned int chunksize, - unsigned int number_of_threads): + unsigned int n_jobs): cdef unsigned int n_all_cues = weights.shape[1] # number of cues == columns cdef unsigned int n_outcome_vector_dimensions = outcome_vectors.shape[1] @@ -88,7 +88,7 @@ def learn_inplace_binary_to_real(binary_file_paths, if n_outcome_vector_dimensions % chunksize != 0: number_parts += 1 - with nogil, parallel(num_threads=number_of_threads): + with nogil, parallel(num_threads=n_jobs): for ii in prange(number_parts, schedule="dynamic", chunksize=1): start_val = ii * chunksize end_val = min(start_val + chunksize, n_outcome_vector_dimensions) @@ -116,7 +116,7 @@ def learn_inplace_real_to_binary(binary_file_paths, np.ndarray[dtype_t, ndim=2] cue_vectors, np.ndarray[dtype_t, ndim=2] weights, unsigned int chunksize, - unsigned int number_of_threads): + unsigned int n_jobs): cdef unsigned int n_all_outcomes = weights.shape[0] # number of outcomes == rows cdef unsigned int n_cue_vector_dimensions = weights.shape[1] # number of cue vector dimensions == columns @@ -138,7 +138,7 @@ def learn_inplace_real_to_binary(binary_file_paths, if n_all_outcomes % chunksize != 0: number_parts += 1 - with nogil, parallel(num_threads=number_of_threads): + with nogil, parallel(num_threads=n_jobs): for ii in prange(number_parts, schedule="dynamic", chunksize=1): start_val = ii * chunksize end_val = min(start_val + chunksize, n_all_outcomes) @@ -166,7 +166,7 @@ def learn_inplace_real_to_real(binary_file_paths, np.ndarray[dtype_t, ndim=2] outcome_vectors, np.ndarray[dtype_t, ndim=2] weights, unsigned int chunksize, - unsigned int number_of_threads): + unsigned int n_jobs): assert weights.shape[1] == cue_vectors.shape[1] assert weights.shape[0] == 
outcome_vectors.shape[1] @@ -192,7 +192,7 @@ def learn_inplace_real_to_real(binary_file_paths, if n_outcome_vector_dimensions % chunksize != 0: number_parts += 1 - with nogil, parallel(num_threads=number_of_threads): + with nogil, parallel(num_threads=n_jobs): for ii in prange(number_parts, schedule="dynamic", chunksize=1): start_val = ii * chunksize end_val = min(start_val + chunksize, n_outcome_vector_dimensions) diff --git a/pyndl/ndl_parallel.pyx b/pyndl/ndl_parallel.pyx index 9d04fe5..a1d727e 100644 --- a/pyndl/ndl_parallel.pyx +++ b/pyndl/ndl_parallel.pyx @@ -5,10 +5,12 @@ from libc.stdio cimport fopen, fread, fclose, FILE from error_codes cimport ErrorCode, NO_ERROR, MAGIC_NUMBER_DOES_NOT_MATCH, VERSION_NUMBER_DOES_NOT_MATCH, INITIAL_ERROR_CODE, ERROR_CODES + cdef unsigned int MAGIC_NUMBER = 14159265 cdef unsigned int CURRENT_VERSION_WITH_FREQ = 215 cdef unsigned int CURRENT_VERSION = 2048 + 215 + # run two sanity checks while loading the extension # 1. check if sizeof(unsigned int) != 4: diff --git a/pyndl/preprocess.py b/pyndl/preprocess.py index 51f7dd9..f762043 100644 --- a/pyndl/preprocess.py +++ b/pyndl/preprocess.py @@ -458,7 +458,7 @@ def filter_event_file(input_event_file, output_event_file, *, keep_cues="all", keep_outcomes="all", remove_cues=None, remove_outcomes=None, cue_map=None, outcome_map=None, - number_of_processes=1, chunksize=100000, + n_jobs=1, number_of_processes=None, chunksize=100000, verbose=False): """ Filter an event file by a list or a map of cues and outcomes. @@ -487,7 +487,7 @@ def filter_event_file(input_event_file, output_event_file, *, maps every outcome as key to the value. Removes all outcome that do not have a key. This can be used to map several different outcomes to the same outcome or to rename outcomes. - number_of_processes : int + n_jobs : int number of threads to use chunksize : int number of chunks per submitted job, should be around 100000 @@ -502,10 +502,15 @@ def filter_event_file(input_event_file, output_event_file, *, is still present in order to capture the background rate of that cues. """ + if number_of_processes is not None: + warnings.warn("Parameter `number_of_processes` is renamed to `n_jobs`. The old name " + "will stop working with v0.9.0.", + DeprecationWarning, stacklevel=2) + n_jobs = number_of_processes job = JobFilter(keep_cues, keep_outcomes, remove_cues, remove_outcomes, cue_map, outcome_map) - with multiprocessing.Pool(number_of_processes) as pool: + with multiprocessing.Pool(n_jobs) as pool: with gzip.open(input_event_file, "rt") as infile: with gzip.open(output_event_file, "wt") as outfile: # copy header @@ -702,7 +707,7 @@ def create_binary_event_files(event_file, outcome_id_map, *, sort_within_event=False, - number_of_processes=2, + n_jobs=2, events_per_file=10000000, overwrite=False, remove_duplicates=None, @@ -723,7 +728,7 @@ def create_binary_event_files(event_file, outcome to id map sort_within_event : bool should we sort the cues and outcomes within the event - number_of_processes : int + n_jobs : int number of threads to use events_per_file : int Number of events in each binary file. 
Has to be larger than 1 @@ -761,7 +766,7 @@ def create_binary_event_files(event_file, number_events = 0 - with multiprocessing.Pool(number_of_processes) as pool: + with multiprocessing.Pool(n_jobs) as pool: def _error_callback(error): if isinstance(error, StopIteration): @@ -807,8 +812,8 @@ def _callback(result): else: raise error ii += 1 - # only start jobs in chunks of 4*number_of_processes - if ii % (number_of_processes*4) == 0: + # only start jobs in chunks of 4*n_jobs + if ii % (n_jobs*4) == 0: while True: if result.ready(): break diff --git a/pyndl/wh.py b/pyndl/wh.py index 667c708..d59c64a 100644 --- a/pyndl/wh.py +++ b/pyndl/wh.py @@ -21,7 +21,7 @@ import numpy as np import xarray as xr -from . import __version__ +from . import __version__ as pyndl_version from . import count from . import preprocess from . import io @@ -363,9 +363,12 @@ def _wh_binary_to_real(events, eta, outcome_vectors, *, # TODO: convert dict to xarray here raise NotImplementedError('dicts are not supported yet.') + if not outcome_vectors.data.data.c_contiguous: + raise ValueError('outcome_vectors have to be c_contiguous') + # preprocessing n_events, cues, outcomes_from_events = count.cues_outcomes(events, - number_of_processes=n_jobs, + n_jobs=n_jobs, verbose=verbose) cues = list(cues.keys()) outcomes_from_events = list(outcomes_from_events.keys()) @@ -408,7 +411,7 @@ def _wh_binary_to_real(events, eta, outcome_vectors, *, with tempfile.TemporaryDirectory(prefix="pyndl", dir=temporary_directory) as binary_path: number_events = preprocess.create_binary_event_files(events, binary_path, cue_map, outcome_map, overwrite=True, - number_of_processes=n_jobs, + n_jobs=n_jobs, events_per_file=events_per_temporary_file, remove_duplicates=remove_duplicates, verbose=verbose) @@ -421,6 +424,8 @@ def _wh_binary_to_real(events, eta, outcome_vectors, *, if verbose: print('start learning...') # learning + if not weights.data.c_contiguous: + raise ValueError('weights has to be c_contiguous') if method == 'openmp': if not sys.platform.startswith('linux'): raise NotImplementedError("OpenMP is linux only at the moment." @@ -451,7 +456,7 @@ def _wh_binary_to_real(events, eta, outcome_vectors, *, # for partlist in part_lists: # working_queue.put(np.array(partlist, dtype=np.uint32)) - # for _ in range(number_of_threads): + # for _ in range(n_jobs): # thread = threading.Thread(target=worker) # thread.start() # threads.append(thread) @@ -475,8 +480,9 @@ def _wh_binary_to_real(events, eta, outcome_vectors, *, __name__ + "." 
+ ndl.__name__, method=method, attrs=attrs_to_be_updated) # post-processing - weights = xr.DataArray(weights, [('outcome_vector_dimensions', outcome_vectors.coords['outcome_vector_dimensions']), - ('cues', cues)], attrs=attrs) + weights = xr.DataArray(weights, coords=[('outcome_vector_dimensions', + outcome_vectors.coords['outcome_vector_dimensions'].data), + ('cues', cues)], attrs=attrs) return weights @@ -535,16 +541,24 @@ def _wh_real_to_binary(events, betas, lambda_, cue_vectors, *, """ if not (remove_duplicates is None or isinstance(remove_duplicates, bool)): raise ValueError("remove_duplicates must be None, True or False") + if not isinstance(events, str): raise ValueError("'events' need to be the path to a gzipped event file not {}".format(type(events))) + if type(cue_vectors) == dict: + # TODO: convert dict to xarray here + raise NotImplementedError('dicts are not supported yet.') + + if not cue_vectors.data.data.c_contiguous: + raise ValueError('cue_vectors have to be c_contiguous') + weights_ini = weights wall_time_start = time.perf_counter() cpu_time_start = time.process_time() # preprocessing n_events, cues_from_events, outcomes_from_events = count.cues_outcomes(events, - number_of_processes=n_jobs, + n_jobs=n_jobs, verbose=verbose) cues_from_events = list(cues_from_events.keys()) @@ -595,7 +609,7 @@ def _wh_real_to_binary(events, betas, lambda_, cue_vectors, *, with tempfile.TemporaryDirectory(prefix="pyndl", dir=temporary_directory) as binary_path: number_events = preprocess.create_binary_event_files(events, binary_path, cue_map, outcome_map, overwrite=True, - number_of_processes=n_jobs, + n_jobs=n_jobs, events_per_file=events_per_temporary_file, remove_duplicates=remove_duplicates, verbose=verbose) @@ -608,6 +622,8 @@ def _wh_real_to_binary(events, betas, lambda_, cue_vectors, *, if verbose: print('start learning...') # learning + if not weights.data.data.c_contiguous: + raise ValueError('weights has to be c_contiguous') if method == 'openmp': if not sys.platform.startswith('linux'): raise NotImplementedError("OpenMP is linux only at the moment." 
@@ -715,9 +731,15 @@ def _wh_real_to_real(events, eta, cue_vectors, outcome_vectors, *, # TODO: convert dict to xarray here raise NotImplementedError('dicts are not supported yet.') + if not cue_vectors.data.data.c_contiguous: + raise ValueError('cue_vectors have to be c_contiguous') + + if not outcome_vectors.data.data.c_contiguous: + raise ValueError('outcome_vectors have to be c_contiguous') + # preprocessing n_events, cues_from_events, outcomes_from_events = count.cues_outcomes(events, - number_of_processes=n_jobs, + n_jobs=n_jobs, verbose=verbose) cues_from_events = list(cues_from_events.keys()) @@ -767,6 +789,8 @@ def _wh_real_to_real(events, eta, cue_vectors, outcome_vectors, *, raise ValueError('weights need to be None or xarray.DataArray with method=%s' % method) del shape + if not weights.data.data.c_contiguous: + raise ValueError('weights has to be c_contiguous') if method == 'numpy': event_generator = io.events_from_file(events) number_events = 0 @@ -809,7 +833,7 @@ def _wh_real_to_real(events, eta, cue_vectors, outcome_vectors, *, with tempfile.TemporaryDirectory(prefix="pyndl", dir=temporary_directory) as binary_path: number_events = preprocess.create_binary_event_files(events, binary_path, cue_map, outcome_map, overwrite=True, - number_of_processes=n_jobs, + n_jobs=n_jobs, events_per_file=events_per_temporary_file, remove_duplicates=remove_duplicates, verbose=verbose) @@ -884,7 +908,7 @@ def _format(value): 'wall_time': _format(str(wall_time)), 'hostname': _format(socket.gethostname()), 'username': _format(getpass.getuser()), - 'pyndl': _format(__version__), + 'pyndl': _format(pyndl_version), 'numpy': _format(np.__version__), 'pandas': _format(pd.__version__), 'xarray': _format(xr.__version__), diff --git a/requirements.txt b/requirements.txt index cda3f44..c5d1a52 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,8 @@ numpy>=1.8.2 -scipy>=1.6.3 +scipy>=1.5.4 cython>=0.21.1 pandas>=0.14.1 xarray>=0.7.2 netCDF4>=1.3.1 pip>=9.0.1 +packaging>=20.0 diff --git a/tests/conftest.py b/tests/conftest.py index 6c11194..d699ae5 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -2,13 +2,13 @@ Configuration for py.test-3. 
''' + import pytest def pytest_addoption(parser): parser.addoption("--runslow", action="store_true", help="run slow tests") - parser.addoption("--no-linux", action="store_true", help="run without linux tests") diff --git a/tests/test_activation.py b/tests/test_activation.py index 385c995..272e79a 100644 --- a/tests/test_activation.py +++ b/tests/test_activation.py @@ -15,6 +15,7 @@ from pyndl import ndl from pyndl.activation import activation + TEST_ROOT = os.path.join(os.path.pardir, os.path.dirname(__file__)) FILE_PATH_SIMPLE = os.path.join(TEST_ROOT, "resources/event_file_simple.tab.gz") FILE_PATH_MULTIPLE_CUES = os.path.join(TEST_ROOT, "resources/event_file_multiple_cues.tab.gz") @@ -50,10 +51,10 @@ def test_activation_matrix(): reference_activations = np.array([[1, 0, 1, 0], [1, 1, 0, 1]]) with pytest.raises(ValueError): - activations = activation(events, weights, number_of_threads=1) + activations = activation(events, weights, n_jobs=1) - activations = activation(events, weights, number_of_threads=1, remove_duplicates=True) - activations_mp = activation(events, weights, number_of_threads=3, remove_duplicates=True) + activations = activation(events, weights, n_jobs=1, remove_duplicates=True) + activations_mp = activation(events, weights, n_jobs=3, remove_duplicates=True) assert np.allclose(reference_activations, activations) assert np.allclose(reference_activations, activations_mp) @@ -74,12 +75,12 @@ def test_ignore_missing_cues(): reference_activations = np.array([[1, 0, 1, 0], [1, 1, 0, 1]]) with pytest.raises(KeyError): - activations = activation(events, weights, number_of_threads=1, + activations = activation(events, weights, n_jobs=1, remove_duplicates=True) - activations = activation(events, weights, number_of_threads=1, + activations = activation(events, weights, n_jobs=1, remove_duplicates=True, ignore_missing_cues=True) - activations_mp = activation(events, weights, number_of_threads=3, + activations_mp = activation(events, weights, n_jobs=3, remove_duplicates=True, ignore_missing_cues=True) assert np.allclose(reference_activations, activations) @@ -104,9 +105,9 @@ def test_activation_dict(): } with pytest.raises(ValueError): - activations = activation(events, weights, number_of_threads=1) + activations = activation(events, weights, n_jobs=1) - activations = activation(events, weights, number_of_threads=1, remove_duplicates=True) + activations = activation(events, weights, n_jobs=1, remove_duplicates=True) for outcome, activation_list in activations.items(): assert np.allclose(reference_activations[outcome], activation_list) @@ -129,9 +130,9 @@ def test_ignore_missing_cues_dict(): } with pytest.raises(ValueError): - activations = activation(events, weights, number_of_threads=1) + activations = activation(events, weights, n_jobs=1) - activations = activation(events, weights, number_of_threads=1, + activations = activation(events, weights, n_jobs=1, remove_duplicates=True, ignore_missing_cues=True) for outcome, activation_list in activations.items(): assert np.allclose(reference_activations[outcome], activation_list) @@ -175,10 +176,10 @@ def dec_func(*args, **kwargs): print("") gc.collect() asp = (time_test(activation, of="single threaded") - (events, weights, number_of_threads=1, remove_duplicates=True)) + (events, weights, n_jobs=1, remove_duplicates=True)) gc.collect() amp = (time_test(activation, of="multi threaded (up to 8 threads)") - (events, weights, number_of_threads=8, remove_duplicates=True)) + (events, weights, n_jobs=8, remove_duplicates=True)) del weights del 
events gc.collect() diff --git a/tests/test_count.py b/tests/test_count.py index 173a952..dfdb5de 100644 --- a/tests/test_count.py +++ b/tests/test_count.py @@ -14,7 +14,7 @@ def test_cues_outcomes(): n_events, cues, outcomes = count.cues_outcomes(EVENT_RESOURCE_FILE) n_events3, cues3, outcomes3 = count.cues_outcomes(EVENT_RESOURCE_FILE, - number_of_processes=6, + n_jobs=6, verbose=True) assert n_events == 2772 assert n_events == n_events3 @@ -25,7 +25,7 @@ def test_cues_outcomes(): def test_words_symbols(): words, symbols = count.words_symbols(CORPUS_RESOURCE_FILE) words3, symbols3 = count.words_symbols(CORPUS_RESOURCE_FILE, - number_of_processes=3, + n_jobs=3, verbose=True) assert words == words3 assert symbols == symbols3 diff --git a/tests/test_ndl.py b/tests/test_ndl.py index ed417b8..e5e19f7 100644 --- a/tests/test_ndl.py +++ b/tests/test_ndl.py @@ -124,6 +124,36 @@ def test_exceptions(): with pytest.raises(ValueError, match="events_per_file has to be larger than 1") as e_info: ndl.ndl(FILE_PATH_SIMPLE, ALPHA, BETAS, method='threading', events_per_temporary_file=1) + with pytest.raises(AttributeError, match="weights does not have attributes " + "and no attrs argument is given.") as e_info: + ndl.data_array(dict()) + +# # Test usually exeeds memory limit; It demands ~32GB of RAM. +# with pytest.raises(ValueError, match="Neither number of cues nor outcomes " +# "shall exceed 4294967295 for now. See " +# "https://github.com/quantling/pyndl/issues/169") as e_info: +# ndl.ndl(FILE_PATH_SIMPLE, ALPHA, BETAS, +# weights=xr.DataArray(np.zeros(shape=(4294967295 + 1, 1)))) + + +def test_generator_learning(): + events = io.events_from_file(FILE_PATH_SIMPLE) + result_ndl_gen = ndl.ndl(events, ALPHA, BETAS, method='threading') + result_ndl = ndl.ndl(FILE_PATH_SIMPLE, ALPHA, BETAS, method='threading') + + unequal, unequal_ratio = compare_arrays(FILE_PATH_SIMPLE, + result_ndl_gen, + result_ndl) + print(result_ndl_gen) + print('%.2f ratio unequal' % unequal_ratio) + assert len(unequal) == 0 # pylint: disable=len-as-condition + + +def test_data_array_cast(): + result_ndl = ndl.ndl(FILE_PATH_SIMPLE, ALPHA, BETAS, method='threading') + casted_result = ndl.data_array(result_ndl) + assert isinstance(casted_result, xr.DataArray) and (result_ndl == casted_result).all() + def test_continue_learning_dict(): events_simple = pd.read_csv(FILE_PATH_SIMPLE, sep="\t") diff --git a/tests/test_preprocess.py b/tests/test_preprocess.py index 36b4a23..24553d0 100644 --- a/tests/test_preprocess.py +++ b/tests/test_preprocess.py @@ -24,7 +24,7 @@ def test_bandsample(): resource_file = os.path.join(TEST_ROOT, "resources/event_file_trigrams_to_word.tab.gz") _, _, outcome_freq_map = cues_outcomes(resource_file, - number_of_processes=2) + n_jobs=2) outcome_freq_map_filtered = bandsample(outcome_freq_map, 50, cutoff=1, seed=None, verbose=False) assert len(outcome_freq_map_filtered) == 50 @@ -210,7 +210,7 @@ def test_filter_event_file(): filter_event_file(input_event_file, output_event_file, keep_cues=cues, keep_outcomes=outcomes, - number_of_processes=2, + n_jobs=2, verbose=True) _, cue_freq_map, outcome_freq_map = cues_outcomes(output_event_file) cues_new = list(cue_freq_map) @@ -328,7 +328,7 @@ def test_preprocessing(): lower_case=True, verbose=True) # read in outcomes - _, _, outcome_freq_map = cues_outcomes(event_file, number_of_processes=2) + _, _, outcome_freq_map = cues_outcomes(event_file, n_jobs=2) # reduce number of outcomes through bandsampling outcome_freq_map_filtered = bandsample(outcome_freq_map, 50, cutoff=1, 
seed=None)
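
Usage note (not part of the patch): the changes above rename every parallelism argument to n_jobs (the old keywords keep working but emit a DeprecationWarning and are scheduled for removal in v0.9.0), make count.cues_outcomes and count.words_symbols return named tuples, let ndl.ndl accept a generator of (cues, outcomes) pairs, and add ndl.data_array for converting dict-of-dict weights. A minimal sketch of the renamed API follows; the event file path is hypothetical and the learning parameters are arbitrary.

# Sketch of the renamed `n_jobs` API and the new helpers introduced above.
# The path below is a placeholder for any gzipped tab-separated event file
# with 'cues' and 'outcomes' columns.
from pyndl import count, ndl
from pyndl.activation import activation

if __name__ == '__main__':
    event_file = 'docs/data/event_file.tab.gz'  # hypothetical path

    # `n_jobs` replaces `number_of_processes`; the result is now a named tuple
    counts = count.cues_outcomes(event_file, n_jobs=2)
    print(counts.n_events, len(counts.cues), len(counts.outcomes))

    # `ndl.ndl` still takes a file path; a generator of (cues, outcomes)
    # pairs is now accepted as well
    weights = ndl.ndl(event_file, alpha=0.1, betas=(0.1, 0.1),
                      method='threading', n_jobs=2)

    # `n_jobs` replaces `number_of_threads` in `activation`
    activations = activation(event_file, weights, n_jobs=2,
                             remove_duplicates=True)

    # dict-of-dict weights can be cast to an xarray.DataArray with the new
    # `ndl.data_array`; `attrs` is required for plain dicts
    toy = ndl.data_array({'outcome1': {'cue1': 0.5}},
                         attrs={'comment': 'toy weights'})
    print(toy.loc[{'outcomes': 'outcome1', 'cues': 'cue1'}])

Keeping the deprecated keywords as explicit parameters with a None default (rather than hiding them in **kwargs) makes the migration path visible in each signature and lets the warning point callers to the new name.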