Merge pull request #240 from ICB-DCM/develop
Release 0.9.25
yannikschaelte authored Jan 8, 2020
2 parents 50b8ba0 + 50ad2b3 commit 48ac49d
Showing 44 changed files with 894 additions and 412 deletions.
3 changes: 0 additions & 3 deletions .codacy.yml

This file was deleted.

1 change: 0 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -89,4 +89,3 @@ doc/*.log
.pytest_cache
dask-worker-space
*.lock
.coverage*
142 changes: 83 additions & 59 deletions doc/examples/conversion_reaction.ipynb

Large diffs are not rendered by default.

22 changes: 10 additions & 12 deletions doc/examples/data_plots.ipynb
@@ -20,18 +20,9 @@
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/yannik/anaconda3/lib/python3.7/site-packages/distributed/config.py:20: YAMLLoadWarning: calling yaml.load() without Loader=... is deprecated, as the default Loader is unsafe. Please read https://msg.pyyaml.org/load for full details.\n",
" defaults = yaml.load(f)\n"
]
}
],
"outputs": [],
"source": [
"from pyabc.visualization import plot_data\n",
"from pyabc.visualization import plot_data_default\n",
"import numpy as np\n",
"import pandas as pd\n",
"import matplotlib.pyplot as plt"
@@ -121,10 +112,17 @@
}
],
"source": [
"plot_data(observed, simulated)\n",
"plot_data_default(observed, simulated)\n",
"plt.gcf().set_size_inches(9, 6)\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Note that there is also a function ``pyabc.visualization.plot_data_callback``, which operates via callback functions and thus allows more flexibility. This function is illustrated in the conversion reaction notebook."
]
}
],
"metadata": {
199 changes: 95 additions & 104 deletions doc/examples/noise.ipynb

Large diffs are not rendered by default.

93 changes: 26 additions & 67 deletions doc/examples/parameter_inference.ipynb

Large diffs are not rendered by default.

15 changes: 15 additions & 0 deletions doc/releasenotes.rst
@@ -8,6 +8,21 @@ Release Notes
..........


0.9.25 (2020-01-08)
-------------------

* Add summary statistics callback plot function (#231).
* Add possibility to log employed norms in StochasticAcceptor (#231) and
temperature proposals in Temperature (#232).
* Implement optional early stopping in the MulticoreEvalParallelSampler and
the SingleCoreSampler, when a maximum simulation number is exceeded
(default behavior untouched).
* Log stopping reason in ABCSMC.run (all #236).
* Implement Poisson (#237) and negative binomial (#239) stochastic kernels.
* Enable password protection for Redis sampler (#238).
* Fix scipy deprecations (#234, #241).
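The new stochastic kernels evaluate count-data likelihoods. As a rough illustration of the quantity a Poisson kernel works with, here is a standard-library sketch of the Poisson log-density; ``poisson_logpmf`` is an illustrative helper, not pyABC's implementation:

```python
import math

def poisson_logpmf(k, mu):
    # log P(X = k) for X ~ Poisson(mu):
    # k * log(mu) - mu - log(k!)
    return k * math.log(mu) - mu - math.lgamma(k + 1)

# Log-density of observing k = 3 counts at rate mu = 2.0
value = poisson_logpmf(3, 2.0)
```

In a stochastic kernel, this log-density (summed over the observed data points) plays the role that a distance plays in standard ABC.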


0.9.24 (2019-11-19)
-------------------

88 changes: 75 additions & 13 deletions doc/sampler.rst
@@ -105,23 +105,59 @@ How to setup a Redis based distributed cluster
----------------------------------------------


Step 1: Start a Redis server without password authentication
Step 1: Reconfigure the redis.conf file
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Start one some machine, which is reachable by the machine running the pyABC
main application and by the workers, a Redis server, disabling password
authentication:
It is advised to run the Redis server in protected mode. With password
authentication enabled, only authenticated clients can communicate with the
server, and all unauthorized access is rejected. To run the Redis server with
authentication required, you first need to modify the redis.conf file.

The redis.conf file contains the Redis configuration. Usually, it can be
found in ``/etc/redis/``. To allow safe and secure communication, the
redis.conf file should be reconfigured. You can copy the file to your home
directory and then modify it as follows:

1. The Redis server should be configured to bind to network interfaces other
than localhost (127.0.0.1). To enable that, make sure that the ``bind``
configuration option is either commented out or set to an appropriate network
interface IP address.


.. code:: bash
redis-server --protected-mode no
#bind 127.0.0.1
2. Password authentication must be enabled. To configure that, make sure that
the ``masterauth`` and ``requirepass`` configuration options are uncommented
and set to the SAME value. Note: it is advised to select a complex
password string.

You should get an output looking similar to the one below:
.. code:: bash
masterauth your_redis_password
.. code:: bash
requirepass your_redis_password
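Taken together, the relevant part of the reconfigured redis.conf might look as follows (a sketch; ``your_redis_password`` is a placeholder for your own password):

.. code:: bash

   # bind 127.0.0.1
   masterauth your_redis_password
   requirepass your_redis_password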
Step 2: Start a Redis server with password authentication
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Start, on some machine which is reachable by the machine running the pyABC
main application and by the workers, a Redis server, specifying the location
of the configuration file that you modified in the first step and the
port number:


.. code:: bash
redis-server /path/to/redis.conf --port 6379
Note that if you do not specify a port, Redis assigns the default port,
6379, to your server.

.. literalinclude:: redis_setup/redis_start_output.txt
:language: bash

If you're on Linux, you can install redis either via your package manager
or if you're using anaconda via
@@ -135,7 +171,7 @@ We assume for now that the IP address of the machine running the Redis server
is 111.111.111.111.


Step 2 or 3: Start pyABC
Step 3 or 4: Start pyABC
~~~~~~~~~~~~~~~~~~~~~~~~

It does not matter what you do first: starting pyABC or starting the
@@ -161,21 +197,23 @@ server. Then start the ABC-SMC run as usual with
passing the stopping conditions.


Step 2 or 3: Start the workers
Step 3 or 4: Start the workers
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

It does not matter what you do first: starting pyABC or starting the
workers. You can even dynamically add workers after the sampling has started.
Start as many workers as you whish on the machines you whish. Up to 10,000
Start as many workers as you wish on the machines you wish. Up to 10,000
workers should not pose any problem if the model evaluation times are on the
second scale or longer.

.. code:: bash
abc-redis-worker --host=111.111.111.111
abc-redis-worker --host=111.111.111.111 --port 6379 --password mypass
Again, 111.111.111.111 is the IP address of the machine running the Redis
server. You should get an output similar to
server, and we use the default port number. You also need to specify the
password that you set in the configuration file ``redis.conf``. In our case,
that password was ``mypass``. You should get an output similar to


.. code:: bash
@@ -194,6 +232,30 @@ allows you to start several worker processes in parallel.
This might be handy in situations where you have to use a whole cluster node
with several cores.

Optional: Running redis server without password authentication
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
In some cases, a user might want to run the Redis server without password
authentication. To do so, start the Redis server without specifying the
location of the ``redis.conf`` file, and use the flag ``--protected-mode``
with the value ``no``

.. code:: bash
redis-server --protected-mode no
You should get an output looking similar to the one below:

.. literalinclude:: redis_setup/redis_start_output.txt
:language: bash


Later, to start workers, you don't need to use the password flag

.. code:: bash
abc-redis-worker --host=111.111.111.111
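Before starting workers, you can optionally check that the server is reachable (assuming the ``redis-cli`` client is installed; omit ``-a`` if you run without a password):

.. code:: bash

   redis-cli -h 111.111.111.111 -p 6379 -a mypass ping

A reachable server answers with ``PONG``.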
Optional: Monitoring
~~~~~~~~~~~~~~~~~~~~
6 changes: 5 additions & 1 deletion pyabc/__init__.py
@@ -36,7 +36,9 @@
NormalKernel,
IndependentNormalKernel,
IndependentLaplaceKernel,
BinomialKernel)
BinomialKernel,
PoissonKernel,
NegativeBinomialKernel)
from .epsilon import (
Epsilon,
NoEpsilon,
@@ -106,6 +108,8 @@
"IndependentNormalKernel",
"IndependentLaplaceKernel",
"BinomialKernel",
"PoissonKernel",
"NegativeBinomialKernel",
# epsilon
"Epsilon",
"NoEpsilon",
15 changes: 13 additions & 2 deletions pyabc/acceptor/acceptor.py
@@ -23,6 +23,7 @@
from ..epsilon import Epsilon
from ..parameters import Parameter
from .pdf_norm import pdf_norm_max_found
from ..storage import save_dict_to_json


logger = logging.getLogger("Acceptor")
@@ -332,7 +333,8 @@ class StochasticAcceptor(Acceptor):
def __init__(
self,
pdf_norm_method: Callable = None,
apply_importance_weighting: bool = True):
apply_importance_weighting: bool = True,
log_file: str = None):
"""
Parameters
----------
@@ -350,14 +352,17 @@ def __init__(
Whether to apply weights to correct for a bias induced by
samples exceeding the density normalization. This should usually
be False only for testing purposes.
log_file: str, optional
A log file for storing data of the acceptor that are currently not
saved in the database. The data are saved in json format.
"""
super().__init__()

if pdf_norm_method is None:
pdf_norm_method = pdf_norm_max_found
self.pdf_norm_method = pdf_norm_method

self.apply_importance_weighting = apply_importance_weighting
self.log_file = log_file

# maximum pdfs, indexed by time
self.pdf_norms = {}
@@ -408,8 +413,14 @@ def _update(self,
prev_temp=prev_temp)
self.pdf_norms[t] = pdf_norm

self.log(t)

def log(self, t):
logger.debug(f"pdf_norm={self.pdf_norms[t]:.4e} for t={t}.")

if self.log_file:
save_dict_to_json(self.pdf_norms, self.log_file)

def get_epsilon_config(self, t: int) -> dict:
"""
Pack the pdf normalization and the kernel scale.
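The new ``log_file`` option serializes the ``pdf_norms`` dict to JSON. A small standard-library sketch of such a round trip (the ``save_norms`` helper is hypothetical, standing in for ``pyabc.storage.save_dict_to_json``):

```python
import json
import os
import tempfile

def save_norms(d, path):
    # Hypothetical stand-in for pyabc.storage.save_dict_to_json:
    # dump a {time index: pdf norm} dict to a JSON file.
    with open(path, "w") as f:
        json.dump(d, f)

pdf_norms = {0: 0.0, 1: -1.5}
path = os.path.join(tempfile.gettempdir(), "acceptor_log.json")
save_norms(pdf_norms, path)

with open(path) as f:
    restored = json.load(f)
# JSON object keys are strings, so integer time indices
# come back as strings after the round trip.
```

Note that integer keys become strings in JSON, which any consumer of such a log file has to account for.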
16 changes: 12 additions & 4 deletions pyabc/distance/__init__.py
@@ -16,7 +16,8 @@
IdentityFakeDistance,
AcceptAllDistance,
SimpleFunctionDistance,
to_distance)
to_distance,
)
from .distance import (
PNormDistance,
AdaptivePNormDistance,
@@ -27,7 +28,8 @@
MinMaxDistance,
PercentileDistance,
RangeEstimatorDistance,
DistanceWithMeasureList)
DistanceWithMeasureList,
)
from .scale import (
median_absolute_deviation,
mean_absolute_deviation,
@@ -41,7 +43,8 @@
standard_deviation_to_observation,
span,
mean,
median)
median,
)
from .kernel import (
StochasticKernel,
SCALE_LIN,
@@ -50,7 +53,10 @@
NormalKernel,
IndependentNormalKernel,
IndependentLaplaceKernel,
BinomialKernel)
BinomialKernel,
PoissonKernel,
NegativeBinomialKernel,
)


__all__ = [
@@ -95,4 +101,6 @@
"IndependentNormalKernel",
"IndependentLaplaceKernel",
"BinomialKernel",
"PoissonKernel",
"NegativeBinomialKernel",
]
12 changes: 5 additions & 7 deletions pyabc/distance/distance.py
@@ -1,5 +1,3 @@
import scipy as sp

import numpy as np
from scipy import linalg as la
from typing import List, Callable, Union
@@ -658,18 +656,18 @@ def __init__(self, measures_to_use='all'):
self._whitening_transformation_matrix = None

def _dict_to_vect(self, x):
return sp.asarray([x[key] for key in self.measures_to_use])
return np.asarray([x[key] for key in self.measures_to_use])

def _calculate_whitening_transformation_matrix(self, sum_stats):
samples_vec = sp.asarray([self._dict_to_vect(x)
samples_vec = np.asarray([self._dict_to_vect(x)
for x in sum_stats])
# samples_vec is an array of shape nr_samples x nr_features
means = samples_vec.mean(axis=0)
centered = samples_vec - means
covariance = centered.T.dot(centered)
w, v = la.eigh(covariance)
self._whitening_transformation_matrix = (
v.dot(sp.diag(1. / sp.sqrt(w))).dot(v.T))
v.dot(np.diag(1. / np.sqrt(w))).dot(v.T))

def initialize(self,
t: int,
@@ -813,12 +811,12 @@ class PercentileDistance(RangeEstimatorDistance):

@staticmethod
def upper(parameter_list):
return sp.percentile(parameter_list,
return np.percentile(parameter_list,
100 - PercentileDistance.PERCENTILE)

@staticmethod
def lower(parameter_list):
return sp.percentile(parameter_list,
return np.percentile(parameter_list,
PercentileDistance.PERCENTILE)

def get_config(self):
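The whitening computation migrated from ``scipy`` to ``numpy`` above can be sketched in isolation. The function below mirrors the same conventions (unnormalized covariance, symmetric eigendecomposition) but is an illustrative reimplementation, not pyABC's code:

```python
import numpy as np

def whitening_matrix(samples: np.ndarray) -> np.ndarray:
    """Compute W such that (samples - mean) @ W has identity 'covariance'."""
    means = samples.mean(axis=0)
    centered = samples - means
    # Unnormalized covariance, matching the diff above.
    covariance = centered.T @ centered
    w, v = np.linalg.eigh(covariance)
    # v diag(1/sqrt(w)) v^T maps the centered samples to identity covariance.
    return v @ np.diag(1.0 / np.sqrt(w)) @ v.T

rng = np.random.default_rng(0)
samples = rng.normal(size=(200, 3)) @ rng.normal(size=(3, 3))
whitened = (samples - samples.mean(axis=0)) @ whitening_matrix(samples)
# whitened.T @ whitened is (numerically) the 3x3 identity matrix.
```

This whitening makes the subsequent distance computation invariant to linear correlations between summary statistics.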
