MAINT/ENH: move Fortran optimizers, smash.multiple_optimize() is deprecated (#250)

* MAINT/ENH: move Fortran optimizers, multiple_optimize is deprecated
* minor fix optimize.py standardize run args
* FIX PR: fix doc compilation and make check
* FIX PR: doc compilation
* FIX PR: doc compilation Samples object
* FIX PR: restore _custom_directive and conf.py, fix _doc.py
* Remove redundant in Python function sbs_optimize
* Fix PR 1: bound check, raise message, check key rr_parameters
* FIX PR 2: compute cost and jac at the same time for bfgs optimizer
* FIX PR 3: set final control to update the optimized parameters
DassHydro · Jul 29, 2024 · dfa48be · dfa48be
1 parent 069b7af
commit dfa48be
Show file tree

Hide file tree

Showing 25 changed files with 700 additions and 1,364 deletions.
diff --git a/doc/source/api_reference/principal_methods/simulation.rst b/doc/source/api_reference/principal_methods/simulation.rst
@@ -28,13 +28,6 @@ Numerical Optimization
       optimize
       bayesian_optimize
 
-Multiple Numerical Optimization
-*******************************
-.. autosummary::
-      :toctree: smash/
-
-      multiple_optimize
-
 Multiple Sets Estimation
 ************************
 .. autosummary::

diff --git a/doc/source/api_reference/returned_objects/index.rst b/doc/source/api_reference/returned_objects/index.rst
@@ -13,7 +13,6 @@ Simulation
 
     Samples
     MultipleForwardRun
-    MultipleOptimize
     ForwardRun
     Optimize
     MultisetEstimate

diff --git a/smash/__init__.py b/smash/__init__.py
@@ -12,10 +12,8 @@
 from smash.core.simulation.estimate.estimate import MultisetEstimate, multiset_estimate
 from smash.core.simulation.optimize.optimize import (
     BayesianOptimize,
-    MultipleOptimize,
     Optimize,
     bayesian_optimize,
-    multiple_optimize,
     optimize,
 )
 from smash.core.simulation.options import (
@@ -39,7 +37,6 @@ def __getattr__(name):
     "forward_run",
     "multiple_forward_run",
     "optimize",
-    "multiple_optimize",
     "bayesian_optimize",
     "default_optimize_options",
     "default_bayesian_optimize_options",
@@ -51,7 +48,6 @@ def __getattr__(name):
     "ForwardRun",
     "MultipleForwardRun",
     "Optimize",
-    "MultipleOptimize",
     "BayesianOptimize",
     "MultisetEstimate",
     "Signatures",

diff --git a/smash/core/model/model.py b/smash/core/model/model.py
@@ -78,7 +78,6 @@
     from smash.core.simulation.estimate.estimate import MultisetEstimate
     from smash.core.simulation.optimize.optimize import (
         BayesianOptimize,
-        MultipleOptimize,
         Optimize,
     )
     from smash.core.simulation.run.run import ForwardRun, MultipleForwardRun
@@ -2351,7 +2350,7 @@ def optimize(
     @_multiset_estimate_doc_appender
     def multiset_estimate(
         self,
-        multiset: MultipleForwardRun | MultipleOptimize,
+        multiset: MultipleForwardRun,
         alpha: Numeric | ListLike | None = None,
         common_options: dict[str, Any] | None = None,
         return_options: dict[str, Any] | None = None,

diff --git a/smash/core/simulation/_doc.py b/smash/core/simulation/_doc.py
@@ -862,8 +862,8 @@ def _gen_docstring_from_base_doc(
 ----------
 %(model_parameter)s
 
-multiset : `MultipleForwardRun` or `MultipleOptimize`
-    The returned object created by `multiple_forward_run` or `multiple_optimize` method containing
+multiset : `MultipleForwardRun <MultipleForwardRun>`
+    The returned object created by `multiple_forward_run` method containing
     information about multiple sets of rainfall-runoff parameters or initial states.
 
 alpha : `float`, `list[float, ...]`, or None, default None
@@ -906,7 +906,6 @@ def _gen_docstring_from_base_doc(
 --------
 MultisetEstimate : Represents multiset estimate optional results.
 MultipleForwardRun : Represents multiple forward run computation result.
-MultipleOptimize : Represents multiple optimize computation result.
 
 Examples
 --------
@@ -1082,8 +1081,10 @@ def _gen_docstring_from_base_doc(
 model : `Model`
     Primary data structure of the hydrological model `smash`.
 
-samples : `Samples`
-    Represents the generated samples result.
+samples : `Samples` or `dict[str, Any]`
+    Represents the rainfall-runoff parameters and/or initial states sample.
+    This can be either a `Samples` object or a dictionary, where the keys are parameter/state names
+    and the corresponding value is a sequence of specified values, representing multiple samples.
 
 cost_options : `dict[str, Any]` or None, default None
     Dictionary containing computation cost options for simulated and observed responses. The elements are:
@@ -1143,128 +1144,6 @@ def _gen_docstring_from_base_doc(
     """
 )
 
-_multiple_optimize_doc = (
-    # % TODO FC: Add advanced user guide
-    """
-Run multiple optimization processes with multiple sets of parameters (i.e. starting points), yielding multiple
-solutions.
-
-Parameters
-----------
-model : `Model`
-    Primary data structure of the hydrological model `smash`.
-
-samples : `Samples`
-    Represents the generated samples result.
-
-mapping : `str`, default 'uniform'
-    Type of mapping. Should be one of
-
-    - ``'uniform'``
-    - ``'distributed'``
-    - ``'multi-linear'``
-    - ``'multi-polynomial'``
-
-    .. hint::
-        See the :ref:`math_num_documentation.mapping` section
-
-optimizer : `str` or None, default None
-    Name of optimizer. Should be one of
-
-    - ``'sbs'`` (``'uniform'`` **mapping** only)
-    - ``'lbfgsb'`` (``'uniform'``, ``'distributed'``, ``'multi-linear'`` or ``'multi-polynomial'``
-      **mapping** only)
-
-    .. note::
-        If not given, a default optimizer will be set depending on the optimization mapping:
-
-        - **mapping** = ``'uniform'``; **optimizer** = ``'sbs'``
-        - **mapping** = ``'distributed'``, ``'multi-linear'``, or ``'multi-polynomial'``; **optimizer** =
-          ``'lbfgsb'``
-
-    .. hint::
-        See the :ref:`math_num_documentation.optimization_algorithm` section
-
-optimize_options : `dict[str, Any]` or None, default None
-    Dictionary containing optimization options for fine-tuning the optimization process.
-    See `%(default_optimize_options_func)s` to retrieve the default optimize options based on the **mapping**
-    and **optimizer**.
-
-"""
-    + _gen_docstring_from_base_doc(
-        OPTIMIZE_OPTIONS_BASE_DOC,
-        [
-            "parameters",
-            "bounds",
-            "control_tfm",
-            "descriptor",
-            "termination_crit",
-        ],
-        nindent=1,
-    )
-    + """
-cost_options : `dict[str, Any]` or None, default None
-    Dictionary containing computation cost options for simulated and observed responses. The elements are:
-
-"""
-    + _gen_docstring_from_base_doc(
-        COST_OPTIONS_BASE_DOC,
-        DEFAULT_SIMULATION_COST_OPTIONS["optimize"].keys(),
-        nindent=1,
-    )
-    + """
-common_options : `dict[str, Any]` or None, default None
-    Dictionary containing common options with two elements:
-
-"""
-    + _gen_docstring_from_base_doc(
-        COMMON_OPTIONS_BASE_DOC, DEFAULT_SIMULATION_COMMON_OPTIONS.keys(), nindent=1
-    )
-    + """
-
-Returns
--------
-multiple_optimize : `MultipleOptimize`
-    It returns an object containing the results of the multiple optimize.
-
-See Also
---------
-Samples : Represents the generated samples result.
-MultipleOptimize : Represents the multiple optimize result.
-
-Examples
---------
->>> from smash.factory import load_dataset
->>> from smash.factory import generate_samples
->>> setup, mesh = load_dataset("cance")
->>> model = smash.Model(setup, mesh)
-
-Define sampling problem and generate samples
-
->>> problem = {
-...            'num_vars': 4,
-...            'names': ['cp', 'ct', 'kexc', 'llr'],
-...            'bounds': [[1, 2000], [1, 1000], [-20, 5], [1, 1000]]
-... }
->>> sr = generate_samples(problem, n=3, random_state=11)
-
-Run multiple optimization processes
-
->>> mopt = smash.multiple_optimize(
-...     model,
-...     samples=sr,
-...     optimize_options={"termination_crit": {"maxiter": 2}}
-... )
-</> Multiple Optimize
-    Optimize 3/3 (100%(percent)s)
-
-Get the cost values through multiple runs of optimization
-
->>> mopt.cost
-array([0.51374453, 0.0528878 , 0.15056956], dtype=float32)
-"""
-)
-
 _optimize_control_info_doc = (
     """
 Information on the optimization control vector of Model.
@@ -1741,16 +1620,6 @@ def _gen_docstring_from_base_doc(
 
 _multiple_forward_run_doc_appender = DocAppender(_multiple_forward_run_doc, indents=0)
 
-_multiple_optimize_doc_appender = DocAppender(_multiple_optimize_doc, indents=0)
-_smash_multiple_optimize_doc_substitution = DocSubstitution(
-    default_optimize_options_func="default_optimize_options",
-    parameters_serr_mu_parameters="",
-    parameters_serr_sigma_parameters="",
-    parameters_note_serr_parameters="",
-    bounds_get_serr_parameters_bounds="",
-    percent="%",
-)
-
 _optimize_control_info_doc_appender = DocAppender(_optimize_control_info_doc, indents=0)
 _smash_optimize_control_info_doc_substitution = DocSubstitution(
     default_optimize_options_func="default_optimize_options",

diff --git a/smash/core/simulation/estimate/_standardize.py b/smash/core/simulation/estimate/_standardize.py
@@ -7,7 +7,6 @@
     _standardize_simulation_return_options,
     _standardize_simulation_return_options_finalize,
 )
-from smash.core.simulation.optimize.optimize import MultipleOptimize
 from smash.core.simulation.run.run import MultipleForwardRun
 
 if TYPE_CHECKING:
@@ -19,7 +18,7 @@
 
 def _standardize_multiset_estimate_args(
     model: Model,
-    multiset: MultipleForwardRun | MultipleOptimize,
+    multiset: MultipleForwardRun,
     alpha: Numeric | ListLike,
     common_options: dict | None,
     return_options: dict | None,
@@ -39,10 +38,10 @@ def _standardize_multiset_estimate_args(
 
 
 def _standardize_multiset_estimate_multiset(
-    multiset: MultipleForwardRun | MultipleOptimize,
-) -> MultipleForwardRun | MultipleOptimize:
-    if not isinstance(multiset, (MultipleForwardRun, MultipleOptimize)):
-        raise TypeError("multiset must be a MultipleForwardRun or MultipleOptimize object")
+    multiset: MultipleForwardRun,
+) -> MultipleForwardRun:
+    if not isinstance(multiset, MultipleForwardRun):
+        raise TypeError("multiset must be a MultipleForwardRun object")
 
     return multiset
 

diff --git a/smash/core/simulation/estimate/_tools.py b/smash/core/simulation/estimate/_tools.py
@@ -3,7 +3,7 @@
 from typing import TYPE_CHECKING
 
 import numpy as np
-from scipy.stats import gaussian_kde
+from scipy.stats import gaussian_kde as scipy_gaussian_kde
 from tqdm import tqdm
 
 from smash.core.simulation.run.run import _forward_run
@@ -16,19 +16,21 @@
 
 
 def _compute_density(
-    samples: Samples,
-    optimized_parameters: dict,
+    samples: Samples | None,
+    spatialized_samples: dict[np.ndarray],
     active_cell: np.ndarray,
 ) -> dict:
     density = {}
 
-    for p, optim_param in optimized_parameters.items():
-        dst = getattr(samples, "_dst_" + p)
-        density[p] = np.tile(
-            dst, (*active_cell.shape, 1)
-        )  # spatialized density (*active_cell.shape, n_sample)
+    for p, spl_sample in spatialized_samples.items():
+        if samples is not None:
+            dst = getattr(samples, "_dst_" + p)
+            density[p] = np.tile(
+                dst, (*active_cell.shape, 1)
+            )  # convert to spatialized density (*active_cell.shape, n_sample)
 
-        if isinstance(optim_param, np.ndarray):
+        else:
+            density[p] = np.zeros((*active_cell.shape, spl_sample.shape[-1]))
             estimated_cell = np.zeros(active_cell.shape)
 
             for ac in [0, 1]:  # Iterate on two blocs active/inactive cell
@@ -37,33 +39,29 @@ def _compute_density(
                 if np.all(
                     [
                         np.allclose(
-                            optim_param[..., i][mask],
-                            optim_param[..., i][mask][0],
+                            spl_sample[..., i][mask],
+                            spl_sample[..., i][mask][0],
                         )
-                        for i in range(optim_param.shape[-1])
+                        for i in range(spl_sample.shape[-1])
                     ]
-                ):  # if optim_param[mask] contain only uniform values
-                    unf_optim_param = optim_param[mask][0, :]
+                ):  # if spl_sample[mask] contain only uniform values
+                    unif_sample = spl_sample[mask][0, :]
 
-                    if np.allclose(unf_optim_param, unf_optim_param[0]):
-                        estimted_density = np.ones(unf_optim_param.shape)
+                    if np.allclose(unif_sample, unif_sample[0]):
+                        density[p][mask] = np.ones(unif_sample.shape)
                     else:
-                        estimted_density = gaussian_kde(unf_optim_param)(unf_optim_param)
-
-                    density[p][mask] *= estimted_density  # compute joint-probability
+                        density[p][mask] = scipy_gaussian_kde(unif_sample)(unif_sample)
 
                     estimated_cell[mask] = True
 
             for i, j in np.ndindex(active_cell.shape):  # Iterate on all grid cells
                 if not estimated_cell[i, j]:
-                    unf_optim_param_ij = optim_param[i, j, :]
+                    unif_sample_ij = spl_sample[i, j, :]
 
-                    if np.allclose(unf_optim_param_ij, unf_optim_param_ij[0]):
-                        estimted_density = np.ones(unf_optim_param_ij.shape)
+                    if np.allclose(unif_sample_ij, unif_sample_ij[0]):
+                        density[p][i, j] = np.ones(unif_sample_ij.shape)
                     else:
-                        estimted_density = gaussian_kde(unf_optim_param_ij)(unf_optim_param_ij)
-
-                    density[p][i, j] *= estimted_density  # compute joint-probability
+                        density[p][i, j] = scipy_gaussian_kde(unif_sample_ij)(unif_sample_ij)
 
     return density