diff --git a/.gitignore b/.gitignore index 6eba1ba0..e92d8943 100644 --- a/.gitignore +++ b/.gitignore @@ -71,4 +71,5 @@ tmp* # Documentation files doc/source/api_reference/**/smash -doc/*-dataset \ No newline at end of file +doc/*-dataset +doc/*~ diff --git a/Makefile b/Makefile index 76da3553..83beb661 100644 --- a/Makefile +++ b/Makefile @@ -41,7 +41,7 @@ test-coverage: #% Generate baseline for test with args (see argparser in gen_baseline.py) test-baseline: - cd smash/tests ; python3 gen_baseline.py + cd smash/tests ; python3 generate_baseline.py #% Format Python files with ruff and Fortran files with fprettify format: diff --git a/doc/source/_static/Inversion_process_flowchart.png b/doc/source/_static/Inversion_process_flowchart.png new file mode 100644 index 00000000..fb4c96d9 Binary files /dev/null and b/doc/source/_static/Inversion_process_flowchart.png differ diff --git a/doc/source/_static/bib/references.bib b/doc/source/_static/bib/references.bib index 2e93e317..7265dff9 100644 --- a/doc/source/_static/bib/references.bib +++ b/doc/source/_static/bib/references.bib @@ -1258,4 +1258,14 @@ @article{todini1996arno abstract = {This paper describes in detail a semi-distributed conceptual rainfall-runoff model known as the ARNO model, which is now in widespread use both in land-surface-atmosphere process research and as an operational flood forecasting tool on several catchments in different parts of the world. The model, which derives its name from its first application to the Arno River, incorporates the concepts of a spatial probability distribution of soil moisture capacity and of dynamically varying saturated contributing areas. The ARNO model is characterized by two main components: the first and most important component represents the soil moisture balance, and the second describes the transfer of runoff to the outlet of the basin. The relevance of the soil component emerges from the highly nonlinear mechanism with which the soil moisture content and its distribution controls the dynamically varying size of the saturated areas mainly responsible for a direct conversion of rainfall into runoff. The second component describes the way in which runoff is routed and transferred along the hillslopes to the drainage channels and along the channel network to the outlet of the basin. Additional components, such as the evapotranspiration, snowmelt and groundwater modules, are also described. A discussion on the advantages of the model, calibration requirements and techniques is also presented, together with the physical interpretation of model parameters. Finally, after describing the original calibration of the ARNO model on the Arno basin, and a comparison with several conceptual models, recent applications of the ARNO model, as part of a real-time flood forecasting system, as a tool for investigating land use changes and as an interesting approach to the evaluation of land-surface-atmosphere interactions at general circulation model (GCM) scale, are illustrated.} } + +@book{monnier2021coursevariational, + title={Data Assimilation - Inverse Problems, Assimilation, Control, Learning}, + author={Monnier, Jerome}, + year={2024}, + publisher={INSA Toulouse}, + url = {https://www.math.univ-toulouse.fr/~jmonnie/Enseignement/CourseVDA.pdf} +} + + @Comment{jabref-meta: databaseType:bibtex;} diff --git a/doc/source/_static/css/smash.css b/doc/source/_static/css/smash.css index 033fc157..ff5d53dc 100644 --- a/doc/source/_static/css/smash.css +++ b/doc/source/_static/css/smash.css @@ -104,6 +104,10 @@ h4 { background-color: transparent !important; } +details.sd-dropdown { + box-shadow: none !important; +} + .bolditalic { font-weight: bold; font-style: italic; @@ -116,7 +120,7 @@ html[data-theme=dark] .sd-card { html[data-theme=dark] .sd-card .sd-card-header { background-color: transparent; - color: #150458 !important; + color: #F6F1F1 !important; } html[data-theme=dark] .sd-card .sd-card-footer { diff --git a/doc/source/_static/flowchart_forward_gridded.png b/doc/source/_static/flowchart_forward_gridded.png new file mode 100644 index 00000000..b2e63b0c Binary files /dev/null and b/doc/source/_static/flowchart_forward_gridded.png differ diff --git a/doc/source/_static/forward_composition_flowchart.png b/doc/source/_static/forward_composition_flowchart.png new file mode 100644 index 00000000..d830ce2d Binary files /dev/null and b/doc/source/_static/forward_composition_flowchart.png differ diff --git a/doc/source/_static/forward_simple_flowchart.png b/doc/source/_static/forward_simple_flowchart.png new file mode 100644 index 00000000..5c0dd4ef Binary files /dev/null and b/doc/source/_static/forward_simple_flowchart.png differ diff --git a/doc/source/asset/eqs_smash_doc.lyx b/doc/source/asset/eqs_smash_doc.lyx new file mode 100644 index 00000000..2464a7e3 --- /dev/null +++ b/doc/source/asset/eqs_smash_doc.lyx @@ -0,0 +1,541 @@ +#LyX 2.3 created this file. For more info see http://www.lyx.org/ +\lyxformat 544 +\begin_document +\begin_header +\save_transient_properties true +\origin unavailable +\textclass article +\begin_preamble +\usepackage{amsmath} +\usepackage{amsfonts} +\end_preamble +\use_default_options true +\maintain_unincluded_children false +\language english +\language_package default +\inputencoding auto +\fontencoding global +\font_roman "default" "default" +\font_sans "default" "default" +\font_typewriter "default" "default" +\font_math "auto" "auto" +\font_default_family default +\use_non_tex_fonts false +\font_sc false +\font_osf false +\font_sf_scale 100 100 +\font_tt_scale 100 100 +\use_microtype false +\use_dash_ligatures true +\graphics default +\default_output_format default +\output_sync 0 +\bibtex_command default +\index_command default +\paperfontsize default +\spacing single +\use_hyperref false +\papersize default +\use_geometry false +\use_package amsmath 1 +\use_package amssymb 1 +\use_package cancel 1 +\use_package esint 1 +\use_package mathdots 1 +\use_package mathtools 1 +\use_package mhchem 1 +\use_package stackrel 1 +\use_package stmaryrd 1 +\use_package undertilde 1 +\cite_engine basic +\cite_engine_type default +\biblio_style plain +\use_bibtopic false +\use_indices false +\paperorientation portrait +\suppress_date false +\justification true +\use_refstyle 1 +\use_minted 0 +\index Index +\shortcut idx +\color #008000 +\end_index +\secnumdepth 3 +\tocdepth 3 +\paragraph_separation indent +\paragraph_indentation default +\is_math_indent 0 +\math_numbering_side default +\quotes_style english +\dynamic_quotes 0 +\papercolumns 1 +\papersides 1 +\paperpagestyle default +\tracking_changes false +\output_changes false +\html_math_output 0 +\html_css_as_file 0 +\html_be_strict false +\end_header + +\begin_body + +\begin_layout Standard + +\series bold +Flowchart graph: Forward problem statement +\end_layout + +\begin_layout Standard +\begin_inset VSpace defskip +\end_inset + + +\end_layout + +\begin_layout Standard +\begin_inset Formula +\[ +\mathcal{M} +\] + +\end_inset + + +\end_layout + +\begin_layout Standard +\begin_inset Formula $\boldsymbol{\theta}(x,t)$ +\end_inset + + +\end_layout + +\begin_layout Standard +\begin_inset Formula $\boldsymbol{h}_{0}(x,t)$ +\end_inset + + +\end_layout + +\begin_layout Standard +\begin_inset Formula +\[ +\boldsymbol{D}(x,t) +\] + +\end_inset + + +\end_layout + +\begin_layout Standard +\begin_inset Formula +\[ +\boldsymbol{I}(x,t) +\] + +\end_inset + + +\end_layout + +\begin_layout Standard +\begin_inset Formula +\[ +\boldsymbol{U}(x,t)=\left[Q,\boldsymbol{h},\boldsymbol{q}\right](x,t) +\] + +\end_inset + + +\end_layout + +\begin_layout Standard +\begin_inset VSpace defskip +\end_inset + + +\end_layout + +\begin_layout Standard + +\series bold +Flowchart graph: Forward operators composition +\end_layout + +\begin_layout Standard +\begin_inset VSpace defskip +\end_inset + + +\end_layout + +\begin_layout Standard +\begin_inset Formula +\[ +q_{snw\rightarrow rr}(x,t) +\] + +\end_inset + + +\end_layout + +\begin_layout Standard +\begin_inset Formula +\[ +q_{rr\rightarrow hy}(x,t) +\] + +\end_inset + + +\begin_inset Formula +\[ +\phi_{1} +\] + +\end_inset + + +\end_layout + +\begin_layout Standard +\begin_inset Formula +\[ +\phi_{2} +\] + +\end_inset + + +\end_layout + +\begin_layout Standard +\begin_inset Formula +\[ +\phi +\] + +\end_inset + + +\end_layout + +\begin_layout Standard +\begin_inset Formula +\[ +\boldsymbol{\rho}_{1} +\] + +\end_inset + + +\end_layout + +\begin_layout Standard +\begin_inset Formula +\[ +\boldsymbol{\rho}_{2} +\] + +\end_inset + + +\end_layout + +\begin_layout Standard +\begin_inset Formula +\[ +f_{\boldsymbol{q}}(x,t) +\] + +\end_inset + + +\end_layout + +\begin_layout Standard +\begin_inset VSpace defskip +\end_inset + + +\end_layout + +\begin_layout Standard + +\series bold +Flowchart graph: Forward model operators detail +\end_layout + +\begin_layout Standard +\begin_inset VSpace defskip +\end_inset + + +\end_layout + +\begin_layout Standard +\begin_inset Formula $\left[S,T_{e}\right](x,t)$ +\end_inset + + +\end_layout + +\begin_layout Standard +\begin_inset Formula +\[ +\mathcal{M}_{snw} +\] + +\end_inset + + +\end_layout + +\begin_layout Standard +\begin_inset Formula $m_{lt}(x,t)$ +\end_inset + + +\end_layout + +\begin_layout Standard +\begin_inset Formula $\left[P,E\right](x,t)$ +\end_inset + + +\end_layout + +\begin_layout Standard +\begin_inset Formula +\[ +\mathcal{M}_{rr} +\] + +\end_inset + + +\end_layout + +\begin_layout Standard +\begin_inset Formula $\boldsymbol{h}(x,t)$ +\end_inset + + +\end_layout + +\begin_layout Standard +\begin_inset Formula $\boldsymbol{q}(x,t)$ +\end_inset + + +\end_layout + +\begin_layout Standard +\begin_inset Formula $q_{t}(x,t)$ +\end_inset + + +\end_layout + +\begin_layout Standard +\begin_inset Formula $\mathcal{D}_{\Omega}(x)$ +\end_inset + + +\end_layout + +\begin_layout Standard +\begin_inset Formula +\[ +\mathcal{M}_{hy} +\] + +\end_inset + + +\end_layout + +\begin_layout Standard +\begin_inset Formula $Q(x,t)$ +\end_inset + + +\begin_inset VSpace defskip +\end_inset + + +\end_layout + +\begin_layout Standard + +\series bold +Others: +\end_layout + +\begin_layout Standard +\begin_inset Formula +\[ +\boldsymbol{U}(x,t)=(Q,\boldsymbol{h},\boldsymbol{q})(x,t)=\mathcal{M}\left(\left[\mathcal{\boldsymbol{I}},\boldsymbol{\mathcal{D}}\right](x,t);\left[\boldsymbol{\theta},\boldsymbol{h}_{0}\right](x)\right) +\] + +\end_inset + + +\end_layout + +\begin_layout Standard +\begin_inset VSpace defskip +\end_inset + + +\end_layout + +\begin_layout Standard +\begin_inset Formula +\[ +\mathcal{M}=\mathcal{M}_{hy}\left(\,.\,,\mathcal{M}_{rr}\left(\,.\,,\mathcal{M}_{snw}\left(.\right)\right)\right) +\] + +\end_inset + + +\begin_inset VSpace defskip +\end_inset + + +\end_layout + +\begin_layout Standard +\begin_inset Formula +\[ +\boldsymbol{U}\left(\boldsymbol{\rho}\right)=\mathcal{M}\left(\,.\,,\boldsymbol{\rho}\right) +\] + +\end_inset + + +\begin_inset Formula +\[ +D_{\rho}\mathcal{M} +\] + +\end_inset + + +\begin_inset Formula +\[ +J\left(\boldsymbol{U}\left(\boldsymbol{\rho}\right),\boldsymbol{Y}^{*}\right) +\] + +\end_inset + + +\end_layout + +\begin_layout Standard +\begin_inset Formula +\[ +\nabla_{\boldsymbol{\rho}}J +\] + +\end_inset + + +\end_layout + +\begin_layout Standard +\begin_inset Formula +\[ +\boldsymbol{\rho} +\] + +\end_inset + + +\end_layout + +\begin_layout Standard +\begin_inset Formula +\[ +\hat{\boldsymbol{\rho}} +\] + +\end_inset + + +\end_layout + +\begin_layout Standard +\begin_inset Formula +\[ +\boldsymbol{\rho}^{*} +\] + +\end_inset + + +\end_layout + +\begin_layout Standard +\begin_inset VSpace defskip +\end_inset + + +\end_layout + +\begin_layout Standard +\begin_inset Graphics + filename /home/pagarambois/Documents/Distant/smash/doc/source/_static/forward_simple_flowchart.png + +\end_inset + + +\end_layout + +\begin_layout Standard +\begin_inset Graphics + filename forward_composition_flowchart.png + +\end_inset + + +\end_layout + +\begin_layout Standard +\begin_inset VSpace defskip +\end_inset + + +\end_layout + +\begin_layout Standard +\begin_inset Graphics + filename Inversion_process_flowchart.png + +\end_inset + + +\end_layout + +\begin_layout Standard +\begin_inset VSpace defskip +\end_inset + + +\end_layout + +\begin_layout Standard +\begin_inset Graphics + filename /home/pagarambois/Documents/Distant/smash/doc/source/_static/flowchart_forward_gridded.png + +\end_inset + + +\end_layout + +\end_body +\end_document diff --git a/doc/source/asset/smash_graphes_structures.odp b/doc/source/asset/smash_graphes_structures.odp new file mode 100644 index 00000000..ff9ef235 Binary files /dev/null and b/doc/source/asset/smash_graphes_structures.odp differ diff --git a/doc/source/index.rst b/doc/source/index.rst index 1f2e0760..0b01162c 100644 --- a/doc/source/index.rst +++ b/doc/source/index.rst @@ -11,9 +11,9 @@ smash documentation `smash` is a computational software framework dedicated to **S**\patially distributed **M**\odelling and **AS**\similation for **H**\ydrology, enabling to tackle spatially distributed differentiable hydrological modeling, with learnable parameterization-regionalization. This platform enables to combine vertical and -lateral flow operators, either physically based or hydrid with neural networks, and perform high dimensional -non linear optimization from multi-source data. It is designed to simulate discharge hydrographs and -hydrological states at any spatial location within a basin and reproduce the hydrological response of +lateral flow operators, either process-based conceptual or hydrid with neural networks, and perform high +dimensional non linear optimization from multi-source data. It is designed to simulate discharge hydrographs +and hydrological states at any spatial location within a basin and reproduce the hydrological response of contrasted catchments, both for operational forecasting of floods and low flows, by taking advantage of spatially distributed meteorological forcings, physiographic data and hydrometric observations. diff --git a/doc/source/math_num_documentation/forward_inverse_problem.rst b/doc/source/math_num_documentation/forward_inverse_problem.rst index 8d920e8b..bce94ac8 100644 --- a/doc/source/math_num_documentation/forward_inverse_problem.rst +++ b/doc/source/math_num_documentation/forward_inverse_problem.rst @@ -4,77 +4,105 @@ Forward & Inverse Problems ========================== -This section explains : +This section explains: -- The **forward hydrologic problem statement**, consisting in modeling the spatio-temporal evolution of water states-fluxes within a basin given atmospheric forcings and basin physical descriptors. - -- The **inverse problem statement**, aiming to use spatio-temporal observations of hydrological state-fluxes to estimate uncertain or unknows model parameters. +- The **hydrological modeling problem statement (forward/direct problem)**, that consists in modeling the spatio-temporal evolution of water states-fluxes within a basin/domain given atmospheric forcings and basin physical descriptors. +- The **parameter estimation problem statement (inverse problem)**, that pertains to estimating uncertain or unknows model parameters from the available spatio-temporal observations of hydrological state-fluxes and from basin physical descriptors. + +Forward problem statement +------------------------- + +The forward/direct hydrological modeling problem statement is formulated here. -Forward problem ---------------- +Let :math:`\Omega\subset\mathbb{R}^{2}` denote a 2D spatial domain, :math:`x\in\Omega` the spatial coordinates, and :math:`t\in\left]0,T\right]` the physical time. -Let :math:`\Omega\subset\mathbb{R}^{2}` denote a 2D spatial domain, :math:`x\in\Omega` the spatial coordinate, and :math:`t\in\left]0,T\right]` the physical time. -Hydrological model -****************** +Hydrological model definition +***************************** + +The spatially distributed hydrological model is a dynamic operator :math:`\mathcal{M}` projecting fields of atmospheric forcings :math:`I`, +catchment physical descriptors :math:`\boldsymbol{D}` onto surface discharge :math:`Q`, model states :math:`\boldsymbol{h}`, and internal fluxes :math:`\boldsymbol{q}` such that: -The spatially distributed hydrological model is a dynamic operator :math:`\mathcal{M}` projecting fields of atmospheric forcings :math:`\mathcal{\boldsymbol{I}}`, -catchment physiographic descriptors :math:`\boldsymbol{\mathcal{D}}` onto surface discharge :math:`Q`, model states :math:`\boldsymbol{h}`, and internal fluxes :math:`\boldsymbol{q}` such that: .. math:: :name: math_num_documentation.forward_inverse_problem.forward_problem_M_1 - - \boldsymbol{U}(x,t)=(Q,\boldsymbol{h},\boldsymbol{q})(x,t)=\mathcal{M}\left(\left[\mathcal{\boldsymbol{I}},\boldsymbol{\mathcal{D}}\right](x,t);\left[\boldsymbol{\theta},\boldsymbol{h}_{0}\right](x)\right) + + \boxed{ + \boldsymbol{U}(x,t)=(Q,\boldsymbol{h},\boldsymbol{q})(x,t)=\mathcal{M}\left(\left[\boldsymbol{I},\boldsymbol{D}\right](x,t);\left[\boldsymbol{\theta},\boldsymbol{h}_{0}\right](x)\right) + } with :math:`\boldsymbol{U}(x,t)` the modeled state-flux variables, :math:`\boldsymbol{\theta}` the parameters and :math:`\boldsymbol{h}_{0}` the initial states. -.. note:: The dimensions of model arrays, by denoting :math:`N=N_{x} \times N_{t}` with :math:`N_{x}` the number of cells in :math:`\Omega` and :math:`N_t` the number of simulation time steps in :math:`\left]0,T\right]`, are as follows: - - Surface discharge :math:`Q(x,t)\in\mathbb{R}^{N}` +.. figure:: ../_static/forward_simple_flowchart.png + :align: center + :width: 800 + + Flowchart of the forward modeling problem: input data, forward hydrological model :math:`\mathcal{M}`, simulated quantites. + +.. dropdown:: Sizes of model variables + :animate: fade-in-slide-down + + The sizes of the variables in the forward/direct problem are detailed here. We denote by :math:`N=N_{x} \times N_{t}` with :math:`N_{x}` the number of cells in :math:`\Omega` and :math:`N_t` the number of simulation time steps in :math:`\left]0,T\right]`. + + - Surface discharge :math:`Q(x,t)\in\mathbb{R}^{N}` - States :math:`\boldsymbol{h}=\left(h_{1}(x,t),...,h_{N_{h}}(x,t)\right)\in\mathbb{R}^{N \times {N_{h}}}` with :math:`N_h` the number of distinct state variables - Internal fluxes :math:`\boldsymbol{q}=\left(q_{1}(x,t),...,q_{N_{q}}(x,t)\right)\in\mathbb{R}^{N \times N_{q}}` with :math:`N_q` the number of distinct internal fluxes - - Atmospheric forcings :math:`\mathcal{\boldsymbol{I}}=\left(\mathcal{I}_{1}(x,t),...,\mathcal{I}_{N_{\mathcal{I}}}(x,t)\right)\in\mathbb{R}^{N \times N_{\mathcal{I}}}` with :math:`N_\mathcal{I}` the number of atmospheric forcings types + - Atmospheric forcings :math:`\mathcal{\boldsymbol{I}}=\left(I_{1}(x,t),...,I_{N_{I}}(x,t)\right)\in\mathbb{R}^{N \times N_{I}}` with :math:`N_I` the number of atmospheric forcings types - - Physiographic descriptors :math:`\mathcal{\boldsymbol{D}}=\left(\mathcal{D}_{1}(x,t),...,\mathcal{D}_{N_{\mathcal{D}}}(x,t)\right)\in\mathbb{R}^{N \times N_{\mathcal{D}}}` with :math:`N_{\mathcal{D}}` the number of physical descriptors + - Physiographic descriptors :math:`\mathcal{\boldsymbol{D}}=\left(D_{1}(x,t),...,D_{N_{D}}(x,t)\right)\in\mathbb{R}^{N \times N_{D}}` with :math:`N_{D}` the number of physical descriptors - Parameters :math:`\boldsymbol{\theta}=\left(\theta_{1}(x),...,\theta_{N_{\theta}}(x)\right)\in\mathbb{R}^{N \times N_{\theta}}` with :math:`N_{\theta}` the number of distinct parameters - Initial states :math:`\boldsymbol{h}_{0}=\boldsymbol{h}(x,t=0)` -Operators composition +Operators Composition ********************* -Note that the operator :math:`\mathcal{M}` can be a composite function containing, at least differentiable operators for vertical and lateral transfert processes within each cell :math:`x\in\Omega`, and routing operator from cells to cells following a flow direction map, plus (optionally) deep neural networks enabling learnable process parameterization and learnable conceptual parameters regionalization as described later. +The **forward hydrological model** :math:`\mathcal{M}` is obtained by combining at least two operators: the hydrological operator :math:`\mathcal{M}_{rr}` to simulate runoff from atmospheric forcings and use this runoff to feed a routing operator :math:`\mathcal{M}_{hy}` for cell to cell flow routing. +A snow module :math:`\mathcal{M}_{snw}` can also be added. -Snow, Production and Routing Operators -====================================== +A **learnable mapping** :math:`\phi`, composed of neural networks, can also be included into the forward model to predict parameters and/or fluxes corrections from various input data. -The hydrological model writes +Several differentiable model structures are proposed in `smash` and detailed in :ref:`model strucures section `. + +.. figure:: ../_static/forward_composition_flowchart.png + :align: center + :width: 800 + + Schematic view of operators composition into the forward model :math:`\mathcal{M}`. + + +Hydrological Model Operators +============================ + +The forward hydrological model is obtained by partial composition (each operator taking various other inputs data and paramters) of the flow operators writes: .. math:: :name: math_num_documentation.forward_inverse_problem.forward_problem_Mhy_circ_Mrr - \mathcal{M}=\mathcal{M}_{hy}\circ\mathcal{M}_{rr}\circ\mathcal{M}_{snw} + \mathcal{M}=\mathcal{M}_{hy}\left(\,.\,,\mathcal{M}_{rr}\left(\,.\,,\mathcal{M}_{snw}\left(.\right)\right)\right) -and is composed of the snow module :math:`\mathcal{M}_{snw}` producing a melt flux :math:`m_{lt}(x,t)` inflowing the production module :math:`\mathcal{M}_{rr}` that produces elemental discharge :math:`q_t(x,t)` inflowing a routing module :math:`\mathcal{M}_{hy}`. +with the snow module :math:`\mathcal{M}_{snw}` producing a melt flux :math:`q_{snw\rightarrow rr}(x,t)` feeding the production module :math:`\mathcal{M}_{rr}` that produces runoff flux :math:`q_{rr \rightarrow hy}(x,t)` feeding the routing module :math:`\mathcal{M}_{hy}`. +Models structures are detailed in :ref:`model strucures section `. .. _math_num_documentation.forward_inverse_problem.mapping: Learnable Mapping ================= -The spatio-temporal fields of model parameters and initial states can be constrained with spatialization rules (e.g. spatial patches for control reduction), or even explained by physiographic descriptors :math:`\boldsymbol{\mathcal{D}}`. This can be achieved via an operator :math:`\phi` projecting physical descriptors :math:`\boldsymbol{\mathcal{D}}` onto model conceptual parameters such that +The spatio-temporal fields of model parameters and initial states can be constrained with spatialization rules (e.g. spatial patches for control reduction), or even explained by physiographic descriptors :math:`\boldsymbol{D}`. This can be achieved via an operator :math:`\phi` projecting physical descriptors :math:`\boldsymbol{D}` onto model conceptual parameters such that .. math:: :name: math_num_documentation.forward_inverse_problem.mapping_general - \left(\boldsymbol{\theta}(x),\boldsymbol{h}_{0}(x)\right)=\phi\left(\boldsymbol{\mathcal{D}}(x,t),\boldsymbol{\rho}\right) + \left(\boldsymbol{\theta}(x),\boldsymbol{h}_{0}(x)\right)=\phi\left(\boldsymbol{D}(x,t),\boldsymbol{\rho}\right) with :math:`\boldsymbol{\rho}` the control vector that can be optimized. @@ -83,10 +111,22 @@ Consequently, replacing in :ref:`Eq. 1 `). -These 3 modules are linked in the following way, :math:`\forall x\in\Omega\;,\;\forall t \in]0 .. T]`: +This section explains the `smash` forward modeling paradigm. It details the various models structures available, composed of **differentiable hydrological-hydraulic operators**. -- The ``snow`` module :math:`\mathcal{M}_{snw}` generates a melt flux :math:`m_{lt}(x,t)` which is then summed with the precipitation flux to inflow the ``hydrological`` module :math:`\mathcal{M}_{rr}`. -- The ``hydrological`` module :math:`\mathcal{M}_{rr}` generates an elementary discharge :math:`q_t(x,t)` which is routed by the ``routing`` module :math:`\mathcal{M}_{hy}` to simulate the surface discharge :math:`Q(x,t)`. +In `smash`, :math:`\forall x \in\Omega\;,\;\forall t \in]0 .. T]`, a forward model structure :math:`\mathcal{M}=\mathcal{M}_{hy}\left(\,.\,,\mathcal{M}_{rr}\left(\,.\,,\mathcal{M}_{snw}\left(.\right)\right)\right)` (cf. :ref:`Eq. 2 `) is a combination of 3 models: -In this section, we will detail all the operators in each module for a given cell :math:`x\in\Omega` and a time step :math:`t\in]0 .. T]`. +- The ``snow`` model :math:`\mathcal{M}_{snw}` generating a melt flux :math:`m_{lt}(x,t)` which is then summed with the precipitation flux to feed the ``hydrological`` model :math:`\mathcal{M}_{rr}`. +- The ``hydrological`` production module :math:`\mathcal{M}_{rr}` generating an elementary discharge :math:`q_t(x,t)` which feeds the routing model. +- The ``routing`` model :math:`\mathcal{M}_{hy}` that simulates the routing of discharge :math:`Q(x,t)`. + +.. figure:: ../_static/flowchart_forward_gridded.png + :align: center + :width: 800 + + Schematic view of a gridded model :math:`\mathcal{M}`: input data, model components, simulated quantites. + .. _math_num_documentation.forward_structure.snow_module: diff --git a/doc/source/math_num_documentation/index.rst b/doc/source/math_num_documentation/index.rst index 7d73b913..b0097ac5 100755 --- a/doc/source/math_num_documentation/index.rst +++ b/doc/source/math_num_documentation/index.rst @@ -4,21 +4,22 @@ Math / Num Documentation ======================== -`smash` is a computational software framework dedicated to **S**\patially distributed **M**\odelling and **AS**\similation for **H**\ydrology, -enabling to tackle spatially distributed differentiable hydrological modeling, with learnable parameterization-regionalization. -This platform enables to combine vertical and lateral flow operators, either physically based or hydrid with neural networks, and perform high -dimensional non linear optimization from multi-source data. -It is designed to simulate discharge hydrographs and hydrological states at any spatial location within a basin and reproduce the hydrological -response of contrasted catchments, both for operational forecasting of floods and low flows, by taking advantage of spatially distributed -meteorological forcings, physiographic data and hydrometric observations. +`smash` is a computational software framework dedicated to **S**\patially distributed **M**\odelling and +**AS**\similation for **H**\ydrology, enabling to tackle spatially distributed differentiable hydrological +modeling, with learnable parameterization-regionalization. This platform enables to combine vertical and +lateral flow operators, either process-based conceptual or hydrid with neural networks, and perform high +dimensional non linear optimization from multi-source data. It is designed to simulate discharge hydrographs +and hydrological states at any spatial location within a basin and reproduce the hydrological response of +contrasted catchments, both for operational forecasting of floods and low flows, by taking advantage of +spatially distributed meteorological forcings, physiographic data and hydrometric observations. + `smash` is a modular platform, open source and is designed for collaborative research and operational applications. It is based on a computationally efficient Fortran kernel enabling parallel computations over large domains, and that is automatically differentiable with the Tapenade engine :cite:p:`hascoet2013tapenade` to generate the numerical adjoint model. The adjoint model enables to compute acurately the gradient of a cost function to high dimensional (spatially distributed) parameters and to perform optimization and learning. It is interfaced in Python using f90wrap :cite:p:`Kermode2020-f90wrap` to (``i``) provide a user-friendly and versatile interface for quick learning and efficient development of research and applications, as well as to (``ii``) directly make accessible the wealth of Python modules and libraries developped by a large and active community (Data pre/post-Processing, Geographic Information System, Deep Learning, etc). -This documentation details the mathematical basis of the forward and inverse modeling problems, their numerical resolution along with optimization -and estimation algorithms. +This documentation details the conceptual basis and mathematical basis of the forward and inverse modeling problems, their numerical resolution along with optimization and estimation algorithms. .. toctree:: :maxdepth: 2 diff --git a/doc/source/user_guide/classical_uses/lez_regionalization.rst b/doc/source/user_guide/classical_uses/lez_regionalization.rst index 5b3f38a9..ea6d6bfc 100644 --- a/doc/source/user_guide/classical_uses/lez_regionalization.rst +++ b/doc/source/user_guide/classical_uses/lez_regionalization.rst @@ -65,20 +65,14 @@ Six physical descriptors are considered in this example, which are: .. TODO: Add descriptor explanation -We can open a Python interface in the **conda environment**. The current working directory will be assumed to be the directory where +We can open a Python interface. The current working directory will be assumed to be the directory where the ``Lez-dataset`` is located. -Activate the environment: - -.. code-block:: shell - - conda activate smash - Open a Python interface: .. code-block:: shell - (smash) python3 + python3 .. ipython:: python :suppress: diff --git a/doc/source/user_guide/quickstart/cance_first_simulation.rst b/doc/source/user_guide/quickstart/cance_first_simulation.rst index 1b59e2ee..d8cef7d3 100644 --- a/doc/source/user_guide/quickstart/cance_first_simulation.rst +++ b/doc/source/user_guide/quickstart/cance_first_simulation.rst @@ -111,20 +111,14 @@ the data is the observed discharge in **cubic meter per second** and any negativ It is not necessary to restrict the observed discharge series to the simulation period. It is possible to provide a time series covering a larger time window over which `smash` will only read the lines corresponding to dates after the starting date provided in the header. -Now that a brief tour of the necessary data has been done, we can open a Python interface in the **conda environment**. The current working directory +Now that a brief tour of the necessary data has been done, we can open a Python interface. The current working directory will be assumed to be the directory where the ``Cance-dataset`` is located. -Activate the environment: - -.. code-block:: shell - - conda activate smash - Open a Python interface: .. code-block:: shell - (smash) python3 + python3 .. ipython:: python :suppress: diff --git a/doc/source/user_guide/quickstart/france_large_domain_simulation.rst b/doc/source/user_guide/quickstart/france_large_domain_simulation.rst index 0c73ab09..bcef1c0d 100644 --- a/doc/source/user_guide/quickstart/france_large_domain_simulation.rst +++ b/doc/source/user_guide/quickstart/france_large_domain_simulation.rst @@ -42,20 +42,14 @@ Now a folder called ``France-dataset`` should be accessible and contain the foll In this dataset, there are no gauge attributes or observed discharge, we are only interested in performing a forward run on a domain without optimization beforehand. -We can open a Python interface in the **conda environment**. The current working directory will be assumed to be the directory where +We can open a Python interface. The current working directory will be assumed to be the directory where the ``France-dataset`` is located. -Activate the environment: - -.. code-block:: shell - - conda activate smash - Open a Python interface: .. code-block:: shell - (smash) python3 + python3 .. ipython:: python :suppress: diff --git a/doc/source/user_guide/quickstart/lez_split_sample_test.rst b/doc/source/user_guide/quickstart/lez_split_sample_test.rst index ca968144..7ac6cf54 100644 --- a/doc/source/user_guide/quickstart/lez_split_sample_test.rst +++ b/doc/source/user_guide/quickstart/lez_split_sample_test.rst @@ -46,20 +46,14 @@ Now a folder called ``Lez-dataset`` should be accessible and contain the followi - ``descriptor`` A directory containing physiographic descriptors in GeoTiff format. -We can open a Python interface in the **conda environment**. The current working directory will be assumed to be the directory where +We can open a Python interface. The current working directory will be assumed to be the directory where the ``Lez-dataset`` is located. -Activate the environment: - -.. code-block:: shell - - conda activate smash - Open a Python interface: .. code-block:: shell - (smash) python3 + python3 .. ipython:: python :suppress: diff --git a/smash/fcore/derived_type/mwd_setup.f90 b/smash/fcore/derived_type/mwd_setup.f90 index a141fdd4..a6a797f5 100644 --- a/smash/fcore/derived_type/mwd_setup.f90 +++ b/smash/fcore/derived_type/mwd_setup.f90 @@ -92,30 +92,30 @@ module mwd_setup logical :: compute_mean_atmos = .true. logical :: read_qobs = .false. - character(lchar) :: qobs_directory = "..." !$F90W char + character(2*lchar) :: qobs_directory = "..." !$F90W char logical :: read_prcp = .false. character(lchar) :: prcp_format = "..." !$F90W char real(sp) :: prcp_conversion_factor = 1._sp - character(lchar) :: prcp_directory = "..." !$F90W char + character(2*lchar) :: prcp_directory = "..." !$F90W char character(lchar) :: prcp_access = "..." !$F90W char logical :: read_pet = .false. character(lchar) :: pet_format = "..." !$F90W char real(sp) :: pet_conversion_factor = 1._sp - character(lchar) :: pet_directory = "..." !$F90W char + character(2*lchar) :: pet_directory = "..." !$F90W char character(lchar) :: pet_access = "..." !$F90W char logical :: daily_interannual_pet = .false. logical :: read_snow = .false. character(lchar) :: snow_format = "..." !$F90W char real(sp) :: snow_conversion_factor = 1._sp - character(lchar) :: snow_directory = "..." !$F90W char + character(2*lchar) :: snow_directory = "..." !$F90W char character(lchar) :: snow_access = "..." !$F90W char logical :: read_temp = .false. character(lchar) :: temp_format = "..." !$F90W char - character(lchar) :: temp_directory = "..." !$F90W char + character(2*lchar) :: temp_directory = "..." !$F90W char character(lchar) :: temp_access = "..." !$F90W char logical :: prcp_partitioning = .false. @@ -124,7 +124,7 @@ module mwd_setup logical :: read_descriptor = .false. character(lchar) :: descriptor_format = "..." !$F90W char - character(lchar) :: descriptor_directory = "..." !$F90W char + character(2*lchar) :: descriptor_directory = "..." !$F90W char character(lchar), allocatable, dimension(:) :: descriptor_name !$F90W char-array ! Post processed variables diff --git a/smash/fcore/forward/forward_db.f90 b/smash/fcore/forward/forward_db.f90 index 12e7c2a6..0c134482 100644 --- a/smash/fcore/forward/forward_db.f90 +++ b/smash/fcore/forward/forward_db.f90 @@ -16726,6 +16726,10 @@ SUBROUTINE SIMULATION_CHECKPOINT_D(setup, mesh, input_data, parameters& DO t=start_time_step,end_time_step rr_parameters_inc = 0 rr_states_inc = 0 +! % Roll discharge buffer. Depending on the routing module, it is sometimes necessary to store +! % more than one discharge time step. Instead of storing all the time steps, we allocate an array +! % whose depth is equal to the depth of the time dependency, and then at each time step, we +! % overwrite the oldest time step by rolling the array. CALL ROLL_DISCHARGE_D(checkpoint_variable%ac_qtz, & & checkpoint_variable_d%ac_qtz) CALL ROLL_DISCHARGE_D(checkpoint_variable%ac_qz, & @@ -17069,6 +17073,10 @@ SUBROUTINE SIMULATION_CHECKPOINT_B(setup, mesh, input_data, parameters& rr_parameters_inc = 0 CALL PUSHINTEGER4(rr_states_inc) rr_states_inc = 0 +! % Roll discharge buffer. Depending on the routing module, it is sometimes necessary to store +! % more than one discharge time step. Instead of storing all the time steps, we allocate an array +! % whose depth is equal to the depth of the time dependency, and then at each time step, we +! % overwrite the oldest time step by rolling the array. CALL PUSHREAL4ARRAY(checkpoint_variable%ac_qtz, SIZE(& & checkpoint_variable%ac_qtz, 1)*SIZE(& & checkpoint_variable%ac_qtz, 2)) @@ -17669,6 +17677,10 @@ SUBROUTINE SIMULATION_CHECKPOINT(setup, mesh, input_data, parameters, & DO t=start_time_step,end_time_step rr_parameters_inc = 0 rr_states_inc = 0 +! % Roll discharge buffer. Depending on the routing module, it is sometimes necessary to store +! % more than one discharge time step. Instead of storing all the time steps, we allocate an array +! % whose depth is equal to the depth of the time dependency, and then at each time step, we +! % overwrite the oldest time step by rolling the array. CALL ROLL_DISCHARGE(checkpoint_variable%ac_qtz) CALL ROLL_DISCHARGE(checkpoint_variable%ac_qz) ! Snow module @@ -17910,7 +17922,7 @@ SUBROUTINE SIMULATION_D(setup, mesh, input_data, parameters, & INTRINSIC INT REAL(sp) :: arg1 REAL(sp) :: result1 -! % We use checkpoint to reduce the maximum memory usage of the adjoint model. +! % We use checkpoints to reduce the maximum memory usage of the adjoint model. ! % Without checkpoints, the maximum memory required is equal to K * T, where K in [0, +inf] is the ! % memory used at each time step and T in [1, +inf] the total number of time steps. ! % With checkpoints, the maximum memory required is equal to (K * C) + (K * T/C), where C in [1, T] @@ -17999,7 +18011,7 @@ SUBROUTINE SIMULATION_B(setup, mesh, input_data, parameters, & INTRINSIC INT REAL(sp) :: arg1 REAL(sp) :: result1 -! % We use checkpoint to reduce the maximum memory usage of the adjoint model. +! % We use checkpoints to reduce the maximum memory usage of the adjoint model. ! % Without checkpoints, the maximum memory required is equal to K * T, where K in [0, +inf] is the ! % memory used at each time step and T in [1, +inf] the total number of time steps. ! % With checkpoints, the maximum memory required is equal to (K * C) + (K * T/C), where C in [1, T] @@ -18130,7 +18142,7 @@ SUBROUTINE SIMULATION(setup, mesh, input_data, parameters, output, & INTRINSIC INT REAL(sp) :: arg1 REAL(sp) :: result1 -! % We use checkpoint to reduce the maximum memory usage of the adjoint model. +! % We use checkpoints to reduce the maximum memory usage of the adjoint model. ! % Without checkpoints, the maximum memory required is equal to K * T, where K in [0, +inf] is the ! % memory used at each time step and T in [1, +inf] the total number of time steps. ! % With checkpoints, the maximum memory required is equal to (K * C) + (K * T/C), where C in [1, T] diff --git a/smash/fcore/forward/forward_openmp_db.f90 b/smash/fcore/forward/forward_openmp_db.f90 index 304c0bec..67b5f90a 100644 --- a/smash/fcore/forward/forward_openmp_db.f90 +++ b/smash/fcore/forward/forward_openmp_db.f90 @@ -17140,6 +17140,10 @@ SUBROUTINE SIMULATION_CHECKPOINT_D(setup, mesh, input_data, parameters& DO t=start_time_step,end_time_step rr_parameters_inc = 0 rr_states_inc = 0 +! % Roll discharge buffer. Depending on the routing module, it is sometimes necessary to store +! % more than one discharge time step. Instead of storing all the time steps, we allocate an array +! % whose depth is equal to the depth of the time dependency, and then at each time step, we +! % overwrite the oldest time step by rolling the array. CALL ROLL_DISCHARGE_D(checkpoint_variable%ac_qtz, & & checkpoint_variable_d%ac_qtz) CALL ROLL_DISCHARGE_D(checkpoint_variable%ac_qz, & @@ -17483,6 +17487,10 @@ SUBROUTINE SIMULATION_CHECKPOINT_B(setup, mesh, input_data, parameters& rr_parameters_inc = 0 CALL PUSHINTEGER4(rr_states_inc) rr_states_inc = 0 +! % Roll discharge buffer. Depending on the routing module, it is sometimes necessary to store +! % more than one discharge time step. Instead of storing all the time steps, we allocate an array +! % whose depth is equal to the depth of the time dependency, and then at each time step, we +! % overwrite the oldest time step by rolling the array. CALL PUSHREAL4ARRAY(checkpoint_variable%ac_qtz, SIZE(& & checkpoint_variable%ac_qtz, 1)*SIZE(& & checkpoint_variable%ac_qtz, 2)) @@ -18083,6 +18091,10 @@ SUBROUTINE SIMULATION_CHECKPOINT(setup, mesh, input_data, parameters, & DO t=start_time_step,end_time_step rr_parameters_inc = 0 rr_states_inc = 0 +! % Roll discharge buffer. Depending on the routing module, it is sometimes necessary to store +! % more than one discharge time step. Instead of storing all the time steps, we allocate an array +! % whose depth is equal to the depth of the time dependency, and then at each time step, we +! % overwrite the oldest time step by rolling the array. CALL ROLL_DISCHARGE(checkpoint_variable%ac_qtz) CALL ROLL_DISCHARGE(checkpoint_variable%ac_qz) ! Snow module @@ -18324,7 +18336,7 @@ SUBROUTINE SIMULATION_D(setup, mesh, input_data, parameters, & INTRINSIC INT REAL(sp) :: arg1 REAL(sp) :: result1 -! % We use checkpoint to reduce the maximum memory usage of the adjoint model. +! % We use checkpoints to reduce the maximum memory usage of the adjoint model. ! % Without checkpoints, the maximum memory required is equal to K * T, where K in [0, +inf] is the ! % memory used at each time step and T in [1, +inf] the total number of time steps. ! % With checkpoints, the maximum memory required is equal to (K * C) + (K * T/C), where C in [1, T] @@ -18413,7 +18425,7 @@ SUBROUTINE SIMULATION_B(setup, mesh, input_data, parameters, & INTRINSIC INT REAL(sp) :: arg1 REAL(sp) :: result1 -! % We use checkpoint to reduce the maximum memory usage of the adjoint model. +! % We use checkpoints to reduce the maximum memory usage of the adjoint model. ! % Without checkpoints, the maximum memory required is equal to K * T, where K in [0, +inf] is the ! % memory used at each time step and T in [1, +inf] the total number of time steps. ! % With checkpoints, the maximum memory required is equal to (K * C) + (K * T/C), where C in [1, T] @@ -18544,7 +18556,7 @@ SUBROUTINE SIMULATION(setup, mesh, input_data, parameters, output, & INTRINSIC INT REAL(sp) :: arg1 REAL(sp) :: result1 -! % We use checkpoint to reduce the maximum memory usage of the adjoint model. +! % We use checkpoints to reduce the maximum memory usage of the adjoint model. ! % Without checkpoints, the maximum memory required is equal to K * T, where K in [0, +inf] is the ! % memory used at each time step and T in [1, +inf] the total number of time steps. ! % With checkpoints, the maximum memory required is equal to (K * C) + (K * T/C), where C in [1, T] diff --git a/smash/fcore/forward/md_simulation.f90 b/smash/fcore/forward/md_simulation.f90 index 84cf5be6..9a4f5a1c 100644 --- a/smash/fcore/forward/md_simulation.f90 +++ b/smash/fcore/forward/md_simulation.f90 @@ -124,6 +124,10 @@ subroutine simulation_checkpoint(setup, mesh, input_data, parameters, output, op rr_parameters_inc = 0 rr_states_inc = 0 + ! % Roll discharge buffer. Depending on the routing module, it is sometimes necessary to store + ! % more than one discharge time step. Instead of storing all the time steps, we allocate an array + ! % whose depth is equal to the depth of the time dependency, and then at each time step, we + ! % overwrite the oldest time step by rolling the array. call roll_discharge(checkpoint_variable%ac_qtz) call roll_discharge(checkpoint_variable%ac_qz) @@ -388,7 +392,7 @@ subroutine simulation(setup, mesh, input_data, parameters, output, options, retu integer :: ncheckpoint, checkpoint_size, i, start_time_step, end_time_step type(Checkpoint_VariableDT) :: checkpoint_variable - ! % We use checkpoint to reduce the maximum memory usage of the adjoint model. + ! % We use checkpoints to reduce the maximum memory usage of the adjoint model. ! % Without checkpoints, the maximum memory required is equal to K * T, where K in [0, +inf] is the ! % memory used at each time step and T in [1, +inf] the total number of time steps. ! % With checkpoints, the maximum memory required is equal to (K * C) + (K * T/C), where C in [1, T] diff --git a/smash/tests/gen_baseline.py b/smash/tests/generate_baseline.py similarity index 79% rename from smash/tests/gen_baseline.py rename to smash/tests/generate_baseline.py index 7181c4d4..6aedfe91 100644 --- a/smash/tests/gen_baseline.py +++ b/smash/tests/generate_baseline.py @@ -15,6 +15,10 @@ import smash from smash._constant import STRUCTURE +sys.path.insert(0, "") +# Change current directory to smash/smash +os.chdir(os.path.join(os.path.dirname(os.path.abspath(__file__)), os.pardir)) + def parser(): parser = argparse.ArgumentParser() @@ -56,9 +60,9 @@ def adjust_module_names(module_names: list[str]) -> list[str]: pattern = re.compile("|".join(rep.keys())) - ret = ["smash.tests." + pattern.sub(lambda m: rep[re.escape(m.group(0))], name) for name in module_names] + ret = [pattern.sub(lambda m: rep[re.escape(m.group(0))], name) for name in module_names] - ret.remove("smash.tests.test_define_global_vars") + ret.remove("tests.test_define_global_vars") return ret @@ -128,9 +132,9 @@ def compare_baseline(f: h5py.File, new_f: h5py.File): df["TEST NAME" + (max_len_name - 8) * " "] = test_name df["STATUS"] = status - df.to_csv("diff_baseline.csv", sep="|", index=False) + df.to_csv("tests/diff_baseline.csv", sep="|", index=False) - os.system('echo "$(git show --no-patch)\n\n$(cat diff_baseline.csv)" > diff_baseline.csv') + os.system('echo "$(git show --no-patch)\n\n$(cat tests/diff_baseline.csv)" > tests/diff_baseline.csv') if __name__ == "__main__": @@ -152,6 +156,9 @@ def compare_baseline(f: h5py.File, new_f: h5py.File): model = smash.Model(setup, mesh) + # Do not need to read prcp and pet again + setup["read_prcp"] = False + setup["read_pet"] = False model_structure = [] for structure in STRUCTURE: @@ -160,19 +167,23 @@ def compare_baseline(f: h5py.File, new_f: h5py.File): setup["hydrological_module"], setup["routing_module"], ) = structure.split("-") - model_structure.append(smash.Model(setup, mesh)) - - # % Enable stdout + wmodel = smash.Model(setup, mesh) + wmodel.atmos_data.prcp = model.atmos_data.prcp + wmodel.atmos_data.pet = model.atmos_data.pet + if "ci" in wmodel.rr_parameters.keys: + wmodel.set_rr_parameters("ci", model.get_rr_parameters("ci")) + model_structure.append(wmodel) + + # # % Enable stdout sys.stdout = sys.__stdout__ - - module_names = sorted(glob.glob("**/test_*.py", recursive=True)) + module_names = sorted(glob.glob("tests/**/test_*.py", recursive=True)) module_names = adjust_module_names(module_names) - if os.path.exists("new_baseline.hdf5"): - os.remove("new_baseline.hdf5") + if os.path.exists("tests/new_baseline.hdf5"): + os.remove("tests/new_baseline.hdf5") - with h5py.File("new_baseline.hdf5", "w") as f: + with h5py.File("tests/new_baseline.hdf5", "w") as f: for mn in module_names: print(mn, end=" ") module = importlib.import_module(mn) @@ -201,8 +212,8 @@ def compare_baseline(f: h5py.File, new_f: h5py.File): print(".", end="", flush=True) print("") - baseline = h5py.File("baseline.hdf5") - new_baseline = h5py.File("new_baseline.hdf5") + baseline = h5py.File("tests/baseline.hdf5") + new_baseline = h5py.File("tests/new_baseline.hdf5") compare_baseline(baseline, new_baseline)