From 528ce9a14e71e5def187a21863369304ba9961e2 Mon Sep 17 00:00:00 2001
From: Jay-Allemand Maxime
Date: Wed, 5 Apr 2023 10:19:36 +0200
Subject: [PATCH 01/73] Add a function to write and read an hdf5 file
 containing multiple Model objects
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 smash/__init__.py          |   3 +
 smash/io/multi_model_io.py | 389 +++++++++++++++++++++++++++++++++++++
 2 files changed, 392 insertions(+)
 create mode 100644 smash/io/multi_model_io.py

diff --git a/smash/__init__.py b/smash/__init__.py
index 6d2aa3b4..b0b7facc 100644
--- a/smash/__init__.py
+++ b/smash/__init__.py
@@ -12,6 +12,7 @@
 from smash.io.mesh_io import save_mesh, read_mesh
 from smash.io.model_io import save_model, read_model
 from smash.io.model_ddt_io import save_model_ddt, read_model_ddt
+from smash.io.multi_model_io import save_multi_model, read_multi_model
 
 from smash.dataset.load import load_dataset
 
@@ -41,6 +42,8 @@ def __getattr__(name):
     "read_model",
     "save_model_ddt",
     "read_model_ddt",
+    "save_multi_model",
+    "read_multi_model",
     "load_dataset",
 ]
 
diff --git a/smash/io/multi_model_io.py b/smash/io/multi_model_io.py
new file mode 100644
index 00000000..7f7e3b93
--- /dev/null
+++ b/smash/io/multi_model_io.py
@@ -0,0 +1,389 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from smash.core.model import Model
+
+from smash.core._constant import STRUCTURE_PARAMETERS, STRUCTURE_STATES
+
+from smash.io._error import ReadHDF5MethodError
+
+import os
+import errno
+import warnings
+import h5py
+import numpy as np
+
+__all__ = ["save_multi_model", "read_multi_model"]
+
+
+def _default_save_data(structure: str):
+    return {
+        "setup": ["dt", "end_time", "start_time", "structure"],
+        "mesh": ["active_cell", "area", "code", "dx", "flwdir"],
+        "input_data": ["mean_prcp", "mean_pet", "qobs"],
+        "parameters": STRUCTURE_PARAMETERS[
+            structure
+        ],  # only calibrated Model param will be stored
+        "states": STRUCTURE_STATES[
+            structure
+        ],  # only initial Model states will be stored
+        "output": [
+            {
+                "fstates": STRUCTURE_STATES[structure]
+            },  # only final Model states will be stored
+            "qsim",
+            "lcurve",
+        ],
+    }
+
+
+def _parse_selected_derived_type_to_hdf5(
+    derived_type, list_attr, hdf5_ins, attr_suffix=""
+):
+    # TODO: clean function for attr_suffix
+
+    for attr in list_attr:
+        if isinstance(attr, str):
+            try:
+                value = getattr(derived_type, attr)
+
+                attr += attr_suffix
+
+                if isinstance(value, np.ndarray):
+                    if value.dtype == "object" or value.dtype.char == "U":
+                        value = value.astype("S")
+
+                    hdf5_ins.create_dataset(
+                        attr,
+                        shape=value.shape,
+                        dtype=value.dtype,
+                        data=value,
+                        compression="gzip",
+                        chunks=True,
+                    )
+
+                else:
+                    hdf5_ins.attrs[attr] = value
+
+            except:
+                pass
+
+        elif isinstance(attr, dict):
+            for derived_type_key, list_attr_imd in attr.items():
+                try:
+                    derived_type_imd = getattr(derived_type, derived_type_key)
+
+                    _parse_selected_derived_type_to_hdf5(
+                        derived_type_imd, list_attr_imd, hdf5_ins
+                    )
+
+                except:
+                    pass
+
+
+def save_multi_model(model: Model, path: str, group=None, sub_data=None, sub_only=False, replace=False):
+    """
+    Save some derived data types of the Model object.
+
+    This method is considerably lighter than `smash.save_model` method that saves the entire Model object.
+    However, it is not capable of reconstructing the Model object from the saved data file.
+ + By default, the following data are stored into the `HDF5 `__ file: + + - ``dt``, ``end_time``, ``start_time``, ``structure`` from `Model.setup` + - ``active_cell``, ``area``, ``code``, ``dx``, ``flwdir`` from `Model.mesh` + - ``mean_prcp``, ``mean_pet``, ``qobs`` from `Model.input_data` + - ``qsim`` from `Model.output` + - The final Model states (depending upon the Model structure) from state derived type of `Model.output` + - The initial Model states (depending upon the Model structure) from `Model.states` + - The Model parameters (depending upon the Model structure) from `Model.parameters` + + Subsidiary data can be added by filling in ``sub_data``. + + Parameters + ---------- + model : Model + The Model object to save derived data types as a HDF5 file. + + path : str + The file path. If the path not end with ``.hdf5``, the extension is automatically added to the file path. + + group : str + subgroup name to group data in the hdf5 file. + + .. note:: + If not given, no subgroub is created and data are stored at the roots. + + sub_data : dict or None, default None + Dictionary which indicates the subsidiary data to store into the HDF5 file. + + .. note:: + If not given, no subsidiary data is saved + + sub_only : bool, default False + Allow to only store subsidiary data. + + replace : bool, default False + replace or not an existing hdf5 file + + See Also + -------- + read_model_ddt: Read derived data types of the Model object from HDF5 file. + Model: Primary data structure of the hydrological model `smash`. + + Examples + -------- + >>> setup, mesh = smash.load_dataset("cance") + >>> model = smash.Model(setup, mesh) + >>> model + Structure: 'gr-a' + Spatio-Temporal dimension: (x: 28, y: 28, time: 1440) + Last update: Initialization + + Save spatially distributed precipitation in addition to default derived data types of Model + + >>> smash.save_model_ddt(model, "model_ddt.hdf5", sub_data={"prcp": model.input_data.prcp}) + + """ + + if not path.endswith(".hdf5"): + path = path + ".hdf5" + + if replace==True: + f= h5py.File(path, "w") + else: + f= h5py.File(path, "a") + + if group is not None: + groupe_name=os.path.basename(group) + groupe_path=os.path.dirname(group) + grp=f.create_group(group) + else: + grp=f + + if not sub_only: + save_data = _default_save_data(model.setup.structure) + + for derived_type_key, list_attr in save_data.items(): + derived_type = getattr(model, derived_type_key) + + if derived_type_key == "states": + _parse_selected_derived_type_to_hdf5( + derived_type, list_attr, grp, attr_suffix="_0" + ) + + else: + _parse_selected_derived_type_to_hdf5(derived_type, list_attr, grp) + + if sub_data is not None: + for attr, value in sub_data.items(): + if (attr in grp) or (attr in grp.attrs): + warnings.warn(f"Ignore updating existing key ({attr})") + + continue + + if isinstance(value, np.ndarray): + if value.dtype == "object" or value.dtype.char == "U": + value = value.astype("S") + + try: + grp.create_dataset( + attr, + shape=value.shape, + dtype=value.dtype, + data=value, + compression="gzip", + chunks=True, + ) + except: + warnings.warn(f"Can not store to HDF5: {attr}") + + else: + try: + grp.attrs[attr] = value + + except: + warnings.warn(f"Can not store to HDF5: {attr}") + + if group is not None: + grp.attrs["_save_func"] = "save_model_ddt" + f[groupe_path].attrs["_save_func"] = "save_multi_model" + else: + grp.attrs["_save_func"] = "save_model_ddt" + + + +# ~ elif isinstance(value, dict): + +# ~ for subkey,subvalue in value.items(): + + # ~ if isinstance(subvalue, 
np.ndarray): + + # ~ if subvalue.dtype == "object" or subvalue.dtype.char == "U": + # ~ subvalue = subvalue.astype("S") + + # ~ try: + # ~ grp.create_dataset( + # ~ subkey, + # ~ shape=subvalue.shape, + # ~ dtype=subvalue.dtype, + # ~ data=subvalue, + # ~ compression="gzip", + # ~ chunks=True, + # ~ ) + # ~ except: + # ~ warnings.warn(f"Can not store to HDF5: {subkey}") + + # ~ else: + + # ~ try: + # ~ grp.attrs[subkey] = subvalue + # ~ except: + # ~ warnings.warn(f"Can not store to HDF5: {subkey}") + + + +def read_multi_model(path: str, group=None) -> dict: + """ + Read derived data types of the Model object from HDF5 file. + + Parameters + ---------- + path : str + The file path. + + Returns + ------- + data : dict + A dictionary with derived data types loaded from HDF5 file. + + Raises + ------ + FileNotFoundError: + If file not found. + ReadHDF5MethodError: + If file not created with `save_model_ddt`. + + See Also + -------- + save_model_ddt: Save some derived data types of the Model object. + + Examples + -------- + >>> setup, mesh = smash.load_dataset("cance") + >>> model = smash.Model(setup, mesh) + >>> smash.save_model_ddt(model, "model_ddt.hdf5") + + Read the derived data types from HDF5 file + + >>> data = smash.read_multi_model("model_ddt.hdf5") + + Then, to see the dataset keys + + >>> data.keys() + dict_keys(['active_cell', 'area', 'cft', 'code', 'cp', 'exc', 'flwdir', + 'hft', 'hft_0', 'hlr', 'hlr_0', 'hp', 'hp_0', 'lr', 'mean_pet', 'mean_prcp', + 'qobs', 'qsim', 'dt', 'dx', 'end_time', 'start_time', 'structure']) + + And finally, to access to derived data + + >>> data["mean_prcp"] + array([[0., 0., 0., ..., 0., 0., 0.], + [0., 0., 0., ..., 0., 0., 0.], + [0., 0., 0., ..., 0., 0., 0.]], dtype=float32) + + """ + + if os.path.isfile(path): + with h5py.File(path) as f: + + #recursive function to convert hdf5 to dict ? + #res={} + #res=read_hdf5(f,res) + + if group is not None: + + if group in list(f.keys()): + grp=f[group] + else: + raise ReadHDF5MethodError( + f"Unable to acces to group '{group}' in hdf5 '{path}', '{group}' group does not exist." + ) + else: + grp=f + + if grp.attrs.get("_save_func") == "save_multi_model": + group_dict={} + + for name,group in grp.items(): + + keys = list(group.keys()) + + values = [ + group[key][:].astype("U") if group[key][:].dtype.char == "S" else group[key][:] + for key in keys + ] + + attr_keys = list(group.attrs.keys()) + + attr_keys.remove("_save_func") + + attr_values = [group.attrs[key] for key in attr_keys] + + group_dict.update({name:dict(zip(keys + attr_keys, values + attr_values))}) + + return group_dict + + elif grp.attrs.get("_save_func") == "save_model_ddt": + keys = list(grp.keys()) + + values = [ + grp[key][:].astype("U") if grp[key][:].dtype.char == "S" else grp[key][:] + for key in keys + ] + + attr_keys = list(grp.attrs.keys()) + + attr_keys.remove("_save_func") + + attr_values = [grp.attrs[key] for key in attr_keys] + + return dict(zip(keys + attr_keys, values + attr_values)) + + else: + raise ReadHDF5MethodError( + f"Unable to read '{path}' with 'read_model_group' method. The file may not have been created with 'read_model_group' method." 
+ ) + + else: + raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), path) + + + +def read_hdf5(h,res): + + for name,group in h.items(): + + if group.attrs.get("_save_func") == "save_model_ddt": + keys = list(group.keys()) + + values = [ + group[key][:].astype("U") if group[key][:].dtype.char == "S" else group[key][:] + for key in keys + ] + + attr_keys = list(group.attrs.keys()) + + attr_keys.remove("_save_func") + + attr_values = [group.attrs[key] for key in attr_keys] + + res.update({name:dict(zip(keys + attr_keys, values + attr_values))}) + + return res + else: + res.update({name:{}}) + + read_hdf5(group,res) From 5ba8da82e620cc84464abb3cc28598f697b54f14 Mon Sep 17 00:00:00 2001 From: Jay-Allemand Maxime Date: Wed, 5 Apr 2023 16:33:23 +0200 Subject: [PATCH 02/73] Rename variables and update functions comment --- smash/io/multi_model_io.py | 128 +++++++++++-------------------------- 1 file changed, 37 insertions(+), 91 deletions(-) diff --git a/smash/io/multi_model_io.py b/smash/io/multi_model_io.py index 7f7e3b93..f407203c 100644 --- a/smash/io/multi_model_io.py +++ b/smash/io/multi_model_io.py @@ -110,11 +110,11 @@ def save_multi_model(model: Model, path: str, group=None, sub_data=None, sub_onl path : str The file path. If the path not end with ``.hdf5``, the extension is automatically added to the file path. - group : str - subgroup name to group data in the hdf5 file. + location : str + location, absolute path, to store the data in the hdf5 file. .. note:: - If not given, no subgroub is created and data are stored at the roots. + If not given, the data are stored at the root of the hdf5 file. sub_data : dict or None, default None Dictionary which indicates the subsidiary data to store into the HDF5 file. @@ -145,6 +145,7 @@ def save_multi_model(model: Model, path: str, group=None, sub_data=None, sub_onl Save spatially distributed precipitation in addition to default derived data types of Model >>> smash.save_model_ddt(model, "model_ddt.hdf5", sub_data={"prcp": model.input_data.prcp}) + >>> smash.save_multi_model(model,"multi_model_ddt.hdf5", location="rainfall", sub_data={"prcp": model.input_data.prcp}) """ @@ -156,10 +157,10 @@ def save_multi_model(model: Model, path: str, group=None, sub_data=None, sub_onl else: f= h5py.File(path, "a") - if group is not None: - groupe_name=os.path.basename(group) - groupe_path=os.path.dirname(group) - grp=f.create_group(group) + if location is not None: + #loc_name=os.path.basename(location) + loc_path=os.path.dirname(location) + grp=f.create_group(location) else: grp=f @@ -207,45 +208,15 @@ def save_multi_model(model: Model, path: str, group=None, sub_data=None, sub_onl except: warnings.warn(f"Can not store to HDF5: {attr}") - if group is not None: + if location is not None: grp.attrs["_save_func"] = "save_model_ddt" - f[groupe_path].attrs["_save_func"] = "save_multi_model" + f[loc_path].attrs["_save_func"] = "save_multi_model" else: grp.attrs["_save_func"] = "save_model_ddt" - - - -# ~ elif isinstance(value, dict): - -# ~ for subkey,subvalue in value.items(): - - # ~ if isinstance(subvalue, np.ndarray): - - # ~ if subvalue.dtype == "object" or subvalue.dtype.char == "U": - # ~ subvalue = subvalue.astype("S") - - # ~ try: - # ~ grp.create_dataset( - # ~ subkey, - # ~ shape=subvalue.shape, - # ~ dtype=subvalue.dtype, - # ~ data=subvalue, - # ~ compression="gzip", - # ~ chunks=True, - # ~ ) - # ~ except: - # ~ warnings.warn(f"Can not store to HDF5: {subkey}") - - # ~ else: - - # ~ try: - # ~ grp.attrs[subkey] = subvalue - # ~ except: - 
# ~ warnings.warn(f"Can not store to HDF5: {subkey}") -def read_multi_model(path: str, group=None) -> dict: +def read_multi_model(path: str, location=None) -> dict: """ Read derived data types of the Model object from HDF5 file. @@ -254,6 +225,12 @@ def read_multi_model(path: str, group=None) -> dict: path : str The file path. + location : str + location, absolute path where to read the data in the hdf5 file. + + .. note:: + If not given, the data are read from the root of the hdf5 file. + Returns ------- data : dict @@ -274,11 +251,11 @@ def read_multi_model(path: str, group=None) -> dict: -------- >>> setup, mesh = smash.load_dataset("cance") >>> model = smash.Model(setup, mesh) - >>> smash.save_model_ddt(model, "model_ddt.hdf5") + >>> smash.save_multi_model(model,"multi_model_ddt.hdf5", location="rainfall", sub_data={"prcp": model.input_data.prcp}) Read the derived data types from HDF5 file - >>> data = smash.read_multi_model("model_ddt.hdf5") + >>> data = smash.read_multi_model("multi_model_ddt.hdf5",location="forecast") Then, to see the dataset keys @@ -299,52 +276,48 @@ def read_multi_model(path: str, group=None) -> dict: if os.path.isfile(path): with h5py.File(path) as f: - #recursive function to convert hdf5 to dict ? - #res={} - #res=read_hdf5(f,res) - - if group is not None: + if location is not None: - if group in list(f.keys()): - grp=f[group] + if location in list(f.keys()): + loc=f[location] else: raise ReadHDF5MethodError( - f"Unable to acces to group '{group}' in hdf5 '{path}', '{group}' group does not exist." + f"Unable to acces to group '{location}' in hdf5 '{path}', '{location}' group does not exist." ) else: - grp=f + loc=f - if grp.attrs.get("_save_func") == "save_multi_model": - group_dict={} + if loc.attrs.get("_save_func") == "save_multi_model": + results={} - for name,group in grp.items(): + for name,data in loc.items(): - keys = list(group.keys()) + keys = list(data.keys()) values = [ - group[key][:].astype("U") if group[key][:].dtype.char == "S" else group[key][:] + data[key][:].astype("U") if data[key][:].dtype.char == "S" else data[key][:] for key in keys ] - attr_keys = list(group.attrs.keys()) + attr_keys = list(data.attrs.keys()) attr_keys.remove("_save_func") - attr_values = [group.attrs[key] for key in attr_keys] + attr_values = [data.attrs[key] for key in attr_keys] - group_dict.update({name:dict(zip(keys + attr_keys, values + attr_values))}) + results.update({name:dict(zip(keys + attr_keys, values + attr_values))}) - return group_dict + return results - elif grp.attrs.get("_save_func") == "save_model_ddt": - keys = list(grp.keys()) + elif loc.attrs.get("_save_func") == "save_model_ddt": + keys = list(loc.keys()) values = [ - grp[key][:].astype("U") if grp[key][:].dtype.char == "S" else grp[key][:] + loc[key][:].astype("U") if loc[key][:].dtype.char == "S" else loc[key][:] for key in keys ] - attr_keys = list(grp.attrs.keys()) + attr_keys = list(loc.attrs.keys()) attr_keys.remove("_save_func") @@ -360,30 +333,3 @@ def read_multi_model(path: str, group=None) -> dict: else: raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), path) - - -def read_hdf5(h,res): - - for name,group in h.items(): - - if group.attrs.get("_save_func") == "save_model_ddt": - keys = list(group.keys()) - - values = [ - group[key][:].astype("U") if group[key][:].dtype.char == "S" else group[key][:] - for key in keys - ] - - attr_keys = list(group.attrs.keys()) - - attr_keys.remove("_save_func") - - attr_values = [group.attrs[key] for key in attr_keys] - - 
res.update({name:dict(zip(keys + attr_keys, values + attr_values))}) - - return res - else: - res.update({name:{}}) - - read_hdf5(group,res) From eaaf9f3ce63c22900910aaed965b12cff027b34c Mon Sep 17 00:00:00 2001 From: Jay-Allemand Maxime Date: Thu, 6 Apr 2023 17:11:29 +0200 Subject: [PATCH 03/73] Fix some issues and use require_group instead of create_group --- smash/io/multi_model_io.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/smash/io/multi_model_io.py b/smash/io/multi_model_io.py index f407203c..19b45207 100644 --- a/smash/io/multi_model_io.py +++ b/smash/io/multi_model_io.py @@ -83,7 +83,7 @@ def _parse_selected_derived_type_to_hdf5( pass -def save_multi_model(model: Model, path: str, group=None, sub_data=None, sub_only=False, replace=False): +def save_multi_model(model: Model, path: str, location=None, sub_data=None, sub_only=False, replace=False): """ Save some derived data types of the Model object. @@ -160,7 +160,9 @@ def save_multi_model(model: Model, path: str, group=None, sub_data=None, sub_onl if location is not None: #loc_name=os.path.basename(location) loc_path=os.path.dirname(location) - grp=f.create_group(location) + if loc_path=="": + loc_path="./" + grp=f.require_group(location) else: grp=f @@ -321,7 +323,7 @@ def read_multi_model(path: str, location=None) -> dict: attr_keys.remove("_save_func") - attr_values = [grp.attrs[key] for key in attr_keys] + attr_values = [loc.attrs[key] for key in attr_keys] return dict(zip(keys + attr_keys, values + attr_values)) From 4b925a68506ff1f40ec79cc2b6c9257075196f54 Mon Sep 17 00:00:00 2001 From: Jay-Allemand Maxime Date: Wed, 12 Apr 2023 12:47:00 +0200 Subject: [PATCH 04/73] Add a full set of generic function for I/O to hdf5 Missing : save full object model => make a dict from the object model directly --- smash/__init__.py | 6 +- smash/io/multi_model_io.py | 507 ++++++++++++++++++------------------- 2 files changed, 244 insertions(+), 269 deletions(-) diff --git a/smash/__init__.py b/smash/__init__.py index c1f0153f..6c38d428 100644 --- a/smash/__init__.py +++ b/smash/__init__.py @@ -12,7 +12,7 @@ from smash.io.mesh_io import save_mesh, read_mesh from smash.io.model_io import save_model, read_model from smash.io.model_ddt_io import save_model_ddt, read_model_ddt -from smash.io.multi_model_io import save_multi_model, read_multi_model +from smash.io.multi_model_io import save_smash_model_to_hdf5, load_hdf5_file from smash.dataset.load import load_dataset @@ -43,8 +43,8 @@ def __getattr__(name): "read_model", "save_model_ddt", "read_model_ddt", - "save_multi_model", - "read_multi_model", + "save_smash_model_to_hdf5", + "load_hdf5_file", "load_dataset", ] diff --git a/smash/io/multi_model_io.py b/smash/io/multi_model_io.py index 19b45207..8cca66b7 100644 --- a/smash/io/multi_model_io.py +++ b/smash/io/multi_model_io.py @@ -15,323 +15,298 @@ import h5py import numpy as np -__all__ = ["save_multi_model", "read_multi_model"] +__all__ = ["open_hdf5", "add_hdf5_sub_group", "default_model_data", "light_model_data", "dump_object_to_hdf5_from_list_attribute", "dump_object_to_hdf5_from_dict_attribute", "dump_object_to_hdf5_from_str_attribute", "dump_object_to_hdf5_from_iteratable", "dump_object_to_hdf5", "save_smash_model_to_hdf5", "load_hdf5_file", "read_hdf5_to_dict"] -def _default_save_data(structure: str): + +def open_hdf5(path, replace=False): + + if not path.endswith(".hdf5"): + + path = path + ".hdf5" + + if replace==True: + + f= h5py.File(path, "w") + + else: + + f= h5py.File(path, "a") + + return f + + + 
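+# A minimal usage sketch for open_hdf5 ("my_model" is a hypothetical file
+# name, not part of this patch): the ".hdf5" suffix is appended when missing,
+# and "replace" switches between overwrite ("w") and append ("a") mode.
+#
+#   f = open_hdf5("my_model", replace=True)  # creates/truncates my_model.hdf5
+#   f.close()
+#   f = open_hdf5("my_model")                # reopens the same file in "a" mode
+#   f.close()
+
+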
+def add_hdf5_sub_group(hdf5, subgroup=None): + + if subgroup is not None: + + loc_path=os.path.dirname(subgroup) + + if loc_path=="": + + loc_path="./" + hdf5.require_group(subgroup) + + return hdf5 + + + +def default_model_data(structure: str,structure_parameters=STRUCTURE_PARAMETERS,structure_states=STRUCTURE_STATES): + return { - "setup": ["dt", "end_time", "start_time", "structure"], - "mesh": ["active_cell", "area", "code", "dx", "flwdir"], + "setup": ["dt", "end_time", "start_time", "structure", "_ntime_step"], + "mesh": ["active_cell", "area", "code", "dx", "flwdir", "nac", "ng", "path", "ymax", "xmin", "nrow", "ncol", "gauge_pos", "flwacc"], "input_data": ["mean_prcp", "mean_pet", "qobs"], - "parameters": STRUCTURE_PARAMETERS[ + "parameters": structure_parameters[ structure ], # only calibrated Model param will be stored - "states": STRUCTURE_STATES[ + "states": structure_states[ structure ], # only initial Model states will be stored "output": [ { - "fstates": STRUCTURE_STATES[structure] + "fstates": structure_states[structure] }, # only final Model states will be stored "qsim", - "lcurve", + "cost", + "cost_jobs", + "cost_jreg" ], } -def _parse_selected_derived_type_to_hdf5( - derived_type, list_attr, hdf5_ins, attr_suffix="" -): - # TODO: clean function for attr_suffix - - for attr in list_attr: - if isinstance(attr, str): - try: - value = getattr(derived_type, attr) - - attr += attr_suffix - - if isinstance(value, np.ndarray): - if value.dtype == "object" or value.dtype.char == "U": - value = value.astype("S") - hdf5_ins.create_dataset( - attr, - shape=value.shape, - dtype=value.dtype, - data=value, - compression="gzip", - chunks=True, - ) - - else: - hdf5_ins.attrs[attr] = value - - except: - pass - elif isinstance(attr, dict): - for derived_type_key, list_attr_imd in attr.items(): - try: - derived_type_imd = getattr(derived_type, derived_type_key) - - _parse_selected_derived_type_to_hdf5( - derived_type_imd, list_attr_imd, hdf5_ins - ) - - except: - pass - - -def save_multi_model(model: Model, path: str, location=None, sub_data=None, sub_only=False, replace=False): - """ - Save some derived data types of the Model object. - - This method is considerably lighter than `smash.save_model` method that saves the entire Model object. - However, it is not capable of reconstructing the Model object from the saved data file. 
- - By default, the following data are stored into the `HDF5 `__ file: - - - ``dt``, ``end_time``, ``start_time``, ``structure`` from `Model.setup` - - ``active_cell``, ``area``, ``code``, ``dx``, ``flwdir`` from `Model.mesh` - - ``mean_prcp``, ``mean_pet``, ``qobs`` from `Model.input_data` - - ``qsim`` from `Model.output` - - The final Model states (depending upon the Model structure) from state derived type of `Model.output` - - The initial Model states (depending upon the Model structure) from `Model.states` - - The Model parameters (depending upon the Model structure) from `Model.parameters` +def light_model_data(structure: str,structure_parameters=STRUCTURE_PARAMETERS,structure_states=STRUCTURE_STATES): + + return { + "setup": ["dt", "end_time", "start_time"], + "mesh": ["active_cell", "area", "code", "dx", "ng", "ymax", "xmin", "nrow", "ncol", "gauge_pos", "flwacc"], + "input_data": ["qobs"], + "parameters": structure_parameters[ + structure + ], # only calibrated Model param will be stored + "output": [ + { + "fstates": structure_states[structure] + }, # only final Model states will be stored + "qsim", + ], + } - Subsidiary data can be added by filling in ``sub_data``. - Parameters - ---------- - model : Model - The Model object to save derived data types as a HDF5 file. - path : str - The file path. If the path not end with ``.hdf5``, the extension is automatically added to the file path. +def dump_object_to_hdf5_from_list_attribute(hdf5,instance,list_attr): - location : str - location, absolute path, to store the data in the hdf5 file. + if isinstance(list_attr,list): + + for attr in list_attr: + + if isinstance(attr, str): + + dump_object_to_hdf5_from_str_attribute(hdf5, instance, attr) + + elif isinstance(attr,list): + + dump_object_to_hdf5_from_list_attribute(hdf5, instance, attr) + + elif isinstance(attr,dict): + + dump_object_to_hdf5_from_dict_attribute(hdf5, instance, attr) + + else: + + raise ValueError( + f"unconsistant {attr} in {list_attr}. {attr} must be a an instance of dict, list or str" + ) + + else: - .. note:: - If not given, the data are stored at the root of the hdf5 file. - - sub_data : dict or None, default None - Dictionary which indicates the subsidiary data to store into the HDF5 file. - - .. note:: - If not given, no subsidiary data is saved - - sub_only : bool, default False - Allow to only store subsidiary data. - - replace : bool, default False - replace or not an existing hdf5 file - - See Also - -------- - read_model_ddt: Read derived data types of the Model object from HDF5 file. - Model: Primary data structure of the hydrological model `smash`. - - Examples - -------- - >>> setup, mesh = smash.load_dataset("cance") - >>> model = smash.Model(setup, mesh) - >>> model - Structure: 'gr-a' - Spatio-Temporal dimension: (x: 28, y: 28, time: 1440) - Last update: Initialization - - Save spatially distributed precipitation in addition to default derived data types of Model - - >>> smash.save_model_ddt(model, "model_ddt.hdf5", sub_data={"prcp": model.input_data.prcp}) - >>> smash.save_multi_model(model,"multi_model_ddt.hdf5", location="rainfall", sub_data={"prcp": model.input_data.prcp}) + raise ValueError( + f"{list_attr} must be a instance of list." 
+ ) - """ - if not path.endswith(".hdf5"): - path = path + ".hdf5" +def dump_object_to_hdf5_from_dict_attribute(hdf5,instance,dict_attr): - if replace==True: - f= h5py.File(path, "w") - else: - f= h5py.File(path, "a") + if isinstance(dict_attr,dict): - if location is not None: - #loc_name=os.path.basename(location) - loc_path=os.path.dirname(location) - if loc_path=="": - loc_path="./" - grp=f.require_group(location) - else: - grp=f + for key, attr in dict_attr.items(): + + hdf5=add_hdf5_sub_group(hdf5, subgroup=key) + + try: + + sub_instance=getattr(instance, key) + + except: + + sub_instance=instance + + if isinstance(attr,dict): + + dump_object_to_hdf5_from_dict_attribute(hdf5[key], sub_instance, attr) + + if isinstance(attr,list): + + dump_object_to_hdf5_from_list_attribute(hdf5[key], sub_instance, attr) + + elif isinstance(attr,str): + + dump_object_to_hdf5_from_str_attribute(hdf5[key], sub_instance, attr) + + else : + + raise ValueError( + f"unconsistant {attr} in {dict_attr}. {attr} must be a instance of dict, list or str" + ) - if not sub_only: - save_data = _default_save_data(model.setup.structure) - - for derived_type_key, list_attr in save_data.items(): - derived_type = getattr(model, derived_type_key) - - if derived_type_key == "states": - _parse_selected_derived_type_to_hdf5( - derived_type, list_attr, grp, attr_suffix="_0" + else: + + raise ValueError( + f"{dict_attr} must be a instance of dict." ) - else: - _parse_selected_derived_type_to_hdf5(derived_type, list_attr, grp) - - if sub_data is not None: - for attr, value in sub_data.items(): - if (attr in grp) or (attr in grp.attrs): - warnings.warn(f"Ignore updating existing key ({attr})") - - continue +def dump_object_to_hdf5_from_str_attribute(hdf5,instance,str_attr): + + if isinstance(str_attr, str): + + try: + + value = getattr(instance, str_attr) + if isinstance(value, np.ndarray): + if value.dtype == "object" or value.dtype.char == "U": value = value.astype("S") - - try: - grp.create_dataset( - attr, - shape=value.shape, - dtype=value.dtype, - data=value, - compression="gzip", - chunks=True, - ) - except: - warnings.warn(f"Can not store to HDF5: {attr}") - + + hdf5.create_dataset( + str_attr, + shape=value.shape, + dtype=value.dtype, + data=value, + compression="gzip", + chunks=True, + ) + else: - try: - grp.attrs[attr] = value - - except: - warnings.warn(f"Can not store to HDF5: {attr}") - - if location is not None: - grp.attrs["_save_func"] = "save_model_ddt" - f[loc_path].attrs["_save_func"] = "save_multi_model" + + hdf5.attrs[str_attr] = value + + except: + + raise ValueError( + f"Unable to get attribute {str_attr} in {instance}" + ) + else: - grp.attrs["_save_func"] = "save_model_ddt" - - - -def read_multi_model(path: str, location=None) -> dict: - """ - Read derived data types of the Model object from HDF5 file. - - Parameters - ---------- - path : str - The file path. - - location : str - location, absolute path where to read the data in the hdf5 file. - - .. note:: - If not given, the data are read from the root of the hdf5 file. + + raise ValueError( + f"{str_attr} must be a instance of str." + ) - Returns - ------- - data : dict - A dictionary with derived data types loaded from HDF5 file. - Raises - ------ - FileNotFoundError: - If file not found. - ReadHDF5MethodError: - If file not created with `save_model_ddt`. 
+def dump_object_to_hdf5_from_iteratable(hdf5, instance, iteratable): + + if isinstance(iteratable,list): + + dump_object_to_hdf5_from_list_attribute(hdf5,instance,iteratable) + + elif isinstance(iteratable,dict): + + dump_object_to_hdf5_from_dict_attribute(hdf5,instance,iteratable) + + else : + + raise ValueError( + f"{iteratable} must be a instance of list or dict." + ) - See Also - -------- - save_model_ddt: Save some derived data types of the Model object. - Examples - -------- - >>> setup, mesh = smash.load_dataset("cance") - >>> model = smash.Model(setup, mesh) - >>> smash.save_multi_model(model,"multi_model_ddt.hdf5", location="rainfall", sub_data={"prcp": model.input_data.prcp}) +def dump_object_to_hdf5(f_hdf5, instance, keys_data, location="./", replace=False): + + hdf5=open_hdf5(f_hdf5, replace=replace) + hdf5=add_hdf5_sub_group(hdf5, subgroup=location) + dump_object_to_hdf5_from_iteratable(hdf5[location], instance, keys_data) + hdf5.close() - Read the derived data types from HDF5 file - >>> data = smash.read_multi_model("multi_model_ddt.hdf5",location="forecast") +def save_smash_model_to_hdf5(path_to_hdf5, instance, keys_data="default", location="./", replace=True): + + if isinstance(keys_data,str): + + if keys_data == "default": + + keys_data=default_model_data(instance.setup.structure) + + elif keys_data == "full": + + #to do + keys_data=default_model_data(instance.setup.structure) + + elif keys_data == "light": + + keys_data=light_model_data(instance.setup.structure) + + if isinstance(keys_data,dict): + + dump_object_to_hdf5(path_to_hdf5, instance, keys_data, location=location, replace=replace) + + else: + + raise ValueError( + f"{keys_data} must be a instance of str or dict." + ) + - Then, to see the dataset keys - >>> data.keys() - dict_keys(['active_cell', 'area', 'cft', 'code', 'cp', 'exc', 'flwdir', - 'hft', 'hft_0', 'hlr', 'hlr_0', 'hp', 'hp_0', 'lr', 'mean_pet', 'mean_prcp', - 'qobs', 'qsim', 'dt', 'dx', 'end_time', 'start_time', 'structure']) - And finally, to access to derived data - >>> data["mean_prcp"] - array([[0., 0., 0., ..., 0., 0., 0.], - [0., 0., 0., ..., 0., 0., 0.], - [0., 0., 0., ..., 0., 0., 0.]], dtype=float32) +def load_hdf5_file(f_hdf5): + + hdf5=open_hdf5(f_hdf5, replace=False) + dictionary=read_hdf5_to_dict(hdf5) + hdf5.close() + return dictionary - """ - if os.path.isfile(path): - with h5py.File(path) as f: +def read_hdf5_to_dict(hdf5): + + dictionary={} + + for key,item in hdf5.items(): + + if str(type(item)).find("group") != -1: - if location is not None: - - if location in list(f.keys()): - loc=f[location] - else: - raise ReadHDF5MethodError( - f"Unable to acces to group '{location}' in hdf5 '{path}', '{location}' group does not exist." 
- ) - else: - loc=f + dictionary.update({key:read_hdf5_to_dict(item)}) + + list_attr=list(item.attrs.keys()) - if loc.attrs.get("_save_func") == "save_multi_model": - results={} + for key_attr in list_attr: - for name,data in loc.items(): - - keys = list(data.keys()) - - values = [ - data[key][:].astype("U") if data[key][:].dtype.char == "S" else data[key][:] - for key in keys + dictionary[key].update({key_attr:item.attrs[key_attr]}) + + if str(type(item)).find("dataset") != -1: + + values = [ + item[:].astype("U") if item[:].dtype.char == "S" else item[:] ] - - attr_keys = list(data.attrs.keys()) - - attr_keys.remove("_save_func") - - attr_values = [data.attrs[key] for key in attr_keys] - - results.update({name:dict(zip(keys + attr_keys, values + attr_values))}) - - return results + dictionary.update({key:values}) - elif loc.attrs.get("_save_func") == "save_model_ddt": - keys = list(loc.keys()) - - values = [ - loc[key][:].astype("U") if loc[key][:].dtype.char == "S" else loc[key][:] - for key in keys - ] - - attr_keys = list(loc.attrs.keys()) - - attr_keys.remove("_save_func") - - attr_values = [loc.attrs[key] for key in attr_keys] - - return dict(zip(keys + attr_keys, values + attr_values)) - - else: - raise ReadHDF5MethodError( - f"Unable to read '{path}' with 'read_model_group' method. The file may not have been created with 'read_model_group' method." - ) - - else: - raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), path) + list_attr=list(item.attrs.keys()) + + for key_attr in list_attr: + + dictionary.update({key_attr:item.attrs[key_attr]}) + + + list_attr=list(hdf5.attrs.keys()) + + for key_attr in list_attr: + + dictionary.update({key_attr:hdf5.attrs[key_attr]}) + + return dictionary From 64f3c2f64a13fc87355ad63e554b2a6c69c7857d Mon Sep 17 00:00:00 2001 From: Jay-Allemand Maxime Date: Thu, 13 Apr 2023 10:28:36 +0200 Subject: [PATCH 05/73] Add functions to generate object structure : light, medium, full Save model according a default content type or user defined content --- smash/io/multi_model_io.py | 157 ++++++++++++++++++++++++++----------- 1 file changed, 113 insertions(+), 44 deletions(-) diff --git a/smash/io/multi_model_io.py b/smash/io/multi_model_io.py index 8cca66b7..e267009a 100644 --- a/smash/io/multi_model_io.py +++ b/smash/io/multi_model_io.py @@ -15,7 +15,7 @@ import h5py import numpy as np -__all__ = ["open_hdf5", "add_hdf5_sub_group", "default_model_data", "light_model_data", "dump_object_to_hdf5_from_list_attribute", "dump_object_to_hdf5_from_dict_attribute", "dump_object_to_hdf5_from_str_attribute", "dump_object_to_hdf5_from_iteratable", "dump_object_to_hdf5", "save_smash_model_to_hdf5", "load_hdf5_file", "read_hdf5_to_dict"] +__all__ = ["open_hdf5", "add_hdf5_sub_group", "generate_light_smash_object_structure", "generate_medium_smash_object_structure", "generate_object_structure", "generate_smash_object_structure", "dump_object_to_hdf5_from_list_attribute", "dump_object_to_hdf5_from_dict_attribute", "dump_object_to_hdf5_from_str_attribute", "dump_object_to_hdf5_from_iteratable", "dump_object_to_hdf5", "save_smash_model_to_hdf5", "load_hdf5_file", "read_hdf5_to_dict"] @@ -51,52 +51,124 @@ def add_hdf5_sub_group(hdf5, subgroup=None): return hdf5 - -def default_model_data(structure: str,structure_parameters=STRUCTURE_PARAMETERS,structure_states=STRUCTURE_STATES): +def generate_light_smash_object_structure(structure: str,structure_parameters=STRUCTURE_PARAMETERS,structure_states=STRUCTURE_STATES): return { - "setup": ["dt", "end_time", "start_time", 
"structure", "_ntime_step"], - "mesh": ["active_cell", "area", "code", "dx", "flwdir", "nac", "ng", "path", "ymax", "xmin", "nrow", "ncol", "gauge_pos", "flwacc"], - "input_data": ["mean_prcp", "mean_pet", "qobs"], + "setup": ["dt", "end_time", "start_time"], + "mesh": ["active_cell", "area", "code", "dx", "ng", "ymax", "xmin", "nrow", "ncol", "gauge_pos", "flwacc"], + "input_data": ["qobs"], "parameters": structure_parameters[ structure ], # only calibrated Model param will be stored - "states": structure_states[ - structure - ], # only initial Model states will be stored "output": [ { "fstates": structure_states[structure] }, # only final Model states will be stored "qsim", - "cost", - "cost_jobs", - "cost_jreg" ], } - -def light_model_data(structure: str,structure_parameters=STRUCTURE_PARAMETERS,structure_states=STRUCTURE_STATES): +def generate_medium_smash_object_structure(structure: str,structure_parameters=STRUCTURE_PARAMETERS,structure_states=STRUCTURE_STATES): return { - "setup": ["dt", "end_time", "start_time"], - "mesh": ["active_cell", "area", "code", "dx", "ng", "ymax", "xmin", "nrow", "ncol", "gauge_pos", "flwacc"], - "input_data": ["qobs"], + "setup": ["dt", "end_time", "start_time", "structure", "_ntime_step"], + "mesh": ["active_cell", "area", "code", "dx", "flwdir", "nac", "ng", "path", "ymax", "xmin", "nrow", "ncol", "gauge_pos", "flwacc"], + "input_data": ["mean_prcp", "mean_pet", "qobs"], "parameters": structure_parameters[ structure ], # only calibrated Model param will be stored + "states": structure_states[ + structure + ], # only initial Model states will be stored "output": [ { "fstates": structure_states[structure] }, # only final Model states will be stored "qsim", + "cost", + "cost_jobs", + "cost_jreg" ], } + +def generate_object_structure(instance): + + key_data={} + key_list=list() + return_list=False + + for attr in dir(instance): + + if not attr.startswith("_") and not attr in ["from_handle", "copy"]: + + try: + + value = getattr(instance, attr) + + if isinstance(value, np.ndarray): + + if value.dtype == "object" or value.dtype.char == "U": + value = value.astype("S") + + #key_data.update({attr:value}) + key_list.append(attr) + return_list=True + + elif isinstance(value,(str,float,int)): + + #key_data.update({attr:value}) + key_list.append(attr) + return_list=True + + else: + + depp_key_data=generate_object_structure(value) + + if (len(depp_key_data)>0): + key_data.update({attr:depp_key_data}) + + except: + + pass + + if return_list: + + for attr, value in key_data.items(): + key_list.append({attr:value}) + + return key_list + + else: + + return key_data + + + +def generate_smash_object_structure(instance,typeofstructure="medium"): + + structure=instance.setup.structure + + if typeofstructure=="light": + + key_data=generate_light_smash_object_structure(structure) + + elif typeofstructure=="medium": + + key_data=generate_medium_smash_object_structure(structure) + + elif typeofstructure=="full": + + key_data=generate_object_structure(instance) + + return key_data + + + def dump_object_to_hdf5_from_list_attribute(hdf5,instance,list_attr): if isinstance(list_attr,list): @@ -132,34 +204,34 @@ def dump_object_to_hdf5_from_dict_attribute(hdf5,instance,dict_attr): if isinstance(dict_attr,dict): - for key, attr in dict_attr.items(): + for attr, value in dict_attr.items(): - hdf5=add_hdf5_sub_group(hdf5, subgroup=key) + hdf5=add_hdf5_sub_group(hdf5, subgroup=attr) try: - sub_instance=getattr(instance, key) + sub_instance=getattr(instance, attr) except: 
sub_instance=instance - if isinstance(attr,dict): + if isinstance(value,dict): - dump_object_to_hdf5_from_dict_attribute(hdf5[key], sub_instance, attr) + dump_object_to_hdf5_from_dict_attribute(hdf5[attr], sub_instance, value) - if isinstance(attr,list): + if isinstance(value,list): - dump_object_to_hdf5_from_list_attribute(hdf5[key], sub_instance, attr) + dump_object_to_hdf5_from_list_attribute(hdf5[attr], sub_instance, value) - elif isinstance(attr,str): + elif isinstance(value,str): - dump_object_to_hdf5_from_str_attribute(hdf5[key], sub_instance, attr) + dump_object_to_hdf5_from_str_attribute(hdf5[attr], sub_instance, value) else : raise ValueError( - f"unconsistant {attr} in {dict_attr}. {attr} must be a instance of dict, list or str" + f"Bad type of '{attr}' in '{dict_attr}'. Dict({attr}) must be a instance of dict, list or str" ) else: @@ -233,35 +305,32 @@ def dump_object_to_hdf5(f_hdf5, instance, keys_data, location="./", replace=Fals hdf5.close() -def save_smash_model_to_hdf5(path_to_hdf5, instance, keys_data="default", location="./", replace=True): +def save_smash_model_to_hdf5(path_to_hdf5, instance, keys_data=None, content="medium", location="./", replace=True): - if isinstance(keys_data,str): + if content == "light": - if keys_data == "default": - - keys_data=default_model_data(instance.setup.structure) - - elif keys_data == "full": - - #to do - keys_data=default_model_data(instance.setup.structure) - - elif keys_data == "light": - - keys_data=light_model_data(instance.setup.structure) + keys_data=generate_light_smash_object_structure(instance.setup.structure) + + elif content == "medium": + + keys_data=generate_medium_smash_object_structure(instance.setup.structure) + + elif content == "full": + + keys_data=generate_object_structure(instance) - if isinstance(keys_data,dict): + if isinstance(keys_data,(dict,list)): dump_object_to_hdf5(path_to_hdf5, instance, keys_data, location=location, replace=replace) else: raise ValueError( - f"{keys_data} must be a instance of str or dict." + f"{keys_data} must be a instance of list or dict." 
) - +#Todo read_hdf5_to_model_object def load_hdf5_file(f_hdf5): From 8746f6b95518b7e48a95b1e993f36fb03c0a5272 Mon Sep 17 00:00:00 2001 From: Jay-Allemand Maxime Date: Thu, 13 Apr 2023 11:01:25 +0200 Subject: [PATCH 06/73] add functions from model.io to reload a full model object --- smash/io/multi_model_io.py | 154 ++++++++++++++++++++++++++++++++++++- 1 file changed, 152 insertions(+), 2 deletions(-) diff --git a/smash/io/multi_model_io.py b/smash/io/multi_model_io.py index e267009a..53100eb9 100644 --- a/smash/io/multi_model_io.py +++ b/smash/io/multi_model_io.py @@ -9,11 +9,27 @@ from smash.io._error import ReadHDF5MethodError + +from smash.solver._mwd_setup import SetupDT +from smash.solver._mwd_mesh import MeshDT +from smash.solver._mwd_input_data import Input_DataDT +from smash.solver._mwd_parameters import ParametersDT +from smash.solver._mwd_states import StatesDT +from smash.solver._mwd_output import OutputDT + +from smash.core._build_model import _build_mesh + + + import os import errno import warnings import h5py import numpy as np +import pandas as pd +import smash + + __all__ = ["open_hdf5", "add_hdf5_sub_group", "generate_light_smash_object_structure", "generate_medium_smash_object_structure", "generate_object_structure", "generate_smash_object_structure", "dump_object_to_hdf5_from_list_attribute", "dump_object_to_hdf5_from_dict_attribute", "dump_object_to_hdf5_from_str_attribute", "dump_object_to_hdf5_from_iteratable", "dump_object_to_hdf5", "save_smash_model_to_hdf5", "load_hdf5_file", "read_hdf5_to_dict"] @@ -94,6 +110,15 @@ def generate_medium_smash_object_structure(structure: str,structure_parameters=S } +def generate_full_smash_object_structure(instance): + + key_data=generate_object_structure(instance) + + key_list=list() + key_list.append(key_data) + key_list.append("_last_update") + + return key_list def generate_object_structure(instance): @@ -163,7 +188,7 @@ def generate_smash_object_structure(instance,typeofstructure="medium"): elif typeofstructure=="full": - key_data=generate_object_structure(instance) + key_data=generate_full_smash_object_structure(instance) return key_data @@ -317,7 +342,7 @@ def save_smash_model_to_hdf5(path_to_hdf5, instance, keys_data=None, content="me elif content == "full": - keys_data=generate_object_structure(instance) + keys_data=generate_full_smash_object_structure(instance) if isinstance(keys_data,(dict,list)): @@ -379,3 +404,128 @@ def read_hdf5_to_dict(hdf5): return dictionary + + +def _parse_hdf5_to_derived_type(hdf5_ins, derived_type): + for ds in hdf5_ins.keys(): + if isinstance(hdf5_ins[ds], h5py.Group): + hdf5_ins_imd = hdf5_ins[ds] + + _parse_hdf5_to_derived_type(hdf5_ins_imd, getattr(derived_type, ds)) + + else: + setattr(derived_type, ds, hdf5_ins[ds][:]) + + for attr in hdf5_ins.attrs.keys(): + setattr(derived_type, attr, hdf5_ins.attrs[attr]) + + + + +def read_hdf5_to_model_object(path: str) -> Model: + """ + Read Model object. + + Parameters + ---------- + path : str + The file path. + + Returns + ------- + Model : + A Model object loaded from HDF5 file. + + Raises + ------ + FileNotFoundError: + If file not found. + ReadHDF5MethodError: + If file not created with `save_model`. + + See Also + -------- + save_model: Save Model object. + Model: Primary data structure of the hydrological model `smash`. 
+ + Examples + -------- + >>> setup, mesh = smash.load_dataset("cance") + >>> model = smash.Model(setup, mesh) + >>> model + Structure: 'gr-a' + Spatio-Temporal dimension: (x: 28, y: 28, time: 1440) + Last update: Initialization + + Save Model + + >>> smash.save_model(model, "model.hdf5") + + Read Model + + >>> model_rld = smash.read_model("model.hdf5") + >>> model_rld + Structure: 'gr-a' + Spatio-Temporal dimension: (x: 28, y: 28, time: 1440) + Last update: Initialization + """ + + if os.path.isfile(path): + with h5py.File(path, "r") as f: + + instance = smash.Model(None, None) + + if "descriptor_name" in f["setup"].keys(): + nd = f["setup"]["descriptor_name"].size + + else: + nd = 0 + + instance.setup = SetupDT(nd, f["mesh"].attrs["ng"]) + + _parse_hdf5_to_derived_type(f["setup"], instance.setup) + + st = pd.Timestamp(instance.setup.start_time) + + et = pd.Timestamp(instance.setup.end_time) + + instance.setup._ntime_step = ( + et - st + ).total_seconds() / instance.setup.dt + + instance.mesh = MeshDT( + instance.setup, + f["mesh"].attrs["nrow"], + f["mesh"].attrs["ncol"], + f["mesh"].attrs["ng"], + ) + + _parse_hdf5_to_derived_type(f["mesh"], instance.mesh) + + _build_mesh(instance.setup, instance.mesh) + + instance.input_data = Input_DataDT(instance.setup, instance.mesh) + + instance.parameters = ParametersDT(instance.mesh) + + instance.states = StatesDT(instance.mesh) + + instance.output = OutputDT(instance.setup, instance.mesh) + + for derived_type_key in [ + "input_data", + "parameters", + "states", + "output", + ]: + _parse_hdf5_to_derived_type( + f[derived_type_key], getattr(instance, derived_type_key) + ) + + instance._last_update = f.attrs["_last_update"] + + return instance + + + else: + raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), path) From 8999e60bea71759aec1bf80e48c796d19f5b7d6c Mon Sep 17 00:00:00 2001 From: Jay-Allemand Maxime Date: Thu, 13 Apr 2023 11:15:09 +0200 Subject: [PATCH 07/73] Add generate_smah_model_object as a user smash function Add a test script for testing multi_model_io --- io-multi-model_tests.py | 78 ++++++++++++++++++++++++++++++++++++++ smash/__init__.py | 3 +- smash/io/multi_model_io.py | 21 +++++----- 3 files changed, 92 insertions(+), 10 deletions(-) create mode 100644 io-multi-model_tests.py diff --git a/io-multi-model_tests.py b/io-multi-model_tests.py new file mode 100644 index 00000000..14c58846 --- /dev/null +++ b/io-multi-model_tests.py @@ -0,0 +1,78 @@ +import smash +import numpy as np + + +setup, mesh = smash.load_dataset("cance") +model = smash.Model(setup, mesh) +model.run(inplace=True) + +#generate the structure of the object: it is a dict of key:data to save: typeofstructure={light,medium,full} +keys_data=smash.generate_smash_object_structure(model,typeofstructure="medium") +print(keys_data) +#add a new data to save: +keys_data["parameters"].append('ci') + +#Save a single smash model +smash.save_smash_model_to_hdf5("./model_light.hdf5", model, content="light", replace=True) +smash.save_smash_model_to_hdf5("./model_medium.hdf5", model, content="medium", replace=True) +smash.save_smash_model_to_hdf5("./model_full.hdf5", model, content="full", replace=True) +smash.save_smash_model_to_hdf5("./model_user.hdf5", model, keys_data=keys_data, replace=True) + +#view the hdf5 file +hdf5=smash.io.multi_model_io.open_hdf5("./model_user.hdf5") +hdf5.keys() +hdf5["mesh"].keys() +hdf5["parameters"].keys() +hdf5["output"].keys() +hdf5["output"].attrs.keys() +hdf5["output/fstates"].keys() +hdf5["setup"].attrs.keys() +hdf5.close() + + 
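+#the file is ordinary HDF5, so plain h5py can inspect it too (a quick sketch,
+#assuming model_user.hdf5 was written above)
+import h5py
+with h5py.File("./model_user.hdf5", "r") as f:
+    f.visit(print)  #print the path of every group and dataset
+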
+#save multi smash model at different place +smash.save_smash_model_to_hdf5("./multi_model.hdf5", model,location="model1",replace=True) +smash.save_smash_model_to_hdf5("./multi_model.hdf5", model,location="model2",replace=False) + + +hdf5=smash.io.multi_model_io.open_hdf5("./multi_model.hdf5") +hdf5.keys() +hdf5["model2"]["setup"].attrs.keys() +hdf5["model2"]["mesh"].keys() +hdf5["model2"]["output"].keys() +hdf5["model2"]["output"].attrs.keys() +hdf5.close() + +#manually group different object in an hdf5 +hdf5=smash.io.multi_model_io.open_hdf5("./model_subgroup.hdf5", replace=True) +hdf5=smash.io.multi_model_io.add_hdf5_sub_group(hdf5, subgroup="model1") +keys_data=smash.io.multi_model_io.generate_smash_object_structure(model,typeofstructure="medium") +smash.io.multi_model_io.dump_object_to_hdf5_from_iteratable(hdf5["model1"], model, keys_data) + +hdf5=smash.io.multi_model_io.open_hdf5("./model_subgroup.hdf5", replace=False) +hdf5=smash.io.multi_model_io.add_hdf5_sub_group(hdf5, subgroup="model2") +keys_data=smash.io.multi_model_io.generate_smash_object_structure(model,typeofstructure="medium") +smash.io.multi_model_io.dump_object_to_hdf5_from_iteratable(hdf5["model2"], model, keys_data) + +hdf5.keys() +hdf5["model1"].keys() +hdf5["model2"].keys() +hdf5.close() + + +#load an hdf5 file to a dictionary +dictionary=smash.load_hdf5_file("./multi_model.hdf5") +dictionary["model1"].keys() +dictionary["model1"]["mesh"].keys() + +#read only a part of an hdf5 file +hdf5=smash.io.multi_model_io.open_hdf5("./multi_model.hdf5") +dictionary=smash.io.multi_model_io.read_hdf5_to_dict(hdf5["model1"]) +dictionary.keys() + +#reload a full model object +model_reloaded=smash.load_hdf5_file("./model_full.hdf5",as_model=True) +model_reloaded +model_reloaded.run() + + diff --git a/smash/__init__.py b/smash/__init__.py index 6c38d428..845e1f45 100644 --- a/smash/__init__.py +++ b/smash/__init__.py @@ -12,7 +12,7 @@ from smash.io.mesh_io import save_mesh, read_mesh from smash.io.model_io import save_model, read_model from smash.io.model_ddt_io import save_model_ddt, read_model_ddt -from smash.io.multi_model_io import save_smash_model_to_hdf5, load_hdf5_file +from smash.io.multi_model_io import save_smash_model_to_hdf5, load_hdf5_file, generate_smash_object_structure from smash.dataset.load import load_dataset @@ -45,6 +45,7 @@ def __getattr__(name): "read_model_ddt", "save_smash_model_to_hdf5", "load_hdf5_file", + "generate_smash_object_structure", "load_dataset", ] diff --git a/smash/io/multi_model_io.py b/smash/io/multi_model_io.py index 53100eb9..83f4464c 100644 --- a/smash/io/multi_model_io.py +++ b/smash/io/multi_model_io.py @@ -355,15 +355,20 @@ def save_smash_model_to_hdf5(path_to_hdf5, instance, keys_data=None, content="me ) -#Todo read_hdf5_to_model_object - -def load_hdf5_file(f_hdf5): +def load_hdf5_file(f_hdf5,as_model=False): - hdf5=open_hdf5(f_hdf5, replace=False) - dictionary=read_hdf5_to_dict(hdf5) - hdf5.close() - return dictionary + if as_model: + + instance=read_hdf5_to_model_object(f_hdf5) + return instance + + else: + + hdf5=open_hdf5(f_hdf5, replace=False) + dictionary=read_hdf5_to_dict(hdf5) + hdf5.close() + return dictionary def read_hdf5_to_dict(hdf5): @@ -420,8 +425,6 @@ def _parse_hdf5_to_derived_type(hdf5_ins, derived_type): setattr(derived_type, attr, hdf5_ins.attrs[attr]) - - def read_hdf5_to_model_object(path: str) -> Model: """ Read Model object. 
From 92bc5bf1fc4a12a3192fcd41f08e0fd52d7ee21c Mon Sep 17 00:00:00 2001
From: Jay-Allemand Maxime
Date: Thu, 13 Apr 2023 16:40:37 +0200
Subject: [PATCH 08/73] Fix attribute values being stored in a list when
 reading. Add a function to create a dictionary from the model object (or
 any other). Update the test script.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 io-multi-model_tests.py    |  4 +++
 smash/io/multi_model_io.py | 54 +++++++++++++++++++++++++++++++++++---
 2 files changed, 54 insertions(+), 4 deletions(-)

diff --git a/io-multi-model_tests.py b/io-multi-model_tests.py
index 14c58846..3b513522 100644
--- a/io-multi-model_tests.py
+++ b/io-multi-model_tests.py
@@ -60,6 +60,10 @@
 hdf5.close()
 
 
+#dump model object to a dictionnay
+dictionary=smash.io.multi_model_io.dump_object_to_dictionary(model)
+
+
 #load an hdf5 file to a dictionary
 dictionary=smash.load_hdf5_file("./multi_model.hdf5")
 dictionary["model1"].keys()

diff --git a/smash/io/multi_model_io.py b/smash/io/multi_model_io.py
index 83f4464c..f1de345e 100644
--- a/smash/io/multi_model_io.py
+++ b/smash/io/multi_model_io.py
@@ -225,6 +225,48 @@ def dump_object_to_hdf5_from_list_attribute(hdf5,instance,list_attr):
         )
 
 
+
+
+def dump_object_to_dictionary(instance):
+
+    key_data={}
+    key_list=list()
+    return_list=False
+
+    for attr in dir(instance):
+
+        if not attr.startswith("_") and not attr in ["from_handle", "copy"]:
+
+            try:
+
+                value = getattr(instance, attr)
+
+                if isinstance(value, np.ndarray):
+
+                    if value.dtype == "object" or
value.dtype.char == "U": - value = value.astype("S") - - key_data.update({attr:value}) - - elif isinstance(value,(str,float,int)): - - key_data.update({attr:value}) - - else: - - depp_key_data=generate_object_structure(value) - - if (len(depp_key_data)>0): - key_data.update({attr:depp_key_data}) - - except: - - pass - - return key_data - - - def dump_object_to_hdf5_from_dict_attribute(hdf5,instance,dict_attr): if isinstance(dict_attr,dict): @@ -364,6 +323,7 @@ def dump_object_to_hdf5_from_iteratable(hdf5, instance, iteratable): ) + def dump_object_to_hdf5(f_hdf5, instance, keys_data, location="./", replace=False): hdf5=open_hdf5(f_hdf5, replace=replace) @@ -372,6 +332,48 @@ def dump_object_to_hdf5(f_hdf5, instance, keys_data, location="./", replace=Fals hdf5.close() + + +def dump_object_to_dictionary(instance): + + key_data={} + key_list=list() + return_list=False + + for attr in dir(instance): + + if not attr.startswith("_") and not attr in ["from_handle", "copy"]: + + try: + + value = getattr(instance, attr) + + if isinstance(value, np.ndarray): + + if value.dtype == "object" or value.dtype.char == "U": + value = value.astype("S") + + key_data.update({attr:value}) + + elif isinstance(value,(str,float,int)): + + key_data.update({attr:value}) + + else: + + depp_key_data=generate_object_structure(value) + + if (len(depp_key_data)>0): + key_data.update({attr:depp_key_data}) + + except: + + pass + + return key_data + + + def save_smash_model_to_hdf5(path_to_hdf5, instance, keys_data=None, content="medium", location="./", replace=True): if content == "light": @@ -395,7 +397,8 @@ def save_smash_model_to_hdf5(path_to_hdf5, instance, keys_data=None, content="me raise ValueError( f"{keys_data} must be a instance of list or dict." ) - + + def load_hdf5_file(f_hdf5,as_model=False): From 4bad975884b7ddfad643a3d11ea0fa3119f0c0a8 Mon Sep 17 00:00:00 2001 From: Jay-Allemand Maxime Date: Thu, 13 Apr 2023 17:51:54 +0200 Subject: [PATCH 10/73] Fix create group in hdf5, path was not well handled --- smash/io/multi_model_io.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/smash/io/multi_model_io.py b/smash/io/multi_model_io.py index cf8f3549..7a901770 100644 --- a/smash/io/multi_model_io.py +++ b/smash/io/multi_model_io.py @@ -57,12 +57,11 @@ def add_hdf5_sub_group(hdf5, subgroup=None): if subgroup is not None: - loc_path=os.path.dirname(subgroup) - - if loc_path=="": + if subgroup=="": - loc_path="./" - hdf5.require_group(subgroup) + subgroup="./" + + hdf5.require_group(subgroup) return hdf5 From b63ced801768f72765a730d7da81f39e19da7fef Mon Sep 17 00:00:00 2001 From: Jay-Allemand Maxime Date: Thu, 13 Apr 2023 18:39:46 +0200 Subject: [PATCH 11/73] Add function to dump a dic to an hdf5 Add sub_data option Update script-test-io --- io-multi-model_tests.py | 19 +++++++++++++++ smash/io/multi_model_io.py | 50 +++++++++++++++++++++++++++++++++++--- 2 files changed, 66 insertions(+), 3 deletions(-) diff --git a/io-multi-model_tests.py b/io-multi-model_tests.py index 3b513522..82a8532b 100644 --- a/io-multi-model_tests.py +++ b/io-multi-model_tests.py @@ -18,6 +18,14 @@ smash.save_smash_model_to_hdf5("./model_full.hdf5", model, content="full", replace=True) smash.save_smash_model_to_hdf5("./model_user.hdf5", model, keys_data=keys_data, replace=True) +#adding subdata +sub_data={"sub_data1":"mydata"} +sub_data.update({"sub_data2":2.5}) +sub_data.update({"sub_data3":{"sub_sub_data1":2.5,"sub_sub_data2":np.zeros(10)}}) + +smash.save_smash_model_to_hdf5("./model_sub_data.hdf5", model, 
content="medium",sub_data=sub_data, replace=True) + + #view the hdf5 file hdf5=smash.io.multi_model_io.open_hdf5("./model_user.hdf5") hdf5.keys() @@ -29,6 +37,12 @@ hdf5["setup"].attrs.keys() hdf5.close() +#view the hdf5 file with sub_data +hdf5=smash.io.multi_model_io.open_hdf5("./model_sub_data.hdf5") +hdf5.keys() +hdf5.attrs.keys() +hdf5.close() + #save multi smash model at different place smash.save_smash_model_to_hdf5("./multi_model.hdf5", model,location="model1",replace=True) @@ -63,12 +77,17 @@ #dump model object to a dictionnay dictionary=smash.io.multi_model_io.dump_object_to_dictionary(model) +######### Reading HDF5 #load an hdf5 file to a dictionary dictionary=smash.load_hdf5_file("./multi_model.hdf5") dictionary["model1"].keys() dictionary["model1"]["mesh"].keys() +#load a hdf5 file with any sub_data +dictionary=smash.load_hdf5_file("./model_sub_data.hdf5") +dictionary.keys() + #read only a part of an hdf5 file hdf5=smash.io.multi_model_io.open_hdf5("./multi_model.hdf5") dictionary=smash.io.multi_model_io.read_hdf5_to_dict(hdf5["model1"]) diff --git a/smash/io/multi_model_io.py b/smash/io/multi_model_io.py index 7a901770..2d953583 100644 --- a/smash/io/multi_model_io.py +++ b/smash/io/multi_model_io.py @@ -266,6 +266,7 @@ def dump_object_to_hdf5_from_dict_attribute(hdf5,instance,dict_attr): ) + def dump_object_to_hdf5_from_str_attribute(hdf5,instance,str_attr): if isinstance(str_attr, str): @@ -305,6 +306,44 @@ def dump_object_to_hdf5_from_str_attribute(hdf5,instance,str_attr): ) + +def dump_dict_to_hdf5(hdf5,dictionary): + + if isinstance(dictionary,dict): + + for attr, value in dictionary.items(): + + if isinstance(value,(dict,list)): + + hdf5=add_hdf5_sub_group(hdf5, subgroup=attr) + dump_dict_to_hdf5(hdf5[attr],value) + + elif isinstance(value, np.ndarray): + + if value.dtype == "object" or value.dtype.char == "U": + value = value.astype("S") + + hdf5.create_dataset( + attr, + shape=value.shape, + dtype=value.dtype, + data=value, + compression="gzip", + chunks=True, + ) + + else: + + hdf5.attrs[attr] = value + + else: + + raise ValueError( + f"{dictionary} must be a instance of dict." 
+ ) + + + def dump_object_to_hdf5_from_iteratable(hdf5, instance, iteratable): if isinstance(iteratable,list): @@ -323,11 +362,16 @@ def dump_object_to_hdf5_from_iteratable(hdf5, instance, iteratable): -def dump_object_to_hdf5(f_hdf5, instance, keys_data, location="./", replace=False): +def dump_object_to_hdf5(f_hdf5, instance, keys_data, location="./", sub_data=None, replace=False): hdf5=open_hdf5(f_hdf5, replace=replace) hdf5=add_hdf5_sub_group(hdf5, subgroup=location) dump_object_to_hdf5_from_iteratable(hdf5[location], instance, keys_data) + + if isinstance(sub_data,dict): + + dump_dict_to_hdf5(hdf5[location], sub_data) + hdf5.close() @@ -373,7 +417,7 @@ def dump_object_to_dictionary(instance): -def save_smash_model_to_hdf5(path_to_hdf5, instance, keys_data=None, content="medium", location="./", replace=True): +def save_smash_model_to_hdf5(path_to_hdf5, instance, keys_data=None, content="medium", location="./", sub_data=None, replace=True): if content == "light": @@ -389,7 +433,7 @@ def save_smash_model_to_hdf5(path_to_hdf5, instance, keys_data=None, content="me if isinstance(keys_data,(dict,list)): - dump_object_to_hdf5(path_to_hdf5, instance, keys_data, location=location, replace=replace) + dump_object_to_hdf5(path_to_hdf5, instance, keys_data, location=location, sub_data=sub_data,replace=replace) else: From eef1c1d4551a0fbcaacdc736ae9672c2e3a8ec10 Mon Sep 17 00:00:00 2001 From: Jay-Allemand Maxime Date: Mon, 29 May 2023 09:42:39 +0200 Subject: [PATCH 12/73] =?UTF-8?q?Mise=20=C3=A0=20jour=20de=20la=20branche?= =?UTF-8?q?=20io-multi-model=20depuis=20le=20d=C3=A9pot=20gitlab=20inrae?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- io-multi-model_tests.py => hdf5_io_tests.py | 35 +- smash/__init__.py | 2 +- smash/core/_constant.py | 1 + smash/core/_read_input_data.py | 14 +- smash/core/optimize/_optimize.py | 12 +- smash/io/{multi_model_io.py => hdf5_io.py} | 605 +++++++++++++++++--- smash/solver/forward/forward_db.f90 | 238 +++++--- smash/solver/optimize/mw_optimize.f90 | 2 +- smash/solver/optimize/mwd_cost.f90 | 42 +- 9 files changed, 744 insertions(+), 207 deletions(-) rename io-multi-model_tests.py => hdf5_io_tests.py (67%) rename smash/io/{multi_model_io.py => hdf5_io.py} (51%) diff --git a/io-multi-model_tests.py b/hdf5_io_tests.py similarity index 67% rename from io-multi-model_tests.py rename to hdf5_io_tests.py index 82a8532b..e290cde6 100644 --- a/io-multi-model_tests.py +++ b/hdf5_io_tests.py @@ -6,6 +6,9 @@ model = smash.Model(setup, mesh) model.run(inplace=True) +#save a single dictionary to hdf5 +smash.io.hdf5_io.save_dict_to_hdf5("saved_dictionary.hdf5",mesh) + #generate the structure of the object: it is a dict of key:data to save: typeofstructure={light,medium,full} keys_data=smash.generate_smash_object_structure(model,typeofstructure="medium") print(keys_data) @@ -27,7 +30,7 @@ #view the hdf5 file -hdf5=smash.io.multi_model_io.open_hdf5("./model_user.hdf5") +hdf5=smash.io.hdf5_io.open_hdf5("./model_user.hdf5") hdf5.keys() hdf5["mesh"].keys() hdf5["parameters"].keys() @@ -38,7 +41,7 @@ hdf5.close() #view the hdf5 file with sub_data -hdf5=smash.io.multi_model_io.open_hdf5("./model_sub_data.hdf5") +hdf5=smash.io.hdf5_io.open_hdf5("./model_sub_data.hdf5") hdf5.keys() hdf5.attrs.keys() hdf5.close() @@ -49,7 +52,7 @@ smash.save_smash_model_to_hdf5("./multi_model.hdf5", model,location="model2",replace=False) -hdf5=smash.io.multi_model_io.open_hdf5("./multi_model.hdf5") +hdf5=smash.io.hdf5_io.open_hdf5("./multi_model.hdf5") 
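#expected layout of multi_model.hdf5 after the two saves above (a sketch,
#assuming both calls succeeded with the default content="medium"):
#   /model1/{setup, mesh, input_data, parameters, states, output}
#   /model2/{setup, mesh, input_data, parameters, states, output}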
hdf5.keys() hdf5["model2"]["setup"].attrs.keys() hdf5["model2"]["mesh"].keys() @@ -58,15 +61,15 @@ hdf5.close() #manually group different object in an hdf5 -hdf5=smash.io.multi_model_io.open_hdf5("./model_subgroup.hdf5", replace=True) -hdf5=smash.io.multi_model_io.add_hdf5_sub_group(hdf5, subgroup="model1") -keys_data=smash.io.multi_model_io.generate_smash_object_structure(model,typeofstructure="medium") -smash.io.multi_model_io.dump_object_to_hdf5_from_iteratable(hdf5["model1"], model, keys_data) +hdf5=smash.io.hdf5_io.open_hdf5("./model_subgroup.hdf5", replace=True) +hdf5=smash.io.hdf5_io.add_hdf5_sub_group(hdf5, subgroup="model1") +keys_data=smash.io.hdf5_io.generate_smash_object_structure(model,typeofstructure="medium") +smash.io.hdf5_io.dump_object_to_hdf5_from_iteratable(hdf5["model1"], model, keys_data) -hdf5=smash.io.multi_model_io.open_hdf5("./model_subgroup.hdf5", replace=False) -hdf5=smash.io.multi_model_io.add_hdf5_sub_group(hdf5, subgroup="model2") -keys_data=smash.io.multi_model_io.generate_smash_object_structure(model,typeofstructure="medium") -smash.io.multi_model_io.dump_object_to_hdf5_from_iteratable(hdf5["model2"], model, keys_data) +hdf5=smash.io.hdf5_io.open_hdf5("./model_subgroup.hdf5", replace=False) +hdf5=smash.io.hdf5_io.add_hdf5_sub_group(hdf5, subgroup="model2") +keys_data=smash.io.hdf5_io.generate_smash_object_structure(model,typeofstructure="medium") +smash.io.hdf5_io.dump_object_to_hdf5_from_iteratable(hdf5["model2"], model, keys_data) hdf5.keys() hdf5["model1"].keys() @@ -74,8 +77,10 @@ hdf5.close() -#dump model object to a dictionnay -dictionary=smash.io.multi_model_io.dump_object_to_dictionary(model) +#read model object to a dictionnay +dictionary=smash.io.hdf5_io.read_object_as_dict(model) +dictionary.keys() +dictionary["mesh"]["code"] ######### Reading HDF5 @@ -89,8 +94,8 @@ dictionary.keys() #read only a part of an hdf5 file -hdf5=smash.io.multi_model_io.open_hdf5("./multi_model.hdf5") -dictionary=smash.io.multi_model_io.read_hdf5_to_dict(hdf5["model1"]) +hdf5=smash.io.hdf5_io.open_hdf5("./multi_model.hdf5") +dictionary=smash.io.hdf5_io.read_hdf5_to_dict(hdf5["model1"]) dictionary.keys() #reload a full model object diff --git a/smash/__init__.py b/smash/__init__.py index 845e1f45..78c35c59 100644 --- a/smash/__init__.py +++ b/smash/__init__.py @@ -12,7 +12,7 @@ from smash.io.mesh_io import save_mesh, read_mesh from smash.io.model_io import save_model, read_model from smash.io.model_ddt_io import save_model_ddt, read_model_ddt -from smash.io.multi_model_io import save_smash_model_to_hdf5, load_hdf5_file, generate_smash_object_structure +from smash.io.hdf5_io import save_smash_model_to_hdf5, load_hdf5_file, generate_smash_object_structure from smash.dataset.load import load_dataset diff --git a/smash/core/_constant.py b/smash/core/_constant.py index b54aeedf..5100b5e5 100644 --- a/smash/core/_constant.py +++ b/smash/core/_constant.py @@ -119,6 +119,7 @@ JREG_FUN = [ "prior", "smoothing", + "hard_smoothing", ] AUTO_WJREG = ["fast", "lcurve"] diff --git a/smash/core/_read_input_data.py b/smash/core/_read_input_data.py index b9c83654..13637306 100644 --- a/smash/core/_read_input_data.py +++ b/smash/core/_read_input_data.py @@ -62,8 +62,18 @@ def _read_qobs(setup: SetupDT, mesh: MeshDT, input_data: Input_DataDT): else: with open(path[0], "r") as f: - header = pd.Timestamp(f.readline()) - + + try: + + header_string=f.readline() + header = pd.Timestamp(header_string) + + except: + + raise ValueError( + f"Bad header {header_string} string when reading file 
'{path[0]}'. '{header_string}' may not be a date." + ) + time_diff = int((st - header).total_seconds() / setup.dt) + 1 if time_diff > 0: diff --git a/smash/core/optimize/_optimize.py b/smash/core/optimize/_optimize.py index 93705414..6b30c0dc 100644 --- a/smash/core/optimize/_optimize.py +++ b/smash/core/optimize/_optimize.py @@ -382,10 +382,11 @@ def _optimize_lbfgsb( wjreg_arr[i + 1] = instance.setup._optimize.wjreg # % break if jobs does not minimize - if (instance.output.cost_jobs - jobs_min) / ( - jobs_max - jobs_min - ) >= 0.8: - break + # Bug, todo: if we break, we should reshape the array + # ~ if (instance.output.cost_jobs - jobs_min) / ( + # ~ jobs_max - jobs_min + # ~ ) >= 0.8: + # ~ break # % bounds update for jobs and jreg jobs_min = np.min(cost_jobs_arr) @@ -962,7 +963,8 @@ def _compute_best_lcurve_weight( jreg_max: float, ): # % select the best wjreg based on the transformed lcurve and using our own method decribed in ... - + wjreg_lcurve_opt=None + if ( cost_jobs_arr.size > 2 and (jreg_max - jreg_min) > 0.0 diff --git a/smash/io/multi_model_io.py b/smash/io/hdf5_io.py similarity index 51% rename from smash/io/multi_model_io.py rename to smash/io/hdf5_io.py index 2d953583..b8a2a2de 100644 --- a/smash/io/multi_model_io.py +++ b/smash/io/hdf5_io.py @@ -31,30 +31,93 @@ -__all__ = ["open_hdf5", "add_hdf5_sub_group", "generate_light_smash_object_structure", "generate_medium_smash_object_structure", "generate_object_structure", "generate_smash_object_structure", "dump_object_to_hdf5_from_list_attribute", "dump_object_to_hdf5_from_dict_attribute", "dump_object_to_hdf5_from_str_attribute", "dump_object_to_hdf5_from_iteratable", "dump_object_to_hdf5", "save_smash_model_to_hdf5", "load_hdf5_file", "read_hdf5_to_dict"] +__all__ = ["save_object_to_hdf5", "save_dict_to_hdf5", "save_smash_model_to_hdf5", "load_hdf5_file", "read_object_as_dict"] -def open_hdf5(path, replace=False): - +def open_hdf5(path, read_only=False, replace=False): + """ + Open or create an HDF5 file. + + Parameters + ---------- + path : str + The file path. + read_only : boolean + If true the access to the hdf5 fil is in read-only mode. Multi process can read the same hdf5 file simulteneously. This is not possible when access mode are append 'a' or write 'w'. + replace: Boolean + If true, the existing hdf5file is erased + + Returns + ------- + f : + A HDF5 object. + + Examples + -------- + >>> hdf5=smash.io.multi_model_io.open_hdf5("./my_hdf5.hdf5") + >>> hdf5.keys() + >>> hdf5.attrs.keys() + """ if not path.endswith(".hdf5"): path = path + ".hdf5" - if replace==True: - - f= h5py.File(path, "w") + if read_only: + if os.path.isfile(path): + + f= h5py.File(path, "r") + + else: + + raise ValueError( + f"File {path} does not exist." + ) + else: - - f= h5py.File(path, "a") - + + if replace==True: + + f= h5py.File(path, "w") + + else: + + if os.path.isfile(path): + + f= h5py.File(path, "a") + + else: + + f= h5py.File(path, "w") + return f def add_hdf5_sub_group(hdf5, subgroup=None): - + """ + Create a new subgroup in a HDF5 object + + Parameters + ---------- + hdf5 : object + An hdf5 object opened with open_hdf5() + subgroup: str + Path to a subgroub that must be created + + Returns + ------- + hdf5 : + the HDF5 object. 
+ + Examples + -------- + >>> hdf5=smash.io.multi_model_io.open_hdf5("./model_subgroup.hdf5", replace=True) + >>> hdf5=smash.io.multi_model_io.add_hdf5_sub_group(hdf5, subgroup="mygroup") + >>> hdf5.keys() + >>> hdf5.attrs.keys() + """ if subgroup is not None: if subgroup=="": @@ -67,7 +130,23 @@ def add_hdf5_sub_group(hdf5, subgroup=None): def generate_light_smash_object_structure(structure: str,structure_parameters=STRUCTURE_PARAMETERS,structure_states=STRUCTURE_STATES): - + """ + this function create a light dictionnary containing the required data-structure to save a smash model object to an hdf5 file + + Parameters + ---------- + structure : str + the smash model structure used {gr-a, gr-b, gr-c, gr-d} + structure_parameters: dict + the dict containing the parameter to be saved for each model structure + structure_states: dict + the dict containing the states to be saved for each model structure + + Returns + ------- + dict : + A light dictionary matching the structure of the smash model object. + """ return { "setup": ["dt", "end_time", "start_time"], "mesh": ["active_cell", "area", "code", "dx", "ng", "ymax", "xmin", "nrow", "ncol", "gauge_pos", "flwacc"], @@ -86,7 +165,23 @@ def generate_light_smash_object_structure(structure: str,structure_parameters=ST def generate_medium_smash_object_structure(structure: str,structure_parameters=STRUCTURE_PARAMETERS,structure_states=STRUCTURE_STATES): - + """ + this function create a medium dictionnary containing the required data-structure to save a smash model object to an hdf5 file + + Parameters + ---------- + structure : str + the smash model structure used {gr-a, gr-b, gr-c, gr-d} + structure_parameters: dict + the dict containing the parameter to be saved for each model structure + structure_states: dict + the dict containing the states to be saved for each model structure + + Returns + ------- + dict : + A medium dictionary matching the structure of the smash model object. + """ return { "setup": ["dt", "end_time", "start_time", "structure", "_ntime_step"], "mesh": ["active_cell", "area", "code", "dx", "flwdir", "nac", "ng", "path", "ymax", "xmin", "nrow", "ncol", "gauge_pos", "flwacc"], @@ -110,7 +205,19 @@ def generate_medium_smash_object_structure(structure: str,structure_parameters=S def generate_full_smash_object_structure(instance): + """ + this function create a full dictionnary containing all the structure of an smash model object in order to save it to an hdf5 + + Parameters + ---------- + instance : object + a custom python object. + Returns + ------- + list : + A list containing keys and dictionary matching the structure of the python object. + """ key_data=generate_object_structure(instance) key_list=list() @@ -120,8 +227,22 @@ def generate_full_smash_object_structure(instance): return key_list + + def generate_object_structure(instance): + """ + this function create a full dictionnary containing all the structure of an object in order to save it to an hdf5 + + Parameters + ---------- + instance : object + a custom python object. + Returns + ------- + list or dict : + A list or dictionary matching the structure of the python object. 
+ """ key_data={} key_list=list() return_list=False @@ -134,7 +255,10 @@ def generate_object_structure(instance): value = getattr(instance, attr) - if isinstance(value, np.ndarray): + if isinstance(value, (np.ndarray,list)): + + if isinstance(value,list): + value=np.array(value) if value.dtype == "object" or value.dtype.char == "U": value = value.astype("S") @@ -173,8 +297,24 @@ def generate_object_structure(instance): + + def generate_smash_object_structure(instance,typeofstructure="medium"): + """ + this function create a dictionnary containing a complete ar partial structure of an object in order to save it to an hdf5 + + Parameters + ---------- + instance : object + a custom python object. + typeofstructure : str + the structure type : light, medium, full + Returns + ------- + dict : + A list or dictionary matching the structure of the python object. + """ structure=instance.setup.structure if typeofstructure=="light": @@ -193,8 +333,20 @@ def generate_smash_object_structure(instance,typeofstructure="medium"): + def dump_object_to_hdf5_from_list_attribute(hdf5,instance,list_attr): - + """ + dump a object to a hdf5 file from a list of attributes + + Parameters + ---------- + hdf5 : object + an hdf5 object + instance : object + a custom python object. + list_attr : list + a list of attribute + """ if isinstance(list_attr,list): for attr in list_attr: @@ -226,7 +378,18 @@ def dump_object_to_hdf5_from_list_attribute(hdf5,instance,list_attr): def dump_object_to_hdf5_from_dict_attribute(hdf5,instance,dict_attr): - + """ + dump a object to a hdf5 file from a dictionary of attributes + + Parameters + ---------- + hdf5 : object + an hdf5 object + instance : object + a custom python object. + dict_attr : dict + a dictionary of attribute + """ if isinstance(dict_attr,dict): for attr, value in dict_attr.items(): @@ -268,14 +431,28 @@ def dump_object_to_hdf5_from_dict_attribute(hdf5,instance,dict_attr): def dump_object_to_hdf5_from_str_attribute(hdf5,instance,str_attr): - + """ + dump a object to a hdf5 file from a string attribute + + Parameters + ---------- + hdf5 : object + an hdf5 object + instance : object + a custom python object. + str_attr : str + a string attribute + """ if isinstance(str_attr, str): try: value = getattr(instance, str_attr) - if isinstance(value, np.ndarray): + if isinstance(value, (np.ndarray,list)): + + if isinstance(value,list): + value=np.array(value) if value.dtype == "object" or value.dtype.char == "U": value = value.astype("S") @@ -289,6 +466,10 @@ def dump_object_to_hdf5_from_str_attribute(hdf5,instance,str_attr): chunks=True, ) + elif value is None: + + hdf5.attrs[str_attr] = "_None_" + else: hdf5.attrs[str_attr] = value @@ -296,7 +477,7 @@ def dump_object_to_hdf5_from_str_attribute(hdf5,instance,str_attr): except: raise ValueError( - f"Unable to get attribute {str_attr} in {instance}" + f"Unable to dump attribute {str_attr} with value {value} from {instance}" ) else: @@ -307,34 +488,107 @@ def dump_object_to_hdf5_from_str_attribute(hdf5,instance,str_attr): -def dump_dict_to_hdf5(hdf5,dictionary): +def dump_object_to_hdf5_from_iteratable(hdf5, instance, iteratable=None): + """ + dump a object to a hdf5 file from a iteratable object list or dict + + Parameters + ---------- + hdf5 : object + an hdf5 object + instance : object + a custom python object. 
+ iteratable : list | dict + a list or a dict of attribute + Examples + -------- + setup, mesh = smash.load_dataset("cance") + model = smash.Model(setup, mesh) + model.run(inplace=True) + + hdf5=smash.io.multi_model_io.open_hdf5("./model.hdf5", replace=True) + hdf5=smash.io.multi_model_io.add_hdf5_sub_group(hdf5, subgroup="model1") + keys_data=smash.io.multi_model_io.generate_smash_object_structure(model,typeofstructure="medium") + smash.io.multi_model_io.dump_object_to_hdf5_from_iteratable(hdf5["model1"], model, keys_data) + + hdf5=smash.io.multi_model_io.open_hdf5("./model.hdf5", replace=False) + hdf5=smash.io.multi_model_io.add_hdf5_sub_group(hdf5, subgroup="model2") + keys_data=smash.io.multi_model_io.generate_smash_object_structure(model,typeofstructure="light") + smash.io.multi_model_io.dump_object_to_hdf5_from_iteratable(hdf5["model2"], model, keys_data) + """ + if isinstance(iteratable,list): + + dump_object_to_hdf5_from_list_attribute(hdf5,instance,iteratable) + + elif isinstance(iteratable,dict): + + dump_object_to_hdf5_from_dict_attribute(hdf5,instance,iteratable) + + else : + + raise ValueError( + f"{iteratable} must be a instance of list or dict." + ) + + + +def dump_dict_to_hdf5(hdf5,dictionary): + """ + dump a dictionary to an hdf5 file + + Parameters + ---------- + hdf5 : object + an hdf5 object + dictionary : dict + a custom python dictionary + """ if isinstance(dictionary,dict): for attr, value in dictionary.items(): - if isinstance(value,(dict,list)): - - hdf5=add_hdf5_sub_group(hdf5, subgroup=attr) - dump_dict_to_hdf5(hdf5[attr],value) - - elif isinstance(value, np.ndarray): + try: - if value.dtype == "object" or value.dtype.char == "U": - value = value.astype("S") + if isinstance(value,(dict)): + + hdf5=add_hdf5_sub_group(hdf5, subgroup=attr) + dump_dict_to_hdf5(hdf5[attr],value) + + elif isinstance(value, (np.ndarray,list)): + + if isinstance(value,(list)): + value=np.array(value) + + if value.dtype == "object" or value.dtype.char == "U": + value = value.astype("S") + + #remove dataset if exist + if attr in hdf5.keys(): + del hdf5[attr] + + hdf5.create_dataset( + attr, + shape=value.shape, + dtype=value.dtype, + data=value, + compression="gzip", + chunks=True, + ) - hdf5.create_dataset( - attr, - shape=value.shape, - dtype=value.dtype, - data=value, - compression="gzip", - chunks=True, - ) - - else: + elif value is None: + + hdf5.attrs[attr] = "_None_" + + else: + + hdf5.attrs[attr] = value - hdf5.attrs[attr] = value + except: + + raise ValueError( + f"Unable to save attribute {attr} with value {value}" + ) else: @@ -344,25 +598,65 @@ def dump_dict_to_hdf5(hdf5,dictionary): -def dump_object_to_hdf5_from_iteratable(hdf5, instance, iteratable): +def save_dict_to_hdf5(path_to_hdf5,dictionary=None,location="./",replace=False): + """ + dump a dictionary to an hdf5 file + + Parameters + ---------- + path_to_hdf5 : str + path to the hdf5 file + dictionary : dict | None + a dictionary containing the data to be saved + location : str + path location or subgroup where to write data in the hdf5 file + replace : Boolean + replace an existing hdf5 file. 
Default is False - if isinstance(iteratable,list): - - dump_object_to_hdf5_from_list_attribute(hdf5,instance,iteratable) + Examples + -------- + setup, mesh = smash.load_dataset("cance") + model = smash.Model(setup, mesh) + model.run(inplace=True) + + smash.io.multi_model_io.save_dict_to_hdf5("saved_dictionary.hdf5",mesh) + """ + if isinstance(dictionary,dict): - elif isinstance(iteratable,dict): + hdf5=open_hdf5(path_to_hdf5, replace=replace) + hdf5=add_hdf5_sub_group(hdf5, subgroup=location) + dump_dict_to_hdf5(hdf5[location], dictionary) - dump_object_to_hdf5_from_dict_attribute(hdf5,instance,iteratable) - - else : + else: raise ValueError( - f"{iteratable} must be a instance of list or dict." + f"The input {dictionary} must be a instance of dict." ) -def dump_object_to_hdf5(f_hdf5, instance, keys_data, location="./", sub_data=None, replace=False): +def save_object_to_hdf5(f_hdf5, instance, keys_data=None, location="./", sub_data=None, replace=False): + """ + dump an object to an hdf5 file + + Parameters + ---------- + f_hdf5 : str + path to the hdf5 file + instance : object + python object + keys_data : list | dict + a list or a dictionary of the attribute to be saved + location : str + path location or subgroup where to write data in the hdf5 file + sub_data : dict | None + a dictionary containing extra-data to be saved + replace : Boolean + replace an existing hdf5 file. Default is False + """ + + if keys_data is None: + keys_data=generate_object_structure(instance) hdf5=open_hdf5(f_hdf5, replace=replace) hdf5=add_hdf5_sub_group(hdf5, subgroup=location) @@ -376,49 +670,50 @@ def dump_object_to_hdf5(f_hdf5, instance, keys_data, location="./", sub_data=Non +def save_smash_model_to_hdf5(path_to_hdf5, instance, keys_data=None, content="medium", location="./", sub_data=None, replace=True): + """ + dump an object to an hdf5 file -def dump_object_to_dictionary(instance): + Parameters + ---------- + path_to_hdf5 : str + path to the hdf5 file + instance : object + python object + keys_data : list | dict + a list or a dictionary of the attribute to be saved + content : str + {light,medium,full} + location : str + path location or subgroup where to write data in the hdf5 file + sub_data : dict | None + a dictionary containing extra-data to be saved + replace : Boolean + replace an existing hdf5 file. 
Default is False - key_data={} - key_list=list() - return_list=False + Examples + -------- + setup, mesh = smash.load_dataset("cance") + model = smash.Model(setup, mesh) + model.run(inplace=True) - for attr in dir(instance): - - if not attr.startswith("_") and not attr in ["from_handle", "copy"]: - - try: - - value = getattr(instance, attr) - - if isinstance(value, np.ndarray): - - if value.dtype == "object" or value.dtype.char == "U": - value = value.astype("S") - - key_data.update({attr:value}) - - elif isinstance(value,(str,float,int)): - - key_data.update({attr:value}) - - else: - - depp_key_data=generate_object_structure(value) - - if (len(depp_key_data)>0): - key_data.update({attr:depp_key_data}) - - except: - - pass - - return key_data - - - -def save_smash_model_to_hdf5(path_to_hdf5, instance, keys_data=None, content="medium", location="./", sub_data=None, replace=True): + keys_data=smash.generate_smash_object_structure(model,typeofstructure="medium") + #add a new data to save: + keys_data["parameters"].append('ci') + #Save a single smash model + smash.save_smash_model_to_hdf5("./model_light.hdf5", model, content="light", replace=True) + smash.save_smash_model_to_hdf5("./model_medium.hdf5", model, content="medium", replace=True) + smash.save_smash_model_to_hdf5("./model_full.hdf5", model, content="full", replace=True) + smash.save_smash_model_to_hdf5("./model_user.hdf5", model, keys_data=keys_data, replace=True) + + #adding subdata + sub_data={"sub_data1":"mydata"} + sub_data.update({"sub_data2":2.5}) + sub_data.update({"sub_data3":{"sub_sub_data1":2.5,"sub_sub_data2":np.zeros(10)}}) + + smash.save_smash_model_to_hdf5("./model_sub_data.hdf5", model, content="medium",sub_data=sub_data, replace=True) + """ if content == "light": keys_data=generate_light_smash_object_structure(instance.setup.structure) @@ -433,7 +728,7 @@ def save_smash_model_to_hdf5(path_to_hdf5, instance, keys_data=None, content="me if isinstance(keys_data,(dict,list)): - dump_object_to_hdf5(path_to_hdf5, instance, keys_data, location=location, sub_data=sub_data,replace=replace) + save_object_to_hdf5(path_to_hdf5, instance, keys_data, location=location, sub_data=sub_data,replace=replace) else: @@ -443,9 +738,33 @@ def save_smash_model_to_hdf5(path_to_hdf5, instance, keys_data=None, content="me - def load_hdf5_file(f_hdf5,as_model=False): + """ + Load an hdf5 file + + Parameters + ---------- + f_hdf5 : str + path to the hdf5 file + as_model : Boolean + load the hdf5 as a smash model. 
Default is False + + Return + -------- + instance : an instance of the smash model or a dictionary + Examples + -------- + #load an hdf5 file to a dictionary + dictionary=smash.load_hdf5_file("./multi_model.hdf5") + dictionary["model1"].keys() + dictionary["model1"]["mesh"].keys() + + #reload a full model object + model_reloaded=smash.load_hdf5_file("./model_full.hdf5",as_model=True) + model_reloaded + model_reloaded.run() + """ if as_model: instance=read_hdf5_to_model_object(f_hdf5) @@ -453,14 +772,90 @@ def load_hdf5_file(f_hdf5,as_model=False): else: - hdf5=open_hdf5(f_hdf5, replace=False) + hdf5=open_hdf5(f_hdf5, read_only=True, replace=False) dictionary=read_hdf5_to_dict(hdf5) hdf5.close() return dictionary + + +def read_object_as_dict(instance): + """ + create a dictionary from a custom python object + + Parameters + ---------- + instance : object + an custom python object + + Return + ---------- + key_data: dict + an dictionary containing all keys and atributes of the object + """ + key_data={} + key_list=list() + return_list=False + + for attr in dir(instance): + + if not attr.startswith("_") and not attr in ["from_handle", "copy"]: + + try: + + value = getattr(instance, attr) + + if isinstance(value, (np.ndarray,list)): + + if isinstance(value,list): + value=np.array(value) + + if value.dtype == "object" or value.dtype.char == "U": + value = value.astype("S") + + key_data.update({attr:value}) + + elif isinstance(value,(str,float,int)): + + key_data.update({attr:value}) + + else: + + depp_key_data=read_object_as_dict(value) + + if (len(depp_key_data)>0): + key_data.update({attr:depp_key_data}) + + except: + + pass + + return key_data + + + + def read_hdf5_to_dict(hdf5): + """ + Load an hdf5 file + + Parameters + ---------- + hdf5 : str + path to the hdf5 file + + Return + -------- + dictionary : dict, a dictionary of all keys and attribute included in the hdf5 file + Examples + -------- + #read only a part of an hdf5 file + hdf5=smash.io.multi_model_io.open_hdf5("./multi_model.hdf5") + dictionary=smash.io.multi_model_io.read_hdf5_to_dict(hdf5["model1"]) + dictionary.keys() + """ dictionary={} for key,item in hdf5.items(): @@ -473,7 +868,14 @@ def read_hdf5_to_dict(hdf5): for key_attr in list_attr: - dictionary[key].update({key_attr:item.attrs[key_attr]}) + # check if value is equal to "_None_" (None string because hdf5 does not supported) + if item.attrs[key_attr] == "_None_": + + dictionary[key].update({key_attr:None}) + + else: + + dictionary[key].update({key_attr:item.attrs[key_attr]}) if str(type(item)).find("dataset") != -1: @@ -491,14 +893,22 @@ def read_hdf5_to_dict(hdf5): for key_attr in list_attr: - dictionary.update({key_attr:item.attrs[key_attr]}) + # check if value is equal to "_None_" (None string because hdf5 does not supported) + if item.attrs[key_attr] == "_None_": + dictionary[key].update({key_attr:None}) + else: + dictionary.update({key_attr:item.attrs[key_attr]}) list_attr=list(hdf5.attrs.keys()) for key_attr in list_attr: - dictionary.update({key_attr:hdf5.attrs[key_attr]}) - + # check if value is equal to "_None_" (None string because hdf5 does not supported) + if hdf5.attrs[key_attr] == "_None_": + dictionary.update({key_attr:None}) + else: + dictionary.update({key_attr:hdf5.attrs[key_attr]}) + return dictionary @@ -514,7 +924,12 @@ def _parse_hdf5_to_derived_type(hdf5_ins, derived_type): setattr(derived_type, ds, hdf5_ins[ds][:]) for attr in hdf5_ins.attrs.keys(): - setattr(derived_type, attr, hdf5_ins.attrs[attr]) + + # check if value is equal to "_None_" 
(None string because hdf5 does not supported) + if hdf5_ins.attrs[attr] == "_None_": + setattr(derived_type, attr, None) + else: + setattr(derived_type, attr, hdf5_ins.attrs[attr]) def read_hdf5_to_model_object(path: str) -> Model: diff --git a/smash/solver/forward/forward_db.f90 b/smash/solver/forward/forward_db.f90 index 4eea6833..e07781b4 100644 --- a/smash/solver/forward/forward_db.f90 +++ b/smash/solver/forward/forward_db.f90 @@ -7505,13 +7505,24 @@ SUBROUTINE COMPUTE_JREG_D(setup, mesh, input_data, parameters, & CASE ('smoothing') result1_d = REG_SMOOTHING_D(setup, mesh, setup%optimize%& & optim_parameters, parameters_matrix, parameters_matrix_d, & -& parameters_bgd_matrix, result1) - temp = setup%optimize%wjreg_fun(i)**4 +& parameters_bgd_matrix, .true., result1) + temp = setup%optimize%wjreg_fun(i)**2._sp parameters_jreg_d = parameters_jreg_d + temp*result1_d result1_d = REG_SMOOTHING_D(setup, mesh, setup%optimize%& & optim_states, states_matrix, states_matrix_d, & -& states_bgd_matrix, result1) - temp = setup%optimize%wjreg_fun(i)**4 +& states_bgd_matrix, .true., result1) + temp = setup%optimize%wjreg_fun(i)**2._sp + states_jreg_d = states_jreg_d + temp*result1_d + CASE ('hard_smoothing') + result1_d = REG_SMOOTHING_D(setup, mesh, setup%optimize%& +& optim_parameters, parameters_matrix, parameters_matrix_d, & +& parameters_bgd_matrix, .false., result1) + temp = setup%optimize%wjreg_fun(i)**2._sp + parameters_jreg_d = parameters_jreg_d + temp*result1_d + result1_d = REG_SMOOTHING_D(setup, mesh, setup%optimize%& +& optim_states, states_matrix, states_matrix_d, & +& states_bgd_matrix, .false., result1) + temp = setup%optimize%wjreg_fun(i)**2._sp states_jreg_d = states_jreg_d + temp*result1_d CASE ('distance_correlation') result1_d = DISTANCE_CORRELATION_DESCRIPTORS_D(setup, mesh, & @@ -7594,6 +7605,10 @@ SUBROUTINE COMPUTE_JREG_B(setup, mesh, input_data, parameters, & REAL(sp) :: res_b3 REAL(sp) :: res4 REAL(sp) :: res_b4 + REAL(sp) :: res5 + REAL(sp) :: res_b5 + REAL(sp) :: res6 + REAL(sp) :: res_b6 INTEGER :: branch CALL GET_PARAMETERS(mesh, parameters, parameters_matrix) CALL GET_PARAMETERS(mesh, parameters_bgd, parameters_bgd_matrix) @@ -7606,21 +7621,29 @@ SUBROUTINE COMPUTE_JREG_B(setup, mesh, input_data, parameters, & & parameters_matrix, parameters_bgd_matrix) res0 = REG_PRIOR(setup, setup%optimize%optim_states, & & states_matrix, states_bgd_matrix) - CALL PUSHCONTROL2B(2) + CALL PUSHCONTROL3B(3) CASE ('smoothing') res1 = REG_SMOOTHING(setup, mesh, setup%optimize%& -& optim_parameters, parameters_matrix, parameters_bgd_matrix) +& optim_parameters, parameters_matrix, parameters_bgd_matrix, & +& .true.) res2 = REG_SMOOTHING(setup, mesh, setup%optimize%optim_states, & -& states_matrix, states_bgd_matrix) - CALL PUSHCONTROL2B(1) +& states_matrix, states_bgd_matrix, .true.) + CALL PUSHCONTROL3B(2) + CASE ('hard_smoothing') + res3 = REG_SMOOTHING(setup, mesh, setup%optimize%& +& optim_parameters, parameters_matrix, parameters_bgd_matrix, & +& .false.) + res4 = REG_SMOOTHING(setup, mesh, setup%optimize%optim_states, & +& states_matrix, states_bgd_matrix, .false.) 
+ CALL PUSHCONTROL3B(1) CASE ('distance_correlation') - res3 = DISTANCE_CORRELATION_DESCRIPTORS(setup, mesh, input_data& + res5 = DISTANCE_CORRELATION_DESCRIPTORS(setup, mesh, input_data& & , 'params', gnp, parameters_matrix) - res4 = DISTANCE_CORRELATION_DESCRIPTORS(setup, mesh, input_data& + res6 = DISTANCE_CORRELATION_DESCRIPTORS(setup, mesh, input_data& & , 'states', gns, states_matrix) - CALL PUSHCONTROL2B(0) + CALL PUSHCONTROL3B(0) CASE DEFAULT - CALL PUSHCONTROL2B(3) + CALL PUSHCONTROL3B(4) END SELECT END DO parameters_jreg_b = jreg_b @@ -7628,35 +7651,48 @@ SUBROUTINE COMPUTE_JREG_B(setup, mesh, input_data, parameters, & states_matrix_b = 0.0_4 parameters_matrix_b = 0.0_4 DO i=setup%optimize%njr,1,-1 - CALL POPCONTROL2B(branch) + CALL POPCONTROL3B(branch) IF (branch .LT. 2) THEN IF (branch .EQ. 0) THEN result1_b = setup%optimize%wjreg_fun(i)*states_jreg_b - res_b4 = result1_b + res_b6 = result1_b CALL DISTANCE_CORRELATION_DESCRIPTORS_B(setup, mesh, & & input_data, 'states', gns, & & states_matrix, & -& states_matrix_b, res_b4) +& states_matrix_b, res_b6) result1_b = setup%optimize%wjreg_fun(i)*parameters_jreg_b - res_b3 = result1_b + res_b5 = result1_b CALL DISTANCE_CORRELATION_DESCRIPTORS_B(setup, mesh, & & input_data, 'params', gnp, & & parameters_matrix, & -& parameters_matrix_b, res_b3) +& parameters_matrix_b, res_b5) ELSE - result1_b = setup%optimize%wjreg_fun(i)**4*states_jreg_b - res_b2 = result1_b + result1_b = setup%optimize%wjreg_fun(i)**2._sp*states_jreg_b + res_b4 = result1_b CALL REG_SMOOTHING_B(setup, mesh, setup%optimize%optim_states& & , states_matrix, states_matrix_b, & -& states_bgd_matrix, res_b2) - result1_b = setup%optimize%wjreg_fun(i)**4*parameters_jreg_b - res_b1 = result1_b +& states_bgd_matrix, .false., res_b4) + result1_b = setup%optimize%wjreg_fun(i)**2._sp*& +& parameters_jreg_b + res_b3 = result1_b CALL REG_SMOOTHING_B(setup, mesh, setup%optimize%& & optim_parameters, parameters_matrix, & & parameters_matrix_b, parameters_bgd_matrix, & -& res_b1) +& .false., res_b3) END IF ELSE IF (branch .EQ. 2) THEN + result1_b = setup%optimize%wjreg_fun(i)**2._sp*states_jreg_b + res_b2 = result1_b + CALL REG_SMOOTHING_B(setup, mesh, setup%optimize%optim_states, & +& states_matrix, states_matrix_b, states_bgd_matrix& +& , .true., res_b2) + result1_b = setup%optimize%wjreg_fun(i)**2._sp*parameters_jreg_b + res_b1 = result1_b + CALL REG_SMOOTHING_B(setup, mesh, setup%optimize%& +& optim_parameters, parameters_matrix, & +& parameters_matrix_b, parameters_bgd_matrix, & +& .true., res_b1) + ELSE IF (branch .EQ. 3) THEN result1_b = setup%optimize%wjreg_fun(i)*states_jreg_b res_b0 = result1_b CALL REG_PRIOR_B(setup, setup%optimize%optim_states, & @@ -7735,12 +7771,23 @@ SUBROUTINE COMPUTE_JREG(setup, mesh, input_data, parameters, & states_jreg = states_jreg + setup%optimize%wjreg_fun(i)*result1 CASE ('smoothing') result1 = REG_SMOOTHING(setup, mesh, setup%optimize%& -& optim_parameters, parameters_matrix, parameters_bgd_matrix) +& optim_parameters, parameters_matrix, parameters_bgd_matrix, & +& .true.) + parameters_jreg = parameters_jreg + setup%optimize%wjreg_fun(i)& +& **2._sp*result1 + result1 = REG_SMOOTHING(setup, mesh, setup%optimize%optim_states& +& , states_matrix, states_bgd_matrix, .true.) + states_jreg = states_jreg + setup%optimize%wjreg_fun(i)**2._sp*& +& result1 + CASE ('hard_smoothing') + result1 = REG_SMOOTHING(setup, mesh, setup%optimize%& +& optim_parameters, parameters_matrix, parameters_bgd_matrix, & +& .false.) 
parameters_jreg = parameters_jreg + setup%optimize%wjreg_fun(i)& -& **4*result1 +& **2._sp*result1 result1 = REG_SMOOTHING(setup, mesh, setup%optimize%optim_states& -& , states_matrix, states_bgd_matrix) - states_jreg = states_jreg + setup%optimize%wjreg_fun(i)**4*& +& , states_matrix, states_bgd_matrix, .false.) + states_jreg = states_jreg + setup%optimize%wjreg_fun(i)**2._sp*& & result1 CASE ('distance_correlation') result1 = DISTANCE_CORRELATION_DESCRIPTORS(setup, mesh, & @@ -10018,13 +10065,14 @@ END FUNCTION DISTANCE_CORRELATION_DESCRIPTORS ! variations of useful results: res ! with respect to varying inputs: matrix FUNCTION REG_SMOOTHING_D(setup, mesh, optim_arr, matrix, matrix_d, & -& matrix_bgd, res) RESULT (RES_D) +& matrix_bgd, rel_to_bgd, res) RESULT (RES_D) IMPLICIT NONE TYPE(SETUPDT), INTENT(IN) :: setup TYPE(MESHDT), INTENT(IN) :: mesh INTEGER, DIMENSION(:), INTENT(IN) :: optim_arr REAL(sp), DIMENSION(:, :, :), INTENT(IN) :: matrix, matrix_bgd REAL(sp), DIMENSION(:, :, :), INTENT(IN) :: matrix_d + LOGICAL, INTENT(IN) :: rel_to_bgd REAL(sp) :: res REAL(sp) :: res_d INTRINSIC SIZE @@ -10038,9 +10086,15 @@ FUNCTION REG_SMOOTHING_D(setup, mesh, optim_arr, matrix, matrix_d, & INTEGER :: x1 INTEGER :: x2 ! matrix relative to the bgd. We don't want to penalize initial spatial variation. - mat_d = matrix_d - mat = matrix - matrix_bgd - res_d = 0.0_4 + IF (rel_to_bgd) THEN + mat_d = matrix_d + mat = matrix - matrix_bgd + res_d = 0.0_4 + ELSE + mat_d = matrix_d + mat = matrix + res_d = 0.0_4 + END IF DO i=1,SIZE(matrix, 3) IF (optim_arr(i) .GT. 0) THEN DO col=1,SIZE(matrix, 2) @@ -10074,11 +10128,12 @@ FUNCTION REG_SMOOTHING_D(setup, mesh, optim_arr, matrix, matrix_d, & IF (mesh%active_cell(row, max_col) .EQ. 0) max_col = col IF (mesh%active_cell(min_row, col) .EQ. 0) min_row = row IF (mesh%active_cell(max_row, col) .EQ. 0) max_row = row - res_d = res_d + 2*(mat(max_row, col, i)-2*2._sp*mat(row, & -& col, i)+mat(min_row, col, i)+mat(row, max_col, i)+mat(& -& row, min_col, i))*(mat_d(max_row, col, i)-2*2._sp*mat_d(& -& row, col, i)+mat_d(min_row, col, i)+mat_d(row, max_col, & -& i)+mat_d(row, min_col, i)) + res_d = res_d + 2._sp*(mat(max_row, col, i)-2._sp*mat(row& +& , col, i)+mat(min_row, col, i))*(mat_d(max_row, col, i)-& +& 2._sp*mat_d(row, col, i)+mat_d(min_row, col, i)) + 2._sp& +& *(mat(row, max_col, i)-2._sp*mat(row, col, i)+mat(row, & +& min_col, i))*(mat_d(row, max_col, i)-2._sp*mat_d(row, & +& col, i)+mat_d(row, min_col, i)) END IF END DO END DO @@ -10090,13 +10145,14 @@ END FUNCTION REG_SMOOTHING_D ! gradient of useful results: res matrix ! with respect to varying inputs: matrix SUBROUTINE REG_SMOOTHING_B(setup, mesh, optim_arr, matrix, matrix_b, & -& matrix_bgd, res_b) +& matrix_bgd, rel_to_bgd, res_b) IMPLICIT NONE TYPE(SETUPDT), INTENT(IN) :: setup TYPE(MESHDT), INTENT(IN) :: mesh INTEGER, DIMENSION(:), INTENT(IN) :: optim_arr REAL(sp), DIMENSION(:, :, :), INTENT(IN) :: matrix, matrix_bgd REAL(sp), DIMENSION(:, :, :) :: matrix_b + LOGICAL, INTENT(IN) :: rel_to_bgd REAL(sp) :: res REAL(sp) :: res_b INTRINSIC SIZE @@ -10110,12 +10166,19 @@ SUBROUTINE REG_SMOOTHING_B(setup, mesh, optim_arr, matrix, matrix_b, & INTEGER :: x1 INTEGER :: x2 REAL(sp) :: temp_b + REAL(sp) :: temp_b0 INTEGER :: branch INTEGER :: ad_to INTEGER :: ad_to0 INTEGER :: ad_to1 ! matrix relative to the bgd. We don't want to penalize initial spatial variation. 
- mat = matrix - matrix_bgd + IF (rel_to_bgd) THEN + mat = matrix - matrix_bgd + CALL PUSHCONTROL1B(1) + ELSE + mat = matrix + CALL PUSHCONTROL1B(0) + END IF DO i=1,SIZE(matrix, 3) IF (optim_arr(i) .GT. 0) THEN DO col=1,SIZE(matrix, 2) @@ -10189,14 +10252,16 @@ SUBROUTINE REG_SMOOTHING_B(setup, mesh, optim_arr, matrix, matrix_b, & DO row=ad_to,1,-1 CALL POPCONTROL1B(branch) IF (branch .NE. 0) THEN - temp_b = 2*(mat(max_row, col, i)-2*2._sp*mat(row, col, i)+& -& mat(min_row, col, i)+mat(row, max_col, i)+mat(row, & -& min_col, i))*res_b + temp_b = 2._sp*(mat(max_row, col, i)-2._sp*mat(row, col, i& +& )+mat(min_row, col, i))*res_b + temp_b0 = 2._sp*(mat(row, max_col, i)-2._sp*mat(row, col, & +& i)+mat(row, min_col, i))*res_b + mat_b(row, max_col, i) = mat_b(row, max_col, i) + temp_b0 + mat_b(row, col, i) = mat_b(row, col, i) - 2._sp*temp_b0 + mat_b(row, min_col, i) = mat_b(row, min_col, i) + temp_b0 mat_b(max_row, col, i) = mat_b(max_row, col, i) + temp_b - mat_b(row, col, i) = mat_b(row, col, i) - 2*2._sp*temp_b + mat_b(row, col, i) = mat_b(row, col, i) - 2._sp*temp_b mat_b(min_row, col, i) = mat_b(min_row, col, i) + temp_b - mat_b(row, max_col, i) = mat_b(row, max_col, i) + temp_b - mat_b(row, min_col, i) = mat_b(row, min_col, i) + temp_b CALL POPCONTROL1B(branch) IF (branch .EQ. 0) THEN CALL POPINTEGER4(max_row) @@ -10226,16 +10291,22 @@ SUBROUTINE REG_SMOOTHING_B(setup, mesh, optim_arr, matrix, matrix_b, & END DO END IF END DO - matrix_b = matrix_b + mat_b + CALL POPCONTROL1B(branch) + IF (branch .EQ. 0) THEN + matrix_b = matrix_b + mat_b + ELSE + matrix_b = matrix_b + mat_b + END IF END SUBROUTINE REG_SMOOTHING_B - FUNCTION REG_SMOOTHING(setup, mesh, optim_arr, matrix, matrix_bgd) & -& RESULT (RES) + FUNCTION REG_SMOOTHING(setup, mesh, optim_arr, matrix, matrix_bgd, & +& rel_to_bgd) RESULT (RES) IMPLICIT NONE TYPE(SETUPDT), INTENT(IN) :: setup TYPE(MESHDT), INTENT(IN) :: mesh INTEGER, DIMENSION(:), INTENT(IN) :: optim_arr REAL(sp), DIMENSION(:, :, :), INTENT(IN) :: matrix, matrix_bgd + LOGICAL, INTENT(IN) :: rel_to_bgd REAL(sp) :: res INTRINSIC SIZE REAL(sp), DIMENSION(SIZE(matrix, 1), SIZE(matrix, 2), SIZE(matrix, 3& @@ -10247,7 +10318,11 @@ FUNCTION REG_SMOOTHING(setup, mesh, optim_arr, matrix, matrix_bgd) & INTEGER :: x2 res = 0._sp ! matrix relative to the bgd. We don't want to penalize initial spatial variation. - mat = matrix - matrix_bgd + IF (rel_to_bgd) THEN + mat = matrix - matrix_bgd + ELSE + mat = matrix + END IF DO i=1,SIZE(matrix, 3) IF (optim_arr(i) .GT. 0) THEN DO col=1,SIZE(matrix, 2) @@ -10281,9 +10356,9 @@ FUNCTION REG_SMOOTHING(setup, mesh, optim_arr, matrix, matrix_bgd) & IF (mesh%active_cell(row, max_col) .EQ. 0) max_col = col IF (mesh%active_cell(min_row, col) .EQ. 0) min_row = row IF (mesh%active_cell(max_row, col) .EQ. 
0) max_row = row - res = res + (mat(max_row, col, i)-2._sp*mat(row, col, i)+& -& mat(min_row, col, i)+(mat(row, max_col, i)-2._sp*mat(row& -& , col, i)+mat(row, min_col, i)))**2 + res = res + ((mat(max_row, col, i)-2._sp*mat(row, col, i)+& +& mat(min_row, col, i))**2._sp+(mat(row, max_col, i)-2._sp& +& *mat(row, col, i)+mat(row, min_col, i))**2._sp) END IF END DO END DO @@ -10303,19 +10378,17 @@ FUNCTION REG_PRIOR_D(setup, optim_arr, matrix, matrix_d, matrix_bgd, & REAL(sp), DIMENSION(:, :, :), INTENT(IN) :: matrix_d REAL(sp) :: res REAL(sp) :: res_d - INTEGER :: i + INTEGER :: i, col, row INTRINSIC SIZE - INTRINSIC SUM - REAL(sp), DIMENSION(SIZE(matrix, 1), SIZE(matrix, 2)) :: arg1 - REAL(sp), DIMENSION(SIZE(matrix, 1), SIZE(matrix, 2)) :: arg1_d res_d = 0.0_4 DO i=1,SIZE(matrix, 3) IF (optim_arr(i) .GT. 0) THEN - arg1_d(:, :) = 2*(matrix(:, :, i)-matrix_bgd(:, :, i))*matrix_d(& -& :, :, i) - arg1(:, :) = (matrix(:, :, i)-matrix_bgd(:, :, i))*(matrix(:, :& -& , i)-matrix_bgd(:, :, i)) - res_d = res_d + SUM(arg1_d(:, :)) + DO col=1,SIZE(matrix, 2) + DO row=1,SIZE(matrix, 1) + res_d = res_d + 2._sp*(matrix(row, col, i)-matrix_bgd(row, & +& col, i))*matrix_d(row, col, i) + END DO + END DO END IF END DO END FUNCTION REG_PRIOR_D @@ -10332,30 +10405,38 @@ SUBROUTINE REG_PRIOR_B(setup, optim_arr, matrix, matrix_b, matrix_bgd& REAL(sp), DIMENSION(:, :, :) :: matrix_b REAL(sp) :: res REAL(sp) :: res_b - INTEGER :: i + INTEGER :: i, col, row INTRINSIC SIZE - INTRINSIC SUM - REAL(sp), DIMENSION(SIZE(matrix, 1), SIZE(matrix, 2)) :: arg1 - REAL(sp), DIMENSION(SIZE(matrix, 1), SIZE(matrix, 2)) :: arg1_b INTEGER :: ad_to + INTEGER :: ad_to0 + INTEGER :: ad_to1 INTEGER :: branch DO i=1,SIZE(matrix, 3) IF (optim_arr(i) .GT. 0) THEN - arg1(:, :) = (matrix(:, :, i)-matrix_bgd(:, :, i))*(matrix(:, :& -& , i)-matrix_bgd(:, :, i)) + DO col=1,SIZE(matrix, 2) + DO row=1,SIZE(matrix, 1) + + END DO + CALL PUSHINTEGER4(row - 1) + END DO + CALL PUSHINTEGER4(col - 1) CALL PUSHCONTROL1B(1) ELSE CALL PUSHCONTROL1B(0) END IF END DO - ad_to = i - 1 - DO i=ad_to,1,-1 + ad_to1 = i - 1 + DO i=ad_to1,1,-1 CALL POPCONTROL1B(branch) IF (branch .NE. 0) THEN - arg1_b = 0.0_4 - arg1_b(:, :) = res_b - matrix_b(:, :, i) = matrix_b(:, :, i) + 2*(matrix(:, :, i)-& -& matrix_bgd(:, :, i))*arg1_b + CALL POPINTEGER4(ad_to0) + DO col=ad_to0,1,-1 + CALL POPINTEGER4(ad_to) + DO row=ad_to,1,-1 + matrix_b(row, col, i) = matrix_b(row, col, i) + 2._sp*(& +& matrix(row, col, i)-matrix_bgd(row, col, i))*res_b + END DO + END DO END IF END DO END SUBROUTINE REG_PRIOR_B @@ -10366,16 +10447,17 @@ FUNCTION REG_PRIOR(setup, optim_arr, matrix, matrix_bgd) RESULT (RES) INTEGER, DIMENSION(:), INTENT(IN) :: optim_arr REAL(sp), DIMENSION(:, :, :), INTENT(IN) :: matrix, matrix_bgd REAL(sp) :: res - INTEGER :: i + INTEGER :: i, col, row INTRINSIC SIZE - INTRINSIC SUM - REAL(sp), DIMENSION(SIZE(matrix, 1), SIZE(matrix, 2)) :: arg1 res = 0._sp DO i=1,SIZE(matrix, 3) IF (optim_arr(i) .GT. 
0) THEN - arg1(:, :) = (matrix(:, :, i)-matrix_bgd(:, :, i))*(matrix(:, :& -& , i)-matrix_bgd(:, :, i)) - res = res + SUM(arg1(:, :)) + DO col=1,SIZE(matrix, 2) + DO row=1,SIZE(matrix, 1) + res = res + (matrix(row, col, i)-matrix_bgd(row, col, i))**& +& 2._sp + END DO + END DO END IF END DO END FUNCTION REG_PRIOR diff --git a/smash/solver/optimize/mw_optimize.f90 b/smash/solver/optimize/mw_optimize.f90 index 041d4e00..ddd47570 100644 --- a/smash/solver/optimize/mw_optimize.f90 +++ b/smash/solver/optimize/mw_optimize.f90 @@ -519,7 +519,7 @@ subroutine optimize_lbfgsb(setup, mesh, input_data, parameters, states, output) n = mesh%nac*(count(setup%optimize%optim_parameters .gt. 0) + & & count(setup%optimize%optim_states .gt. 0)) m = 10 - factr = 1.e6_dp + factr = 1.e1_dp pgtol = 1.e-12_dp allocate (nbd(n), x(n), l(n), u(n), g(n)) diff --git a/smash/solver/optimize/mwd_cost.f90 b/smash/solver/optimize/mwd_cost.f90 index 002d6df1..806f3a03 100644 --- a/smash/solver/optimize/mwd_cost.f90 +++ b/smash/solver/optimize/mwd_cost.f90 @@ -211,11 +211,20 @@ subroutine compute_jreg(setup, mesh, input_data, parameters, parameters_bgd, sta case ("smoothing") - parameters_jreg = parameters_jreg + setup%optimize%wjreg_fun(i)**4* & - & reg_smoothing(setup, mesh, setup%optimize%optim_parameters, parameters_matrix, parameters_bgd_matrix) + parameters_jreg = parameters_jreg + setup%optimize%wjreg_fun(i)**2._sp* & + & reg_smoothing(setup, mesh, setup%optimize%optim_parameters, parameters_matrix, parameters_bgd_matrix,.true.) + + states_jreg = states_jreg + setup%optimize%wjreg_fun(i)**2._sp* & + & reg_smoothing(setup, mesh, setup%optimize%optim_states, states_matrix, states_bgd_matrix,.true.) + + case ("hard_smoothing") + + parameters_jreg = parameters_jreg + setup%optimize%wjreg_fun(i)**2._sp* & + & reg_smoothing(setup, mesh, setup%optimize%optim_parameters, parameters_matrix, parameters_bgd_matrix,.false.) + + states_jreg = states_jreg + setup%optimize%wjreg_fun(i)**2._sp* & + & reg_smoothing(setup, mesh, setup%optimize%optim_states, states_matrix, states_bgd_matrix,.false.) - states_jreg = states_jreg + setup%optimize%wjreg_fun(i)**4* & - & reg_smoothing(setup, mesh, setup%optimize%optim_states, states_matrix, states_bgd_matrix) case ("distance_correlation") @@ -1087,7 +1096,7 @@ function distance_correlation_descriptors(& end function distance_correlation_descriptors - function reg_smoothing(setup, mesh, optim_arr, matrix, matrix_bgd) result(res) + function reg_smoothing(setup, mesh, optim_arr, matrix, matrix_bgd, rel_to_bgd) result(res) !% Notes !% ----- @@ -1103,6 +1112,7 @@ function reg_smoothing(setup, mesh, optim_arr, matrix, matrix_bgd) result(res) type(MeshDT), intent(in) :: mesh integer, dimension(:), intent(in) :: optim_arr real(sp), dimension(:, :, :), intent(in) :: matrix, matrix_bgd + logical, intent(in):: rel_to_bgd real(sp) :: res real(sp), dimension(size(matrix, 1), size(matrix, 2), size(matrix, 3)) :: mat @@ -1111,7 +1121,11 @@ function reg_smoothing(setup, mesh, optim_arr, matrix, matrix_bgd) result(res) res = 0._sp ! matrix relative to the bgd. We don't want to penalize initial spatial variation. 
- mat = matrix - matrix_bgd + if (rel_to_bgd) then + mat = matrix - matrix_bgd + else + mat = matrix + end if do i = 1, size(matrix, 3) @@ -1147,8 +1161,8 @@ function reg_smoothing(setup, mesh, optim_arr, matrix, matrix_bgd) result(res) max_row = row end if - res = res + ((mat(max_row, col, i) - 2._sp*mat(row, col, i) + mat(min_row, col, i)) & - & + (mat(row, max_col, i) - 2._sp*mat(row, col, i) + mat(row, min_col, i)))**2 + res = res + ((mat(max_row, col, i) - 2._sp*mat(row, col, i) + mat(min_row, col, i))**2._sp & + & + (mat(row, max_col, i) - 2._sp*mat(row, col, i) + mat(row, min_col, i))**2._sp) end if @@ -1181,7 +1195,7 @@ function reg_prior(setup, optim_arr, matrix, matrix_bgd) result(res) real(sp), dimension(:, :, :), intent(in) :: matrix, matrix_bgd real(sp) :: res - integer :: i + integer :: i, col, row res = 0._sp @@ -1189,7 +1203,15 @@ function reg_prior(setup, optim_arr, matrix, matrix_bgd) result(res) if (optim_arr(i) .gt. 0) then - res = res + sum((matrix(:, :, i) - matrix_bgd(:, :, i))*(matrix(:, :, i) - matrix_bgd(:, :, i))) + do col = 1, size(matrix, 2) + + do row = 1, size(matrix, 1) + + res = res + (matrix(row, col, i) - matrix_bgd(row, col, i))**2._sp + + end do + + end do end if From 1d15a613371d08642e26079e44a04a5e60fcdcd5 Mon Sep 17 00:00:00 2001 From: Jay-Allemand Maxime Date: Mon, 29 May 2023 18:04:41 +0200 Subject: [PATCH 13/73] Restore a way to read data with absolute path (in odrer to speed up sequential run). --- smash/core/_read_input_data.py | 77 +++++++++++++++++++++++++--------- 1 file changed, 57 insertions(+), 20 deletions(-) diff --git a/smash/core/_read_input_data.py b/smash/core/_read_input_data.py index 13637306..a04f0001 100644 --- a/smash/core/_read_input_data.py +++ b/smash/core/_read_input_data.py @@ -11,6 +11,7 @@ from smash.solver._mwd_mesh import MeshDT from smash.solver._mwd_input_data import Input_DataDT +import os import warnings import glob from tqdm import tqdm @@ -127,41 +128,77 @@ def _read_prcp(setup: SetupDT, mesh: MeshDT, input_data: Input_DataDT): freq=f"{int(setup.dt)}s", )[1:] - if setup.prcp_format == "tif": - files = sorted(glob.glob(f"{setup.prcp_directory}/**/*tif*", recursive=True)) + + if setup.prcp_fast_access==False : + + if setup.prcp_format == "tif": + files = sorted(glob.glob(f"{setup.prcp_directory}/**/*tif*", recursive=True)) - files = _adjust_left_files(files, date_range) + files = _adjust_left_files(files, date_range) - # % WIP - elif setup.prcp_format == "nc": - files = sorted(glob.glob(f"{setup.prcp_directory}/**/*nc", recursive=True)) + # % WIP + elif setup.prcp_format == "nc": + files = sorted(glob.glob(f"{setup.prcp_directory}/**/*nc", recursive=True)) + for i, date in enumerate(tqdm(date_range, desc=" Reading precipitation")): date_strf = date.strftime("%Y%m%d%H%M") - ind = _index_containing_substring(files, date_strf) + if setup.prcp_fast_access==True : + + year=date_strf[0:4] + month=date_strf[4:6] + day=date_strf[6:8] + path = setup.prcp_directory + os.sep + year + os.sep + month + os.sep + day + os.sep + file_to_read = path + os.sep + setup.prcp_prefix + date_strf + "_" + date_strf + "." 
+ setup.prcp_format
+
+            if (os.path.exists(file_to_read)) :
+
+                matrix = (
+                    _read_windowed_raster(file_to_read, mesh) * setup.prcp_conversion_factor
+                )

-        if ind == -1:
-            if setup.sparse_storage:
-                input_data.sparse_prcp[:, i] = -99.0
+                if setup.sparse_storage:
+                    input_data.sparse_prcp[:, i] = sparse_matrix_to_vector(mesh, matrix)
+                else:
+                    input_data.prcp[..., i] = matrix
+
             else:
-                input_data.prcp[..., i] = -99.0
-
-            warnings.warn(f"Missing precipitation file for date {date}")
+
+                if setup.sparse_storage:
+                    input_data.sparse_prcp[:, i] = -99.0
+                else:
+                    input_data.prcp[..., i] = -99.0
+
+                warnings.warn(f"Missing precipitation file for date {date}: {file_to_read}")
+
         else:
-            matrix = (
-                _read_windowed_raster(files[ind], mesh) * setup.prcp_conversion_factor
-            )
+
+            ind = _index_containing_substring(files, date_strf)
+
+            if ind == -1:
+                if setup.sparse_storage:
+                    input_data.sparse_prcp[:, i] = -99.0

-            if setup.sparse_storage:
-                input_data.sparse_prcp[:, i] = sparse_matrix_to_vector(mesh, matrix)
+                else:
+                    input_data.prcp[..., i] = -99.0
+
+                warnings.warn(f"Missing precipitation file for date {date}")

             else:
-                input_data.prcp[..., i] = matrix
+                matrix = (
+                    _read_windowed_raster(files[ind], mesh) * setup.prcp_conversion_factor
+                )
+
+                if setup.sparse_storage:
+                    input_data.sparse_prcp[:, i] = sparse_matrix_to_vector(mesh, matrix)

-            files.pop(ind)
+                else:
+                    input_data.prcp[..., i] = matrix
+
+                files.pop(ind)

From 87ffa2086af75c0192e4fafd1f054d9552c058b3 Mon Sep 17 00:00:00 2001
From: Jay-Allemand Maxime
Date: Mon, 29 May 2023 18:05:37 +0200
Subject: [PATCH 14/73] Replace a raised error by a warning if all Qobs are
 missing (lacuna). That way, model.optimize continues and performs one
 iteration (cost=0 and sensitivity=0 as well), so it no longer crashes Python
 scripts when we do assimilation (sequential runs)

---
 smash/core/optimize/_standardize.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/smash/core/optimize/_standardize.py b/smash/core/optimize/_standardize.py
index 7bcae6da..c1fef25e 100644
--- a/smash/core/optimize/_standardize.py
+++ b/smash/core/optimize/_standardize.py
@@ -341,7 +341,7 @@ def _standardize_gauge(
         raise ValueError(f"Unknown gauge code '{name}'. Choices: {mesh.code}")

     if gauge_check.size == 0:
-        raise ValueError(
+        warnings.warn(
             f"No available observed discharge for optimization at gauge(s) {gauge}"
         )

From 8d9f6d4683f2a2a2dc37fb99b42d5ab544d27133 Mon Sep 17 00:00:00 2001
From: Jay-Allemand Maxime
Date: Mon, 29 May 2023 18:07:48 +0200
Subject: [PATCH 15/73] Add 2 new parameters: one to trigger fast access to
 the precipitation data (reading with an absolute path) and one to give the
 prefix of the precipitation file (as before): prcp_fast_access: True
 prcp_prefix: antilope_j1_000100_

---
 smash/solver/derived_type/mwd_setup.f90 | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/smash/solver/derived_type/mwd_setup.f90 b/smash/solver/derived_type/mwd_setup.f90
index 3e09cfdf..451ff71f 100644
--- a/smash/solver/derived_type/mwd_setup.f90
+++ b/smash/solver/derived_type/mwd_setup.f90
@@ -15,6 +15,8 @@
 !% ``qobs_directory``               Observed discharge directory path (default: '...')
 !% ``read_prcp``                    Read precipitation (default: .false.)
 !% ``prcp_format``                  Precipitation format (default: 'tif')
+!% ``prcp_fast_access``             Fast access with absolute path (YY/MM/dd/) (default: .false.)
+!% ``prcp_prefix``                 Precipitation filename prefix (default: '...')
 !% ``prcp_conversion_factor``      Precipitation conversion factor (default: 1)
 !% ``prcp_directory``              Precipitation directory path (default: '...')
 !% ``read_pet``                    Read potential evapotranspiration (default: .false.)
@@ -123,6 +125,8 @@ module mwd_setup

         logical :: read_prcp = .false.
         character(lchar) :: prcp_format = "tif" !>f90w-char
+        logical :: prcp_fast_access = .false.
+        character(lchar) :: prcp_prefix = "..." !>f90w-char
         real(sp) :: prcp_conversion_factor = 1._sp
         character(lchar) :: prcp_directory = "..." !>f90w-char

From 06df3abf4a8fe09d38ae5bc49d14deed63592d49 Mon Sep 17 00:00:00 2001
From: "ngo-nghi-truyen.huynh"
Date: Mon, 5 Jun 2023 14:24:25 +0200
Subject: [PATCH 16/73] FIX: boundary conditions checking with higher tolerance

---
 doc/source/release/0.5.0-notes.rst    | 42 +++++++++++++++++++++++++++
 doc/source/release/index.rst          |  1 +
 smash/core/simulation/_standardize.py |  8 ++---
 3 files changed, 47 insertions(+), 4 deletions(-)
 create mode 100644 doc/source/release/0.5.0-notes.rst

diff --git a/doc/source/release/0.5.0-notes.rst b/doc/source/release/0.5.0-notes.rst
new file mode 100644
index 00000000..aeb3d9f0
--- /dev/null
+++ b/doc/source/release/0.5.0-notes.rst
@@ -0,0 +1,42 @@
+.. _release.0.5.0-notes:
+
+.. currentmodule:: smash
+
+=========================
+smash 0.5.0 Release Notes
+=========================
+
+The smash 0.5.0 release continues the ongoing work to improve usability, fix bugs and clarify the documentation. The highlights are:
+
+------------
+Contributors
+------------
+
+This release was made possible thanks to the contributions of:
+
+---------------
+Compatibilities
+---------------
+
+------------
+Deprecations
+------------
+
+------------
+Improvements
+------------
+
+------------
+New Features
+------------
+
+-----
+Fixes
+-----
+
+Boundary conditions checking
+****************************
+
+The boundary condition checking previously used a tolerance of 1e-6, which caused issues in certain cases due to machine precision when passing from Python to Fortran via the f90wrapper.
+To address this problem, the tolerance has been increased to 1e-3.
+
diff --git a/doc/source/release/index.rst b/doc/source/release/index.rst
index 2f3c5d0a..1a06e2d3 100644
--- a/doc/source/release/index.rst
+++ b/doc/source/release/index.rst
@@ -7,6 +7,7 @@ Release notes

 .. toctree::
toctree:: :maxdepth: 3 + 0.5.0 <0.5.0-notes> 0.4.2 <0.4.2-notes> 0.4.1 <0.4.1-notes> 0.4.0 <0.4.0-notes> diff --git a/smash/core/simulation/_standardize.py b/smash/core/simulation/_standardize.py index 23ed008f..d16b9318 100644 --- a/smash/core/simulation/_standardize.py +++ b/smash/core/simulation/_standardize.py @@ -270,8 +270,8 @@ def _standardize_bounds( ind = np.argwhere(setup._parameters_name == name) parameters_attr = getattr(parameters, name) - if np.any(parameters_attr + 1e-6 < bounds[i, 0]) or np.any( - parameters_attr - 1e-6 > bounds[i, 1] + if np.any(parameters_attr + 1e-3 < bounds[i, 0]) or np.any( + parameters_attr - 1e-3 > bounds[i, 1] ): raise ValueError( f"bounds values for '{name}' are invalid, background parameters [{np.min(parameters_attr)} {np.max(parameters_attr)}] is outside the bounds {bounds[i,:]}" @@ -282,8 +282,8 @@ def _standardize_bounds( ind = np.argwhere(setup._states_name == name) states_attr = getattr(states, name) - if np.any(states_attr + 1e-6 < bounds[i, 0]) or np.any( - states_attr - 1e-6 > bounds[i, 1] + if np.any(states_attr + 1e-3 < bounds[i, 0]) or np.any( + states_attr - 1e-3 > bounds[i, 1] ): raise ValueError( f"bounds values for '{name}' are invalid, background states [{np.min(states_attr)} {np.max(states_attr)}] is outside the bounds {bounds[i,:]}" From bdc6618cb6dd34a9186571a95eaebaeabb93f9b0 Mon Sep 17 00:00:00 2001 From: "ngo-nghi-truyen.huynh" Date: Mon, 5 Jun 2023 14:44:13 +0200 Subject: [PATCH 17/73] FIX PR: add link to issue in release note --- doc/source/release/0.5.0-notes.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/release/0.5.0-notes.rst b/doc/source/release/0.5.0-notes.rst index aeb3d9f0..6c60b91c 100644 --- a/doc/source/release/0.5.0-notes.rst +++ b/doc/source/release/0.5.0-notes.rst @@ -40,3 +40,4 @@ Boundary conditions checking The boundary condition checking previously used a tolerance of 1e-6, which caused issues in certain cases due to machine precision when passing from Python to Fortran via the f90wrapper. To address this problem, the tolerance has been decreased to 1e-3. +See issue `#23 `__. From 0ae40d7d7e8b298175c016b875366625d08a63df Mon Sep 17 00:00:00 2001 From: Jay-Allemand Maxime Date: Tue, 6 Jun 2023 16:44:47 +0200 Subject: [PATCH 18/73] Fix issu on lcurve if Jreg=hard-smoothing is used. --- smash/core/simulation/_optimize.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/smash/core/simulation/_optimize.py b/smash/core/simulation/_optimize.py index 6510f4cd..2f4df3b3 100644 --- a/smash/core/simulation/_optimize.py +++ b/smash/core/simulation/_optimize.py @@ -326,7 +326,7 @@ def _optimize_lbfgsb( # % bounds initialisation for jobs and jreg jobs_min = instance.output.cost_jobs jobs_max = instance.output._cost_jobs_initial - jreg_min = instance.output._cost_jreg_initial + jreg_min = 0.0 #minimum jreg value, instance.output._cost_jreg_initial > 0 if hard-smoothing jreg_max = instance.output.cost_jreg if (jobs_min / jobs_max) < 0.95 and (jreg_max - jreg_min) > 0.0: @@ -391,7 +391,8 @@ def _optimize_lbfgsb( # % bounds update for jobs and jreg jobs_min = np.min(cost_jobs_arr) jreg_max = np.max(cost_jreg_arr) - + jreg_min = np.min(cost_jreg_arr) + # % select the best wjreg based on the transformed lcurve and using our own method decribed in ... 
distance, wjreg_lcurve_opt = _compute_best_lcurve_weight( cost_jobs_arr, From f395492a1569acbf3f7301405fc7b1a9b4e55ad7 Mon Sep 17 00:00:00 2001 From: inoelloc Date: Tue, 6 Jun 2023 17:06:10 +0200 Subject: [PATCH 19/73] ENC: New features for reading input data Add a new file "raster.py" inside smash.core Replace _read_windowed_raster by _read_windowed_raster_gdal from "raster.py" --- smash/core/_read_input_data.py | 28 +- smash/core/raster.py | 680 +++++++++++++++++++++++++++++++++ 2 files changed, 696 insertions(+), 12 deletions(-) create mode 100644 smash/core/raster.py diff --git a/smash/core/_read_input_data.py b/smash/core/_read_input_data.py index b9c83654..8c060a1b 100644 --- a/smash/core/_read_input_data.py +++ b/smash/core/_read_input_data.py @@ -1,5 +1,7 @@ from __future__ import annotations +from smash.core.raster import read_windowed_raster_gdal + from smash.core.utils import sparse_matrix_to_vector from smash.core._constant import RATIO_PET_HOURLY @@ -21,25 +23,27 @@ def _read_windowed_raster(path: str, mesh: MeshDT) -> np.ndarray: - ds = gdal.Open(path) + # ~ ds = gdal.Open(path) - transform = ds.GetGeoTransform() + # ~ transform = ds.GetGeoTransform() - xmin = transform[0] - ymax = transform[3] - xres = transform[1] - yres = -transform[5] + # ~ xmin = transform[0] + # ~ ymax = transform[3] + # ~ xres = transform[1] + # ~ yres = -transform[5] - col_off = (mesh.xmin - xmin) / xres - row_off = (ymax - mesh.ymax) / yres + # ~ col_off = (mesh.xmin - xmin) / xres + # ~ row_off = (ymax - mesh.ymax) / yres - band = ds.GetRasterBand(1) + # ~ band = ds.GetRasterBand(1) - nodata = band.GetNoDataValue() + # ~ nodata = band.GetNoDataValue() - arr = band.ReadAsArray(col_off, row_off, mesh.ncol, mesh.nrow) + # ~ arr = band.ReadAsArray(col_off, row_off, mesh.ncol, mesh.nrow) - arr = np.where(arr == nodata, -99, arr) + # ~ arr = np.where(arr == nodata, -99, arr) + + arr = read_windowed_raster_gdal(filename=path, smash_mesh=mesh, band=1, lacuna=-99.) 
return arr diff --git a/smash/core/raster.py b/smash/core/raster.py new file mode 100644 index 00000000..e2300740 --- /dev/null +++ b/smash/core/raster.py @@ -0,0 +1,680 @@ +from __future__ import annotations + +import numpy as np +from osgeo import gdal + +import os +import errno + +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from smash.solver._mwd_setup import SetupDT + from smash.solver._mwd_mesh import MeshDT + from smash.solver._mwd_input_data import Input_DataDT + + +### GDAL RASTER FUNCTIONS + +def generate_polygon(bbox): + """ + Generates a list of coordinates: [[x1,y1],[x2,y2],[x3,y3],[x4,y4],[x1,y1]] + """ + return [[bbox[0],bbox[1]], + [bbox[2],bbox[1]], + [bbox[2],bbox[3]], + [bbox[0],bbox[3]], + [bbox[0],bbox[1]]] + + +def pol_to_bounding_box(pol): + """ + Receives list of coordinates: [[x1,y1],[x2,y2],...,[xN,yN]] + """ + arr = pol_to_np(pol) + return BoundingBox(np.min(arr[:,0]), + np.min(arr[:,1]), + np.max(arr[:,0]), + np.max(arr[:,1])) + + + +def xy_to_colrow(x, y, xmin, ymax, xres, yres): + + col = int((x - xmin) / xres) + row = int((ymax - y) / yres) + + return col, row + + +def colrow_to_xy(col, row, xmin, ymax, xres, yres): + + x = int(col * xres + xmin) + y = int(ymax - row * yres) + + return x, y + + + +def trim_zeros_2D(array, shift_value=False): + + for ax in [0, 1]: + + mask = ~(array == 0).all(axis=ax) + + inv_mask = mask[::-1] + + start_ind = np.argmax(mask) + + end_ind = len(inv_mask) - np.argmax(inv_mask) + + if ax == 0: + scol, ecol = start_ind, end_ind + array = array[:, start_ind:end_ind] + else: + srow, erow = start_ind, end_ind + array = array[start_ind:end_ind, :] + + if shift_value: + return array, scol, ecol, srow, erow + else: + return array + + + + +#just open the raster and return the dataset +def gdal_raster_open(filename): + """ + Opening a raster with gdal. this is just a wrapper around gdal.Open(filename) + + Parameters + ---------- + filename : string, path to a file + + Returns + ---------- + dataset : gdal object + + Examples + ---------- + dataset = gdal_raster_open("filename") + """ + dataset=object() + if os.path.isfile(filename): + dataset = gdal.Open(filename) + else: + raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), filename) + + return dataset + + +def read_windowed_raster_gdal(filename: str, smash_mesh: MeshDT, band=None, lacuna=None) -> np.ndarray: + """ + Reading a raster file with gdal and return a np.ndarray storing the different data bands according the SMASH model boundingbox. + + Parameters + ---------- + filename : string, path to a file + smash_mesh : smash.mesh object representing the mesh + band: band to be read + lacuna: float64 replacing the Nodata value + + Returns + ---------- + array : np.array or np.ndarray storing one or all different data, stored in filename, sliced compare to the mesh boundingbox + + Examples + ---------- + array=read_windowed_raster_gdal("filename", model.mesh) + """ + dataset = gdal_raster_open(filename) + + geotransform=gdal_get_geotransform(dataset) + + if (geotransform['xres'] != smash_mesh.dx) or (geotransform['yres'] != smash_mesh.dx): + new_dataset=gdal_reproject_raster(dataset,smash_mesh.dx,smash_mesh.dx) + dataset=new_dataset + + #si mesh larger than window: window=1,1,all,all + #compute window of smash-mesh and get xoffset and y offsets => offsets + #pass this window to gdal_crop_dataset_to_ndarray(dataset=dataset,window=window,offsets=offset) + #position the rainfall inside the mesh grid according offset ! 
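+    #window is a dict {row_off, col_off, nrows, ncols}: the pixel offsets of the
+    #mesh origin inside the raster grid and the number of raster pixels covering
+    #the mesh, computed from the mesh bounding box and the raster geotransform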
+ + window=gdal_smash_window_from_geotransform(dataset,smash_mesh) + + if (band==None): + array=gdal_crop_dataset_to_ndarray(dataset=dataset,window=window,lacuna=lacuna) + else: + array=gdal_crop_dataset_to_array(dataset=dataset,window=window,band=band,lacuna=lacuna) + + + return array + + + +def gdal_get_geotransform(dataset): + """ + Getting the GeoTransform coeficients from a gdal object + + Parameters + ---------- + dataset : gdal object from gdal.Open() + + Returns + ---------- + geotransform : Python dictionnary + + # ~ A GeoTransform consists in a set of 6 coefficients: + # ~ GT(0) x-coordinate of the upper-left corner of the upper-left pixel. + # ~ GT(1) w-e pixel resolution / pixel width. + # ~ GT(2) row rotation (typically zero). + # ~ GT(3) y-coordinate of the upper-left corner of the upper-left pixel. + # ~ GT(4) column rotation (typically zero). + # ~ GT(5) n-s pixel resolution / pixel height (negative value for a north-up image). + + Examples + ---------- + dataset = gdal_raster_open(filename) + geotransform=gdal_get_geotransform(dataset) + """ + + transform = dataset.GetGeoTransform() + xmin = transform[0] + ymax = transform[3] + xres = transform[1] + yres = -transform[5] + geotransform={'xleft':xmin,'xres':xres, 'ytop':ymax, 'yres':yres} + return geotransform + + + +def gdal_smash_window_from_geotransform(dataset,smash_mesh): + """ + Compute the dataset array window according the Smash mesh + + Parameters + ---------- + dataset : gdal object from gdal.Open() + smash_mesh : Smash mesh object model.mesh + + Returns + ---------- + window : Python dictionnary with componnents: row_off (offset), col_off (offset), nrows, ncols + + Examples + ---------- + window=gdal_smash_window_from_geotransform(dataset,smash_mesh) + """ + geotransform=gdal_get_geotransform(dataset) + + col_off = (smash_mesh.xmin - geotransform['xleft']) / geotransform['xres'] + row_off = (geotransform['ytop'] - smash_mesh.ymax) / geotransform['yres'] + + window={"row_off":row_off,'col_off':col_off,'nrows':int(smash_mesh.nrow*smash_mesh.dx/geotransform['yres']),'ncols':int(smash_mesh.ncol*smash_mesh.dx/geotransform['xres'])} + + return window + + +def union_bbox(bbox1,bbox2): + """ + Function which compute the bounding boxes union of 2 input bbox. It return the working bbox + + Parameters + ---------- + bbox1: dict containin the first bbox informations + bbox2 : dict containin the second bbox informations + ---------- + returns + dic containing the bbox union + + Examples + ---------- + dataset=gdal_raster_open(filename) + possible_bbox=union_bbox(bbox,bbox_dataset) + """ + left=max(bbox1['left'],bbox2['left']) + bottom=max(bbox1['bottom'],bbox2['bottom']) + right=min(bbox1['right'],bbox2['right']) + top=min(bbox1['top'],bbox2['top']) + if (left Date: Wed, 7 Jun 2023 11:34:19 +0200 Subject: [PATCH 20/73] use glob.glob instead of setup.prcp_prefix, but in the yy/mm/dd directory --- smash/core/_read_input_data.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/smash/core/_read_input_data.py b/smash/core/_read_input_data.py index a04f0001..7f8cc290 100644 --- a/smash/core/_read_input_data.py +++ b/smash/core/_read_input_data.py @@ -150,7 +150,8 @@ def _read_prcp(setup: SetupDT, mesh: MeshDT, input_data: Input_DataDT): month=date_strf[4:6] day=date_strf[6:8] path = setup.prcp_directory + os.sep + year + os.sep + month + os.sep + day + os.sep - file_to_read = path + os.sep + setup.prcp_prefix + date_strf + "_" + date_strf + "." 
+ setup.prcp_format + file_to_read=glob.glob(f'{path}*{date_strf}*.{setup.prcp_format}')[0] + #file_to_read = path + os.sep + setup.prcp_prefix + date_strf + "_" + date_strf + "." + setup.prcp_format if (os.path.exists(file_to_read)) : From 059aad171a394bd82a1618876ec16103f801f58b Mon Sep 17 00:00:00 2001 From: Jay-Allemand Maxime Date: Wed, 7 Jun 2023 11:50:43 +0200 Subject: [PATCH 21/73] Remove prcp_prefix from setup. --- smash/solver/derived_type/mwd_setup.f90 | 2 -- 1 file changed, 2 deletions(-) diff --git a/smash/solver/derived_type/mwd_setup.f90 b/smash/solver/derived_type/mwd_setup.f90 index b54cd896..cf45df24 100644 --- a/smash/solver/derived_type/mwd_setup.f90 +++ b/smash/solver/derived_type/mwd_setup.f90 @@ -16,7 +16,6 @@ !% ``read_prcp`` Read precipitation (default: .false.) !% ``prcp_format`` Precipitation format (default: 'tif') !% ``prcp_fast_access`` Fast access with absolute path (YY/MM/dd/) (default: .false.) -!% ``prcp_prefix`` Precipitation filename prefix (default: '...') !% ``prcp_conversion_factor`` Precipitation conversion factor (default: 1) !% ``prcp_directory`` Precipiation directory path (default: '...') !% ``read_pet`` Reap potential evapotranspiration (default: .false.) @@ -126,7 +125,6 @@ module mwd_setup logical :: read_prcp = .false. character(lchar) :: prcp_format = "tif" !>f90w-char logical :: prcp_fast_access = .false. - character(lchar) :: prcp_prefix = "..." !>f90w-char real(sp) :: prcp_conversion_factor = 1._sp character(lchar) :: prcp_directory = "..." !>f90w-char From 0661114df50206cde1aac43faa6777d43db21c35 Mon Sep 17 00:00:00 2001 From: Jay-Allemand Maxime Date: Wed, 7 Jun 2023 17:27:37 +0200 Subject: [PATCH 22/73] Fix bug in lcurve since jreg_initial_value can be greater than 0 --- smash/core/simulation/_optimize.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/smash/core/simulation/_optimize.py b/smash/core/simulation/_optimize.py index 2f4df3b3..71039b6e 100644 --- a/smash/core/simulation/_optimize.py +++ b/smash/core/simulation/_optimize.py @@ -275,10 +275,10 @@ def _optimize_lbfgsb( instance.output, ) - # % compute the best wjreg + # compute the best wjreg with the fast method instance.setup._optimize.wjreg = ( instance.output._cost_jobs_initial - instance.output.cost_jobs - ) / (instance.output.cost_jreg - instance._output._cost_jreg_initial) + ) / (instance.output.cost_jreg) instance.parameters = parameters_bgd.copy() instance.states = states_bgd.copy() @@ -320,9 +320,9 @@ def _optimize_lbfgsb( instance.states, instance.output, ) - + n_cycle += 1 - + # % bounds initialisation for jobs and jreg jobs_min = instance.output.cost_jobs jobs_max = instance.output._cost_jobs_initial @@ -330,17 +330,16 @@ def _optimize_lbfgsb( jreg_max = instance.output.cost_jreg if (jobs_min / jobs_max) < 0.95 and (jreg_max - jreg_min) > 0.0: - # % Computation of the best wjreg using the "fast" method - wjreg_opt = (jobs_max - jobs_min) / (jreg_max - jreg_min) - - # % Computation of the range of wjreg centered on wjreg_opt (4 points minimum) + # Computation of the best wjreg using the "fast" method + wjreg_opt = (jobs_max - jobs_min) / (jreg_max) + # Computation of the range of wjreg centered on wjreg_opt (4 points minimum) wjreg_range = _compute_wjreg_range(wjreg_opt, nb_wjreg_lcurve) else: wjreg_opt = 0.0 wjreg_range = np.empty(shape=0) - # % array initialisation + # array initialisation cost_arr = np.zeros(shape=wjreg_range.size + 1, dtype=np.float32) cost_arr[0] = instance.output.cost @@ -353,7 +352,7 @@ def 
_optimize_lbfgsb( wjreg_arr = np.zeros(shape=wjreg_range.size + 1, dtype=np.float32) wjreg_arr[0] = instance.setup._optimize.wjreg - # % Doing the lcurve with wjreg_range for optimization + # Doing the lcurve with wjreg_range for optimization for i, wj in enumerate(wjreg_range): instance.setup._optimize.wjreg = wj @@ -390,6 +389,7 @@ def _optimize_lbfgsb( # % bounds update for jobs and jreg jobs_min = np.min(cost_jobs_arr) + jobs_max = np.max(cost_jobs_arr) jreg_max = np.max(cost_jreg_arr) jreg_min = np.min(cost_jreg_arr) @@ -996,7 +996,7 @@ def _compute_best_lcurve_weight( if distance[i] >= max_distance: max_distance = distance[i] wjreg_lcurve_opt = wjreg_arr[i] - + else: distance[i] = np.nan else: From 0b2ac8307c1089568136837fb120e40006350ad6 Mon Sep 17 00:00:00 2001 From: Jay-Allemand Maxime Date: Wed, 7 Jun 2023 18:44:44 +0200 Subject: [PATCH 23/73] Improve the precipitation reading using fast_access. Use glob to select all file intelligently in YY or MM or DD. --- smash/core/_read_input_data.py | 136 +++++++++++++++++++++++---------- 1 file changed, 96 insertions(+), 40 deletions(-) diff --git a/smash/core/_read_input_data.py b/smash/core/_read_input_data.py index 7f8cc290..4e6a7168 100644 --- a/smash/core/_read_input_data.py +++ b/smash/core/_read_input_data.py @@ -121,7 +121,57 @@ def _index_containing_substring(the_list: list, substring: str): return -1 +def split_date(date_datetime): + + date_strf = date_datetime.strftime("%Y%m%d%H%M") + + year=date_strf[0:4] + month=date_strf[4:6] + day=date_strf[6:8] + + return year,month,day + + +def list_prcp_file(setup): + + datetime_date_start=datetime.datetime.fromisoformat(setup.start_time) + datetime_date_end=datetime.datetime.fromisoformat(setup.end_time) + + if datetime_date_endint(s_year): + + for yy in range(int(s_year),int(e_year)+1): + list_file=list_file+(glob.glob(f"{setup.prcp_directory}/{yy:04n}/**/*{setup.prcp_format}*", recursive=True)) + + elif int(e_month)>int(s_month): + + for mm in range(int(s_month),int(e_month)+1): + + list_file=list_file+(glob.glob(f"{setup.prcp_directory}/{s_year}/{mm:02n}/**/*{setup.prcp_format}*", recursive=True)) + + elif int(e_day)>int(s_day): + + for dd in range(int(s_day),int(e_day)+1): + list_file=list_file+(glob.glob(f"{setup.prcp_directory}/{s_year}/{s_month}/{dd:02n}/*{setup.prcp_format}*", recursive=True)) + + else: + + list_file=list_file+(glob.glob(f"{setup.prcp_directory}/{s_year}/{s_month}/{s_day}/*{setup.prcp_format}*", recursive=True)) + + return sorted(list_file) + + def _read_prcp(setup: SetupDT, mesh: MeshDT, input_data: Input_DataDT): + date_range = pd.date_range( start=setup.start_time, end=setup.end_time, @@ -129,7 +179,13 @@ def _read_prcp(setup: SetupDT, mesh: MeshDT, input_data: Input_DataDT): )[1:] - if setup.prcp_fast_access==False : + if setup.prcp_fast_access==True : + + files=list_prcp_file(setup) + + files = _adjust_left_files(files, date_range) + + else : if setup.prcp_format == "tif": files = sorted(glob.glob(f"{setup.prcp_directory}/**/*tif*", recursive=True)) @@ -144,62 +200,62 @@ def _read_prcp(setup: SetupDT, mesh: MeshDT, input_data: Input_DataDT): for i, date in enumerate(tqdm(date_range, desc=" Reading precipitation")): date_strf = date.strftime("%Y%m%d%H%M") - if setup.prcp_fast_access==True : + # ~ if setup.prcp_fast_access==True : - year=date_strf[0:4] - month=date_strf[4:6] - day=date_strf[6:8] - path = setup.prcp_directory + os.sep + year + os.sep + month + os.sep + day + os.sep - file_to_read=glob.glob(f'{path}*{date_strf}*.{setup.prcp_format}')[0] - 
#file_to_read = path + os.sep + setup.prcp_prefix + date_strf + "_" + date_strf + "." + setup.prcp_format + # ~ year=date_strf[0:4] + # ~ month=date_strf[4:6] + # ~ day=date_strf[6:8] + # ~ path = setup.prcp_directory + os.sep + year + os.sep + month + os.sep + day + os.sep + # ~ file_to_read=glob.glob(f'{path}*{date_strf}*.{setup.prcp_format}')[0] + # ~ #file_to_read = path + os.sep + setup.prcp_prefix + date_strf + "_" + date_strf + "." + setup.prcp_format - if (os.path.exists(file_to_read)) : + # ~ if (os.path.exists(file_to_read)) : - matrix = ( - _read_windowed_raster(file_to_read, mesh) * setup.prcp_conversion_factor - ) + # ~ matrix = ( + # ~ _read_windowed_raster(file_to_read, mesh) * setup.prcp_conversion_factor + # ~ ) - if setup.sparse_storage: - input_data.sparse_prcp[:, i] = sparse_matrix_to_vector(mesh, matrix) + # ~ if setup.sparse_storage: + # ~ input_data.sparse_prcp[:, i] = sparse_matrix_to_vector(mesh, matrix) - else: - input_data.prcp[..., i] = matrix + # ~ else: + # ~ input_data.prcp[..., i] = matrix - else: + # ~ else: - if setup.sparse_storage: - input_data.sparse_prcp[:, i] = -99.0 + # ~ if setup.sparse_storage: + # ~ input_data.sparse_prcp[:, i] = -99.0 - else: - input_data.prcp[..., i] = -99.0 + # ~ else: + # ~ input_data.prcp[..., i] = -99.0 - warnings.warn(f"Missing precipitation file for date {date}: {file_to_read}") - - else: + # ~ warnings.warn(f"Missing precipitation file for date {date}: {file_to_read}") - ind = _index_containing_substring(files, date_strf) + # ~ else: - if ind == -1: - if setup.sparse_storage: - input_data.sparse_prcp[:, i] = -99.0 + ind = _index_containing_substring(files, date_strf) + + if ind == -1: + if setup.sparse_storage: + input_data.sparse_prcp[:, i] = -99.0 - else: - input_data.prcp[..., i] = -99.0 + else: + input_data.prcp[..., i] = -99.0 - warnings.warn(f"Missing precipitation file for date {date}") + warnings.warn(f"Missing precipitation file for date {date}") - else: - matrix = ( - _read_windowed_raster(files[ind], mesh) * setup.prcp_conversion_factor - ) + else: + matrix = ( + _read_windowed_raster(files[ind], mesh) * setup.prcp_conversion_factor + ) - if setup.sparse_storage: - input_data.sparse_prcp[:, i] = sparse_matrix_to_vector(mesh, matrix) + if setup.sparse_storage: + input_data.sparse_prcp[:, i] = sparse_matrix_to_vector(mesh, matrix) - else: - input_data.prcp[..., i] = matrix + else: + input_data.prcp[..., i] = matrix - files.pop(ind) + files.pop(ind) def _read_pet(setup: SetupDT, mesh: MeshDT, input_data: Input_DataDT): From 0e6f41700b1649213115e1ace692b615ffd716c2 Mon Sep 17 00:00:00 2001 From: Jay-Allemand Maxime Date: Thu, 8 Jun 2023 09:37:16 +0200 Subject: [PATCH 24/73] Clean code ... 
--- smash/core/_read_input_data.py | 50 +++++------------------------- smash/core/simulation/_optimize.py | 7 ----- 2 files changed, 8 insertions(+), 49 deletions(-) diff --git a/smash/core/_read_input_data.py b/smash/core/_read_input_data.py index 4e6a7168..67fc7344 100644 --- a/smash/core/_read_input_data.py +++ b/smash/core/_read_input_data.py @@ -121,7 +121,7 @@ def _index_containing_substring(the_list: list, substring: str): return -1 -def split_date(date_datetime): +def _split_date(date_datetime): date_strf = date_datetime.strftime("%Y%m%d%H%M") @@ -132,7 +132,7 @@ def split_date(date_datetime): return year,month,day -def list_prcp_file(setup): +def _list_prcp_file(setup): datetime_date_start=datetime.datetime.fromisoformat(setup.start_time) datetime_date_end=datetime.datetime.fromisoformat(setup.end_time) @@ -144,8 +144,8 @@ def list_prcp_file(setup): list_file=list() - s_year,s_month,s_day=split_date(datetime_date_start) - e_year,e_month,e_day=split_date(datetime_date_end) + s_year,s_month,s_day=_split_date(datetime_date_start) + e_year,e_month,e_day=_split_date(datetime_date_end) if int(e_year)>int(s_year): @@ -178,13 +178,13 @@ def _read_prcp(setup: SetupDT, mesh: MeshDT, input_data: Input_DataDT): freq=f"{int(setup.dt)}s", )[1:] - if setup.prcp_fast_access==True : - files=list_prcp_file(setup) + files=_list_prcp_file(setup) + + if setup.prcp_format == "tif": + files = _adjust_left_files(files, date_range) - files = _adjust_left_files(files, date_range) - else : if setup.prcp_format == "tif": @@ -196,42 +196,8 @@ def _read_prcp(setup: SetupDT, mesh: MeshDT, input_data: Input_DataDT): elif setup.prcp_format == "nc": files = sorted(glob.glob(f"{setup.prcp_directory}/**/*nc", recursive=True)) - for i, date in enumerate(tqdm(date_range, desc=" Reading precipitation")): date_strf = date.strftime("%Y%m%d%H%M") - - # ~ if setup.prcp_fast_access==True : - - # ~ year=date_strf[0:4] - # ~ month=date_strf[4:6] - # ~ day=date_strf[6:8] - # ~ path = setup.prcp_directory + os.sep + year + os.sep + month + os.sep + day + os.sep - # ~ file_to_read=glob.glob(f'{path}*{date_strf}*.{setup.prcp_format}')[0] - # ~ #file_to_read = path + os.sep + setup.prcp_prefix + date_strf + "_" + date_strf + "." 
+ setup.prcp_format
-
-            # ~ if (os.path.exists(file_to_read)) :
-
-                # ~ matrix = (
-                    # ~ _read_windowed_raster(file_to_read, mesh) * setup.prcp_conversion_factor
-                # ~ )
-
-                # ~ if setup.sparse_storage:
-                    # ~ input_data.sparse_prcp[:, i] = sparse_matrix_to_vector(mesh, matrix)
-
-                # ~ else:
-                    # ~ input_data.prcp[..., i] = matrix
-
-            # ~ else:
-
-                # ~ if setup.sparse_storage:
-                    # ~ input_data.sparse_prcp[:, i] = -99.0
-
-                # ~ else:
-                    # ~ input_data.prcp[..., i] = -99.0
-
-                # ~ warnings.warn(f"Missing precipitation file for date {date}: {file_to_read}")
-
-        # ~ else:

         ind = _index_containing_substring(files, date_strf)

diff --git a/smash/core/simulation/_optimize.py b/smash/core/simulation/_optimize.py
index 71039b6e..481f330b 100644
--- a/smash/core/simulation/_optimize.py
+++ b/smash/core/simulation/_optimize.py
@@ -380,13 +380,6 @@ def _optimize_lbfgsb(
             cost_jreg_arr[i + 1] = instance.output.cost_jreg
             wjreg_arr[i + 1] = instance.setup._optimize.wjreg

-            # % break if jobs does not minimize
-            # Bug, todo: if we break, we should reshape the array
-            # ~ if (instance.output.cost_jobs - jobs_min) / (
-            # ~ jobs_max - jobs_min
-            # ~ ) >= 0.8:
-            # ~ break
-
         # % bounds update for jobs and jreg
         jobs_min = np.min(cost_jobs_arr)
         jobs_max = np.max(cost_jobs_arr)

From c685353cbd682c22c286f664f98d55160868d52b Mon Sep 17 00:00:00 2001
From: Jay-Allemand Maxime
Date: Thu, 8 Jun 2023 10:00:44 +0200
Subject: [PATCH 25/73] Rename prcp_fast_access => prcp_yymmdd_access: this
 option allows reading precipitation in the yy/mm/dd directory tree. It is
 useful when smash is run time-step by time-step, since it avoids invoking
 glob at the root prcp folder and instead invokes it intelligently in the
 yy, mm or dd directories.

---
 smash/core/_read_input_data.py | 2 +-
 smash/solver/derived_type/mwd_setup.f90 | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/smash/core/_read_input_data.py b/smash/core/_read_input_data.py
index 67fc7344..7df1e4ad 100644
--- a/smash/core/_read_input_data.py
+++ b/smash/core/_read_input_data.py
@@ -178,7 +178,7 @@ def _read_prcp(setup: SetupDT, mesh: MeshDT, input_data: Input_DataDT):
         freq=f"{int(setup.dt)}s",
     )[1:]

-    if setup.prcp_fast_access==True :
+    if setup.prcp_yymmdd_access==True :

         files=_list_prcp_file(setup)

diff --git a/smash/solver/derived_type/mwd_setup.f90 b/smash/solver/derived_type/mwd_setup.f90
index b54cd896..540573db 100644
--- a/smash/solver/derived_type/mwd_setup.f90
+++ b/smash/solver/derived_type/mwd_setup.f90
@@ -15,7 +15,7 @@
 !% ``qobs_directory`` Observed discharge directory path (default: '...')
 !% ``read_prcp`` Read precipitation (default: .false.)
 !% ``prcp_format`` Precipitation format (default: 'tif')
-!% ``prcp_fast_access`` Fast access with absolute path (YY/MM/dd/) (default: .false.)
+!% ``prcp_yymmdd_access`` Access with absolute path (YY/MM/dd/) (default: .false.)
 !% ``prcp_conversion_factor`` Precipitation conversion factor (default: 1)
 !% ``prcp_directory`` Precipiation directory path (default: '...')
 !% ``read_pet`` Reap potential evapotranspiration (default: .false.)
@@ -124,7 +124,7 @@ module mwd_setup

         logical :: read_prcp = .false.
         character(lchar) :: prcp_format = "tif" !>f90w-char
-        logical :: prcp_fast_access = .false.
+        logical :: prcp_yymmdd_access = .false.
         real(sp) :: prcp_conversion_factor = 1._sp
         character(lchar) :: prcp_directory = "..."
!>f90w-char From f3d8b0f20608290ab337e7a14cb37a35056df274 Mon Sep 17 00:00:00 2001 From: "ngo-nghi-truyen.huynh" Date: Thu, 8 Jun 2023 14:27:43 +0200 Subject: [PATCH 26/73] MAINT: remove density attribute of BayesResult in preparation for V1.0.0 --- doc/source/release/0.5.0-notes.rst | 6 ++ .../in_depth/optimize/bayes_optimize.rst | 4 +- smash/core/simulation/bayes_optimize.py | 83 +++++++++---------- 3 files changed, 48 insertions(+), 45 deletions(-) diff --git a/doc/source/release/0.5.0-notes.rst b/doc/source/release/0.5.0-notes.rst index 6c60b91c..f2881c97 100644 --- a/doc/source/release/0.5.0-notes.rst +++ b/doc/source/release/0.5.0-notes.rst @@ -22,6 +22,12 @@ Compatibilities Deprecations ------------ +BayesResult object +****************** + +The ``density`` attribute of the :class:`smash.BayesResult` object has been deprecated in preparation for the upcoming release 1.0.0. +The other two attributes, ``data`` and ``lcurve``, are still available and can be used for further analysis. + ------------ Improvements ------------ diff --git a/doc/source/user_guide/in_depth/optimize/bayes_optimize.rst b/doc/source/user_guide/in_depth/optimize/bayes_optimize.rst index 414487b1..7b857f56 100644 --- a/doc/source/user_guide/in_depth/optimize/bayes_optimize.rst +++ b/doc/source/user_guide/in_depth/optimize/bayes_optimize.rst @@ -130,8 +130,6 @@ It can be implemented using the :class:`smash.Model.bayes_optimize` method as fo return_br=True ) - model_bo.output.cost # cost value with HDBC - .. ipython:: python :verbatim: @@ -144,6 +142,8 @@ It can be implemented using the :class:`smash.Model.bayes_optimize` method as fo return_br=True ) +.. ipython:: python + model_bo.output.cost # cost value with HDBC .. note:: diff --git a/smash/core/simulation/bayes_optimize.py b/smash/core/simulation/bayes_optimize.py index cf7097e5..f931d538 100644 --- a/smash/core/simulation/bayes_optimize.py +++ b/smash/core/simulation/bayes_optimize.py @@ -34,10 +34,6 @@ class BayesResult(dict): Rrepresenting the generated spatially uniform Model parameters/sates and the corresponding cost values after running the simulations on this dataset. The keys are 'cost' and the names of Model parameters/states considered. - density : dict - Representing the estimated distribution at pixel scale of the Model parameters/states after - running the simulations. The keys are the names of the Model parameters/states. - lcurve : dict The optimization results on the regularization parameter if the L-curve approach is used. 
The keys are @@ -98,12 +94,14 @@ def _bayes_computation( options: dict | None, ncpu: int, ) -> BayesResult: - # % Prior solution + # % prior solution prior_data = {} + # % density of data distribution + density = {} + # % returns ret_data = {} - ret_density = {} ret_lcurve = {} # % verbose @@ -137,7 +135,7 @@ def _bayes_computation( prior_data[p] = dat_p - ret_density[p] = np.ones(dat_p.shape) + density[p] = np.ones(dat_p.shape) # % Density compute active_mask = np.where(instance.mesh.active_cell == 1) @@ -146,7 +144,7 @@ def _bayes_computation( sample, prior_data, active_mask, - ret_density, + density, algorithm, bw_method, weights, @@ -164,7 +162,7 @@ def _bayes_computation( ost, active_mask, prior_data, - ret_density, + density, ret_lcurve, alpha, ) @@ -180,13 +178,11 @@ def _bayes_computation( ost, active_mask, prior_data, - ret_density, + density, alpha, ) - return BayesResult( - dict(zip(["data", "density", "lcurve"], [ret_data, ret_density, ret_lcurve])) - ) + return BayesResult(dict(zip(["data", "lcurve"], [ret_data, ret_lcurve]))) def _bayes_message(sr: SampleResult, alpha: int | float | list, ncpu: int): @@ -297,10 +293,14 @@ def _unit_simu( for name in sample._problem["names"]: if name in instance.setup._parameters_name: - res[name] = np.copy(getattr(instance.parameters, name)) + res[name] = np.copy( + getattr(instance.parameters, name) + ) # must be copy here (TODO: change in V1.0.0) else: - res[name] = np.copy(getattr(instance.states, name)) + res[name] = np.copy( + getattr(instance.states, name) + ) # must be copy here (TODO: change in V1.0.0) return res @@ -401,29 +401,29 @@ def _compute_density( coord = np.dstack([active_mask[0], active_mask[1]])[0] for p in sample._problem["names"]: - dat_p = np.copy(data[p]) - - if algorithm == "l-bfgs-b": # variational Bayes optim (HD) + if algorithm == "l-bfgs-b": # variational Bayes optim (HD-optim) for c in coord: - density[p][c[0], c[1]] = gaussian_kde( - dat_p[c[0], c[1]], bw_method=bw_method, weights=weights - )(dat_p[c[0], c[1]]) + estimted_density = gaussian_kde( + data[p][c[0], c[1]], bw_method=bw_method, weights=weights + )(data[p][c[0], c[1]]) - else: - if isinstance(algorithm, str): - u_dis = np.mean(dat_p[active_mask], axis=0) + density[p][ + c[0], c[1] + ] = estimted_density # TODO: add this term in V1.0.0: * getattr(sample, "_" + p) # compute joint probability - uniform_density = gaussian_kde( - u_dis, bw_method=bw_method, weights=weights - )( - u_dis - ) # global Bayes optim (LD) + elif isinstance(algorithm, str): # global Bayes optim (LD-optim) + u_dis = np.mean(data[p][active_mask], axis=0) - else: - uniform_density = getattr(sample, "_" + p) # Bayes estim (LD) + estimted_density = gaussian_kde( + u_dis, bw_method=bw_method, weights=weights + )(u_dis) - for c in coord: - density[p][c[0], c[1]] = uniform_density + density[p][ + *zip(*coord) + ] = estimted_density # TODO: add this term in V1.0.0: * getattr(sample, "_" + p) # compute joint probability + + else: # Bayes estim (LD-estim) + density[p][*zip(*coord)] = getattr(sample, "_" + p) ### BAYES ESTIMATE AND L-CURVE @@ -463,20 +463,17 @@ def _compute_param( ost: pd.Timestamp, active_mask: np.ndarray, prior_data: dict, - ret_density: dict, + density: dict, alpha: int | float, ) -> tuple: D_alp = [] - J = np.copy(prior_data["cost"]) - var = {} for name in sample._problem["names"]: - U = np.copy(prior_data[name]) - rho = np.copy(ret_density[name]) - - u, v, d = _compute_mean_U(U, J, rho, alpha, active_mask) + u, v, d = _compute_mean_U( + prior_data[name], 
prior_data["cost"], density[name], alpha, active_mask
+        )

         if name in instance.setup._parameters_name:
             setattr(instance.parameters, name, u)
@@ -510,7 +507,7 @@ def _lcurve_compute_param(
     ost: pd.Timestamp,
     active_mask: np.ndarray,
     prior_data: dict,
-    ret_density: dict,
+    density: dict,
     ret_lcurve: dict,
     alpha: list,
 ):
@@ -529,7 +526,7 @@ def _lcurve_compute_param(
             ost,
             active_mask,
             prior_data,
-            ret_density,
+            density,
             alpha_i,
         )
@@ -554,7 +551,7 @@ def _lcurve_compute_param(
         ost,
         active_mask,
         prior_data,
-        ret_density,
+        density,
         alpha_opt,
     )

From 723fc8ea09a520d8cc2e98c04ad78e1a1f17d058 Mon Sep 17 00:00:00 2001
From: "ngo-nghi-truyen.huynh"
Date: Thu, 8 Jun 2023 15:02:18 +0200
Subject: [PATCH 27/73] FIX PR: git workflow does not accept an output syntax
 produced by black

---
 smash/core/simulation/bayes_optimize.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/smash/core/simulation/bayes_optimize.py b/smash/core/simulation/bayes_optimize.py
index f931d538..8bd2f4a5 100644
--- a/smash/core/simulation/bayes_optimize.py
+++ b/smash/core/simulation/bayes_optimize.py
@@ -418,9 +418,8 @@ def _compute_density(
                 u_dis, bw_method=bw_method, weights=weights
             )(u_dis)

-            density[p][
-                *zip(*coord)
-            ] = estimted_density  # TODO: add this term in V1.0.0: * getattr(sample, "_" + p) # compute joint probability
+            density[p][*zip(*coord)] = estimted_density
+            # TODO: add this term in V1.0.0: * getattr(sample, "_" + p) # compute joint probability

         else:  # Bayes estim (LD-estim)
             density[p][*zip(*coord)] = getattr(sample, "_" + p)

From 9da4768abed33c4e02bdea42752ab0f8a971a4d0 Mon Sep 17 00:00:00 2001
From: "ngo-nghi-truyen.huynh"
Date: Thu, 8 Jun 2023 15:46:21 +0200
Subject: [PATCH 28/73] FIX PR: fix pipeline - workflow returns an error when
 passing * in a numpy array

---
 smash/core/simulation/bayes_optimize.py | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/smash/core/simulation/bayes_optimize.py b/smash/core/simulation/bayes_optimize.py
index 8bd2f4a5..5e0fc5b9 100644
--- a/smash/core/simulation/bayes_optimize.py
+++ b/smash/core/simulation/bayes_optimize.py
@@ -399,16 +399,17 @@ def _compute_density(
     weights: np.ndarray | None,
 ):
     coord = np.dstack([active_mask[0], active_mask[1]])[0]
+    x, y = zip(*coord)

     for p in sample._problem["names"]:
         if algorithm == "l-bfgs-b":  # variational Bayes optim (HD-optim)
-            for c in coord:
+            for xi, yi in zip(x, y):
                 estimted_density = gaussian_kde(
-                    data[p][c[0], c[1]], bw_method=bw_method, weights=weights
-                )(data[p][c[0], c[1]])
+                    data[p][xi, yi], bw_method=bw_method, weights=weights
+                )(data[p][xi, yi])

                 density[p][
-                    c[0], c[1]
+                    xi, yi
                 ] = estimted_density  # TODO: add this term in V1.0.0: * getattr(sample, "_" + p) # compute joint probability

         elif isinstance(algorithm, str):  # global Bayes optim (LD-optim)
             u_dis = np.mean(data[p][active_mask], axis=0)

             estimted_density = gaussian_kde(
                 u_dis, bw_method=bw_method, weights=weights
             )(u_dis)

-            density[p][*zip(*coord)] = estimted_density
-            # TODO: add this term in V1.0.0: * getattr(sample, "_" + p) # compute joint probability
+            density[p][
+                x, y
+            ] = estimted_density  # TODO: add this term in V1.0.0: * getattr(sample, "_" + p) # compute joint probability

         else:  # Bayes estim (LD-estim)
-            density[p][*zip(*coord)] = getattr(sample, "_" + p)
+            density[p][x, y] = getattr(sample, "_" + p)

From 48a1f075653b16adfaa4fcdaa40a90ecd5271ae8 Mon Sep 17 00:00:00 2001
From: Jay-Allemand Maxime
Date: Thu, 8 Jun 2023 16:43:33 +0200
Subject: [PATCH 29/73] Complete the release note

---
 doc/source/release/0.5.0-notes.rst | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/doc/source/release/0.5.0-notes.rst b/doc/source/release/0.5.0-notes.rst
index 6c60b91c..6a21d442 100644
--- a/doc/source/release/0.5.0-notes.rst
+++ b/doc/source/release/0.5.0-notes.rst
@@ -26,10 +26,25 @@ Deprecations
 Improvements
 ------------

+Reading precipitation with YY/MM/DD access
+******************************************
+
+This mode is triggered by the flag setup.prcp_yymmdd_access=True. The precipitation files are supposed to be stored in a YY/MM/dd directory tree. This option is useful if the model is run time step by time step (many incremental runs). In that case, searching the precipitation files can be relatively slow (about 1 second multiplied by the number of runs). With this mode the search is optimized and faster.
+
 ------------
 New Features
 ------------

+New regularization function
+***************************
+
+hard-smoothing: the smoothing regularization function is applied to the parameters or states directly. This behavior differs from the "smoothing" mode, where the regularization is applied to the difference between the background and the control (parameters or states).
+
+New functions for reading and writing hdf5 files
+************************************************
+
+The new functions are generic. You can save a dictionary to an hdf5 file, save an object (not only smash) to an hdf5 file, read an object as a dictionary, read an hdf5 file as a dictionary, and read an hdf5 file as a smash model object. The functions are provided by smash.io.hdf5_io.py. hdf5 files can be opened read-only to allow several simultaneous accesses. During export and reading, the structure of the dictionary or object is preserved. When saving an object or a dictionary to an hdf5 file, the location can be specified so that dictionaries or objects can be saved side by side at different places.
+
 -----
 Fixes
 -----
@@ -41,3 +56,8 @@ The boundary condition checking previously used a tolerance of 1e-6, which cause
 To address this problem, the tolerance has been increased to 1e-3.
 See issue `#23 `__.
+
+Bug fixes when generating the l-curve
+*************************************
+
+Issues have been solved when selecting the optimal weight for the regularization term.
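As a usage illustration of the YY/MM/DD access described in the release note above, here is a minimal setup sketch. Only prcp_yymmdd_access, read_prcp, prcp_format, prcp_directory and the YYYY/MM/DD layout come from the patches in this series; the dates, paths and the commented-out calls are placeholder assumptions.

    import smash

    setup = {
        "start_time": "2014-09-15 00:00",
        "end_time": "2014-11-14 00:00",
        "dt": 3600,
        "read_prcp": True,
        "prcp_format": "tif",
        # hypothetical root; files are expected under /data/prcp/YYYY/MM/DD/
        "prcp_directory": "/data/prcp",
        # triggers the per-day glob instead of a recursive search from the root
        "prcp_yymmdd_access": True,
    }
    # mesh = smash.generate_mesh(...)  # mesh built beforehand
    # model = smash.Model(setup, mesh)  # precipitation is read at Model creation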
From eccfbc49788375c665ab3e80fa033607759151f6 Mon Sep 17 00:00:00 2001 From: inoelloc Date: Thu, 8 Jun 2023 16:59:50 +0200 Subject: [PATCH 30/73] FIX PR: Regenerated baseline - Two tests have been modified as a result of changes in optimization regularizations --- smash/tests/baseline.hdf5 | Bin 2010235 -> 2010235 bytes smash/tests/diff_baseline.csv | 44 ++++++++++++++++------------------ 2 files changed, 20 insertions(+), 24 deletions(-) diff --git a/smash/tests/baseline.hdf5 b/smash/tests/baseline.hdf5 index b05da70047233e5253a151ae2f67ba8bda978253..45ba8322b6e9ef487f1401bdcf9a21d0951ed5b5 100644 GIT binary patch delta 461 zcma*dPbhtn!tJKXiZKMW{f*U!D~Csflo5FOEzY+@ieXc`x6 zVg5BKII#9ns1tQT1>>E9n{ihK8{oo+unD91?P4_ypMC@jz*MF@j8QFSs>h~PB2%j+ zEU1Gmt$ZRR(d*InIgV5_zo<_ufU$XZo{{^iaxC~*&&og_jsf3}vBJ5Q2wS_q0EXpF z+I{fvo?^Ii1aAVLS$0pTcL7F3Vk9QK3*_#-!zC^>n*9vy{I<<3Lr^uen~8UP&u^dV#p}+&C^-FY z5U)C8Xgg~#FA(zqG5>bfU;+Jp&fZ2w1`rULZkQzds$KoD01yiTu@Ddo1F;AYivqD2 y5Q_t`1Q1ICu@n$XZ&!aTlUc?%bNa+cS#`!6(|1P7zGoC~Ul}F4ePxu~r9c4l#IM}| diff --git a/smash/tests/diff_baseline.csv b/smash/tests/diff_baseline.csv index acd5672e..38a3adcf 100644 --- a/smash/tests/diff_baseline.csv +++ b/smash/tests/diff_baseline.csv @@ -1,12 +1,8 @@ -commit 18cc90156ca16b257692c48b7262d927ed4a8501 -Author: inoelloc -Date: Thu Apr 13 12:17:42 2023 +0200 +commit 48a1f075653b16adfaa4fcdaa40a90ecd5271ae8 +Author: Jay-Allemand Maxime +Date: Thu Jun 8 16:43:33 2023 +0200 - FIX: Lower the tolerance to 1e-5 in multiple_run assertion (probably parallel issue) - - - Modified - - * test_simu.py: in np.allclose change atol from 1e-6 to 1e-5 in test_multiple_run + complete the release not TEST NAME |STATUS ann_optimize_1.cost |NON MODIFIED @@ -20,8 +16,8 @@ bayes_optimize.cost |NON MODIFIED bbox_mesh.flwacc |NON MODIFIED bbox_mesh.flwdir |NON MODIFIED event_seg.arr |NON MODIFIED -gen_samples.nor |MODIFIED -gen_samples.uni |MODIFIED +gen_samples.nor |NON MODIFIED +gen_samples.uni |NON MODIFIED mesh_io.active_cell |NON MODIFIED mesh_io.area |NON MODIFIED mesh_io.code |NON MODIFIED @@ -37,18 +33,18 @@ mesh_io.nrow |NON MODIFIED mesh_io.path |NON MODIFIED mesh_io.xmin |NON MODIFIED mesh_io.ymax |NON MODIFIED -multiple_run.cost |ADDED -multiple_run.qsim |ADDED -mutiple_run.slc_1.cost |ADDED -mutiple_run.slc_1.qsim |ADDED -mutiple_run.slc_2.cost |ADDED -mutiple_run.slc_2.qsim |ADDED -mutiple_run.slc_3.cost |ADDED -mutiple_run.slc_3.qsim |ADDED -mutiple_run.slc_4.cost |ADDED -mutiple_run.slc_4.qsim |ADDED -mutiple_run.slc_5.cost |ADDED -mutiple_run.slc_5.qsim |ADDED +multiple_run.cost |NON MODIFIED +multiple_run.qsim |NON MODIFIED +mutiple_run.slc_1.cost |NON MODIFIED +mutiple_run.slc_1.qsim |NON MODIFIED +mutiple_run.slc_2.cost |NON MODIFIED +mutiple_run.slc_2.qsim |NON MODIFIED +mutiple_run.slc_3.cost |NON MODIFIED +mutiple_run.slc_3.qsim |NON MODIFIED +mutiple_run.slc_4.cost |NON MODIFIED +mutiple_run.slc_4.qsim |NON MODIFIED +mutiple_run.slc_5.cost |NON MODIFIED +mutiple_run.slc_5.qsim |NON MODIFIED net_init.bias_layer_1 |NON MODIFIED net_init.bias_layer_2 |NON MODIFIED net_init.bias_layer_3 |NON MODIFIED @@ -62,8 +58,8 @@ optimize.distributed_l-bfgs-b.cost |NON MODIFIED optimize.distributed_l-bfgs-b_bounds.cft |NON MODIFIED optimize.distributed_l-bfgs-b_bounds.cost |NON MODIFIED optimize.distributed_l-bfgs-b_bounds.cp |NON MODIFIED -optimize.distributed_l-bfgs-b_reg_fast.cost |NON MODIFIED -optimize.distributed_l-bfgs-b_reg_lcurve.cost |NON MODIFIED +optimize.distributed_l-bfgs-b_reg_fast.cost |MODIFIED +optimize.distributed_l-bfgs-b_reg_lcurve.cost 
|MODIFIED optimize.hyper-linear_l-bfgs-b.cost |NON MODIFIED optimize.hyper-polynomial_l-bfgs-b.cost |NON MODIFIED optimize.uniform_nelder-mead.cost |NON MODIFIED From 2848978981cdddba1c589f6ed8c68c72054d8058 Mon Sep 17 00:00:00 2001 From: Jay-Allemand Maxime Date: Wed, 14 Jun 2023 13:11:56 +0200 Subject: [PATCH 31/73] - New function to read raster with gdal. This function will perform a spatial desagregation if necessary - Optimize a little bit the reading of the raster - Comment the code --- smash/core/_read_input_data.py | 34 +- smash/core/raster.py | 589 +++++++++++---------------------- 2 files changed, 194 insertions(+), 429 deletions(-) diff --git a/smash/core/_read_input_data.py b/smash/core/_read_input_data.py index 8c060a1b..96869e5c 100644 --- a/smash/core/_read_input_data.py +++ b/smash/core/_read_input_data.py @@ -22,32 +22,6 @@ from osgeo import gdal -def _read_windowed_raster(path: str, mesh: MeshDT) -> np.ndarray: - # ~ ds = gdal.Open(path) - - # ~ transform = ds.GetGeoTransform() - - # ~ xmin = transform[0] - # ~ ymax = transform[3] - # ~ xres = transform[1] - # ~ yres = -transform[5] - - # ~ col_off = (mesh.xmin - xmin) / xres - # ~ row_off = (ymax - mesh.ymax) / yres - - # ~ band = ds.GetRasterBand(1) - - # ~ nodata = band.GetNoDataValue() - - # ~ arr = band.ReadAsArray(col_off, row_off, mesh.ncol, mesh.nrow) - - # ~ arr = np.where(arr == nodata, -99, arr) - - arr = read_windowed_raster_gdal(filename=path, smash_mesh=mesh, band=1, lacuna=-99.) - - return arr - - def _read_qobs(setup: SetupDT, mesh: MeshDT, input_data: Input_DataDT): st = pd.Timestamp(setup.start_time) @@ -146,7 +120,7 @@ def _read_prcp(setup: SetupDT, mesh: MeshDT, input_data: Input_DataDT): else: matrix = ( - _read_windowed_raster(files[ind], mesh) * setup.prcp_conversion_factor + read_windowed_raster_gdal(filename=files[ind], smash_mesh=mesh, band=1, lacuna=-99.) * setup.prcp_conversion_factor ) if setup.sparse_storage: @@ -212,7 +186,7 @@ def _read_pet(setup: SetupDT, mesh: MeshDT, input_data: Input_DataDT): subset_date_range = date_range[ind_day] matrix = ( - _read_windowed_raster(files[ind], mesh) + read_windowed_raster_gdal(filename=files[ind], smash_mesh=mesh, band=1, lacuna=-99.) * setup.pet_conversion_factor ) @@ -257,7 +231,7 @@ def _read_pet(setup: SetupDT, mesh: MeshDT, input_data: Input_DataDT): else: matrix = ( - _read_windowed_raster(files[ind], mesh) + read_windowed_raster_gdal(filename=files[ind], smash_mesh=mesh, band=1, lacuna=-99.) * setup.pet_conversion_factor ) @@ -288,4 +262,4 @@ def _read_descriptor(setup: SetupDT, mesh: MeshDT, input_data: Input_DataDT): ) else: - input_data.descriptor[..., i] = _read_windowed_raster(path[0], mesh) + input_data.descriptor[..., i] = read_windowed_raster_gdal(filename=path[0], smash_mesh=mesh, band=1, lacuna=-99.) 
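For reference, a minimal sketch of calling the new reader on its own; the import path and the signature match the diff above, while the file path and the model object are placeholder assumptions.

    from smash.core.raster import read_windowed_raster_gdal

    # model.mesh provides xmin, ymax, dx, nrow and ncol, which are used to window
    # (and, if the resolutions differ, resample) the raster onto the model grid
    prcp = read_windowed_raster_gdal(
        filename="/data/prcp/2014/09/15/rain_201409150000.tif",  # hypothetical file
        smash_mesh=model.mesh,
        band=1,
        lacuna=-99.0,  # value substituted for the raster nodata
    )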
diff --git a/smash/core/raster.py b/smash/core/raster.py index e2300740..54b651f9 100644 --- a/smash/core/raster.py +++ b/smash/core/raster.py @@ -16,72 +16,6 @@ ### GDAL RASTER FUNCTIONS -def generate_polygon(bbox): - """ - Generates a list of coordinates: [[x1,y1],[x2,y2],[x3,y3],[x4,y4],[x1,y1]] - """ - return [[bbox[0],bbox[1]], - [bbox[2],bbox[1]], - [bbox[2],bbox[3]], - [bbox[0],bbox[3]], - [bbox[0],bbox[1]]] - - -def pol_to_bounding_box(pol): - """ - Receives list of coordinates: [[x1,y1],[x2,y2],...,[xN,yN]] - """ - arr = pol_to_np(pol) - return BoundingBox(np.min(arr[:,0]), - np.min(arr[:,1]), - np.max(arr[:,0]), - np.max(arr[:,1])) - - - -def xy_to_colrow(x, y, xmin, ymax, xres, yres): - - col = int((x - xmin) / xres) - row = int((ymax - y) / yres) - - return col, row - - -def colrow_to_xy(col, row, xmin, ymax, xres, yres): - - x = int(col * xres + xmin) - y = int(ymax - row * yres) - - return x, y - - - -def trim_zeros_2D(array, shift_value=False): - - for ax in [0, 1]: - - mask = ~(array == 0).all(axis=ax) - - inv_mask = mask[::-1] - - start_ind = np.argmax(mask) - - end_ind = len(inv_mask) - np.argmax(inv_mask) - - if ax == 0: - scol, ecol = start_ind, end_ind - array = array[:, start_ind:end_ind] - else: - srow, erow = start_ind, end_ind - array = array[start_ind:end_ind, :] - - if shift_value: - return array, scol, ecol, srow, erow - else: - return array - - - #just open the raster and return the dataset def gdal_raster_open(filename): @@ -133,15 +67,21 @@ def read_windowed_raster_gdal(filename: str, smash_mesh: MeshDT, band=None, lacu geotransform=gdal_get_geotransform(dataset) if (geotransform['xres'] != smash_mesh.dx) or (geotransform['yres'] != smash_mesh.dx): - new_dataset=gdal_reproject_raster(dataset,smash_mesh.dx,smash_mesh.dx) - dataset=new_dataset + + #Attempt to generate a smaller dataset before doing the reprojection. However, it is slower.. + # ~ window=gdal_smash_window_from_geotransform(geotransform,smash_mesh) + # ~ dataset=gdal.Translate('/vsimem/raster.tif', dataset, srcWin=[window['col_off'], window['row_off'], window["ncols"], window["nrows"]]) + + dataset=gdal_reproject_raster(dataset,smash_mesh.dx,smash_mesh.dx) + geotransform=gdal_get_geotransform(dataset) - #si mesh larger than window: window=1,1,all,all - #compute window of smash-mesh and get xoffset and y offsets => offsets + #Todo: + #If smash mesh larger than window: window=1,1,all,all + #compute window of smash-mesh and get x_offset and y_offsets => offsets #pass this window to gdal_crop_dataset_to_ndarray(dataset=dataset,window=window,offsets=offset) #position the rainfall inside the mesh grid according offset ! - window=gdal_smash_window_from_geotransform(dataset,smash_mesh) + window=gdal_smash_window_from_geotransform(geotransform,smash_mesh) if (band==None): array=gdal_crop_dataset_to_ndarray(dataset=dataset,window=window,lacuna=lacuna) @@ -150,208 +90,6 @@ def read_windowed_raster_gdal(filename: str, smash_mesh: MeshDT, band=None, lacu return array - - - -def gdal_get_geotransform(dataset): - """ - Getting the GeoTransform coeficients from a gdal object - - Parameters - ---------- - dataset : gdal object from gdal.Open() - - Returns - ---------- - geotransform : Python dictionnary - - # ~ A GeoTransform consists in a set of 6 coefficients: - # ~ GT(0) x-coordinate of the upper-left corner of the upper-left pixel. - # ~ GT(1) w-e pixel resolution / pixel width. - # ~ GT(2) row rotation (typically zero). - # ~ GT(3) y-coordinate of the upper-left corner of the upper-left pixel. 
- # ~ GT(4) column rotation (typically zero). - # ~ GT(5) n-s pixel resolution / pixel height (negative value for a north-up image). - - Examples - ---------- - dataset = gdal_raster_open(filename) - geotransform=gdal_get_geotransform(dataset) - """ - - transform = dataset.GetGeoTransform() - xmin = transform[0] - ymax = transform[3] - xres = transform[1] - yres = -transform[5] - geotransform={'xleft':xmin,'xres':xres, 'ytop':ymax, 'yres':yres} - return geotransform - - - -def gdal_smash_window_from_geotransform(dataset,smash_mesh): - """ - Compute the dataset array window according the Smash mesh - - Parameters - ---------- - dataset : gdal object from gdal.Open() - smash_mesh : Smash mesh object model.mesh - - Returns - ---------- - window : Python dictionnary with componnents: row_off (offset), col_off (offset), nrows, ncols - - Examples - ---------- - window=gdal_smash_window_from_geotransform(dataset,smash_mesh) - """ - geotransform=gdal_get_geotransform(dataset) - - col_off = (smash_mesh.xmin - geotransform['xleft']) / geotransform['xres'] - row_off = (geotransform['ytop'] - smash_mesh.ymax) / geotransform['yres'] - - window={"row_off":row_off,'col_off':col_off,'nrows':int(smash_mesh.nrow*smash_mesh.dx/geotransform['yres']),'ncols':int(smash_mesh.ncol*smash_mesh.dx/geotransform['xres'])} - - return window - - -def union_bbox(bbox1,bbox2): - """ - Function which compute the bounding boxes union of 2 input bbox. It return the working bbox - - Parameters - ---------- - bbox1: dict containin the first bbox informations - bbox2 : dict containin the second bbox informations - ---------- - returns - dic containing the bbox union - - Examples - ---------- - dataset=gdal_raster_open(filename) - possible_bbox=union_bbox(bbox,bbox_dataset) - """ - left=max(bbox1['left'],bbox2['left']) - bottom=max(bbox1['bottom'],bbox2['bottom']) - right=min(bbox1['right'],bbox2['right']) - top=min(bbox1['top'],bbox2['top']) - if (leftgeotransform['xres','yres'] ? i.e use ceiling or floor function instead of int ? + #At least it work for case smash_mesh.dx<=geotransform['xres','yres'] which is the moste common case for modelling. New_X_Size=int(dataset.RasterXSize*geotransform['xres']/xres) New_Y_Size=int(dataset.RasterYSize*geotransform['yres']/yres) + in_memory_dataset=gdal.GetDriverByName('MEM') + virtual_destination=in_memory_dataset.Create('',New_X_Size, New_Y_Size, dataset.RasterCount, dataset.GetRasterBand(1).DataType) + ########################################################### #Workaround for gdal bug which initialise array to 0 instead as the No_Data value #Here we initialise the band manually with the nodata_value - band=virtual_destination.GetRasterBand(1) + band=virtual_destination.GetRasterBand(1) #Notice that band is a pointer to virtual_destination band.SetNoDataValue(-9999) Nodataarray = np.ndarray(shape=(New_Y_Size,New_X_Size)) Nodataarray.fill(-9999.0) band.WriteArray(Nodataarray) ########################################################### + virtual_destination.SetGeoTransform(new_dataset_geotranform) virtual_destination.SetProjection(dataset_projection) - gdal.ReprojectImage( dataset, virtual_destination, dataset_projection, dataset_projection, gdal.GRA_NearestNeighbour) + gdal.ReprojectImage( dataset, virtual_destination, dataset_projection, dataset_projection, gdal.GRA_NearestNeighbour,WarpMemoryLimit=500.) + #WarpMemoryLimit=500. would probably increase the speed... but ... 
#https://gdal.org/programs/gdalwarp.html + #choice are : gdal.GRA_NearestNeighbour, gdal.GRA_Mode, gdal.GRA_Average ... Not tested https://gdal.org/api/gdalwarp_cpp.html#_CPPv4N15GDALResampleAlg11GRA_AverageE + #Use osgeo.gdal.Warp instead of ReprojectImage offer much more option like multithreading ? https://gdal.org/api/python/osgeo.gdal.html#osgeo.gdal.Warp return virtual_destination - #simply slice an array according a window def gdal_crop_dataset_to_array(dataset=object(),window={},band=1,lacuna=None): """ @@ -423,13 +168,16 @@ def gdal_crop_dataset_to_array(dataset=object(),window={},band=1,lacuna=None): window=gdal_smash_window_from_geotransform(dataset,smash_mesh) array=gdal_crop_dataset_to_array(dataset,window,band=1) """ - - sliced_array=dataset.GetRasterBand(band).ReadAsArray(window['col_off'], window['row_off'], window["ncols"], window["nrows"]) + + dataset_band=dataset.GetRasterBand(band) + + sliced_array=dataset_band.ReadAsArray(window['col_off'], window['row_off'], window["ncols"], window["nrows"]) array_float=sliced_array.astype('float64') + #Lacuna treatment here - Nodata=dataset.GetRasterBand(band).GetNoDataValue() - if (lacuna!=None): + if (isinstance(lacuna,float)): + Nodata=dataset_band.GetNoDataValue() mask=np.where(sliced_array==Nodata) array_float[mask]=lacuna @@ -457,14 +205,20 @@ def gdal_crop_dataset_to_ndarray(dataset=object(),window={},lacuna=None): window=gdal_smash_window_from_geotransform(dataset,smash_mesh) array=gdal_crop_dataset_to_array(dataset,window) """ + dictionnary={} - for index in range(1,dataset.RasterCount+1): - sliced_array=dataset.GetRasterBand(index).ReadAsArray(window['col_off'], window['row_off'], window["ncols"], window["nrows"]) + nb_dataset=dataset.RasterCount + for index in range(1,nb_dataset+1): + + dataset_band=dataset.GetRasterBand(index) + + sliced_array=dataset_band.ReadAsArray(window['col_off'], window['row_off'], window["ncols"], window["nrows"]) array_float=sliced_array.astype('float64') + #Lacuna treatment here - Nodata=dataset.GetRasterBand(index).GetNoDataValue() - if (lacuna!=None): + if (isinstance(lacuna,float)): + Nodata=dataset_band.GetNoDataValue() mask=np.where(sliced_array==Nodata) array_float[mask]=lacuna @@ -513,168 +267,205 @@ def gdal_write_dataset(filename,dataset,format='Gtiff'): dst_ds=None - - -### ASCII GRID Functions - - -def init_asciigrid(source={}): +def gdal_get_geotransform(dataset): """ - initialise a empty asciigrid dictionnary + Getting the GeoTransform coeficients from a gdal object Parameters ---------- - none - + dataset : gdal object from gdal.Open() + Returns ---------- - dict, with default properties of a asciigrid + geotransform : Python dictionnary + # ~ A GeoTransform consists in a set of 6 coefficients: + # ~ GT(0) x-coordinate of the upper-left corner of the upper-left pixel. + # ~ GT(1) w-e pixel resolution / pixel width. + # ~ GT(2) row rotation (typically zero). + # ~ GT(3) y-coordinate of the upper-left corner of the upper-left pixel. + # ~ GT(4) column rotation (typically zero). + # ~ GT(5) n-s pixel resolution / pixel height (negative value for a north-up image). 
+ + Examples + ---------- + dataset = gdal_raster_open(filename) + geotransform=gdal_get_geotransform(dataset) """ - if (source.__len__()==0): - - asciigrid = {} - asciigrid["ncols"] = 0 - asciigrid["nrows"] = 0 - asciigrid["xllcorner"] = 0.0 - asciigrid["yllcorner"] = 0.0 - asciigrid["cellsize"] = 0.0 - asciigrid["NODATA_value"] = -99.0 - asciigrid["data"] = np.full(shape=(0,0),fill_value=-99.0) - asciigrid["extend"] = [0.0, 0.0, 0.0, 0.0] - else: - asciigrid=source.copy() - asciigrid["data"] = np.full(shape=(source['nrows'],source['ncols']),fill_value=source['NODATA_value']) + transform = dataset.GetGeoTransform() + geotransform={'xleft':transform[0],'xres':transform[1], 'ytop':transform[3], 'yres':-transform[5]} - return asciigrid + return geotransform -def read_asciigrid(filename): +def gdal_smash_window_from_geotransform(geotransform,smash_mesh): """ - Read an asciigrid file + Compute the dataset array window (from the geotransform) according the Smash mesh Parameters ---------- - filename: path to the file - + geotransform : geotransform computed from a gdal dataset + smash_mesh : Smash mesh object model.mesh + Returns ---------- - dict, containing the asciigrid, data and its properties + window : Python dictionnary with componnents: row_off (offset), col_off (offset), nrows, ncols + Examples + ---------- + dataset = gdal_raster_open(filename) + geotransform=gdal_get_geotransform(dataset) + window=gdal_smash_window_from_geotransform(geotransform,smash_mesh) """ - asciigrid = init_asciigrid() - if os.path.exists(filename): - with open(filename, 'r') as input_file: - header = input_file.readlines()[:6] - header = [item.strip().split()[1] for item in header] - - asciigrid["ncols"] = int(header[0]) - asciigrid["nrows"] = int(header[1]) - asciigrid["xllcorner"] = float(header[2]) - asciigrid["yllcorner"] = float(header[3]) - asciigrid["cellsize"] = float(header[4]) - asciigrid["NODATA_value"] = float(header[1]) - asciigrid["data"] = np.loadtxt(filename, dtype=float, skiprows=6) - asciigrid["extend"] = [ - asciigrid["xllcorner"], asciigrid["xllcorner"] + asciigrid["ncols"] * asciigrid["cellsize"], - asciigrid["yllcorner"], asciigrid["yllcorner"] + asciigrid["nrows"] * asciigrid["cellsize"]] - else: - print(filename + " does not exist") - return asciigrid - + col_off = (smash_mesh.xmin - geotransform['xleft']) / geotransform['xres'] + row_off = (geotransform['ytop'] - smash_mesh.ymax) / geotransform['yres'] + + #If smash_mesh.dx==geotransform['xres','yres'] no problem ! + #It works for case : smash_mesh.dx!=geotransform['xres','yres'] + #Do we must distinguish case smash_mesh.dx<=geotransform['xres','yres'] and smash_mesh.dx>geotransform['xres','yres'] ? i.e use ceiling or floor function instead of int ? + + window={"row_off":row_off,'col_off':col_off,'nrows':int(smash_mesh.nrow*smash_mesh.dx/geotransform['yres']),'ncols':int(smash_mesh.ncol*smash_mesh.dx/geotransform['xres'])} + + return window + -def set_asciigrid(data, ncols, nrows, xllcorner, yllcorner, cellsize, NODATA_value): +def union_bbox(bbox1,bbox2): """ - Set an asciigrid with data + Function which compute the bounding boxes union of 2 input bbox. 
It return the working bbox Parameters ---------- - data: numpy array containing the data (a matrix) - ncols : integer, number of column - nrows : integer, number of rows - xllcorner : latitue coordinate of the lower left corner - yllcorner : longitude coordinate of the lower left corner - cellsize : cell resolution - NODATA_value : value of the non-data value + bbox1: dict containin the first bbox informations + bbox2 : dict containin the second bbox informations + ---------- + returns + dic containing the bbox union - Returns + Examples ---------- - dict, containing the asciigrid, data and its properties + dataset=gdal_raster_open(filename) + possible_bbox=union_bbox(bbox,bbox_dataset) + """ + left=max(bbox1['left'],bbox2['left']) + bottom=max(bbox1['bottom'],bbox2['bottom']) + right=min(bbox1['right'],bbox2['right']) + top=min(bbox1['top'],bbox2['top']) + if (left Date: Wed, 14 Jun 2023 13:15:27 +0200 Subject: [PATCH 32/73] Add functions for testing the branch --- testing/functions_smash_plot.py | 784 ++++++++++++++++++++++++++++++++ testing/functions_smash_time.py | 181 ++++++++ testing/test-desag_rainfall.py | 57 +++ 3 files changed, 1022 insertions(+) create mode 100644 testing/functions_smash_plot.py create mode 100644 testing/functions_smash_time.py create mode 100644 testing/test-desag_rainfall.py diff --git a/testing/functions_smash_plot.py b/testing/functions_smash_plot.py new file mode 100644 index 00000000..ca91513d --- /dev/null +++ b/testing/functions_smash_plot.py @@ -0,0 +1,784 @@ +import matplotlib.pyplot as plt +import numpy as np +import math +import datetime +import smash +import h5py +import os +import pandas as pd + + +import matplotlib +from matplotlib import cm +from matplotlib.colors import ListedColormap, LinearSegmentedColormap + +from functions_smash_time import * + + +def plot_discharges(model,title="",figname="",columns=[],xlim=[None,None],ylim=[None,None],linewidth=1.5,legend=True,color=["black","grey","blue"],plot_rainfall=True,plot=None): + + #manage date here + #compute date_range + date_deb=datetime.datetime.fromisoformat(model.setup.start_time)+duration_to_timedelta(int(model.setup.dt)) + date_end=datetime.datetime.fromisoformat(model.setup.end_time)+duration_to_timedelta(int(model.setup.dt)) + date_range=[date_deb,date_end,model.setup.dt] + + # ~ plot=plt.subplots() + if plot is None: + if plot_rainfall: + fig, (ax1, ax2) = plt.subplots(2, 1,height_ratios=[1, 4]) + fig.subplots_adjust(hspace=0) + plot=[fig, ax2, ax1] + else: + fig, ax2 = plt.subplots() + plot=[fig, ax2] + else: + if plot_rainfall: + fig=plot[0] + ax1=plot[2] + ax2=plot[1] + else: + fig=plot[0] + ax2=plot[1] + + plot=plot_time_vars(model.input_data.qobs,columns=[0],title=title,label="Observations at "+model.mesh.code[0],xlabel="Time step",dt=0,ylabel="Discharge $(m^3/s)$",figname=figname,color=color[0],linestyle="--",linewidth=linewidth,xlim=xlim,ylim=ylim,legend=legend,date_range=date_range,plot=plot) + + plot=plot_time_vars(model.output.qsim,columns=[0],title=title,label="Simulation at "+model.mesh.code[0],xlabel="Time step",dt=0,ylabel="Discharge $(m^3/s)$",figname=figname,color=color[1],linestyle="-",linewidth=linewidth,legend=legend,xlim=xlim,ylim=ylim,date_range=date_range,plot=plot) + + + xtics = np.arange(np.datetime64(date_range[0]),np.datetime64(date_range[1]), np.timedelta64(int(date_range[2]), 's')) + + if plot_rainfall: + + ax1.bar(xtics,model.input_data.mean_prcp[0,:],label="Average rainfall (mm)") + ax1.invert_yaxis() + ax1.grid(alpha=.7, ls="--") + 
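+        #the rainfall bars hang from the top of the panel (hyetograph convention), since the axis is inverted above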
ax1.get_xaxis().set_visible(False) + ax1.set_ylim(bottom=1.2*max(model.input_data.mean_prcp[0,:])) + ax1.set_ylabel('Average rainfall (mm)') + + if legend: + ax1.legend(loc='upper right') + else: + ax1.legend(loc='upper right').set_visible(False) + + plot=[fig, ax2, ax1] + else: + plot=[fig, ax2] + + return plot + + + + + +def plot_discharges_hdf5(hdf5,title="",figname="",columns=[],xlim=[None,None],ylim=[None,None],linewidth=1.5,legend=True,color=["black","grey","blue"],plot_rainfall=True,plot=None): + + #manage date here + #compute date_range + date_deb=datetime.datetime.fromisoformat(hdf5["setup"].attrs["start_time"])+duration_to_timedelta(int(hdf5["setup"].attrs["dt"])) + date_end=datetime.datetime.fromisoformat(hdf5["setup"].attrs["end_time"])+duration_to_timedelta(int(hdf5["setup"].attrs["dt"])) + date_range=[date_deb,date_end,hdf5["setup"].attrs["dt"]] + + # ~ plot=plt.subplots() + if plot is None: + if plot_rainfall: + fig, (ax1, ax2) = plt.subplots(2, 1,height_ratios=[1, 4]) + fig.subplots_adjust(hspace=0) + plot=[fig, ax2, ax1] + else: + fig, ax2 = plt.subplots() + plot=[fig, ax2] + else: + if plot_rainfall: + fig=plot[0] + ax1=plot[2] + ax2=plot[1] + else: + fig=plot[0] + ax2=plot[1] + + plot=plot_time_vars(hdf5["input_data/qobs"][:,:],columns=[0],title=title,label="Observations at "+hdf5["mesh/code"][0].decode(),xlabel="Time step",dt=0,ylabel="Discharge $(m^3/s)$",figname=figname,color=color[0],linestyle="--",linewidth=linewidth,xlim=xlim,ylim=ylim,legend=legend,date_range=date_range,plot=plot) + + plot=plot_time_vars(hdf5["output/qsim"][:,:],columns=[0],title=title,label="Simulation at "+hdf5["mesh/code"][0].decode(),xlabel="Time step",dt=0,ylabel="Discharge $(m^3/s)$",figname=figname,color=color[1],linestyle="-",linewidth=linewidth,legend=legend,xlim=xlim,ylim=ylim,date_range=date_range,plot=plot) + + + xtics = np.arange(np.datetime64(date_range[0]),np.datetime64(date_range[1]), np.timedelta64(int(date_range[2]), 's')) + + if plot_rainfall: + + ax1.bar(xtics,hdf5["input_data/mean_prcp"][0,:],label="Average rainfall (mm)") + ax1.invert_yaxis() + ax1.grid(alpha=.7, ls="--") + ax1.get_xaxis().set_visible(False) + ax1.set_ylim(bottom=1.2*max(hdf5["input_data/mean_prcp"][0,:])) + ax1.set_ylabel('Average rainfall (mm)') + + if legend: + ax1.legend(loc='upper right') + else: + ax1.legend(loc='upper right').set_visible(False) + + plot=[fig, ax2, ax1] + else: + plot=[fig, ax2] + + return plot + + + + +def plot_results_assim(res,title="",figname="",columns=[0],xlim=[None,None],linewidth=1.5,linestyle="-",plot=None): + + if plot is None: + plot=[None,None] + + + color = matplotlib.colormaps['gist_rainbow'] + color = matplotlib.colormaps['prism'] + sampling=np.arange(len(res))/len(res) + i=0 + + for key,values in res.items(): + + date_deb=datetime.datetime.fromisoformat(values["setup"].get('start_time'))+duration_to_timedelta([int(values["setup"].get('dt')),'s']) + date_end=datetime.datetime.fromisoformat(values["setup"].get('end_time'))+duration_to_timedelta(int(values["setup"].get('dt'))) + date_range=[date_deb.strftime("%Y-%m-%d %H:%M"),date_end.strftime("%Y-%m-%d %H:%M"),int(values["setup"].get('dt'))] + + plot=plot_time_vars(values["output"].get('qsim'),columns=columns,title="",xlabel="Time step",dt=0,ylabel="Discharge $(m^3/s)$",figname=figname,color=color(sampling[i]),linestyle=linestyle,linewidth=linewidth,legend=True,xlim=[None,None],ylim=[None,None],date_range=date_range,plot=plot) + + len_sim=values["output"]['qsim'].shape[1]-1 + y=values["output"]['qsim'][columns,len_sim] + 
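+        #the (x, y) pair below marks the simulated discharge at the final time step of this run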
x=datetime.datetime.fromisoformat(values["setup"].get('end_time')) + + fig=plot[0] + ax=plot[1] + ax.plot(x,y,marker='o',markersize=12,color=color(sampling[i])) + + i=i+1 + + plot=[fig,ax] + + # ~ date_start=datetime.datetime.fromisoformat(values["setup"].get('end_time')) + # ~ date_range=[date_start.strftime("%Y-%m-%d %H:%M"),date_end.strftime("%Y-%m-%d %H:%M"),int(values["setup"].get('dt'))] + + # ~ plot=plot_time_vars(y,title="",label="" + key,xlabel="Time step",dt=0,ylabel="Discharge $(m^3/s)$",figname=figname,marker='o',markersize=8,legend=False,xlim=[None,None],ylim=[None,None],date_range=date_range,plot=plot) + + return plot + + + +def plot_results_assim_hdf5(hdf5,title="",figname="",columns=[0],xlim=[None,None],linewidth=1.5,linestyle="-",marker="o",plot=None): + + if plot is None: + plot=[None,None] + + + color = matplotlib.colormaps['gist_rainbow'] + color = matplotlib.colormaps['prism'] + sampling=np.arange(len(list(hdf5.keys())))/len(list(hdf5.keys())) + i=0 + + for key in list(hdf5.keys()): + + date_deb=datetime.datetime.fromisoformat(hdf5[f"{key}/setup"].attrs['start_time'])+duration_to_timedelta([int(hdf5[f"{key}/setup"].attrs['dt']),'s']) + date_end=datetime.datetime.fromisoformat(hdf5[f"{key}/setup"].attrs['end_time'])+duration_to_timedelta(int(hdf5[f"{key}/setup"].attrs['dt'])) + date_range=[date_deb.strftime("%Y-%m-%d %H:%M"),date_end.strftime("%Y-%m-%d %H:%M"),int(hdf5[f"{key}/setup"].attrs['dt'])] + + plot=plot_time_vars(hdf5[f"{key}/output/qsim"][:,:],columns=columns,title="",xlabel="Time step",dt=0,ylabel="Discharge $(m^3/s)$",figname=figname,color=color(sampling[i]),linestyle=linestyle,linewidth=linewidth,legend=True,xlim=[None,None],ylim=[None,None],date_range=date_range,plot=plot) + + len_sim=hdf5[f"{key}/output/qsim"][:,:].shape[1]-1 + y=hdf5[f"{key}/output/qsim"][:,:][columns,len_sim] + x=datetime.datetime.fromisoformat(hdf5[f"{key}/setup"].attrs['end_time']) + + fig=plot[0] + ax=plot[1] + ax.plot(x,y,marker=marker,markersize=12,color=color(sampling[i])) + + i=i+1 + + plot=[fig,ax] + return plot + + + + +def plot_results_warmup(result_warmup,title="",figname="",columns=[0],linewidth=1.5,linestyle="-",plot=None): + + if plot is None: + plot=[None,None] + + for key,values in result_warmup.items(): + + date_deb=datetime.datetime.fromisoformat(values["setup"].get('start_time'))+duration_to_timedelta(int(values["setup"].get('dt'))) + date_end=datetime.datetime.fromisoformat(values["setup"].get('end_time'))+duration_to_timedelta(int(values.get('dt'))) + date_range=[date_deb.strftime("%Y-%m-%d %H:%M"),date_end.strftime("%Y-%m-%d %H:%M"),int(values["setup"].get('dt'))] + + plot=plot_time_vars(values["output"].get('qsim'),columns=columns,title="",label="W.up for t=" + key,xlabel="Time step",dt=0,ylabel="Discharge $(m^3/s)$",figname=figname,color=color(sampling[i]),linestyle=linestyle,linewidth=linewidth,legend=False,xlim=[None,None],ylim=[None,None],date_range=date_range,plot=plot) + + return plot + + +def plot_results_forecast(result_forecast,title="",figname="",columns=[0],linewidth=1.5,linestyle="-",plot=None): + + if plot is None: + plot=[None,None] + + # ~ color = matplotlib.colormaps['gist_rainbow'] + color = matplotlib.colormaps['prism'] + sampling=np.arange(len(result_forecast))/len(result_forecast) + i=0 + + for key,values in result_forecast.items(): + + date_deb=datetime.datetime.fromisoformat(values["setup"].get('start_time'))+duration_to_timedelta(int(values["setup"].get('dt'))) + 
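+        #the date axis is shifted by one dt, assuming the first simulated value corresponds to start_time + dt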
date_end=datetime.datetime.fromisoformat(values["setup"].get('end_time'))+duration_to_timedelta(int(values["setup"].get('dt'))) + date_range=[date_deb.strftime("%Y-%m-%d %H:%M"),date_end.strftime("%Y-%m-%d %H:%M"),int(values["setup"].get('dt'))] + + plot=plot_time_vars(values["output"].get('qsim'),columns=columns,title="",xlabel="Time step",dt=0,ylabel="Discharge $(m^3/s)$",figname=figname,color=color(sampling[i]),linestyle=linestyle,linewidth=linewidth,legend=True,xlim=[None,None],ylim=[None,None],date_range=date_range,plot=plot) + + # ~ len_sim=values["output"]['qsim'].shape[1]-1 + y=values["output"]['qsim'][columns,0] + # ~ x=datetime.datetime.fromisoformat(values["setup"].get('start_time')) + x=date_deb + + fig=plot[0] + ax=plot[1] + ax.plot(x,y,marker='X',markersize=12,color=color(sampling[i])) + + i=i+1 + + plot=[fig,ax] + + return plot + + + + +def plot_time_vars(data,title="",label="",xlabel="",ylabel="",figname="",step=1,columns=[],dx=1.,dt=0.,xlim=[None,None],ylim=[None,None],color="black",linestyle="-",linewidth=1.5,marker='',markersize=4,legend=True,xtics=[],date_range=None,plot=[None,None]): + + + if ((plot[0]!=None) & (plot[1]!=None)): + fig=plot[0] + ax=plot[1] + else: + fig,ax=plt.subplots() + + if (title!=""): ax.set_title(title) + if (xlabel!=""): ax.axes.set_xlabel(xlabel) + if (ylabel!=""): ax.axes.set_ylabel(ylabel) + + if (len(xtics)==0): + xtics=np.arange(0,data.shape[1]) + if (dt>0): + xtics=xtics*dt + + if date_range is not None: + xtics = np.arange(np.datetime64(date_range[0]),np.datetime64(date_range[1]), np.timedelta64(int(date_range[2]), 's')) + + if (len(columns)>0): + for i in columns: + ax.plot(xtics[:],data[i,:],color=color,label=label,ls=linestyle,lw=linewidth,marker=marker,markersize=markersize) + else: + for i in range(0,data.shape[0],step): + ax.plot(xtics[:],data[i,:],label=label,ls=linestyle,lw=linewidth,marker=marker,markersize=markersize) + + if (ylim[0]!=None): + ax.set_ylim(bottom=ylim[0]) + if (ylim[1]!=None): + ax.set_ylim(top=ylim[1]) + if (xlim[0]!=None): + ax.set_xlim(left=xlim[0]) + if (xlim[1]!=None): + ax.set_xlim(right=xlim[1]) + + ax.axes.grid(True,alpha=.7, ls="--") + if (legend): + ax.legend(loc='upper left') + else: + ax.legend(loc='upper left').set_visible(False) + + if (len(figname)>0): + fig.savefig(figname, transparent=False, dpi=80, bbox_inches="tight") + # ~ else: + # ~ fig.show() + + plot=[fig,ax] + + return plot + + + +def save_figure(fig,figname="myfigure",xsize=8,ysize=6,transparent=False,dpi=80): + fig.set_size_inches(xsize, ysize, forward=True) + fig.savefig(figname, transparent=transparent, dpi=dpi, bbox_inches="tight") + + +def save_figure_from_plot(plot,figname="myfigure",xsize=8,ysize=6,transparent=False,dpi=80,xlim=[None,None],ylim=[None,None]): + + fig=plot[0] + ax=plot[1] + + if (ylim[0]!=None): + ax.set_ylim(bottom=ylim[0]) + if (ylim[1]!=None): + ax.set_ylim(top=ylim[1]) + if (xlim[0]!=None): + ax.set_xlim(left=xlim[0]) + if (xlim[1]!=None): + ax.set_xlim(right=xlim[1]) + + fig.set_size_inches(xsize, ysize, forward=True) + fig.savefig(figname, transparent=transparent, dpi=dpi, bbox_inches="tight") + + + +def plot_matrix(matrix,mask=None,figname="",title="",label="",vmin=None,vmax=None): + + fig, ax = plt.subplots() + ax.set_title(title) + + if mask is not None: + ma = (mask == 0) + ma_var = np.where(ma, np.nan, matrix) + else: + ma_var=matrix + + map_var = ax.imshow(ma_var,vmin=vmin,vmax=vmax); + fig.colorbar(map_var, ax=ax, label=label,shrink=0.75); + + plot=[fig,ax] + + if (len(figname)>0): + 
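+        #write the figure to disk only when a figure name was provided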
fig.savefig(figname, transparent=False, dpi=80, bbox_inches="tight") + else: + fig.show() + + plot=[fig,ax] + + return plot + + + +def plot_image(matrice=np.zeros(shape=(2,2)),bbox=None,title="",xlabel="",ylabel="",zlabel="",vmin=None,vmax=None,mask=None,figname=""): + """ + Function for plotting a matrix as an image + + Parameters + ---------- + matrice : numpy array + bbox : ["left","right","bottom","top"] bouding box to put x and y coordinates instead of the shape of the matrix + title : character, title of the plot + xlabel : character, label of the xaxis + ylabel : character, label of the y axis + zlabel : character, label of the z axis + vmin: real, minimum z value + vmax: real, maximum z value + mask: integer, matrix, shape of matice, contain 0 for pixels that should not be plotted + show: booloen, true call fig.show() or false return fig instead. + + Examples + ---------- + smash.utils.plot_image(mesh_france['drained_area'],bbox=bbox,title="Surfaces drainées",xlabel="Longitude",ylabel="Latitude",zlabel="Surfaces drainées km^2",vmin=0.0,vmax=1000,mask=mesh_france['global_active_cell']) + + """ + + matrice=np.float32(matrice) + + if (type(bbox)!=type(None)): + extend=[bbox["left"],bbox["right"],bbox["bottom"],bbox["top"]]#bbox.values() + else: + extend=None + + if (type(mask)!=type(None)): + matrice[np.where(mask==0)]=np.nan + + # ~ color_matrice=matrice + # ~ if vmax!=None: + # ~ color_matrice[np.where(matrice>vmax)]=vmax + # ~ if vmin!=None: + # ~ color_matrice[np.where(matrice0): + fig.savefig(figname, transparent=False, dpi=80, bbox_inches="tight") + else: + fig.show() + + + + + + +def plot_model_params_and_states(model,variables,fstates=False): + + if not isinstance(variables, list): + raise ValueError( + f"variables '{variables}' must be list of parameters or states names" + ) + + nb_subplot=len(variables) + if (nb_subplot>1): + nb_rows=math.ceil(math.sqrt(nb_subplot)) + nb_cols=math.ceil(nb_subplot/nb_rows) + #nb_cols=nb_subplot- math.floor(math.sqrt(nb_subplot)) + else: + nb_rows=1 + nb_cols=1 + + print(nb_rows,nb_cols) + fig, ax = plt.subplots(nb_rows, nb_cols) + + if len(variables)==1: + ax = [ax] + + fig.suptitle(f'Optimized parameter set') + + for i,var in enumerate(variables): + + rr=(i+1)/(nb_cols) + part_entiere=math.floor(rr) + part_reel=rr-part_entiere + + if part_reel>0: + r=max(0,part_entiere) + else: + r=max(0,part_entiere-1) + + if (part_reel==0.): + c=nb_cols-1 + else: + c=math.ceil((part_reel)*(nb_cols))-1 + + #r=math.ceil(i/(nb_cols)) + #c=(r*nb_cols-i) + print(i,r,c) + + if isinstance(model,dict): + + for key,list_param in smash.core._constant.STRUCTURE_PARAMETERS.items(): + + if var in list_param: + + values=model["parameters"][var] + break + + for key,list_states in smash.core._constant.STRUCTURE_STATES.items(): + + if var in list_states: + + if fstates==True : + values=model["output"]["fstates"][var] + else: + values=model["states"][var] + + break + ma = (model["mesh"]["active_cell"] == 0) + + else: + + if var in model.setup._parameters_name: + + values=getattr(model.parameters,var) + + if var in model.setup._states_name: + + if fstates: + values=getattr(model.output.states,var) + else: + values=getattr(model.states,var) + + ma = (model.mesh.active_cell == 0) + + ma_var = np.where(ma, np.nan, values) + + map_var = ax[r,c].imshow(ma_var); + fig.colorbar(map_var, ax=ax[r,c], label=var,shrink=0.75); + + plot=[fig,ax] + return plot + + + +def plot_lcurve(instance,figname=None,transform=False,annotate=True,plot=None): + + if not isinstance(instance,dict): + 
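+        #the lcurve diagnostics are expected as a plain dictionary (e.g. the "lcurve" output of an optimization run)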
raise ValueError( + f"instance must be a dict" + ) + + if plot is not None: + fig=plot[0] + ax=plot[1] + else: + fig,ax=plt.subplots() + + if "wjreg_lcurve_opt" in instance: + pass + else: + return plot + + if (transform==True): + + jobs_max=np.zeros(shape=len(instance["cost_jobs"])) + jobs_max[:]=instance["cost_jobs_initial"] + + jobs_max[:]=max(instance["cost_jobs"]) + + jobs_min=min(instance["cost_jobs"]) + jreg_max=max(instance["cost_jreg"]) + + #index_min=np.where(instance["cost_jobs"] == jobs_min) + + #choose the lower value of jreg if index_min has many values + #index_jreg_max=list(instance["cost_jreg"]).index(min(instance["cost_jreg"][index_min[0]])) + #jreg_max=instance["cost_jreg"][index_jreg_max] + + + jreg_min=np.zeros(shape=len(instance["cost_jreg"])) + #jreg_min[:]=instance["cost_jreg_initial"] + #si cost_jreg_initial > 0 then prendre : + jreg_min[:]=min(instance["cost_jreg"]) + + go_plot=False + if (np.all((jobs_max[0]-jobs_min)>0.)) and (np.all((jreg_max-jreg_min[0])>0.)): + xs=(jobs_max-instance["cost_jobs"])/(jobs_max[0]-jobs_min) + ys=(instance["cost_jreg"]-jreg_min)/(jreg_max-jreg_min[0]) + go_plot=True + + + # ~ if (np.all((jreg_max-jreg_min[0])>0.)): + # ~ ys=(instance["cost_jreg"]-jreg_min)/(jreg_max-jreg_min[0]) + + # ~ #plot lcurve + if (go_plot): + ax.plot(xs,ys, ls="--", marker="x", color="grey"); + + # zip joins x and y coordinates in pairs + i=0 + for x,y in zip(xs,ys): + + label="" + textcolor="black" + point_type="." + ax.plot(x,y, color=textcolor,marker=point_type,markersize=5); + + if (instance["wjreg"][i]==instance["wjreg_lcurve_opt"]): + textcolor="red" + point_type="o" + ax.plot(x,y, color=textcolor,marker=point_type,markersize=8); + + label = "{:.2E}".format(instance["wjreg_lcurve_opt"]) + + #print(instance["wjreg"][i],instance["wjreg_fast"]) + + go_plot=False + if (instance["wjreg"][i]==instance["wjreg_fast"]): + go_plot=True + elif (abs(1.-instance["wjreg"][i]/instance["wjreg_fast"])<0.0001): + go_plot=True + + if (go_plot) : + textcolor="green" + point_type="^" + ax.plot(x,y, color=textcolor,marker=point_type,markersize=8); + + if annotate: + ax.annotate(label, # this is the text + (x,y), # these are the coordinates to position the label + textcoords="offset points", # how to position the text + xytext=(0,5), # distance from text to points (x,y) + ha='right', # horizontal alignment can be left, right or center + color=textcolor, fontsize=10) + + i=i+1 + + ax.plot([0,1],[0,1],color="red") + + else: + + ax.plot(instance["cost_jobs"],instance["cost_jreg"], ls="--", marker="x",color="grey"); + + # zip joins x and y coordinates in pairs + i=0 + for x,y in zip(instance["cost_jobs"],instance["cost_jreg"]): + + label = "{:.2E}".format(instance["wjreg"][i]) + textcolor="black" + + if (instance["wjreg"][i]==instance["wjreg_lcurve_opt"]): + textcolor="red" + + if (abs(1.-instance["wjreg"][i]/instance["wjreg_fast"])<0.0001): + textcolor="green" + + if annotate: + ax.annotate(label, # this is the text + (x,y), # these are the coordinates to position the label + textcoords="offset points", # how to position the text + xytext=(0,5), # distance from text to points (x,y) + ha='right', # horizontal alignment can be left, right or center + color=textcolor, fontsize=10) + + i=i+1 + + + ax.set_xlabel("(jobs_max-jobs)/(jobs_max_jobs_min)"); + ax.set_ylabel("(jreg-jreg_min)/(jreg_max-jreg_min)"); + + if figname is not None: + fig.savefig(figname, transparent=False, dpi=80, bbox_inches="tight") + + plot=[fig,ax] + return plot + + +def plot_dist_wjreg(res_assim): + + 
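+    #scatter, in log space, the wjreg selected by the L-curve method against the one from the fast method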
fig,ax=plt.subplots() + x=list() + y=list() + for key,values in res_assim.items(): + lcurve=values["lcurve"] + color="black" + point_type="." + markersize=6 + if (lcurve["wjreg_lcurve_opt"] is not None) and (lcurve["wjreg_fast"] is not None) and (lcurve["wjreg_lcurve_opt"] >0.) and (lcurve["wjreg_fast"] >0.): + x.append(float(lcurve["wjreg_lcurve_opt"])) + y.append(float(lcurve["wjreg_fast"])) + + xn=np.log(np.array(x)) + yn=np.log(np.array(y)) + + ax.scatter(np.array(xn),np.array(yn), color=color,marker=point_type) + ax.set_xlabel("log(wjreg) - Lcurve method"); + ax.set_ylabel("log(wjreg) - Fast method"); + + min_val=min(min(xn),min(yn)) + max_val=max(max(xn),max(yn)) + x=np.arange(min_val,max_val+1) + y=np.arange(min_val,max_val+1) + ax.plot(x,y, color="red",marker=None,markersize=markersize) + + return fig,ax + + + + +def plot_mesh(model=None,mesh=None,title=None,figname=None,coef_hydro=99.): + + if model is not None: + if isinstance(mesh_in,smash.Model): + mesh=model.mesh + else: + raise ValueError( + f"model object must be an instance of smash Model" + ) + elif mesh is not None: + if isinstance(mesh,dict): + pass + else: + raise ValueError( + f"mesh must be a dict" + ) + else: + raise ValueError( + f"model or mesh are mandatory and must be a dict or a smash Model object" + ) + + mesh["active_cell"] + gauge=mesh["gauge_pos"] + stations=mesh["code"] + flow_acc=mesh["flwacc"] + + na = (mesh["active_cell"] == 0) + + flow_accum_bv = np.where(na, 0., flow_acc.data) + surfmin=(1.-coef_hydro/100.)*np.max(flow_accum_bv) + mask_flow=(flow_accum_bv < surfmin) + flow_plot=np.where(mask_flow, np.nan,flow_accum_bv.data) + flow_plot=np.where(na, np.nan,flow_plot) + + fig, ax = plt.subplots() + + if title is not None: + ax.set_title(title) + + active_cell = np.where(na, np.nan, mesh["active_cell"]) + #cmap = ListedColormap(["grey", "lightgray"]) + cmap = ListedColormap([ "lightgray"]) + ax.imshow(active_cell,cmap=cmap) + + #cmap = ListedColormap(["lightblue","blue","darkblue"]) + myblues = matplotlib.colormaps['Blues'] + cmp = ListedColormap(myblues(np.linspace(0.30, 1.0, 265))) + im=ax.imshow(flow_plot,cmap=cmp) + #im=ax.imshow(flow_plot,cmap="Blues") + + fig.colorbar(im,cmap="Blues", ax=ax, label="Cumulated surface (km²)",shrink=0.75); + + + for i in range(len(stations)): + coord=gauge[i] + code=stations[i] + ax.plot(coord[1],coord[0], color="green",marker='o',markersize=6) + ax.annotate(code, # this is the text + (coord[1],coord[0]), # these are the coordinates to position the label + textcoords="offset points", # how to position the text + xytext=(0,5), # distance from text to points (x,y) + ha='right', # horizontal alignment can be left, right or center + color="red", + fontsize=10) + + if figname is not None: + fig.savefig(figname, transparent=False, dpi=80, bbox_inches="tight") + + return fig,ax + + + + +def plot_event_seg(model,event_seg,code=''): + event_seg_sta_aval = event_seg[(event_seg['code'] == code)] + + dti = pd.date_range(start=model.setup.start_time, end=model.setup.end_time, freq="H")[1:] + qo = model.input_data.qobs[0, :] + prcp = model.input_data.mean_prcp[0, :] + starts = pd.to_datetime(event_seg_sta_aval["start"]) + ends = pd.to_datetime(event_seg_sta_aval["end"]) + + fig, (ax1, ax2) = plt.subplots(2, 1) + fig.subplots_adjust(hspace=0) + ax1.bar(dti, prcp, color="lightslategrey", label="Rainfall"); + + ax1.axvspan(starts[0], ends[0], alpha=.1, color="red", label="Event segmentation"); + for i in range(1,len(starts)): + ax1.axvspan(starts[i], ends[i], alpha=.1, color="red"); + 
ax1.axvspan(starts[i], ends[i], alpha=.1, color="red"); + + ax1.grid(alpha=.7, ls="--") + ax1.get_xaxis().set_visible(False) + ax1.set_ylabel("$mm$"); + ax1.invert_yaxis() + ax2.plot(dti, qo, label="Observed discharge"); + for i in range(0,len(starts)): + ax2.axvspan(starts[i], ends[i], alpha=.1, color="red"); + + ax2.grid(alpha=.7, ls="--") + ax2.tick_params(axis="x", labelrotation=20) + ax2.set_ylabel("$m^3/s$"); + ax2.set_xlim(ax1.get_xlim()); + fig.legend(); + fig.suptitle("V5014010"); + + return fig + + diff --git a/testing/functions_smash_time.py b/testing/functions_smash_time.py new file mode 100644 index 00000000..190da1b9 --- /dev/null +++ b/testing/functions_smash_time.py @@ -0,0 +1,181 @@ +import numpy as np +import math +import datetime + + + +def duration_to_timedelta(duration:list|int|float): + + if isinstance(duration,int|float): + delta_t=datetime.timedelta(seconds=duration) + elif isinstance(duration,list): + + if (isinstance(duration[0],str)): + unit=duration[0] + time=duration[1] + elif (isinstance(duration[1],str)): + unit=duration[1] + time=duration[0] + else: + raise ValueError( + f"duration '{duration}' must contain a str and and integer" + ) + + if isinstance(time,int|float): + + if (unit=="seconds") | (unit=="s"): + delta_t=datetime.timedelta(seconds=time) + elif (unit=="minutes") | (unit=="min"): + delta_t=datetime.timedelta(minutes=time) + elif (unit=="hours") | (unit=="h"): + delta_t=datetime.timedelta(hours=time) + elif (unit=="days") | (unit=="d"): + delta_t=datetime.timedelta(days=time) + else: + raise ValueError( + f"duration unit '{unit}' must be an str. Possible values: (seconds|s) (minutes|min) (hours|h) (days|d)" + ) + else: + raise ValueError( + f"duration value '{time}' must be an integer or float." + ) + else: + raise ValueError( + f"duration '{duration}' must be a list or an integer or float." + ) + + return delta_t + + +def to_datetime(time=''): + if isinstance(time,str): + return datetime.datetime.fromisoformat(time) + else: + raise ValueError( + f"time '{time}' must be a instance of str." + ) + + +def to_datestring(date): + return date.strftime("%Y-%m-%d %H:%M") + + +def dict_filter_by_date(in_dict,t_start=None,t_end=None): + + out_res=dict() + + if t_start is not None: + t_s=datetime.datetime.fromisoformat(t_start) + + if t_end is not None: + t_e=datetime.datetime.fromisoformat(t_end) + + for key,value in in_dict.items(): + + date_simu=datetime.datetime.fromisoformat(key) + + if t_start is None: + t_s=date_simu + + if t_end is None: + t_e=date_simu + + if (date_simu>=t_s) and (date_simu<=t_e): + out_res.update({key:value}) + + return out_res + + +def stringdecode(self): + """ + Decode characters from a array of integer: Usefull when you try to access to a array of string in the object model. 
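+    The flat character array is read in Fortran (column-major) order, decoded, and split on whitespace.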
+ """ + return self.tobytes(order='F').decode('utf-8').split() + + + +#date and time functions +def date_to_path(date, format_schapi=True): + """ + Convert the SMASH date format to a path for searching rainfall + + Parameters + ---------- + date : integer representing a date with the format %Y%m%d%H%M%S + + Returns + ---------- + path : string representing the path /year/month/day/ + + Examples + ---------- + date_to_path(date.strftime('%Y%m%d%H%M') + /%Y/%m/%d/ + """ + year=date[0:4] + month=date[4:6] + day=date[6:8] + + if format_schapi: + + ret = os.sep + year + os.sep + month + os.sep + day + os.sep + + else: + + ret = os.sep + year + os.sep + month + os.sep + + return ret + + +def decompose_date(date): + """ + Split a SMASH date + + Parameters + ---------- + date : integer representing a date with the format %Y%m%d%H%M%S + + Returns + ---------- + year,month,day,hour,minute : integers each part of the date (seconds not included) + + Examples + ---------- + year,month,day,hour,minute=decompose_date(date.strftime('%Y%m%d%H%M') + """ + year=date[0:4] + month=date[4:6] + day=date[6:8] + hour=date[8:10] + minute=date[10:13] + return year,month,day,hour,minute + + +def date_range(self): + """ + Generate a Panda date list according the smash model setup + + Parameters + ---------- + self : object model + + Returns + ---------- + date_list: a Panda list of date from self.setup.date_deb to self.setup.date_prv + + Examples + ---------- + model = smash.Model(configuration='Data/Real_case/configuration.txt') + date_list=date_range(model) + """ + delta_t=datetime.timedelta(seconds=self.setup.dt) + + year,month,day,hour,minute=decompose_date(self.setup.date_deb.decode()) + date_start = datetime.datetime(int(year),int(month),int(day),int(hour),int(minute))+delta_t + + year,month,day,hour,minute=decompose_date(self.setup.date_prv.decode()) + date_end = datetime.datetime(int(year),int(month),int(day),int(hour),int(minute)) + + date_list=pandas.date_range(date_start,date_end,freq=delta_t) + return date_list + diff --git a/testing/test-desag_rainfall.py b/testing/test-desag_rainfall.py new file mode 100644 index 00000000..aa975c8e --- /dev/null +++ b/testing/test-desag_rainfall.py @@ -0,0 +1,57 @@ +import smash +import numpy as np +import matplotlib.pyplot as plt +from osgeo import gdal + +from functions_smash_plot import * +from functions_smash_time import * + +setup,mesh=smash.load_dataset('cance') +model=smash.Model(setup,mesh) +plot=plot_discharges(model) +plot[0].show() + +#test the model with smaller dx +mesh["dx"]=500. +model_desag=smash.Model(setup,mesh) + +index=np.where(model.input_data.mean_prcp[0,:]>10)[0] + +plot_matrix(model.input_data.prcp[:,:,index[0]],mask=model.mesh.active_cell,figname='rainfall_dx1000.png',title="rainfall dx=1000m",vmin=0.0,vmax=25) +plot_matrix(model_desag.input_data.prcp[:,:,index[0]],mask=model_desag.mesh.active_cell,figname='rainfall_dx500.png',title="rainfall dx=500m",vmin=0.0,vmax=25) #compare the upper-left corner with the previous figure, it is like the catchment is half sized from the upper-left corner + + + +#tiff filename seleciton +date='201410100600' +YY,MM,DD,hh,mm=decompose_date(date) +filename=f'/home/maxime/DassHydro-Dev/smash/smash/dataset/Cance/prcp/{YY}/{MM}/{DD}/rain_precipitation_{date}_{date}.tif' + +#test the function globally +model.mesh.dx=500. +array=smash.core.raster.read_windowed_raster_gdal(filename=filename, smash_mesh=model.mesh, band=1, lacuna=-99.) 
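+#sanity check (assumption: after reprojection the cropped window maps 1:1 onto the mesh grid)
+print(array.shape, (model.mesh.nrow, model.mesh.ncol))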
+
+#manually test all actions of the function; also try cropping the array first with the /vsimem/ in-memory filesystem (I expected it to be faster, but it is not)
+dataset = smash.core.raster.gdal_raster_open(filename)
+geotransform=smash.core.raster.gdal_get_geotransform(dataset)
+window=smash.core.raster.gdal_smash_window_from_geotransform(geotransform,model.mesh)
+dataset=gdal.Translate('/vsimem/raster.tif', dataset, srcWin=[window['col_off'], window['row_off'], window["ncols"], window["nrows"]])
+dataset=smash.core.raster.gdal_reproject_raster(dataset,model.mesh.dx,model.mesh.dx)
+geotransform=smash.core.raster.gdal_get_geotransform(dataset)
+window=smash.core.raster.gdal_smash_window_from_geotransform(geotransform,model.mesh)
+array=smash.core.raster.gdal_crop_dataset_to_array(dataset=dataset,window=window,band=1,lacuna=-99.)
+
+
+#Test the reprojection
+dataset = smash.core.raster.gdal_raster_open(filename)
+dataset_500=smash.core.raster.gdal_reproject_raster(dataset,500,500)
+dataset_2000=smash.core.raster.gdal_reproject_raster(dataset,2000,2000)
+array=dataset.GetRasterBand(1).ReadAsArray()
+array_500=dataset_500.GetRasterBand(1).ReadAsArray()
+array_2000=dataset_2000.GetRasterBand(1).ReadAsArray()
+
+maxval=np.max(array)
+plot_matrix(array,figname='grid_rainfall_dx1000.png',title="rainfall dx=1000m",vmin=0.0,vmax=maxval)
+plot_matrix(array_500,figname='grid_rainfall_dx500.png',title="rainfall dx=500m",vmin=0.0,vmax=maxval)
+plot_matrix(array_2000,figname='grid_rainfall_dx2000.png',title="rainfall dx=2000m",vmin=0.0,vmax=maxval)
+#Notice that grid_rainfall_dx1000.png and grid_rainfall_dx500.png are very similar but their sizes differ; grid_rainfall_dx2000.png has the same aspect, with the rainfall averaged over each 2x2 km cell

From 4384edb29314d31ba680a49f7b13c73658a92366 Mon Sep 17 00:00:00 2001
From: Jay-Allemand Maxime
Date: Wed, 14 Jun 2023 13:19:36 +0200
Subject: [PATCH 33/73] Update release note

---
 doc/source/release/0.5.0-notes.rst | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/doc/source/release/0.5.0-notes.rst b/doc/source/release/0.5.0-notes.rst
index 6c60b91c..22eefc71 100644
--- a/doc/source/release/0.5.0-notes.rst
+++ b/doc/source/release/0.5.0-notes.rst
@@ -30,6 +30,11 @@ Improvements
 New Features
 ------------
 
+Spatial disaggregation of the input raster
+******************************************
+
+If the resolution of the input raster differs from the resolution of the model mesh, the input rasters are automatically reprojected by gdal. In that case, reading the input can be slower. For best performance, it can be useful to preprocess the input files (precipitation). Some functions available in smash.core.raster can help with this pre-processing.
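+
+For instance, a precipitation file can be resampled once, offline, with these helper functions (illustrative sketch: ``rain.tif`` and the 500 m target resolution are placeholders)::
+
+    import smash
+
+    dataset = smash.core.raster.gdal_raster_open("rain.tif")
+    dataset_500 = smash.core.raster.gdal_reproject_raster(dataset, 500.0, 500.0)
+    smash.core.raster.gdal_write_dataset("rain_500m.tif", dataset_500)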
+ ----- Fixes ----- From b84e1bf4e7b1d30c5c5894423483d3bed02f0d38 Mon Sep 17 00:00:00 2001 From: "ngo-nghi-truyen.huynh" Date: Tue, 20 Jun 2023 17:30:40 +0200 Subject: [PATCH 34/73] FIX: computation of flood event signatures --- doc/source/release/0.5.0-notes.rst | 7 ++++ smash/core/_event_segmentation.py | 34 ++++++++++------ smash/core/signatures.py | 6 +-- smash/tests/baseline.hdf5 | Bin 2010235 -> 2010235 bytes smash/tests/diff_baseline.csv | 61 ++++++++++++++--------------- 5 files changed, 62 insertions(+), 46 deletions(-) diff --git a/doc/source/release/0.5.0-notes.rst b/doc/source/release/0.5.0-notes.rst index f2881c97..32428962 100644 --- a/doc/source/release/0.5.0-notes.rst +++ b/doc/source/release/0.5.0-notes.rst @@ -47,3 +47,10 @@ The boundary condition checking previously used a tolerance of 1e-6, which cause To address this problem, the tolerance has been decreased to 1e-3. See issue `#23 `__. + +Event signatures computation +**************************** + +The bug related to the computation of flood event signatures has been resolved for specific cases where the peak event is observed during the last time steps in the time window. + +See issue `#28 `__. \ No newline at end of file diff --git a/smash/core/_event_segmentation.py b/smash/core/_event_segmentation.py index 01c5cf8a..33d0a6e9 100644 --- a/smash/core/_event_segmentation.py +++ b/smash/core/_event_segmentation.py @@ -213,8 +213,8 @@ def _events_grad( ind = _detect_peaks(q, mph=np.quantile(q[q > 0], peak_quant)) list_events = [] - for i in ind: - p_search = p[range(max(i - start_seg, 0), i)] + for i_peak in ind: + p_search = p[range(max(i_peak - start_seg, 0), i_peak)] p_search_grad = np.gradient(p_search) ind_start = _detect_peaks( @@ -236,22 +236,30 @@ def _events_grad( ind_start_minq = ind_start[0] - start = ind_start_minq + max(i - start_seg, 0) + start = ind_start_minq + max(i_peak - start_seg, 0) - peakp = _detect_peaks(p[start:i], mpd=len(p)) + peakp = _detect_peaks(p[start:i_peak], mpd=len(p)) if peakp.size == 0: - peakp = np.argmax(p[start:i]) + start + peakp = np.argmax(p[start:i_peak]) + start else: peakp = peakp[0] + start - qbf = _baseflow_separation(q[i - 1 : start + max_duration + end_search - 1])[0] + fwindow = min( + start + max_duration + end_search, q.size + ) # index for determining the end of dflow windows - dflow = q[i - 1 : start + max_duration + end_search - 1] - qbf - dflow = np.array([sum(i) for i in zip(*(dflow[i:] for i in range(end_search)))]) + if fwindow <= i_peak: # reject peak at the last time step + continue - end = i + np.argmin(dflow) + qbf = _baseflow_separation(q[i_peak - 1 : fwindow - 1])[0] + + dflow = q[i_peak - 1 : fwindow - 1] - qbf + dflow_windows = (dflow[i:] for i in range(min(end_search, dflow.size))) + dflow = np.array([sum(i) for i in zip(*dflow_windows)]) + + end = i_peak + np.argmin(dflow) if len(list_events) > 0: prev_start = list_events[-1]["start"] @@ -263,14 +271,16 @@ def _events_grad( if max(end, prev_end) <= prev_start + max_duration: list_events[-1]["end"] = max(end, prev_end) - if q[i] > q[prev_peakq]: - list_events[-1]["peakQ"] = i + if q[i_peak] > q[prev_peakq]: + list_events[-1]["peakQ"] = i_peak if p[peakp] > p[prev_peakp]: list_events[-1]["peakP"] = peakp continue - list_events.append({"start": start, "end": end, "peakP": peakp, "peakQ": i}) + list_events.append( + {"start": start, "end": end, "peakP": peakp, "peakQ": i_peak} + ) return list_events diff --git a/smash/core/signatures.py b/smash/core/signatures.py index d975fd5d..72446a09 100644 --- 
a/smash/core/signatures.py +++ b/smash/core/signatures.py @@ -398,9 +398,9 @@ def _signatures_comp( ts = t["start"] te = t["end"] - event_prcp = prcp_tmp[ts:te] - event_qobs = qobs_tmp[ts:te] - event_qsim = qsim_tmp[ts:te] + event_prcp = prcp_tmp[ts : te + 1] + event_qobs = qobs_tmp[ts : te + 1] + event_qsim = qsim_tmp[ts : te + 1] season = _get_season(date_range[ts].date()) diff --git a/smash/tests/baseline.hdf5 b/smash/tests/baseline.hdf5 index b05da70047233e5253a151ae2f67ba8bda978253..95204e2f02c2d71d5ffded6fcb6816cf5a87661f 100644 GIT binary patch delta 1156 zcmXBRdsI?)0Kjpsg*q+FNXX38V?MG1oii#7t+wNw%e{m&6K!c!zL*wKpw!HH^4ZeK z%zVsjx}%_oLUltt&a~u6D^s}=nr+L&e2}`Xvn#OQ&i?rP@%{UY=`LZqOYJ!>qWG<- zPWHT7Z;UIss&eHt8H$vexyF?`Bq%VH8{2WBHdsHDLU`-g=iar<9UoBa*H=jw%t^F9T@{g~Oy^UO@%tB0-(rFn6eM8hK}ZrNAC` z5dEL@Y?zWglg-<#$*3oOK^Xns)zY9n994SKkXMfXMJy+hao7}BwB4+n^JWVv^f9t?w!PrFo zu^|0hh8vn2l%j-F7tc~>?%n31$-Ad=D8}LCx|I)7skoaEjm&4aTs)U$$`8JVMQi*_ zEmt~H7x_HR#}4z$icbt=Mb_lRWyA-|9hvSyq9}%r9M3F)sj{RBD{Wt$#Mn*Alj>68 zUcq4-dCeHsB@jvR)AMU^mB(rK!4RkAaoQ8M20dBaE;&$W=L_$2eP1w`I~w1sb1JUXuXH|;ha|om+k$Ie z3~=JJqdE!l8ezzbLT}dd0OvtUZcGY&X_W*cwxKszdJ74uYBsm?dv*{wWIvbj{Z^tZ z>>uC0zseqMgN>zLEH^bXwpSOC8K6tqy_)7X-SFRh}4iP#$=eudO@~wy9de19!GDU$UgQ&`_sNIP4{bQ+pk)f z!sQhRip)ignFNa0C;`XF3$W$0_42wADaXWuhc?E~q2b7vkv2TF`?Ssj>3kf`>9}6g zyzETRe{g^+LJzSs;Hz{vt?{k`33)WFt0~!kdhEhQk6XJ}s)nPPO&Zp|3u1R%Ex~t~ z&n{y4CR?`~TT9xE>#9%t3!XX=v2c7awvRm{q5YAlq(58eTjbT1_!Os8`%={!n!yt7 zo%cgFMyIzB<*lrtoU;xm3+{c^DHwj8H=Eaf{#^dGo zN;7|Lcd?D>Oo0lI2W$a5z#e!HZ~z>Eb-;Rn05|~~0B2w$Km;}cE&vH21DgR?U<=>| OYz5qvD*U$YpZ^7rTSV#r delta 1149 zcmWmAeKga100!{=l6OWJilwg8rHfo#bXzFy?c6w|?ma{`t-NH-rd+?x%d8I7alPg2 z=B0T{)DgqX-LKtQ2%%{@F>RK}OHS=2i@2uO&dBipR^?PF%-zW+$V{4R}xJlbFSz9aOR0K%6rKs1Mm1hT(p~MMj!9f}4E>)sX!R!03al>tGe3_c$ zAZ}e)nIl8agR9y3@6s=8=L%Yoqq3YaI>}zIOvSpxAKSS2k$R@2uYjs=fW5}0I3!LI zYY5!Y2n@m})F)E=7bWU_#@+1m%AurVyf)UKYgnWA2!!g|<*`M>v%Y`ayDCyyXHi}| zPT|a-M+9`ne`0;e)N*IhpLBdpt?H4aZIz)`95hL)o$gGaM6xSXnYHFw{)!HHf}wD- zxkKSvrSt#gQf=7AiY_Ve#iS`J^5M^wH2m)h9G|+-+fN_NW3DwFmEmX-*fSp^oo1K+ zTcUXXL|7~eTOF1x^vHUB$nro-zK|+z(^Up--94GaFt)VS0y}u{{}ruYsF$6h$Fd_VW$E#zZUuy(i?so z2~@YUbfiJOi95>`%GHk|bqsO=iwYQecuhlZWHG;l~NR6PxNojuq>O8dUyBJ;0WNt41a^~E{!J4sH_cL`~+qvE|Ct)jrsH`;RFVC@cS-?!Ro?T$}7 zo(*onqPBYWkJIpS)hLmgu8qQ-yUvCM_mnoykf2DPtJ-Rn#`|mPZ%;M7bcVb5Su+gM zi>r7+9!@}Nc+8X0KaX&hLl0fDqtTtJbYJYts~<&_v#+WT;86da+n+|m<2wH6*&JU< z^DFZ6Nhv-eV;u@tQo4WQFxrse%-MVeypQDRzI99%^HhrU$xu<(AIPVZ5o50hKuYzWwfkhAsPEJI63m%e_75u&4Qe^Q8q zrf%mN-6Es)0RzAg_|}a9utb{zW`H?>1}p$eU -Date: Thu Apr 13 12:17:42 2023 +0200 +commit 305e8b6245551ea341c2a395bc657d5005013a6d +Merge: f3c9235 9da4768 +Author: Ngo Nghi Truyen Huynh <129378719+nghi-truyen@users.noreply.github.com> +Date: Fri Jun 9 12:04:15 2023 +0200 - FIX: Lower the tolerance to 1e-5 in multiple_run assertion (probably parallel issue) + Merge pull request #26 from DassHydro-dev/maint-bayes-optimize - - Modified - - * test_simu.py: in np.allclose change atol from 1e-6 to 1e-5 in test_multiple_run + MAINT/FIX: remove density attribute of BayesResult in preparation for V1.0.0 TEST NAME |STATUS ann_optimize_1.cost |NON MODIFIED @@ -20,8 +19,8 @@ bayes_optimize.cost |NON MODIFIED bbox_mesh.flwacc |NON MODIFIED bbox_mesh.flwdir |NON MODIFIED event_seg.arr |NON MODIFIED -gen_samples.nor |MODIFIED -gen_samples.uni |MODIFIED +gen_samples.nor |NON MODIFIED +gen_samples.uni |NON MODIFIED mesh_io.active_cell |NON MODIFIED mesh_io.area |NON MODIFIED mesh_io.code |NON MODIFIED @@ -37,18 +36,18 @@ mesh_io.nrow |NON MODIFIED mesh_io.path |NON MODIFIED mesh_io.xmin |NON MODIFIED 
 mesh_io.ymax |NON MODIFIED
-multiple_run.cost |ADDED
-multiple_run.qsim |ADDED
-mutiple_run.slc_1.cost |ADDED
-mutiple_run.slc_1.qsim |ADDED
-mutiple_run.slc_2.cost |ADDED
-mutiple_run.slc_2.qsim |ADDED
-mutiple_run.slc_3.cost |ADDED
-mutiple_run.slc_3.qsim |ADDED
-mutiple_run.slc_4.cost |ADDED
-mutiple_run.slc_4.qsim |ADDED
-mutiple_run.slc_5.cost |ADDED
-mutiple_run.slc_5.qsim |ADDED
+multiple_run.cost |NON MODIFIED
+multiple_run.qsim |NON MODIFIED
+mutiple_run.slc_1.cost |NON MODIFIED
+mutiple_run.slc_1.qsim |NON MODIFIED
+mutiple_run.slc_2.cost |NON MODIFIED
+mutiple_run.slc_2.qsim |NON MODIFIED
+mutiple_run.slc_3.cost |NON MODIFIED
+mutiple_run.slc_3.qsim |NON MODIFIED
+mutiple_run.slc_4.cost |NON MODIFIED
+mutiple_run.slc_4.qsim |NON MODIFIED
+mutiple_run.slc_5.cost |NON MODIFIED
+mutiple_run.slc_5.qsim |NON MODIFIED
 net_init.bias_layer_1 |NON MODIFIED
 net_init.bias_layer_2 |NON MODIFIED
 net_init.bias_layer_3 |NON MODIFIED
@@ -74,8 +73,8 @@ optimize.uniform_sbs_states.hlr |NON MODIFIED
 run.cost |NON MODIFIED
 signatures.cont_obs |NON MODIFIED
 signatures.cont_sim |NON MODIFIED
-signatures.event_obs |NON MODIFIED
-signatures.event_sim |NON MODIFIED
+signatures.event_obs |MODIFIED
+signatures.event_sim |MODIFIED
 signatures_sens.cont_first_si_cft |NON MODIFIED
 signatures_sens.cont_first_si_cp |NON MODIFIED
 signatures_sens.cont_first_si_exc |NON MODIFIED
@@ -84,14 +83,14 @@ signatures_sens.cont_total_si_cft |NON MODIFIED
 signatures_sens.cont_total_si_cp |NON MODIFIED
 signatures_sens.cont_total_si_exc |NON MODIFIED
 signatures_sens.cont_total_si_lr |NON MODIFIED
-signatures_sens.event_first_si_cft |NON MODIFIED
-signatures_sens.event_first_si_cp |NON MODIFIED
-signatures_sens.event_first_si_exc |NON MODIFIED
-signatures_sens.event_first_si_lr |NON MODIFIED
-signatures_sens.event_total_si_cft |NON MODIFIED
-signatures_sens.event_total_si_cp |NON MODIFIED
-signatures_sens.event_total_si_exc |NON MODIFIED
-signatures_sens.event_total_si_lr |NON MODIFIED
+signatures_sens.event_first_si_cft |MODIFIED
+signatures_sens.event_first_si_cp |MODIFIED
+signatures_sens.event_first_si_exc |MODIFIED
+signatures_sens.event_first_si_lr |MODIFIED
+signatures_sens.event_total_si_cft |MODIFIED
+signatures_sens.event_total_si_cp |MODIFIED
+signatures_sens.event_total_si_exc |MODIFIED
+signatures_sens.event_total_si_lr |MODIFIED
 xy_mesh.flwacc |NON MODIFIED
 xy_mesh.flwdir |NON MODIFIED
 xy_mesh.flwdst |NON MODIFIED

From 9149f93640a8e77aa6164f8ed365e32b57edadce Mon Sep 17 00:00:00 2001
From: Jay-Allemand Maxime
Date: Wed, 21 Jun 2023 16:22:45 +0200
Subject: [PATCH 35/73] Format _read_input_data.py and raster.py with black
 (black smash/core/_read_input_data.py smash/core/raster.py); remove the
 testing directory

---
 smash/core/_read_input_data.py  |  17 +-
 smash/core/raster.py            | 462 +++++++++++--------
 testing/functions_smash_plot.py | 784 --------------------------------
 testing/functions_smash_time.py | 181 --------
 testing/test-desag_rainfall.py  |  57 ---
 5 files changed, 274 insertions(+), 1227 deletions(-)
 delete mode 100644 testing/functions_smash_plot.py
 delete mode 100644 testing/functions_smash_time.py
 delete mode 100644 testing/test-desag_rainfall.py

diff --git a/smash/core/_read_input_data.py b/smash/core/_read_input_data.py
index 96869e5c..337ab83c 100644
--- a/smash/core/_read_input_data.py
+++ b/smash/core/_read_input_data.py
@@ -120,7 +120,10 @@ def _read_prcp(setup: SetupDT, mesh: MeshDT, input_data: Input_DataDT):
 
         else:
             matrix = (
-                read_windowed_raster_gdal(filename=files[ind],
smash_mesh=mesh, band=1, lacuna=-99.) * setup.prcp_conversion_factor + read_windowed_raster_gdal( + filename=files[ind], smash_mesh=mesh, band=1, lacuna=-99.0 + ) + * setup.prcp_conversion_factor ) if setup.sparse_storage: @@ -186,7 +189,9 @@ def _read_pet(setup: SetupDT, mesh: MeshDT, input_data: Input_DataDT): subset_date_range = date_range[ind_day] matrix = ( - read_windowed_raster_gdal(filename=files[ind], smash_mesh=mesh, band=1, lacuna=-99.) + read_windowed_raster_gdal( + filename=files[ind], smash_mesh=mesh, band=1, lacuna=-99.0 + ) * setup.pet_conversion_factor ) @@ -231,7 +236,9 @@ def _read_pet(setup: SetupDT, mesh: MeshDT, input_data: Input_DataDT): else: matrix = ( - read_windowed_raster_gdal(filename=files[ind], smash_mesh=mesh, band=1, lacuna=-99.) + read_windowed_raster_gdal( + filename=files[ind], smash_mesh=mesh, band=1, lacuna=-99.0 + ) * setup.pet_conversion_factor ) @@ -262,4 +269,6 @@ def _read_descriptor(setup: SetupDT, mesh: MeshDT, input_data: Input_DataDT): ) else: - input_data.descriptor[..., i] = read_windowed_raster_gdal(filename=path[0], smash_mesh=mesh, band=1, lacuna=-99.) + input_data.descriptor[..., i] = read_windowed_raster_gdal( + filename=path[0], smash_mesh=mesh, band=1, lacuna=-99.0 + ) diff --git a/smash/core/raster.py b/smash/core/raster.py index 54b651f9..e6e9ad20 100644 --- a/smash/core/raster.py +++ b/smash/core/raster.py @@ -17,231 +17,260 @@ ### GDAL RASTER FUNCTIONS -#just open the raster and return the dataset +# just open the raster and return the dataset def gdal_raster_open(filename): """ Opening a raster with gdal. this is just a wrapper around gdal.Open(filename) - + Parameters ---------- filename : string, path to a file - + Returns ---------- dataset : gdal object - + Examples ---------- dataset = gdal_raster_open("filename") """ - dataset=object() + dataset = object() if os.path.isfile(filename): dataset = gdal.Open(filename) else: raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), filename) - + return dataset -def read_windowed_raster_gdal(filename: str, smash_mesh: MeshDT, band=None, lacuna=None) -> np.ndarray: +def read_windowed_raster_gdal( + filename: str, smash_mesh: MeshDT, band=None, lacuna=None +) -> np.ndarray: """ Reading a raster file with gdal and return a np.ndarray storing the different data bands according the SMASH model boundingbox. - + Parameters ---------- filename : string, path to a file smash_mesh : smash.mesh object representing the mesh band: band to be read lacuna: float64 replacing the Nodata value - + Returns ---------- array : np.array or np.ndarray storing one or all different data, stored in filename, sliced compare to the mesh boundingbox - + Examples ---------- array=read_windowed_raster_gdal("filename", model.mesh) """ dataset = gdal_raster_open(filename) - - geotransform=gdal_get_geotransform(dataset) - - if (geotransform['xres'] != smash_mesh.dx) or (geotransform['yres'] != smash_mesh.dx): - - #Attempt to generate a smaller dataset before doing the reprojection. However, it is slower.. + + geotransform = gdal_get_geotransform(dataset) + + if (geotransform["xres"] != smash_mesh.dx) or ( + geotransform["yres"] != smash_mesh.dx + ): + # Attempt to generate a smaller dataset before doing the reprojection. However, it is slower.. 
# ~ window=gdal_smash_window_from_geotransform(geotransform,smash_mesh) # ~ dataset=gdal.Translate('/vsimem/raster.tif', dataset, srcWin=[window['col_off'], window['row_off'], window["ncols"], window["nrows"]]) - - dataset=gdal_reproject_raster(dataset,smash_mesh.dx,smash_mesh.dx) - geotransform=gdal_get_geotransform(dataset) - - #Todo: - #If smash mesh larger than window: window=1,1,all,all - #compute window of smash-mesh and get x_offset and y_offsets => offsets - #pass this window to gdal_crop_dataset_to_ndarray(dataset=dataset,window=window,offsets=offset) - #position the rainfall inside the mesh grid according offset ! - - window=gdal_smash_window_from_geotransform(geotransform,smash_mesh) - - if (band==None): - array=gdal_crop_dataset_to_ndarray(dataset=dataset,window=window,lacuna=lacuna) + + dataset = gdal_reproject_raster(dataset, smash_mesh.dx, smash_mesh.dx) + geotransform = gdal_get_geotransform(dataset) + + # Todo: + # If smash mesh larger than window: window=1,1,all,all + # compute window of smash-mesh and get x_offset and y_offsets => offsets + # pass this window to gdal_crop_dataset_to_ndarray(dataset=dataset,window=window,offsets=offset) + # position the rainfall inside the mesh grid according offset ! + + window = gdal_smash_window_from_geotransform(geotransform, smash_mesh) + + if band == None: + array = gdal_crop_dataset_to_ndarray( + dataset=dataset, window=window, lacuna=lacuna + ) else: - array=gdal_crop_dataset_to_array(dataset=dataset,window=window,band=band,lacuna=lacuna) - - - return array + array = gdal_crop_dataset_to_array( + dataset=dataset, window=window, band=band, lacuna=lacuna + ) + return array -def gdal_reproject_raster(dataset,xres,yres): +def gdal_reproject_raster(dataset, xres, yres): """ Reproject the dataset raster accoding a new resolution in the x and y directions - + Parameters ---------- dataset : gdal object from gdal.Open() xres: resolution in the x direction (columns) in meters yres: resolution in the y direction (rows) in meters - + Returns ---------- virtual_destination : a virtual gdal raster object at the new resolution - + Examples ---------- new_dataset=gdal_reproject_raster(dataset,smash_mesh.cellsize,smash_mesh.cellsize) """ - - geotransform=gdal_get_geotransform(dataset) - - dataset_projection=dataset.GetProjection() - - new_dataset_geotranform=(geotransform['xleft'],float( xres ),0.0,geotransform['ytop'],0.0,- float( yres )) - - #Do we must distinguish cases smash_mesh.dx<=geotransform['xres','yres'] and smash_mesh.dx>geotransform['xres','yres'] ? i.e use ceiling or floor function instead of int ? - #At least it work for case smash_mesh.dx<=geotransform['xres','yres'] which is the moste common case for modelling. - New_X_Size=int(dataset.RasterXSize*geotransform['xres']/xres) - New_Y_Size=int(dataset.RasterYSize*geotransform['yres']/yres) - - in_memory_dataset=gdal.GetDriverByName('MEM') - - virtual_destination=in_memory_dataset.Create('',New_X_Size, New_Y_Size, dataset.RasterCount, dataset.GetRasterBand(1).DataType) - + + geotransform = gdal_get_geotransform(dataset) + + dataset_projection = dataset.GetProjection() + + new_dataset_geotranform = ( + geotransform["xleft"], + float(xres), + 0.0, + geotransform["ytop"], + 0.0, + -float(yres), + ) + + # Do we must distinguish cases smash_mesh.dx<=geotransform['xres','yres'] and smash_mesh.dx>geotransform['xres','yres'] ? i.e use ceiling or floor function instead of int ? 
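+    # e.g. a 1000 m source grid warped onto a 500 m mesh doubles the pixel counts: int(ncols * 1000 / 500) = 2 * ncols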
+ # At least it work for case smash_mesh.dx<=geotransform['xres','yres'] which is the moste common case for modelling. + New_X_Size = int(dataset.RasterXSize * geotransform["xres"] / xres) + New_Y_Size = int(dataset.RasterYSize * geotransform["yres"] / yres) + + in_memory_dataset = gdal.GetDriverByName("MEM") + + virtual_destination = in_memory_dataset.Create( + "", + New_X_Size, + New_Y_Size, + dataset.RasterCount, + dataset.GetRasterBand(1).DataType, + ) + ########################################################### - #Workaround for gdal bug which initialise array to 0 instead as the No_Data value - #Here we initialise the band manually with the nodata_value - band=virtual_destination.GetRasterBand(1) #Notice that band is a pointer to virtual_destination + # Workaround for gdal bug which initialise array to 0 instead as the No_Data value + # Here we initialise the band manually with the nodata_value + band = virtual_destination.GetRasterBand( + 1 + ) # Notice that band is a pointer to virtual_destination band.SetNoDataValue(-9999) - Nodataarray = np.ndarray(shape=(New_Y_Size,New_X_Size)) + Nodataarray = np.ndarray(shape=(New_Y_Size, New_X_Size)) Nodataarray.fill(-9999.0) band.WriteArray(Nodataarray) ########################################################### - + virtual_destination.SetGeoTransform(new_dataset_geotranform) virtual_destination.SetProjection(dataset_projection) - gdal.ReprojectImage( dataset, virtual_destination, dataset_projection, dataset_projection, gdal.GRA_NearestNeighbour,WarpMemoryLimit=500.) - #WarpMemoryLimit=500. would probably increase the speed... but ... #https://gdal.org/programs/gdalwarp.html - #choice are : gdal.GRA_NearestNeighbour, gdal.GRA_Mode, gdal.GRA_Average ... Not tested https://gdal.org/api/gdalwarp_cpp.html#_CPPv4N15GDALResampleAlg11GRA_AverageE - #Use osgeo.gdal.Warp instead of ReprojectImage offer much more option like multithreading ? https://gdal.org/api/python/osgeo.gdal.html#osgeo.gdal.Warp - + gdal.ReprojectImage( + dataset, + virtual_destination, + dataset_projection, + dataset_projection, + gdal.GRA_NearestNeighbour, + WarpMemoryLimit=500.0, + ) + # WarpMemoryLimit=500. would probably increase the speed... but ... #https://gdal.org/programs/gdalwarp.html + # choice are : gdal.GRA_NearestNeighbour, gdal.GRA_Mode, gdal.GRA_Average ... Not tested https://gdal.org/api/gdalwarp_cpp.html#_CPPv4N15GDALResampleAlg11GRA_AverageE + # Use osgeo.gdal.Warp instead of ReprojectImage offer much more option like multithreading ? https://gdal.org/api/python/osgeo.gdal.html#osgeo.gdal.Warp + return virtual_destination -#simply slice an array according a window -def gdal_crop_dataset_to_array(dataset=object(),window={},band=1,lacuna=None): +# simply slice an array according a window +def gdal_crop_dataset_to_array(dataset=object(), window={}, band=1, lacuna=None): """ Read the raster bands from gdal object and crop the array according the window - + Parameters ---------- dataset : gdal object from gdal.Open() window: window to crop (in grid unit) band: the band number to be read. 
default is band number 1 lacuna: None or float64 - + Returns ---------- sliced_array : an array - + Examples ---------- window=gdal_smash_window_from_geotransform(dataset,smash_mesh) array=gdal_crop_dataset_to_array(dataset,window,band=1) """ - - dataset_band=dataset.GetRasterBand(band) - - sliced_array=dataset_band.ReadAsArray(window['col_off'], window['row_off'], window["ncols"], window["nrows"]) - - array_float=sliced_array.astype('float64') - - #Lacuna treatment here - if (isinstance(lacuna,float)): - Nodata=dataset_band.GetNoDataValue() - mask=np.where(sliced_array==Nodata) - array_float[mask]=lacuna - - return array_float + dataset_band = dataset.GetRasterBand(band) + + sliced_array = dataset_band.ReadAsArray( + window["col_off"], window["row_off"], window["ncols"], window["nrows"] + ) + + array_float = sliced_array.astype("float64") + # Lacuna treatment here + if isinstance(lacuna, float): + Nodata = dataset_band.GetNoDataValue() + mask = np.where(sliced_array == Nodata) + array_float[mask] = lacuna -#simply slice an array according a window -def gdal_crop_dataset_to_ndarray(dataset=object(),window={},lacuna=None): + return array_float + + +# simply slice an array according a window +def gdal_crop_dataset_to_ndarray(dataset=object(), window={}, lacuna=None): """ Read the raster bands from gdal object and crop the array according the window - + Parameters ---------- dataset : gdal object from gdal.Open() window: window to crop (in grid unit) lacuna: None or float64 - + Returns ---------- dictionnary : a dictionary with ndarrays (depending the number of bands) - + Examples ---------- window=gdal_smash_window_from_geotransform(dataset,smash_mesh) array=gdal_crop_dataset_to_array(dataset,window) """ - - dictionnary={} - nb_dataset=dataset.RasterCount - for index in range(1,nb_dataset+1): - - dataset_band=dataset.GetRasterBand(index) - - sliced_array=dataset_band.ReadAsArray(window['col_off'], window['row_off'], window["ncols"], window["nrows"]) - - array_float=sliced_array.astype('float64') - - #Lacuna treatment here - if (isinstance(lacuna,float)): - Nodata=dataset_band.GetNoDataValue() - mask=np.where(sliced_array==Nodata) - array_float[mask]=lacuna - - dictionnary.update({index:array_float}) - + + dictionnary = {} + nb_dataset = dataset.RasterCount + for index in range(1, nb_dataset + 1): + dataset_band = dataset.GetRasterBand(index) + + sliced_array = dataset_band.ReadAsArray( + window["col_off"], window["row_off"], window["ncols"], window["nrows"] + ) + + array_float = sliced_array.astype("float64") + + # Lacuna treatment here + if isinstance(lacuna, float): + Nodata = dataset_band.GetNoDataValue() + mask = np.where(sliced_array == Nodata) + array_float[mask] = lacuna + + dictionnary.update({index: array_float}) + return dictionnary -#write a new data set according a name, a meta description and bands as a list of array -def gdal_write_dataset(filename,dataset,format='Gtiff'): +# write a new data set according a name, a meta description and bands as a list of array +def gdal_write_dataset(filename, dataset, format="Gtiff"): """ write a gdal object to a new file - + Parameters ---------- filename : path to the new target file dataset : gdal object from gdal.Open() format: optional, raster format, default is Gtiff - + Returns ---------- none - + Examples ---------- virtual_dataset=gdal_reproject_raster(dataset,500.,500.) 
@@ -249,36 +278,51 @@ def gdal_write_dataset(filename,dataset,format='Gtiff'): """ width = dataset.RasterXSize height = dataset.RasterYSize - - driver = gdal.GetDriverByName( format ) - dst_ds = driver.Create(filename, xsize=width, ysize=height,bands=dataset.RasterCount, eType=dataset.GetRasterBand(1).DataType) - + + driver = gdal.GetDriverByName(format) + dst_ds = driver.Create( + filename, + xsize=width, + ysize=height, + bands=dataset.RasterCount, + eType=dataset.GetRasterBand(1).DataType, + ) + dst_ds.SetGeoTransform(dataset.GetGeoTransform()) dst_ds.SetProjection(dataset.GetProjection()) - - data = dataset.ReadAsArray(0,0,width,height) - + + data = dataset.ReadAsArray(0, 0, width, height) + # ~ for index in range(1,dataset.RasterCount+1): - # ~ dst_ds.GetRasterBand(index).WriteArray(data[index-1]) - - dst_ds.WriteRaster(0,0,width,height,data.tobytes(),width,height,band_list=list(range(1,dataset.RasterCount+1))) - - #destination=dataset.CreateCopy(filename, dataset, strict=0,options=["TILED=YES", "COMPRESS=PACKBITS"]) - dst_ds=None + # ~ dst_ds.GetRasterBand(index).WriteArray(data[index-1]) + + dst_ds.WriteRaster( + 0, + 0, + width, + height, + data.tobytes(), + width, + height, + band_list=list(range(1, dataset.RasterCount + 1)), + ) + + # destination=dataset.CreateCopy(filename, dataset, strict=0,options=["TILED=YES", "COMPRESS=PACKBITS"]) + dst_ds = None def gdal_get_geotransform(dataset): """ Getting the GeoTransform coeficients from a gdal object - + Parameters ---------- dataset : gdal object from gdal.Open() - + Returns ---------- geotransform : Python dictionnary - + # ~ A GeoTransform consists in a set of 6 coefficients: # ~ GT(0) x-coordinate of the upper-left corner of the upper-left pixel. # ~ GT(1) w-e pixel resolution / pixel width. @@ -286,56 +330,65 @@ def gdal_get_geotransform(dataset): # ~ GT(3) y-coordinate of the upper-left corner of the upper-left pixel. # ~ GT(4) column rotation (typically zero). # ~ GT(5) n-s pixel resolution / pixel height (negative value for a north-up image). - + Examples ---------- dataset = gdal_raster_open(filename) geotransform=gdal_get_geotransform(dataset) """ - + transform = dataset.GetGeoTransform() - geotransform={'xleft':transform[0],'xres':transform[1], 'ytop':transform[3], 'yres':-transform[5]} - - return geotransform + geotransform = { + "xleft": transform[0], + "xres": transform[1], + "ytop": transform[3], + "yres": -transform[5], + } + return geotransform -def gdal_smash_window_from_geotransform(geotransform,smash_mesh): +def gdal_smash_window_from_geotransform(geotransform, smash_mesh): """ Compute the dataset array window (from the geotransform) according the Smash mesh - + Parameters ---------- geotransform : geotransform computed from a gdal dataset smash_mesh : Smash mesh object model.mesh - + Returns ---------- window : Python dictionnary with componnents: row_off (offset), col_off (offset), nrows, ncols - + Examples ---------- dataset = gdal_raster_open(filename) geotransform=gdal_get_geotransform(dataset) window=gdal_smash_window_from_geotransform(geotransform,smash_mesh) """ - - col_off = (smash_mesh.xmin - geotransform['xleft']) / geotransform['xres'] - row_off = (geotransform['ytop'] - smash_mesh.ymax) / geotransform['yres'] - - #If smash_mesh.dx==geotransform['xres','yres'] no problem ! - #It works for case : smash_mesh.dx!=geotransform['xres','yres'] - #Do we must distinguish case smash_mesh.dx<=geotransform['xres','yres'] and smash_mesh.dx>geotransform['xres','yres'] ? 
i.e use ceiling or floor function instead of int ? - - window={"row_off":row_off,'col_off':col_off,'nrows':int(smash_mesh.nrow*smash_mesh.dx/geotransform['yres']),'ncols':int(smash_mesh.ncol*smash_mesh.dx/geotransform['xres'])} - + + col_off = (smash_mesh.xmin - geotransform["xleft"]) / geotransform["xres"] + row_off = (geotransform["ytop"] - smash_mesh.ymax) / geotransform["yres"] + + # If smash_mesh.dx==geotransform['xres','yres'] no problem ! + # It works for case : smash_mesh.dx!=geotransform['xres','yres'] + # Do we must distinguish case smash_mesh.dx<=geotransform['xres','yres'] and smash_mesh.dx>geotransform['xres','yres'] ? i.e use ceiling or floor function instead of int ? + + window = { + "row_off": row_off, + "col_off": col_off, + "nrows": int(smash_mesh.nrow * smash_mesh.dx / geotransform["yres"]), + "ncols": int(smash_mesh.ncol * smash_mesh.dx / geotransform["xres"]), + } + return window - -def union_bbox(bbox1,bbox2): + +def union_bbox(bbox1, bbox2): """ - Function which compute the bounding boxes union of 2 input bbox. It return the working bbox - + Function which compute the bounding boxes union of 2 input bbox. It return the working bbox + Parameters ---------- bbox1: dict containin the first bbox informations @@ -343,18 +396,18 @@ def union_bbox(bbox1,bbox2): ---------- returns dic containing the bbox union - + Examples ---------- dataset=gdal_raster_open(filename) possible_bbox=union_bbox(bbox,bbox_dataset) """ - left=max(bbox1['left'],bbox2['left']) - bottom=max(bbox1['bottom'],bbox2['bottom']) - right=min(bbox1['right'],bbox2['right']) - top=min(bbox1['top'],bbox2['top']) - if (left0): - xtics=xtics*dt - - if date_range is not None: - xtics = np.arange(np.datetime64(date_range[0]),np.datetime64(date_range[1]), np.timedelta64(int(date_range[2]), 's')) - - if (len(columns)>0): - for i in columns: - ax.plot(xtics[:],data[i,:],color=color,label=label,ls=linestyle,lw=linewidth,marker=marker,markersize=markersize) - else: - for i in range(0,data.shape[0],step): - ax.plot(xtics[:],data[i,:],label=label,ls=linestyle,lw=linewidth,marker=marker,markersize=markersize) - - if (ylim[0]!=None): - ax.set_ylim(bottom=ylim[0]) - if (ylim[1]!=None): - ax.set_ylim(top=ylim[1]) - if (xlim[0]!=None): - ax.set_xlim(left=xlim[0]) - if (xlim[1]!=None): - ax.set_xlim(right=xlim[1]) - - ax.axes.grid(True,alpha=.7, ls="--") - if (legend): - ax.legend(loc='upper left') - else: - ax.legend(loc='upper left').set_visible(False) - - if (len(figname)>0): - fig.savefig(figname, transparent=False, dpi=80, bbox_inches="tight") - # ~ else: - # ~ fig.show() - - plot=[fig,ax] - - return plot - - - -def save_figure(fig,figname="myfigure",xsize=8,ysize=6,transparent=False,dpi=80): - fig.set_size_inches(xsize, ysize, forward=True) - fig.savefig(figname, transparent=transparent, dpi=dpi, bbox_inches="tight") - - -def save_figure_from_plot(plot,figname="myfigure",xsize=8,ysize=6,transparent=False,dpi=80,xlim=[None,None],ylim=[None,None]): - - fig=plot[0] - ax=plot[1] - - if (ylim[0]!=None): - ax.set_ylim(bottom=ylim[0]) - if (ylim[1]!=None): - ax.set_ylim(top=ylim[1]) - if (xlim[0]!=None): - ax.set_xlim(left=xlim[0]) - if (xlim[1]!=None): - ax.set_xlim(right=xlim[1]) - - fig.set_size_inches(xsize, ysize, forward=True) - fig.savefig(figname, transparent=transparent, dpi=dpi, bbox_inches="tight") - - - -def plot_matrix(matrix,mask=None,figname="",title="",label="",vmin=None,vmax=None): - - fig, ax = plt.subplots() - ax.set_title(title) - - if mask is not None: - ma = (mask == 0) - ma_var = np.where(ma, np.nan, 
matrix) - else: - ma_var=matrix - - map_var = ax.imshow(ma_var,vmin=vmin,vmax=vmax); - fig.colorbar(map_var, ax=ax, label=label,shrink=0.75); - - plot=[fig,ax] - - if (len(figname)>0): - fig.savefig(figname, transparent=False, dpi=80, bbox_inches="tight") - else: - fig.show() - - plot=[fig,ax] - - return plot - - - -def plot_image(matrice=np.zeros(shape=(2,2)),bbox=None,title="",xlabel="",ylabel="",zlabel="",vmin=None,vmax=None,mask=None,figname=""): - """ - Function for plotting a matrix as an image - - Parameters - ---------- - matrice : numpy array - bbox : ["left","right","bottom","top"] bouding box to put x and y coordinates instead of the shape of the matrix - title : character, title of the plot - xlabel : character, label of the xaxis - ylabel : character, label of the y axis - zlabel : character, label of the z axis - vmin: real, minimum z value - vmax: real, maximum z value - mask: integer, matrix, shape of matice, contain 0 for pixels that should not be plotted - show: booloen, true call fig.show() or false return fig instead. - - Examples - ---------- - smash.utils.plot_image(mesh_france['drained_area'],bbox=bbox,title="Surfaces drainées",xlabel="Longitude",ylabel="Latitude",zlabel="Surfaces drainées km^2",vmin=0.0,vmax=1000,mask=mesh_france['global_active_cell']) - - """ - - matrice=np.float32(matrice) - - if (type(bbox)!=type(None)): - extend=[bbox["left"],bbox["right"],bbox["bottom"],bbox["top"]]#bbox.values() - else: - extend=None - - if (type(mask)!=type(None)): - matrice[np.where(mask==0)]=np.nan - - # ~ color_matrice=matrice - # ~ if vmax!=None: - # ~ color_matrice[np.where(matrice>vmax)]=vmax - # ~ if vmin!=None: - # ~ color_matrice[np.where(matrice0): - fig.savefig(figname, transparent=False, dpi=80, bbox_inches="tight") - else: - fig.show() - - - - - - -def plot_model_params_and_states(model,variables,fstates=False): - - if not isinstance(variables, list): - raise ValueError( - f"variables '{variables}' must be list of parameters or states names" - ) - - nb_subplot=len(variables) - if (nb_subplot>1): - nb_rows=math.ceil(math.sqrt(nb_subplot)) - nb_cols=math.ceil(nb_subplot/nb_rows) - #nb_cols=nb_subplot- math.floor(math.sqrt(nb_subplot)) - else: - nb_rows=1 - nb_cols=1 - - print(nb_rows,nb_cols) - fig, ax = plt.subplots(nb_rows, nb_cols) - - if len(variables)==1: - ax = [ax] - - fig.suptitle(f'Optimized parameter set') - - for i,var in enumerate(variables): - - rr=(i+1)/(nb_cols) - part_entiere=math.floor(rr) - part_reel=rr-part_entiere - - if part_reel>0: - r=max(0,part_entiere) - else: - r=max(0,part_entiere-1) - - if (part_reel==0.): - c=nb_cols-1 - else: - c=math.ceil((part_reel)*(nb_cols))-1 - - #r=math.ceil(i/(nb_cols)) - #c=(r*nb_cols-i) - print(i,r,c) - - if isinstance(model,dict): - - for key,list_param in smash.core._constant.STRUCTURE_PARAMETERS.items(): - - if var in list_param: - - values=model["parameters"][var] - break - - for key,list_states in smash.core._constant.STRUCTURE_STATES.items(): - - if var in list_states: - - if fstates==True : - values=model["output"]["fstates"][var] - else: - values=model["states"][var] - - break - ma = (model["mesh"]["active_cell"] == 0) - - else: - - if var in model.setup._parameters_name: - - values=getattr(model.parameters,var) - - if var in model.setup._states_name: - - if fstates: - values=getattr(model.output.states,var) - else: - values=getattr(model.states,var) - - ma = (model.mesh.active_cell == 0) - - ma_var = np.where(ma, np.nan, values) - - map_var = ax[r,c].imshow(ma_var); - fig.colorbar(map_var, ax=ax[r,c], 
label=var,shrink=0.75); - - plot=[fig,ax] - return plot - - - -def plot_lcurve(instance,figname=None,transform=False,annotate=True,plot=None): - - if not isinstance(instance,dict): - raise ValueError( - f"instance must be a dict" - ) - - if plot is not None: - fig=plot[0] - ax=plot[1] - else: - fig,ax=plt.subplots() - - if "wjreg_lcurve_opt" in instance: - pass - else: - return plot - - if (transform==True): - - jobs_max=np.zeros(shape=len(instance["cost_jobs"])) - jobs_max[:]=instance["cost_jobs_initial"] - - jobs_max[:]=max(instance["cost_jobs"]) - - jobs_min=min(instance["cost_jobs"]) - jreg_max=max(instance["cost_jreg"]) - - #index_min=np.where(instance["cost_jobs"] == jobs_min) - - #choose the lower value of jreg if index_min has many values - #index_jreg_max=list(instance["cost_jreg"]).index(min(instance["cost_jreg"][index_min[0]])) - #jreg_max=instance["cost_jreg"][index_jreg_max] - - - jreg_min=np.zeros(shape=len(instance["cost_jreg"])) - #jreg_min[:]=instance["cost_jreg_initial"] - #si cost_jreg_initial > 0 then prendre : - jreg_min[:]=min(instance["cost_jreg"]) - - go_plot=False - if (np.all((jobs_max[0]-jobs_min)>0.)) and (np.all((jreg_max-jreg_min[0])>0.)): - xs=(jobs_max-instance["cost_jobs"])/(jobs_max[0]-jobs_min) - ys=(instance["cost_jreg"]-jreg_min)/(jreg_max-jreg_min[0]) - go_plot=True - - - # ~ if (np.all((jreg_max-jreg_min[0])>0.)): - # ~ ys=(instance["cost_jreg"]-jreg_min)/(jreg_max-jreg_min[0]) - - # ~ #plot lcurve - if (go_plot): - ax.plot(xs,ys, ls="--", marker="x", color="grey"); - - # zip joins x and y coordinates in pairs - i=0 - for x,y in zip(xs,ys): - - label="" - textcolor="black" - point_type="." - ax.plot(x,y, color=textcolor,marker=point_type,markersize=5); - - if (instance["wjreg"][i]==instance["wjreg_lcurve_opt"]): - textcolor="red" - point_type="o" - ax.plot(x,y, color=textcolor,marker=point_type,markersize=8); - - label = "{:.2E}".format(instance["wjreg_lcurve_opt"]) - - #print(instance["wjreg"][i],instance["wjreg_fast"]) - - go_plot=False - if (instance["wjreg"][i]==instance["wjreg_fast"]): - go_plot=True - elif (abs(1.-instance["wjreg"][i]/instance["wjreg_fast"])<0.0001): - go_plot=True - - if (go_plot) : - textcolor="green" - point_type="^" - ax.plot(x,y, color=textcolor,marker=point_type,markersize=8); - - if annotate: - ax.annotate(label, # this is the text - (x,y), # these are the coordinates to position the label - textcoords="offset points", # how to position the text - xytext=(0,5), # distance from text to points (x,y) - ha='right', # horizontal alignment can be left, right or center - color=textcolor, fontsize=10) - - i=i+1 - - ax.plot([0,1],[0,1],color="red") - - else: - - ax.plot(instance["cost_jobs"],instance["cost_jreg"], ls="--", marker="x",color="grey"); - - # zip joins x and y coordinates in pairs - i=0 - for x,y in zip(instance["cost_jobs"],instance["cost_jreg"]): - - label = "{:.2E}".format(instance["wjreg"][i]) - textcolor="black" - - if (instance["wjreg"][i]==instance["wjreg_lcurve_opt"]): - textcolor="red" - - if (abs(1.-instance["wjreg"][i]/instance["wjreg_fast"])<0.0001): - textcolor="green" - - if annotate: - ax.annotate(label, # this is the text - (x,y), # these are the coordinates to position the label - textcoords="offset points", # how to position the text - xytext=(0,5), # distance from text to points (x,y) - ha='right', # horizontal alignment can be left, right or center - color=textcolor, fontsize=10) - - i=i+1 - - - ax.set_xlabel("(jobs_max-jobs)/(jobs_max_jobs_min)"); - 
ax.set_ylabel("(jreg-jreg_min)/(jreg_max-jreg_min)"); - - if figname is not None: - fig.savefig(figname, transparent=False, dpi=80, bbox_inches="tight") - - plot=[fig,ax] - return plot - - -def plot_dist_wjreg(res_assim): - - fig,ax=plt.subplots() - x=list() - y=list() - for key,values in res_assim.items(): - lcurve=values["lcurve"] - color="black" - point_type="." - markersize=6 - if (lcurve["wjreg_lcurve_opt"] is not None) and (lcurve["wjreg_fast"] is not None) and (lcurve["wjreg_lcurve_opt"] >0.) and (lcurve["wjreg_fast"] >0.): - x.append(float(lcurve["wjreg_lcurve_opt"])) - y.append(float(lcurve["wjreg_fast"])) - - xn=np.log(np.array(x)) - yn=np.log(np.array(y)) - - ax.scatter(np.array(xn),np.array(yn), color=color,marker=point_type) - ax.set_xlabel("log(wjreg) - Lcurve method"); - ax.set_ylabel("log(wjreg) - Fast method"); - - min_val=min(min(xn),min(yn)) - max_val=max(max(xn),max(yn)) - x=np.arange(min_val,max_val+1) - y=np.arange(min_val,max_val+1) - ax.plot(x,y, color="red",marker=None,markersize=markersize) - - return fig,ax - - - - -def plot_mesh(model=None,mesh=None,title=None,figname=None,coef_hydro=99.): - - if model is not None: - if isinstance(mesh_in,smash.Model): - mesh=model.mesh - else: - raise ValueError( - f"model object must be an instance of smash Model" - ) - elif mesh is not None: - if isinstance(mesh,dict): - pass - else: - raise ValueError( - f"mesh must be a dict" - ) - else: - raise ValueError( - f"model or mesh are mandatory and must be a dict or a smash Model object" - ) - - mesh["active_cell"] - gauge=mesh["gauge_pos"] - stations=mesh["code"] - flow_acc=mesh["flwacc"] - - na = (mesh["active_cell"] == 0) - - flow_accum_bv = np.where(na, 0., flow_acc.data) - surfmin=(1.-coef_hydro/100.)*np.max(flow_accum_bv) - mask_flow=(flow_accum_bv < surfmin) - flow_plot=np.where(mask_flow, np.nan,flow_accum_bv.data) - flow_plot=np.where(na, np.nan,flow_plot) - - fig, ax = plt.subplots() - - if title is not None: - ax.set_title(title) - - active_cell = np.where(na, np.nan, mesh["active_cell"]) - #cmap = ListedColormap(["grey", "lightgray"]) - cmap = ListedColormap([ "lightgray"]) - ax.imshow(active_cell,cmap=cmap) - - #cmap = ListedColormap(["lightblue","blue","darkblue"]) - myblues = matplotlib.colormaps['Blues'] - cmp = ListedColormap(myblues(np.linspace(0.30, 1.0, 265))) - im=ax.imshow(flow_plot,cmap=cmp) - #im=ax.imshow(flow_plot,cmap="Blues") - - fig.colorbar(im,cmap="Blues", ax=ax, label="Cumulated surface (km²)",shrink=0.75); - - - for i in range(len(stations)): - coord=gauge[i] - code=stations[i] - ax.plot(coord[1],coord[0], color="green",marker='o',markersize=6) - ax.annotate(code, # this is the text - (coord[1],coord[0]), # these are the coordinates to position the label - textcoords="offset points", # how to position the text - xytext=(0,5), # distance from text to points (x,y) - ha='right', # horizontal alignment can be left, right or center - color="red", - fontsize=10) - - if figname is not None: - fig.savefig(figname, transparent=False, dpi=80, bbox_inches="tight") - - return fig,ax - - - - -def plot_event_seg(model,event_seg,code=''): - event_seg_sta_aval = event_seg[(event_seg['code'] == code)] - - dti = pd.date_range(start=model.setup.start_time, end=model.setup.end_time, freq="H")[1:] - qo = model.input_data.qobs[0, :] - prcp = model.input_data.mean_prcp[0, :] - starts = pd.to_datetime(event_seg_sta_aval["start"]) - ends = pd.to_datetime(event_seg_sta_aval["end"]) - - fig, (ax1, ax2) = plt.subplots(2, 1) - fig.subplots_adjust(hspace=0) - ax1.bar(dti, 
prcp, color="lightslategrey", label="Rainfall"); - - ax1.axvspan(starts[0], ends[0], alpha=.1, color="red", label="Event segmentation"); - for i in range(1,len(starts)): - ax1.axvspan(starts[i], ends[i], alpha=.1, color="red"); - ax1.axvspan(starts[i], ends[i], alpha=.1, color="red"); - - ax1.grid(alpha=.7, ls="--") - ax1.get_xaxis().set_visible(False) - ax1.set_ylabel("$mm$"); - ax1.invert_yaxis() - ax2.plot(dti, qo, label="Observed discharge"); - for i in range(0,len(starts)): - ax2.axvspan(starts[i], ends[i], alpha=.1, color="red"); - - ax2.grid(alpha=.7, ls="--") - ax2.tick_params(axis="x", labelrotation=20) - ax2.set_ylabel("$m^3/s$"); - ax2.set_xlim(ax1.get_xlim()); - fig.legend(); - fig.suptitle("V5014010"); - - return fig - - diff --git a/testing/functions_smash_time.py b/testing/functions_smash_time.py deleted file mode 100644 index 190da1b9..00000000 --- a/testing/functions_smash_time.py +++ /dev/null @@ -1,181 +0,0 @@ -import numpy as np -import math -import datetime - - - -def duration_to_timedelta(duration:list|int|float): - - if isinstance(duration,int|float): - delta_t=datetime.timedelta(seconds=duration) - elif isinstance(duration,list): - - if (isinstance(duration[0],str)): - unit=duration[0] - time=duration[1] - elif (isinstance(duration[1],str)): - unit=duration[1] - time=duration[0] - else: - raise ValueError( - f"duration '{duration}' must contain a str and and integer" - ) - - if isinstance(time,int|float): - - if (unit=="seconds") | (unit=="s"): - delta_t=datetime.timedelta(seconds=time) - elif (unit=="minutes") | (unit=="min"): - delta_t=datetime.timedelta(minutes=time) - elif (unit=="hours") | (unit=="h"): - delta_t=datetime.timedelta(hours=time) - elif (unit=="days") | (unit=="d"): - delta_t=datetime.timedelta(days=time) - else: - raise ValueError( - f"duration unit '{unit}' must be an str. Possible values: (seconds|s) (minutes|min) (hours|h) (days|d)" - ) - else: - raise ValueError( - f"duration value '{time}' must be an integer or float." - ) - else: - raise ValueError( - f"duration '{duration}' must be a list or an integer or float." - ) - - return delta_t - - -def to_datetime(time=''): - if isinstance(time,str): - return datetime.datetime.fromisoformat(time) - else: - raise ValueError( - f"time '{time}' must be a instance of str." - ) - - -def to_datestring(date): - return date.strftime("%Y-%m-%d %H:%M") - - -def dict_filter_by_date(in_dict,t_start=None,t_end=None): - - out_res=dict() - - if t_start is not None: - t_s=datetime.datetime.fromisoformat(t_start) - - if t_end is not None: - t_e=datetime.datetime.fromisoformat(t_end) - - for key,value in in_dict.items(): - - date_simu=datetime.datetime.fromisoformat(key) - - if t_start is None: - t_s=date_simu - - if t_end is None: - t_e=date_simu - - if (date_simu>=t_s) and (date_simu<=t_e): - out_res.update({key:value}) - - return out_res - - -def stringdecode(self): - """ - Decode characters from a array of integer: Usefull when you try to access to a array of string in the object model. 
- """ - return self.tobytes(order='F').decode('utf-8').split() - - - -#date and time functions -def date_to_path(date, format_schapi=True): - """ - Convert the SMASH date format to a path for searching rainfall - - Parameters - ---------- - date : integer representing a date with the format %Y%m%d%H%M%S - - Returns - ---------- - path : string representing the path /year/month/day/ - - Examples - ---------- - date_to_path(date.strftime('%Y%m%d%H%M') - /%Y/%m/%d/ - """ - year=date[0:4] - month=date[4:6] - day=date[6:8] - - if format_schapi: - - ret = os.sep + year + os.sep + month + os.sep + day + os.sep - - else: - - ret = os.sep + year + os.sep + month + os.sep - - return ret - - -def decompose_date(date): - """ - Split a SMASH date - - Parameters - ---------- - date : integer representing a date with the format %Y%m%d%H%M%S - - Returns - ---------- - year,month,day,hour,minute : integers each part of the date (seconds not included) - - Examples - ---------- - year,month,day,hour,minute=decompose_date(date.strftime('%Y%m%d%H%M') - """ - year=date[0:4] - month=date[4:6] - day=date[6:8] - hour=date[8:10] - minute=date[10:13] - return year,month,day,hour,minute - - -def date_range(self): - """ - Generate a Panda date list according the smash model setup - - Parameters - ---------- - self : object model - - Returns - ---------- - date_list: a Panda list of date from self.setup.date_deb to self.setup.date_prv - - Examples - ---------- - model = smash.Model(configuration='Data/Real_case/configuration.txt') - date_list=date_range(model) - """ - delta_t=datetime.timedelta(seconds=self.setup.dt) - - year,month,day,hour,minute=decompose_date(self.setup.date_deb.decode()) - date_start = datetime.datetime(int(year),int(month),int(day),int(hour),int(minute))+delta_t - - year,month,day,hour,minute=decompose_date(self.setup.date_prv.decode()) - date_end = datetime.datetime(int(year),int(month),int(day),int(hour),int(minute)) - - date_list=pandas.date_range(date_start,date_end,freq=delta_t) - return date_list - diff --git a/testing/test-desag_rainfall.py b/testing/test-desag_rainfall.py deleted file mode 100644 index aa975c8e..00000000 --- a/testing/test-desag_rainfall.py +++ /dev/null @@ -1,57 +0,0 @@ -import smash -import numpy as np -import matplotlib.pyplot as plt -from osgeo import gdal - -from functions_smash_plot import * -from functions_smash_time import * - -setup,mesh=smash.load_dataset('cance') -model=smash.Model(setup,mesh) -plot=plot_discharges(model) -plot[0].show() - -#test the model with smaller dx -mesh["dx"]=500. -model_desag=smash.Model(setup,mesh) - -index=np.where(model.input_data.mean_prcp[0,:]>10)[0] - -plot_matrix(model.input_data.prcp[:,:,index[0]],mask=model.mesh.active_cell,figname='rainfall_dx1000.png',title="rainfall dx=1000m",vmin=0.0,vmax=25) -plot_matrix(model_desag.input_data.prcp[:,:,index[0]],mask=model_desag.mesh.active_cell,figname='rainfall_dx500.png',title="rainfall dx=500m",vmin=0.0,vmax=25) #compare the upper-left corner with the previous figure, it is like the catchment is half sized from the upper-left corner - - - -#tiff filename seleciton -date='201410100600' -YY,MM,DD,hh,mm=decompose_date(date) -filename=f'/home/maxime/DassHydro-Dev/smash/smash/dataset/Cance/prcp/{YY}/{MM}/{DD}/rain_precipitation_{date}_{date}.tif' - -#test the function globally -model.mesh.dx=500. -array=smash.core.raster.read_windowed_raster_gdal(filename=filename, smash_mesh=model.mesh, band=1, lacuna=-99.) 
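# A minimal numpy sketch (illustrative addition; the toy grid and 'scale' are
# assumptions, not data from this test): the shape relationship that the
# dx=1000 m vs dx=500 m comparison above relies on, with np.kron standing in
# for nearest-neighbour disaggregation.
import numpy as np
src = np.arange(4.0).reshape(2, 2)           # toy 2x2 grid at dx=1000 m
scale = int(1000.0 / 500.0)                  # resolution ratio, here 2
dst = np.kron(src, np.ones((scale, scale)))  # each cell becomes a 2x2 block
assert dst.shape == (4, 4) and dst[0, 0] == dst[0, 1] == src[0, 0]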
- -#manually test all action of the function, test to crop the array before with vsimem fsystem (I expected to be faster but it not) -dataset =smash.core.raster.gdal_raster_open(filename) -geotransform=smash.core.raster.gdal_get_geotransform(dataset) -window=smash.core.raster.gdal_smash_window_from_geotransform(geotransform,model.mesh) -dataset=gdal.Translate('/vsimem/raster.tif', dataset, srcWin=[window['col_off'], window['row_off'], window["ncols"], window["nrows"]]) -dataset=smash.core.raster.gdal_reproject_raster(dataset,model.mesh.dx,model.mesh.dx) -geotransform=smash.core.raster.gdal_get_geotransform(dataset) -window=smash.core.raster.gdal_smash_window_from_geotransform(geotransform,model.mesh) -array=smash.core.raster.gdal_crop_dataset_to_array(dataset=dataset,window=window,band=1,lacuna=-99.) - - -#Test the reprojection -dataset = smash.core.raster.gdal_raster_open(filename) -dataset_500=smash.core.raster.gdal_reproject_raster(dataset,500,500) -dataset_2000=smash.core.raster.gdal_reproject_raster(dataset,2000,2000) -array=dataset.GetRasterBand(1).ReadAsArray() -array_500=dataset_500.GetRasterBand(1).ReadAsArray() -array_2000=dataset_2000.GetRasterBand(1).ReadAsArray() - -maxval=np.max(array) -plot_matrix(array,figname='grid_rainfall_dx1000.png',title="rainfall dx=1000m",vmin=0.0,vmax=maxval) -plot_matrix(array_500,figname='grid_rainfall_dx500.png',title="rainfall dx=500m",vmin=0.0,vmax=maxval) -plot_matrix(array_2000,figname='grid_rainfall_dx2000.png',title="rainfall dx=2000m",vmin=0.0,vmax=maxval) -#Notice grid_rainfall_dx1000.png grid_rainfall_dx500.png are very similar but the size is different, grid_rainfall_dx2000.png has the same aspect but the rainfall is averaged for each cell 2x2 km From cfc66e9af7d9c11ab61b6230d418e32d96ad1840 Mon Sep 17 00:00:00 2001 From: Jay-Allemand Maxime Date: Wed, 21 Jun 2023 16:50:22 +0200 Subject: [PATCH 36/73] Fix comments for sphynx Add .rst file for the documentation --- doc/source/api_reference/raster.rst | 26 +++++++++++++++ smash/core/raster.py | 51 ++++++++++++++++------------- 2 files changed, 54 insertions(+), 23 deletions(-) create mode 100644 doc/source/api_reference/raster.rst diff --git a/doc/source/api_reference/raster.rst b/doc/source/api_reference/raster.rst new file mode 100644 index 00000000..9cd49b4d --- /dev/null +++ b/doc/source/api_reference/raster.rst @@ -0,0 +1,26 @@ +.. _api_reference.io: + +====== +Raster +====== + +.. currentmodule:: smash.core.raster + +Some functions to manipulate raster files +***************************************** +.. 
autosummary:: + :toctree: smash/ + + gdal_raster_open + read_windowed_raster_gdal + gdal_reproject_raster + gdal_crop_dataset_to_array + gdal_crop_dataset_to_ndarray + gdal_write_dataset + gdal_get_geotransform + gdal_smash_window_from_geotransform + union_bbox + get_bbox + get_bbox_from_window + get_window_from_bbox + crop_array diff --git a/smash/core/raster.py b/smash/core/raster.py index e6e9ad20..d0f5ae7d 100644 --- a/smash/core/raster.py +++ b/smash/core/raster.py @@ -27,14 +27,14 @@ def gdal_raster_open(filename): filename : string, path to a file Returns - ---------- + ------- dataset : gdal object Examples - ---------- + -------- dataset = gdal_raster_open("filename") """ - dataset = object() + if os.path.isfile(filename): dataset = gdal.Open(filename) else: @@ -57,11 +57,11 @@ def read_windowed_raster_gdal( lacuna: float64 replacing the Nodata value Returns - ---------- + ------- array : np.array or np.ndarray storing one or all different data, stored in filename, sliced compare to the mesh boundingbox Examples - ---------- + -------- array=read_windowed_raster_gdal("filename", model.mesh) """ dataset = gdal_raster_open(filename) @@ -109,11 +109,11 @@ def gdal_reproject_raster(dataset, xres, yres): yres: resolution in the y direction (rows) in meters Returns - ---------- + ------- virtual_destination : a virtual gdal raster object at the new resolution Examples - ---------- + -------- new_dataset=gdal_reproject_raster(dataset,smash_mesh.cellsize,smash_mesh.cellsize) """ @@ -187,11 +187,11 @@ def gdal_crop_dataset_to_array(dataset=object(), window={}, band=1, lacuna=None) lacuna: None or float64 Returns - ---------- + ------- sliced_array : an array Examples - ---------- + -------- window=gdal_smash_window_from_geotransform(dataset,smash_mesh) array=gdal_crop_dataset_to_array(dataset,window,band=1) """ @@ -225,7 +225,7 @@ def gdal_crop_dataset_to_ndarray(dataset=object(), window={}, lacuna=None): lacuna: None or float64 Returns - ---------- + ------- dictionnary : a dictionary with ndarrays (depending the number of bands) Examples @@ -268,11 +268,11 @@ def gdal_write_dataset(filename, dataset, format="Gtiff"): format: optional, raster format, default is Gtiff Returns - ---------- + ------- none Examples - ---------- + -------- virtual_dataset=gdal_reproject_raster(dataset,500.,500.) 
gdal_write_dataset('outfile',virtual_dataset) """ @@ -320,7 +320,7 @@ def gdal_get_geotransform(dataset): dataset : gdal object from gdal.Open() Returns - ---------- + ------- geotransform : Python dictionnary # ~ A GeoTransform consists in a set of 6 coefficients: @@ -358,7 +358,7 @@ def gdal_smash_window_from_geotransform(geotransform, smash_mesh): smash_mesh : Smash mesh object model.mesh Returns - ---------- + ------- window : Python dictionnary with componnents: row_off (offset), col_off (offset), nrows, ncols Examples @@ -393,12 +393,13 @@ def union_bbox(bbox1, bbox2): ---------- bbox1: dict containin the first bbox informations bbox2 : dict containin the second bbox informations - ---------- + returns + ------- dic containing the bbox union Examples - ---------- + -------- dataset=gdal_raster_open(filename) possible_bbox=union_bbox(bbox,bbox_dataset) """ @@ -420,12 +421,13 @@ def get_bbox(dataset): Parameters ---------- dataset: gdal object - ---------- + returns + ------- dic containing the bbox of the dataset Examples - ---------- + -------- dataset=gdal_raster_open(filename) bbox_dataset=get_bbox(dataset) """ @@ -447,12 +449,13 @@ def get_bbox_from_window(dataset, window): ---------- dataset: gdal object window : dict with ncols, nrows, col offset and row offset - ---------- + returns + ------- dic containing the computed bbox Examples - ---------- + -------- dataset=gdal_raster_open(filename) bbox_dataset=get_bbox(dataset) window=get_window_from_bbox(dataset,bbox_dataset) @@ -477,12 +480,13 @@ def get_window_from_bbox(dataset, bbox): ---------- dataset: gdal object bbox : dict containing the bbox - ---------- + returns + ------- dic containing the computed windows Examples - ---------- + -------- dataset=gdal_raster_open(filename) bbox_dataset=get_bbox(dataset) window=get_window_from_bbox(dataset,bbox_dataset) @@ -519,8 +523,9 @@ def crop_array(array, window): ---------- array: numpy array window : dict containg the window to crop - ---------- + returns + ------- crop_array: the cropped numpy array, shape of the defined window """ From fe23c1f5ff46f7b030ea4e737abf836d6c17b48f Mon Sep 17 00:00:00 2001 From: Jay-Allemand Maxime Date: Wed, 21 Jun 2023 17:30:33 +0200 Subject: [PATCH 37/73] rename raster -> raster_handler.py move raster_handler.py -> smash.tools fix docstrings upper to lower case some variables in raster_handler.py --- .../{raster.rst => raster_handler.rst} | 10 +-- setup.py | 1 + smash/core/_read_input_data.py | 10 +-- smash/tools/__init__.py | 1 + .../raster.py => tools/raster_handler.py} | 70 +++++++++---------- 5 files changed, 46 insertions(+), 46 deletions(-) rename doc/source/api_reference/{raster.rst => raster_handler.rst} (78%) create mode 100644 smash/tools/__init__.py rename smash/{core/raster.py => tools/raster_handler.py} (89%) diff --git a/doc/source/api_reference/raster.rst b/doc/source/api_reference/raster_handler.rst similarity index 78% rename from doc/source/api_reference/raster.rst rename to doc/source/api_reference/raster_handler.rst index 9cd49b4d..6e00091a 100644 --- a/doc/source/api_reference/raster.rst +++ b/doc/source/api_reference/raster_handler.rst @@ -1,10 +1,10 @@ .. _api_reference.io: -====== -Raster -====== +============== +Raster handler +============== -.. currentmodule:: smash.core.raster +.. 
currentmodule:: smash.tools.raster_handler Some functions to manipulate raster files ***************************************** @@ -12,7 +12,7 @@ Some functions to manipulate raster files :toctree: smash/ gdal_raster_open - read_windowed_raster_gdal + gdal_read_windowed_raster gdal_reproject_raster gdal_crop_dataset_to_array gdal_crop_dataset_to_ndarray diff --git a/setup.py b/setup.py index ab5a0835..63aeba08 100644 --- a/setup.py +++ b/setup.py @@ -15,6 +15,7 @@ "smash.solver", "smash.mesh", "smash.io", + "smash.tools", "smash.dataset", "smash.tests", "smash.tests.core", diff --git a/smash/core/_read_input_data.py b/smash/core/_read_input_data.py index 337ab83c..6cc8ba26 100644 --- a/smash/core/_read_input_data.py +++ b/smash/core/_read_input_data.py @@ -1,6 +1,6 @@ from __future__ import annotations -from smash.core.raster import read_windowed_raster_gdal +from smash.tools.raster_handler import gdal_read_windowed_raster from smash.core.utils import sparse_matrix_to_vector @@ -120,7 +120,7 @@ def _read_prcp(setup: SetupDT, mesh: MeshDT, input_data: Input_DataDT): else: matrix = ( - read_windowed_raster_gdal( + gdal_read_windowed_raster( filename=files[ind], smash_mesh=mesh, band=1, lacuna=-99.0 ) * setup.prcp_conversion_factor @@ -189,7 +189,7 @@ def _read_pet(setup: SetupDT, mesh: MeshDT, input_data: Input_DataDT): subset_date_range = date_range[ind_day] matrix = ( - read_windowed_raster_gdal( + gdal_read_windowed_raster( filename=files[ind], smash_mesh=mesh, band=1, lacuna=-99.0 ) * setup.pet_conversion_factor @@ -236,7 +236,7 @@ def _read_pet(setup: SetupDT, mesh: MeshDT, input_data: Input_DataDT): else: matrix = ( - read_windowed_raster_gdal( + gdal_read_windowed_raster( filename=files[ind], smash_mesh=mesh, band=1, lacuna=-99.0 ) * setup.pet_conversion_factor @@ -269,6 +269,6 @@ def _read_descriptor(setup: SetupDT, mesh: MeshDT, input_data: Input_DataDT): ) else: - input_data.descriptor[..., i] = read_windowed_raster_gdal( + input_data.descriptor[..., i] = gdal_read_windowed_raster( filename=path[0], smash_mesh=mesh, band=1, lacuna=-99.0 ) diff --git a/smash/tools/__init__.py b/smash/tools/__init__.py new file mode 100644 index 00000000..8b137891 --- /dev/null +++ b/smash/tools/__init__.py @@ -0,0 +1 @@ + diff --git a/smash/core/raster.py b/smash/tools/raster_handler.py similarity index 89% rename from smash/core/raster.py rename to smash/tools/raster_handler.py index d0f5ae7d..a230522a 100644 --- a/smash/core/raster.py +++ b/smash/tools/raster_handler.py @@ -2,16 +2,13 @@ import numpy as np from osgeo import gdal - import os import errno from typing import TYPE_CHECKING if TYPE_CHECKING: - from smash.solver._mwd_setup import SetupDT from smash.solver._mwd_mesh import MeshDT - from smash.solver._mwd_input_data import Input_DataDT ### GDAL RASTER FUNCTIONS @@ -43,7 +40,7 @@ def gdal_raster_open(filename): return dataset -def read_windowed_raster_gdal( +def gdal_read_windowed_raster( filename: str, smash_mesh: MeshDT, band=None, lacuna=None ) -> np.ndarray: """ @@ -62,7 +59,7 @@ def read_windowed_raster_gdal( Examples -------- - array=read_windowed_raster_gdal("filename", model.mesh) + array=gdal_read_windowed_raster("filename", model.mesh) """ dataset = gdal_raster_open(filename) @@ -73,7 +70,7 @@ def read_windowed_raster_gdal( ): # Attempt to generate a smaller dataset before doing the reprojection. However, it is slower.. 
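# (A hedged expansion of the one-line experiment kept commented out just below;
# 'filename' and 'smash_mesh' are assumed to be in scope, and /vsimem is GDAL's
# in-memory filesystem. Sketch only, not an active code path:)
# ds = gdal_raster_open(filename)
# win = gdal_smash_window_from_geotransform(gdal_get_geotransform(ds), smash_mesh)
# ds = gdal.Translate("/vsimem/raster.tif", ds,
#                     srcWin=[win["col_off"], win["row_off"], win["ncol"], win["nrow"]])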
# ~ window=gdal_smash_window_from_geotransform(geotransform,smash_mesh) - # ~ dataset=gdal.Translate('/vsimem/raster.tif', dataset, srcWin=[window['col_off'], window['row_off'], window["ncols"], window["nrows"]]) + # ~ dataset=gdal.Translate('/vsimem/raster.tif', dataset, srcWin=[window['col_off'], window['row_off'], window["ncol"], window["nrow"]]) dataset = gdal_reproject_raster(dataset, smash_mesh.dx, smash_mesh.dx) geotransform = gdal_get_geotransform(dataset) @@ -86,7 +83,7 @@ def read_windowed_raster_gdal( window = gdal_smash_window_from_geotransform(geotransform, smash_mesh) - if band == None: + if band is None: array = gdal_crop_dataset_to_ndarray( dataset=dataset, window=window, lacuna=lacuna ) @@ -132,15 +129,15 @@ def gdal_reproject_raster(dataset, xres, yres): # Do we must distinguish cases smash_mesh.dx<=geotransform['xres','yres'] and smash_mesh.dx>geotransform['xres','yres'] ? i.e use ceiling or floor function instead of int ? # At least it work for case smash_mesh.dx<=geotransform['xres','yres'] which is the moste common case for modelling. - New_X_Size = int(dataset.RasterXSize * geotransform["xres"] / xres) - New_Y_Size = int(dataset.RasterYSize * geotransform["yres"] / yres) + new_x_size = int(dataset.RasterXSize * geotransform["xres"] / xres) + new_y_size = int(dataset.RasterYSize * geotransform["yres"] / yres) in_memory_dataset = gdal.GetDriverByName("MEM") virtual_destination = in_memory_dataset.Create( "", - New_X_Size, - New_Y_Size, + new_x_size, + new_y_size, dataset.RasterCount, dataset.GetRasterBand(1).DataType, ) @@ -148,13 +145,14 @@ def gdal_reproject_raster(dataset, xres, yres): ########################################################### # Workaround for gdal bug which initialise array to 0 instead as the No_Data value # Here we initialise the band manually with the nodata_value + nodata = dataset.GetRasterBand(1).GetNoDataValue() band = virtual_destination.GetRasterBand( 1 ) # Notice that band is a pointer to virtual_destination - band.SetNoDataValue(-9999) - Nodataarray = np.ndarray(shape=(New_Y_Size, New_X_Size)) - Nodataarray.fill(-9999.0) - band.WriteArray(Nodataarray) + band.SetNoDataValue(nodata) + nodataarray = np.ndarray(shape=(new_y_size, new_x_size)) + nodataarray.fill(nodata) + band.WriteArray(nodataarray) ########################################################### virtual_destination.SetGeoTransform(new_dataset_geotranform) @@ -199,15 +197,15 @@ def gdal_crop_dataset_to_array(dataset=object(), window={}, band=1, lacuna=None) dataset_band = dataset.GetRasterBand(band) sliced_array = dataset_band.ReadAsArray( - window["col_off"], window["row_off"], window["ncols"], window["nrows"] + window["col_off"], window["row_off"], window["ncol"], window["nrow"] ) array_float = sliced_array.astype("float64") # Lacuna treatment here if isinstance(lacuna, float): - Nodata = dataset_band.GetNoDataValue() - mask = np.where(sliced_array == Nodata) + nodata = dataset_band.GetNoDataValue() + mask = np.where(sliced_array == nodata) array_float[mask] = lacuna return array_float @@ -229,7 +227,7 @@ def gdal_crop_dataset_to_ndarray(dataset=object(), window={}, lacuna=None): dictionnary : a dictionary with ndarrays (depending the number of bands) Examples - ---------- + -------- window=gdal_smash_window_from_geotransform(dataset,smash_mesh) array=gdal_crop_dataset_to_array(dataset,window) """ @@ -240,15 +238,15 @@ def gdal_crop_dataset_to_ndarray(dataset=object(), window={}, lacuna=None): dataset_band = dataset.GetRasterBand(index) sliced_array = dataset_band.ReadAsArray( 
- window["col_off"], window["row_off"], window["ncols"], window["nrows"] + window["col_off"], window["row_off"], window["ncol"], window["nrow"] ) array_float = sliced_array.astype("float64") # Lacuna treatment here if isinstance(lacuna, float): - Nodata = dataset_band.GetNoDataValue() - mask = np.where(sliced_array == Nodata) + nodata = dataset_band.GetNoDataValue() + mask = np.where(sliced_array == nodata) array_float[mask] = lacuna dictionnary.update({index: array_float}) @@ -332,7 +330,7 @@ def gdal_get_geotransform(dataset): # ~ GT(5) n-s pixel resolution / pixel height (negative value for a north-up image). Examples - ---------- + -------- dataset = gdal_raster_open(filename) geotransform=gdal_get_geotransform(dataset) """ @@ -359,10 +357,10 @@ def gdal_smash_window_from_geotransform(geotransform, smash_mesh): Returns ------- - window : Python dictionnary with componnents: row_off (offset), col_off (offset), nrows, ncols + window : Python dictionnary with componnents: row_off (offset), col_off (offset), nrow, ncol Examples - ---------- + -------- dataset = gdal_raster_open(filename) geotransform=gdal_get_geotransform(dataset) window=gdal_smash_window_from_geotransform(geotransform,smash_mesh) @@ -378,8 +376,8 @@ def gdal_smash_window_from_geotransform(geotransform, smash_mesh): window = { "row_off": row_off, "col_off": col_off, - "nrows": int(smash_mesh.nrow * smash_mesh.dx / geotransform["yres"]), - "ncols": int(smash_mesh.ncol * smash_mesh.dx / geotransform["xres"]), + "nrow": int(smash_mesh.nrow * smash_mesh.dx / geotransform["yres"]), + "ncol": int(smash_mesh.ncol * smash_mesh.dx / geotransform["xres"]), } return window @@ -448,7 +446,7 @@ def get_bbox_from_window(dataset, window): Parameters ---------- dataset: gdal object - window : dict with ncols, nrows, col offset and row offset + window : dict with ncol, nrow, col offset and row offset returns ------- @@ -464,9 +462,9 @@ def get_bbox_from_window(dataset, window): """ geotransform = gdal_get_geotransform(dataset) left = geotransform["xleft"] + window["col_off"] * geotransform["xres"] - right = left + window["ncols"] * geotransform["xres"] + right = left + window["ncol"] * geotransform["xres"] top = geotransform["ytop"] - window["row_off"] * geotransform["yres"] - bottom = top - window["nrows"] * geotransform["yres"] + bottom = top - window["nrow"] * geotransform["yres"] bbox = {"left": left, "bottom": bottom, "right": right, "top": top} return bbox @@ -496,8 +494,8 @@ def get_window_from_bbox(dataset, bbox): col_off = (bbox["left"] - geotransform["xleft"]) / geotransform["xres"] row_off = (geotransform["ytop"] - bbox["top"]) / geotransform["yres"] - ncols = (bbox["right"] - bbox["left"]) / geotransform["xres"] - nrows = (bbox["top"] - bbox["bottom"]) / geotransform["yres"] + ncol = (bbox["right"] - bbox["left"]) / geotransform["xres"] + nrow = (bbox["top"] - bbox["bottom"]) / geotransform["yres"] if (col_off < 0) or (row_off < 0): raise Exception( @@ -507,8 +505,8 @@ def get_window_from_bbox(dataset, bbox): window = { "row_off": int(row_off), "col_off": int(col_off), - "nrows": int(nrows), - "ncols": int(ncols), + "nrow": int(nrow), + "ncol": int(ncol), } return window @@ -530,7 +528,7 @@ def crop_array(array, window): """ crop_array[ - window["col_off"] : window["col_off"] + window["ncols"], - window["row_off"] : window["row_off"] + window["nrows"], + window["col_off"] : window["col_off"] + window["ncol"], + window["row_off"] : window["row_off"] + window["nrow"], ] return crop_array From 
2b05f54290f1c9216675656bde7fe054c6116675 Mon Sep 17 00:00:00 2001 From: Jay-Allemand Maxime Date: Wed, 21 Jun 2023 17:42:30 +0200 Subject: [PATCH 38/73] Fix doc summary for raster_handler --- doc/source/api_reference/index.rst | 3 ++- doc/source/api_reference/raster_handler.rst | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/doc/source/api_reference/index.rst b/doc/source/api_reference/index.rst index feb7a57a..8c23e3f8 100644 --- a/doc/source/api_reference/index.rst +++ b/doc/source/api_reference/index.rst @@ -20,6 +20,7 @@ Core Python io generate_samples sparse_storage + raster_handler Wrapped Fortran @@ -28,4 +29,4 @@ Wrapped Fortran :maxdepth: 1 derived_type - optimize_routines \ No newline at end of file + optimize_routines diff --git a/doc/source/api_reference/raster_handler.rst b/doc/source/api_reference/raster_handler.rst index 6e00091a..3fde7a31 100644 --- a/doc/source/api_reference/raster_handler.rst +++ b/doc/source/api_reference/raster_handler.rst @@ -1,4 +1,4 @@ -.. _api_reference.io: +.. _api_reference.raster_handler: ============== Raster handler From 38b1bf39d5e8bc3260d8b23ff0ee6c22b42f3318 Mon Sep 17 00:00:00 2001 From: maximejay Date: Thu, 22 Jun 2023 10:08:50 +0200 Subject: [PATCH 39/73] Update doc/source/release/0.5.0-notes.rst Co-authored-by: Francois Colleoni <110899888+inoelloc@users.noreply.github.com> --- doc/source/release/0.5.0-notes.rst | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/doc/source/release/0.5.0-notes.rst b/doc/source/release/0.5.0-notes.rst index 717d890b..8b56b22e 100644 --- a/doc/source/release/0.5.0-notes.rst +++ b/doc/source/release/0.5.0-notes.rst @@ -39,8 +39,7 @@ New Features Spatial desagragation of the input raster ***************************************** -If the resolution of the input raster is diffrent of the resolution of the model mesh, the input raster are automatically reprojected by gdal. In that case the reading of the input can be slower. For best performances, it can be usful to preprocess the input files (precipitations). Some functions available in smash.core.raster could help for pre-processing. - +If the resolution of the input raster is different from the resolution of the model mesh, the input rasters are automatically reprojected by gdal. In that case the reading of the input can be slower. For best performances, it can be useful to preprocess the input files (precipitations). See the :ref:`API Reference ` section. ----- Fixes ----- From c5e71755e686dc86523981025f71f112047086db Mon Sep 17 00:00:00 2001 From: maximejay Date: Thu, 22 Jun 2023 10:09:08 +0200 Subject: [PATCH 40/73] Update doc/source/release/0.5.0-notes.rst Co-authored-by: Francois Colleoni <110899888+inoelloc@users.noreply.github.com> --- doc/source/release/0.5.0-notes.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/source/release/0.5.0-notes.rst b/doc/source/release/0.5.0-notes.rst index 8b56b22e..1a13bc98 100644 --- a/doc/source/release/0.5.0-notes.rst +++ b/doc/source/release/0.5.0-notes.rst @@ -36,8 +36,8 @@ Improvements New Features ------------ -Spatial desagragation of the input raster -***************************************** +Spatial disaggregation/aggregation of the input raster +****************************************************** If the resolution of the input raster is different from the resolution of the model mesh, the input rasters are automatically reprojected by gdal. In that case the reading of the input can be slower. 
For best performances, it can be useful to preprocess the input files (precipitations). See the :ref:`API Reference ` section. ----- From 46dcb411bf876b8848dfcd0999ba02c57a757823 Mon Sep 17 00:00:00 2001 From: maximejay Date: Thu, 22 Jun 2023 10:09:19 +0200 Subject: [PATCH 41/73] Update smash/core/_read_input_data.py Co-authored-by: Francois Colleoni <110899888+inoelloc@users.noreply.github.com> --- smash/core/_read_input_data.py | 1 - 1 file changed, 1 deletion(-) diff --git a/smash/core/_read_input_data.py b/smash/core/_read_input_data.py index 6cc8ba26..5d0aa77f 100644 --- a/smash/core/_read_input_data.py +++ b/smash/core/_read_input_data.py @@ -19,7 +19,6 @@ import pandas as pd import numpy as np import datetime -from osgeo import gdal def _read_qobs(setup: SetupDT, mesh: MeshDT, input_data: Input_DataDT): From 305d74611bf3aa3af42e2dcb709fb47dd676888f Mon Sep 17 00:00:00 2001 From: inoelloc Date: Thu, 22 Jun 2023 10:45:11 +0200 Subject: [PATCH 42/73] FIX: Add new line before release note title --- doc/source/release/0.5.0-notes.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/source/release/0.5.0-notes.rst b/doc/source/release/0.5.0-notes.rst index 072cb2c8..e1f96e8c 100644 --- a/doc/source/release/0.5.0-notes.rst +++ b/doc/source/release/0.5.0-notes.rst @@ -40,6 +40,7 @@ Spatial disaggregation/aggregation of the input raster ****************************************************** If the resolution of the input raster is different from the resolution of the model mesh, the input rasters are automatically reprojected by gdal. In that case the reading of the input can be slower. For best performances, it can be useful to preprocess the input files (precipitations). See the :ref:`API Reference ` section. + ----- Fixes ----- @@ -57,4 +58,4 @@ Event signatures computation The bug related to the computation of flood event signatures has been resolved for specific cases where the peak event is observed during the last time steps in the time window. -See issue `#28 `__. \ No newline at end of file +See issue `#28 `__. From c54eb372222f7d215bcce73b66b1c7b6f0537f7a Mon Sep 17 00:00:00 2001 From: Jay-Allemand Maxime Date: Tue, 27 Jun 2023 10:27:48 +0200 Subject: [PATCH 43/73] Fix missing quote in venv-install when installing gdal Fix missing variable $env-name in venv-path --- venv_install.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/venv_install.sh b/venv_install.sh index 77b53e03..a13bce8a 100755 --- a/venv_install.sh +++ b/venv_install.sh @@ -32,7 +32,7 @@ if [ ! -d ${venv_path}/${env_name} ] ; then #creating a python environment and activate it python3 -m venv "${venv_path}/.venv-${env_name}" - ln "${venv_path}/.venv-smash/bin/activate" "${venv_path}/${env_name}" + ln "${venv_path}/.venv-${env_name}/bin/activate" "${venv_path}/${env_name}" source ${venv_path}/${env_name} #install minimal python dependencies @@ -43,7 +43,7 @@ if [ ! -d ${venv_path}/${env_name} ] ; then pip install wheel #manually intalling gdal, because it depends on the version of the installed system library - pip install GDAL<=$(gdal-config --version) --global-option=build_ext --global-option="-I/usr/include/gdal" + pip install "GDAL<=$(gdal-config --version)" --global-option=build_ext --global-option="-I/usr/include/gdal" echo '' echo 'Building Smash...' 
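(Illustrative aside: a quick way to verify that the GDAL pin in the hunk above did its job is to compare the Python binding with the system library at runtime. This is a hedged, standalone sketch — it assumes gdal-config is on the PATH and plain numeric version strings — not part of the patch series.)

    import subprocess
    from osgeo import gdal

    sys_ver = subprocess.run(
        ["gdal-config", "--version"], capture_output=True, text=True
    ).stdout.strip()
    py_ver = gdal.__version__
    # the install script enforces python-GDAL <= system GDAL; check that it held
    assert tuple(map(int, py_ver.split("."))) <= tuple(map(int, sys_ver.split("."))), (
        f"python binding {py_ver} is newer than system GDAL {sys_ver}"
    )
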
From 81554537cd2732a6be088864b897d803beb88383 Mon Sep 17 00:00:00 2001
From: inoelloc
Date: Tue, 27 Jun 2023 16:21:47 +0200
Subject: [PATCH 44/73] FIX: Fix smash.generate_mesh segmentation fault

This commit should fix a segmentation fault when two neighboring cells have antagonistic flow directions.
---
 doc/source/release/0.5.0-notes.rst | 12 +++++++++++-
 smash/mesh/mw_meshing.f90 | 10 +++++++---
 2 files changed, 18 insertions(+), 4 deletions(-)

diff --git a/doc/source/release/0.5.0-notes.rst b/doc/source/release/0.5.0-notes.rst
index e1f96e8c..77038716 100644
--- a/doc/source/release/0.5.0-notes.rst
+++ b/doc/source/release/0.5.0-notes.rst
@@ -39,7 +39,9 @@ New Features
 Spatial disaggregation/aggregation of the input raster
 ******************************************************
 
-If the resolution of the input raster is different from the resolution of the model mesh, the input rasters are automatically reprojected by gdal. In that case the reading of the input can be slower. For best performances, it can be useful to preprocess the input files (precipitations). See the :ref:`API Reference ` section.
+If the resolution of the input raster is different from the resolution of the model mesh, the input rasters are automatically reprojected by gdal. In that case the reading of the input can be slower. For best performances, it can be useful to preprocess the input files (precipitations).
+
+See API Reference section :ref:`api_reference.raster_handler`.
 
 -----
 Fixes
 -----
@@ -59,3 +61,11 @@ Event signatures computation
 
 The bug related to the computation of flood event signatures has been resolved for specific cases where the peak event is observed during the last time steps in the time window.
 
 See issue `#28 `__.
+
+``smash.generate_mesh`` segmentation fault
+******************************************
+
+An error occurred when two neighboring cells have antagonistic flow directions ``(1, 5)``, ``(2, 6)``, ``(3, 7)``, ``(4, 8)``. This should be corrected directly in the flow direction file, but to avoid
+segmentation faults when the maximum number of recursions has been reached, a check is added to the code to exit recursion in that case.
+
+See issue `#31 `__.
diff --git a/smash/mesh/mw_meshing.f90 b/smash/mesh/mw_meshing.f90
index e555e180..d566d2c6 100644
--- a/smash/mesh/mw_meshing.f90
+++ b/smash/mesh/mw_meshing.f90
@@ -27,10 +27,14 @@ recursive subroutine mask_upstream_cells(nrow, ncol, flwdir, row, col, mask)
 
             if (row_imd .gt. 0 .and. row_imd .le. nrow .and. &
             col_imd .gt. 0 .and. col_imd .le. ncol) then
 
-                if (flwdir(row_imd, col_imd) .eq. i) then
+                if (abs(flwdir(row, col) - flwdir(row_imd, col_imd)) .ne. 4) then
 
-                    call mask_upstream_cells(nrow, ncol, flwdir, row_imd, &
-                    & col_imd, mask)
+                    if (flwdir(row_imd, col_imd) .eq. 
i) then
+
+                        call mask_upstream_cells(nrow, ncol, flwdir, row_imd, &
+                        & col_imd, mask)
+
+                    end if
 
                 end if
 
From 7c7aeab38e44958441d352d7199a7c471defb7d6 Mon Sep 17 00:00:00 2001
From: maximejay
Date: Mon, 3 Jul 2023 10:11:08 +0200
Subject: [PATCH 45/73] Update doc/source/release/0.5.0-notes.rst

Co-authored-by: Francois Colleoni <110899888+inoelloc@users.noreply.github.com>
---
 doc/source/release/0.5.0-notes.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/release/0.5.0-notes.rst b/doc/source/release/0.5.0-notes.rst
index cb598c58..5ce2245a 100644
--- a/doc/source/release/0.5.0-notes.rst
+++ b/doc/source/release/0.5.0-notes.rst
@@ -32,7 +32,7 @@ The other two attributes, ``data`` and ``lcurve``, are still available and can b
 Improvements
 ------------
 
-Reading precipitation with YY/MM/DD access
+Reading atmospheric data with YYYY/MM/DD access
 ******************************************
 
 This mode is triggered by the flag setup.prcp_yymmdd_access=True. The precipitation file suppose to be stored in a directory YY/MM/dd. This option is useful of the model is ran time step by time step (many incremental runs). In that case searching the precipitation files can be relatively slow (1 second multiplicate by the number of run). With this mode it is optimized and it is faster.
 
From a3ce431e982c47c4d1b2844b36de872d4e276eba Mon Sep 17 00:00:00 2001
From: maximejay
Date: Mon, 3 Jul 2023 10:11:29 +0200
Subject: [PATCH 46/73] Update doc/source/release/0.5.0-notes.rst

Co-authored-by: Francois Colleoni <110899888+inoelloc@users.noreply.github.com>
---
 doc/source/release/0.5.0-notes.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/release/0.5.0-notes.rst b/doc/source/release/0.5.0-notes.rst
index 5ce2245a..c8ab7509 100644
--- a/doc/source/release/0.5.0-notes.rst
+++ b/doc/source/release/0.5.0-notes.rst
@@ -35,7 +35,7 @@ Improvements
 Reading atmospheric data with YYYY/MM/DD access
 ******************************************
 
-This mode is triggered by the flag setup.prcp_yymmdd_access=True. The precipitation file suppose to be stored in a directory YY/MM/dd. This option is useful of the model is ran time step by time step (many incremental runs). In that case searching the precipitation files can be relatively slow (1 second multiplicate by the number of run). With this mode it is optimized and it is faster.
+This mode is triggered by enabling the flag prcp_yyyymmdd_access in the model setup file. The atmospheric data files are supposed to be stored in a directory YYYY/MM/dd. This option is useful if the model is run time step by time step (many incremental runs). In that case searching the atmospheric data files can be relatively slow (1 second multiplied by the number of runs). With this mode it is optimized and it is faster. 
------------ New Features From 230ac99a76f4d7bf8be705b22217c007e6e907ec Mon Sep 17 00:00:00 2001 From: maximejay Date: Mon, 3 Jul 2023 10:11:42 +0200 Subject: [PATCH 47/73] Update doc/source/release/0.5.0-notes.rst Co-authored-by: Francois Colleoni <110899888+inoelloc@users.noreply.github.com> --- doc/source/release/0.5.0-notes.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/release/0.5.0-notes.rst b/doc/source/release/0.5.0-notes.rst index c8ab7509..9e48d983 100644 --- a/doc/source/release/0.5.0-notes.rst +++ b/doc/source/release/0.5.0-notes.rst @@ -44,7 +44,7 @@ New Features New regularization function **************************** -hard-smoothing : the smoothing regularization function is applied on parameter or states directly. This behavior differ from the "smoothing" mode where the regularization is applied on the difference between the background and the control (parameters or states) +hard-smoothing : the smoothing regularization function is applied on parameters or states directly. This behavior differs from the ``smoothing`` mode where the regularization is applied on the difference between the background and the control (parameters or states) New functions for reading and writting hdf5 files ************************************************* From df8a68769f4773e9871afc4ed5374a603c397f99 Mon Sep 17 00:00:00 2001 From: maximejay Date: Mon, 3 Jul 2023 10:12:14 +0200 Subject: [PATCH 48/73] Update doc/source/release/0.5.0-notes.rst Co-authored-by: Francois Colleoni <110899888+inoelloc@users.noreply.github.com> --- doc/source/release/0.5.0-notes.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/release/0.5.0-notes.rst b/doc/source/release/0.5.0-notes.rst index 9e48d983..1a9d1201 100644 --- a/doc/source/release/0.5.0-notes.rst +++ b/doc/source/release/0.5.0-notes.rst @@ -49,7 +49,7 @@ hard-smoothing : the smoothing regularization function is applied on parameters New functions for reading and writting hdf5 files ************************************************* -The new function are generic. You can save a dictionary to an hdf5, save an object (not only smash) to an hdf5, read an object as dictionary, read an hdf5 as a dict, read an hdf5 as a smash model object. Functions are provided by smash.io.hdf5_io.py. hdf5 can be opened in read-only to provide several simultaneous access. During the export or the reading, the structure of the dictionnary or object are preserved. When saving an object or a dictionnary in an hdf5, the location can be specified so that dictionary or object can be saved side by side at different places. +The new functions are generic. You can save a dictionary to an hdf5, save an object (not only smash) to an hdf5, read an object as dictionary, read an hdf5 as a dict, read an hdf5 as a smash model object. Functions are provided by smash.io.hdf5_io.py. hdf5 can be opened in read-only to provide several simultaneous access. During the export or the reading, the structure of the dictionary or object is preserved. When saving an object or a dictionary in an hdf5, the location can be specified so that dictionary or object can be saved side by side at different places. 
Spatial disaggregation/aggregation of the input raster ****************************************************** From 445283cf91034d66563a0cb70bc1f15865f96a8b Mon Sep 17 00:00:00 2001 From: maximejay Date: Mon, 3 Jul 2023 10:12:39 +0200 Subject: [PATCH 49/73] Update smash/io/hdf5_io.py Co-authored-by: Francois Colleoni <110899888+inoelloc@users.noreply.github.com> --- smash/io/hdf5_io.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/smash/io/hdf5_io.py b/smash/io/hdf5_io.py index b8a2a2de..75b80ad4 100644 --- a/smash/io/hdf5_io.py +++ b/smash/io/hdf5_io.py @@ -419,7 +419,7 @@ def dump_object_to_hdf5_from_dict_attribute(hdf5,instance,dict_attr): else : raise ValueError( - f"Bad type of '{attr}' in '{dict_attr}'. Dict({attr}) must be a instance of dict, list or str" + f"inconsistent '{attr}' in '{dict_attr}'. Dict({attr}) must be a instance of dict, list or str" ) else: From 55c6a101b1f63485117ed6432e1652605ee9554b Mon Sep 17 00:00:00 2001 From: maximejay Date: Mon, 3 Jul 2023 10:21:36 +0200 Subject: [PATCH 50/73] Update smash/solver/derived_type/mwd_setup.f90 Co-authored-by: Francois Colleoni <110899888+inoelloc@users.noreply.github.com> --- smash/solver/derived_type/mwd_setup.f90 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/smash/solver/derived_type/mwd_setup.f90 b/smash/solver/derived_type/mwd_setup.f90 index 540573db..69daf929 100644 --- a/smash/solver/derived_type/mwd_setup.f90 +++ b/smash/solver/derived_type/mwd_setup.f90 @@ -15,7 +15,7 @@ !% ``qobs_directory`` Observed discharge directory path (default: '...') !% ``read_prcp`` Read precipitation (default: .false.) !% ``prcp_format`` Precipitation format (default: 'tif') -!% ``prcp_yymmdd_access`` Access with absolute path (YY/MM/dd/) (default: .false.) +!% ``prcp_yyyymmdd_access`` Access with absolute path (YYYY/MM/dd/) (default: .false.) !% ``prcp_conversion_factor`` Precipitation conversion factor (default: 1) !% ``prcp_directory`` Precipiation directory path (default: '...') !% ``read_pet`` Reap potential evapotranspiration (default: .false.) From 41252d0e7001ccc0caf6bd0ef20ed76dfd3db2fb Mon Sep 17 00:00:00 2001 From: maximejay Date: Mon, 3 Jul 2023 10:22:08 +0200 Subject: [PATCH 51/73] Update smash/solver/derived_type/mwd_setup.f90 Co-authored-by: Francois Colleoni <110899888+inoelloc@users.noreply.github.com> --- smash/solver/derived_type/mwd_setup.f90 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/smash/solver/derived_type/mwd_setup.f90 b/smash/solver/derived_type/mwd_setup.f90 index 69daf929..85b8a34c 100644 --- a/smash/solver/derived_type/mwd_setup.f90 +++ b/smash/solver/derived_type/mwd_setup.f90 @@ -124,7 +124,7 @@ module mwd_setup logical :: read_prcp = .false. character(lchar) :: prcp_format = "tif" !>f90w-char - logical :: prcp_yymmdd_access = .false. + logical :: prcp_yyyymmdd_access = .false. real(sp) :: prcp_conversion_factor = 1._sp character(lchar) :: prcp_directory = "..." 
!>f90w-char From 05e19ab571d9ab6f9712c88d62afd087d30b032e Mon Sep 17 00:00:00 2001 From: maximejay Date: Mon, 3 Jul 2023 10:45:04 +0200 Subject: [PATCH 52/73] Update smash/core/_read_input_data.py Co-authored-by: Francois Colleoni <110899888+inoelloc@users.noreply.github.com> --- smash/core/_read_input_data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/smash/core/_read_input_data.py b/smash/core/_read_input_data.py index b6cf2288..e4ef8e44 100644 --- a/smash/core/_read_input_data.py +++ b/smash/core/_read_input_data.py @@ -155,7 +155,7 @@ def _read_prcp(setup: SetupDT, mesh: MeshDT, input_data: Input_DataDT): freq=f"{int(setup.dt)}s", )[1:] - if setup.prcp_yymmdd_access==True : + if setup.prcp_yyyymmdd_access: files=_list_prcp_file(setup) From 9f8ce9bb39b1d88d9da796656a5309e0bfa86cb6 Mon Sep 17 00:00:00 2001 From: maximejay Date: Mon, 3 Jul 2023 11:04:04 +0200 Subject: [PATCH 53/73] Update smash/io/hdf5_io.py Co-authored-by: Francois Colleoni <110899888+inoelloc@users.noreply.github.com> --- smash/io/hdf5_io.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/smash/io/hdf5_io.py b/smash/io/hdf5_io.py index 75b80ad4..d87b818e 100644 --- a/smash/io/hdf5_io.py +++ b/smash/io/hdf5_io.py @@ -77,7 +77,7 @@ def open_hdf5(path, read_only=False, replace=False): else: - if replace==True: + if replace: f= h5py.File(path, "w") From bd0c3050141321cda5571c76634c6547164a36a0 Mon Sep 17 00:00:00 2001 From: maximejay Date: Mon, 3 Jul 2023 11:04:41 +0200 Subject: [PATCH 54/73] Update smash/io/hdf5_io.py Co-authored-by: Francois Colleoni <110899888+inoelloc@users.noreply.github.com> --- smash/io/hdf5_io.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/smash/io/hdf5_io.py b/smash/io/hdf5_io.py index d87b818e..5bbc2fa0 100644 --- a/smash/io/hdf5_io.py +++ b/smash/io/hdf5_io.py @@ -366,7 +366,7 @@ def dump_object_to_hdf5_from_list_attribute(hdf5,instance,list_attr): else: raise ValueError( - f"unconsistant {attr} in {list_attr}. {attr} must be a an instance of dict, list or str" + f"inconsistent {attr} in {list_attr}. 
{attr} must be a an instance of dict, list or str" ) else: From 66b1fbe124367da413718ffadf2a59b97d89d655 Mon Sep 17 00:00:00 2001 From: Jay-Allemand Maxime Date: Mon, 3 Jul 2023 11:06:05 +0200 Subject: [PATCH 55/73] Rename some functions with underscore --- smash/io/hdf5_io.py | 66 ++++++++++++++++++++++----------------------- 1 file changed, 32 insertions(+), 34 deletions(-) diff --git a/smash/io/hdf5_io.py b/smash/io/hdf5_io.py index 75b80ad4..5b0cbba6 100644 --- a/smash/io/hdf5_io.py +++ b/smash/io/hdf5_io.py @@ -129,7 +129,7 @@ def add_hdf5_sub_group(hdf5, subgroup=None): return hdf5 -def generate_light_smash_object_structure(structure: str,structure_parameters=STRUCTURE_PARAMETERS,structure_states=STRUCTURE_STATES): +def _generate_light_smash_object_structure(structure: str,structure_parameters=STRUCTURE_PARAMETERS,structure_states=STRUCTURE_STATES): """ this function create a light dictionnary containing the required data-structure to save a smash model object to an hdf5 file @@ -164,7 +164,7 @@ def generate_light_smash_object_structure(structure: str,structure_parameters=ST -def generate_medium_smash_object_structure(structure: str,structure_parameters=STRUCTURE_PARAMETERS,structure_states=STRUCTURE_STATES): +def _generate_medium_smash_object_structure(structure: str,structure_parameters=STRUCTURE_PARAMETERS,structure_states=STRUCTURE_STATES): """ this function create a medium dictionnary containing the required data-structure to save a smash model object to an hdf5 file @@ -204,7 +204,7 @@ def generate_medium_smash_object_structure(structure: str,structure_parameters=S } -def generate_full_smash_object_structure(instance): +def _generate_full_smash_object_structure(instance): """ this function create a full dictionnary containing all the structure of an smash model object in order to save it to an hdf5 @@ -227,8 +227,6 @@ def generate_full_smash_object_structure(instance): return key_list - - def generate_object_structure(instance): """ this function create a full dictionnary containing all the structure of an object in order to save it to an hdf5 @@ -319,22 +317,22 @@ def generate_smash_object_structure(instance,typeofstructure="medium"): if typeofstructure=="light": - key_data=generate_light_smash_object_structure(structure) + key_data=_generate_light_smash_object_structure(structure) elif typeofstructure=="medium": - key_data=generate_medium_smash_object_structure(structure) + key_data=_generate_medium_smash_object_structure(structure) elif typeofstructure=="full": - key_data=generate_full_smash_object_structure(instance) + key_data=_generate_full_smash_object_structure(instance) return key_data -def dump_object_to_hdf5_from_list_attribute(hdf5,instance,list_attr): +def _dump_object_to_hdf5_from_list_attribute(hdf5,instance,list_attr): """ dump a object to a hdf5 file from a list of attributes @@ -353,15 +351,15 @@ def dump_object_to_hdf5_from_list_attribute(hdf5,instance,list_attr): if isinstance(attr, str): - dump_object_to_hdf5_from_str_attribute(hdf5, instance, attr) + _dump_object_to_hdf5_from_str_attribute(hdf5, instance, attr) elif isinstance(attr,list): - dump_object_to_hdf5_from_list_attribute(hdf5, instance, attr) + _dump_object_to_hdf5_from_list_attribute(hdf5, instance, attr) elif isinstance(attr,dict): - dump_object_to_hdf5_from_dict_attribute(hdf5, instance, attr) + _dump_object_to_hdf5_from_dict_attribute(hdf5, instance, attr) else: @@ -377,7 +375,7 @@ def dump_object_to_hdf5_from_list_attribute(hdf5,instance,list_attr): -def 
dump_object_to_hdf5_from_dict_attribute(hdf5,instance,dict_attr): +def _dump_object_to_hdf5_from_dict_attribute(hdf5,instance,dict_attr): """ dump a object to a hdf5 file from a dictionary of attributes @@ -406,15 +404,15 @@ def dump_object_to_hdf5_from_dict_attribute(hdf5,instance,dict_attr): if isinstance(value,dict): - dump_object_to_hdf5_from_dict_attribute(hdf5[attr], sub_instance, value) + _dump_object_to_hdf5_from_dict_attribute(hdf5[attr], sub_instance, value) if isinstance(value,list): - dump_object_to_hdf5_from_list_attribute(hdf5[attr], sub_instance, value) + _dump_object_to_hdf5_from_list_attribute(hdf5[attr], sub_instance, value) elif isinstance(value,str): - dump_object_to_hdf5_from_str_attribute(hdf5[attr], sub_instance, value) + _dump_object_to_hdf5_from_str_attribute(hdf5[attr], sub_instance, value) else : @@ -430,7 +428,7 @@ def dump_object_to_hdf5_from_dict_attribute(hdf5,instance,dict_attr): -def dump_object_to_hdf5_from_str_attribute(hdf5,instance,str_attr): +def _dump_object_to_hdf5_from_str_attribute(hdf5,instance,str_attr): """ dump a object to a hdf5 file from a string attribute @@ -488,7 +486,7 @@ def dump_object_to_hdf5_from_str_attribute(hdf5,instance,str_attr): -def dump_object_to_hdf5_from_iteratable(hdf5, instance, iteratable=None): +def _dump_object_to_hdf5_from_iteratable(hdf5, instance, iteratable=None): """ dump a object to a hdf5 file from a iteratable object list or dict @@ -510,20 +508,20 @@ def dump_object_to_hdf5_from_iteratable(hdf5, instance, iteratable=None): hdf5=smash.io.multi_model_io.open_hdf5("./model.hdf5", replace=True) hdf5=smash.io.multi_model_io.add_hdf5_sub_group(hdf5, subgroup="model1") keys_data=smash.io.multi_model_io.generate_smash_object_structure(model,typeofstructure="medium") - smash.io.multi_model_io.dump_object_to_hdf5_from_iteratable(hdf5["model1"], model, keys_data) + smash.io.multi_model_io._dump_object_to_hdf5_from_iteratable(hdf5["model1"], model, keys_data) hdf5=smash.io.multi_model_io.open_hdf5("./model.hdf5", replace=False) hdf5=smash.io.multi_model_io.add_hdf5_sub_group(hdf5, subgroup="model2") keys_data=smash.io.multi_model_io.generate_smash_object_structure(model,typeofstructure="light") - smash.io.multi_model_io.dump_object_to_hdf5_from_iteratable(hdf5["model2"], model, keys_data) + smash.io.multi_model_io._dump_object_to_hdf5_from_iteratable(hdf5["model2"], model, keys_data) """ if isinstance(iteratable,list): - dump_object_to_hdf5_from_list_attribute(hdf5,instance,iteratable) + _dump_object_to_hdf5_from_list_attribute(hdf5,instance,iteratable) elif isinstance(iteratable,dict): - dump_object_to_hdf5_from_dict_attribute(hdf5,instance,iteratable) + _dump_object_to_hdf5_from_dict_attribute(hdf5,instance,iteratable) else : @@ -533,7 +531,7 @@ def dump_object_to_hdf5_from_iteratable(hdf5, instance, iteratable=None): -def dump_dict_to_hdf5(hdf5,dictionary): +def _dump_dict_to_hdf5(hdf5,dictionary): """ dump a dictionary to an hdf5 file @@ -553,7 +551,7 @@ def dump_dict_to_hdf5(hdf5,dictionary): if isinstance(value,(dict)): hdf5=add_hdf5_sub_group(hdf5, subgroup=attr) - dump_dict_to_hdf5(hdf5[attr],value) + _dump_dict_to_hdf5(hdf5[attr],value) elif isinstance(value, (np.ndarray,list)): @@ -625,7 +623,7 @@ def save_dict_to_hdf5(path_to_hdf5,dictionary=None,location="./",replace=False): hdf5=open_hdf5(path_to_hdf5, replace=replace) hdf5=add_hdf5_sub_group(hdf5, subgroup=location) - dump_dict_to_hdf5(hdf5[location], dictionary) + _dump_dict_to_hdf5(hdf5[location], dictionary) else: @@ -660,11 +658,11 @@ def 
save_object_to_hdf5(f_hdf5, instance, keys_data=None, location="./", sub_dat hdf5=open_hdf5(f_hdf5, replace=replace) hdf5=add_hdf5_sub_group(hdf5, subgroup=location) - dump_object_to_hdf5_from_iteratable(hdf5[location], instance, keys_data) + _dump_object_to_hdf5_from_iteratable(hdf5[location], instance, keys_data) if isinstance(sub_data,dict): - dump_dict_to_hdf5(hdf5[location], sub_data) + _dump_dict_to_hdf5(hdf5[location], sub_data) hdf5.close() @@ -716,15 +714,15 @@ def save_smash_model_to_hdf5(path_to_hdf5, instance, keys_data=None, content="me """ if content == "light": - keys_data=generate_light_smash_object_structure(instance.setup.structure) + keys_data=_generate_light_smash_object_structure(instance.setup.structure) elif content == "medium": - keys_data=generate_medium_smash_object_structure(instance.setup.structure) + keys_data=_generate_medium_smash_object_structure(instance.setup.structure) elif content == "full": - keys_data=generate_full_smash_object_structure(instance) + keys_data=_generate_full_smash_object_structure(instance) if isinstance(keys_data,(dict,list)): @@ -773,7 +771,7 @@ def load_hdf5_file(f_hdf5,as_model=False): else: hdf5=open_hdf5(f_hdf5, read_only=True, replace=False) - dictionary=read_hdf5_to_dict(hdf5) + dictionary=read_hdf5_as_dict(hdf5) hdf5.close() return dictionary @@ -836,7 +834,7 @@ def read_object_as_dict(instance): -def read_hdf5_to_dict(hdf5): +def read_hdf5_as_dict(hdf5): """ Load an hdf5 file @@ -853,7 +851,7 @@ def read_hdf5_to_dict(hdf5): -------- #read only a part of an hdf5 file hdf5=smash.io.multi_model_io.open_hdf5("./multi_model.hdf5") - dictionary=smash.io.multi_model_io.read_hdf5_to_dict(hdf5["model1"]) + dictionary=smash.io.multi_model_io.read_hdf5_as_dict(hdf5["model1"]) dictionary.keys() """ dictionary={} @@ -862,7 +860,7 @@ def read_hdf5_to_dict(hdf5): if str(type(item)).find("group") != -1: - dictionary.update({key:read_hdf5_to_dict(item)}) + dictionary.update({key:read_hdf5_as_dict(item)}) list_attr=list(item.attrs.keys()) From 1b4ab066bf7cef8c3bbabefe30727aa2eb28be6f Mon Sep 17 00:00:00 2001 From: Jay-Allemand Maxime Date: Mon, 3 Jul 2023 11:22:19 +0200 Subject: [PATCH 56/73] just keep save_model_to_hdf5 and load_hdf5 in __init__.py and __all__ --- hdf5_io_tests.py | 10 +++++++--- smash/__init__.py | 3 +-- smash/io/hdf5_io.py | 13 ++++--------- 3 files changed, 12 insertions(+), 14 deletions(-) diff --git a/hdf5_io_tests.py b/hdf5_io_tests.py index e290cde6..fefadc20 100644 --- a/hdf5_io_tests.py +++ b/hdf5_io_tests.py @@ -10,7 +10,7 @@ smash.io.hdf5_io.save_dict_to_hdf5("saved_dictionary.hdf5",mesh) #generate the structure of the object: it is a dict of key:data to save: typeofstructure={light,medium,full} -keys_data=smash.generate_smash_object_structure(model,typeofstructure="medium") +keys_data=smash.io.hdf5_io.generate_smash_object_structure(model,typeofstructure="medium") print(keys_data) #add a new data to save: keys_data["parameters"].append('ci') @@ -64,12 +64,12 @@ hdf5=smash.io.hdf5_io.open_hdf5("./model_subgroup.hdf5", replace=True) hdf5=smash.io.hdf5_io.add_hdf5_sub_group(hdf5, subgroup="model1") keys_data=smash.io.hdf5_io.generate_smash_object_structure(model,typeofstructure="medium") -smash.io.hdf5_io.dump_object_to_hdf5_from_iteratable(hdf5["model1"], model, keys_data) +smash.io.hdf5_io._dump_object_to_hdf5_from_iteratable(hdf5["model1"], model, keys_data) hdf5=smash.io.hdf5_io.open_hdf5("./model_subgroup.hdf5", replace=False) hdf5=smash.io.hdf5_io.add_hdf5_sub_group(hdf5, subgroup="model2") 
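# (editor's note: same low-level pattern as for "model1" above: reopen the file
# in append mode via replace=False, create the "model2" subgroup, then dump the
# generated key structure into it with the two calls that follow)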
keys_data=smash.io.hdf5_io.generate_smash_object_structure(model,typeofstructure="medium") -smash.io.hdf5_io.dump_object_to_hdf5_from_iteratable(hdf5["model2"], model, keys_data) +smash.io.hdf5_io._dump_object_to_hdf5_from_iteratable(hdf5["model2"], model, keys_data) hdf5.keys() hdf5["model1"].keys() @@ -103,4 +103,8 @@ model_reloaded model_reloaded.run() +#TODO : +#- model_reloaded need to be a full hdf5 !! How to test that ? +#- rename file to hdf5_handler.py and move it to tools/ +# move save_model_to_hdf5 and load_hdf5 to an other file dan io/ diff --git a/smash/__init__.py b/smash/__init__.py index 4bb602df..09e5d972 100644 --- a/smash/__init__.py +++ b/smash/__init__.py @@ -13,7 +13,7 @@ from smash.io.mesh_io import save_mesh, read_mesh from smash.io.model_io import save_model, read_model from smash.io.model_ddt_io import save_model_ddt, read_model_ddt -from smash.io.hdf5_io import save_smash_model_to_hdf5, load_hdf5_file, generate_smash_object_structure +from smash.io.hdf5_io import save_smash_model_to_hdf5, load_hdf5_file from smash.dataset.load import load_dataset @@ -47,7 +47,6 @@ def __getattr__(name): "read_model_ddt", "save_smash_model_to_hdf5", "load_hdf5_file", - "generate_smash_object_structure", "load_dataset", ] diff --git a/smash/io/hdf5_io.py b/smash/io/hdf5_io.py index bfde6508..181bfc44 100644 --- a/smash/io/hdf5_io.py +++ b/smash/io/hdf5_io.py @@ -1,15 +1,9 @@ from __future__ import annotations -from typing import TYPE_CHECKING - -if TYPE_CHECKING: - from smash.core.model import Model - from smash.core._constant import STRUCTURE_PARAMETERS, STRUCTURE_STATES from smash.io._error import ReadHDF5MethodError - from smash.solver._mwd_setup import SetupDT from smash.solver._mwd_mesh import MeshDT from smash.solver._mwd_input_data import Input_DataDT @@ -19,8 +13,6 @@ from smash.core._build_model import _build_mesh - - import os import errno import warnings @@ -29,9 +21,12 @@ import pandas as pd import smash +from typing import TYPE_CHECKING +if TYPE_CHECKING: + from smash.core.model import Model -__all__ = ["save_object_to_hdf5", "save_dict_to_hdf5", "save_smash_model_to_hdf5", "load_hdf5_file", "read_object_as_dict"] +__all__ = ["save_smash_model_to_hdf5", "load_hdf5_file"] From d79443cdfa8e99ce00b7fff824447b1eb4569d0c Mon Sep 17 00:00:00 2001 From: Jay-Allemand Maxime Date: Wed, 5 Jul 2023 11:48:11 +0200 Subject: [PATCH 57/73] Fix : Test if the hdf5 contains the full smash object structure before loading as a smash model. If not the case, raise an explicit error. --- smash/io/hdf5_io.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/smash/io/hdf5_io.py b/smash/io/hdf5_io.py index 181bfc44..81ecec74 100644 --- a/smash/io/hdf5_io.py +++ b/smash/io/hdf5_io.py @@ -976,6 +976,11 @@ def read_hdf5_to_model_object(path: str) -> Model: if os.path.isfile(path): with h5py.File(path, "r") as f: + if not f.attrs.__contains__('_last_update'): + raise ValueError( + f'The hdf5 file {path} does not contain the full smash object structure and therefore cannot be loaded as a smash model object. The full structure of a smash model object can be saved using smash.save_smash_model_to_hdf5(filename, smash_model, content="full").' 
+ ) + instance = smash.Model(None, None) if "descriptor_name" in f["setup"].keys(): From d01824031b5e7195a50c58b8b242c93fd958fe0e Mon Sep 17 00:00:00 2001 From: Jay-Allemand Maxime Date: Wed, 5 Jul 2023 12:32:41 +0200 Subject: [PATCH 58/73] Split hdf5_io.py -> tools/hdf5_handler.py #generic function to handle hdf5 -> tools/object_handler.py #only handle object -> io/hdf5_io.py #only handle hdf5 <=> smash model object --- smash/io/hdf5_io.py | 649 +--------------------------------- smash/tools/hdf5_handler.py | 514 +++++++++++++++++++++++++++ smash/tools/object_handler.py | 127 +++++++ 3 files changed, 649 insertions(+), 641 deletions(-) create mode 100644 smash/tools/hdf5_handler.py create mode 100644 smash/tools/object_handler.py diff --git a/smash/io/hdf5_io.py b/smash/io/hdf5_io.py index 81ecec74..a71ed73a 100644 --- a/smash/io/hdf5_io.py +++ b/smash/io/hdf5_io.py @@ -13,6 +13,9 @@ from smash.core._build_model import _build_mesh +from smash.tools import hdf5_handler +from smash.tools import object_handler + import os import errno import warnings @@ -30,100 +33,6 @@ -def open_hdf5(path, read_only=False, replace=False): - """ - Open or create an HDF5 file. - - Parameters - ---------- - path : str - The file path. - read_only : boolean - If true the access to the hdf5 fil is in read-only mode. Multi process can read the same hdf5 file simulteneously. This is not possible when access mode are append 'a' or write 'w'. - replace: Boolean - If true, the existing hdf5file is erased - - Returns - ------- - f : - A HDF5 object. - - Examples - -------- - >>> hdf5=smash.io.multi_model_io.open_hdf5("./my_hdf5.hdf5") - >>> hdf5.keys() - >>> hdf5.attrs.keys() - """ - if not path.endswith(".hdf5"): - - path = path + ".hdf5" - - if read_only: - - if os.path.isfile(path): - - f= h5py.File(path, "r") - - else: - - raise ValueError( - f"File {path} does not exist." - ) - - else: - - if replace: - - f= h5py.File(path, "w") - - else: - - if os.path.isfile(path): - - f= h5py.File(path, "a") - - else: - - f= h5py.File(path, "w") - - return f - - - -def add_hdf5_sub_group(hdf5, subgroup=None): - """ - Create a new subgroup in a HDF5 object - - Parameters - ---------- - hdf5 : object - An hdf5 object opened with open_hdf5() - subgroup: str - Path to a subgroub that must be created - - Returns - ------- - hdf5 : - the HDF5 object. - - Examples - -------- - >>> hdf5=smash.io.multi_model_io.open_hdf5("./model_subgroup.hdf5", replace=True) - >>> hdf5=smash.io.multi_model_io.add_hdf5_sub_group(hdf5, subgroup="mygroup") - >>> hdf5.keys() - >>> hdf5.attrs.keys() - """ - if subgroup is not None: - - if subgroup=="": - - subgroup="./" - - hdf5.require_group(subgroup) - - return hdf5 - - def _generate_light_smash_object_structure(structure: str,structure_parameters=STRUCTURE_PARAMETERS,structure_states=STRUCTURE_STATES): """ this function create a light dictionnary containing the required data-structure to save a smash model object to an hdf5 file @@ -213,7 +122,7 @@ def _generate_full_smash_object_structure(instance): list : A list containing keys and dictionary matching the structure of the python object. 
""" - key_data=generate_object_structure(instance) + key_data=smash.tools.object_handler.generate_object_structure(instance) key_list=list() key_list.append(key_data) @@ -222,79 +131,10 @@ def _generate_full_smash_object_structure(instance): return key_list -def generate_object_structure(instance): - """ - this function create a full dictionnary containing all the structure of an object in order to save it to an hdf5 - - Parameters - ---------- - instance : object - a custom python object. - - Returns - ------- - list or dict : - A list or dictionary matching the structure of the python object. - """ - key_data={} - key_list=list() - return_list=False - - for attr in dir(instance): - - if not attr.startswith("_") and not attr in ["from_handle", "copy"]: - - try: - - value = getattr(instance, attr) - - if isinstance(value, (np.ndarray,list)): - - if isinstance(value,list): - value=np.array(value) - - if value.dtype == "object" or value.dtype.char == "U": - value = value.astype("S") - - #key_data.update({attr:value}) - key_list.append(attr) - return_list=True - - elif isinstance(value,(str,float,int)): - - #key_data.update({attr:value}) - key_list.append(attr) - return_list=True - - else: - - depp_key_data=generate_object_structure(value) - - if (len(depp_key_data)>0): - key_data.update({attr:depp_key_data}) - - except: - - pass - - if return_list: - - for attr, value in key_data.items(): - key_list.append({attr:value}) - - return key_list - - else: - - return key_data - - - - def generate_smash_object_structure(instance,typeofstructure="medium"): """ - this function create a dictionnary containing a complete ar partial structure of an object in order to save it to an hdf5 + this function create a dictionnary containing a complete ar partial structure of an object in order to save it to an hdf5. This functions is a conveninet way to generate the key_data as a dictionary. Then personnal keys can be added to the key_data dict. Parameters ---------- @@ -326,343 +166,6 @@ def generate_smash_object_structure(instance,typeofstructure="medium"): - -def _dump_object_to_hdf5_from_list_attribute(hdf5,instance,list_attr): - """ - dump a object to a hdf5 file from a list of attributes - - Parameters - ---------- - hdf5 : object - an hdf5 object - instance : object - a custom python object. - list_attr : list - a list of attribute - """ - if isinstance(list_attr,list): - - for attr in list_attr: - - if isinstance(attr, str): - - _dump_object_to_hdf5_from_str_attribute(hdf5, instance, attr) - - elif isinstance(attr,list): - - _dump_object_to_hdf5_from_list_attribute(hdf5, instance, attr) - - elif isinstance(attr,dict): - - _dump_object_to_hdf5_from_dict_attribute(hdf5, instance, attr) - - else: - - raise ValueError( - f"inconsistent {attr} in {list_attr}. {attr} must be a an instance of dict, list or str" - ) - - else: - - raise ValueError( - f"{list_attr} must be a instance of list." - ) - - - -def _dump_object_to_hdf5_from_dict_attribute(hdf5,instance,dict_attr): - """ - dump a object to a hdf5 file from a dictionary of attributes - - Parameters - ---------- - hdf5 : object - an hdf5 object - instance : object - a custom python object. 
- dict_attr : dict - a dictionary of attribute - """ - if isinstance(dict_attr,dict): - - for attr, value in dict_attr.items(): - - hdf5=add_hdf5_sub_group(hdf5, subgroup=attr) - - try: - - sub_instance=getattr(instance, attr) - - except: - - sub_instance=instance - - if isinstance(value,dict): - - _dump_object_to_hdf5_from_dict_attribute(hdf5[attr], sub_instance, value) - - if isinstance(value,list): - - _dump_object_to_hdf5_from_list_attribute(hdf5[attr], sub_instance, value) - - elif isinstance(value,str): - - _dump_object_to_hdf5_from_str_attribute(hdf5[attr], sub_instance, value) - - else : - - raise ValueError( - f"inconsistent '{attr}' in '{dict_attr}'. Dict({attr}) must be a instance of dict, list or str" - ) - - else: - - raise ValueError( - f"{dict_attr} must be a instance of dict." - ) - - - -def _dump_object_to_hdf5_from_str_attribute(hdf5,instance,str_attr): - """ - dump a object to a hdf5 file from a string attribute - - Parameters - ---------- - hdf5 : object - an hdf5 object - instance : object - a custom python object. - str_attr : str - a string attribute - """ - if isinstance(str_attr, str): - - try: - - value = getattr(instance, str_attr) - - if isinstance(value, (np.ndarray,list)): - - if isinstance(value,list): - value=np.array(value) - - if value.dtype == "object" or value.dtype.char == "U": - value = value.astype("S") - - hdf5.create_dataset( - str_attr, - shape=value.shape, - dtype=value.dtype, - data=value, - compression="gzip", - chunks=True, - ) - - elif value is None: - - hdf5.attrs[str_attr] = "_None_" - - else: - - hdf5.attrs[str_attr] = value - - except: - - raise ValueError( - f"Unable to dump attribute {str_attr} with value {value} from {instance}" - ) - - else: - - raise ValueError( - f"{str_attr} must be a instance of str." - ) - - - -def _dump_object_to_hdf5_from_iteratable(hdf5, instance, iteratable=None): - """ - dump a object to a hdf5 file from a iteratable object list or dict - - Parameters - ---------- - hdf5 : object - an hdf5 object - instance : object - a custom python object. - iteratable : list | dict - a list or a dict of attribute - - Examples - -------- - setup, mesh = smash.load_dataset("cance") - model = smash.Model(setup, mesh) - model.run(inplace=True) - - hdf5=smash.io.multi_model_io.open_hdf5("./model.hdf5", replace=True) - hdf5=smash.io.multi_model_io.add_hdf5_sub_group(hdf5, subgroup="model1") - keys_data=smash.io.multi_model_io.generate_smash_object_structure(model,typeofstructure="medium") - smash.io.multi_model_io._dump_object_to_hdf5_from_iteratable(hdf5["model1"], model, keys_data) - - hdf5=smash.io.multi_model_io.open_hdf5("./model.hdf5", replace=False) - hdf5=smash.io.multi_model_io.add_hdf5_sub_group(hdf5, subgroup="model2") - keys_data=smash.io.multi_model_io.generate_smash_object_structure(model,typeofstructure="light") - smash.io.multi_model_io._dump_object_to_hdf5_from_iteratable(hdf5["model2"], model, keys_data) - """ - if isinstance(iteratable,list): - - _dump_object_to_hdf5_from_list_attribute(hdf5,instance,iteratable) - - elif isinstance(iteratable,dict): - - _dump_object_to_hdf5_from_dict_attribute(hdf5,instance,iteratable) - - else : - - raise ValueError( - f"{iteratable} must be a instance of list or dict." 
- ) - - - -def _dump_dict_to_hdf5(hdf5,dictionary): - """ - dump a dictionary to an hdf5 file - - Parameters - ---------- - hdf5 : object - an hdf5 object - dictionary : dict - a custom python dictionary - """ - if isinstance(dictionary,dict): - - for attr, value in dictionary.items(): - - try: - - if isinstance(value,(dict)): - - hdf5=add_hdf5_sub_group(hdf5, subgroup=attr) - _dump_dict_to_hdf5(hdf5[attr],value) - - elif isinstance(value, (np.ndarray,list)): - - if isinstance(value,(list)): - value=np.array(value) - - if value.dtype == "object" or value.dtype.char == "U": - value = value.astype("S") - - #remove dataset if exist - if attr in hdf5.keys(): - del hdf5[attr] - - hdf5.create_dataset( - attr, - shape=value.shape, - dtype=value.dtype, - data=value, - compression="gzip", - chunks=True, - ) - - elif value is None: - - hdf5.attrs[attr] = "_None_" - - else: - - hdf5.attrs[attr] = value - - except: - - raise ValueError( - f"Unable to save attribute {attr} with value {value}" - ) - - else: - - raise ValueError( - f"{dictionary} must be a instance of dict." - ) - - - -def save_dict_to_hdf5(path_to_hdf5,dictionary=None,location="./",replace=False): - """ - dump a dictionary to an hdf5 file - - Parameters - ---------- - path_to_hdf5 : str - path to the hdf5 file - dictionary : dict | None - a dictionary containing the data to be saved - location : str - path location or subgroup where to write data in the hdf5 file - replace : Boolean - replace an existing hdf5 file. Default is False - - Examples - -------- - setup, mesh = smash.load_dataset("cance") - model = smash.Model(setup, mesh) - model.run(inplace=True) - - smash.io.multi_model_io.save_dict_to_hdf5("saved_dictionary.hdf5",mesh) - """ - if isinstance(dictionary,dict): - - hdf5=open_hdf5(path_to_hdf5, replace=replace) - hdf5=add_hdf5_sub_group(hdf5, subgroup=location) - _dump_dict_to_hdf5(hdf5[location], dictionary) - - else: - - raise ValueError( - f"The input {dictionary} must be a instance of dict." - ) - - - -def save_object_to_hdf5(f_hdf5, instance, keys_data=None, location="./", sub_data=None, replace=False): - """ - dump an object to an hdf5 file - - Parameters - ---------- - f_hdf5 : str - path to the hdf5 file - instance : object - python object - keys_data : list | dict - a list or a dictionary of the attribute to be saved - location : str - path location or subgroup where to write data in the hdf5 file - sub_data : dict | None - a dictionary containing extra-data to be saved - replace : Boolean - replace an existing hdf5 file. 
Default is False - """ - - if keys_data is None: - keys_data=generate_object_structure(instance) - - hdf5=open_hdf5(f_hdf5, replace=replace) - hdf5=add_hdf5_sub_group(hdf5, subgroup=location) - _dump_object_to_hdf5_from_iteratable(hdf5[location], instance, keys_data) - - if isinstance(sub_data,dict): - - _dump_dict_to_hdf5(hdf5[location], sub_data) - - hdf5.close() - - - def save_smash_model_to_hdf5(path_to_hdf5, instance, keys_data=None, content="medium", location="./", sub_data=None, replace=True): """ dump an object to an hdf5 file @@ -721,7 +224,7 @@ def save_smash_model_to_hdf5(path_to_hdf5, instance, keys_data=None, content="me if isinstance(keys_data,(dict,list)): - save_object_to_hdf5(path_to_hdf5, instance, keys_data, location=location, sub_data=sub_data,replace=replace) + smash.tools.hdf5_handler.save_object_to_hdf5(path_to_hdf5, instance, keys_data, location=location, sub_data=sub_data,replace=replace) else: @@ -730,7 +233,6 @@ def save_smash_model_to_hdf5(path_to_hdf5, instance, keys_data=None, content="me ) - def load_hdf5_file(f_hdf5,as_model=False): """ Load an hdf5 file @@ -765,147 +267,12 @@ def load_hdf5_file(f_hdf5,as_model=False): else: - hdf5=open_hdf5(f_hdf5, read_only=True, replace=False) - dictionary=read_hdf5_as_dict(hdf5) + hdf5=smash.tools.hdf5_handler.open_hdf5(f_hdf5, read_only=True, replace=False) + dictionary=smash.tools.hdf5_handler.read_hdf5_as_dict(hdf5) hdf5.close() return dictionary - - -def read_object_as_dict(instance): - """ - create a dictionary from a custom python object - - Parameters - ---------- - instance : object - an custom python object - - Return - ---------- - key_data: dict - an dictionary containing all keys and atributes of the object - """ - key_data={} - key_list=list() - return_list=False - - for attr in dir(instance): - - if not attr.startswith("_") and not attr in ["from_handle", "copy"]: - - try: - - value = getattr(instance, attr) - - if isinstance(value, (np.ndarray,list)): - - if isinstance(value,list): - value=np.array(value) - - if value.dtype == "object" or value.dtype.char == "U": - value = value.astype("S") - - key_data.update({attr:value}) - - elif isinstance(value,(str,float,int)): - - key_data.update({attr:value}) - - else: - - depp_key_data=read_object_as_dict(value) - - if (len(depp_key_data)>0): - key_data.update({attr:depp_key_data}) - - except: - - pass - - return key_data - - - - -def read_hdf5_as_dict(hdf5): - """ - Load an hdf5 file - - Parameters - ---------- - hdf5 : str - path to the hdf5 file - - Return - -------- - dictionary : dict, a dictionary of all keys and attribute included in the hdf5 file - - Examples - -------- - #read only a part of an hdf5 file - hdf5=smash.io.multi_model_io.open_hdf5("./multi_model.hdf5") - dictionary=smash.io.multi_model_io.read_hdf5_as_dict(hdf5["model1"]) - dictionary.keys() - """ - dictionary={} - - for key,item in hdf5.items(): - - if str(type(item)).find("group") != -1: - - dictionary.update({key:read_hdf5_as_dict(item)}) - - list_attr=list(item.attrs.keys()) - - for key_attr in list_attr: - - # check if value is equal to "_None_" (None string because hdf5 does not supported) - if item.attrs[key_attr] == "_None_": - - dictionary[key].update({key_attr:None}) - - else: - - dictionary[key].update({key_attr:item.attrs[key_attr]}) - - if str(type(item)).find("dataset") != -1: - - if item[:].dtype.char == "S": - - values=item[:].astype("U") - - else: - - values=item[:] - - dictionary.update({key:values}) - - list_attr=list(item.attrs.keys()) - - for key_attr in list_attr: - - # 
check if value is equal to "_None_" (None string because hdf5 does not supported) - if item.attrs[key_attr] == "_None_": - dictionary[key].update({key_attr:None}) - else: - dictionary.update({key_attr:item.attrs[key_attr]}) - - list_attr=list(hdf5.attrs.keys()) - - for key_attr in list_attr: - - # check if value is equal to "_None_" (None string because hdf5 does not supported) - if hdf5.attrs[key_attr] == "_None_": - dictionary.update({key_attr:None}) - else: - dictionary.update({key_attr:hdf5.attrs[key_attr]}) - - return dictionary - - - def _parse_hdf5_to_derived_type(hdf5_ins, derived_type): for ds in hdf5_ins.keys(): if isinstance(hdf5_ins[ds], h5py.Group): diff --git a/smash/tools/hdf5_handler.py b/smash/tools/hdf5_handler.py new file mode 100644 index 00000000..8d670f78 --- /dev/null +++ b/smash/tools/hdf5_handler.py @@ -0,0 +1,514 @@ +from __future__ import annotations + +import os +import h5py +import numpy as np + +from smash.tools import object_handler + +def open_hdf5(path, read_only=False, replace=False): + """ + Open or create an HDF5 file. + + Parameters + ---------- + path : str + The file path. + read_only : boolean + If true the access to the hdf5 fil is in read-only mode. Multi process can read the same hdf5 file simulteneously. This is not possible when access mode are append 'a' or write 'w'. + replace: Boolean + If true, the existing hdf5file is erased + + Returns + ------- + f : + A HDF5 object. + + Examples + -------- + >>> hdf5=smash.io.multi_model_io.open_hdf5("./my_hdf5.hdf5") + >>> hdf5.keys() + >>> hdf5.attrs.keys() + """ + if not path.endswith(".hdf5"): + + path = path + ".hdf5" + + if read_only: + + if os.path.isfile(path): + + f= h5py.File(path, "r") + + else: + + raise ValueError( + f"File {path} does not exist." + ) + + else: + + if replace: + + f= h5py.File(path, "w") + + else: + + if os.path.isfile(path): + + f= h5py.File(path, "a") + + else: + + f= h5py.File(path, "w") + + return f + + + +def add_hdf5_sub_group(hdf5, subgroup=None): + """ + Create a new subgroup in a HDF5 object + + Parameters + ---------- + hdf5 : object + An hdf5 object opened with open_hdf5() + subgroup: str + Path to a subgroub that must be created + + Returns + ------- + hdf5 : + the HDF5 object. + + Examples + -------- + >>> hdf5=smash.io.multi_model_io.open_hdf5("./model_subgroup.hdf5", replace=True) + >>> hdf5=smash.io.multi_model_io.add_hdf5_sub_group(hdf5, subgroup="mygroup") + >>> hdf5.keys() + >>> hdf5.attrs.keys() + """ + if subgroup is not None: + + if subgroup=="": + + subgroup="./" + + hdf5.require_group(subgroup) + + return hdf5 + + + +def _dump_object_to_hdf5_from_list_attribute(hdf5,instance,list_attr): + """ + dump a object to a hdf5 file from a list of attributes + + Parameters + ---------- + hdf5 : object + an hdf5 object + instance : object + a custom python object. + list_attr : list + a list of attribute + """ + if isinstance(list_attr,list): + + for attr in list_attr: + + if isinstance(attr, str): + + _dump_object_to_hdf5_from_str_attribute(hdf5, instance, attr) + + elif isinstance(attr,list): + + _dump_object_to_hdf5_from_list_attribute(hdf5, instance, attr) + + elif isinstance(attr,dict): + + _dump_object_to_hdf5_from_dict_attribute(hdf5, instance, attr) + + else: + + raise ValueError( + f"inconsistent {attr} in {list_attr}. {attr} must be a an instance of dict, list or str" + ) + + else: + + raise ValueError( + f"{list_attr} must be a instance of list." 
+ ) + + + +def _dump_object_to_hdf5_from_dict_attribute(hdf5,instance,dict_attr): + """ + dump a object to a hdf5 file from a dictionary of attributes + + Parameters + ---------- + hdf5 : object + an hdf5 object + instance : object + a custom python object. + dict_attr : dict + a dictionary of attribute + """ + if isinstance(dict_attr,dict): + + for attr, value in dict_attr.items(): + + hdf5=add_hdf5_sub_group(hdf5, subgroup=attr) + + try: + + sub_instance=getattr(instance, attr) + + except: + + sub_instance=instance + + if isinstance(value,dict): + + _dump_object_to_hdf5_from_dict_attribute(hdf5[attr], sub_instance, value) + + if isinstance(value,list): + + _dump_object_to_hdf5_from_list_attribute(hdf5[attr], sub_instance, value) + + elif isinstance(value,str): + + _dump_object_to_hdf5_from_str_attribute(hdf5[attr], sub_instance, value) + + else : + + raise ValueError( + f"inconsistent '{attr}' in '{dict_attr}'. Dict({attr}) must be a instance of dict, list or str" + ) + + else: + + raise ValueError( + f"{dict_attr} must be a instance of dict." + ) + + + +def _dump_object_to_hdf5_from_str_attribute(hdf5,instance,str_attr): + """ + dump a object to a hdf5 file from a string attribute + + Parameters + ---------- + hdf5 : object + an hdf5 object + instance : object + a custom python object. + str_attr : str + a string attribute + """ + if isinstance(str_attr, str): + + try: + + value = getattr(instance, str_attr) + + if isinstance(value, (np.ndarray,list)): + + if isinstance(value,list): + value=np.array(value) + + if value.dtype == "object" or value.dtype.char == "U": + value = value.astype("S") + + hdf5.create_dataset( + str_attr, + shape=value.shape, + dtype=value.dtype, + data=value, + compression="gzip", + chunks=True, + ) + + elif value is None: + + hdf5.attrs[str_attr] = "_None_" + + else: + + hdf5.attrs[str_attr] = value + + except: + + raise ValueError( + f"Unable to dump attribute {str_attr} with value {value} from {instance}" + ) + + else: + + raise ValueError( + f"{str_attr} must be a instance of str." + ) + + + +def _dump_object_to_hdf5_from_iteratable(hdf5, instance, iteratable=None): + """ + dump a object to a hdf5 file from a iteratable object list or dict + + Parameters + ---------- + hdf5 : object + an hdf5 object + instance : object + a custom python object. + iteratable : list | dict + a list or a dict of attribute + + Examples + -------- + setup, mesh = smash.load_dataset("cance") + model = smash.Model(setup, mesh) + model.run(inplace=True) + + hdf5=smash.io.multi_model_io.open_hdf5("./model.hdf5", replace=True) + hdf5=smash.io.multi_model_io.add_hdf5_sub_group(hdf5, subgroup="model1") + keys_data=smash.io.multi_model_io.generate_smash_object_structure(model,typeofstructure="medium") + smash.io.multi_model_io._dump_object_to_hdf5_from_iteratable(hdf5["model1"], model, keys_data) + + hdf5=smash.io.multi_model_io.open_hdf5("./model.hdf5", replace=False) + hdf5=smash.io.multi_model_io.add_hdf5_sub_group(hdf5, subgroup="model2") + keys_data=smash.io.multi_model_io.generate_smash_object_structure(model,typeofstructure="light") + smash.io.multi_model_io._dump_object_to_hdf5_from_iteratable(hdf5["model2"], model, keys_data) + """ + if isinstance(iteratable,list): + + _dump_object_to_hdf5_from_list_attribute(hdf5,instance,iteratable) + + elif isinstance(iteratable,dict): + + _dump_object_to_hdf5_from_dict_attribute(hdf5,instance,iteratable) + + else : + + raise ValueError( + f"{iteratable} must be a instance of list or dict." 
+ ) + + + +def _dump_dict_to_hdf5(hdf5,dictionary): + """ + dump a dictionary to an hdf5 file + + Parameters + ---------- + hdf5 : object + an hdf5 object + dictionary : dict + a custom python dictionary + """ + if isinstance(dictionary,dict): + + for attr, value in dictionary.items(): + + try: + + if isinstance(value,(dict)): + + hdf5=add_hdf5_sub_group(hdf5, subgroup=attr) + _dump_dict_to_hdf5(hdf5[attr],value) + + elif isinstance(value, (np.ndarray,list)): + + if isinstance(value,(list)): + value=np.array(value) + + if value.dtype == "object" or value.dtype.char == "U": + value = value.astype("S") + + #remove dataset if exist + if attr in hdf5.keys(): + del hdf5[attr] + + hdf5.create_dataset( + attr, + shape=value.shape, + dtype=value.dtype, + data=value, + compression="gzip", + chunks=True, + ) + + elif value is None: + + hdf5.attrs[attr] = "_None_" + + else: + + hdf5.attrs[attr] = value + + except: + + raise ValueError( + f"Unable to save attribute {attr} with value {value}" + ) + + else: + + raise ValueError( + f"{dictionary} must be a instance of dict." + ) + + + +def save_dict_to_hdf5(path_to_hdf5,dictionary=None,location="./",replace=False): + """ + dump a dictionary to an hdf5 file + + Parameters + ---------- + path_to_hdf5 : str + path to the hdf5 file + dictionary : dict | None + a dictionary containing the data to be saved + location : str + path location or subgroup where to write data in the hdf5 file + replace : Boolean + replace an existing hdf5 file. Default is False + + Examples + -------- + setup, mesh = smash.load_dataset("cance") + model = smash.Model(setup, mesh) + model.run(inplace=True) + + smash.io.multi_model_io.save_dict_to_hdf5("saved_dictionary.hdf5",mesh) + """ + if isinstance(dictionary,dict): + + hdf5=open_hdf5(path_to_hdf5, replace=replace) + hdf5=add_hdf5_sub_group(hdf5, subgroup=location) + _dump_dict_to_hdf5(hdf5[location], dictionary) + + else: + + raise ValueError( + f"The input {dictionary} must be a instance of dict." + ) + + + +def save_object_to_hdf5(f_hdf5, instance, keys_data=None, location="./", sub_data=None, replace=False): + """ + dump an object to an hdf5 file + + Parameters + ---------- + f_hdf5 : str + path to the hdf5 file + instance : object + python object + keys_data : list | dict + a list or a dictionary of the attribute to be saved + location : str + path location or subgroup where to write data in the hdf5 file + sub_data : dict | None + a dictionary containing extra-data to be saved + replace : Boolean + replace an existing hdf5 file. 
Default is False + """ + + if keys_data is None: + keys_data=smash.tools.object_handler.generate_object_structure(instance) + + hdf5=open_hdf5(f_hdf5, replace=replace) + hdf5=add_hdf5_sub_group(hdf5, subgroup=location) + _dump_object_to_hdf5_from_iteratable(hdf5[location], instance, keys_data) + + if isinstance(sub_data,dict): + + _dump_dict_to_hdf5(hdf5[location], sub_data) + + hdf5.close() + + + +def read_hdf5_as_dict(hdf5): + """ + Load an hdf5 file + + Parameters + ---------- + hdf5 : str + path to the hdf5 file + + Return + -------- + dictionary : dict, a dictionary of all keys and attribute included in the hdf5 file + + Examples + -------- + #read only a part of an hdf5 file + hdf5=smash.io.multi_model_io.open_hdf5("./multi_model.hdf5") + dictionary=smash.io.multi_model_io.read_hdf5_as_dict(hdf5["model1"]) + dictionary.keys() + """ + dictionary={} + + for key,item in hdf5.items(): + + if str(type(item)).find("group") != -1: + + dictionary.update({key:read_hdf5_as_dict(item)}) + + list_attr=list(item.attrs.keys()) + + for key_attr in list_attr: + + # check if value is equal to "_None_" (None string because hdf5 does not supported) + if item.attrs[key_attr] == "_None_": + + dictionary[key].update({key_attr:None}) + + else: + + dictionary[key].update({key_attr:item.attrs[key_attr]}) + + if str(type(item)).find("dataset") != -1: + + if item[:].dtype.char == "S": + + values=item[:].astype("U") + + else: + + values=item[:] + + dictionary.update({key:values}) + + list_attr=list(item.attrs.keys()) + + for key_attr in list_attr: + + # check if value is equal to "_None_" (None string because hdf5 does not supported) + if item.attrs[key_attr] == "_None_": + dictionary[key].update({key_attr:None}) + else: + dictionary.update({key_attr:item.attrs[key_attr]}) + + list_attr=list(hdf5.attrs.keys()) + + for key_attr in list_attr: + + # check if value is equal to "_None_" (None string because hdf5 does not supported) + if hdf5.attrs[key_attr] == "_None_": + dictionary.update({key_attr:None}) + else: + dictionary.update({key_attr:hdf5.attrs[key_attr]}) + + return dictionary + diff --git a/smash/tools/object_handler.py b/smash/tools/object_handler.py new file mode 100644 index 00000000..7e23d1ca --- /dev/null +++ b/smash/tools/object_handler.py @@ -0,0 +1,127 @@ +from __future__ import annotations + +import os +import numpy as np + + +def generate_object_structure(instance): + """ + this function create a full dictionnary containing all the structure of an object in order to save it to an hdf5 + + Parameters + ---------- + instance : object + a custom python object. + + Returns + ------- + list or dict : + A list or dictionary matching the structure of the python object. 
+ """ + key_data={} + key_list=list() + return_list=False + + for attr in dir(instance): + + if not attr.startswith("_") and not attr in ["from_handle", "copy"]: + + try: + + value = getattr(instance, attr) + + if isinstance(value, (np.ndarray,list)): + + if isinstance(value,list): + value=np.array(value) + + if value.dtype == "object" or value.dtype.char == "U": + value = value.astype("S") + + #key_data.update({attr:value}) + key_list.append(attr) + return_list=True + + elif isinstance(value,(str,float,int)): + + #key_data.update({attr:value}) + key_list.append(attr) + return_list=True + + else: + + depp_key_data=generate_object_structure(value) + + if (len(depp_key_data)>0): + key_data.update({attr:depp_key_data}) + + except: + + pass + + if return_list: + + for attr, value in key_data.items(): + key_list.append({attr:value}) + + return key_list + + else: + + return key_data + + + +def read_object_as_dict(instance): + """ + create a dictionary from a custom python object + + Parameters + ---------- + instance : object + an custom python object + + Return + ---------- + key_data: dict + an dictionary containing all keys and atributes of the object + """ + key_data={} + key_list=list() + return_list=False + + for attr in dir(instance): + + if not attr.startswith("_") and not attr in ["from_handle", "copy"]: + + try: + + value = getattr(instance, attr) + + if isinstance(value, (np.ndarray,list)): + + if isinstance(value,list): + value=np.array(value) + + if value.dtype == "object" or value.dtype.char == "U": + value = value.astype("S") + + key_data.update({attr:value}) + + elif isinstance(value,(str,float,int)): + + key_data.update({attr:value}) + + else: + + depp_key_data=read_object_as_dict(value) + + if (len(depp_key_data)>0): + key_data.update({attr:depp_key_data}) + + except: + + pass + + return key_data + From ec1b9fc7cd3d6f7794efd36978fe9e85234e180d Mon Sep 17 00:00:00 2001 From: Jay-Allemand Maxime Date: Wed, 5 Jul 2023 12:35:30 +0200 Subject: [PATCH 59/73] update io_hdf5 tests files --- hdf5_io_tests.py | 39 +++++++++++++++++++++++---------------- 1 file changed, 23 insertions(+), 16 deletions(-) diff --git a/hdf5_io_tests.py b/hdf5_io_tests.py index fefadc20..fe36b329 100644 --- a/hdf5_io_tests.py +++ b/hdf5_io_tests.py @@ -7,7 +7,7 @@ model.run(inplace=True) #save a single dictionary to hdf5 -smash.io.hdf5_io.save_dict_to_hdf5("saved_dictionary.hdf5",mesh) +smash.tools.hdf5_handler.save_dict_to_hdf5("saved_dictionary.hdf5",mesh) #generate the structure of the object: it is a dict of key:data to save: typeofstructure={light,medium,full} keys_data=smash.io.hdf5_io.generate_smash_object_structure(model,typeofstructure="medium") @@ -30,7 +30,7 @@ #view the hdf5 file -hdf5=smash.io.hdf5_io.open_hdf5("./model_user.hdf5") +hdf5=smash.tools.hdf5_handler.open_hdf5("./model_user.hdf5") hdf5.keys() hdf5["mesh"].keys() hdf5["parameters"].keys() @@ -41,7 +41,7 @@ hdf5.close() #view the hdf5 file with sub_data -hdf5=smash.io.hdf5_io.open_hdf5("./model_sub_data.hdf5") +hdf5=smash.tools.hdf5_handler.open_hdf5("./model_sub_data.hdf5") hdf5.keys() hdf5.attrs.keys() hdf5.close() @@ -52,7 +52,7 @@ smash.save_smash_model_to_hdf5("./multi_model.hdf5", model,location="model2",replace=False) -hdf5=smash.io.hdf5_io.open_hdf5("./multi_model.hdf5") +hdf5=smash.tools.hdf5_handler.open_hdf5("./multi_model.hdf5") hdf5.keys() hdf5["model2"]["setup"].attrs.keys() hdf5["model2"]["mesh"].keys() @@ -61,24 +61,28 @@ hdf5.close() #manually group different object in an hdf5 
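# (editor's note: the block below is the manual counterpart of
# save_smash_model_to_hdf5: open_hdf5() opens or creates the file,
# add_hdf5_sub_group() creates one subgroup per model, and
# _dump_object_to_hdf5_from_iteratable() writes the keys listed in keys_data
# into the chosen subgroup)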
-hdf5=smash.io.hdf5_io.open_hdf5("./model_subgroup.hdf5", replace=True) -hdf5=smash.io.hdf5_io.add_hdf5_sub_group(hdf5, subgroup="model1") +hdf5=smash.tools.hdf5_handler.open_hdf5("./model_subgroup.hdf5", replace=True) +hdf5=smash.tools.hdf5_handler.add_hdf5_sub_group(hdf5, subgroup="model1") +hdf5=smash.tools.hdf5_handler.add_hdf5_sub_group(hdf5, subgroup="model2") keys_data=smash.io.hdf5_io.generate_smash_object_structure(model,typeofstructure="medium") -smash.io.hdf5_io._dump_object_to_hdf5_from_iteratable(hdf5["model1"], model, keys_data) +keys_data_2=smash.tools.object_handler.generate_object_structure(model) +smash.tools.hdf5_handler._dump_object_to_hdf5_from_iteratable(hdf5["model1"], model, keys_data) +smash.tools.hdf5_handler._dump_object_to_hdf5_from_iteratable(hdf5["model2"], model, keys_data_2) -hdf5=smash.io.hdf5_io.open_hdf5("./model_subgroup.hdf5", replace=False) -hdf5=smash.io.hdf5_io.add_hdf5_sub_group(hdf5, subgroup="model2") +hdf5=smash.tools.hdf5_handler.open_hdf5("./model_subgroup.hdf5", replace=False) +hdf5=smash.tools.hdf5_handler.add_hdf5_sub_group(hdf5, subgroup="model3") keys_data=smash.io.hdf5_io.generate_smash_object_structure(model,typeofstructure="medium") -smash.io.hdf5_io._dump_object_to_hdf5_from_iteratable(hdf5["model2"], model, keys_data) +smash.tools.hdf5_handler._dump_object_to_hdf5_from_iteratable(hdf5["model3"], model, keys_data) hdf5.keys() hdf5["model1"].keys() hdf5["model2"].keys() +hdf5["model3"].keys() hdf5.close() #read model object to a dictionnay -dictionary=smash.io.hdf5_io.read_object_as_dict(model) +dictionary=smash.tools.object_handler.read_object_as_dict(model) dictionary.keys() dictionary["mesh"]["code"] @@ -94,17 +98,20 @@ dictionary.keys() #read only a part of an hdf5 file -hdf5=smash.io.hdf5_io.open_hdf5("./multi_model.hdf5") -dictionary=smash.io.hdf5_io.read_hdf5_to_dict(hdf5["model1"]) +hdf5=smash.tools.hdf5_handler.open_hdf5("./multi_model.hdf5") +dictionary=smash.tools.hdf5_handler.read_hdf5_as_dict(hdf5["model1"]) dictionary.keys() #reload a full model object +model_reloaded=smash.load_hdf5_file("./model_medium.hdf5",as_model=True) #get error model_reloaded=smash.load_hdf5_file("./model_full.hdf5",as_model=True) model_reloaded model_reloaded.run() #TODO : -#- model_reloaded need to be a full hdf5 !! How to test that ? 
-#- rename file to hdf5_handler.py and move it to tools/ -# move save_model_to_hdf5 and load_hdf5 to an other file dan io/ + +# compile documentation +# tests failed +# remove hdf5_io_test.py +# black *.py From c81574bd9d35040cff6c0672f5c5a69cd9cca0ab Mon Sep 17 00:00:00 2001 From: Jay-Allemand Maxime Date: Wed, 5 Jul 2023 13:00:52 +0200 Subject: [PATCH 60/73] Update docstring functions --- smash/io/hdf5_io.py | 2 +- smash/tools/hdf5_handler.py | 30 +++++++++++++++--------------- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/smash/io/hdf5_io.py b/smash/io/hdf5_io.py index a71ed73a..86bd5a5e 100644 --- a/smash/io/hdf5_io.py +++ b/smash/io/hdf5_io.py @@ -193,7 +193,7 @@ def save_smash_model_to_hdf5(path_to_hdf5, instance, keys_data=None, content="me model = smash.Model(setup, mesh) model.run(inplace=True) - keys_data=smash.generate_smash_object_structure(model,typeofstructure="medium") + keys_data=smash.io.hdf5_io.generate_smash_object_structure(model,typeofstructure="medium") #add a new data to save: keys_data["parameters"].append('ci') diff --git a/smash/tools/hdf5_handler.py b/smash/tools/hdf5_handler.py index 8d670f78..122daefc 100644 --- a/smash/tools/hdf5_handler.py +++ b/smash/tools/hdf5_handler.py @@ -26,7 +26,7 @@ def open_hdf5(path, read_only=False, replace=False): Examples -------- - >>> hdf5=smash.io.multi_model_io.open_hdf5("./my_hdf5.hdf5") + >>> hdf5=smash.tools.hdf5_handler.open_hdf5("./my_hdf5.hdf5") >>> hdf5.keys() >>> hdf5.attrs.keys() """ @@ -84,8 +84,8 @@ def add_hdf5_sub_group(hdf5, subgroup=None): Examples -------- - >>> hdf5=smash.io.multi_model_io.open_hdf5("./model_subgroup.hdf5", replace=True) - >>> hdf5=smash.io.multi_model_io.add_hdf5_sub_group(hdf5, subgroup="mygroup") + >>> hdf5=smash.tools.hdf5_handler.open_hdf5("./model_subgroup.hdf5", replace=True) + >>> hdf5=smash.tools.hdf5_handler.add_hdf5_sub_group(hdf5, subgroup="mygroup") >>> hdf5.keys() >>> hdf5.attrs.keys() """ @@ -274,15 +274,15 @@ def _dump_object_to_hdf5_from_iteratable(hdf5, instance, iteratable=None): model = smash.Model(setup, mesh) model.run(inplace=True) - hdf5=smash.io.multi_model_io.open_hdf5("./model.hdf5", replace=True) - hdf5=smash.io.multi_model_io.add_hdf5_sub_group(hdf5, subgroup="model1") - keys_data=smash.io.multi_model_io.generate_smash_object_structure(model,typeofstructure="medium") - smash.io.multi_model_io._dump_object_to_hdf5_from_iteratable(hdf5["model1"], model, keys_data) - - hdf5=smash.io.multi_model_io.open_hdf5("./model.hdf5", replace=False) - hdf5=smash.io.multi_model_io.add_hdf5_sub_group(hdf5, subgroup="model2") - keys_data=smash.io.multi_model_io.generate_smash_object_structure(model,typeofstructure="light") - smash.io.multi_model_io._dump_object_to_hdf5_from_iteratable(hdf5["model2"], model, keys_data) + hdf5=smash.tools.hdf5_handler.open_hdf5("./model.hdf5", replace=True) + hdf5=smash.tools.hdf5_handler.add_hdf5_sub_group(hdf5, subgroup="model1") + keys_data=smash.io.hdf5_io.generate_smash_object_structure(model,typeofstructure="medium") + smash.tools.hdf5_handler._dump_object_to_hdf5_from_iteratable(hdf5["model1"], model, keys_data) + + hdf5=smash.tools.hdf5_handler.open_hdf5("./model.hdf5", replace=False) + hdf5=smash.tools.hdf5_handler.add_hdf5_sub_group(hdf5, subgroup="model2") + keys_data=smash.io.hdf5_io.generate_smash_object_structure(model,typeofstructure="light") + smash.tools.hdf5_handler._dump_object_to_hdf5_from_iteratable(hdf5["model2"], model, keys_data) """ if isinstance(iteratable,list): @@ -386,7 +386,7 @@ def 
save_dict_to_hdf5(path_to_hdf5,dictionary=None,location="./",replace=False): model = smash.Model(setup, mesh) model.run(inplace=True) - smash.io.multi_model_io.save_dict_to_hdf5("saved_dictionary.hdf5",mesh) + smash.tools.hdf5_handler.save_dict_to_hdf5("saved_dictionary.hdf5",mesh) """ if isinstance(dictionary,dict): @@ -453,8 +453,8 @@ def read_hdf5_as_dict(hdf5): Examples -------- #read only a part of an hdf5 file - hdf5=smash.io.multi_model_io.open_hdf5("./multi_model.hdf5") - dictionary=smash.io.multi_model_io.read_hdf5_as_dict(hdf5["model1"]) + hdf5=smash.tools.hdf5_handler.open_hdf5("./multi_model.hdf5") + dictionary=smash.tools.hdf5_handler.read_hdf5_as_dict(hdf5["model1"]) dictionary.keys() """ dictionary={} From df978149f13e310285b4f817e7018b607408438d Mon Sep 17 00:00:00 2001 From: Jay-Allemand Maxime Date: Wed, 5 Jul 2023 13:15:59 +0200 Subject: [PATCH 61/73] Fix documentation issues, update docstring functions --- doc/source/api_reference/index.rst | 3 +++ doc/source/release/0.5.0-notes.rst | 4 ++-- smash/io/hdf5_io.py | 14 +++++++------- 3 files changed, 12 insertions(+), 9 deletions(-) diff --git a/doc/source/api_reference/index.rst b/doc/source/api_reference/index.rst index 8c23e3f8..ffcd5e01 100644 --- a/doc/source/api_reference/index.rst +++ b/doc/source/api_reference/index.rst @@ -20,7 +20,10 @@ Core Python io generate_samples sparse_storage + hdf5_io raster_handler + hdf5_handler + object_handler Wrapped Fortran diff --git a/doc/source/release/0.5.0-notes.rst b/doc/source/release/0.5.0-notes.rst index 47d2d9d6..025d3fee 100644 --- a/doc/source/release/0.5.0-notes.rst +++ b/doc/source/release/0.5.0-notes.rst @@ -33,7 +33,7 @@ Improvements ------------ Reading atmospheric data with YYYY/MM/DD access -****************************************** +*********************************************** This mode is triggered by enabling the flag prcp_yyyymmdd_access in the model setup file. The atmospheric data files are supposed to be stored in a directory YYYY/MM/dd. This option is useful if the model is ran time step by time step (many incremental runs). In that case searching the atmospheric data files can be relatively slow (1 second multiplicate by the number of runs). With this mode it is optimized and it is faster. @@ -88,4 +88,4 @@ See issue `#28 `__. An error occured when two neighboring cells have antagonistic flow directions ``(1, 5)``, ``(2, 6)``, ``(3, 7)``, ``(4, 8)``. This should be corrected directly in the flow direction file but to avoid segmentation faults when the maximum number of recursions has been reached, a check is added to the code to exit recursion in that case. -See issue `#31 `__. \ No newline at end of file +See issue `#31 `__. diff --git a/smash/io/hdf5_io.py b/smash/io/hdf5_io.py index 86bd5a5e..14a73d8b 100644 --- a/smash/io/hdf5_io.py +++ b/smash/io/hdf5_io.py @@ -168,18 +168,18 @@ def generate_smash_object_structure(instance,typeofstructure="medium"): def save_smash_model_to_hdf5(path_to_hdf5, instance, keys_data=None, content="medium", location="./", sub_data=None, replace=True): """ - dump an object to an hdf5 file + Save an instance of smash.Model to an hdf5 file. Parameters ---------- path_to_hdf5 : str path to the hdf5 file - instance : object + instance : smash.Model object python object keys_data : list | dict a list or a dictionary of the attribute to be saved content : str - {light,medium,full} + {light,medium,full} : content saved from the smash.Model object. 
Notice that if content=full, an instance of the smash.Model can be reloaded from the hdf5 file. location : str path location or subgroup where to write data in the hdf5 file sub_data : dict | None @@ -235,7 +235,7 @@ def save_smash_model_to_hdf5(path_to_hdf5, instance, keys_data=None, content="me def load_hdf5_file(f_hdf5,as_model=False): """ - Load an hdf5 file + Load an hdf5 file to a dictionary or to an instance of smash.Model. Parameters ---------- @@ -294,17 +294,17 @@ def _parse_hdf5_to_derived_type(hdf5_ins, derived_type): def read_hdf5_to_model_object(path: str) -> Model: """ - Read Model object. + Read an hdf5 and return an instance of smash.Model object. Parameters ---------- path : str - The file path. + The hdf5 file path. Returns ------- Model : - A Model object loaded from HDF5 file. + An instance of smash.Model object loaded from an HDF5 file. Raises ------ From 99249b2069dd4c5ecebe2fbd3b7760004d782153 Mon Sep 17 00:00:00 2001 From: Jay-Allemand Maxime Date: Wed, 5 Jul 2023 13:33:08 +0200 Subject: [PATCH 62/73] Reformat files remove io_test.hdf5 --- hdf5_io_tests.py | 117 ----------- smash/io/hdf5_io.py | 179 +++++++++-------- smash/tools/hdf5_handler.py | 352 +++++++++++++--------------------- smash/tools/object_handler.py | 120 +++++------- 4 files changed, 292 insertions(+), 476 deletions(-) delete mode 100644 hdf5_io_tests.py diff --git a/hdf5_io_tests.py b/hdf5_io_tests.py deleted file mode 100644 index fe36b329..00000000 --- a/hdf5_io_tests.py +++ /dev/null @@ -1,117 +0,0 @@ -import smash -import numpy as np - - -setup, mesh = smash.load_dataset("cance") -model = smash.Model(setup, mesh) -model.run(inplace=True) - -#save a single dictionary to hdf5 -smash.tools.hdf5_handler.save_dict_to_hdf5("saved_dictionary.hdf5",mesh) - -#generate the structure of the object: it is a dict of key:data to save: typeofstructure={light,medium,full} -keys_data=smash.io.hdf5_io.generate_smash_object_structure(model,typeofstructure="medium") -print(keys_data) -#add a new data to save: -keys_data["parameters"].append('ci') - -#Save a single smash model -smash.save_smash_model_to_hdf5("./model_light.hdf5", model, content="light", replace=True) -smash.save_smash_model_to_hdf5("./model_medium.hdf5", model, content="medium", replace=True) -smash.save_smash_model_to_hdf5("./model_full.hdf5", model, content="full", replace=True) -smash.save_smash_model_to_hdf5("./model_user.hdf5", model, keys_data=keys_data, replace=True) - -#adding subdata -sub_data={"sub_data1":"mydata"} -sub_data.update({"sub_data2":2.5}) -sub_data.update({"sub_data3":{"sub_sub_data1":2.5,"sub_sub_data2":np.zeros(10)}}) - -smash.save_smash_model_to_hdf5("./model_sub_data.hdf5", model, content="medium",sub_data=sub_data, replace=True) - - -#view the hdf5 file -hdf5=smash.tools.hdf5_handler.open_hdf5("./model_user.hdf5") -hdf5.keys() -hdf5["mesh"].keys() -hdf5["parameters"].keys() -hdf5["output"].keys() -hdf5["output"].attrs.keys() -hdf5["output/fstates"].keys() -hdf5["setup"].attrs.keys() -hdf5.close() - -#view the hdf5 file with sub_data -hdf5=smash.tools.hdf5_handler.open_hdf5("./model_sub_data.hdf5") -hdf5.keys() -hdf5.attrs.keys() -hdf5.close() - - -#save multi smash model at different place -smash.save_smash_model_to_hdf5("./multi_model.hdf5", model,location="model1",replace=True) -smash.save_smash_model_to_hdf5("./multi_model.hdf5", model,location="model2",replace=False) - - -hdf5=smash.tools.hdf5_handler.open_hdf5("./multi_model.hdf5") -hdf5.keys() -hdf5["model2"]["setup"].attrs.keys() -hdf5["model2"]["mesh"].keys() 
-hdf5["model2"]["output"].keys() -hdf5["model2"]["output"].attrs.keys() -hdf5.close() - -#manually group different object in an hdf5 -hdf5=smash.tools.hdf5_handler.open_hdf5("./model_subgroup.hdf5", replace=True) -hdf5=smash.tools.hdf5_handler.add_hdf5_sub_group(hdf5, subgroup="model1") -hdf5=smash.tools.hdf5_handler.add_hdf5_sub_group(hdf5, subgroup="model2") -keys_data=smash.io.hdf5_io.generate_smash_object_structure(model,typeofstructure="medium") -keys_data_2=smash.tools.object_handler.generate_object_structure(model) -smash.tools.hdf5_handler._dump_object_to_hdf5_from_iteratable(hdf5["model1"], model, keys_data) -smash.tools.hdf5_handler._dump_object_to_hdf5_from_iteratable(hdf5["model2"], model, keys_data_2) - -hdf5=smash.tools.hdf5_handler.open_hdf5("./model_subgroup.hdf5", replace=False) -hdf5=smash.tools.hdf5_handler.add_hdf5_sub_group(hdf5, subgroup="model3") -keys_data=smash.io.hdf5_io.generate_smash_object_structure(model,typeofstructure="medium") -smash.tools.hdf5_handler._dump_object_to_hdf5_from_iteratable(hdf5["model3"], model, keys_data) - -hdf5.keys() -hdf5["model1"].keys() -hdf5["model2"].keys() -hdf5["model3"].keys() -hdf5.close() - - -#read model object to a dictionnay -dictionary=smash.tools.object_handler.read_object_as_dict(model) -dictionary.keys() -dictionary["mesh"]["code"] - -######### Reading HDF5 - -#load an hdf5 file to a dictionary -dictionary=smash.load_hdf5_file("./multi_model.hdf5") -dictionary["model1"].keys() -dictionary["model1"]["mesh"].keys() - -#load a hdf5 file with any sub_data -dictionary=smash.load_hdf5_file("./model_sub_data.hdf5") -dictionary.keys() - -#read only a part of an hdf5 file -hdf5=smash.tools.hdf5_handler.open_hdf5("./multi_model.hdf5") -dictionary=smash.tools.hdf5_handler.read_hdf5_as_dict(hdf5["model1"]) -dictionary.keys() - -#reload a full model object -model_reloaded=smash.load_hdf5_file("./model_medium.hdf5",as_model=True) #get error -model_reloaded=smash.load_hdf5_file("./model_full.hdf5",as_model=True) -model_reloaded -model_reloaded.run() - -#TODO : - -# compile documentation -# tests failed -# remove hdf5_io_test.py -# black *.py - diff --git a/smash/io/hdf5_io.py b/smash/io/hdf5_io.py index 14a73d8b..d9436a7e 100644 --- a/smash/io/hdf5_io.py +++ b/smash/io/hdf5_io.py @@ -32,8 +32,11 @@ __all__ = ["save_smash_model_to_hdf5", "load_hdf5_file"] - -def _generate_light_smash_object_structure(structure: str,structure_parameters=STRUCTURE_PARAMETERS,structure_states=STRUCTURE_STATES): +def _generate_light_smash_object_structure( + structure: str, + structure_parameters=STRUCTURE_PARAMETERS, + structure_states=STRUCTURE_STATES, +): """ this function create a light dictionnary containing the required data-structure to save a smash model object to an hdf5 file @@ -42,9 +45,9 @@ def _generate_light_smash_object_structure(structure: str,structure_parameters=S structure : str the smash model structure used {gr-a, gr-b, gr-c, gr-d} structure_parameters: dict - the dict containing the parameter to be saved for each model structure + the dict containing the parameter to be saved for each model structure structure_states: dict - the dict containing the states to be saved for each model structure + the dict containing the states to be saved for each model structure Returns ------- @@ -53,7 +56,19 @@ def _generate_light_smash_object_structure(structure: str,structure_parameters=S """ return { "setup": ["dt", "end_time", "start_time"], - "mesh": ["active_cell", "area", "code", "dx", "ng", "ymax", "xmin", "nrow", "ncol", "gauge_pos", 
"flwacc"], + "mesh": [ + "active_cell", + "area", + "code", + "dx", + "ng", + "ymax", + "xmin", + "nrow", + "ncol", + "gauge_pos", + "flwacc", + ], "input_data": ["qobs"], "parameters": structure_parameters[ structure @@ -67,8 +82,11 @@ def _generate_light_smash_object_structure(structure: str,structure_parameters=S } - -def _generate_medium_smash_object_structure(structure: str,structure_parameters=STRUCTURE_PARAMETERS,structure_states=STRUCTURE_STATES): +def _generate_medium_smash_object_structure( + structure: str, + structure_parameters=STRUCTURE_PARAMETERS, + structure_states=STRUCTURE_STATES, +): """ this function create a medium dictionnary containing the required data-structure to save a smash model object to an hdf5 file @@ -77,9 +95,9 @@ def _generate_medium_smash_object_structure(structure: str,structure_parameters= structure : str the smash model structure used {gr-a, gr-b, gr-c, gr-d} structure_parameters: dict - the dict containing the parameter to be saved for each model structure + the dict containing the parameter to be saved for each model structure structure_states: dict - the dict containing the states to be saved for each model structure + the dict containing the states to be saved for each model structure Returns ------- @@ -88,7 +106,22 @@ def _generate_medium_smash_object_structure(structure: str,structure_parameters= """ return { "setup": ["dt", "end_time", "start_time", "structure", "_ntime_step"], - "mesh": ["active_cell", "area", "code", "dx", "flwdir", "nac", "ng", "path", "ymax", "xmin", "nrow", "ncol", "gauge_pos", "flwacc"], + "mesh": [ + "active_cell", + "area", + "code", + "dx", + "flwdir", + "nac", + "ng", + "path", + "ymax", + "xmin", + "nrow", + "ncol", + "gauge_pos", + "flwacc", + ], "input_data": ["mean_prcp", "mean_pet", "qobs"], "parameters": structure_parameters[ structure @@ -103,7 +136,7 @@ def _generate_medium_smash_object_structure(structure: str,structure_parameters= "qsim", "cost", "cost_jobs", - "cost_jreg" + "cost_jreg", ], } @@ -116,23 +149,22 @@ def _generate_full_smash_object_structure(instance): ---------- instance : object a custom python object. - + Returns ------- list : A list containing keys and dictionary matching the structure of the python object. """ - key_data=smash.tools.object_handler.generate_object_structure(instance) - - key_list=list() + key_data = smash.tools.object_handler.generate_object_structure(instance) + + key_list = list() key_list.append(key_data) key_list.append("_last_update") - - return key_list + return key_list -def generate_smash_object_structure(instance,typeofstructure="medium"): +def generate_smash_object_structure(instance, typeofstructure="medium"): """ this function create a dictionnary containing a complete ar partial structure of an object in order to save it to an hdf5. This functions is a conveninet way to generate the key_data as a dictionary. Then personnal keys can be added to the key_data dict. @@ -142,33 +174,37 @@ def generate_smash_object_structure(instance,typeofstructure="medium"): a custom python object. typeofstructure : str the structure type : light, medium, full - + Returns ------- dict : A list or dictionary matching the structure of the python object. 
""" - structure=instance.setup.structure - - if typeofstructure=="light": - - key_data=_generate_light_smash_object_structure(structure) - - elif typeofstructure=="medium": - - key_data=_generate_medium_smash_object_structure(structure) - - elif typeofstructure=="full": - - key_data=_generate_full_smash_object_structure(instance) - - return key_data + structure = instance.setup.structure + + if typeofstructure == "light": + key_data = _generate_light_smash_object_structure(structure) + elif typeofstructure == "medium": + key_data = _generate_medium_smash_object_structure(structure) + + elif typeofstructure == "full": + key_data = _generate_full_smash_object_structure(instance) + + return key_data -def save_smash_model_to_hdf5(path_to_hdf5, instance, keys_data=None, content="medium", location="./", sub_data=None, replace=True): +def save_smash_model_to_hdf5( + path_to_hdf5, + instance, + keys_data=None, + content="medium", + location="./", + sub_data=None, + replace=True, +): """ - Save an instance of smash.Model to an hdf5 file. + Save an instance of smash.Model to an hdf5 file. Parameters ---------- @@ -186,17 +222,17 @@ def save_smash_model_to_hdf5(path_to_hdf5, instance, keys_data=None, content="me a dictionary containing extra-data to be saved replace : Boolean replace an existing hdf5 file. Default is False - + Examples -------- setup, mesh = smash.load_dataset("cance") model = smash.Model(setup, mesh) model.run(inplace=True) - + keys_data=smash.io.hdf5_io.generate_smash_object_structure(model,typeofstructure="medium") #add a new data to save: keys_data["parameters"].append('ci') - + #Save a single smash model smash.save_smash_model_to_hdf5("./model_light.hdf5", model, content="light", replace=True) smash.save_smash_model_to_hdf5("./model_medium.hdf5", model, content="medium", replace=True) @@ -211,29 +247,29 @@ def save_smash_model_to_hdf5(path_to_hdf5, instance, keys_data=None, content="me smash.save_smash_model_to_hdf5("./model_sub_data.hdf5", model, content="medium",sub_data=sub_data, replace=True) """ if content == "light": - - keys_data=_generate_light_smash_object_structure(instance.setup.structure) - + keys_data = _generate_light_smash_object_structure(instance.setup.structure) + elif content == "medium": - - keys_data=_generate_medium_smash_object_structure(instance.setup.structure) - + keys_data = _generate_medium_smash_object_structure(instance.setup.structure) + elif content == "full": - - keys_data=_generate_full_smash_object_structure(instance) - - if isinstance(keys_data,(dict,list)): - - smash.tools.hdf5_handler.save_object_to_hdf5(path_to_hdf5, instance, keys_data, location=location, sub_data=sub_data,replace=replace) - - else: - - raise ValueError( - f"{keys_data} must be a instance of list or dict." - ) + keys_data = _generate_full_smash_object_structure(instance) + + if isinstance(keys_data, (dict, list)): + smash.tools.hdf5_handler.save_object_to_hdf5( + path_to_hdf5, + instance, + keys_data, + location=location, + sub_data=sub_data, + replace=replace, + ) + + else: + raise ValueError(f"{keys_data} must be a instance of list or dict.") -def load_hdf5_file(f_hdf5,as_model=False): +def load_hdf5_file(f_hdf5, as_model=False): """ Load an hdf5 file to a dictionary or to an instance of smash.Model. @@ -243,32 +279,30 @@ def load_hdf5_file(f_hdf5,as_model=False): path to the hdf5 file as_model : Boolean load the hdf5 as a smash model. 
Default is False - + Return -------- instance : an instance of the smash model or a dictionary - + Examples -------- #load an hdf5 file to a dictionary dictionary=smash.load_hdf5_file("./multi_model.hdf5") dictionary["model1"].keys() dictionary["model1"]["mesh"].keys() - + #reload a full model object model_reloaded=smash.load_hdf5_file("./model_full.hdf5",as_model=True) model_reloaded model_reloaded.run() """ if as_model: - - instance=read_hdf5_to_model_object(f_hdf5) + instance = read_hdf5_to_model_object(f_hdf5) return instance - + else: - - hdf5=smash.tools.hdf5_handler.open_hdf5(f_hdf5, read_only=True, replace=False) - dictionary=smash.tools.hdf5_handler.read_hdf5_as_dict(hdf5) + hdf5 = smash.tools.hdf5_handler.open_hdf5(f_hdf5, read_only=True, replace=False) + dictionary = smash.tools.hdf5_handler.read_hdf5_as_dict(hdf5) hdf5.close() return dictionary @@ -284,7 +318,6 @@ def _parse_hdf5_to_derived_type(hdf5_ins, derived_type): setattr(derived_type, ds, hdf5_ins[ds][:]) for attr in hdf5_ins.attrs.keys(): - # check if value is equal to "_None_" (None string because hdf5 does not supported) if hdf5_ins.attrs[attr] == "_None_": setattr(derived_type, attr, None) @@ -342,12 +375,11 @@ def read_hdf5_to_model_object(path: str) -> Model: if os.path.isfile(path): with h5py.File(path, "r") as f: - - if not f.attrs.__contains__('_last_update'): + if not f.attrs.__contains__("_last_update"): raise ValueError( f'The hdf5 file {path} does not contain the full smash object structure and therefore cannot be loaded as a smash model object. The full structure of a smash model object can be saved using smash.save_smash_model_to_hdf5(filename, smash_model, content="full").' ) - + instance = smash.Model(None, None) if "descriptor_name" in f["setup"].keys(): @@ -364,9 +396,7 @@ def read_hdf5_to_model_object(path: str) -> Model: et = pd.Timestamp(instance.setup.end_time) - instance.setup._ntime_step = ( - et - st - ).total_seconds() / instance.setup.dt + instance.setup._ntime_step = (et - st).total_seconds() / instance.setup.dt instance.mesh = MeshDT( instance.setup, @@ -401,6 +431,5 @@ def read_hdf5_to_model_object(path: str) -> Model: return instance - else: raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), path) diff --git a/smash/tools/hdf5_handler.py b/smash/tools/hdf5_handler.py index 122daefc..ff0f856c 100644 --- a/smash/tools/hdf5_handler.py +++ b/smash/tools/hdf5_handler.py @@ -6,6 +6,7 @@ from smash.tools import object_handler + def open_hdf5(path, read_only=False, replace=False): """ Open or create an HDF5 file. @@ -31,39 +32,27 @@ def open_hdf5(path, read_only=False, replace=False): >>> hdf5.attrs.keys() """ if not path.endswith(".hdf5"): - path = path + ".hdf5" - + if read_only: - if os.path.isfile(path): - - f= h5py.File(path, "r") - + f = h5py.File(path, "r") + else: - - raise ValueError( - f"File {path} does not exist." 
- ) - + raise ValueError(f"File {path} does not exist.") + else: - if replace: - - f= h5py.File(path, "w") - + f = h5py.File(path, "w") + else: - if os.path.isfile(path): - - f= h5py.File(path, "a") - + f = h5py.File(path, "a") + else: - - f= h5py.File(path, "w") - - return f + f = h5py.File(path, "w") + return f def add_hdf5_sub_group(hdf5, subgroup=None): @@ -90,18 +79,15 @@ def add_hdf5_sub_group(hdf5, subgroup=None): >>> hdf5.attrs.keys() """ if subgroup is not None: - - if subgroup=="": - - subgroup="./" - + if subgroup == "": + subgroup = "./" + hdf5.require_group(subgroup) - - return hdf5 + return hdf5 -def _dump_object_to_hdf5_from_list_attribute(hdf5,instance,list_attr): +def _dump_object_to_hdf5_from_list_attribute(hdf5, instance, list_attr): """ dump a object to a hdf5 file from a list of attributes @@ -114,37 +100,27 @@ def _dump_object_to_hdf5_from_list_attribute(hdf5,instance,list_attr): list_attr : list a list of attribute """ - if isinstance(list_attr,list): - + if isinstance(list_attr, list): for attr in list_attr: - if isinstance(attr, str): - _dump_object_to_hdf5_from_str_attribute(hdf5, instance, attr) - - elif isinstance(attr,list): - + + elif isinstance(attr, list): _dump_object_to_hdf5_from_list_attribute(hdf5, instance, attr) - - elif isinstance(attr,dict): - + + elif isinstance(attr, dict): _dump_object_to_hdf5_from_dict_attribute(hdf5, instance, attr) - + else: - raise ValueError( f"inconsistent {attr} in {list_attr}. {attr} must be a an instance of dict, list or str" ) - - else: - - raise ValueError( - f"{list_attr} must be a instance of list." - ) + else: + raise ValueError(f"{list_attr} must be a instance of list.") -def _dump_object_to_hdf5_from_dict_attribute(hdf5,instance,dict_attr): +def _dump_object_to_hdf5_from_dict_attribute(hdf5, instance, dict_attr): """ dump a object to a hdf5 file from a dictionary of attributes @@ -157,47 +133,39 @@ def _dump_object_to_hdf5_from_dict_attribute(hdf5,instance,dict_attr): dict_attr : dict a dictionary of attribute """ - if isinstance(dict_attr,dict): - + if isinstance(dict_attr, dict): for attr, value in dict_attr.items(): - - hdf5=add_hdf5_sub_group(hdf5, subgroup=attr) - + hdf5 = add_hdf5_sub_group(hdf5, subgroup=attr) + try: - - sub_instance=getattr(instance, attr) - + sub_instance = getattr(instance, attr) + except: - - sub_instance=instance - - if isinstance(value,dict): - - _dump_object_to_hdf5_from_dict_attribute(hdf5[attr], sub_instance, value) - - if isinstance(value,list): - - _dump_object_to_hdf5_from_list_attribute(hdf5[attr], sub_instance, value) - - elif isinstance(value,str): - + sub_instance = instance + + if isinstance(value, dict): + _dump_object_to_hdf5_from_dict_attribute( + hdf5[attr], sub_instance, value + ) + + if isinstance(value, list): + _dump_object_to_hdf5_from_list_attribute( + hdf5[attr], sub_instance, value + ) + + elif isinstance(value, str): _dump_object_to_hdf5_from_str_attribute(hdf5[attr], sub_instance, value) - - else : - + + else: raise ValueError( f"inconsistent '{attr}' in '{dict_attr}'. Dict({attr}) must be a instance of dict, list or str" ) - - else: - - raise ValueError( - f"{dict_attr} must be a instance of dict." 
- ) + else: + raise ValueError(f"{dict_attr} must be a instance of dict.") -def _dump_object_to_hdf5_from_str_attribute(hdf5,instance,str_attr): +def _dump_object_to_hdf5_from_str_attribute(hdf5, instance, str_attr): """ dump a object to a hdf5 file from a string attribute @@ -211,19 +179,16 @@ def _dump_object_to_hdf5_from_str_attribute(hdf5,instance,str_attr): a string attribute """ if isinstance(str_attr, str): - try: - value = getattr(instance, str_attr) - - if isinstance(value, (np.ndarray,list)): - - if isinstance(value,list): - value=np.array(value) - + + if isinstance(value, (np.ndarray, list)): + if isinstance(value, list): + value = np.array(value) + if value.dtype == "object" or value.dtype.char == "U": value = value.astype("S") - + hdf5.create_dataset( str_attr, shape=value.shape, @@ -232,27 +197,20 @@ def _dump_object_to_hdf5_from_str_attribute(hdf5,instance,str_attr): compression="gzip", chunks=True, ) - + elif value is None: - - hdf5.attrs[str_attr] = "_None_" - + hdf5.attrs[str_attr] = "_None_" + else: - hdf5.attrs[str_attr] = value - + except: - raise ValueError( f"Unable to dump attribute {str_attr} with value {value} from {instance}" ) - - else: - - raise ValueError( - f"{str_attr} must be a instance of str." - ) + else: + raise ValueError(f"{str_attr} must be a instance of str.") def _dump_object_to_hdf5_from_iteratable(hdf5, instance, iteratable=None): @@ -267,13 +225,13 @@ def _dump_object_to_hdf5_from_iteratable(hdf5, instance, iteratable=None): a custom python object. iteratable : list | dict a list or a dict of attribute - + Examples -------- setup, mesh = smash.load_dataset("cance") model = smash.Model(setup, mesh) model.run(inplace=True) - + hdf5=smash.tools.hdf5_handler.open_hdf5("./model.hdf5", replace=True) hdf5=smash.tools.hdf5_handler.add_hdf5_sub_group(hdf5, subgroup="model1") keys_data=smash.io.hdf5_io.generate_smash_object_structure(model,typeofstructure="medium") @@ -284,23 +242,17 @@ def _dump_object_to_hdf5_from_iteratable(hdf5, instance, iteratable=None): keys_data=smash.io.hdf5_io.generate_smash_object_structure(model,typeofstructure="light") smash.tools.hdf5_handler._dump_object_to_hdf5_from_iteratable(hdf5["model2"], model, keys_data) """ - if isinstance(iteratable,list): - - _dump_object_to_hdf5_from_list_attribute(hdf5,instance,iteratable) - - elif isinstance(iteratable,dict): - - _dump_object_to_hdf5_from_dict_attribute(hdf5,instance,iteratable) - - else : - - raise ValueError( - f"{iteratable} must be a instance of list or dict." 
- ) + if isinstance(iteratable, list): + _dump_object_to_hdf5_from_list_attribute(hdf5, instance, iteratable) + + elif isinstance(iteratable, dict): + _dump_object_to_hdf5_from_dict_attribute(hdf5, instance, iteratable) + else: + raise ValueError(f"{iteratable} must be a instance of list or dict.") -def _dump_dict_to_hdf5(hdf5,dictionary): +def _dump_dict_to_hdf5(hdf5, dictionary): """ dump a dictionary to an hdf5 file @@ -311,29 +263,24 @@ def _dump_dict_to_hdf5(hdf5,dictionary): dictionary : dict a custom python dictionary """ - if isinstance(dictionary,dict): - + if isinstance(dictionary, dict): for attr, value in dictionary.items(): - try: - - if isinstance(value,(dict)): - - hdf5=add_hdf5_sub_group(hdf5, subgroup=attr) - _dump_dict_to_hdf5(hdf5[attr],value) - - elif isinstance(value, (np.ndarray,list)): - - if isinstance(value,(list)): - value=np.array(value) - + if isinstance(value, (dict)): + hdf5 = add_hdf5_sub_group(hdf5, subgroup=attr) + _dump_dict_to_hdf5(hdf5[attr], value) + + elif isinstance(value, (np.ndarray, list)): + if isinstance(value, (list)): + value = np.array(value) + if value.dtype == "object" or value.dtype.char == "U": value = value.astype("S") - - #remove dataset if exist + + # remove dataset if exist if attr in hdf5.keys(): del hdf5[attr] - + hdf5.create_dataset( attr, shape=value.shape, @@ -342,30 +289,21 @@ def _dump_dict_to_hdf5(hdf5,dictionary): compression="gzip", chunks=True, ) - + elif value is None: - hdf5.attrs[attr] = "_None_" - + else: - hdf5.attrs[attr] = value - + except: - - raise ValueError( - f"Unable to save attribute {attr} with value {value}" - ) - - else: - - raise ValueError( - f"{dictionary} must be a instance of dict." - ) + raise ValueError(f"Unable to save attribute {attr} with value {value}") + else: + raise ValueError(f"{dictionary} must be a instance of dict.") -def save_dict_to_hdf5(path_to_hdf5,dictionary=None,location="./",replace=False): +def save_dict_to_hdf5(path_to_hdf5, dictionary=None, location="./", replace=False): """ dump a dictionary to an hdf5 file @@ -379,30 +317,27 @@ def save_dict_to_hdf5(path_to_hdf5,dictionary=None,location="./",replace=False): path location or subgroup where to write data in the hdf5 file replace : Boolean replace an existing hdf5 file. Default is False - + Examples -------- setup, mesh = smash.load_dataset("cance") model = smash.Model(setup, mesh) model.run(inplace=True) - + smash.tools.hdf5_handler.save_dict_to_hdf5("saved_dictionary.hdf5",mesh) """ - if isinstance(dictionary,dict): - - hdf5=open_hdf5(path_to_hdf5, replace=replace) - hdf5=add_hdf5_sub_group(hdf5, subgroup=location) + if isinstance(dictionary, dict): + hdf5 = open_hdf5(path_to_hdf5, replace=replace) + hdf5 = add_hdf5_sub_group(hdf5, subgroup=location) _dump_dict_to_hdf5(hdf5[location], dictionary) - - else: - - raise ValueError( - f"The input {dictionary} must be a instance of dict." - ) + else: + raise ValueError(f"The input {dictionary} must be a instance of dict.") -def save_object_to_hdf5(f_hdf5, instance, keys_data=None, location="./", sub_data=None, replace=False): +def save_object_to_hdf5( + f_hdf5, instance, keys_data=None, location="./", sub_data=None, replace=False +): """ dump an object to an hdf5 file @@ -421,20 +356,18 @@ def save_object_to_hdf5(f_hdf5, instance, keys_data=None, location="./", sub_dat replace : Boolean replace an existing hdf5 file. 
Default is False """ - + if keys_data is None: - keys_data=smash.tools.object_handler.generate_object_structure(instance) - - hdf5=open_hdf5(f_hdf5, replace=replace) - hdf5=add_hdf5_sub_group(hdf5, subgroup=location) + keys_data = smash.tools.object_handler.generate_object_structure(instance) + + hdf5 = open_hdf5(f_hdf5, replace=replace) + hdf5 = add_hdf5_sub_group(hdf5, subgroup=location) _dump_object_to_hdf5_from_iteratable(hdf5[location], instance, keys_data) - - if isinstance(sub_data,dict): - + + if isinstance(sub_data, dict): _dump_dict_to_hdf5(hdf5[location], sub_data) - - hdf5.close() + hdf5.close() def read_hdf5_as_dict(hdf5): @@ -445,11 +378,11 @@ def read_hdf5_as_dict(hdf5): ---------- hdf5 : str path to the hdf5 file - + Return -------- dictionary : dict, a dictionary of all keys and attribute included in the hdf5 file - + Examples -------- #read only a part of an hdf5 file @@ -457,58 +390,47 @@ def read_hdf5_as_dict(hdf5): dictionary=smash.tools.hdf5_handler.read_hdf5_as_dict(hdf5["model1"]) dictionary.keys() """ - dictionary={} - - for key,item in hdf5.items(): - + dictionary = {} + + for key, item in hdf5.items(): if str(type(item)).find("group") != -1: - - dictionary.update({key:read_hdf5_as_dict(item)}) - - list_attr=list(item.attrs.keys()) - + dictionary.update({key: read_hdf5_as_dict(item)}) + + list_attr = list(item.attrs.keys()) + for key_attr in list_attr: - # check if value is equal to "_None_" (None string because hdf5 does not supported) if item.attrs[key_attr] == "_None_": - - dictionary[key].update({key_attr:None}) - + dictionary[key].update({key_attr: None}) + else: - - dictionary[key].update({key_attr:item.attrs[key_attr]}) - + dictionary[key].update({key_attr: item.attrs[key_attr]}) + if str(type(item)).find("dataset") != -1: - if item[:].dtype.char == "S": - - values=item[:].astype("U") - + values = item[:].astype("U") + else: - - values=item[:] - - dictionary.update({key:values}) - - list_attr=list(item.attrs.keys()) - + values = item[:] + + dictionary.update({key: values}) + + list_attr = list(item.attrs.keys()) + for key_attr in list_attr: - # check if value is equal to "_None_" (None string because hdf5 does not supported) if item.attrs[key_attr] == "_None_": - dictionary[key].update({key_attr:None}) + dictionary[key].update({key_attr: None}) else: - dictionary.update({key_attr:item.attrs[key_attr]}) - - list_attr=list(hdf5.attrs.keys()) - + dictionary.update({key_attr: item.attrs[key_attr]}) + + list_attr = list(hdf5.attrs.keys()) + for key_attr in list_attr: - # check if value is equal to "_None_" (None string because hdf5 does not supported) if hdf5.attrs[key_attr] == "_None_": - dictionary.update({key_attr:None}) + dictionary.update({key_attr: None}) else: - dictionary.update({key_attr:hdf5.attrs[key_attr]}) - - return dictionary + dictionary.update({key_attr: hdf5.attrs[key_attr]}) + return dictionary diff --git a/smash/tools/object_handler.py b/smash/tools/object_handler.py index 7e23d1ca..e3d885b5 100644 --- a/smash/tools/object_handler.py +++ b/smash/tools/object_handler.py @@ -12,66 +12,56 @@ def generate_object_structure(instance): ---------- instance : object a custom python object. - + Returns ------- list or dict : A list or dictionary matching the structure of the python object. 
""" - key_data={} - key_list=list() - return_list=False - + key_data = {} + key_list = list() + return_list = False + for attr in dir(instance): - if not attr.startswith("_") and not attr in ["from_handle", "copy"]: - try: - value = getattr(instance, attr) - - if isinstance(value, (np.ndarray,list)): - - if isinstance(value,list): - value=np.array(value) - + + if isinstance(value, (np.ndarray, list)): + if isinstance(value, list): + value = np.array(value) + if value.dtype == "object" or value.dtype.char == "U": value = value.astype("S") - - #key_data.update({attr:value}) + + # key_data.update({attr:value}) key_list.append(attr) - return_list=True - - elif isinstance(value,(str,float,int)): - - #key_data.update({attr:value}) + return_list = True + + elif isinstance(value, (str, float, int)): + # key_data.update({attr:value}) key_list.append(attr) - return_list=True - - else: - - depp_key_data=generate_object_structure(value) - - if (len(depp_key_data)>0): - key_data.update({attr:depp_key_data}) - + return_list = True + + else: + depp_key_data = generate_object_structure(value) + + if len(depp_key_data) > 0: + key_data.update({attr: depp_key_data}) + except: - pass - + if return_list: - for attr, value in key_data.items(): - key_list.append({attr:value}) - + key_list.append({attr: value}) + return key_list - + else: - return key_data - def read_object_as_dict(instance): """ create a dictionary from a custom python object @@ -80,48 +70,40 @@ def read_object_as_dict(instance): ---------- instance : object an custom python object - + Return ---------- key_data: dict an dictionary containing all keys and atributes of the object """ - key_data={} - key_list=list() - return_list=False - + key_data = {} + key_list = list() + return_list = False + for attr in dir(instance): - if not attr.startswith("_") and not attr in ["from_handle", "copy"]: - try: - value = getattr(instance, attr) - - if isinstance(value, (np.ndarray,list)): - - if isinstance(value,list): - value=np.array(value) - + + if isinstance(value, (np.ndarray, list)): + if isinstance(value, list): + value = np.array(value) + if value.dtype == "object" or value.dtype.char == "U": value = value.astype("S") - - key_data.update({attr:value}) - - elif isinstance(value,(str,float,int)): - - key_data.update({attr:value}) - - else: - - depp_key_data=read_object_as_dict(value) - - if (len(depp_key_data)>0): - key_data.update({attr:depp_key_data}) - + + key_data.update({attr: value}) + + elif isinstance(value, (str, float, int)): + key_data.update({attr: value}) + + else: + depp_key_data = read_object_as_dict(value) + + if len(depp_key_data) > 0: + key_data.update({attr: depp_key_data}) + except: - pass - - return key_data + return key_data From c8bf7f23df96ca7264c240ec3dd4ec71d2deafe8 Mon Sep 17 00:00:00 2001 From: "ngo-nghi-truyen.huynh" Date: Wed, 5 Jul 2023 19:18:12 +0200 Subject: [PATCH 63/73] FIX pipeline: re-generate baseline regarding to non-updated hdf5 file when remotely merging branch --- smash/tests/baseline.hdf5 | Bin 2010235 -> 2010235 bytes smash/tests/diff_baseline.csv | 12 +++++------- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/smash/tests/baseline.hdf5 b/smash/tests/baseline.hdf5 index 45ba8322b6e9ef487f1401bdcf9a21d0951ed5b5..79d2083f5855a737ab27a10e97b9a341102b06e2 100644 GIT binary patch delta 1156 zcmXBRdsI?)0Kjpsg*q+FNXX38V?MG1oii#7t+wNw%e{m&6K!c!zL*wKpw!HH^4ZeK z%zVsjx}%_oLUltt&a~u6D^s}=nr+L&e2}`Xvn#OQ&i?rP@%{UY=`LZqOYJ!>qWG<- 
zPWHT7Z;UIss&eHt8H$vexyF?`Bq%VH8{2WBHdsHDLU`-g=iar<9UoBa*H=jw%t^F9T@{g~Oy^UO@%tB0-(rFn6eM8hK}ZrNAC` z5dEL@Y?zWglg-<#$*3oOK^Xns)zY9n994SKkXMfXMJy+hao7}BwB4+n^JWVv^f9t?w!PrFo zu^|0hh8vn2l%j-F7tc~>?%n31$-Ad=D8}LCx|I)7skoaEjm&4aTs)U$$`8JVMQi*_ zEmt~H7x_HR#}4z$icbt=Mb_lRWyA-|9hvSyq9}%r9M3F)sj{RBD{Wt$#Mn*Alj>68 zUcq4-dCeHsB@jvR)AMU^mB(rK!4RkAaoQ8M20dBaE;&$W=L_$2eP1w`I~w1sb1JUXuXH|;ha|om+k$Ie z3~=JJqdE!l8ezzbLT}dd0OvtUZcGY&X_W*cwxKszdJ74uYBsm?dv*{wWIvbj{Z^tZ z>>uC0zseqMgN>zLEH^bXwpSOC8K6tqy_)7X-SFRh}4iP#$=eudO@~wy9de19!GDU$UgQ&`_sNIP4{bQ+pk)f z!sQhRip)ignFNa0C;`XF3$W$0_42wADaXWuhc?E~q2b7vkv2TF`?Ssj>3kf`>9}6g zyzETRe{g^+LJzSs;Hz{vt?{k`33)WFt0~!kdhEhQk6XJ}s)nPPO&Zp|3u1R%Ex~t~ z&n{y4CR?`~TT9xE>#9%t3!XX=v2c7awvRm{q5YAlq(58eTjbT1_!Os8`%={!n!yt7 zo%cgFMyIzB<*lrtoU;xm3+{c^DHwj8H=Eaf{#^dGo zN;7|Lcd?D>Oo0lI2W$a5z#e!HZ~z>Eb-;Rn05|~~0B2w$Km;}cE&vH21DgR?U<=>| OYz5qvD*U$YpZ^7rTSV#r delta 1149 zcmWmAeKga100!{=l6OWJilwg8rHfo#bXzFy?c6w|?ma{`t-NH-rd+?x%d8I7alPg2 z=B0T{)DgqX-LKtQ2%%{@F>RK}OHS=2i@2uO&dBipR^?PF%-zW+$V{4R}xJlbFSz9aOR0K%6rKs1Mm1hT(p~MMj!9f}4E>)sX!R!03al>tGe3_c$ zAZ}e)nIl8agR9y3@6s=8=L%Yoqq3YaI>}zIOvSpxAKSS2k$R@2uYjs=fW5}0I3!LI zYY5!Y2n@m})F)E=7bWU_#@+1m%AurVyf)UKYgnWA2!!g|<*`M>v%Y`ayDCyyXHi}| zPT|a-M+9`ne`0;e)N*IhpLBdpt?H4aZIz)`95hL)o$gGaM6xSXnYHFw{)!HHf}wD- zxkKSvrSt#gQf=7AiY_Ve#iS`J^5M^wH2m)h9G|+-+fN_NW3DwFmEmX-*fSp^oo1K+ zTcUXXL|7~eTOF1x^vHUB$nro-zK|+z(^Up--94GaFt)VS0y}u{{}ruYsF$6h$Fd_VW$E#zZUuy(i?so z2~@YUbfiJOi95>`%GHk|bqsO=iwYQecuhlZWHG;l~NR6PxNojuq>O8dUyBJ;0WNt41a^~E{!J4sH_cL`~+qvE|Ct)jrsH`;RFVC@cS-?!Ro?T$}7 zo(*onqPBYWkJIpS)hLmgu8qQ-yUvCM_mnoykf2DPtJ-Rn#`|mPZ%;M7bcVb5Su+gM zi>r7+9!@}Nc+8X0KaX&hLl0fDqtTtJbYJYts~<&_v#+WT;86da+n+|m<2wH6*&JU< z^DFZ6Nhv-eV;u@tQo4WQFxrse%-MVeypQDRzI99%^HhrU$xu<(AIPVZ5o50hKuYzWwfkhAsPEJI63m%e_75u&4Qe^Q8q zrf%mN-6Es)0RzAg_|}a9utb{zW`H?>1}p$eU -Date: Thu Jun 8 16:59:50 2023 +0200 +commit 99249b2069dd4c5ecebe2fbd3b7760004d782153 +Author: Jay-Allemand Maxime +Date: Wed Jul 5 13:33:08 2023 +0200 - FIX PR: Regenerated baseline - - - Two tests have been modified as a result of changes in optimization - regularizations + Reformat files + remove io_test.hdf5 TEST NAME |STATUS ann_optimize_1.cost |NON MODIFIED From 2fe8999ab84a0def171879a8685b47bacf1a0aa0 Mon Sep 17 00:00:00 2001 From: Jay-Allemand Maxime Date: Thu, 6 Jul 2023 11:19:16 +0200 Subject: [PATCH 64/73] =?UTF-8?q?Revert=20change=20Error=20->=20warning=20?= =?UTF-8?q?->=20Error=20lorsque=20aucune=20gauge=20n'a=20de=20d=C3=A9bits?= =?UTF-8?q?=20observ=C3=A9s=20!?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- smash/core/simulation/_standardize.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/smash/core/simulation/_standardize.py b/smash/core/simulation/_standardize.py index 25f67023..d16b9318 100644 --- a/smash/core/simulation/_standardize.py +++ b/smash/core/simulation/_standardize.py @@ -342,7 +342,7 @@ def _standardize_gauge( raise ValueError(f"Unknown gauge code '{name}'. 
Choices: {mesh.code}") if gauge_check.size == 0: - warnings.warn( + raise ValueError( f"No available observed discharge for optimization at gauge(s) {gauge}" ) From 2f72ef8eb135979478cb09f5b98e98e12fc67e3c Mon Sep 17 00:00:00 2001 From: inoelloc Date: Sun, 9 Jul 2023 18:31:04 +0200 Subject: [PATCH 65/73] ENC/FIX: Add highlights and contributors to release note Fix missing file in api_reference for documentation --- doc/source/api_reference/hdf5_handler.rst | 5 +++++ doc/source/api_reference/hdf5_io.rst | 5 +++++ doc/source/api_reference/object_handler.rst | 5 +++++ doc/source/release/0.5.0-notes.rst | 13 +++++++++---- 4 files changed, 24 insertions(+), 4 deletions(-) create mode 100644 doc/source/api_reference/hdf5_handler.rst create mode 100644 doc/source/api_reference/hdf5_io.rst create mode 100644 doc/source/api_reference/object_handler.rst diff --git a/doc/source/api_reference/hdf5_handler.rst b/doc/source/api_reference/hdf5_handler.rst new file mode 100644 index 00000000..b35ce512 --- /dev/null +++ b/doc/source/api_reference/hdf5_handler.rst @@ -0,0 +1,5 @@ +.. _api_reference.hdf5_handler: + +============ +Hdf5 handler +============ diff --git a/doc/source/api_reference/hdf5_io.rst b/doc/source/api_reference/hdf5_io.rst new file mode 100644 index 00000000..2a83f385 --- /dev/null +++ b/doc/source/api_reference/hdf5_io.rst @@ -0,0 +1,5 @@ +.. _api_reference.hdf5_io: + +======= +Hdf5 io +======= diff --git a/doc/source/api_reference/object_handler.rst b/doc/source/api_reference/object_handler.rst new file mode 100644 index 00000000..6b8b8456 --- /dev/null +++ b/doc/source/api_reference/object_handler.rst @@ -0,0 +1,5 @@ +.. _api_reference.object_handler: + +============== +Object handler +============== diff --git a/doc/source/release/0.5.0-notes.rst b/doc/source/release/0.5.0-notes.rst index 025d3fee..2f613df2 100644 --- a/doc/source/release/0.5.0-notes.rst +++ b/doc/source/release/0.5.0-notes.rst @@ -8,15 +8,20 @@ smash 0.5.0 Release Notes The smash 0.5.0 release continues the ongoing work to improve the handling, fix possible bugs, clarify the documentation. The highlights are: +- Reading atmospheric data with YYYY/MM/DD access +- New regularization function +- Spatial disaggregation/aggregation of the input raster +- Fix bugs in mesh creation, signature calculation and regularization l-curve + ------------ Contributors ------------ This release was made possible thanks to the contributions of: ---------------- -Compatibilities ---------------- +- Maxime Jay-Allemand +- Ngo Nghi Truyen Huynh +- François Colleoni ------------ Deprecations @@ -42,7 +47,7 @@ New Features ------------ New regularization function -**************************** +*************************** hard-smoothing : the smoothing regularization function is applied on parameters or states directly. 
This behavior differs from the ``smoothing`` mode where the regularization is applied on the difference between the background and the control (parameters or states) From 4d1e0b99b1124bddd4d6ef3217c82ad6551859f6 Mon Sep 17 00:00:00 2001 From: "ngo-nghi-truyen.huynh" Date: Sun, 9 Jul 2023 20:00:51 +0200 Subject: [PATCH 66/73] FIX: max_duration event segmentation algorithm --- .../signal_analysis/hydrograph_segmentation.rst | 5 ++--- doc/source/release/0.5.0-notes.rst | 5 +++++ smash/core/_event_segmentation.py | 5 +++-- smash/core/model.py | 2 +- 4 files changed, 11 insertions(+), 6 deletions(-) diff --git a/doc/source/math_num_documentation/signal_analysis/hydrograph_segmentation.rst b/doc/source/math_num_documentation/signal_analysis/hydrograph_segmentation.rst index 6eca100b..8cb7f076 100644 --- a/doc/source/math_num_documentation/signal_analysis/hydrograph_segmentation.rst +++ b/doc/source/math_num_documentation/signal_analysis/hydrograph_segmentation.rst @@ -59,6 +59,5 @@ For :math:`t_{j}\in E`: .. note:: If there exists :math:`m+1` :math:`(m>0)` consecutive events :math:`(sd_{u},ed_{u}),...,(sd_{u+m},ed_{u+m})` - occurring "nearly simultaneously", that means all of these events - occur in no more than ``max_duration`` hours: :math:`ed_{u+m}`__. +Segmentation algorithm +********************** + +If multiple events are detected, the duration of the merged event is no longer constrained by the max duration parameter. Instead, its duration may exceed this value. + ``smash.generate_mesh`` segmentation fault ****************************************** diff --git a/smash/core/_event_segmentation.py b/smash/core/_event_segmentation.py index 33d0a6e9..fc6418f8 100644 --- a/smash/core/_event_segmentation.py +++ b/smash/core/_event_segmentation.py @@ -267,9 +267,10 @@ def _events_grad( prev_peakq = list_events[-1]["peakQ"] prev_peakp = list_events[-1]["peakP"] - # % merge two events respecting to max duration: - if max(end, prev_end) <= prev_start + max_duration: + # % detect double events: + if prev_end >= start: list_events[-1]["end"] = max(end, prev_end) + list_events[-1]["start"] = min(start, prev_start) if q[i_peak] > q[prev_peakq]: list_events[-1]["peakQ"] = i_peak diff --git a/smash/core/model.py b/smash/core/model.py index 70b61a67..335154a6 100644 --- a/smash/core/model.py +++ b/smash/core/model.py @@ -1441,7 +1441,7 @@ def event_segmentation(self, peak_quant: float = 0.995, max_duration: float = 24 Events will be selected if their discharge peaks exceed the **peak_quant**-quantile of the observed discharge timeseries. max_duration: float, default 240 - The expected maximum duration of an event (in hour). + The expected maximum duration of an event (in hours). If multiple events are detected, their duration may exceed this value. Returns ------- From 9c760201ff0590f0e8ef38314b16b0b0932366f1 Mon Sep 17 00:00:00 2001 From: Jay-Allemand Maxime Date: Wed, 12 Jul 2023 10:04:32 +0200 Subject: [PATCH 67/73] Fix : When the rainfall data is spatially desagregated, a temporary new raster geotiff is generated. We guess the no data value using the input raster. However, if nodata value was set toi None type, the setter "_setNoDataValue" crash. Indeed arguments must be a double type only. 
Thus We know check if the guessed nodata value is an instance of float, else we set nodata value to -99.0 --- smash/tools/raster_handler.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/smash/tools/raster_handler.py b/smash/tools/raster_handler.py index a230522a..c882b2e5 100644 --- a/smash/tools/raster_handler.py +++ b/smash/tools/raster_handler.py @@ -146,10 +146,14 @@ def gdal_reproject_raster(dataset, xres, yres): # Workaround for gdal bug which initialise array to 0 instead as the No_Data value # Here we initialise the band manually with the nodata_value nodata = dataset.GetRasterBand(1).GetNoDataValue() + if not isinstance(nodata,float): + nodata=-99. + band = virtual_destination.GetRasterBand( 1 ) # Notice that band is a pointer to virtual_destination - band.SetNoDataValue(nodata) + band.SetNoDataValue(nodata) #nodata argument of type 'double' + nodataarray = np.ndarray(shape=(new_y_size, new_x_size)) nodataarray.fill(nodata) band.WriteArray(nodataarray) From d6cd7e2261971d3549b555661805fa6abcb994fe Mon Sep 17 00:00:00 2001 From: Jay-Allemand Maxime Date: Wed, 12 Jul 2023 11:04:30 +0200 Subject: [PATCH 68/73] Fix: crash when start_time = end_time. When this case happens, a clear error message is sent to stdout --- smash/core/_build_model.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/smash/core/_build_model.py b/smash/core/_build_model.py index f659bb5a..21200551 100644 --- a/smash/core/_build_model.py +++ b/smash/core/_build_model.py @@ -83,9 +83,9 @@ def _standardize_setup(setup: SetupDT): except: raise ValueError("argument end_time is not a valid date") - if (et - st).total_seconds() < 0: + if (et - st).total_seconds() <= 0: raise ValueError( - "argument end_time corresponds to an earlier date than start_time" + "argument end_time is date earlier or equal to argument start_time" ) if setup.read_qobs and setup.qobs_directory == "...": From 9404a19cb7035ef3e46a6505e6debd5710029fdd Mon Sep 17 00:00:00 2001 From: Jay-Allemand Maxime Date: Wed, 12 Jul 2023 11:16:39 +0200 Subject: [PATCH 69/73] balck files and update release note --- doc/source/release/0.5.0-notes.rst | 8 ++++++++ smash/tools/raster_handler.py | 18 +++++++++--------- 2 files changed, 17 insertions(+), 9 deletions(-) diff --git a/doc/source/release/0.5.0-notes.rst b/doc/source/release/0.5.0-notes.rst index 37a30cce..1b0e42ba 100644 --- a/doc/source/release/0.5.0-notes.rst +++ b/doc/source/release/0.5.0-notes.rst @@ -99,3 +99,11 @@ An error occured when two neighboring cells have antagonistic flow directions `` segmentation faults when the maximum number of recursions has been reached, a check is added to the code to exit recursion in that case. See issue `#31 `__. + +Correctly handle Nodata value during the spatial desagregation of the rainfall +****************************************************************************** + +A crash occured during the desagregation of th rainfall. The creation of a GDAL virtual-destination failed when the parent geotiff file has its Nodata value unset (None type). When this is the case, the Nodata value of the desagregated rainfall is automatically set to -99. + +see issue `#36 https://github.com/DassHydro-dev/smash/pull/36/commits/9c760201ff0590f0e8ef38314b16b0b0932366f1>`__. 
+ diff --git a/smash/tools/raster_handler.py b/smash/tools/raster_handler.py index c882b2e5..a1814187 100644 --- a/smash/tools/raster_handler.py +++ b/smash/tools/raster_handler.py @@ -31,7 +31,7 @@ def gdal_raster_open(filename): -------- dataset = gdal_raster_open("filename") """ - + if os.path.isfile(filename): dataset = gdal.Open(filename) else: @@ -146,13 +146,13 @@ def gdal_reproject_raster(dataset, xres, yres): # Workaround for gdal bug which initialise array to 0 instead as the No_Data value # Here we initialise the band manually with the nodata_value nodata = dataset.GetRasterBand(1).GetNoDataValue() - if not isinstance(nodata,float): - nodata=-99. + if not isinstance(nodata, float): + nodata = -99.0 band = virtual_destination.GetRasterBand( 1 ) # Notice that band is a pointer to virtual_destination - band.SetNoDataValue(nodata) #nodata argument of type 'double' + band.SetNoDataValue(nodata) # nodata argument of type 'double' nodataarray = np.ndarray(shape=(new_y_size, new_x_size)) nodataarray.fill(nodata) @@ -395,7 +395,7 @@ def union_bbox(bbox1, bbox2): ---------- bbox1: dict containin the first bbox informations bbox2 : dict containin the second bbox informations - + returns ------- dic containing the bbox union @@ -423,7 +423,7 @@ def get_bbox(dataset): Parameters ---------- dataset: gdal object - + returns ------- dic containing the bbox of the dataset @@ -451,7 +451,7 @@ def get_bbox_from_window(dataset, window): ---------- dataset: gdal object window : dict with ncol, nrow, col offset and row offset - + returns ------- dic containing the computed bbox @@ -482,7 +482,7 @@ def get_window_from_bbox(dataset, bbox): ---------- dataset: gdal object bbox : dict containing the bbox - + returns ------- dic containing the computed windows @@ -525,7 +525,7 @@ def crop_array(array, window): ---------- array: numpy array window : dict containg the window to crop - + returns ------- crop_array: the cropped numpy array, shape of the defined window From 5eb91829be1299c49f192263b5e32c8424fe8b69 Mon Sep 17 00:00:00 2001 From: Jay-Allemand Maxime Date: Wed, 12 Jul 2023 11:28:05 +0200 Subject: [PATCH 70/73] update release note --- doc/source/release/0.5.0-notes.rst | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/doc/source/release/0.5.0-notes.rst b/doc/source/release/0.5.0-notes.rst index 37a30cce..92a53456 100644 --- a/doc/source/release/0.5.0-notes.rst +++ b/doc/source/release/0.5.0-notes.rst @@ -99,3 +99,9 @@ An error occured when two neighboring cells have antagonistic flow directions `` segmentation faults when the maximum number of recursions has been reached, a check is added to the code to exit recursion in that case. See issue `#31 `__. + +Stop the execution of smash when start_time equal to end_time + +In that circonstance, the code crashed during the data reading with no obvious reason. Now just stop the code execution and return an error when this case occurs. + +See commit ``__. 
From b5d32058cd9c63b6f1b46a1566e134f3a0e0e732 Mon Sep 17 00:00:00 2001
From: Francois Colleoni
Date: Wed, 12 Jul 2023 11:43:42 +0200
Subject: [PATCH 71/73] FIX: resolve issue on flow distances computation

This commit fixes an error when flow distances are computed on adjacent
non-nested catchments
---
 doc/source/release/0.5.0-notes.rst | 11 +++++++++--
 smash/mesh/mw_meshing.f90          | 14 +++++++-------
 2 files changed, 16 insertions(+), 9 deletions(-)

diff --git a/doc/source/release/0.5.0-notes.rst b/doc/source/release/0.5.0-notes.rst
index 37a30cce..1995a957 100644
--- a/doc/source/release/0.5.0-notes.rst
+++ b/doc/source/release/0.5.0-notes.rst
@@ -92,10 +92,17 @@ Segmentation algorithm
 
 If multiple events are detected, the duration of the merged event is no longer constrained by the max duration parameter. Instead, its duration may exceed this value.
 
-``smash.generate_mesh`` segmentation fault
-******************************************
+Catchment delineation segmentation fault
+****************************************
 
 An error occured when two neighboring cells have antagonistic flow directions ``(1, 5)``, ``(2, 6)``, ``(3, 7)``, ``(4, 8)``. This should be corrected directly in the flow direction file but to avoid
 segmentation faults when the maximum number of recursions has been reached, a check is added to the code to exit recursion in that case.
 
 See issue `#31 `__.
+
+Catchment flow distances on adjacent non-nested catchments
+**********************************************************
+
+There is a bug when calculating flow distances when two adjacent catchments are considered in the mesh but non-nested. During calculation, a flag is set around the 8 adjacent cells of each upstream cell and not on the upstream cell in particular. As a result, a gauge stuck to a cell of another catchment will not be considered as a non-nested gauge and will be filled with -99. The bug has been solved by flagging only the upstream cell and not the 8 adjacent cells.
+
+See issue `#38 `__.
diff --git a/smash/mesh/mw_meshing.f90 b/smash/mesh/mw_meshing.f90
index d566d2c6..5f9b82aa 100644
--- a/smash/mesh/mw_meshing.f90
+++ b/smash/mesh/mw_meshing.f90
@@ -345,18 +345,18 @@ recursive subroutine distance_upstream_cells(nrow, ncol, ng, &
                 row_imd = row + drow(i)
                 col_imd = col + dcol(i)
 
-                do j = 1, ng
-
-                    if (row_imd .eq. row_dln(j) .and. &
-                    & col_imd .eq. col_dln(j)) flag(j) = 1
-
-                end do
-
                 if (row_imd .gt. 0 .and. row_imd .le. nrow .and. &
                 & col_imd .gt. 0 .and. col_imd .le. ncol) then
 
                     if (flwdir(row_imd, col_imd) .eq. i) then
 
+                        do j = 1, ng
+
+                            if (row_imd .eq. row_dln(j) .and. &
+                            & col_imd .eq. col_dln(j)) flag(j) = 1
+
+                        end do
+
                         !% Avoid to compute square root if not diagonal
                         if (dcol(i) .eq. 0) then
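A Python sketch of the corrected flagging logic; the actual fix is in Fortran (``mw_meshing.f90``), and the D8 offset order and flow-direction convention below are assumptions made for illustration.

    import numpy as np

    DROW = [-1, -1, 0, 1, 1, 1, 0, -1]  # assumed D8 neighbor row offsets
    DCOL = [0, 1, 1, 1, 0, -1, -1, -1]  # assumed D8 neighbor col offsets

    def flag_upstream_gauges(flwdir, row, col, gauge_cells, flag):
        # flag[j] is set only if gauge j sits on a cell that actually drains
        # into (row, col); before the fix, all 8 neighbors were flagged.
        nrow, ncol = flwdir.shape
        for i in range(8):
            r, c = row + DROW[i], col + DCOL[i]
            if 0 <= r < nrow and 0 <= c < ncol:
                if flwdir[r, c] == i + 1:  # neighbor flows into (row, col)
                    for j, (gr, gc) in enumerate(gauge_cells):
                        if (r, c) == (gr, gc):
                            flag[j] = 1
        return flag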
From d3706664002234740ec62c517d3abc12f259fb88 Mon Sep 17 00:00:00 2001
From: Francois Colleoni
Date: Wed, 12 Jul 2023 11:58:50 +0200
Subject: [PATCH 72/73] FIX: Quick fix on release note, linking to issue

---
 doc/source/release/0.5.0-notes.rst | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/doc/source/release/0.5.0-notes.rst b/doc/source/release/0.5.0-notes.rst
index 1b0e42ba..92532832 100644
--- a/doc/source/release/0.5.0-notes.rst
+++ b/doc/source/release/0.5.0-notes.rst
@@ -100,10 +100,9 @@ segmentation faults when the maximum number of recursions has been reached, a ch
 
 See issue `#31 `__.
 
-Correctly handle Nodata value during the spatial desagregation of the rainfall
-******************************************************************************
+Correctly handle Nodata value during the spatial disaggregation of the rainfall
+*******************************************************************************
 
-A crash occured during the desagregation of th rainfall. The creation of a GDAL virtual-destination failed when the parent geotiff file has its Nodata value unset (None type). When this is the case, the Nodata value of the desagregated rainfall is automatically set to -99.
-
-see issue `#36 https://github.com/DassHydro-dev/smash/pull/36/commits/9c760201ff0590f0e8ef38314b16b0b0932366f1>`__.
+A crash occurred during the disaggregation of the rainfall. The creation of a GDAL virtual-destination failed when the parent geotiff file has its Nodata value unset (None type). When this is the case, the Nodata value of the disaggregated rainfall is automatically set to -99.
 
+See issue `#40 `__.

From 48dc06be61867488c84b88d8b45c562d51357dcc Mon Sep 17 00:00:00 2001
From: Francois Colleoni
Date: Wed, 12 Jul 2023 12:10:03 +0200
Subject: [PATCH 73/73] FIX: Quick fix on error message and release note

---
 doc/source/release/0.5.0-notes.rst | 7 ++++---
 smash/core/_build_model.py         | 2 +-
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/doc/source/release/0.5.0-notes.rst b/doc/source/release/0.5.0-notes.rst
index 92a53456..968b148a 100644
--- a/doc/source/release/0.5.0-notes.rst
+++ b/doc/source/release/0.5.0-notes.rst
@@ -100,8 +100,9 @@ segmentation faults when the maximum number of recursions has been reached, a ch
 
 See issue `#31 `__.
 
-Stop the execution of smash when start_time equal to end_time
+Stop the execution of smash when ``start_time`` is equal to ``end_time``
+************************************************************************
 
-In that circonstance, the code crashed during the data reading with no obvious reason. Now just stop the code execution and return an error when this case occurs.
+When ``start_time`` is equal to ``end_time``, the code crashes during the data reading for no obvious reason. Now just stop the code execution and return an error when this case occurs.
 
-See commit ``__.
+See issue `#41 `__.
diff --git a/smash/core/_build_model.py b/smash/core/_build_model.py
index 21200551..41f8a82c 100644
--- a/smash/core/_build_model.py
+++ b/smash/core/_build_model.py
@@ -85,7 +85,7 @@ def _standardize_setup(setup: SetupDT):
 
     if (et - st).total_seconds() <= 0:
         raise ValueError(
-            "argument end_time is date earlier or equal to argument start_time"
+            "argument end_time is a date earlier than or equal to argument start_time"
        )
 
     if setup.read_qobs and setup.qobs_directory == "...":
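Circling back to the raster fix from patches 67 and 69, a hedged sketch of the nodata guard: ``safe_nodata_value`` is a hypothetical wrapper, while ``GetNoDataValue`` and ``SetNoDataValue`` are the real GDAL calls used in ``gdal_reproject_raster``.

    from osgeo import gdal

    def safe_nodata_value(dataset):
        # GetNoDataValue() returns None when the geotiff has no nodata set;
        # SetNoDataValue() only accepts a double, so fall back to -99.0
        nodata = dataset.GetRasterBand(1).GetNoDataValue()
        if not isinstance(nodata, float):
            nodata = -99.0
        return nodata

    # usage on a destination band, as in gdal_reproject_raster:
    #   band.SetNoDataValue(safe_nodata_value(src_dataset))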