From 7489be15c644b54c5f20501c38e654a47c63c53c Mon Sep 17 00:00:00 2001 From: Patrick Schmitt <50843444+pat-schmitt@users.noreply.github.com> Date: Thu, 25 Apr 2024 12:07:03 +0200 Subject: [PATCH] Bugfix in merge_gridded_data (#1697) * only preserve total when not zero in merge_gridded_data * include smooting in merge_gridded_data * added possibility to merge monthly distributed thicknesses * small adaptations in merge_simulated_thickness --- oggm/core/gis.py | 27 ++++++++++++- oggm/sandbox/distribute_2d.py | 73 ++++++++++++++++++++++++----------- oggm/tests/test_models.py | 34 +++++++++++++++- oggm/workflow.py | 6 +++ 4 files changed, 116 insertions(+), 24 deletions(-) diff --git a/oggm/core/gis.py b/oggm/core/gis.py index 88d3e4bd0..7765e083c 100644 --- a/oggm/core/gis.py +++ b/oggm/core/gis.py @@ -1896,6 +1896,7 @@ def reproject_gridded_data_variable_to_grid(gdir, use_glacier_mask=True, interp='nearest', preserve_totals=True, + smooth_radius=None, slice_of_variable=None): """ Function for reprojecting a gridded data variable to a different grid. @@ -1926,6 +1927,10 @@ def reproject_gridded_data_variable_to_grid(gdir, The total value is defined as the sum of all grid cell values times the area of the grid cell (e.g. preserving ice volume). Default is True. + smooth_radius : int + pixel size of the gaussian smoothing, only used if preserve_totals is + True. Default is to use cfg.PARAMS['smooth_window'] (i.e. a size in + meters). Set to zero to suppress smoothing. slice_of_variable : None | dict Can provide dimensions with values as a dictionary for extracting only a slice of the data before reprojecting. This can be useful for large @@ -1958,6 +1963,7 @@ def reproject_gridded_data_variable_to_grid(gdir, if preserve_totals: # only preserve for float variables if not np.issubdtype(data, np.integer): + # do we want to do smoothing? if len(data.dims) == 2: sum_axis = None @@ -1969,10 +1975,29 @@ def reproject_gridded_data_variable_to_grid(gdir, total_before = (np.nansum(data.values, axis=sum_axis) * ds.salem.grid.dx ** 2) + + if smooth_radius != 0: + if smooth_radius is None: + smooth_radius = np.rint(cfg.PARAMS['smooth_window'] / + target_grid.dx) + # use data_mask to not expand the extent of the data + data_mask = ~np.isclose(r_data, 0, atol=1e-6) + if r_data.ndim == 3: + r_data = np.array( + [gaussian_blur(r_data[i, :, :], int(smooth_radius)) + for i in range(r_data.shape[0])]) + else: + r_data = gaussian_blur(r_data, int(smooth_radius)) + r_data[~data_mask] = 0 + total_after = (np.nansum(r_data, axis=sum_axis) * target_grid.dx ** 2) - factor = total_before / total_after + # only preserve total if there is some data before + factor = np.where(np.isclose(total_before, 0, atol=1e-6), + 0., + total_before / total_after) + if len(data.dims) == 3: # need to add two axis for broadcasting factor = factor[:, np.newaxis, np.newaxis] diff --git a/oggm/sandbox/distribute_2d.py b/oggm/sandbox/distribute_2d.py index 9226b2c89..744b6f058 100644 --- a/oggm/sandbox/distribute_2d.py +++ b/oggm/sandbox/distribute_2d.py @@ -235,7 +235,8 @@ def distribute_thickness_from_simulation(gdir, into account for the melt *within* one band, a downside which is somehow mitigated with smoothing (the default is quite some smoothing). - Writes a new file caller gridded_simulation.nc together with a new time dimension. + Writes a new file called gridded_simulation.nc together with a new time + dimension. Parameters ---------- @@ -244,13 +245,13 @@ def distribute_thickness_from_simulation(gdir, input_filesuffix : str the filesuffix of the flowline diagnostics file. output_filesuffix : str - the filesuffix of the gridded_simulation file to write. If empty, + the filesuffix of the gridded_simulation.nc file to write. If empty, it will be set to input_filesuffix. concat_input_filesuffix : str - the filesuffix of the flowline diagnostics file to concat with the - main one. `concat_input_filesuffix` is assumed to be prior to the - main one, i.e. often you will be calling - `concat_input_filesuffix='_spinup_historical'`. + the filesuffix of the flowline diagnostics file to concat with the main + one, provided with input_filesuffix. `concat_input_filesuffix` is + assumed to be prior in time to the main one, i.e. often you will be + calling `concat_input_filesuffix='_spinup_historical'`. fl_diag : xarray.core.dataset.Dataset directly provide a flowline diagnostics file instead of reading it from disk. This could be useful, for example, to merge two files @@ -385,17 +386,23 @@ def distribute_thickness_from_simulation(gdir, this_glacier_mask = new_thick > 0 this_vol = dgy.volume_m3.values.sum() - # Smooth - dx = gdir.grid.dx - if smooth_radius != 0: - if smooth_radius is None: - smooth_radius = np.rint(cfg.PARAMS['smooth_window'] / dx) - new_thick = gaussian_blur(new_thick, int(smooth_radius)) + if np.isclose(this_vol, 0, atol=1e-6): + # No ice left + new_thick = np.NaN + else: + # Smooth + dx = gdir.grid.dx + if smooth_radius != 0: + if smooth_radius is None: + smooth_radius = np.rint(cfg.PARAMS['smooth_window'] / dx) + new_thick = gaussian_blur(new_thick, int(smooth_radius)) + new_thick[~this_glacier_mask] = np.NaN - # Conserve volume - tmp_vol = np.nansum(new_thick) * dx2 - new_thick *= this_vol / tmp_vol + # Conserve volume + tmp_vol = np.nansum(new_thick) * dx2 + new_thick *= this_vol / tmp_vol + out_thick[i, :] = new_thick with xr.open_dataset(gdir.get_filepath('gridded_data')) as ds: @@ -433,6 +440,7 @@ def merge_simulated_thickness(gdirs, keep_dem_file=False, interp='nearest', preserve_totals=True, + smooth_radius=None, use_glacier_mask=True, add_topography=True, use_multiprocessing=False, @@ -453,13 +461,18 @@ def merge_simulated_thickness(gdirs, Default is gridded_simulation_merged{simulation_filesuffix}. simulation_filesuffix : str the filesuffix of the gridded_simulation file - years_to_merge : list | None - If not None, only the years in this list are merged. Default is None. + years_to_merge : list | xr.DataArray | None + If not None, and list of integers only the years at month 1 are + merged. You can also provide the timesteps as xarray.DataArray + containing calendar_year and calendar_month as it is the standard + output format for gridded nc files of OGGM. + Default is None. keep_dem_file interp add_topography use_glacier_mask preserve_totals + smooth_radius use_multiprocessing : bool If we use multiprocessing the merging is done in parallel, but requires more memory. Default is True. @@ -498,6 +511,7 @@ def _calc_bedrock_topo(fp): 'distributed_thickness', ], preserve_totals=preserve_totals, + smooth_radius=smooth_radius, use_glacier_mask=use_glacier_mask, add_topography=add_topography, keep_dem_file=keep_dem_file, @@ -514,22 +528,36 @@ def _calc_bedrock_topo(fp): # then the simulated thickness files if years_to_merge is None: - # open first file to get the years + # open first file to get all available timesteps with xr.open_dataset( gdirs[0].get_filepath('gridded_simulation', filesuffix=simulation_filesuffix) ) as ds: - years_to_merge = ds.time.values + years_to_merge = ds.time + + for timestep in years_to_merge: + if isinstance(timestep, int) or isinstance(timestep, np.int64): + year = timestep + month = 1 + elif isinstance(timestep, xr.DataArray): + year = int(timestep.calendar_year) + month = int(timestep.calendar_month) + else: + raise NotImplementedError('Wrong type for years_to_merge! ' + 'Should be list of int or ' + 'xarray.DataArray for monthly ' + 'timesteps.') - for yr in years_to_merge: workflow.merge_gridded_data( gdirs, output_folder=output_folder, - output_filename=f"{output_filename}_{yr}", + output_filename=f"{output_filename}_{year}_{month:02d}", input_file='gridded_simulation', input_filesuffix=simulation_filesuffix, - included_variables=[('simulated_thickness', {'time': [yr]})], + included_variables=[('simulated_thickness', + {'time': [timestep]})], preserve_totals=preserve_totals, + smooth_radius=smooth_radius, use_glacier_mask=use_glacier_mask, add_topography=False, keep_dem_file=False, @@ -557,6 +585,7 @@ def _calc_bedrock_topo(fp): ], [('simulated_thickness', selected_time)]], preserve_totals=preserve_totals, + smooth_radius=smooth_radius, use_glacier_mask=use_glacier_mask, add_topography=add_topography, keep_dem_file=keep_dem_file, diff --git a/oggm/tests/test_models.py b/oggm/tests/test_models.py index b53e77a3c..590f8aa85 100644 --- a/oggm/tests/test_models.py +++ b/oggm/tests/test_models.py @@ -5772,7 +5772,15 @@ def test_merge_simulated_thickness(self): prepro_base_url='https://cluster.klima.uni-bremen.de/~oggm/gdirs/' 'oggm_v1.6/L3-L5_files/2023.1/elev_bands/W5E5/') - workflow.execute_entity_task(run_random_climate, gdirs, y0=2014, + # we run the first glacier with a larger temperature bias to force a + # a complete retreat for testing + workflow.execute_entity_task(tasks.run_random_climate, gdirs[0:1], + y0=2014, halfsize=5, nyears=20, seed=0, + temperature_bias=5, + output_filesuffix='_random', + store_fl_diagnostics=True) + + workflow.execute_entity_task(run_random_climate, gdirs[1:], y0=2014, halfsize=5, nyears=20, seed=0, output_filesuffix='_random', store_fl_diagnostics=True) @@ -5850,3 +5858,27 @@ def test_merge_simulated_thickness(self): ds_merged_selected_years = ds assert_allclose(ds_merged_selected_years.simulated_thickness.values, ds_merged.loc[{'time': years_to_test}].simulated_thickness.values) + + # test merging of monthly distributed data + workflow.execute_entity_task(run_random_climate, gdirs, y0=2014, + halfsize=5, nyears=2, seed=0, + output_filesuffix='_random_short', + store_fl_diagnostics=True) + workflow.execute_entity_task( + distribute_2d.distribute_thickness_from_simulation, + gdirs, + input_filesuffix='_random_short', + add_monthly=True, + ) + distribute_2d.merge_simulated_thickness( + gdirs, + simulation_filesuffix='_random_short', + reset=True) + files_to_open_monthly = glob.glob( + os.path.join(cfg.PATHS['working_dir'], + 'gridded_simulation_merged_random_short*')) + assert len(files_to_open_monthly) == 26 + with xr.open_mfdataset(files_to_open_monthly) as ds: + ds_merged_monthly = ds + + assert len(ds_merged_monthly.time) == 25 diff --git a/oggm/workflow.py b/oggm/workflow.py index d36b8e780..73e3e3db0 100644 --- a/oggm/workflow.py +++ b/oggm/workflow.py @@ -848,6 +848,7 @@ def merge_gridded_data(gdirs, output_folder=None, input_filesuffix='', included_variables='all', preserve_totals=True, + smooth_radius=None, use_glacier_mask=True, add_topography=False, keep_dem_file=False, @@ -901,6 +902,10 @@ def merge_gridded_data(gdirs, output_folder=None, original file. The total value is defined as the sum of all grid cell values times the area of the grid cell (e.g. preserving ice volume). Default is True. + smooth_radius : int + pixel size of the gaussian smoothing, only used if preserve_totals is + True. Default is to use cfg.PARAMS['smooth_window'] (i.e. a size in + meters). Set to zero to suppress smoothing. use_glacier_mask : bool If True only the data cropped by the glacier mask is included in the merged file. You must make sure that the variable 'glacier_mask' exists @@ -1085,6 +1090,7 @@ def merge_gridded_data(gdirs, output_folder=None, use_glacier_mask=use_glacier_mask, interp=interp, preserve_totals=preserve_totals, + smooth_radius=smooth_radius, slice_of_variable=slice_of_var, )