From 18dd5e38c5030f585ee0338f18447e8ce02f9c57 Mon Sep 17 00:00:00 2001 From: coli Date: Wed, 15 May 2024 18:12:59 +0200 Subject: [PATCH 1/8] adding test for remap bug --- emiproc/tests_utils/test_inventories.py | 10 ++++++++++ tests/test_remap_inv.py | 13 +++++++++++-- 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/emiproc/tests_utils/test_inventories.py b/emiproc/tests_utils/test_inventories.py index 59cd41c..0d9b1d9 100644 --- a/emiproc/tests_utils/test_inventories.py +++ b/emiproc/tests_utils/test_inventories.py @@ -74,3 +74,13 @@ ) } ) + +inv_with_gdfs_bad_indexes = Inventory.from_gdf( + gdfs={ + "adf": gpd.GeoDataFrame( + {"CO2": [1, 2, 3]}, + geometry=[Point(0, 0), Point(1, 1), Point(2, 2)], + index=[0, 1, 100000000], + ) + } +) diff --git a/tests/test_remap_inv.py b/tests/test_remap_inv.py index 50b8fb2..12012b1 100644 --- a/tests/test_remap_inv.py +++ b/tests/test_remap_inv.py @@ -1,7 +1,7 @@ import pytest -from emiproc.inventories import EmissionInfo +from emiproc.inventories import EmissionInfo, Inventory from emiproc.regrid import remap_inventory -from emiproc.tests_utils.test_inventories import inv_with_pnt_sources +from emiproc.tests_utils.test_inventories import inv_with_pnt_sources, inv_with_gdfs_bad_indexes from emiproc.tests_utils.test_grids import regular_grid, gpd_grid from emiproc.inventories.utils import get_total_emissions @@ -62,3 +62,12 @@ def test_remap_different_grids(): except Exception as e: raise AssertionError(f"Remapping failed for grid {grid.name}") from e + + +def test_remap_with_gdf_wrong_indices(): + """Test that the remap_inventory function works also if the indices in the gdfs are bad.""" + + inv = inv_with_gdfs_bad_indexes + + regular_grid.crs = None + remapped = remap_inventory(inv, regular_grid) \ No newline at end of file From 70733308a02c3322faaa0d86faa8d6db16a70c62 Mon Sep 17 00:00:00 2001 From: coli Date: Wed, 15 May 2024 18:14:37 +0200 Subject: [PATCH 2/8] fixed bug remap wrong index --- emiproc/regrid.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/emiproc/regrid.py b/emiproc/regrid.py index 1354eb0..5b43a40 100644 --- a/emiproc/regrid.py +++ b/emiproc/regrid.py @@ -467,7 +467,7 @@ def remap_inventory( w_file = weigths_file.with_stem(weigths_file.stem + f"_gdfs_{category}") w_mapping = get_weights_mapping( w_file, - gdf.geometry, + gdf.geometry.reset_index(drop=True), grid_cells, loop_over_inv_objects=True, method=method, @@ -476,7 +476,12 @@ def remap_inventory( for sub in gdf.columns: if isinstance(gdf[sub].dtype, gpd.array.GeometryDtype): continue # Geometric column - remapped = weights_remap(w_mapping, gdf[sub], len(grid_cells)) + remapped = weights_remap( + w_mapping, + # Reset the index, same as the grid was applied in the weights mapping function + gdf[sub].reset_index(drop=True), + len(grid_cells), + ) if (category, sub) not in mapping_dict: # Create new entry mapping_dict[(category, sub)] = remapped From de1d78396da3dd646b28a22317eab500e9357e94 Mon Sep 17 00:00:00 2001 From: coli Date: Wed, 15 May 2024 18:57:50 +0200 Subject: [PATCH 3/8] fixed plotting --- emiproc/inventories/__init__.py | 15 ++++++++-- emiproc/plots/__init__.py | 49 +++++++++++++++++++++++++-------- 2 files changed, 49 insertions(+), 15 deletions(-) diff --git a/emiproc/inventories/__init__.py b/emiproc/inventories/__init__.py index fb74460..c03b740 100644 --- a/emiproc/inventories/__init__.py +++ b/emiproc/inventories/__init__.py @@ -178,9 +178,18 @@ def cell_areas(self) -> np.ndarray: These match the geometry from the gdf. """ - if hasattr(self, "_cell_area"): - return self._cell_area - raise NotImplementedError(f"implement or assign 'cell_areas' in {self.name}") + if not hasattr(self, "_cell_area"): + # Compute with the grid + if self.grid is None: + raise ValueError( + "No grid set, cannot compute cell areas. " + f"implement or assign 'cell_areas' in {self.name}" + ) + self._cell_area = np.array(self.grid.cell_areas) + + return self._cell_area + + @cell_areas.setter def cell_areas(self, cell_areas): diff --git a/emiproc/plots/__init__.py b/emiproc/plots/__init__.py index 98f0f99..f5747d2 100644 --- a/emiproc/plots/__init__.py +++ b/emiproc/plots/__init__.py @@ -1,4 +1,5 @@ """Few plot functions for the emiproc package.""" + from __future__ import annotations from os import PathLike from pathlib import Path @@ -9,7 +10,7 @@ import matplotlib as mpl import matplotlib.pyplot as plt -from matplotlib.colors import LogNorm +from matplotlib.colors import LogNorm, SymLogNorm from emiproc.plots import nclcmaps @@ -110,6 +111,7 @@ def plot_inventory( figsize=(16, 9), q=0.001, cmap=nclcmaps.cmap("WhViBlGrYeOrRe"), + symcmap="RdBu_r", spec_lims: None | tuple[float] = None, out_dir: PathLike | None = None, axis_formatter: str | None = None, @@ -184,22 +186,39 @@ def add_ax_info(ax: mpl.axes.Axes): fig, ax = plt.subplots( figsize=figsize, - # figsize=(38.40, 21.60), # gridspec_kw={"right": 0.9, "wspace": 0}, ) - # cax = fig.add_axes([0.87, 0.15, 0.02, 0.7]) - emission_non_zero_values = emissions[emissions > 0] + # Check if there are values below 0 + emission_non_zero_values = emissions[ + (emissions != 0) & (~np.isnan(emissions)) + ] + if len(emission_non_zero_values) == 0: + print(f"passsed {sub},{cat} no emissions") + continue - norm = LogNorm( - vmin=np.quantile(emission_non_zero_values, q), - vmax=np.quantile(emission_non_zero_values, 1 - q), - ) + if np.any(emissions < 0): + abs_values = np.abs(emission_non_zero_values) + vmax = np.quantile(abs_values, 1 - q) + # Use symlog instead + norm = SymLogNorm( + linthresh=np.quantile(abs_values, q), + vmin=-vmax, + vmax=vmax, + ) + this_cmap = symcmap + + else: + norm = LogNorm( + vmin=np.quantile(emission_non_zero_values, q), + vmax=np.quantile(emission_non_zero_values, 1 - q), + ) + this_cmap = cmap im = ax.imshow( emissions, norm=norm, - cmap=cmap, + cmap=this_cmap, extent=[x_min, x_max, y_min, y_max], ) add_ax_info(ax) @@ -208,7 +227,7 @@ def add_ax_info(ax: mpl.axes.Axes): im, label="kg/y/m2", extend="both", - extendfrac=0.02 + extendfrac=0.02, # cax=cax, ) @@ -229,7 +248,13 @@ def add_ax_info(ax: mpl.axes.Axes): fig, ax = plt.subplots(figsize=figsize) - emission_non_zero_values = total_sub_emissions[total_sub_emissions > 0] + emission_non_zero_values = total_sub_emissions[ + (total_sub_emissions > 0) & (~np.isnan(total_sub_emissions)) + ] + if len(emission_non_zero_values) == 0: + print(f"passsed {sub},total_emissions, no emissions") + continue + norm = LogNorm( vmin=np.quantile(emission_non_zero_values, q), vmax=np.quantile(emission_non_zero_values, 1 - q), @@ -246,7 +271,7 @@ def add_ax_info(ax: mpl.axes.Axes): im, label="kg/y/m2", extend="both", - extendfrac=0.02 + extendfrac=0.02, # cax=cax, ) From b5a5d10c417697b7b32b9cd225d0bdbceb0b453a Mon Sep 17 00:00:00 2001 From: coli Date: Thu, 16 May 2024 16:46:03 +0200 Subject: [PATCH 4/8] fixed bier is biogenic --- emiproc/inventories/zurich/speciation_co2_bio.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/emiproc/inventories/zurich/speciation_co2_bio.py b/emiproc/inventories/zurich/speciation_co2_bio.py index 834c720..5de3ff6 100644 --- a/emiproc/inventories/zurich/speciation_co2_bio.py +++ b/emiproc/inventories/zurich/speciation_co2_bio.py @@ -18,7 +18,7 @@ "c3407_Roestereien_Emissionen_Kanton": 0.0, "c3408_Druckereien_Emissionen_Kanton": 0.0, "c3409_Laboratorien_Emissionen_Kanton": 0.0, - "c3410_Bierbrauereien_Emissionen_Kanton": 0.0, + "c3410_Bierbrauereien_Emissionen_Kanton": 1.0, "c3411_Brotproduktion_Emissionen_Kanton": 0.0, "c3412_MedizinischePraxen_Emissionen_Kanton": 0.0, "c3413_Gesundheitswesen_Emissionen_Kanton": 0.0, From 888aa14be2d42c2f534ac125af3742604863e184 Mon Sep 17 00:00:00 2001 From: coli Date: Thu, 16 May 2024 18:21:41 +0200 Subject: [PATCH 5/8] fixed plot changing emission values and plot not closing --- emiproc/plots/__init__.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/emiproc/plots/__init__.py b/emiproc/plots/__init__.py index f5747d2..f2789d2 100644 --- a/emiproc/plots/__init__.py +++ b/emiproc/plots/__init__.py @@ -158,7 +158,7 @@ def add_ax_info(ax: mpl.axes.Axes): # And also miss point sources for that category print(f"passsed {sub},{cat} no data") continue - emissions = inv.gdf[(cat, sub)].to_numpy() + emissions = inv.gdf[(cat, sub)].copy(deep=True).to_numpy() if cat in inv.gdfs and sub in inv.gdfs[cat]: weights_file = out_dir / f".emiproc_weights_{inv.name}_gdfs_{cat}" weights_mapping = get_weights_mapping( @@ -241,6 +241,8 @@ def add_ax_info(ax: mpl.axes.Axes): fig.clear() else: plt.show() + + plt.close(fig) if not np.any(total_sub_emissions): print(f"passsed {sub},total_emissions, no emissions") @@ -286,3 +288,5 @@ def add_ax_info(ax: mpl.axes.Axes): fig.clear() else: plt.show() + + plt.close(fig) From f486d2c3b6058ff07c8ddbdbbfbf9ec035ddb8be Mon Sep 17 00:00:00 2001 From: coli Date: Thu, 16 May 2024 20:04:55 +0200 Subject: [PATCH 6/8] adding a unit --- emiproc/utilities.py | 1 + 1 file changed, 1 insertion(+) diff --git a/emiproc/utilities.py b/emiproc/utilities.py index 43dc8e0..ee2b72d 100644 --- a/emiproc/utilities.py +++ b/emiproc/utilities.py @@ -39,6 +39,7 @@ class Units(Enum): KG_PER_YEAR = "kg/y" KG_PER_HOUR = "kg/h" KG_PER_M2_PER_S = "kg/m2/s" + MUG_PER_M2_PER_S = "µg/m2/s" PER_M2_UNITS = [Units.KG_PER_M2_PER_S] From 1a5a7efb7a70ebbc270baa1bf968ebedbada8340 Mon Sep 17 00:00:00 2001 From: coli Date: Thu, 16 May 2024 20:06:24 +0200 Subject: [PATCH 7/8] improving the rasters export --- emiproc/exports/rasters.py | 124 ++++++++++++++++++++++++++++++------- 1 file changed, 103 insertions(+), 21 deletions(-) diff --git a/emiproc/exports/rasters.py b/emiproc/exports/rasters.py index e2ed163..ddfc757 100644 --- a/emiproc/exports/rasters.py +++ b/emiproc/exports/rasters.py @@ -10,7 +10,6 @@ from emiproc.utilities import Units, SEC_PER_YR - def export_raster_netcdf( inv: Inventory, path: PathLike, @@ -21,6 +20,8 @@ def export_raster_netcdf( lat_name: str = "lat", var_name_format: str = "{substance}_{category}", unit: Units = Units.KG_PER_YEAR, + group_categories: bool = False, + add_totals: bool = True, ) -> Path: """Export the inventory to a netcdf file as a raster. @@ -42,6 +43,13 @@ def export_raster_netcdf( :param var_name_format: The format string to use for the variable names. The format string should contain two named fields: ``substance`` and ``category``. :param unit: The unit of the emissions. + :param group_categories: If True the categories will be grouped in the output file. + Intead of a variable for each pair (substance, category) there will be a variable + only for each substance and the categories will be grouped in a dimension. + :param add_totals: If True, the total emissions for each substance will be added + as new variables to the file. + One will be the raster sum of all categories and the other will be the total sum + over all cells. """ @@ -58,27 +66,57 @@ def export_raster_netcdf( conversion_factor = ( 1 / SEC_PER_YR / np.array(grid.cell_areas).reshape(grid.shape).T ) + elif unit == Units.MUG_PER_M2_PER_S: + conversion_factor = ( + 1 / SEC_PER_YR / np.array(grid.cell_areas).reshape(grid.shape).T + ) * 1e9 else: raise NotImplementedError(f"Unknown {unit=}") ds = xr.Dataset( - data_vars={ - var_name_format.format(substance=sub, category=cat): ( - [lat_name, lon_name], - remapped_inv.gdf[(cat, sub)].to_numpy().reshape(grid.shape).T - * conversion_factor, - { - "standard_name": f"{sub}_{cat}", - "long_name": f"{sub}_{cat}", - "units": str(unit.value), - "comment": f"emissions of {sub} in {cat}", - "projection": f"{crs}", - }, - ) - for sub in inv.substances - for cat in inv.categories - if (cat, sub) in remapped_inv.gdf - }, + data_vars=( + { + var_name_format.format(substance=sub, category=cat): ( + [lat_name, lon_name], + remapped_inv.gdf[(cat, sub)].to_numpy().reshape(grid.shape).T + * conversion_factor, + { + "standard_name": f"{sub}_{cat}", + "long_name": f"{sub}_{cat}", + "units": str(unit.value), + "comment": f"emissions of {sub} in {cat}", + "projection": f"{crs}", + }, + ) + for sub in inv.substances + for cat in inv.categories + if (cat, sub) in remapped_inv.gdf + } + if not group_categories + else { + var_name_format.format(substance=sub): ( + ["category", lat_name, lon_name], + np.array( + [ + remapped_inv.gdf[(cat, sub)] + .to_numpy() + .reshape(grid.shape) + .T + for cat in inv.categories + ] + ) + * conversion_factor, + { + "standard_name": f"tendency_of_atmosphere_mass_content_of_{sub}_due_to_emission", + "long_name": f"{sub}", + "units": str(unit.value), + "comment": f"emissions of {sub}", + "projection": f"{crs}", + }, + ) + for sub in inv.substances + } + ), coords={ "substance": inv.substances, "category": inv.categories, @@ -113,7 +151,53 @@ def export_raster_netcdf( attrs=netcdf_attributes, ) - if unit in [Units.KG_PER_M2_PER_S]: + if add_totals: + for sub in inv.substances: + names = ( + [ + var_name_format.format(substance=sub, category=cat) + for cat in inv.categories + ] + if not group_categories + else var_name_format.format(substance=sub) + ) + ds[f"emi_{sub}_all_sectors"] = ds[names].sum("category") + ds[f"emi_{sub}_all_sectors"].attrs = { + "standard_name": f"tendency_of_atmosphere_mass_content_of_{sub}_due_to_emission", + "long_name": f"Aggregated Emissions of {sub} from all sectors", + "units": str(unit.value), + "comment": "annual mean emission rate", + "projection": f"{crs}", + } + + # Total emission is not weighted by the cell area + # So we always give kg/year + total_emission = ds[names] + if unit == Units.KG_PER_M2_PER_S: + total_emission = ( + total_emission + * np.array(grid.cell_areas).reshape(grid.shape).T + * SEC_PER_YR + ) + elif unit == Units.KG_PER_YEAR: + pass + elif unit == Units.MUG_PER_M2_PER_S: + total_emission = ( + total_emission + * np.array(grid.cell_areas).reshape(grid.shape).T + * SEC_PER_YR + * 1e-9 + ) + else: + raise NotImplementedError(f"Unknown {unit=}") + ds[f"emi_{sub}_total"] = total_emission.sum([lon_name, lat_name]) + ds[f"emi_{sub}_total"].attrs = { + "long_name": f"Total Emissions of {sub}", + "units": "kg yr-1", + "comment": "annual total emission", + } + + if unit in [Units.KG_PER_M2_PER_S, Units.MUG_PER_M2_PER_S]: # add the cell area ds["cell_area"] = ( [lat_name, lon_name], @@ -130,6 +214,4 @@ def export_raster_netcdf( out_filepath = path.with_suffix(".nc") ds.to_netcdf(out_filepath) - - return out_filepath From b2e1ffba28df69d2e33712bab88ac8c7e07c851d Mon Sep 17 00:00:00 2001 From: coli Date: Thu, 16 May 2024 20:52:28 +0200 Subject: [PATCH 8/8] fixed export for tests --- emiproc/exports/rasters.py | 47 ++++++++++++++++++++++++++---------- tests/test_export_rasters.py | 11 +++++++++ 2 files changed, 45 insertions(+), 13 deletions(-) diff --git a/emiproc/exports/rasters.py b/emiproc/exports/rasters.py index ddfc757..a22da92 100644 --- a/emiproc/exports/rasters.py +++ b/emiproc/exports/rasters.py @@ -18,7 +18,7 @@ def export_raster_netcdf( weights_path: PathLike | None = None, lon_name: str = "lon", lat_name: str = "lat", - var_name_format: str = "{substance}_{category}", + var_name_format: str | None = None, unit: Units = Units.KG_PER_YEAR, group_categories: bool = False, add_totals: bool = True, @@ -53,6 +53,12 @@ def export_raster_netcdf( """ + if var_name_format is None: + if group_categories: + var_name_format = "{substance}" + else: + var_name_format = "{substance}_{category}" + remapped_inv = remap_inventory(inv, grid, weights_path) # add the history @@ -98,10 +104,14 @@ def export_raster_netcdf( ["category", lat_name, lon_name], np.array( [ - remapped_inv.gdf[(cat, sub)] - .to_numpy() - .reshape(grid.shape) - .T + ( + remapped_inv.gdf[(cat, sub)] + .to_numpy() + .reshape(grid.shape) + .T + if (cat, sub) in remapped_inv.gdf + else np.zeros(grid.shape).T + ) for cat in inv.categories ] ) @@ -153,15 +163,18 @@ def export_raster_netcdf( if add_totals: for sub in inv.substances: - names = ( - [ + if group_categories: + da = ds[var_name_format.format(substance=sub)].sum("category") + else: + var_names = [ var_name_format.format(substance=sub, category=cat) for cat in inv.categories + if var_name_format.format(substance=sub, category=cat) + in ds.data_vars ] - if not group_categories - else var_name_format.format(substance=sub) - ) - ds[f"emi_{sub}_all_sectors"] = ds[names].sum("category") + da = sum([ds[name] for name in var_names]) + + ds[f"emi_{sub}_all_sectors"] = da ds[f"emi_{sub}_all_sectors"].attrs = { "standard_name": f"tendency_of_atmosphere_mass_content_of_{sub}_due_to_emission", "long_name": f"Aggregated Emissions of {sub} from all sectors", @@ -169,10 +182,18 @@ def export_raster_netcdf( "comment": "annual mean emission rate", "projection": f"{crs}", } - + if group_categories: + total_emission = ds[var_name_format.format(substance=sub)] + else: + da_of_cat = { + cat: ds[var_name_format.format(substance=sub, category=cat)] + if var_name_format.format(substance=sub, category=cat) in ds.data_vars + else xr.zeros_like(da) + for cat in inv.categories + } + total_emission = xr.concat(list(da_of_cat.values()), dim="category") # Total emission is not weighted by the cell area # So we always give kg/year - total_emission = ds[names] if unit == Units.KG_PER_M2_PER_S: total_emission = ( total_emission diff --git a/tests/test_export_rasters.py b/tests/test_export_rasters.py index bb3bf1e..7a2e325 100644 --- a/tests/test_export_rasters.py +++ b/tests/test_export_rasters.py @@ -19,6 +19,17 @@ def test_base_function(): regular_grid, netcdf_attributes={}, ) + +def test_group_categories(): + """Simply test that the function works with defaults""" + + export_raster_netcdf( + raster_inv, + TEST_OUTPUTS_DIR / "test_raster.nc", + regular_grid, + netcdf_attributes={}, + group_categories=True, + ) def test_unit():