Merge pull request #50 from C2SM-RCM/coli

Various updates: There was a bug with remapping gdfs if the indexes was not a simple integer range. Improving the raster export Adding tests fix bio ratio for bier
C2SM-RCM · May 17, 2024 · b25ef71 · b25ef71
2 parents 8d233b5 + b2e1ffb
commit b25ef71
Show file tree

Hide file tree

Showing 9 changed files with 220 additions and 43 deletions.
diff --git a/emiproc/exports/rasters.py b/emiproc/exports/rasters.py
@@ -10,7 +10,6 @@
 from emiproc.utilities import Units, SEC_PER_YR
 
 
-
 def export_raster_netcdf(
     inv: Inventory,
     path: PathLike,
@@ -19,8 +18,10 @@ def export_raster_netcdf(
     weights_path: PathLike | None = None,
     lon_name: str = "lon",
     lat_name: str = "lat",
-    var_name_format: str = "{substance}_{category}",
+    var_name_format: str | None = None,
     unit: Units = Units.KG_PER_YEAR,
+    group_categories: bool = False,
+    add_totals: bool = True,
 ) -> Path:
     """Export the inventory to a netcdf file as a raster.
 
@@ -42,9 +43,22 @@ def export_raster_netcdf(
     :param var_name_format: The format string to use for the variable names.
         The format string should contain two named fields: ``substance`` and ``category``.
     :param unit: The unit of the emissions.
+    :param group_categories: If True the categories will be grouped in the output file.
+        Intead of a variable for each pair (substance, category) there will be a variable
+        only for each substance and the categories will be grouped in a dimension.
+    :param add_totals: If True, the total emissions for each substance will be added
+        as new variables to the file.
+        One will be the raster sum of all categories and the other will be the total sum
+        over all cells.
 
     """
 
+    if var_name_format is None:
+        if group_categories:
+            var_name_format = "{substance}"
+        else:
+            var_name_format = "{substance}_{category}"
+
     remapped_inv = remap_inventory(inv, grid, weights_path)
 
     # add the history
@@ -58,27 +72,61 @@ def export_raster_netcdf(
         conversion_factor = (
             1 / SEC_PER_YR / np.array(grid.cell_areas).reshape(grid.shape).T
         )
+    elif unit == Units.MUG_PER_M2_PER_S:
+        conversion_factor = (
+            1 / SEC_PER_YR / np.array(grid.cell_areas).reshape(grid.shape).T
+        ) * 1e9
     else:
         raise NotImplementedError(f"Unknown {unit=}")
 
     ds = xr.Dataset(
-        data_vars={
-            var_name_format.format(substance=sub, category=cat): (
-                [lat_name, lon_name],
-                remapped_inv.gdf[(cat, sub)].to_numpy().reshape(grid.shape).T
-                * conversion_factor,
-                {
-                    "standard_name": f"{sub}_{cat}",
-                    "long_name": f"{sub}_{cat}",
-                    "units": str(unit.value),
-                    "comment": f"emissions of {sub} in {cat}",
-                    "projection": f"{crs}",
-                },
-            )
-            for sub in inv.substances
-            for cat in inv.categories
-            if (cat, sub) in remapped_inv.gdf
-        },
+        data_vars=(
+            {
+                var_name_format.format(substance=sub, category=cat): (
+                    [lat_name, lon_name],
+                    remapped_inv.gdf[(cat, sub)].to_numpy().reshape(grid.shape).T
+                    * conversion_factor,
+                    {
+                        "standard_name": f"{sub}_{cat}",
+                        "long_name": f"{sub}_{cat}",
+                        "units": str(unit.value),
+                        "comment": f"emissions of {sub} in {cat}",
+                        "projection": f"{crs}",
+                    },
+                )
+                for sub in inv.substances
+                for cat in inv.categories
+                if (cat, sub) in remapped_inv.gdf
+            }
+            if not group_categories
+            else {
+                var_name_format.format(substance=sub): (
+                    ["category", lat_name, lon_name],
+                    np.array(
+                        [
+                            (
+                                remapped_inv.gdf[(cat, sub)]
+                                .to_numpy()
+                                .reshape(grid.shape)
+                                .T
+                                if (cat, sub) in remapped_inv.gdf
+                                else np.zeros(grid.shape).T
+                            )
+                            for cat in inv.categories
+                        ]
+                    )
+                    * conversion_factor,
+                    {
+                        "standard_name": f"tendency_of_atmosphere_mass_content_of_{sub}_due_to_emission",
+                        "long_name": f"{sub}",
+                        "units": str(unit.value),
+                        "comment": f"emissions of {sub}",
+                        "projection": f"{crs}",
+                    },
+                )
+                for sub in inv.substances
+            }
+        ),
         coords={
             "substance": inv.substances,
             "category": inv.categories,
@@ -113,7 +161,64 @@ def export_raster_netcdf(
         attrs=netcdf_attributes,
     )
 
-    if unit in [Units.KG_PER_M2_PER_S]:
+    if add_totals:
+        for sub in inv.substances:
+            if group_categories:
+                da = ds[var_name_format.format(substance=sub)].sum("category")
+            else:
+                var_names = [
+                    var_name_format.format(substance=sub, category=cat)
+                    for cat in inv.categories
+                    if var_name_format.format(substance=sub, category=cat)
+                    in ds.data_vars
+                ]
+                da = sum([ds[name] for name in var_names])
+
+            ds[f"emi_{sub}_all_sectors"] = da
+            ds[f"emi_{sub}_all_sectors"].attrs = {
+                "standard_name": f"tendency_of_atmosphere_mass_content_of_{sub}_due_to_emission",
+                "long_name": f"Aggregated Emissions of {sub} from all sectors",
+                "units": str(unit.value),
+                "comment": "annual mean emission rate",
+                "projection": f"{crs}",
+            }
+            if group_categories:
+                total_emission = ds[var_name_format.format(substance=sub)]
+            else:
+                da_of_cat = {
+                    cat: ds[var_name_format.format(substance=sub, category=cat)]
+                    if var_name_format.format(substance=sub, category=cat) in ds.data_vars
+                    else xr.zeros_like(da)
+                    for cat in inv.categories
+                }
+                total_emission = xr.concat(list(da_of_cat.values()), dim="category")
+            # Total emission is not weighted by the cell area
+            # So we always give kg/year
+            if unit == Units.KG_PER_M2_PER_S:
+                total_emission = (
+                    total_emission
+                    * np.array(grid.cell_areas).reshape(grid.shape).T
+                    * SEC_PER_YR
+                )
+            elif unit == Units.KG_PER_YEAR:
+                pass
+            elif unit == Units.MUG_PER_M2_PER_S:
+                total_emission = (
+                    total_emission
+                    * np.array(grid.cell_areas).reshape(grid.shape).T
+                    * SEC_PER_YR
+                    * 1e-9
+                )
+            else:
+                raise NotImplementedError(f"Unknown {unit=}")
+            ds[f"emi_{sub}_total"] = total_emission.sum([lon_name, lat_name])
+            ds[f"emi_{sub}_total"].attrs = {
+                "long_name": f"Total Emissions of {sub}",
+                "units": "kg yr-1",
+                "comment": "annual total emission",
+            }
+
+    if unit in [Units.KG_PER_M2_PER_S, Units.MUG_PER_M2_PER_S]:
         # add the cell area
         ds["cell_area"] = (
             [lat_name, lon_name],
@@ -130,6 +235,4 @@ def export_raster_netcdf(
     out_filepath = path.with_suffix(".nc")
     ds.to_netcdf(out_filepath)
 
-
-
     return out_filepath
diff --git a/emiproc/inventories/__init__.py b/emiproc/inventories/__init__.py
@@ -178,9 +178,18 @@ def cell_areas(self) -> np.ndarray:
 
         These match the geometry from the gdf.
         """
-        if hasattr(self, "_cell_area"):
-            return self._cell_area
-        raise NotImplementedError(f"implement or assign 'cell_areas' in {self.name}")
+        if not hasattr(self, "_cell_area"):
+            # Compute with the grid
+            if self.grid is None:
+                raise ValueError(
+                    "No grid set, cannot compute cell areas. "
+                    f"implement or assign 'cell_areas' in {self.name}"
+                )
+            self._cell_area = np.array(self.grid.cell_areas)
+
+        return self._cell_area
+
+
 
     @cell_areas.setter
     def cell_areas(self, cell_areas):

diff --git a/emiproc/inventories/zurich/speciation_co2_bio.py b/emiproc/inventories/zurich/speciation_co2_bio.py
@@ -18,7 +18,7 @@
     "c3407_Roestereien_Emissionen_Kanton": 0.0,
     "c3408_Druckereien_Emissionen_Kanton": 0.0,
     "c3409_Laboratorien_Emissionen_Kanton": 0.0,
-    "c3410_Bierbrauereien_Emissionen_Kanton": 0.0,
+    "c3410_Bierbrauereien_Emissionen_Kanton": 1.0,
     "c3411_Brotproduktion_Emissionen_Kanton": 0.0,
     "c3412_MedizinischePraxen_Emissionen_Kanton": 0.0,
     "c3413_Gesundheitswesen_Emissionen_Kanton": 0.0,

diff --git a/emiproc/plots/__init__.py b/emiproc/plots/__init__.py
@@ -1,4 +1,5 @@
 """Few plot functions for the emiproc package."""
+
 from __future__ import annotations
 from os import PathLike
 from pathlib import Path
@@ -9,7 +10,7 @@
 
 import matplotlib as mpl
 import matplotlib.pyplot as plt
-from matplotlib.colors import LogNorm
+from matplotlib.colors import LogNorm, SymLogNorm
 
 
 from emiproc.plots import nclcmaps
@@ -110,6 +111,7 @@ def plot_inventory(
     figsize=(16, 9),
     q=0.001,
     cmap=nclcmaps.cmap("WhViBlGrYeOrRe"),
+    symcmap="RdBu_r",
     spec_lims: None | tuple[float] = None,
     out_dir: PathLike | None = None,
     axis_formatter: str | None = None,
@@ -156,7 +158,7 @@ def add_ax_info(ax: mpl.axes.Axes):
                 # And also miss point sources for that category
                 print(f"passsed {sub},{cat} no data")
                 continue
-            emissions = inv.gdf[(cat, sub)].to_numpy()
+            emissions = inv.gdf[(cat, sub)].copy(deep=True).to_numpy()
             if cat in inv.gdfs and sub in inv.gdfs[cat]:
                 weights_file = out_dir / f".emiproc_weights_{inv.name}_gdfs_{cat}"
                 weights_mapping = get_weights_mapping(
@@ -184,22 +186,39 @@ def add_ax_info(ax: mpl.axes.Axes):
 
             fig, ax = plt.subplots(
                 figsize=figsize,
-                # figsize=(38.40, 21.60),
                 # gridspec_kw={"right": 0.9, "wspace": 0},
             )
-            # cax = fig.add_axes([0.87, 0.15, 0.02, 0.7])
 
-            emission_non_zero_values = emissions[emissions > 0]
+            # Check if there are values below 0
+            emission_non_zero_values = emissions[
+                (emissions != 0) & (~np.isnan(emissions))
+            ]
+            if len(emission_non_zero_values) == 0:
+                print(f"passsed {sub},{cat} no emissions")
+                continue
 
-            norm = LogNorm(
-                vmin=np.quantile(emission_non_zero_values, q),
-                vmax=np.quantile(emission_non_zero_values, 1 - q),
-            )
+            if np.any(emissions < 0):
+                abs_values = np.abs(emission_non_zero_values)
+                vmax = np.quantile(abs_values, 1 - q)
+                # Use symlog instead
+                norm = SymLogNorm(
+                    linthresh=np.quantile(abs_values, q),
+                    vmin=-vmax,
+                    vmax=vmax,
+                )
+                this_cmap = symcmap
+
+            else:
+                norm = LogNorm(
+                    vmin=np.quantile(emission_non_zero_values, q),
+                    vmax=np.quantile(emission_non_zero_values, 1 - q),
+                )
+                this_cmap = cmap
 
             im = ax.imshow(
                 emissions,
                 norm=norm,
-                cmap=cmap,
+                cmap=this_cmap,
                 extent=[x_min, x_max, y_min, y_max],
             )
             add_ax_info(ax)
@@ -208,7 +227,7 @@ def add_ax_info(ax: mpl.axes.Axes):
                 im,
                 label="kg/y/m2",
                 extend="both",
-                extendfrac=0.02
+                extendfrac=0.02,
                 # cax=cax,
             )
 
@@ -222,14 +241,22 @@ def add_ax_info(ax: mpl.axes.Axes):
                 fig.clear()
             else:
                 plt.show()
+
+            plt.close(fig)
 
         if not np.any(total_sub_emissions):
             print(f"passsed {sub},total_emissions, no emissions")
             continue
 
         fig, ax = plt.subplots(figsize=figsize)
 
-        emission_non_zero_values = total_sub_emissions[total_sub_emissions > 0]
+        emission_non_zero_values = total_sub_emissions[
+            (total_sub_emissions > 0) & (~np.isnan(total_sub_emissions))
+        ]
+        if len(emission_non_zero_values) == 0:
+            print(f"passsed {sub},total_emissions, no emissions")
+            continue
+
         norm = LogNorm(
             vmin=np.quantile(emission_non_zero_values, q),
             vmax=np.quantile(emission_non_zero_values, 1 - q),
@@ -246,7 +273,7 @@ def add_ax_info(ax: mpl.axes.Axes):
             im,
             label="kg/y/m2",
             extend="both",
-            extendfrac=0.02
+            extendfrac=0.02,
             # cax=cax,
         )
 
@@ -261,3 +288,5 @@ def add_ax_info(ax: mpl.axes.Axes):
             fig.clear()
         else:
             plt.show()
+
+        plt.close(fig)
diff --git a/emiproc/regrid.py b/emiproc/regrid.py
@@ -467,7 +467,7 @@ def remap_inventory(
                 w_file = weigths_file.with_stem(weigths_file.stem + f"_gdfs_{category}")
             w_mapping = get_weights_mapping(
                 w_file,
-                gdf.geometry,
+                gdf.geometry.reset_index(drop=True),
                 grid_cells,
                 loop_over_inv_objects=True,
                 method=method,
@@ -476,7 +476,12 @@ def remap_inventory(
             for sub in gdf.columns:
                 if isinstance(gdf[sub].dtype, gpd.array.GeometryDtype):
                     continue  # Geometric column
-                remapped = weights_remap(w_mapping, gdf[sub], len(grid_cells))
+                remapped = weights_remap(
+                    w_mapping,
+                    # Reset the index, same as the grid was applied in the weights mapping function
+                    gdf[sub].reset_index(drop=True),
+                    len(grid_cells),
+                )
                 if (category, sub) not in mapping_dict:
                     # Create new entry
                     mapping_dict[(category, sub)] = remapped

diff --git a/emiproc/tests_utils/test_inventories.py b/emiproc/tests_utils/test_inventories.py
@@ -74,3 +74,13 @@
         )
     }
 )
+
+inv_with_gdfs_bad_indexes = Inventory.from_gdf(
+    gdfs={
+        "adf": gpd.GeoDataFrame(
+            {"CO2": [1, 2, 3]},
+            geometry=[Point(0, 0), Point(1, 1), Point(2, 2)],
+            index=[0, 1, 100000000],
+        )
+    }
+)
diff --git a/emiproc/utilities.py b/emiproc/utilities.py
@@ -39,6 +39,7 @@ class Units(Enum):
     KG_PER_YEAR = "kg/y"
     KG_PER_HOUR = "kg/h"
     KG_PER_M2_PER_S = "kg/m2/s"
+    MUG_PER_M2_PER_S = "µg/m2/s"
 
 
 PER_M2_UNITS = [Units.KG_PER_M2_PER_S]