From 18dd5e38c5030f585ee0338f18447e8ce02f9c57 Mon Sep 17 00:00:00 2001
From: coli <lionel.constantin@empa.ch>
Date: Wed, 15 May 2024 18:12:59 +0200
Subject: [PATCH 1/8] adding test for remap bug

---
 emiproc/tests_utils/test_inventories.py | 10 ++++++++++
 tests/test_remap_inv.py                 | 13 +++++++++++--
 2 files changed, 21 insertions(+), 2 deletions(-)

diff --git a/emiproc/tests_utils/test_inventories.py b/emiproc/tests_utils/test_inventories.py
index 59cd41c..0d9b1d9 100644
--- a/emiproc/tests_utils/test_inventories.py
+++ b/emiproc/tests_utils/test_inventories.py
@@ -74,3 +74,13 @@
         )
     }
 )
+
+inv_with_gdfs_bad_indexes = Inventory.from_gdf(
+    gdfs={
+        "adf": gpd.GeoDataFrame(
+            {"CO2": [1, 2, 3]},
+            geometry=[Point(0, 0), Point(1, 1), Point(2, 2)],
+            index=[0, 1, 100000000],
+        )
+    }
+)
diff --git a/tests/test_remap_inv.py b/tests/test_remap_inv.py
index 50b8fb2..12012b1 100644
--- a/tests/test_remap_inv.py
+++ b/tests/test_remap_inv.py
@@ -1,7 +1,7 @@
 import pytest 
-from emiproc.inventories import  EmissionInfo
+from emiproc.inventories import  EmissionInfo, Inventory
 from emiproc.regrid import remap_inventory
-from emiproc.tests_utils.test_inventories import inv_with_pnt_sources
+from emiproc.tests_utils.test_inventories import inv_with_pnt_sources, inv_with_gdfs_bad_indexes
 from emiproc.tests_utils.test_grids import regular_grid, gpd_grid
 
 from emiproc.inventories.utils import get_total_emissions
@@ -62,3 +62,12 @@ def test_remap_different_grids():
         except Exception as e:
             raise AssertionError(f"Remapping failed for grid {grid.name}") from e
 
+
+
+def test_remap_with_gdf_wrong_indices():
+    """Test that the remap_inventory function works also if the indices in the gdfs are bad."""
+
+    inv = inv_with_gdfs_bad_indexes
+
+    regular_grid.crs = None
+    remapped = remap_inventory(inv, regular_grid)
\ No newline at end of file

From 70733308a02c3322faaa0d86faa8d6db16a70c62 Mon Sep 17 00:00:00 2001
From: coli <lionel.constantin@empa.ch>
Date: Wed, 15 May 2024 18:14:37 +0200
Subject: [PATCH 2/8] fixed bug remap wrong index

---
 emiproc/regrid.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/emiproc/regrid.py b/emiproc/regrid.py
index 1354eb0..5b43a40 100644
--- a/emiproc/regrid.py
+++ b/emiproc/regrid.py
@@ -467,7 +467,7 @@ def remap_inventory(
                 w_file = weigths_file.with_stem(weigths_file.stem + f"_gdfs_{category}")
             w_mapping = get_weights_mapping(
                 w_file,
-                gdf.geometry,
+                gdf.geometry.reset_index(drop=True),
                 grid_cells,
                 loop_over_inv_objects=True,
                 method=method,
@@ -476,7 +476,12 @@ def remap_inventory(
             for sub in gdf.columns:
                 if isinstance(gdf[sub].dtype, gpd.array.GeometryDtype):
                     continue  # Geometric column
-                remapped = weights_remap(w_mapping, gdf[sub], len(grid_cells))
+                remapped = weights_remap(
+                    w_mapping,
+                    # Reset the index, same as the grid was applied in the weights mapping function
+                    gdf[sub].reset_index(drop=True),
+                    len(grid_cells),
+                )
                 if (category, sub) not in mapping_dict:
                     # Create new entry
                     mapping_dict[(category, sub)] = remapped

From de1d78396da3dd646b28a22317eab500e9357e94 Mon Sep 17 00:00:00 2001
From: coli <lionel.constantin@empa.ch>
Date: Wed, 15 May 2024 18:57:50 +0200
Subject: [PATCH 3/8] fixed plotting

---
 emiproc/inventories/__init__.py | 15 ++++++++--
 emiproc/plots/__init__.py       | 49 +++++++++++++++++++++++++--------
 2 files changed, 49 insertions(+), 15 deletions(-)

diff --git a/emiproc/inventories/__init__.py b/emiproc/inventories/__init__.py
index fb74460..c03b740 100644
--- a/emiproc/inventories/__init__.py
+++ b/emiproc/inventories/__init__.py
@@ -178,9 +178,18 @@ def cell_areas(self) -> np.ndarray:
 
         These match the geometry from the gdf.
         """
-        if hasattr(self, "_cell_area"):
-            return self._cell_area
-        raise NotImplementedError(f"implement or assign 'cell_areas' in {self.name}")
+        if not hasattr(self, "_cell_area"):
+            # Compute with the grid
+            if self.grid is None:
+                raise ValueError(
+                    "No grid set, cannot compute cell areas. "
+                    f"implement or assign 'cell_areas' in {self.name}"
+                )
+            self._cell_area = np.array(self.grid.cell_areas)
+            
+        return self._cell_area
+
+
 
     @cell_areas.setter
     def cell_areas(self, cell_areas):
diff --git a/emiproc/plots/__init__.py b/emiproc/plots/__init__.py
index 98f0f99..f5747d2 100644
--- a/emiproc/plots/__init__.py
+++ b/emiproc/plots/__init__.py
@@ -1,4 +1,5 @@
 """Few plot functions for the emiproc package."""
+
 from __future__ import annotations
 from os import PathLike
 from pathlib import Path
@@ -9,7 +10,7 @@
 
 import matplotlib as mpl
 import matplotlib.pyplot as plt
-from matplotlib.colors import LogNorm
+from matplotlib.colors import LogNorm, SymLogNorm
 
 
 from emiproc.plots import nclcmaps
@@ -110,6 +111,7 @@ def plot_inventory(
     figsize=(16, 9),
     q=0.001,
     cmap=nclcmaps.cmap("WhViBlGrYeOrRe"),
+    symcmap="RdBu_r",
     spec_lims: None | tuple[float] = None,
     out_dir: PathLike | None = None,
     axis_formatter: str | None = None,
@@ -184,22 +186,39 @@ def add_ax_info(ax: mpl.axes.Axes):
 
             fig, ax = plt.subplots(
                 figsize=figsize,
-                # figsize=(38.40, 21.60),
                 # gridspec_kw={"right": 0.9, "wspace": 0},
             )
-            # cax = fig.add_axes([0.87, 0.15, 0.02, 0.7])
 
-            emission_non_zero_values = emissions[emissions > 0]
+            # Check if there are values below 0
+            emission_non_zero_values = emissions[
+                (emissions != 0) & (~np.isnan(emissions))
+            ]
+            if len(emission_non_zero_values) == 0:
+                print(f"passsed {sub},{cat} no emissions")
+                continue
 
-            norm = LogNorm(
-                vmin=np.quantile(emission_non_zero_values, q),
-                vmax=np.quantile(emission_non_zero_values, 1 - q),
-            )
+            if np.any(emissions < 0):
+                abs_values = np.abs(emission_non_zero_values)
+                vmax = np.quantile(abs_values, 1 - q)
+                # Use symlog instead
+                norm = SymLogNorm(
+                    linthresh=np.quantile(abs_values, q),
+                    vmin=-vmax,
+                    vmax=vmax,
+                )
+                this_cmap = symcmap
+
+            else:
+                norm = LogNorm(
+                    vmin=np.quantile(emission_non_zero_values, q),
+                    vmax=np.quantile(emission_non_zero_values, 1 - q),
+                )
+                this_cmap = cmap
 
             im = ax.imshow(
                 emissions,
                 norm=norm,
-                cmap=cmap,
+                cmap=this_cmap,
                 extent=[x_min, x_max, y_min, y_max],
             )
             add_ax_info(ax)
@@ -208,7 +227,7 @@ def add_ax_info(ax: mpl.axes.Axes):
                 im,
                 label="kg/y/m2",
                 extend="both",
-                extendfrac=0.02
+                extendfrac=0.02,
                 # cax=cax,
             )
 
@@ -229,7 +248,13 @@ def add_ax_info(ax: mpl.axes.Axes):
 
         fig, ax = plt.subplots(figsize=figsize)
 
-        emission_non_zero_values = total_sub_emissions[total_sub_emissions > 0]
+        emission_non_zero_values = total_sub_emissions[
+            (total_sub_emissions > 0) & (~np.isnan(total_sub_emissions))
+        ]
+        if len(emission_non_zero_values) == 0:
+            print(f"passsed {sub},total_emissions, no emissions")
+            continue
+
         norm = LogNorm(
             vmin=np.quantile(emission_non_zero_values, q),
             vmax=np.quantile(emission_non_zero_values, 1 - q),
@@ -246,7 +271,7 @@ def add_ax_info(ax: mpl.axes.Axes):
             im,
             label="kg/y/m2",
             extend="both",
-            extendfrac=0.02
+            extendfrac=0.02,
             # cax=cax,
         )
 

From b5a5d10c417697b7b32b9cd225d0bdbceb0b453a Mon Sep 17 00:00:00 2001
From: coli <lionel.constantin@empa.ch>
Date: Thu, 16 May 2024 16:46:03 +0200
Subject: [PATCH 4/8] fixed bier is biogenic

---
 emiproc/inventories/zurich/speciation_co2_bio.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/emiproc/inventories/zurich/speciation_co2_bio.py b/emiproc/inventories/zurich/speciation_co2_bio.py
index 834c720..5de3ff6 100644
--- a/emiproc/inventories/zurich/speciation_co2_bio.py
+++ b/emiproc/inventories/zurich/speciation_co2_bio.py
@@ -18,7 +18,7 @@
     "c3407_Roestereien_Emissionen_Kanton": 0.0,
     "c3408_Druckereien_Emissionen_Kanton": 0.0,
     "c3409_Laboratorien_Emissionen_Kanton": 0.0,
-    "c3410_Bierbrauereien_Emissionen_Kanton": 0.0,
+    "c3410_Bierbrauereien_Emissionen_Kanton": 1.0,
     "c3411_Brotproduktion_Emissionen_Kanton": 0.0,
     "c3412_MedizinischePraxen_Emissionen_Kanton": 0.0,
     "c3413_Gesundheitswesen_Emissionen_Kanton": 0.0,

From 888aa14be2d42c2f534ac125af3742604863e184 Mon Sep 17 00:00:00 2001
From: coli <lionel.constantin@empa.ch>
Date: Thu, 16 May 2024 18:21:41 +0200
Subject: [PATCH 5/8] fixed plot changing emission values and plot not closing

---
 emiproc/plots/__init__.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/emiproc/plots/__init__.py b/emiproc/plots/__init__.py
index f5747d2..f2789d2 100644
--- a/emiproc/plots/__init__.py
+++ b/emiproc/plots/__init__.py
@@ -158,7 +158,7 @@ def add_ax_info(ax: mpl.axes.Axes):
                 # And also miss point sources for that category
                 print(f"passsed {sub},{cat} no data")
                 continue
-            emissions = inv.gdf[(cat, sub)].to_numpy()
+            emissions = inv.gdf[(cat, sub)].copy(deep=True).to_numpy()
             if cat in inv.gdfs and sub in inv.gdfs[cat]:
                 weights_file = out_dir / f".emiproc_weights_{inv.name}_gdfs_{cat}"
                 weights_mapping = get_weights_mapping(
@@ -241,6 +241,8 @@ def add_ax_info(ax: mpl.axes.Axes):
                 fig.clear()
             else:
                 plt.show()
+            
+            plt.close(fig)
 
         if not np.any(total_sub_emissions):
             print(f"passsed {sub},total_emissions, no emissions")
@@ -286,3 +288,5 @@ def add_ax_info(ax: mpl.axes.Axes):
             fig.clear()
         else:
             plt.show()
+        
+        plt.close(fig)

From f486d2c3b6058ff07c8ddbdbbfbf9ec035ddb8be Mon Sep 17 00:00:00 2001
From: coli <lionel.constantin@empa.ch>
Date: Thu, 16 May 2024 20:04:55 +0200
Subject: [PATCH 6/8] adding a unit

---
 emiproc/utilities.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/emiproc/utilities.py b/emiproc/utilities.py
index 43dc8e0..ee2b72d 100644
--- a/emiproc/utilities.py
+++ b/emiproc/utilities.py
@@ -39,6 +39,7 @@ class Units(Enum):
     KG_PER_YEAR = "kg/y"
     KG_PER_HOUR = "kg/h"
     KG_PER_M2_PER_S = "kg/m2/s"
+    MUG_PER_M2_PER_S = "µg/m2/s"
 
 
 PER_M2_UNITS = [Units.KG_PER_M2_PER_S]

From 1a5a7efb7a70ebbc270baa1bf968ebedbada8340 Mon Sep 17 00:00:00 2001
From: coli <lionel.constantin@empa.ch>
Date: Thu, 16 May 2024 20:06:24 +0200
Subject: [PATCH 7/8] improving the rasters export

---
 emiproc/exports/rasters.py | 124 ++++++++++++++++++++++++++++++-------
 1 file changed, 103 insertions(+), 21 deletions(-)

diff --git a/emiproc/exports/rasters.py b/emiproc/exports/rasters.py
index e2ed163..ddfc757 100644
--- a/emiproc/exports/rasters.py
+++ b/emiproc/exports/rasters.py
@@ -10,7 +10,6 @@
 from emiproc.utilities import Units, SEC_PER_YR
 
 
-
 def export_raster_netcdf(
     inv: Inventory,
     path: PathLike,
@@ -21,6 +20,8 @@ def export_raster_netcdf(
     lat_name: str = "lat",
     var_name_format: str = "{substance}_{category}",
     unit: Units = Units.KG_PER_YEAR,
+    group_categories: bool = False,
+    add_totals: bool = True,
 ) -> Path:
     """Export the inventory to a netcdf file as a raster.
 
@@ -42,6 +43,13 @@ def export_raster_netcdf(
     :param var_name_format: The format string to use for the variable names.
         The format string should contain two named fields: ``substance`` and ``category``.
     :param unit: The unit of the emissions.
+    :param group_categories: If True the categories will be grouped in the output file.
+        Intead of a variable for each pair (substance, category) there will be a variable
+        only for each substance and the categories will be grouped in a dimension.
+    :param add_totals: If True, the total emissions for each substance will be added
+        as new variables to the file.
+        One will be the raster sum of all categories and the other will be the total sum
+        over all cells.
 
     """
 
@@ -58,27 +66,57 @@ def export_raster_netcdf(
         conversion_factor = (
             1 / SEC_PER_YR / np.array(grid.cell_areas).reshape(grid.shape).T
         )
+    elif unit == Units.MUG_PER_M2_PER_S:
+        conversion_factor = (
+            1 / SEC_PER_YR / np.array(grid.cell_areas).reshape(grid.shape).T
+        ) * 1e9
     else:
         raise NotImplementedError(f"Unknown {unit=}")
 
     ds = xr.Dataset(
-        data_vars={
-            var_name_format.format(substance=sub, category=cat): (
-                [lat_name, lon_name],
-                remapped_inv.gdf[(cat, sub)].to_numpy().reshape(grid.shape).T
-                * conversion_factor,
-                {
-                    "standard_name": f"{sub}_{cat}",
-                    "long_name": f"{sub}_{cat}",
-                    "units": str(unit.value),
-                    "comment": f"emissions of {sub} in {cat}",
-                    "projection": f"{crs}",
-                },
-            )
-            for sub in inv.substances
-            for cat in inv.categories
-            if (cat, sub) in remapped_inv.gdf
-        },
+        data_vars=(
+            {
+                var_name_format.format(substance=sub, category=cat): (
+                    [lat_name, lon_name],
+                    remapped_inv.gdf[(cat, sub)].to_numpy().reshape(grid.shape).T
+                    * conversion_factor,
+                    {
+                        "standard_name": f"{sub}_{cat}",
+                        "long_name": f"{sub}_{cat}",
+                        "units": str(unit.value),
+                        "comment": f"emissions of {sub} in {cat}",
+                        "projection": f"{crs}",
+                    },
+                )
+                for sub in inv.substances
+                for cat in inv.categories
+                if (cat, sub) in remapped_inv.gdf
+            }
+            if not group_categories
+            else {
+                var_name_format.format(substance=sub): (
+                    ["category", lat_name, lon_name],
+                    np.array(
+                        [
+                            remapped_inv.gdf[(cat, sub)]
+                            .to_numpy()
+                            .reshape(grid.shape)
+                            .T
+                            for cat in inv.categories
+                        ]
+                    )
+                    * conversion_factor,
+                    {
+                        "standard_name": f"tendency_of_atmosphere_mass_content_of_{sub}_due_to_emission",
+                        "long_name": f"{sub}",
+                        "units": str(unit.value),
+                        "comment": f"emissions of {sub}",
+                        "projection": f"{crs}",
+                    },
+                )
+                for sub in inv.substances
+            }
+        ),
         coords={
             "substance": inv.substances,
             "category": inv.categories,
@@ -113,7 +151,53 @@ def export_raster_netcdf(
         attrs=netcdf_attributes,
     )
 
-    if unit in [Units.KG_PER_M2_PER_S]:
+    if add_totals:
+        for sub in inv.substances:
+            names = (
+                [
+                    var_name_format.format(substance=sub, category=cat)
+                    for cat in inv.categories
+                ]
+                if not group_categories
+                else var_name_format.format(substance=sub)
+            )
+            ds[f"emi_{sub}_all_sectors"] = ds[names].sum("category")
+            ds[f"emi_{sub}_all_sectors"].attrs = {
+                "standard_name": f"tendency_of_atmosphere_mass_content_of_{sub}_due_to_emission",
+                "long_name": f"Aggregated Emissions of {sub} from all sectors",
+                "units": str(unit.value),
+                "comment": "annual mean emission rate",
+                "projection": f"{crs}",
+            }
+
+            # Total emission is not weighted by the cell area
+            # So we always give kg/year
+            total_emission = ds[names]
+            if unit == Units.KG_PER_M2_PER_S:
+                total_emission = (
+                    total_emission
+                    * np.array(grid.cell_areas).reshape(grid.shape).T
+                    * SEC_PER_YR
+                )
+            elif unit == Units.KG_PER_YEAR:
+                pass
+            elif unit == Units.MUG_PER_M2_PER_S:
+                total_emission = (
+                    total_emission
+                    * np.array(grid.cell_areas).reshape(grid.shape).T
+                    * SEC_PER_YR
+                    * 1e-9
+                )
+            else:
+                raise NotImplementedError(f"Unknown {unit=}")
+            ds[f"emi_{sub}_total"] = total_emission.sum([lon_name, lat_name])
+            ds[f"emi_{sub}_total"].attrs = {
+                "long_name": f"Total Emissions of {sub}",
+                "units": "kg yr-1",
+                "comment": "annual total emission",
+            }
+
+    if unit in [Units.KG_PER_M2_PER_S, Units.MUG_PER_M2_PER_S]:
         # add the cell area
         ds["cell_area"] = (
             [lat_name, lon_name],
@@ -130,6 +214,4 @@ def export_raster_netcdf(
     out_filepath = path.with_suffix(".nc")
     ds.to_netcdf(out_filepath)
 
-
-
     return out_filepath

From b2e1ffba28df69d2e33712bab88ac8c7e07c851d Mon Sep 17 00:00:00 2001
From: coli <lionel.constantin@empa.ch>
Date: Thu, 16 May 2024 20:52:28 +0200
Subject: [PATCH 8/8] fixed export for tests

---
 emiproc/exports/rasters.py   | 47 ++++++++++++++++++++++++++----------
 tests/test_export_rasters.py | 11 +++++++++
 2 files changed, 45 insertions(+), 13 deletions(-)

diff --git a/emiproc/exports/rasters.py b/emiproc/exports/rasters.py
index ddfc757..a22da92 100644
--- a/emiproc/exports/rasters.py
+++ b/emiproc/exports/rasters.py
@@ -18,7 +18,7 @@ def export_raster_netcdf(
     weights_path: PathLike | None = None,
     lon_name: str = "lon",
     lat_name: str = "lat",
-    var_name_format: str = "{substance}_{category}",
+    var_name_format: str | None = None,
     unit: Units = Units.KG_PER_YEAR,
     group_categories: bool = False,
     add_totals: bool = True,
@@ -53,6 +53,12 @@ def export_raster_netcdf(
 
     """
 
+    if var_name_format is None:
+        if group_categories:
+            var_name_format = "{substance}"
+        else:
+            var_name_format = "{substance}_{category}"
+
     remapped_inv = remap_inventory(inv, grid, weights_path)
 
     # add the history
@@ -98,10 +104,14 @@ def export_raster_netcdf(
                     ["category", lat_name, lon_name],
                     np.array(
                         [
-                            remapped_inv.gdf[(cat, sub)]
-                            .to_numpy()
-                            .reshape(grid.shape)
-                            .T
+                            (
+                                remapped_inv.gdf[(cat, sub)]
+                                .to_numpy()
+                                .reshape(grid.shape)
+                                .T
+                                if (cat, sub) in remapped_inv.gdf
+                                else np.zeros(grid.shape).T
+                            )
                             for cat in inv.categories
                         ]
                     )
@@ -153,15 +163,18 @@ def export_raster_netcdf(
 
     if add_totals:
         for sub in inv.substances:
-            names = (
-                [
+            if group_categories:
+                da = ds[var_name_format.format(substance=sub)].sum("category")
+            else:
+                var_names = [
                     var_name_format.format(substance=sub, category=cat)
                     for cat in inv.categories
+                    if var_name_format.format(substance=sub, category=cat)
+                    in ds.data_vars
                 ]
-                if not group_categories
-                else var_name_format.format(substance=sub)
-            )
-            ds[f"emi_{sub}_all_sectors"] = ds[names].sum("category")
+                da = sum([ds[name] for name in var_names])
+
+            ds[f"emi_{sub}_all_sectors"] = da
             ds[f"emi_{sub}_all_sectors"].attrs = {
                 "standard_name": f"tendency_of_atmosphere_mass_content_of_{sub}_due_to_emission",
                 "long_name": f"Aggregated Emissions of {sub} from all sectors",
@@ -169,10 +182,18 @@ def export_raster_netcdf(
                 "comment": "annual mean emission rate",
                 "projection": f"{crs}",
             }
-
+            if group_categories:
+                total_emission = ds[var_name_format.format(substance=sub)]
+            else:
+                da_of_cat = {
+                    cat: ds[var_name_format.format(substance=sub, category=cat)]
+                    if var_name_format.format(substance=sub, category=cat) in ds.data_vars
+                    else xr.zeros_like(da)
+                    for cat in inv.categories
+                }
+                total_emission = xr.concat(list(da_of_cat.values()), dim="category")
             # Total emission is not weighted by the cell area
             # So we always give kg/year
-            total_emission = ds[names]
             if unit == Units.KG_PER_M2_PER_S:
                 total_emission = (
                     total_emission
diff --git a/tests/test_export_rasters.py b/tests/test_export_rasters.py
index bb3bf1e..7a2e325 100644
--- a/tests/test_export_rasters.py
+++ b/tests/test_export_rasters.py
@@ -19,6 +19,17 @@ def test_base_function():
         regular_grid,
         netcdf_attributes={},
     )
+
+def test_group_categories():
+    """Simply test that the function works with defaults"""
+
+    export_raster_netcdf(
+        raster_inv,
+        TEST_OUTPUTS_DIR / "test_raster.nc",
+        regular_grid,
+        netcdf_attributes={},
+        group_categories=True,
+    )
         
 def test_unit():