Skip to content

Commit

Permalink
Merge pull request #50 from C2SM-RCM/coli
Browse files Browse the repository at this point in the history
Various updates:

    There was a bug with remapping gdfs if the indexes was not a simple integer range.
    Improving the raster export
    Adding tests
    fix bio ratio for bier
  • Loading branch information
lionel42 authored May 17, 2024
2 parents 8d233b5 + b2e1ffb commit b25ef71
Show file tree
Hide file tree
Showing 9 changed files with 220 additions and 43 deletions.
147 changes: 125 additions & 22 deletions emiproc/exports/rasters.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
from emiproc.utilities import Units, SEC_PER_YR



def export_raster_netcdf(
inv: Inventory,
path: PathLike,
Expand All @@ -19,8 +18,10 @@ def export_raster_netcdf(
weights_path: PathLike | None = None,
lon_name: str = "lon",
lat_name: str = "lat",
var_name_format: str = "{substance}_{category}",
var_name_format: str | None = None,
unit: Units = Units.KG_PER_YEAR,
group_categories: bool = False,
add_totals: bool = True,
) -> Path:
"""Export the inventory to a netcdf file as a raster.
Expand All @@ -42,9 +43,22 @@ def export_raster_netcdf(
:param var_name_format: The format string to use for the variable names.
The format string should contain two named fields: ``substance`` and ``category``.
:param unit: The unit of the emissions.
:param group_categories: If True the categories will be grouped in the output file.
Intead of a variable for each pair (substance, category) there will be a variable
only for each substance and the categories will be grouped in a dimension.
:param add_totals: If True, the total emissions for each substance will be added
as new variables to the file.
One will be the raster sum of all categories and the other will be the total sum
over all cells.
"""

if var_name_format is None:
if group_categories:
var_name_format = "{substance}"
else:
var_name_format = "{substance}_{category}"

remapped_inv = remap_inventory(inv, grid, weights_path)

# add the history
Expand All @@ -58,27 +72,61 @@ def export_raster_netcdf(
conversion_factor = (
1 / SEC_PER_YR / np.array(grid.cell_areas).reshape(grid.shape).T
)
elif unit == Units.MUG_PER_M2_PER_S:
conversion_factor = (
1 / SEC_PER_YR / np.array(grid.cell_areas).reshape(grid.shape).T
) * 1e9
else:
raise NotImplementedError(f"Unknown {unit=}")

ds = xr.Dataset(
data_vars={
var_name_format.format(substance=sub, category=cat): (
[lat_name, lon_name],
remapped_inv.gdf[(cat, sub)].to_numpy().reshape(grid.shape).T
* conversion_factor,
{
"standard_name": f"{sub}_{cat}",
"long_name": f"{sub}_{cat}",
"units": str(unit.value),
"comment": f"emissions of {sub} in {cat}",
"projection": f"{crs}",
},
)
for sub in inv.substances
for cat in inv.categories
if (cat, sub) in remapped_inv.gdf
},
data_vars=(
{
var_name_format.format(substance=sub, category=cat): (
[lat_name, lon_name],
remapped_inv.gdf[(cat, sub)].to_numpy().reshape(grid.shape).T
* conversion_factor,
{
"standard_name": f"{sub}_{cat}",
"long_name": f"{sub}_{cat}",
"units": str(unit.value),
"comment": f"emissions of {sub} in {cat}",
"projection": f"{crs}",
},
)
for sub in inv.substances
for cat in inv.categories
if (cat, sub) in remapped_inv.gdf
}
if not group_categories
else {
var_name_format.format(substance=sub): (
["category", lat_name, lon_name],
np.array(
[
(
remapped_inv.gdf[(cat, sub)]
.to_numpy()
.reshape(grid.shape)
.T
if (cat, sub) in remapped_inv.gdf
else np.zeros(grid.shape).T
)
for cat in inv.categories
]
)
* conversion_factor,
{
"standard_name": f"tendency_of_atmosphere_mass_content_of_{sub}_due_to_emission",
"long_name": f"{sub}",
"units": str(unit.value),
"comment": f"emissions of {sub}",
"projection": f"{crs}",
},
)
for sub in inv.substances
}
),
coords={
"substance": inv.substances,
"category": inv.categories,
Expand Down Expand Up @@ -113,7 +161,64 @@ def export_raster_netcdf(
attrs=netcdf_attributes,
)

if unit in [Units.KG_PER_M2_PER_S]:
if add_totals:
for sub in inv.substances:
if group_categories:
da = ds[var_name_format.format(substance=sub)].sum("category")
else:
var_names = [
var_name_format.format(substance=sub, category=cat)
for cat in inv.categories
if var_name_format.format(substance=sub, category=cat)
in ds.data_vars
]
da = sum([ds[name] for name in var_names])

ds[f"emi_{sub}_all_sectors"] = da
ds[f"emi_{sub}_all_sectors"].attrs = {
"standard_name": f"tendency_of_atmosphere_mass_content_of_{sub}_due_to_emission",
"long_name": f"Aggregated Emissions of {sub} from all sectors",
"units": str(unit.value),
"comment": "annual mean emission rate",
"projection": f"{crs}",
}
if group_categories:
total_emission = ds[var_name_format.format(substance=sub)]
else:
da_of_cat = {
cat: ds[var_name_format.format(substance=sub, category=cat)]
if var_name_format.format(substance=sub, category=cat) in ds.data_vars
else xr.zeros_like(da)
for cat in inv.categories
}
total_emission = xr.concat(list(da_of_cat.values()), dim="category")
# Total emission is not weighted by the cell area
# So we always give kg/year
if unit == Units.KG_PER_M2_PER_S:
total_emission = (
total_emission
* np.array(grid.cell_areas).reshape(grid.shape).T
* SEC_PER_YR
)
elif unit == Units.KG_PER_YEAR:
pass
elif unit == Units.MUG_PER_M2_PER_S:
total_emission = (
total_emission
* np.array(grid.cell_areas).reshape(grid.shape).T
* SEC_PER_YR
* 1e-9
)
else:
raise NotImplementedError(f"Unknown {unit=}")
ds[f"emi_{sub}_total"] = total_emission.sum([lon_name, lat_name])
ds[f"emi_{sub}_total"].attrs = {
"long_name": f"Total Emissions of {sub}",
"units": "kg yr-1",
"comment": "annual total emission",
}

if unit in [Units.KG_PER_M2_PER_S, Units.MUG_PER_M2_PER_S]:
# add the cell area
ds["cell_area"] = (
[lat_name, lon_name],
Expand All @@ -130,6 +235,4 @@ def export_raster_netcdf(
out_filepath = path.with_suffix(".nc")
ds.to_netcdf(out_filepath)



return out_filepath
15 changes: 12 additions & 3 deletions emiproc/inventories/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,9 +178,18 @@ def cell_areas(self) -> np.ndarray:
These match the geometry from the gdf.
"""
if hasattr(self, "_cell_area"):
return self._cell_area
raise NotImplementedError(f"implement or assign 'cell_areas' in {self.name}")
if not hasattr(self, "_cell_area"):
# Compute with the grid
if self.grid is None:
raise ValueError(
"No grid set, cannot compute cell areas. "
f"implement or assign 'cell_areas' in {self.name}"
)
self._cell_area = np.array(self.grid.cell_areas)

return self._cell_area



@cell_areas.setter
def cell_areas(self, cell_areas):
Expand Down
2 changes: 1 addition & 1 deletion emiproc/inventories/zurich/speciation_co2_bio.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
"c3407_Roestereien_Emissionen_Kanton": 0.0,
"c3408_Druckereien_Emissionen_Kanton": 0.0,
"c3409_Laboratorien_Emissionen_Kanton": 0.0,
"c3410_Bierbrauereien_Emissionen_Kanton": 0.0,
"c3410_Bierbrauereien_Emissionen_Kanton": 1.0,
"c3411_Brotproduktion_Emissionen_Kanton": 0.0,
"c3412_MedizinischePraxen_Emissionen_Kanton": 0.0,
"c3413_Gesundheitswesen_Emissionen_Kanton": 0.0,
Expand Down
55 changes: 42 additions & 13 deletions emiproc/plots/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Few plot functions for the emiproc package."""

from __future__ import annotations
from os import PathLike
from pathlib import Path
Expand All @@ -9,7 +10,7 @@

import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib.colors import LogNorm
from matplotlib.colors import LogNorm, SymLogNorm


from emiproc.plots import nclcmaps
Expand Down Expand Up @@ -110,6 +111,7 @@ def plot_inventory(
figsize=(16, 9),
q=0.001,
cmap=nclcmaps.cmap("WhViBlGrYeOrRe"),
symcmap="RdBu_r",
spec_lims: None | tuple[float] = None,
out_dir: PathLike | None = None,
axis_formatter: str | None = None,
Expand Down Expand Up @@ -156,7 +158,7 @@ def add_ax_info(ax: mpl.axes.Axes):
# And also miss point sources for that category
print(f"passsed {sub},{cat} no data")
continue
emissions = inv.gdf[(cat, sub)].to_numpy()
emissions = inv.gdf[(cat, sub)].copy(deep=True).to_numpy()
if cat in inv.gdfs and sub in inv.gdfs[cat]:
weights_file = out_dir / f".emiproc_weights_{inv.name}_gdfs_{cat}"
weights_mapping = get_weights_mapping(
Expand Down Expand Up @@ -184,22 +186,39 @@ def add_ax_info(ax: mpl.axes.Axes):

fig, ax = plt.subplots(
figsize=figsize,
# figsize=(38.40, 21.60),
# gridspec_kw={"right": 0.9, "wspace": 0},
)
# cax = fig.add_axes([0.87, 0.15, 0.02, 0.7])

emission_non_zero_values = emissions[emissions > 0]
# Check if there are values below 0
emission_non_zero_values = emissions[
(emissions != 0) & (~np.isnan(emissions))
]
if len(emission_non_zero_values) == 0:
print(f"passsed {sub},{cat} no emissions")
continue

norm = LogNorm(
vmin=np.quantile(emission_non_zero_values, q),
vmax=np.quantile(emission_non_zero_values, 1 - q),
)
if np.any(emissions < 0):
abs_values = np.abs(emission_non_zero_values)
vmax = np.quantile(abs_values, 1 - q)
# Use symlog instead
norm = SymLogNorm(
linthresh=np.quantile(abs_values, q),
vmin=-vmax,
vmax=vmax,
)
this_cmap = symcmap

else:
norm = LogNorm(
vmin=np.quantile(emission_non_zero_values, q),
vmax=np.quantile(emission_non_zero_values, 1 - q),
)
this_cmap = cmap

im = ax.imshow(
emissions,
norm=norm,
cmap=cmap,
cmap=this_cmap,
extent=[x_min, x_max, y_min, y_max],
)
add_ax_info(ax)
Expand All @@ -208,7 +227,7 @@ def add_ax_info(ax: mpl.axes.Axes):
im,
label="kg/y/m2",
extend="both",
extendfrac=0.02
extendfrac=0.02,
# cax=cax,
)

Expand All @@ -222,14 +241,22 @@ def add_ax_info(ax: mpl.axes.Axes):
fig.clear()
else:
plt.show()

plt.close(fig)

if not np.any(total_sub_emissions):
print(f"passsed {sub},total_emissions, no emissions")
continue

fig, ax = plt.subplots(figsize=figsize)

emission_non_zero_values = total_sub_emissions[total_sub_emissions > 0]
emission_non_zero_values = total_sub_emissions[
(total_sub_emissions > 0) & (~np.isnan(total_sub_emissions))
]
if len(emission_non_zero_values) == 0:
print(f"passsed {sub},total_emissions, no emissions")
continue

norm = LogNorm(
vmin=np.quantile(emission_non_zero_values, q),
vmax=np.quantile(emission_non_zero_values, 1 - q),
Expand All @@ -246,7 +273,7 @@ def add_ax_info(ax: mpl.axes.Axes):
im,
label="kg/y/m2",
extend="both",
extendfrac=0.02
extendfrac=0.02,
# cax=cax,
)

Expand All @@ -261,3 +288,5 @@ def add_ax_info(ax: mpl.axes.Axes):
fig.clear()
else:
plt.show()

plt.close(fig)
9 changes: 7 additions & 2 deletions emiproc/regrid.py
Original file line number Diff line number Diff line change
Expand Up @@ -467,7 +467,7 @@ def remap_inventory(
w_file = weigths_file.with_stem(weigths_file.stem + f"_gdfs_{category}")
w_mapping = get_weights_mapping(
w_file,
gdf.geometry,
gdf.geometry.reset_index(drop=True),
grid_cells,
loop_over_inv_objects=True,
method=method,
Expand All @@ -476,7 +476,12 @@ def remap_inventory(
for sub in gdf.columns:
if isinstance(gdf[sub].dtype, gpd.array.GeometryDtype):
continue # Geometric column
remapped = weights_remap(w_mapping, gdf[sub], len(grid_cells))
remapped = weights_remap(
w_mapping,
# Reset the index, same as the grid was applied in the weights mapping function
gdf[sub].reset_index(drop=True),
len(grid_cells),
)
if (category, sub) not in mapping_dict:
# Create new entry
mapping_dict[(category, sub)] = remapped
Expand Down
10 changes: 10 additions & 0 deletions emiproc/tests_utils/test_inventories.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,3 +74,13 @@
)
}
)

inv_with_gdfs_bad_indexes = Inventory.from_gdf(
gdfs={
"adf": gpd.GeoDataFrame(
{"CO2": [1, 2, 3]},
geometry=[Point(0, 0), Point(1, 1), Point(2, 2)],
index=[0, 1, 100000000],
)
}
)
1 change: 1 addition & 0 deletions emiproc/utilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ class Units(Enum):
KG_PER_YEAR = "kg/y"
KG_PER_HOUR = "kg/h"
KG_PER_M2_PER_S = "kg/m2/s"
MUG_PER_M2_PER_S = "µg/m2/s"


PER_M2_UNITS = [Units.KG_PER_M2_PER_S]
Expand Down
Loading

0 comments on commit b25ef71

Please sign in to comment.