Merge branch 'coli' of https://github.com/C2SM-RCM/emiproc into coli

C2SM-RCM · Feb 21, 2024 · c39fa01 · c39fa01
2 parents 08f2777 + 14d01c5
commit c39fa01
Show file tree

Hide file tree

Showing 70 changed files with 4,196 additions and 1,455 deletions.
diff --git a/.github/workflows/pull-request-links.yaml b/.github/workflows/pull-request-links.yaml
@@ -0,0 +1,16 @@
+# Adds a link to the Read the Docs preview build to every newly opened
+# pull request.
+name: readthedocs/actions
+on:
+  pull_request_target:
+    types:
+      - opened
+
+# Write access to pull requests is the only permission granted to this workflow.
+permissions:
+  pull-requests: write
+
+jobs:
+  pull-request-links:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: readthedocs/actions/preview@v1
+        with:
+          # Must be the slug of THIS project on Read the Docs.
+          # "readthedocs-preview" is only the placeholder used in the action's
+          # example and would link to the wrong project.
+          # NOTE(review): confirm that the Read the Docs slug is "emiproc".
+          project-slug: "emiproc"
diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml
@@ -0,0 +1,36 @@
+# This workflow will install Python dependencies and run the tests with a variety of Python versions
+# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python
+
+name: Python package
+
+on:
+  push:
+    branches: [ "master" ]
+  pull_request:
+    branches: [ "master" ]
+
+jobs:
+  build:
+
+    runs-on: ubuntu-latest
+    strategy:
+      # Let the other Python versions finish even if one of them fails
+      fail-fast: false
+      matrix:
+        # Quoted on purpose: an unquoted 3.10 would be read as the float 3.1
+        python-version: ["3.9", "3.10", "3.11"]
+
+    steps:
+    - uses: actions/checkout@v4
+    - name: Set up Python ${{ matrix.python-version }}
+      uses: actions/setup-python@v5
+      with:
+        python-version: ${{ matrix.python-version }}
+    - name: Install dependencies
+      run: |
+        python -m pip install --upgrade pip
+        python -m pip install pytest
+    - name: Install package
+      run: |
+        python -m pip install -e .
+    - name: Test with pytest
+      run: |
+        pytest
diff --git a/docs/source/api.rst b/docs/source/api.rst
@@ -53,9 +53,8 @@ Categories Manipulations
 Speciation
 ----------
 
-.. autofunction:: emiproc.speciation.speciate_inventory
+.. autofunction:: emiproc.speciation.speciate
 
-.. autofunction:: emiproc.speciation.speciate_nox
 
 
 Utilities
@@ -158,6 +157,33 @@ input/output
 .. autofunction:: emiproc.profiles.temporal_profiles.to_yaml
 
 
+Data Generation
+---------------
+
+Functions that can be used to generate some parts of inventory data.
+
+Heating Degree Days (HDD)
+^^^^^^^^^^^^^^^^^^^^^^^^^
+
+.. autofunction:: emiproc.profiles.hdd.create_HDD_scaling_factor
+
+
+Human Respiration 
+^^^^^^^^^^^^^^^^^ 
+
+.. autofunction:: emiproc.human_respiration.people_to_emissions
+
+.. autoenum:: emiproc.human_respiration.EmissionFactor
+
+
+VPRM 
+^^^^
+
+.. automodule:: emiproc.profiles.vprm
+    :members:
+
+
+
 Exporting 
 ---------
 

diff --git a/docs/source/bibliography.rst b/docs/source/bibliography.rst
@@ -8,3 +8,19 @@ Bibliography
     Jähn et al., Geosci. Model Dev., 13, 2379–2392, 2020
 
     https://doi.org/10.5194/gmd-13-2379-2020.
+
+.. [Mahadevan_2008]
+    A satellite-based biosphere parameterization for net ecosystem CO2 exchange: Vegetation Photosynthesis and Respiration Model (VPRM)
+
+    Mahadevan, Pathmathevan and Wofsy, Steven C. and Matross, Daniel M. and Xiao, Xiangming and Dunn, Allison L. and Lin, John C. and Gerbig, Christoph and Munger, J. William and Chow, Victoria Y. and Gottlieb, Elaine W.
+    Global Biogeochemical Cycles, 22, 2008
+
+    https://doi.org/10.1029/2006GB002735
+
+.. [Urban_VPRM]
+    Quantification of Urban Forest and Grassland Carbon Fluxes Using Field Measurements and a Satellite-Based Model in Washington DC/Baltimore Area
+
+    Winbourne, J. B. and Smith, I. A. and Stoynova, H. and Kohler, C. and Gately, C. K. and Logan, B. A. and Reblin, J. and Reinmann, A. and Allen, D. W. and Hutyra, L. R.
+    Journal of Geophysical Research: Biogeosciences, 127, 2022
+
+    https://doi.org/10.1029/2021JG006568
diff --git a/emiproc/__init__.py b/emiproc/__init__.py
@@ -1,10 +1,12 @@
 """Emission processing package."""
+from __future__ import annotations
 
 import logging
 from pathlib import Path
 
 # directory where the data files are stored
 FILES_DIR = Path(__file__).parent.parent / "files"
+TESTS_DIR = FILES_DIR / "test"
 
 logger = logging.getLogger("emiproc")
 
@@ -15,14 +17,19 @@
 logger.setLevel(PROCESS)
 
 
-def deprecated(func):
-    """Decorator to mark functions as deprecated."""
-
-    def wrapper(*args, **kwargs):
-        logger.warning(
-            "Call to deprecated function {}.".format(func.__name__),
-            stacklevel=2,
-        )
-        return func(*args, **kwargs)
-
-    return wrapper
+def deprecated(msg: str | None = None):
+    """Decorator factory to mark functions as deprecated.
+
+    Every call to the decorated function logs a warning on the module
+    ``logger`` and then calls the function with unchanged arguments.
+
+    Supported usages::
+
+        @deprecated()
+        @deprecated("Use new_function instead.")
+        @deprecated            # bare form, kept for backward compatibility
+
+    :param msg: Optional text appended to the default warning message.
+        When the decorator is used bare, this is the function to wrap.
+    :return: The decorator to apply to the function
+        (or directly the wrapped function in the bare form).
+    """
+    import functools
+
+    def deprecated_decorator(func, msg=msg):
+        # The message does not depend on the call, build it only once
+        warning = "Call to deprecated function {}.".format(func.__name__)
+        if msg is not None:
+            warning = warning + " " + msg
+
+        # Keep the name and docstring of the wrapped function
+        @functools.wraps(func)
+        def wrapper(*args, **kwargs):
+            # The wrapper declares no keyword of its own, so that a ``msg=``
+            # argument given by the caller reaches ``func`` untouched.
+            logger.warning(warning, stacklevel=2)
+            return func(*args, **kwargs)
+
+        return wrapper
+
+    if callable(msg):
+        # Bare ``@deprecated``: ``msg`` is actually the function to wrap
+        return deprecated_decorator(msg, msg=None)
+
+    return deprecated_decorator
diff --git a/emiproc/exports/hourly.py b/emiproc/exports/hourly.py
@@ -0,0 +1,210 @@
+from __future__ import annotations
+
+import logging
+from datetime import datetime
+from os import PathLike
+from pathlib import Path
+
+import numpy as np
+import pandas as pd
+import xarray as xr
+
+from emiproc import PROCESS
+from emiproc.exports.netcdf import NetcdfAttributes
+from emiproc.grids import RegularGrid
+from emiproc.inventories import Inventory
+from emiproc.profiles.temporal_profiles import create_scaling_factors_time_serie
+from emiproc.profiles.utils import get_desired_profile_index
+from emiproc.regrid import remap_inventory
+from emiproc.utilities import HOUR_PER_YR, PER_M2_UNITS, SEC_PER_YR, Units
+
+logger = logging.getLogger(__name__)
+
+
+def export_hourly_emissions(
+    inv: Inventory,
+    path: PathLike,
+    start_time: datetime,
+    end_time: datetime,
+    netcdf_attributes: NetcdfAttributes,
+    var_name_format: str = "{substance}_{category}",
+    filename_format: str = "%Y%m%dT%H%M%SZ.nc",
+    unit: Units = Units.KG_PER_HOUR,
+) -> Path:
+    """Export the inventory to hourly netcdf files.
+
+    One file is written for each time step of the scaling factors time serie
+    created between ``start_time`` and ``end_time``.
+
+    Supports structured and unstructured grids. Only the gridded emissions
+    are exported: an inventory containing shapped sources (``inv.gdfs``)
+    is rejected.
+
+    A file given at a specific hour is valid for the whole hour.
+    (ex file for 14h00 is valid from 14h00 to 14h59m59s)
+
+    :param inv: the inventory to export
+    :param path: the path to the output directory.
+        It is created if it does not exist.
+    :param start_time: Start of the period to export, passed to
+        :py:func:`create_scaling_factors_time_serie` .
+    :param end_time: End of the period to export, passed to
+        :py:func:`create_scaling_factors_time_serie` .
+    :param netcdf_attributes: NetCDF attributes to add to the file.
+        These can be generated using
+        :py:func:`emiproc.exports.netcdf.nc_cf_attributes` .
+        The given object is not modified.
+    :param var_name_format: The format string to use for the variable names.
+        The format string should contain two named fields: ``substance`` and ``category``.
+    :param filename_format: The format string to use for the file names.
+        The format string should contain fields for date and time.
+    :param unit: The unit of the emissions.
+
+    :return: The path to the output directory.
+
+    :raises NotImplementedError: If the inventory contains shapped sources
+        or if the unit is not supported.
+    :raises ValueError: If the inventory has no temporal profiles or if the
+        profiles depend on an unsupported dimension.
+    """
+    # Check if the inventory is gridded
+    if inv.gdfs:
+        raise NotImplementedError("Shapped sources are not implemented yet")
+
+    if inv.t_profiles_indexes is None or inv.t_profiles_groups is None:
+        raise ValueError(
+            "The inventory does not contain temporal profiles required for hourly"
+            " exports."
+        )
+    for invalid_dim in ["type", "country", "cell"]:
+        if invalid_dim in inv.t_profiles_indexes:
+            raise ValueError(f"Temporal profiles with {invalid_dim=} are not supported")
+
+    grid = inv.grid
+    is_regular_grid = isinstance(grid, RegularGrid)
+    crs = grid.crs
+
+    # Work on a copy, the attributes given by the caller must not be modified
+    netcdf_attributes = dict(netcdf_attributes)
+    # add the history
+    netcdf_attributes["emiproc_history"] = str(inv.history)
+    netcdf_attributes["projection"] = f"{crs}"
+
+    # The inventory values are treated as yearly totals (factor 1 for kg/year)
+    if unit == Units.KG_PER_YEAR:
+        conversion_factor = 1.0
+    elif unit == Units.KG_PER_HOUR:
+        conversion_factor = 1.0 / HOUR_PER_YR
+    elif unit == Units.KG_PER_M2_PER_S:
+        # One factor per cell, as the cells can have different areas
+        conversion_factor = 1.0 / SEC_PER_YR / np.array(grid.cell_areas)
+    else:
+        raise NotImplementedError(f"Unknown {unit=}")
+
+    # Create the scaling factors for all the time profiles
+    # (one column per profile index, one row per time step)
+    required_profiles_indexes = np.unique(inv.t_profiles_indexes)
+    df_scaling_factors = pd.DataFrame(
+        {
+            index: create_scaling_factors_time_serie(
+                start_time=start_time,
+                end_time=end_time,
+                profiles=inv.t_profiles_groups[index],
+            )
+            for index in required_profiles_indexes
+            # NOTE(review): this filter keeps the index -1, which is handled
+            # below as "constant profile" and whose column is never read.
+            # Confirm whether `>= 0` was intended before changing it, as the
+            # rows of this table also define the time steps to export.
+            if index >= -1
+        }
+    )
+
+    coords = {
+        "substance": inv.substances,
+        "category": inv.categories,
+        "cell": np.arange(len(grid)),
+    }
+
+    if is_regular_grid:
+        coords["lat"] = (
+            "lat",
+            grid.lat_range,
+            {
+                "standard_name": "latitude",
+                "long_name": "latitude",
+                "units": "degrees_north",
+                "comment": "center_of_cell",
+                "bounds": "lat_bnds",
+                "projection": f"{grid.crs}",
+                "axis": "Y",
+            },
+        )
+        coords["lon"] = (
+            "lon",
+            grid.lon_range,
+            {
+                "long_name": "longitude",
+                "units": "degrees_east",
+                "standard_name": "longitude",
+                "comment": "center_of_cell",
+                "bounds": "lon_bnds",
+                "projection": f"{grid.crs}",
+                "axis": "X",
+            },
+        )
+
+    data_dim = ["lat", "lon"] if is_regular_grid else ["cell"]
+
+    # Everything that is identical in all the files
+    base_ds = xr.Dataset(
+        coords=coords,
+        attrs=netcdf_attributes,
+    )
+    if unit in PER_M2_UNITS:
+        # add the cell area
+        areas = np.array(grid.cell_areas).reshape(grid.shape).T
+        if not is_regular_grid:
+            areas = areas.reshape(-1)
+        base_ds["cell_area"] = (
+            data_dim,
+            areas,
+            {
+                "standard_name": "cell_area",
+                "long_name": "cell_area",
+                "units": "m2",
+                "comment": "area of the cell",
+                "projection": f"{crs}",
+            },
+        )
+    path = Path(path)
+    path.mkdir(parents=True, exist_ok=True)
+    logger.log(PROCESS, f"Exporting hourly emissions to {path}")
+
+    # Prepare what does not depend on the time only once:
+    # the profile index and the total emissions of each category/substance
+    variables = []
+    for cat in inv.categories:
+        for sub in inv.substances:
+            if (cat, sub) not in inv.gdf.columns:
+                # Ignore non present cat-sub
+                continue
+            try:
+                index = get_desired_profile_index(
+                    inv.t_profiles_indexes, cat=cat, sub=sub
+                )
+            except ValueError as ve:
+                logger.warning(
+                    f"Could not find profile for {cat=} {sub=}: {ve} \n Assuming"
+                    " constant profile"
+                )
+                index = -1
+            total_emissions = inv.gdf[(cat, sub)].to_numpy().astype(float)
+            variables.append((cat, sub, index, total_emissions))
+
+    # Iterate over time, each row contains the scaling factors of one time step
+    for dt, row in df_scaling_factors.iterrows():
+        ds = base_ds.copy()
+        ds["time"] = dt
+        data_vars = {}
+        for cat, sub, index, total_emissions in variables:
+            # Index -1 means a constant profile
+            scaling_factor = 1.0 if index == -1 else row[index]
+            # Multiply by the scaling factor (creates a new array)
+            emissions = total_emissions * (scaling_factor * conversion_factor)
+            name = var_name_format.format(substance=sub, category=cat)
+
+            if is_regular_grid:
+                # From the flat cell dimension to (lat, lon)
+                emissions = emissions.reshape(grid.shape).T
+
+            data_vars[name] = xr.DataArray(
+                emissions,
+                dims=data_dim,
+                attrs={
+                    "standard_name": f"{sub}_{cat}",
+                    "long_name": f"{sub}_{cat}",
+                    "units": str(unit.value),
+                    "comment": f"emissions of {sub} in {cat}",
+                },
+                name=name,
+            )
+        # Add to the dataset
+        ds.update(data_vars)
+
+        # dt is the time stamp of the row, it gives the name of the file
+        ds.to_netcdf(path / f"{dt.strftime(filename_format)}")
+
+    return path