Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Js/implement unit tests #40

Merged
merged 12 commits into from
Oct 5, 2024
37 changes: 37 additions & 0 deletions .github/workflows/unittests.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
name: Run Unit Tests

# Trigger the action on push or pull request
on:
  # TODO: Specify appropriate branches
  push:
    branches:
      - js/implement_unit_tests
  pull_request:
    branches:
      - dev_push

jobs:
  test:
    runs-on: ubuntu-latest

    steps:
      # Check out repository
      - name: Checkout repository
        uses: actions/checkout@v3

      # Set up Python
      - name: Set up Python 3.10
        uses: actions/setup-python@v4
        with:
          # Quoted so YAML does not parse 3.10 as the float 3.1
          python-version: '3.10'

      # Install dependencies
      - name: Install dependencies
        run: |
          # $CONDA is an environment variable pointing to the root of the miniconda directory
          $CONDA/bin/conda env update --file tools/code/rdl-tools.yml --name base

      - name: Test with pytest
        run: |
          # -y: run non-interactively (CI has no TTY to answer the confirm prompt);
          # use the $CONDA/bin/ prefix for consistency with the other conda calls.
          $CONDA/bin/conda install -y pytest
          $CONDA/bin/pytest
2 changes: 2 additions & 0 deletions pytest.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Pytest configuration for the repository.
[pytest]
# Prepend tools/code/ to sys.path during collection so tests can import the
# project modules (common, damageFunctions, runAnalysis, ...) directly.
# NOTE(review): the `pythonpath` ini option requires pytest >= 7 (or the
# pytest-pythonpath plugin on older versions) — confirm the CI pytest version.
pythonpath = tools/code/
Empty file added tools/__init__.py
Empty file.
Empty file added tools/code/__init__.py
Empty file.
9 changes: 4 additions & 5 deletions tools/code/common.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
# This file includes common configuration elements used by the script.
import os

# Load env vars from dotenv file
from dotenv import dotenv_values
import os
from dotenv import dotenv_values, find_dotenv

config = dotenv_values(".env")
config = dotenv_values(find_dotenv())
DATA_DIR = config["DATA_DIR"]
OUTPUT_DIR = config["OUTPUT_DIR"]
CACHE_DIR = config["CACHE_DIR"]
Expand Down Expand Up @@ -32,7 +31,7 @@
'MENA': 'AFRICA', # Middle East and North Africa
'EAP': 'ASIA', # East Asia and Pacific
'SAR': 'ASIA', # South Asia
'LCR': 'LAC', # Latin America and Caribbean
'ECA': 'ASIA', # East Europe and Central Asia
'LCR': 'LAC', # Latin America and Caribbean
'Other': 'GLOBAL', # North America, Europe, Japan, Korea, Australia and New Zealand
}
13 changes: 4 additions & 9 deletions tools/code/damageFunctions.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,10 +36,8 @@ def damage_factor_builtup(x: np.array, wb_region: str):
'LAC': np.maximum(0.0, np.minimum(1.0, 1.04578 + (0.001490579 - 1.04578)/(1 + (x/0.5619431)**1.509554))),
'GLOBAL': np.maximum(0.0, np.minimum(1.0, 2.100049 + (-0.00003530885 - 2.100049)/(1 + (x/6.632485)**0.559315))),
}
region = wb_to_region.get(wb_region)
if region not in function_mapping.keys():
return np.maximum(0.0, np.minimum(1.0, 2.100049 + (-0.00003530885 - 2.100049)/(1 + (x/6.632485)**0.559315)))
return function_mapping.get(wb_to_region.get(wb_region))
region = wb_to_region.get(wb_region, 'GLOBAL')
return function_mapping.get(region)


# Floods (river and coastal) impact function over Agricultural areas
Expand All @@ -60,9 +58,6 @@ def damage_factor_agri(x: np.array, wb_region: str):
'LAC': np.maximum(0.0, np.minimum(1.0, 1.876076 + (0.01855393 - 1.876076)/(1 + (x/5.08262)**0.7629432))),
'GLOBAL': np.maximum(0.0, np.minimum(1.0, 1.167022 + (-0.002602531 - 1.167022)/(1 + (x/1.398796)**1.246833))),
}

region = wb_to_region.get(wb_region)
if region not in function_mapping.keys():
return np.maximum(0.0, np.minimum(1.0, 1.167022 + (-0.002602531 - 1.167022)/(1 + (x/1.398796)**1.246833)))
return function_mapping.get(wb_to_region.get(wb_region))
region = wb_to_region.get(wb_region, 'GLOBAL')
return function_mapping.get(region)

3 changes: 2 additions & 1 deletion tools/code/rdl-tools.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# This file may be used to create an environment using:
# conda create --name rdl-tools --file rdl-tools.yml
# conda env create --name rdl-tools --file rdl-tools.yml

name: rdl-tools
channels:
Expand Down Expand Up @@ -34,6 +34,7 @@ dependencies:
- scipy
- xarray
- contextily
- pytest
- pip
- pip:
- geopy
43 changes: 25 additions & 18 deletions tools/code/runAnalysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,24 +9,25 @@
import rasterio
import rioxarray as rxr
from rasterstats import gen_zonal_stats, zonal_stats
from shapely.geometry import shape

# Importing internal libraries
import common
from input_utils import *
import input_utils
from damageFunctions import mortality_factor, damage_factor_builtup, damage_factor_agri

# Importing the libraries for parallel processing
import itertools as it
from functools import partial
import multiprocess as mp

DATA_DIR = common.DATA_DIR
OUTPUT_DIR = common.OUTPUT_DIR
warnings.filterwarnings("ignore", message="'GeoSeries.swapaxes' is deprecated", category=FutureWarning)

# Defining functions for parallel processing of zonal_stats
def chunks(iterable_data, n):
    """Split an iterable into successive lists of at most n items.

    Parameters
    ----------
    iterable_data : iterable
        Source of items to batch (consumed lazily).
    n : int
        Maximum number of items per chunk; assumed > 0.

    Yields
    ------
    list
        Consecutive chunks of up to n items, in input order.
    """
    # The stale pre-fix lines called it.iter(), which does not exist —
    # itertools has no iter(); the built-in iter() is the correct call.
    it_data = iter(iterable_data)
    # Two-argument iter(callable, sentinel) keeps pulling fixed-size slices
    # until islice returns an empty list (the sentinel), i.e. exhaustion.
    for chunk in iter(lambda: list(it.islice(it_data, n)), []):
        yield chunk

def zonal_stats_partial(feats, raster, stats="*", affine=None, nodata=None, all_touched=True):
Expand Down Expand Up @@ -65,7 +66,7 @@ def process_exposure_data(country, exp_cat, exp_nam, exp_year, exp_folder, wb_re
exp_ras = f"{exp_folder}/{country}_POP.tif"
if not os.path.exists(exp_ras):
print(f"Population data not found. Fetching data for {country}...")
fetch_population_data(country, exp_year)
input_utils.fetch_population_data(country, exp_year)
if not os.path.exists(exp_ras):
raise FileNotFoundError(f"Failed to fetch population data for {country}")
damage_factor = mortality_factor
Expand All @@ -74,16 +75,17 @@ def process_exposure_data(country, exp_cat, exp_nam, exp_year, exp_folder, wb_re
exp_ras = f"{exp_folder}/{country}_BU.tif"
if not os.path.exists(exp_ras):
print(f"Built-up data not found. Fetching data for {country}...")
fetch_built_up_data(country)
input_utils.fetch_built_up_data(country)
if not os.path.exists(exp_ras):
raise FileNotFoundError(f"Failed to fetch built-up data for {country}")
damage_factor = lambda x, region=wb_region: damage_factor_builtup(x, region)

elif exp_cat == 'AGR':
exp_ras = f"{exp_folder}/{country}_AGR.tif"

if not os.path.exists(exp_ras):
print(f"Agriculture data not found. Fetching data for {country}...")
fetch_agri_data(country)
input_utils.fetch_agri_data(country)
if not os.path.exists(exp_ras):
raise FileNotFoundError(f"Failed to fetch agricultural data for {country}")
damage_factor = lambda x, region=wb_region: damage_factor_agri(x, region)
Expand Down Expand Up @@ -145,7 +147,7 @@ def run_analysis(country: str, haz_cat: str, period: str, scenario: str, valid_R

# Fetch the ADM data
print(f"Fetching ADM data for {country}, level {adm_level}")
adm_data = get_adm_data(country, adm_level)
adm_data = input_utils.get_adm_data(country, adm_level)
if adm_data is None:
raise ValueError(f"ADM data not available for {country}, level {adm_level}")

Expand Down Expand Up @@ -204,7 +206,7 @@ def run_analysis(country: str, haz_cat: str, period: str, scenario: str, valid_R
'prob_RPs_LB':prob_RPs_LB,
'prob_RPs_UB':prob_RPs_UB,
'prob_RPs_Mean':prob_RPs_Mean})
prob_RPs_df.to_csv(os.path.join(common.OUTPUT_DIR, f"{country}_{haz_cat}_prob_RPs.csv"), index=False)
prob_RPs_df.to_csv(os.path.join(OUTPUT_DIR, f"{country}_{haz_cat}_prob_RPs.csv"), index=False)

# Computing the results for each RP
n_valid_RPs_gt_1 = len(valid_RPs) > 1
Expand Down Expand Up @@ -297,7 +299,7 @@ def calc_imp_RPs(RPs, haz_folder, analysis_type, country, haz_cat, period, scena
# Assign impact factor (this is F_i in the equations)
haz_data = damage_factor(haz_data, wb_region)
if save_check_raster:
haz_data.rio.to_raster(os.path.join(common.OUTPUT_DIR, f"{country}_{haz_cat}_{period}_{scenario}_{rp}_{exp_cat}_haz_imp_factor.tif"))
haz_data.rio.to_raster(os.path.join(OUTPUT_DIR, f"{country}_{haz_cat}_{period}_{scenario}_{rp}_{exp_cat}_haz_imp_factor.tif"))
elif analysis_type == "Classes":
# Assign bin values to raster data - Follows: x_{i-1} <= x_{i} < x_{i+1}
bin_idx = np.digitize(haz_data, bin_seq)
Expand All @@ -307,7 +309,7 @@ def calc_imp_RPs(RPs, haz_folder, analysis_type, country, haz_cat, period, scena
affected_exp = exp_data.where(haz_data.data > 0, np.nan)

if save_check_raster:
affected_exp.rio.to_raster(os.path.join(common.OUTPUT_DIR, f"{country}_{haz_cat}_{period}_{scenario}_{rp}_{exp_cat}_affected.tif"))
affected_exp.rio.to_raster(os.path.join(OUTPUT_DIR, f"{country}_{haz_cat}_{period}_{scenario}_{rp}_{exp_cat}_affected.tif"))

# Conduct analyses for classes
if analysis_type == "Classes":
Expand All @@ -333,7 +335,7 @@ def calc_imp_RPs(RPs, haz_folder, analysis_type, country, haz_cat, period, scena
impact_exp = affected_exp.data * haz_data
# If save intermediate to disk is TRUE, then
if save_check_raster:
impact_exp.rio.to_raster(os.path.join(common.OUTPUT_DIR, f"{country}_{period}_{scenario}_{rp}_{exp_cat}_impact.tif"))
impact_exp.rio.to_raster(os.path.join(OUTPUT_DIR, f"{country}_{period}_{scenario}_{rp}_{exp_cat}_impact.tif"))
# Compute the impact per ADM level
impact_exp_per_ADM = gen_zonal_stats(vectors=adm_data["geometry"], raster=impact_exp.data, stats=["sum"],
affine=impact_exp.rio.transform(), nodata=np.nan)
Expand All @@ -351,12 +353,17 @@ def result_df_reorder_columns(result_df, RPs, analysis_type, exp_cat, adm_level,
Reorders the columns of result_df.
"""
# Re-ordering and dropping selected columns for better presentation of the results
if analysis_type == "Function":
all_RPs = ["RP" + str(rp) for rp in RPs]
all_exp = [x + f"_{exp_cat}_exp" for x in all_RPs]
all_imp = [x + f"_{exp_cat}_imp" for x in all_RPs]
col_order = all_adm_codes + all_adm_names + [f"ADM{adm_level}_{exp_cat}"] + all_exp + all_imp + ["geometry"]
result_df = result_df.loc[:, col_order]

if analysis_type != "Function":
return result_df

adm_column = f"ADM{adm_level}_{exp_cat}"

all_RPs = ["RP" + str(rp) for rp in RPs]
all_exp = [x + f"_{exp_cat}_exp" for x in all_RPs]
all_imp = [x + f"_{exp_cat}_imp" for x in all_RPs]
col_order = all_adm_codes + all_adm_names + [adm_column] + all_exp + all_imp + ["geometry"]
result_df = result_df.loc[:, col_order]

return result_df

Expand Down
Empty file added tools/tests/__init__.py
Empty file.
Empty file added tools/tests/code/__init__.py
Empty file.
Loading
Loading