From e85677813f9a3511ae15ec10e59211006f5459df Mon Sep 17 00:00:00 2001
From: Janet Barclay <jbarclay@usgs.gov>
Date: Wed, 6 Apr 2022 10:50:19 -0500
Subject: [PATCH 1/4] adding optional list of catch_prop_vars

---
 river_dl/preproc_utils.py | 20 +++++++++++++++++---
 1 file changed, 17 insertions(+), 3 deletions(-)

diff --git a/river_dl/preproc_utils.py b/river_dl/preproc_utils.py
index 47686b4..7743144 100755
--- a/river_dl/preproc_utils.py
+++ b/river_dl/preproc_utils.py
@@ -236,11 +236,12 @@ def join_catch_properties(x_data_ts, catch_props):
     return xr.merge([x_data_ts, ds_catch], join="left")
 
 
-def prep_catch_props(x_data_ts, catch_prop_file, spatial_idx_name, replace_nan_with_mean=True):
+def prep_catch_props(x_data_ts, catch_prop_file, catch_prop_vars, spatial_idx_name, replace_nan_with_mean=True):
     """
     read catch property file and join with ts data
     :param x_data_ts: [xr dataset] timeseries x-data
     :param catch_prop_file: [str] the feather file of catchment attributes
+    :param catch_prop_vars: [list of str] the catchment attributes to use, if None, all attributes will be kept
     :param spatial_idx_name: [str] name of column that is used for spatial
         index (e.g., 'seg_id_nat')
     :param replace_nan_with_mean: [bool] if true, any nan will be replaced with
@@ -248,12 +249,20 @@ def prep_catch_props(x_data_ts, catch_prop_file, spatial_idx_name, replace_nan_w
     :return: [xr dataset] merged datasets
     """
     df_catch_props = pd.read_feather(catch_prop_file)
+    
+    #keep only the requested variables
+    if catch_prop_vars:
+        catch_prop_vars.append(spatial_idx_name) 
+        df_catch_props = df_catch_props[catch_prop_vars]
+
     # replace nans with column means
     if replace_nan_with_mean:
         df_catch_props = df_catch_props.apply(
             lambda x: x.fillna(x.mean()), axis=0
         )
-    ds_catch_props = df_catch_props.set_index(spatial_idx_name).to_xarray()
+    ds_catch_props = df_catch_props.loc[df_catch_props[spatial_idx_name].isin(x_data_ts[spatial_idx_name].values)].set_index(spatial_idx_name).to_xarray()
+    
+
     return join_catch_properties(x_data_ts, ds_catch_props)
 
 
@@ -759,6 +768,7 @@ def prep_all_data(
     dist_idx_name="rowcolnames",
     dist_type="updown",
     catch_prop_file=None,
+    catch_prop_vars=None,
     exclude_file=None,
     log_y_vars=False,
     out_file=None,
@@ -812,6 +822,8 @@ def prep_all_data(
     "updown")
     :param catch_prop_file: [str] the path to the catchment properties file. If
     left unfilled, the catchment properties will not be included as predictors
+    :param catch_prop_vars: [list of str] list of catchment properties to use. If
+    left unfilled and a catchment property file is supplied all variables will be used.
     :param exclude_file: [str] path to exclude file
     :param log_y_vars: [bool] whether or not to take the log of discharge in
     training
@@ -860,7 +872,9 @@ def prep_all_data(
     x_data = x_data[x_vars]
 
     if catch_prop_file:
-        x_data = prep_catch_props(x_data, catch_prop_file, spatial_idx_name)
+        x_data = prep_catch_props(x_data, catch_prop_file, catch_prop_vars, spatial_idx_name)
+        #update the list of x_vars
+        x_vars = [i for i in x_data.data_vars]
     # make sure we don't have any weird or missing input values
     check_if_finite(x_data)
     x_trn, x_val, x_tst = separate_trn_tst(

From 526d4bea819fdffac245bf60608c7333776a2a75 Mon Sep 17 00:00:00 2001
From: Janet Barclay <jbarclay@usgs.gov>
Date: Tue, 12 Apr 2022 07:39:18 -0500
Subject: [PATCH 2/4] adding comment

---
 river_dl/preproc_utils.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/river_dl/preproc_utils.py b/river_dl/preproc_utils.py
index 7743144..c6f67e4 100755
--- a/river_dl/preproc_utils.py
+++ b/river_dl/preproc_utils.py
@@ -260,6 +260,7 @@ def prep_catch_props(x_data_ts, catch_prop_file, catch_prop_vars, spatial_idx_na
         df_catch_props = df_catch_props.apply(
             lambda x: x.fillna(x.mean()), axis=0
         )
+    #this filters the catchment properties to only the reaches in the x dataset
     ds_catch_props = df_catch_props.loc[df_catch_props[spatial_idx_name].isin(x_data_ts[spatial_idx_name].values)].set_index(spatial_idx_name).to_xarray()
     
 

From f7ec49beb93d84acdf424134f0a1f9dfc5a54208 Mon Sep 17 00:00:00 2001
From: Janet Barclay <janetbarclay@gmail.com>
Date: Tue, 12 Apr 2022 13:07:24 -0400
Subject: [PATCH 3/4] Update river_dl/preproc_utils.py

Co-authored-by: Jeff Sadler <jeffrey.sadler2@gmail.com>
---
 river_dl/preproc_utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/river_dl/preproc_utils.py b/river_dl/preproc_utils.py
index c6f67e4..931c46b 100755
--- a/river_dl/preproc_utils.py
+++ b/river_dl/preproc_utils.py
@@ -875,7 +875,7 @@ def prep_all_data(
     if catch_prop_file:
         x_data = prep_catch_props(x_data, catch_prop_file, catch_prop_vars, spatial_idx_name)
         #update the list of x_vars
-        x_vars = [i for i in x_data.data_vars]
+        x_vars = list(x_data.data_vars)
     # make sure we don't have any weird or missing input values
     check_if_finite(x_data)
     x_trn, x_val, x_tst = separate_trn_tst(

From 94db715818d4745d25af18f0675da3556617930b Mon Sep 17 00:00:00 2001
From: Janet Barclay <jbarclay@usgs.gov>
Date: Tue, 12 Apr 2022 14:27:35 -0500
Subject: [PATCH 4/4] removing extra segment filter

---
 river_dl/preproc_utils.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/river_dl/preproc_utils.py b/river_dl/preproc_utils.py
index 931c46b..ac8fbdc 100755
--- a/river_dl/preproc_utils.py
+++ b/river_dl/preproc_utils.py
@@ -260,9 +260,7 @@ def prep_catch_props(x_data_ts, catch_prop_file, catch_prop_vars, spatial_idx_na
         df_catch_props = df_catch_props.apply(
             lambda x: x.fillna(x.mean()), axis=0
         )
-    #this filters the catchment properties to only the reaches in the x dataset
-    ds_catch_props = df_catch_props.loc[df_catch_props[spatial_idx_name].isin(x_data_ts[spatial_idx_name].values)].set_index(spatial_idx_name).to_xarray()
-    
+    ds_catch_props = df_catch_props.set_index(spatial_idx_name).to_xarray()
 
     return join_catch_properties(x_data_ts, ds_catch_props)