From e85677813f9a3511ae15ec10e59211006f5459df Mon Sep 17 00:00:00 2001 From: Janet Barclay Date: Wed, 6 Apr 2022 10:50:19 -0500 Subject: [PATCH 1/4] adding optional list of catch_prop_vars --- river_dl/preproc_utils.py | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/river_dl/preproc_utils.py b/river_dl/preproc_utils.py index 47686b4..7743144 100755 --- a/river_dl/preproc_utils.py +++ b/river_dl/preproc_utils.py @@ -236,11 +236,12 @@ def join_catch_properties(x_data_ts, catch_props): return xr.merge([x_data_ts, ds_catch], join="left") -def prep_catch_props(x_data_ts, catch_prop_file, spatial_idx_name, replace_nan_with_mean=True): +def prep_catch_props(x_data_ts, catch_prop_file, catch_prop_vars, spatial_idx_name, replace_nan_with_mean=True): """ read catch property file and join with ts data :param x_data_ts: [xr dataset] timeseries x-data :param catch_prop_file: [str] the feather file of catchment attributes + :param catch_prop_vars: [list of str] the catchment attributes to use, if None, all attributes will be kept :param spatial_idx_name: [str] name of column that is used for spatial index (e.g., 'seg_id_nat') :param replace_nan_with_mean: [bool] if true, any nan will be replaced with @@ -248,12 +249,20 @@ def prep_catch_props(x_data_ts, catch_prop_file, spatial_idx_name, replace_nan_w :return: [xr dataset] merged datasets """ df_catch_props = pd.read_feather(catch_prop_file) + + #keep only the requested variables + if catch_prop_vars: + catch_prop_vars.append(spatial_idx_name) + df_catch_props = df_catch_props[catch_prop_vars] + # replace nans with column means if replace_nan_with_mean: df_catch_props = df_catch_props.apply( lambda x: x.fillna(x.mean()), axis=0 ) - ds_catch_props = df_catch_props.set_index(spatial_idx_name).to_xarray() + ds_catch_props = df_catch_props.loc[df_catch_props[spatial_idx_name].isin(x_data_ts[spatial_idx_name].values)].set_index(spatial_idx_name).to_xarray() + + return join_catch_properties(x_data_ts, ds_catch_props) @@ -759,6 +768,7 @@ def prep_all_data( dist_idx_name="rowcolnames", dist_type="updown", catch_prop_file=None, + catch_prop_vars=None, exclude_file=None, log_y_vars=False, out_file=None, @@ -812,6 +822,8 @@ def prep_all_data( "updown") :param catch_prop_file: [str] the path to the catchment properties file. If left unfilled, the catchment properties will not be included as predictors + :param catch_prop_vars: [list of str] list of catchment properties to use. If + left unfilled and a catchment property file is supplied all variables will be used. :param exclude_file: [str] path to exclude file :param log_y_vars: [bool] whether or not to take the log of discharge in training @@ -860,7 +872,9 @@ def prep_all_data( x_data = x_data[x_vars] if catch_prop_file: - x_data = prep_catch_props(x_data, catch_prop_file, spatial_idx_name) + x_data = prep_catch_props(x_data, catch_prop_file, catch_prop_vars, spatial_idx_name) + #update the list of x_vars + x_vars = [i for i in x_data.data_vars] # make sure we don't have any weird or missing input values check_if_finite(x_data) x_trn, x_val, x_tst = separate_trn_tst( From 526d4bea819fdffac245bf60608c7333776a2a75 Mon Sep 17 00:00:00 2001 From: Janet Barclay Date: Tue, 12 Apr 2022 07:39:18 -0500 Subject: [PATCH 2/4] adding comment --- river_dl/preproc_utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/river_dl/preproc_utils.py b/river_dl/preproc_utils.py index 7743144..c6f67e4 100755 --- a/river_dl/preproc_utils.py +++ b/river_dl/preproc_utils.py @@ -260,6 +260,7 @@ def prep_catch_props(x_data_ts, catch_prop_file, catch_prop_vars, spatial_idx_na df_catch_props = df_catch_props.apply( lambda x: x.fillna(x.mean()), axis=0 ) + #this filters the catchment properties to only the reaches in the x dataset ds_catch_props = df_catch_props.loc[df_catch_props[spatial_idx_name].isin(x_data_ts[spatial_idx_name].values)].set_index(spatial_idx_name).to_xarray() From f7ec49beb93d84acdf424134f0a1f9dfc5a54208 Mon Sep 17 00:00:00 2001 From: Janet Barclay Date: Tue, 12 Apr 2022 13:07:24 -0400 Subject: [PATCH 3/4] Update river_dl/preproc_utils.py Co-authored-by: Jeff Sadler --- river_dl/preproc_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/river_dl/preproc_utils.py b/river_dl/preproc_utils.py index c6f67e4..931c46b 100755 --- a/river_dl/preproc_utils.py +++ b/river_dl/preproc_utils.py @@ -875,7 +875,7 @@ def prep_all_data( if catch_prop_file: x_data = prep_catch_props(x_data, catch_prop_file, catch_prop_vars, spatial_idx_name) #update the list of x_vars - x_vars = [i for i in x_data.data_vars] + x_vars = list(x_data.data_vars) # make sure we don't have any weird or missing input values check_if_finite(x_data) x_trn, x_val, x_tst = separate_trn_tst( From 94db715818d4745d25af18f0675da3556617930b Mon Sep 17 00:00:00 2001 From: Janet Barclay Date: Tue, 12 Apr 2022 14:27:35 -0500 Subject: [PATCH 4/4] removing extra segment filter --- river_dl/preproc_utils.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/river_dl/preproc_utils.py b/river_dl/preproc_utils.py index 931c46b..ac8fbdc 100755 --- a/river_dl/preproc_utils.py +++ b/river_dl/preproc_utils.py @@ -260,9 +260,7 @@ def prep_catch_props(x_data_ts, catch_prop_file, catch_prop_vars, spatial_idx_na df_catch_props = df_catch_props.apply( lambda x: x.fillna(x.mean()), axis=0 ) - #this filters the catchment properties to only the reaches in the x dataset - ds_catch_props = df_catch_props.loc[df_catch_props[spatial_idx_name].isin(x_data_ts[spatial_idx_name].values)].set_index(spatial_idx_name).to_xarray() - + ds_catch_props = df_catch_props.set_index(spatial_idx_name).to_xarray() return join_catch_properties(x_data_ts, ds_catch_props)