Skip to content
This repository has been archived by the owner on Jun 2, 2023. It is now read-only.

Prepare for Upcoming DGEC Transition #133

Open
wants to merge 47 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
47 commits
Select commit Hold shift + click to select a range
b256051
delete unnecessary file
galengorski Jan 31, 2023
58b30b2
initiate submodules
galengorski Jan 31, 2023
632c51e
update branch for data release add model trainging and analysis snake…
galengorski Feb 7, 2023
91e9663
add input files per comments from @amsnyder on [#3]
galengorski Feb 14, 2023
b1dbecc
Update model run_id for manuscript results
galengorski Feb 15, 2023
8dbcc3d
add model results
galengorski Feb 15, 2023
f68b61f
Update README.md
amsnyder Mar 15, 2023
4d454bb
Update README.md
amsnyder Mar 15, 2023
d30d739
Update utils.py
amsnyder Mar 20, 2023
3c9165b
Update fill_discharge_prms.py
amsnyder Mar 20, 2023
4062b38
Merge pull request #4 from amsnyder/asnyder-review2
galengorski Mar 20, 2023
1da20f3
delete coawst fetch and munge config files, not used and not needed i…
galengorski Apr 5, 2023
47561f9
Created necessary files for DGEC migration
jesse-ross Apr 7, 2023
d8bb5a4
Merge pull request #132 from galengorski/data_release
amsnyder Apr 10, 2023
82fb7d2
remove submodule"
amsnyder Apr 10, 2023
7ac139d
remove gitmodules
amsnyder Apr 10, 2023
ad0774b
add versions to env yaml and put in alphabetical order
amsnyder Apr 11, 2023
e855e53
replace river-dl submodule
amsnyder Apr 11, 2023
bd230d0
add required input and river-dl changes to model training
amsnyder Apr 11, 2023
26ec6b3
update use of river-dl in run_model.py
amsnyder Apr 11, 2023
b216c1b
update analysis
amsnyder Apr 11, 2023
edf7c9c
update readme and model run name
amsnyder Apr 11, 2023
cb8c618
Merge pull request #1 from amsnyder/remove_submodule
amsnyder Apr 11, 2023
8cc2619
Delete LICENSE
amsnyder Apr 11, 2023
1ec6ebc
move utils.py
amsnyder Apr 11, 2023
c3eb6ac
Merge pull request #2 from amsnyder/remove_submodule
amsnyder Apr 11, 2023
b1dd6e7
change default to manuscript results
amsnyder Apr 11, 2023
319fe19
Merge pull request #3 from amsnyder/remove_submodule
amsnyder Apr 11, 2023
cd7f577
add missing quote
amsnyder Apr 11, 2023
14921c7
Merge pull request #4 from amsnyder/remove_submodule
amsnyder Apr 11, 2023
dec9072
add function to install R packages
amsnyder Apr 14, 2023
ed901d3
Update README.md
amsnyder Apr 17, 2023
b4c9c8a
Merge pull request #134 from amsnyder/data_release
amsnyder Apr 17, 2023
f20b095
Update README.md
amsnyder Apr 17, 2023
b6bdbe4
change output dir
amsnyder Apr 17, 2023
e4085b0
add inputs for manuscript results
amsnyder Apr 17, 2023
5233b50
update readme to indicate steps 7/8 only pertain to manuscript run
amsnyder Apr 17, 2023
f3a8a09
Merge branch 'data_release' into move_add_inputs
amsnyder Apr 17, 2023
d45ef9c
Delete .snakemake_timestamp
amsnyder Apr 17, 2023
6bcc3e8
Delete .snakemake_timestamp
amsnyder Apr 17, 2023
28a862f
change default run name
amsnyder Apr 17, 2023
4c495cc
Merge branch 'move_add_inputs' of github.com:USGS-R/drb-estuary-salin…
amsnyder Apr 17, 2023
1cf039d
add manuscript config and adapt analysis pipeline
amsnyder Apr 17, 2023
5303cf8
remove user from pathname
amsnyder Apr 17, 2023
90bfd63
Merge branch 'prepare-for-dgec-transition' of https://github.com/USGS…
galengorski Apr 19, 2023
8ea92f7
update code.json file
galengorski Apr 19, 2023
a0159b2
fix typo in LICENSE.md
galengorski Apr 19, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 7 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,13 @@ _targets

#All model output
*/out/*
#Except modeling results from manuscript
!03_model/out/Run_Manuscript_Results/*

#All model inputs
*/in/*
#Certain model inputs
02_munge/in/*
03_model/in/COAWST_model_runs/raw/*
03_model/out/

#R files
# History files
Expand Down Expand Up @@ -67,3 +71,4 @@ __pycache__/

#dask
dask-worker-space/
.Rproj.user
Binary file added 01_fetch/in/953860.zip
Binary file not shown.
1,371 changes: 0 additions & 1,371 deletions 01_fetch/in/river_mile_coords.csv

This file was deleted.

52 changes: 0 additions & 52 deletions 01_fetch/params_config_fetch_coawst_model.yaml

This file was deleted.

2 changes: 1 addition & 1 deletion 01_fetch/params_config_fetch_usgs_nwis.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,4 @@ s3_bucket: 'drb-estuary-salinity'
# start and end dates for data fetch
# should be in the format 'YYYY-MM-DD'
start_dt: '2000-01-01'
end_dt: '2019-12-31'
end_dt: '2021-12-31'
2 changes: 2 additions & 0 deletions 01_fetch/src/fetch_coawst_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
import os
from dask.distributed import Client
import yaml
import sys
sys.path.insert(0, os.path.join('01_fetch', 'src'))
import utils

#client = Client()
Expand Down
50 changes: 0 additions & 50 deletions 01_fetch/src/fetch_gridmet.R

This file was deleted.

5 changes: 4 additions & 1 deletion 01_fetch/src/fetch_noaa_nerrs.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
import os
import utils
import yaml
import glob
from zipfile import ZipFile
import shutil
import sys
sys.path.insert(0, os.path.join('01_fetch', 'src'))
import utils


# import config
with open("01_fetch/params_config_fetch_noaa_nerrs.yaml", 'r') as stream:
Expand Down
4 changes: 3 additions & 1 deletion 01_fetch/src/fetch_noaa_nos.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,10 @@
import datetime
from dateutil.relativedelta import relativedelta
import requests, json
import utils
import yaml
import sys
sys.path.insert(0, os.path.join('01_fetch', 'src'))
import utils

# import config
with open("01_fetch/params_config_fetch_noaa_nos.yaml", 'r') as stream:
Expand Down
12 changes: 5 additions & 7 deletions 01_fetch/src/fetch_usgs_nwis.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
import yaml
import datetime
import pandas as pd
import sys
sys.path.insert(0, os.path.join('01_fetch', 'src'))
import utils

# import config
Expand Down Expand Up @@ -48,11 +50,6 @@ def process_site_info_to_csv(raw_site_info_txt, site_info_outfile_csv):
print('uploading to s3')
s3_client.upload_file(site_info_outfile_csv, s3_bucket, local_to_s3_pathname(site_info_outfile_csv))

# get minimum date to pull for this site
start_dt = site_info_df_subset[['begin_date']].min().values[0]

return start_dt

def fetch_params(outfile):
'''get table of all possible USGS site parameters'''
params_url = 'https://help.waterdata.usgs.gov/code/parameter_cd_query?fmt=rdb&group_cd=%'
Expand Down Expand Up @@ -101,8 +98,9 @@ def fetch_single_site_data(site_num):
site_info_outfile_txt = os.path.join('.', '01_fetch', 'out', 'metadata', f'usgs_nwis_site_info_{site_num}.txt')
fetch_site_info(site_num, site_info_outfile_txt)
site_info_outfile_csv = os.path.join('.', '01_fetch', 'out', 'metadata', f'usgs_nwis_site_info_{site_num}.csv')
start_dt = process_site_info_to_csv(site_info_outfile_txt, site_info_outfile_csv)
end_dt = datetime.datetime.today().strftime("%Y-%m-%d")
process_site_info_to_csv(site_info_outfile_txt, site_info_outfile_csv)
start_dt = config['start_dt']
end_dt = config['end_dt']

# start and end dates for data fetch
data_outfile_txt = os.path.join('.', '01_fetch', 'out', f'usgs_nwis_{site_num}.txt')
Expand Down
6 changes: 3 additions & 3 deletions utils.py → 01_fetch/src/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,8 @@ def process_to_timestep(df, cols, agg_level, prop_obs_required):
must have datetimes in a column named 'datetime'
'''
# get proportion of measurements available for timestep
expected_measurements = df.resample(agg_level, on='datetime').count().mode()[cols].loc[0]
observed_measurements = df.resample(agg_level, on='datetime').count()[cols].loc[:]
expected_measurements = df.resample(agg_level, on='datetime').count().mode().loc[0]
observed_measurements = df.resample(agg_level, on='datetime').count().loc[:]
prop_df = observed_measurements / expected_measurements
# calculate averages for timestep
df = df.resample(agg_level, on='datetime').mean()
Expand Down Expand Up @@ -84,4 +84,4 @@ def get_datafile_list(read_location, s3_client=None, s3_bucket=None):
prefix = os.path.join('01_fetch', 'out')
file_prefix='usgs_nwis_0'
raw_datafiles = [os.path.join(prefix, f) for f in os.listdir(prefix) if f.startswith(file_prefix)]
return raw_datafiles
return raw_datafiles
11 changes: 0 additions & 11 deletions 02_munge/params_config_munge_coawst_model.yaml

This file was deleted.

4 changes: 2 additions & 2 deletions 02_munge/src/fill_discharge_prms.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,8 @@ def download_unzip_sb(sb_url, prms_predictions, destination):
os.makedirs(destination, exist_ok=True)
sb = sciencebasepy.SbSession()
sb.download_file(sb_url, prms_predictions+'.zip', destination)
with zipfile.ZipFile(os.path.join(destination,prms_predictions+'.zip'), 'r') as zip_ref:
zip_ref.extractall(destination)
with zipfile.ZipFile(os.path.join(destination,prms_predictions+'.zip'), 'r') as zip_ref:
zip_ref.extractall(destination)



Expand Down
2 changes: 2 additions & 0 deletions 02_munge/src/munge_noaa_nerrs.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
import pandas as pd
import numpy as np
import yaml
import sys
sys.path.insert(0, os.path.join('01_fetch', 'src'))
import utils

# import config
Expand Down
4 changes: 3 additions & 1 deletion 02_munge/src/munge_noaa_nos.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,10 @@
import pandas as pd
import boto3
import yaml
import utils
from scipy import signal
import sys
sys.path.insert(0, os.path.join('01_fetch', 'src'))
import utils

# import config
with open("02_munge/params_config_munge_noaa_nos.yaml", 'r') as stream:
Expand Down
2 changes: 2 additions & 0 deletions 02_munge/src/munge_usgs_nwis.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
import pandas as pd
import re
import yaml
import sys
sys.path.insert(0, os.path.join('01_fetch', 'src'))
import utils

# import config
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
out_dir: '03b_model/out/HP_Tune_02'
run_id: 'Run_29'
out_dir: '03_model/out/HP_Tune_00'
#run_id: 'Run_29'
inputs: ['discharge_01463500',
#'specific_conductance_01463500',
'discharge_01474500',
Expand Down
Loading