Commit

[pre-commit.ci] auto fixes from pre-commit.com hooks
for more information, see https://pre-commit.ci
pre-commit-ci[bot] committed Sep 9, 2024
1 parent a0c146b commit 731e899
Showing 20 changed files with 546 additions and 475 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/build_image.yaml
@@ -6,7 +6,7 @@ on:
- main
paths: # only run on changes to the Dockerfile
- 'Dockerfile'


jobs:
build-and-push:
4 changes: 2 additions & 2 deletions Dockerfile
@@ -1,7 +1,7 @@
FROM quay.io/pangeo/pangeo-notebook:2023.10.24
FROM quay.io/pangeo/pangeo-notebook:2023.10.24
LABEL maintainer="Julius Busecke"
LABEL repo="https://github.com/ocean-transport/scale-aware-air-sea"


RUN mamba install -n=notebook aerobulk-python -y
RUN mamba install -n=notebook aerobulk-python -y
RUN pip install coiled
4 changes: 2 additions & 2 deletions README.md
@@ -14,11 +14,11 @@ pip install .
```

### Custom Docker image
The science published from the repository is using a [custom Docker image](/Dockerfile) (installing additional dependencies on top of the [pangeo docker image](https://github.com/pangeo-data/pangeo-docker-images)).
The science published from the repository is using a [custom Docker image](/Dockerfile) (installing additional dependencies on top of the [pangeo docker image](https://github.com/pangeo-data/pangeo-docker-images)).
You can find the image on [quay.io](https://quay.io/repository/jbusecke/scale-aware-air-sea?tab=tags) and refer to specific tags used in the notebooks in `./pipeline`.

#### Pulling custom image on LEAP-Pangeo Jupyterhub
To work with the custom image on the LEAP-Pangeo Jupyterhub, just follow the instructions to use a [custom image](), and enter `quay.io/jbusecke/scale-aware-air-sea:<tag>`, where you replace tag with the relevant version tag.
To work with the custom image on the LEAP-Pangeo Jupyterhub, just follow the instructions to use a [custom image](), and enter `quay.io/jbusecke/scale-aware-air-sea:<tag>`, where you replace tag with the relevant version tag.


E.g. for tag `68b654d76dce`:
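Presumably the full image reference would then be `quay.io/jbusecke/scale-aware-air-sea:68b654d76dce`.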
2 changes: 1 addition & 1 deletion notebooks/jbusecke/new-workspace.jupyterlab-workspace
@@ -1 +1 @@
{"data":{"layout-restorer:data":{"main":{"dock":{"type":"split-area","orientation":"horizontal","sizes":[0.5,0.5],"children":[{"type":"tab-area","currentIndex":2,"widgets":["notebook:1_PROJECTS/scale-aware-air-sea/notebooks/jbusecke/aerobulk-python_performance.ipynb","terminal:1","notebook:1_PROJECTS/scale-aware-air-sea/notebooks/jbusecke/workflow/cm26_pipeline.ipynb","notebook:1_PROJECTS/scale-aware-air-sea/notebooks/jbusecke/workflow/reproduce_cm26_flux.ipynb","notebook:1_PROJECTS/scale-aware-air-sea/notebooks/jbusecke/testing/cm26_xesmf_hack.ipynb"]},{"type":"tab-area","currentIndex":3,"widgets":["dask-dashboard-launcher:/individual-progress","dask-dashboard-launcher:/individual-workers","dask-dashboard-launcher:/individual-task-stream","dask-dashboard-launcher:/individual-workers-memory"]}]},"current":"dask-dashboard-launcher:/individual-workers-memory"},"down":{"size":0,"widgets":[]},"left":{"collapsed":true,"widgets":["filebrowser","running-sessions","dask-dashboard-launcher","git-sessions","@jupyterlab/toc:plugin","extensionmanager.main-view"]},"right":{"collapsed":true,"widgets":["jp-property-inspector","debugger-sidebar"]},"relativeSizes":[0,1,0]},"file-browser-filebrowser:cwd":{"path":"1_PROJECTS/scale-aware-air-sea/notebooks/jbusecke"},"notebook:1_PROJECTS/scale-aware-air-sea/notebooks/jbusecke/aerobulk-python_performance.ipynb":{"data":{"path":"1_PROJECTS/scale-aware-air-sea/notebooks/jbusecke/aerobulk-python_performance.ipynb","factory":"Notebook"}},"notebook:1_PROJECTS/scale-aware-air-sea/notebooks/jbusecke/workflow/cm26_pipeline.ipynb":{"data":{"path":"1_PROJECTS/scale-aware-air-sea/notebooks/jbusecke/workflow/cm26_pipeline.ipynb","factory":"Notebook"}},"notebook:1_PROJECTS/scale-aware-air-sea/notebooks/jbusecke/workflow/reproduce_cm26_flux.ipynb":{"data":{"path":"1_PROJECTS/scale-aware-air-sea/notebooks/jbusecke/workflow/reproduce_cm26_flux.ipynb","factory":"Notebook"}},"notebook:1_PROJECTS/scale-aware-air-sea/notebooks/jbusecke/testing/cm26_xesmf_hack.ipynb":{"data":{"path":"1_PROJECTS/scale-aware-air-sea/notebooks/jbusecke/testing/cm26_xesmf_hack.ipynb","factory":"Notebook"}},"terminal:1":{"data":{"name":"1"}},"dask-dashboard-launcher":{"url":"https://us-central1-b.gcp.pangeo.io/services/dask-gateway/clusters/prod.1e69eadfa71b4df0842dbaaeb5c7b01d/","cluster":""},"dask-dashboard-launcher:/individual-progress":{"data":{"route":"/individual-progress","label":"Progress","key":"Progress"}},"dask-dashboard-launcher:/individual-workers":{"data":{"route":"/individual-workers","label":"Workers","key":"Workers"}},"dask-dashboard-launcher:/individual-task-stream":{"data":{"route":"/individual-task-stream","label":"Task Stream","key":"Task Stream"}},"dask-dashboard-launcher:/individual-workers-memory":{"data":{"route":"/individual-workers-memory","label":"Workers Memory","key":"Workers Memory"}}},"metadata":{"id":"new-workspace","last_modified":"2022-06-23T17:40:03.102459+00:00","created":"2022-06-23T17:40:03.102459+00:00"}}
{"data":{"layout-restorer:data":{"main":{"dock":{"type":"split-area","orientation":"horizontal","sizes":[0.5,0.5],"children":[{"type":"tab-area","currentIndex":2,"widgets":["notebook:1_PROJECTS/scale-aware-air-sea/notebooks/jbusecke/aerobulk-python_performance.ipynb","terminal:1","notebook:1_PROJECTS/scale-aware-air-sea/notebooks/jbusecke/workflow/cm26_pipeline.ipynb","notebook:1_PROJECTS/scale-aware-air-sea/notebooks/jbusecke/workflow/reproduce_cm26_flux.ipynb","notebook:1_PROJECTS/scale-aware-air-sea/notebooks/jbusecke/testing/cm26_xesmf_hack.ipynb"]},{"type":"tab-area","currentIndex":3,"widgets":["dask-dashboard-launcher:/individual-progress","dask-dashboard-launcher:/individual-workers","dask-dashboard-launcher:/individual-task-stream","dask-dashboard-launcher:/individual-workers-memory"]}]},"current":"dask-dashboard-launcher:/individual-workers-memory"},"down":{"size":0,"widgets":[]},"left":{"collapsed":true,"widgets":["filebrowser","running-sessions","dask-dashboard-launcher","git-sessions","@jupyterlab/toc:plugin","extensionmanager.main-view"]},"right":{"collapsed":true,"widgets":["jp-property-inspector","debugger-sidebar"]},"relativeSizes":[0,1,0]},"file-browser-filebrowser:cwd":{"path":"1_PROJECTS/scale-aware-air-sea/notebooks/jbusecke"},"notebook:1_PROJECTS/scale-aware-air-sea/notebooks/jbusecke/aerobulk-python_performance.ipynb":{"data":{"path":"1_PROJECTS/scale-aware-air-sea/notebooks/jbusecke/aerobulk-python_performance.ipynb","factory":"Notebook"}},"notebook:1_PROJECTS/scale-aware-air-sea/notebooks/jbusecke/workflow/cm26_pipeline.ipynb":{"data":{"path":"1_PROJECTS/scale-aware-air-sea/notebooks/jbusecke/workflow/cm26_pipeline.ipynb","factory":"Notebook"}},"notebook:1_PROJECTS/scale-aware-air-sea/notebooks/jbusecke/workflow/reproduce_cm26_flux.ipynb":{"data":{"path":"1_PROJECTS/scale-aware-air-sea/notebooks/jbusecke/workflow/reproduce_cm26_flux.ipynb","factory":"Notebook"}},"notebook:1_PROJECTS/scale-aware-air-sea/notebooks/jbusecke/testing/cm26_xesmf_hack.ipynb":{"data":{"path":"1_PROJECTS/scale-aware-air-sea/notebooks/jbusecke/testing/cm26_xesmf_hack.ipynb","factory":"Notebook"}},"terminal:1":{"data":{"name":"1"}},"dask-dashboard-launcher":{"url":"https://us-central1-b.gcp.pangeo.io/services/dask-gateway/clusters/prod.1e69eadfa71b4df0842dbaaeb5c7b01d/","cluster":""},"dask-dashboard-launcher:/individual-progress":{"data":{"route":"/individual-progress","label":"Progress","key":"Progress"}},"dask-dashboard-launcher:/individual-workers":{"data":{"route":"/individual-workers","label":"Workers","key":"Workers"}},"dask-dashboard-launcher:/individual-task-stream":{"data":{"route":"/individual-task-stream","label":"Task Stream","key":"Task Stream"}},"dask-dashboard-launcher:/individual-workers-memory":{"data":{"route":"/individual-workers-memory","label":"Workers Memory","key":"Workers Memory"}}},"metadata":{"id":"new-workspace","last_modified":"2022-06-23T17:40:03.102459+00:00","created":"2022-06-23T17:40:03.102459+00:00"}}
1 change: 0 additions & 1 deletion notebooks/jbusecke/testing/xesmf_test.py
@@ -12,4 +12,3 @@
ds_atmos = xr.open_zarr('gs://cmip6/GFDL_CM2_6/control/atmos_daily.zarr', **kwargs)

regridder = xe.Regridder(ds_atmos.olr.isel(time=0), ds.surface_temp.isel(time=0), 'bilinear', periodic=True) # all the atmos data is on the cell center AFAIK
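# (illustrative note, not part of this commit: an xESMF Regridder object is callable, so the
# weights built above could be applied along the lines of `olr_on_ocean = regridder(ds_atmos.olr)`)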

15 changes: 7 additions & 8 deletions notebooks/jbusecke/workflow/cm26_pipeline-debug.py
@@ -2,7 +2,7 @@
# coding: utf-8

# # Debug issues with exceeding wind stress
#
#
# ## Tasks
# - Find timestep(s) that exhibit the behavior
# - Confirm that this behavior only affects certain algorithms
@@ -31,7 +31,7 @@
from dask.diagnostics import ProgressBar
from cm26_utils import write_split_zarr, noskin_ds_wrapper, load_and_merge_cm26

# 👇 replace with your key
# 👇 replace with your key
with open('/home/jovyan/keys/pangeo-forge-ocean-transport-4967-347e2048c5a1.json') as token_file:
token = json.load(token_file)

@@ -63,7 +63,7 @@
# I did `jupyter nbconvert --to python cm26_pipeline-debug.ipynb`
# and then I get the error about the wind stress.

# I have executed this with all algos and I get crashes for:
# I have executed this with all algos and I get crashes for:
# 'coare3p6'
# 'andreas'
# 'coare3p0'
@@ -87,7 +87,7 @@


# ## Investigate the max windstress values we are getting with the working algos
#
#
# Is there a correlation between max wind speeds and stresses? Yeah definitely!

# In[ ]:
@@ -118,8 +118,8 @@

# ## Ok can we actually get around this and get some results at all?
# If not we need to raise the tau cut-off in aerobulk.
#
# My simple approach right here is to set every wind value larger than `threshold` to zero. This is not a feasible solution for our processing, but I just want to see how low we have to go to get all algos to go through!
#
# My simple approach right here is to set every wind value larger than `threshold` to zero. This is not a feasible solution for our processing, but I just want to see how low we have to go to get all algos to go through!

# In[ ]:

@@ -133,7 +133,7 @@
mask = ds_masked.wind>threshold
ds_masked['u_ref'] = ds_masked['u_ref'].where(mask, 0)
ds_masked['v_ref'] = ds_masked['v_ref'].where(mask, 0)
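# note: .where(mask, 0) keeps the original values where mask is True (i.e. wind > threshold) and sets everything else to zero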

break
ds_out = noskin_ds_wrapper(ds_merged, algo=algo, input_range_check=False)
with ProgressBar():
@@ -143,4 +143,3 @@
stress_max = stress.max(['xt_ocean', 'yt_ocean']).assign_coords(algo=algo)
print(stress_max)
datasets.append(stress_max)
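# (presumably the per-algorithm maxima collected in `datasets` would then be combined,
# e.g. via xr.concat(datasets, dim='algo'); that step is not shown in this hunk)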

10 changes: 5 additions & 5 deletions notebooks/jbusecke/workflow/cm26_pipeline.py
@@ -18,7 +18,7 @@
from dask.diagnostics import ProgressBar
from cm26_utils import write_split_zarr, noskin_ds_wrapper

# 👇 replace with your key
# 👇 replace with your key
with open('/home/jovyan/keys/pangeo-forge-ocean-transport-4967-347e2048c5a1.json') as token_file:
token = json.load(token_file)
fs = gcsfs.GCSFileSystem(token=token)
@@ -87,7 +87,7 @@

############################# wind cutting ##############
# ok so let's not add nans into fields like above. Instead, let's see in which timesteps this actually occurs and for now completely ignore these timesteps
# This is not ideal in the long run, but maybe at least gives us a way to output
# This is not ideal in the long run, but maybe at least gives us a way to output

# ds_cut = ds_merged.isel(time=slice(0,500))
# wind = ds_cut.wind
@@ -96,7 +96,7 @@
threshold = 30
with ProgressBar():
strong_wind_cells = (wind > threshold).sum(['xt_ocean','yt_ocean']).load()

strong_wind_index = strong_wind_cells > 0

# double check that these events are still rare in space and time
@@ -121,6 +121,6 @@
if fs.exists(path) and overwrite:
# # # delete the mapper (only uncomment if you want to start from scratch!)
print("DELETE existing store")
fs.rm(path, recursive=True)
fs.rm(path, recursive=True)

write_split_zarr(mapper, ds_out, split_interval=64)
write_split_zarr(mapper, ds_out, split_interval=64)
24 changes: 12 additions & 12 deletions notebooks/jbusecke/workflow/cm26_utils.py
@@ -14,37 +14,37 @@
# - Adjust units for aerobulk input
# - Calculate relative wind components
# """
# kwargs = dict(consolidated=True, use_cftime=True, inline_array=inline_array, engine='zarr')#,
# kwargs = dict(consolidated=True, use_cftime=True, inline_array=inline_array, engine='zarr')#,
# print('Load Data')
# mapper = filesystem.get_mapper("gs://cmip6/GFDL_CM2_6/control/surface")
# # ds_ocean = xr.open_dataset(mapper, chunks='auto', **kwargs)
# # ds_ocean = xr.open_dataset(mapper, chunks={'time':2}, **kwargs)
# ds_ocean = xr.open_dataset(mapper, chunks={'time':3}, **kwargs)
# # cat = open_catalog("https://raw.githubusercontent.com/pangeo-data/pangeo-datastore/master/intake-catalogs/ocean/GFDL_CM2.6.yaml")
# # ds_ocean = cat["GFDL_CM2_6_control_ocean_surface"].to_dask()

# # ds_flux = cat["GFDL_CM2_6_control_ocean_boundary_flux"].to_dask()
# mapper = filesystem.get_mapper("gs://cmip6/GFDL_CM2_6/control/ocean_boundary")
# # ds_flux = xr.open_dataset(mapper, chunks='auto', **kwargs)
# # ds_flux = xr.open_dataset(mapper, chunks={'time':2}, **kwargs)
# ds_flux = xr.open_dataset(mapper, chunks={'time':3}, **kwargs)


# # xarray says not to do this
# # ds_atmos = xr.open_zarr('gs://cmip6/GFDL_CM2_6/control/atmos_daily.zarr', chunks={'time':1}, **kwargs)
# mapper = filesystem.get_mapper("gs://cmip6/GFDL_CM2_6/control/atmos_daily.zarr")
# # ds_atmos = xr.open_dataset(mapper, chunks={'time':120}, **kwargs)
# # ds_atmos = xr.open_dataset(mapper, chunks={'time':2}, **kwargs)
# # ds_atmos = xr.open_dataset(mapper, chunks={'time':3}, **kwargs)
# ds_atmos = xr.open_dataset(mapper, chunks={'time':120}, **kwargs).chunk({'time':3})

# # # instead do this
# # ds_atmos = ds_atmos.chunk({'time':1})

# mapper = filesystem.get_mapper("gs://cmip6/GFDL_CM2_6/grid")
# ds_oc_grid = xr.open_dataset(mapper, chunks={}, **kwargs)
# # ds_oc_grid = cat["GFDL_CM2_6_grid"].to_dask()

# print('Align in time')
# # cut to same time
# all_dims = set(list(ds_ocean.dims)+list(ds_atmos.dims))
@@ -78,7 +78,7 @@
# mapper = filesystem.get_mapper(path)
# ds_regridder = xr.open_zarr(mapper).load()
# regridder = xe.Regridder(
# ds_atmos.olr.to_dataset(name='dummy').isel(time=0).reset_coords(drop=True),# this is the same dumb problem I keep having with
# ds_atmos.olr.to_dataset(name='dummy').isel(time=0).reset_coords(drop=True),# this is the same dumb problem I keep having with
# ds_ocean.surface_temp.to_dataset(name='dummy').isel(time=0).reset_coords(drop=True),
# 'bilinear',
# weights=ds_regridder,
@@ -100,17 +100,17 @@
# # fix units for aerobulk
# ds_merged['surface_temp'] = ds_merged['surface_temp'] + 273.15
# ds_merged['slp'] = ds_merged['slp'] * 100 # check this

# print('Mask nans')
# # atmos missing values are filled with 0s, which causes issues with the filtering
# # Ideally this should be masked before the regridding, but xesmf fills with 0 again...
# mask = ~np.isnan(ds_merged['surface_temp'])
# for mask_var in ['slp', 't_ref', 'q_ref']:
# ds_merged[mask_var] = ds_merged[mask_var].where(mask)

# # Calculate relative wind
# print('Calculate relative wind')
# ds_merged['u_relative'] = ds_merged['u_ref'] - ds_merged['u_ocean']
# ds_merged['v_relative'] = ds_merged['v_ref'] - ds_merged['v_ocean']
# return ds_merged

# return ds_merged
36 changes: 18 additions & 18 deletions notebooks/jbusecke/workflow/cm26_utils_old.py
@@ -11,15 +11,15 @@ def write_split_zarr(store, ds, split_dim='time', chunks=1, split_interval=180):
This can be helpful to e.g. avoid problems with overly eager dask schedulers
"""
# Got my inspiration for this mostly here: https://github.com/pydata/xarray/issues/6069

# determine the variables and coordinates that depend on the split_dim
other_dims = [di for di in ds.dims if di != split_dim]
split_vars_coords = [va for va in ds.variables if split_dim in ds[va].dims and va not in ds.dims]
non_split_vars_coords = [va for va in ds.variables if va not in split_vars_coords and va not in ds.dims]

# Generate a stripped dataset that only contains variables/coordinates that do not depend on `split_dim`
ds_stripped = ds.drop_vars(split_vars_coords+[split_dim])

# initialize the store without writing values
print('initializing store')
ds.to_zarr(
@@ -28,45 +28,45 @@ def write_split_zarr(store, ds, split_dim='time', chunks=1, split_interval=180):
encoding={split_dim:{"chunks":[chunks]}},
consolidated=True, # TODO: Not sure if this is proper. Might have to consolidate the whole thing as a last step?
)

# Write out only the non-split variables/coordinates
if len(non_split_vars_coords) > 0:
print('Writing coordinates')
ds_stripped.to_zarr(store, mode='a') #I guess a is 'add'. This is honestly not clear enough in the xarray docs.
# with `w` there are issues with the shape.
# with `w` there are issues with the shape.

# TODO: what about the attrs?

# Populate split chunks as regions
n = len(ds[split_dim])
splits = list(range(0,n,split_interval))

# Make sure the last item in the list covers the full length of the time on our dataset
if splits[-1] != n:
splits = splits + [n]
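# e.g. n=400 with split_interval=180 gives splits=[0, 180, 360, 400], so the final region simply covers the remainder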

for ii in tqdm(range(len(splits)-1)):
print(f'Writing split {ii}')
# TODO: put some retry logic in here...
start = splits[ii]
stop = splits[ii+1]

ds_write = ds.isel({split_dim:slice(start, stop)})
print(f'Start: {ds_write[split_dim][0].data}')
print(f'Stop: {ds_write[split_dim][-1].data}')

# strip everything except the values
drop_vars = non_split_vars_coords+other_dims
ds_write = ds_write.drop_vars(drop_vars)

with ProgressBar():
ds_write.to_zarr(store, region={split_dim:slice(start, stop)}, mode='a')#why are the variables not instantiated in the init step
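For orientation, a minimal usage sketch of `write_split_zarr` (the toy dataset and local output path below are hypothetical and not part of this commit; the actual pipeline passes the merged CM2.6 dataset and a GCS mapper, with `split_interval=64`):

```python
import xarray as xr

# hypothetical toy input: any chunked dataset with a 'time' dimension will do
ds = xr.tutorial.open_dataset("air_temperature").chunk({"time": 1})

# write the store in blocks of 64 timesteps along 'time'
write_split_zarr("example_output.zarr", ds, split_dim="time", chunks=1, split_interval=64)
```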
# TODO: This is model agnostic and should live somewhere else?

# TODO: This is model agnostic and should live somewhere else?
def noskin_ds_wrapper(ds_in, algo='ecmwf', **kwargs):
ds_out = xr.Dataset()
ds_in = ds_in.copy(deep=False)

sst = ds_in.surface_temp + 273.15
t_zt = ds_in.t_ref
hum_zt = ds_in.q_ref
@@ -75,7 +75,7 @@ def noskin_ds_wrapper(ds_in, algo='ecmwf', **kwargs):
slp = ds_in.slp * 100 # check this
zu = 10
zt = 2

ql, qh, taux, tauy, evap = noskin(
sst,
t_zt,
@@ -91,7 +91,7 @@ def noskin_ds_wrapper(ds_in, algo='ecmwf', **kwargs):
ds_out['ql'] = ql
ds_out['qh'] = qh
ds_out['evap'] = evap
ds_out['taux'] = taux
ds_out['taux'] = taux
ds_out['tauy'] = tauy
return ds_out

@@ -114,7 +114,7 @@ def load_and_merge_cm26(regridder_token):
)
# instead do this
ds_atmos = ds_atmos.chunk({'time':1})

fs = gcsfs.GCSFileSystem(token=regridder_token)
path = 'ocean-transport-group/scale-aware-air-sea/regridding_weights/CM26_atmos2ocean.zarr'
mapper = fs.get_mapper(path)
@@ -138,4 +138,4 @@ def load_and_merge_cm26(regridder_token):
ds_merged = ds_merged.transpose(
'xt_ocean', 'yt_ocean', 'time'
)
return ds_merged
return ds_merged
8 changes: 4 additions & 4 deletions notebooks/jbusecke/workflow/debugging_script.py
@@ -4,7 +4,7 @@
def noskin_ds_wrapper(ds_in):
ds_out = xr.Dataset()
ds_in = ds_in.copy(deep=False)

sst = ds_in.surface_temp + 273.15
t_zt = ds_in.t_ref
hum_zt = ds_in.q_ref
@@ -13,7 +13,7 @@ def noskin_ds_wrapper(ds_in):
slp = ds_in.slp * 100 # check this
zu = 10
zt = 2

ql, qh, taux, tauy, evap = noskin(
sst,
t_zt,
@@ -28,10 +28,10 @@ def noskin_ds_wrapper(ds_in):
ds_out['ql'] = ql
ds_out['qh'] = qh
ds_out['evap'] = evap
ds_out['taux'] = taux
ds_out['taux'] = taux
ds_out['tauy'] = tauy
return ds_out

# load the tempsave file from `cm26_pipeline.ipynb`
ds_coarsened = xr.open_dataset('test_coarsened_filled.nc')
ds_coarse_res = noskin_ds_wrapper(ds_coarsened)
ds_coarse_res = noskin_ds_wrapper(ds_coarsened)