Skip to content

Commit

Permalink
calling lookup table fn only at runtime
Browse files: browse the repository at this point in the history
  • Loading branch information
franckalbinet committed Jun 24, 2024
1 parent 230474c commit 28f9bae
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 17 deletions.
10 changes: 6 additions & 4 deletions marisco/handlers/helcom.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
from ..configs import (base_path, nc_tpl_path, cfg, cache_path, cdl_cfg, Enums, lut_path,
species_lut_path, sediments_lut_path, bodyparts_lut_path)
from ..serializers import NetCDFEncoder
from collections.abc import Callable

# %% ../../nbs/handlers/helcom.ipynb 9
fname_in = '../../_data/accdb/mors/csv'
Expand Down Expand Up @@ -147,7 +148,7 @@ def get_maris_lut(fname_in,
data_provider_lut:str, # Data provider lookup table name
data_provider_id_col:str, # Data provider lookup column id of interest
data_provider_name_col:str, # Data provider lookup column name of interest
maris_lut:str, # MARIS source lookup table name and path
maris_lut:Callable, # Function retrieving MARIS source lookup table
maris_id: str, # Id of MARIS lookup table nomenclature item to match
maris_name: str, # Name of MARIS lookup table nomenclature item to match
unmatched_fixes={},
Expand All @@ -156,6 +157,7 @@ def get_maris_lut(fname_in,
):
fname_cache = cache_path() / fname_cache
lut = {}
maris_lut = maris_lut()
df = pd.read_csv(Path(fname_in) / data_provider_lut)
if overwrite or (not fname_cache.exists()):
for _, row in tqdm(df.iterrows(), total=len(df)):
Expand Down Expand Up @@ -208,7 +210,7 @@ def __call__(self, tfm):
data_provider_lut='RUBIN_NAME.csv',
data_provider_id_col='RUBIN',
data_provider_name_col='SCIENTIFIC NAME',
maris_lut=species_lut_path(),
maris_lut=species_lut_path,
maris_id='species_id',
maris_name='species',
unmatched_fixes=unmatched_fixes_biota_species,
Expand Down Expand Up @@ -241,7 +243,7 @@ def __call__(self, tfm):
data_provider_lut='TISSUE.csv',
data_provider_id_col='TISSUE',
data_provider_name_col='TISSUE_DESCRIPTION',
maris_lut=bodyparts_lut_path(),
maris_lut=bodyparts_lut_path,
maris_id='bodypar_id',
maris_name='bodypar',
unmatched_fixes=unmatched_fixes_biota_tissues)
Expand All @@ -260,7 +262,7 @@ def __call__(self, tfm):
data_provider_lut='SEDIMENT_TYPE.csv',
data_provider_id_col='SEDI',
data_provider_name_col='SEDIMENT TYPE',
maris_lut=sediments_lut_path(),
maris_lut=sediments_lut_path,
maris_id='sedtype_id',
maris_name='sedtype',
unmatched_fixes=unmatched_fixes_sediments)
Expand Down
28 changes: 15 additions & 13 deletions nbs/handlers/helcom.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,8 @@
"from marisco.metadata import (GlobAttrsFeeder, BboxCB, DepthRangeCB, TimeRangeCB, ZoteroCB, KeyValuePairCB)\n",
"from marisco.configs import (base_path, nc_tpl_path, cfg, cache_path, cdl_cfg, Enums, lut_path,\n",
" species_lut_path, sediments_lut_path, bodyparts_lut_path)\n",
"from marisco.serializers import NetCDFEncoder"
"from marisco.serializers import NetCDFEncoder\n",
"from collections.abc import Callable"
]
},
{
Expand Down Expand Up @@ -1490,7 +1491,7 @@
" data_provider_lut:str, # Data provider lookup table name\n",
" data_provider_id_col:str, # Data provider lookup column id of interest\n",
" data_provider_name_col:str, # Data provider lookup column name of interest\n",
" maris_lut:str, # MARIS source lookup table name and path\n",
" maris_lut:Callable, # Function retrieving MARIS source lookup table\n",
" maris_id: str, # Id of MARIS lookup table nomenclature item to match\n",
" maris_name: str, # Name of MARIS lookup table nomenclature item to match\n",
" unmatched_fixes={},\n",
Expand All @@ -1499,6 +1500,7 @@
" ):\n",
" fname_cache = cache_path() / fname_cache\n",
" lut = {}\n",
" maris_lut = maris_lut()\n",
" df = pd.read_csv(Path(fname_in) / data_provider_lut)\n",
" if overwrite or (not fname_cache.exists()):\n",
" for _, row in tqdm(df.iterrows(), total=len(df)):\n",
Expand Down Expand Up @@ -1550,7 +1552,7 @@
"name": "stderr",
"output_type": "stream",
"text": [
"100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 46/46 [00:06<00:00, 6.60it/s]\n"
"100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 46/46 [00:06<00:00, 7.04it/s]\n"
]
}
],
Expand All @@ -1560,7 +1562,7 @@
" data_provider_lut='RUBIN_NAME.csv',\n",
" data_provider_id_col='RUBIN',\n",
" data_provider_name_col='SCIENTIFIC NAME',\n",
" maris_lut=species_lut_path(),\n",
" maris_lut=species_lut_path,\n",
" maris_id='species_id',\n",
" maris_name='species',\n",
" unmatched_fixes=unmatched_fixes_biota_species,\n",
Expand Down Expand Up @@ -2163,7 +2165,7 @@
" data_provider_lut='RUBIN_NAME.csv',\n",
" data_provider_id_col='RUBIN',\n",
" data_provider_name_col='SCIENTIFIC NAME',\n",
" maris_lut=species_lut_path(),\n",
" maris_lut=species_lut_path,\n",
" maris_id='species_id',\n",
" maris_name='species',\n",
" unmatched_fixes=unmatched_fixes_biota_species,\n",
Expand Down Expand Up @@ -2458,7 +2460,7 @@
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 29/29 [00:00<00:00, 129.18it/s]\n"
"100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 29/29 [00:00<00:00, 140.26it/s]\n"
]
}
],
Expand All @@ -2469,7 +2471,7 @@
" data_provider_lut='TISSUE.csv',\n",
" data_provider_id_col='TISSUE',\n",
" data_provider_name_col='TISSUE_DESCRIPTION',\n",
" maris_lut=bodyparts_lut_path(),\n",
" maris_lut=bodyparts_lut_path,\n",
" maris_id='bodypar_id',\n",
" maris_name='bodypar',\n",
" unmatched_fixes=unmatched_fixes_biota_tissues,\n",
Expand Down Expand Up @@ -2831,7 +2833,7 @@
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 29/29 [00:00<00:00, 129.31it/s]\n"
"100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 29/29 [00:00<00:00, 139.22it/s]\n"
]
}
],
Expand All @@ -2842,7 +2844,7 @@
" data_provider_lut='TISSUE.csv',\n",
" data_provider_id_col='TISSUE',\n",
" data_provider_name_col='TISSUE_DESCRIPTION',\n",
" maris_lut=bodyparts_lut_path(),\n",
" maris_lut=bodyparts_lut_path,\n",
" maris_id='bodypar_id',\n",
" maris_name='bodypar',\n",
" unmatched_fixes=unmatched_fixes_biota_tissues,\n",
Expand All @@ -2864,7 +2866,7 @@
" data_provider_lut='TISSUE.csv',\n",
" data_provider_id_col='TISSUE',\n",
" data_provider_name_col='TISSUE_DESCRIPTION',\n",
" maris_lut=bodyparts_lut_path(),\n",
" maris_lut=bodyparts_lut_path,\n",
" maris_id='bodypar_id',\n",
" maris_name='bodypar',\n",
" unmatched_fixes=unmatched_fixes_biota_tissues)"
Expand Down Expand Up @@ -3314,7 +3316,7 @@
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 47/47 [00:00<00:00, 127.48it/s]\n"
"100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 47/47 [00:00<00:00, 146.47it/s]\n"
]
}
],
Expand All @@ -3326,7 +3328,7 @@
" data_provider_lut='SEDIMENT_TYPE.csv',\n",
" data_provider_id_col='SEDI',\n",
" data_provider_name_col='SEDIMENT TYPE',\n",
" maris_lut=sediments_lut_path(),\n",
" maris_lut=sediments_lut_path,\n",
" maris_id='sedtype_id',\n",
" maris_name='sedtype',\n",
" unmatched_fixes=unmatched_fixes_sediments,\n",
Expand Down Expand Up @@ -3835,7 +3837,7 @@
" data_provider_lut='SEDIMENT_TYPE.csv',\n",
" data_provider_id_col='SEDI',\n",
" data_provider_name_col='SEDIMENT TYPE',\n",
" maris_lut=sediments_lut_path(),\n",
" maris_lut=sediments_lut_path,\n",
" maris_id='sedtype_id',\n",
" maris_name='sedtype',\n",
" unmatched_fixes=unmatched_fixes_sediments)"
Expand Down

0 comments on commit 28f9bae

Please sign in to comment.