Skip to content

Commit

Permalink
WIP: Update OSPAR handler
Browse files Browse the repository at this point in the history
- Improve data processing pipeline for OSPAR data
  • Loading branch information
niallmurphy93 committed Oct 16, 2024
1 parent b3ebef6 commit 5bd5c7c
Show file tree
Hide file tree
Showing 4 changed files with 2,306 additions and 2,253 deletions.
60 changes: 30 additions & 30 deletions marisco/handlers/helcom.py
Original file line number Diff line number Diff line change
Expand Up @@ -256,13 +256,13 @@ def __call__(self, tfm: Transformer):
fname_cache='species_helcom.pkl'
).generate_lookup_table(fixes=fixes_biota_species, as_df=False, overwrite=False)

# %% ../../nbs/handlers/helcom.ipynb 88
# %% ../../nbs/handlers/helcom.ipynb 90
# Manual overrides for provider tissue names, passed as `fixes=` when the
# tissues lookup table is generated. Keys are the provider's spellings and
# values are the replacement names to use for them.
fixes_biota_tissues = {
    'WHOLE FISH WITHOUT HEAD AND ENTRAILS': 'Whole animal eviscerated without head',
    'ENTRAILS': 'Viscera',
    'SKIN/EPIDERMIS': 'Skin',
}

# %% ../../nbs/handlers/helcom.ipynb 91
# %% ../../nbs/handlers/helcom.ipynb 93
lut_tissues = lambda: Remapper(provider_lut_df=pd.read_csv('../../_data/accdb/mors/csv/TISSUE.csv'),
maris_lut_fn=bodyparts_lut_path,
maris_col_id='bodypar_id',
Expand All @@ -272,11 +272,11 @@ def __call__(self, tfm: Transformer):
fname_cache='tissues_helcom.pkl'
).generate_lookup_table(fixes=fixes_biota_tissues, as_df=False, overwrite=False)

# %% ../../nbs/handlers/helcom.ipynb 95
# %% ../../nbs/handlers/helcom.ipynb 97
# Deferred lookup builder: nothing is read until `lut_biogroup()` is called.
# `get_lut` receives the directory and file name of the MARIS species LUT,
# with `species_id` as the key column and `biogroup_id` as the value column.
lut_biogroup = lambda: get_lut(
    species_lut_path().parent,
    species_lut_path().name,
    key='species_id',
    value='biogroup_id',
)

# %% ../../nbs/handlers/helcom.ipynb 98
# %% ../../nbs/handlers/helcom.ipynb 100
# TODO: Include Commonname field after next MARIS data reconciling process.
def get_taxon_info_lut(
maris_lut:str # Path to the MARIS lookup table (Excel file)
Expand All @@ -287,7 +287,7 @@ def get_taxon_info_lut(

# Deferred lookup builder: the taxon information LUT is only built from the
# path returned by `species_lut_path()` when `lut_taxon()` is called.
lut_taxon = lambda: get_taxon_info_lut(
    species_lut_path(),
)

# %% ../../nbs/handlers/helcom.ipynb 99
# %% ../../nbs/handlers/helcom.ipynb 101
class RemapTaxonInformationCB(Callback):
"Update taxon information based on MARIS species LUT."
def __init__(self, fn_lut: Callable):
Expand All @@ -307,12 +307,12 @@ def __call__(self, tfm: Transformer):
if len(unmatched) > 0:
print(f"Unmatched species IDs: {', '.join(unmatched)}")

# %% ../../nbs/handlers/helcom.ipynb 108
# %% ../../nbs/handlers/helcom.ipynb 110
# Manual override passed as `fixes=` when the sediment lookup table is generated.
fixes_sediments = {'NO DATA': '(Not available)'}

# %% ../../nbs/handlers/helcom.ipynb 110
# %% ../../nbs/handlers/helcom.ipynb 112
class RemapSedimentCB(Callback):
"Update sediment id based on MARIS sediment type LUT (dbo_sedtype.xlsx)."
def __init__(self,
Expand Down Expand Up @@ -352,7 +352,7 @@ def _print_unmatched_sedi(self,
"Print the SEDI value if the matched_id is -1."
print(f"Unmatched SEDI: {sedi_value}")

# %% ../../nbs/handlers/helcom.ipynb 111
# %% ../../nbs/handlers/helcom.ipynb 113
lut_sediments = lambda: Remapper(provider_lut_df=pd.read_csv(Path(fname_in) / 'SEDIMENT_TYPE.csv'),
maris_lut_fn=sediments_lut_path,
maris_col_id='sedtype_id',
Expand All @@ -362,7 +362,7 @@ def _print_unmatched_sedi(self,
fname_cache='sediments_helcom.pkl'
).generate_lookup_table(fixes=fixes_sediments, as_df=False, overwrite=False)

# %% ../../nbs/handlers/helcom.ipynb 121
# %% ../../nbs/handlers/helcom.ipynb 123
lut_units = {
'seawater': 1, # 'Bq/m3'
'sediment': 4, # 'Bq/kgd' for sediment
Expand All @@ -373,7 +373,7 @@ def _print_unmatched_sedi(self,
}
}

# %% ../../nbs/handlers/helcom.ipynb 122
# %% ../../nbs/handlers/helcom.ipynb 124
class RemapUnitCB(Callback):
"Set the `unit` id column in the DataFrames based on a lookup table."
def __init__(self,
Expand All @@ -388,10 +388,10 @@ def __call__(self, tfm: Transformer):
else:
tfm.dfs[grp]['unit'] = tfm.dfs[grp]['BASIS'].apply(lambda x: lut_units[grp].get(x, 0))

# %% ../../nbs/handlers/helcom.ipynb 127
# %% ../../nbs/handlers/helcom.ipynb 129
# Deferred lookup builder: reads the `name` and `id` columns of the detection
# limit LUT (Excel) and returns a `{name: id}` dict when `lut_dl()` is called.
lut_dl = lambda: (
    pd.read_excel(detection_limit_lut_path(), usecols=['name', 'id'])
    .set_index('name')
    .to_dict()['id']
)

# %% ../../nbs/handlers/helcom.ipynb 129
# %% ../../nbs/handlers/helcom.ipynb 131
coi_dl = {'seawater' : {'val' : 'VALUE_Bq/m³',
'unc' : 'ERROR%_m³',
'dl' : '< VALUE_Bq/m³'},
Expand All @@ -403,7 +403,7 @@ def __call__(self, tfm: Transformer):
'unc' : 'ERROR%_kg',
'dl' : '< VALUE_Bq/kg'}}

# %% ../../nbs/handlers/helcom.ipynb 132
# %% ../../nbs/handlers/helcom.ipynb 134
# TO BE REFACTORED
class RemapDetectionLimitCB(Callback):
"Remap value type to MARIS format."
Expand Down Expand Up @@ -443,14 +443,14 @@ def _update_detection_limit(self,
# Perform lookup
df['detection_limit'] = df['detection_limit'].map(lut)

# %% ../../nbs/handlers/helcom.ipynb 140
# %% ../../nbs/handlers/helcom.ipynb 142
# FILT code -> integer id. Upper- and lower-case 'N' share the same id.
# `RemapFiltCB` maps any code missing from its lookup table to 0.
lut_filtered = {'N': 2, 'n': 2, 'F': 1}

# %% ../../nbs/handlers/helcom.ipynb 142
# %% ../../nbs/handlers/helcom.ipynb 144
class RemapFiltCB(Callback):
"Lookup FILT value in dataframe using the lookup table."
def __init__(self,
Expand All @@ -463,7 +463,7 @@ def __call__(self, tfm):
if 'FILT' in df.columns:
df['FILT'] = df['FILT'].map(lambda x: self.lut_filtered.get(x, 0))

# %% ../../nbs/handlers/helcom.ipynb 147
# %% ../../nbs/handlers/helcom.ipynb 149
class AddSampleLabCodeCB(Callback):
"Remap `KEY` column to `samplabcode` in each DataFrame."
def __call__(self, tfm: Transformer):
Expand All @@ -473,10 +473,10 @@ def __call__(self, tfm: Transformer):
def _remap_sample_id(self, df: pd.DataFrame):
df['samplabcode'] = df['KEY']

# %% ../../nbs/handlers/helcom.ipynb 152
# %% ../../nbs/handlers/helcom.ipynb 154
# Deferred lookup builder: reads ANALYSIS_METHOD.csv from the `fname_in`
# directory and returns a `{METHOD: DESCRIPTION}` dict when called.
lut_method = lambda: (
    pd.read_csv(Path(fname_in) / 'ANALYSIS_METHOD.csv')
    .set_index('METHOD')
    .to_dict()['DESCRIPTION']
)

# %% ../../nbs/handlers/helcom.ipynb 153
# %% ../../nbs/handlers/helcom.ipynb 155
class AddMeasurementNoteCB(Callback):
"Record measurement notes by adding a 'measurementnote' column to DataFrames."
def __init__(self,
Expand All @@ -490,7 +490,7 @@ def __call__(self, tfm: Transformer):
if 'METHOD' in df.columns:
df['measurementnote'] = df['METHOD'].map(lambda x: lut.get(x, 0))

# %% ../../nbs/handlers/helcom.ipynb 157
# %% ../../nbs/handlers/helcom.ipynb 159
class RemapStationIdCB(Callback):
"Remap Station ID to MARIS format."
def __init__(self):
Expand All @@ -501,15 +501,15 @@ def __call__(self, tfm: Transformer):
for grp in tfm.dfs.keys():
tfm.dfs[grp]['station'] = tfm.dfs[grp]['STATION']

# %% ../../nbs/handlers/helcom.ipynb 161
# %% ../../nbs/handlers/helcom.ipynb 163
class RemapSedSliceTopBottomCB(Callback):
    "Copy the sediment slice bounds (`UPPSLI`, `LOWSLI`) into `top` and `bottom` columns."
    def __call__(self, tfm: Transformer):
        "Add `top` and `bottom` columns to the `sediment` DataFrame of `tfm.dfs` (other groups are untouched)."
        sediment = tfm.dfs['sediment']
        # (new column, provider column) pairs, applied in the original order.
        for target, source in (('top', 'UPPSLI'), ('bottom', 'LOWSLI')):
            sediment[target] = sediment[source]

# %% ../../nbs/handlers/helcom.ipynb 166
# %% ../../nbs/handlers/helcom.ipynb 168
class LookupDryWetRatio(Callback):
"Lookup dry-wet ratio and format for MARIS."
def __call__(self, tfm: Transformer):
Expand All @@ -525,7 +525,7 @@ def _apply_dry_wet_ratio(self, df: pd.DataFrame) -> None:
df.loc[df['dry_wet_ratio'] == 0, 'dry_wet_ratio'] = np.NaN


# %% ../../nbs/handlers/helcom.ipynb 172
# %% ../../nbs/handlers/helcom.ipynb 174
class ParseCoordinates(Callback):
"""
Get geographical coordinates from columns expressed in degrees decimal format
Expand Down Expand Up @@ -575,7 +575,7 @@ def _safe_convert(self, value) -> str:
print(f"Error converting value {value}: {e}")
return value

# %% ../../nbs/handlers/helcom.ipynb 183
# %% ../../nbs/handlers/helcom.ipynb 185
def get_common_rules(
vars: dict, # Configuration dictionary
encoding_type: str # Encoding type (`netcdf` or `openrefine`)
Expand Down Expand Up @@ -615,7 +615,7 @@ def get_common_rules(

return common

# %% ../../nbs/handlers/helcom.ipynb 184
# %% ../../nbs/handlers/helcom.ipynb 186
def get_specific_rules(
vars: dict, # Configuration dictionary
encoding_type: str # Encoding type (`netcdf` or `openrefine`)
Expand Down Expand Up @@ -654,7 +654,7 @@ def get_specific_rules(
}
}

# %% ../../nbs/handlers/helcom.ipynb 185
# %% ../../nbs/handlers/helcom.ipynb 187
def get_renaming_rules(
encoding_type: str = 'netcdf' # Encoding type (`netcdf` or `openrefine`)
) -> dict: # Renaming rules for NetCDF and OpenRefine.
Expand All @@ -674,7 +674,7 @@ def get_renaming_rules(

return dict(rules)

# %% ../../nbs/handlers/helcom.ipynb 186
# %% ../../nbs/handlers/helcom.ipynb 188
class SelectAndRenameColumnCB(Callback):
"Select and rename columns in a DataFrame based on renaming rules for a specified encoding type."
def __init__(self,
Expand Down Expand Up @@ -745,7 +745,7 @@ def _apply_renaming(self,
return df, not_found_keys


# %% ../../nbs/handlers/helcom.ipynb 195
# %% ../../nbs/handlers/helcom.ipynb 197
kw = ['oceanography', 'Earth Science > Oceans > Ocean Chemistry> Radionuclides',
'Earth Science > Human Dimensions > Environmental Impacts > Nuclear Radiation Exposure',
'Earth Science > Oceans > Ocean Chemistry > Ocean Tracers, Earth Science > Oceans > Marine Sediments',
Expand All @@ -757,7 +757,7 @@ def _apply_renaming(self,
'Earth Science > Biological Classification > Animals/Invertebrates > Arthropods > Crustaceans',
'Earth Science > Biological Classification > Plants > Macroalgae (Seaweeds)']

# %% ../../nbs/handlers/helcom.ipynb 196
# %% ../../nbs/handlers/helcom.ipynb 198
def get_attrs(
tfm: Transformer, # Transformer object
zotero_key: str, # Zotero dataset record key
Expand All @@ -773,7 +773,7 @@ def get_attrs(
KeyValuePairCB('publisher_postprocess_logs', ', '.join(tfm.logs))
])()

# %% ../../nbs/handlers/helcom.ipynb 198
# %% ../../nbs/handlers/helcom.ipynb 200
def enums_xtra(
tfm: Transformer, # Transformer object
vars: list # List of variables to extract from the transformer
Expand All @@ -787,7 +787,7 @@ def enums_xtra(
xtras[f'{var}_t'] = enums.filter(f'{var}_t', unique_vals)
return xtras

# %% ../../nbs/handlers/helcom.ipynb 200
# %% ../../nbs/handlers/helcom.ipynb 202
def encode(
fname_in: str, # Input file name
fname_out_nc: str, # Output file name
Expand Down
4 changes: 4 additions & 0 deletions nbs/api/configs.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -1272,6 +1272,10 @@
"display_name": "python3",
"language": "python",
"name": "python3"
},
"language_info": {
"name": "python",
"version": "3.10.15"
}
},
"nbformat": 4,
Expand Down
Loading

0 comments on commit 5bd5c7c

Please sign in to comment.