Skip to content

Commit

Permalink
sanitize uncertainties + fix measurements + match biota species in os…
Browse files Browse the repository at this point in the history
…par handler
  • Loading branch information
franckalbinet committed Sep 26, 2024
1 parent 5021d6f commit 3285c9a
Show file tree
Hide file tree
Showing 5 changed files with 924 additions and 196 deletions.
3 changes: 1 addition & 2 deletions marisco/handlers/helcom.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,8 +149,7 @@ def _define_beg_period(self, df):
"Create a standardized date representation for Open Refine."
df['begperiod'] = df['time']

# %% ../../nbs/handlers/helcom.ipynb 58
# Columns of interest
# %% ../../nbs/handlers/helcom.ipynb 59
# Per sample type ('seawater', 'biota', 'sediment'), the entry stored under
# 'val'. NOTE(review): these look like the names of the source columns holding
# the measured value (Bq/m³ for seawater, Bq/kg otherwise) — confirm.
coi_val = {'seawater' : {'val': 'VALUE_Bq/m³'},
'biota': {'val': 'VALUE_Bq/kg'},
'sediment': {'val': 'VALUE_Bq/kg'}}
Expand Down
31 changes: 17 additions & 14 deletions marisco/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/api/utils.ipynb.

# %% auto 0
__all__ = ['get_unique_across_dfs', 'Remapper', 'has_valid_varname', 'get_bbox', 'ddmm_to_dd', 'download_files_in_folder',
__all__ = ['NA', 'get_unique_across_dfs', 'Remapper', 'has_valid_varname', 'get_bbox', 'ddmm_to_dd', 'download_files_in_folder',
'download_file', 'match_worms', 'Match', 'match_maris_lut', 'test_dfs']

# %% ../nbs/api/utils.ipynb 2
Expand All @@ -26,7 +26,10 @@
import jellyfish as jf
from collections.abc import Callable

# %% ../nbs/api/utils.ipynb 6
# %% ../nbs/api/utils.ipynb 5
# Package-wide constant (exported through `__all__`).
# NOTE(review): presumably the placeholder written where a value is missing — confirm.
NA = 'Not available'

# %% ../nbs/api/utils.ipynb 8
def get_unique_across_dfs(dfs:dict, # Dictionary of dataframes
col_name:str='NUCLIDE', # Column name to extract unique values from
as_df:bool=False, # Return a DataFrame of unique values
Expand All @@ -41,7 +44,7 @@ def get_unique_across_dfs(dfs:dict, # Dictionary of dataframes
if include_nchars: df_uniques['n_chars'] = df_uniques['value'].str.len()
return df_uniques

# %% ../nbs/api/utils.ipynb 12
# %% ../nbs/api/utils.ipynb 14
class Remapper():
"Remap a data provider lookup table to a MARIS lookup table using fuzzy matching."
def __init__(self,
Expand Down Expand Up @@ -104,7 +107,7 @@ def _format_output(self):
return df_lut.sort_values(by='match_score', ascending=False)


# %% ../nbs/api/utils.ipynb 14
# %% ../nbs/api/utils.ipynb 16
def has_valid_varname(
var_names:list, # variable names
cdl_path:str, # Path to MARIS CDL file (point of truth)
Expand Down Expand Up @@ -137,7 +140,7 @@ def has_valid_varname(
print(f'"{name}" variable name not found in MARIS CDL')
return has_valid

# %% ../nbs/api/utils.ipynb 18
# %% ../nbs/api/utils.ipynb 20
def get_bbox(df,
coord_cols=('lon', 'lat')
):
Expand All @@ -146,7 +149,7 @@ def get_bbox(df,
arr = [(row[x], row[y]) for _, row in df.iterrows()]
return MultiPoint(arr).envelope

# %% ../nbs/api/utils.ipynb 24
# %% ../nbs/api/utils.ipynb 26
def ddmm_to_dd(
ddmmmm:float # Coordinates in degrees/minutes decimal format
) -> float: # Coordinates in degrees decimal format
Expand All @@ -155,7 +158,7 @@ def ddmm_to_dd(
mins = mins * 100
return round(int(degs) + (mins / 60), 6)

# %% ../nbs/api/utils.ipynb 27
# %% ../nbs/api/utils.ipynb 29
def download_files_in_folder(owner:str,
repo:str,
src_dir:str,
Expand Down Expand Up @@ -189,7 +192,7 @@ def download_file(owner, repo, src_dir, dest_dir, fname):
else:
print(f"Error: {response.status_code}")

# %% ../nbs/api/utils.ipynb 29
# %% ../nbs/api/utils.ipynb 31
def match_worms(
name:str # Name of species to look up in WoRMS
):
Expand All @@ -212,7 +215,7 @@ def match_worms(
else:
return -1

# %% ../nbs/api/utils.ipynb 34
# %% ../nbs/api/utils.ipynb 36
@dataclass
class Match:
"Match between a data provider name and a MARIS lookup table."
Expand All @@ -221,7 +224,7 @@ class Match:
source_name: str
match_score: int

# %% ../nbs/api/utils.ipynb 35
# %% ../nbs/api/utils.ipynb 37
def match_maris_lut(
lut_path: str, # Path to MARIS species authoritative species look-up table
data_provider_name: str, # Name of data provider nomenclature item to look up
Expand All @@ -238,15 +241,15 @@ def match_maris_lut(
df = df.sort_values(by='score', ascending=True)[:nresults]
return df[[maris_id, maris_name, 'score']]

# %% ../nbs/api/utils.ipynb 42
# %% ../nbs/api/utils.ipynb 44
def get_bbox(df,
             coord_cols=('lon', 'lat')
            ):
    "Return the envelope (bounding geometry) of the points stored in the `coord_cols` columns of `df`."
    x, y = coord_cols
    # Pair the two coordinate columns directly instead of going through
    # `iterrows`, which builds a Series per row (slow on large frames) and
    # upcasts mixed dtypes along the way.
    points = list(zip(df[x], df[y]))
    return MultiPoint(points).envelope

# %% ../nbs/api/utils.ipynb 49
# %% ../nbs/api/utils.ipynb 51
def download_files_in_folder(owner:str,
repo:str,
src_dir:str,
Expand Down Expand Up @@ -280,7 +283,7 @@ def download_file(owner, repo, src_dir, dest_dir, fname):
else:
print(f"Error: {response.status_code}")

# %% ../nbs/api/utils.ipynb 51
# %% ../nbs/api/utils.ipynb 53
def match_worms(
name:str # Name of species to look up in WoRMS
):
Expand All @@ -303,7 +306,7 @@ def match_worms(
else:
return -1

# %% ../nbs/api/utils.ipynb 56
# %% ../nbs/api/utils.ipynb 58
def test_dfs(
dfs1:dict, # First dictionary of DataFrames to compare
dfs2:dict # Second dictionary of DataFrames to compare
Expand Down
19 changes: 19 additions & 0 deletions nbs/api/utils.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,25 @@
"import pandas as pd"
]
},
{
"cell_type": "markdown",
"id": "fb15839e",
"metadata": {},
"source": [
"Below we define constants that are used throughout the package."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6dff2deb",
"metadata": {},
"outputs": [],
"source": [
"#| exports\n",
"NA = 'Not available'"
]
},
{
"cell_type": "markdown",
"id": "7ddde356",
Expand Down
Loading

0 comments on commit 3285c9a

Please sign in to comment.