Exported source
class Callback():
    "Base class for callbacks."
    # Default ordering value shared by all callbacks; subclasses may override it.
    # NOTE(review): presumably read by `run_cbs` (not visible here) — confirm.
    order = 0
From 5731cc9d1260f21b3fda3a8f46052d79977af29b Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Mon, 14 Oct 2024 10:28:24 +0000 Subject: [PATCH] deploy: 6b015ab367417e2eb0e50688d545f21dc301574f --- .nojekyll | 0 api/callbacks.html | 1897 +++++ api/configs.html | 2008 +++++ api/inout.html | 797 ++ api/metadata.html | 1057 +++ api/nc_template.html | 1358 ++++ api/serializers.html | 1295 +++ api/utils.html | 2038 +++++ cli/create_nc_template.html | 729 ++ cli/init.html | 752 ++ cli/netcdfy.html | 787 ++ handlers/helcom.html | 7049 +++++++++++++++++ handlers/maris_legacy.html | 3944 +++++++++ handlers/netcdf_to_csv.html | 3820 +++++++++ img/logo.png | Bin 0 -> 106866 bytes index.html | 849 ++ robots.txt | 1 + search.json | 801 ++ site_libs/bootstrap/bootstrap-icons.css | 2078 +++++ site_libs/bootstrap/bootstrap-icons.woff | Bin 0 -> 176200 bytes site_libs/bootstrap/bootstrap.min.css | 12 + site_libs/bootstrap/bootstrap.min.js | 7 + site_libs/clipboard/clipboard.min.js | 7 + site_libs/quarto-html/anchor.min.js | 9 + site_libs/quarto-html/popper.min.js | 6 + .../quarto-syntax-highlighting.css | 205 + site_libs/quarto-html/quarto.js | 908 +++ site_libs/quarto-html/tippy.css | 1 + site_libs/quarto-html/tippy.umd.min.js | 2 + site_libs/quarto-nav/headroom.min.js | 7 + site_libs/quarto-nav/quarto-nav.js | 325 + site_libs/quarto-search/autocomplete.umd.js | 3 + site_libs/quarto-search/fuse.min.js | 9 + site_libs/quarto-search/quarto-search.js | 1290 +++ sitemap.xml | 59 + styles.css | 68 + 36 files changed, 34178 insertions(+) create mode 100644 .nojekyll create mode 100644 api/callbacks.html create mode 100644 api/configs.html create mode 100644 api/inout.html create mode 100644 api/metadata.html create mode 100644 api/nc_template.html create mode 100644 api/serializers.html create mode 100644 api/utils.html create mode 100644 cli/create_nc_template.html create mode 100644 cli/init.html create mode 100644 
cli/netcdfy.html create mode 100644 handlers/helcom.html create mode 100644 handlers/maris_legacy.html create mode 100644 handlers/netcdf_to_csv.html create mode 100644 img/logo.png create mode 100644 index.html create mode 100644 robots.txt create mode 100644 search.json create mode 100644 site_libs/bootstrap/bootstrap-icons.css create mode 100644 site_libs/bootstrap/bootstrap-icons.woff create mode 100644 site_libs/bootstrap/bootstrap.min.css create mode 100644 site_libs/bootstrap/bootstrap.min.js create mode 100644 site_libs/clipboard/clipboard.min.js create mode 100644 site_libs/quarto-html/anchor.min.js create mode 100644 site_libs/quarto-html/popper.min.js create mode 100644 site_libs/quarto-html/quarto-syntax-highlighting.css create mode 100644 site_libs/quarto-html/quarto.js create mode 100644 site_libs/quarto-html/tippy.css create mode 100644 site_libs/quarto-html/tippy.umd.min.js create mode 100644 site_libs/quarto-nav/headroom.min.js create mode 100644 site_libs/quarto-nav/quarto-nav.js create mode 100644 site_libs/quarto-search/autocomplete.umd.js create mode 100644 site_libs/quarto-search/fuse.min.js create mode 100644 site_libs/quarto-search/quarto-search.js create mode 100644 sitemap.xml create mode 100644 styles.css diff --git a/.nojekyll b/.nojekyll new file mode 100644 index 0000000..e69de29 diff --git a/api/callbacks.html b/api/callbacks.html new file mode 100644 index 0000000..98a6656 --- /dev/null +++ b/api/callbacks.html @@ -0,0 +1,1897 @@ + +
+ + + + + + + + +The Transformer
class is designed to facilitate the application of a series of callbacks to a set of dataframes. It provides a structured way to apply transformations (i.e. `Callback`
instances) to the data, with a focus on flexibility and ease of use.
+++Callback ()
Base class for callbacks.
+ ++++run_cbs (cbs:List[__main__.Callback], obj:Any)
Run the callbacks in the order they are specified.
++ | Type | +Details | +
---|---|---|
cbs | +List | +List of callbacks to run | +
obj | +Any | +Object to pass to the callbacks | +
+++Transformer (data:Union[Dict[str,pandas.core.frame.DataFrame],pandas.core + .frame.DataFrame], + cbs:Optional[List[__main__.Callback]]=None, + inplace:bool=False)
Transform the dataframe(s) according to the specified callbacks.
++ | Type | +Default | +Details | +
---|---|---|---|
data | +Union | ++ | Data to be transformed | +
cbs | +Optional | +None | +List of callbacks to run | +
inplace | +bool | +False | +Whether to modify the dataframe(s) in place | +
class Transformer():
    "Transform the dataframe(s) according to the specified callbacks."
    def __init__(self,
                 data: Union[Dict[str, pd.DataFrame], pd.DataFrame], # Data to be transformed
                 cbs: Optional[List[Callback]]=None, # List of callbacks to run
                 inplace: bool=False # Whether to modify the dataframe(s) in place
                 ):
        fc.store_attr()
        self.is_single_df = isinstance(data, pd.DataFrame)
        # Exactly one of `df` / `dfs` is populated; the other one is `None`
        self.df, self.dfs = self._prepare_data(data, inplace)
        self.logs = []

    def _prepare_data(self, data, inplace):
        "Return a `(df, dfs)` pair, copying the input unless `inplace` is set."
        if not self.is_single_df:
            groups = data if inplace else {name: frame.copy() for name, frame in data.items()}
            return None, groups
        single = data if inplace else data.copy()
        return single, None

    def unique(self, col_name: str) -> np.ndarray:
        "Distinct non-null values of `col_name` over the single dataframe or over all groups."
        if self.is_single_df:
            values = self.df.get(col_name, pd.Series()).dropna().values
        else:
            # Groups that do not have the column are simply ignored
            candidates = (frame.get(col_name) for frame in self.dfs.values())
            columns = [col for col in candidates if col is not None]
            values = np.concatenate([col.dropna().values for col in columns]) if columns else []
        return np.unique(values)

    def __call__(self):
        "Run the callbacks (if any) and return the dataframe or the dictionary of dataframes."
        if self.cbs:
            run_cbs(self.cbs, self)
        if self.dfs is None:
            return self.df
        return self.dfs
Below, a few examples of how to use the Transformer
class. Let’s define first a test callback that adds 1
to the depth
:
And apply it to the following dataframes:
# Two groups sharing a `depth` column; `TestCB` is expected to add 1 to every depth value
dfs = {
    'biota': pd.DataFrame({'id': [0, 1, 2], 'species': [0, 2, 0], 'depth': [2, 3, 4]}),
    'seawater': pd.DataFrame({'id': [0, 1, 2], 'depth': [3, 4, 5]}),
}

tfm = Transformer(dfs, cbs=[TestCB()])
dfs_test = tfm()

# Every depth has been shifted by one in both groups
fc.test_eq(dfs_test['biota']['depth'].to_list(), [3, 4, 5])
fc.test_eq(dfs_test['seawater']['depth'].to_list(), [4, 5, 6])
This section gathers callbacks that are used to transform the geographical coordinates.
++++SanitizeLonLatCB (lon_col:str='lon', lat_col:str='lat', + verbose:bool=False)
Drop rows with invalid longitude & latitude values. Convert ,
separator to .
separator.
+ | Type | +Default | +Details | +
---|---|---|---|
lon_col | +str | +lon | +Longitude column name | +
lat_col | +str | +lat | +Latitude column name | +
verbose | +bool | +False | +Whether to print the number of invalid longitude & latitude values | +
class SanitizeLonLatCB(Callback):
    "Drop rows with invalid longitude & latitude values. Convert `,` separator to `.` separator."
    def __init__(self,
                 lon_col: str='lon', # Longitude column name
                 lat_col: str='lat', # Latitude column name
                 verbose: bool=False # Whether to print the number of invalid longitude & latitude values
                 ):
        fc.store_attr()

    @staticmethod
    def _to_float(x):
        "Parse a coordinate that may use `,` as decimal separator."
        return float(str(x).replace(',', '.'))

    def __call__(self, tfm: Transformer):
        for grp, df in tfm.dfs.items():
            # Normalise the decimal separator (written back into the group's dataframe)
            df[self.lon_col] = df[self.lon_col].apply(self._to_float)
            df[self.lat_col] = df[self.lat_col].apply(self._to_float)
            lon, lat = df[self.lon_col], df[self.lat_col]

            # Points located exactly at (0, 0)
            mask_zeroes = (lon == 0) & (lat == 0)
            nZeroes = mask_zeroes.sum()
            if nZeroes and self.verbose:
                print(f'The "{grp}" group contains {nZeroes} data points whose ({self.lon_col}, {self.lat_col}) = (0, 0)')

            # Points outside [-180, 180] x [-90, 90]
            mask_goob = (lon < -180) | (lon > 180) | (lat < -90) | (lat > 90)
            nGoob = mask_goob.sum()
            if nGoob and self.verbose:
                print(f'The "{grp}" group contains {nGoob} data points with unrealistic {self.lon_col} or {self.lat_col} values.')

            # Keep only the rows flagged by neither mask
            tfm.dfs[grp] = df.loc[~mask_zeroes & ~mask_goob]
This section gathers callbacks that are used to add required columns to the dataframes.
++++AddSampleTypeIdColumnCB (cdl_cfg:Callable=<function cdl_cfg>, + col_name:str='samptype_id')
Base class for callbacks.
++ | Type | +Default | +Details | +
---|---|---|---|
cdl_cfg | +Callable | +cdl_cfg | +Callable to get the CDL config dictionary | +
col_name | +str | +samptype_id | +Column name to store the sample type id | +
class AddSampleTypeIdColumnCB(Callback):
    "Add a column with the sample type id as defined in the CDL."
    def __init__(self,
                 cdl_cfg: Callable=cdl_cfg, # Callable to get the CDL config dictionary
                 col_name: str='samptype_id' # Column name to store the sample type id
                 ):
        fc.store_attr()
        # Group name -> sample type id, built from the `grps` section of the CDL config
        self.lut = {v['name']: v['id'] for v in cdl_cfg()['grps'].values()}

    def __call__(self, tfm):
        "Write the sample type id of each group into `col_name` of its dataframe."
        for grp, df in tfm.dfs.items():
            df[self.col_name] = self.lut[grp]
Let’s test the callback:
# One small dataframe per group declared in the CDL configuration
dfs = {
    v['name']: pd.DataFrame({'col_test': [0, 1, 2]})
    for v in CONFIGS_CDL['grps'].values()
}

tfm = Transformer(
    dfs,
    cbs=[AddSampleTypeIdColumnCB(cdl_cfg=lambda: CONFIGS_CDL)])
dfs_test = tfm()

# Each group must hold a single, constant sample type id equal to the configured one
for v in CONFIGS_CDL['grps'].values():
    fc.test_eq(dfs_test[v['name']]['samptype_id'].unique().item(), v['id'])
+++AddNuclideIdColumnCB (col_value:str, lut_fname_fn:Callable=<function + nuc_lut_path>, col_name:str='nuclide_id')
Base class for callbacks.
++ | Type | +Default | +Details | +
---|---|---|---|
col_value | +str | ++ | Column name containing the nuclide name | +
lut_fname_fn | +Callable | +nuc_lut_path | +Function returning the lut path | +
col_name | +str | +nuclide_id | +Column name to store the nuclide id | +
class AddNuclideIdColumnCB(Callback):
    "Add a column with the nuclide id."
    def __init__(self,
                 col_value: str, # Column name containing the nuclide name
                 lut_fname_fn: Callable=nuc_lut_path, # Function returning the lut path
                 col_name: str='nuclide_id' # Column name to store the nuclide id
                 ):
        fc.store_attr()
        # Resolve the lookup table path once, then split it into directory and file name
        lut_path = lut_fname_fn()
        self.lut = get_lut(lut_path.parent, lut_path.name,
                           key='nc_name', value='nuclide_id', reverse=False)

    def __call__(self, tfm: Transformer):
        "Map the values of `col_value` through the lookup table into `col_name` for every group."
        for df in tfm.dfs.values():
            # Names absent from the lookup table map to NaN (`Series.map` semantics)
            df[self.col_name] = df[self.col_value].map(self.lut)
# Same two nuclide names in every group declared in the CDL configuration
dfs = {
    v['name']: pd.DataFrame({'Nuclide': ['cs137', 'pu239_240_tot']})
    for v in CONFIGS_CDL['grps'].values()
}

def lut_fname_fn():
    "Path of the nuclide lookup table used for this example."
    return Path('./files/lut/dbo_nuclide.xlsx')

tfm = Transformer(dfs, cbs=[AddNuclideIdColumnCB(col_value='Nuclide', lut_fname_fn=lut_fname_fn)])
tfm()['seawater']

# Ids expected for 'cs137' and 'pu239_240_tot' respectively
expected = [33, 77]
for grp in tfm.dfs:
    fc.test_eq(tfm.dfs[grp]['nuclide_id'].to_list(), expected)
+++RemapCB (fn_lut:Callable, col_remap:str, col_src:str, + dest_grps:list[str]|str=['seawater', 'biota', 'sediment', + 'suspended-matter'], default_value:Any=-1)
Generic MARIS remapping callback.
++ | Type | +Default | +Details | +
---|---|---|---|
fn_lut | +Callable | ++ | Function that returns the lookup table dictionary | +
col_remap | +str | ++ | Name of the column to remap | +
col_src | +str | ++ | Name of the column with the source values | +
dest_grps | +list[str] | str | +[‘seawater’, ‘biota’, ‘sediment’, ‘suspended-matter’] | +List of destination groups | +
default_value | +Any | +-1 | +Default value for unmatched entries | +
class RemapCB(Callback):
    "Generic MARIS remapping callback."
    def __init__(self,
                 fn_lut: Callable, # Function that returns the lookup table dictionary
                 col_remap: str, # Name of the column to remap
                 col_src: str, # Name of the column with the source values
                 dest_grps: list[str]|str=grp_names(), # List of destination groups
                 default_value: Any = -1 # Default value for unmatched entries
                 ):
        fc.store_attr()
        self.lut = None  # Built on each `__call__` from `fn_lut`
        # A single group name is normalised to a one-element list
        if isinstance(dest_grps, str): self.dest_grps = [dest_grps]
        # Join the normalised list: joining a bare string would interleave its characters
        self.__doc__ = f"Remap values from '{col_src}' to '{col_remap}' for groups: {', '.join(self.dest_grps)}."

    def __call__(self, tfm):
        "Remap `col_src` into `col_remap` for each destination group present in `tfm.dfs`."
        self.lut = self.fn_lut()
        for grp in self.dest_grps:
            if grp in tfm.dfs:
                self._remap_group(tfm.dfs[grp])

    def _remap_group(self, df: pd.DataFrame):
        "Write the remapped values of `col_src` into `col_remap` (in place)."
        df[self.col_remap] = df[self.col_src].apply(self._remap_value)

    def _remap_value(self, value: str) -> Any:
        "Look up a single value, returning `default_value` (and printing it) when unmatched."
        value = value.strip() if isinstance(value, str) else value
        match = self.lut.get(value, Match(self.default_value, None, None, None))
        if isinstance(match, Match):
            if match.matched_id == self.default_value:
                print(f"Unmatched value: {value}")
            return match.matched_id
        else:
            # The lookup table may also map directly to plain values
            return match
+++LowerStripNameCB (col_src:str, col_dst:str=None, + fn_transform:Callable=<function <lambda>>)
Convert values to lowercase and strip any trailing spaces.
++ | Type | +Default | +Details | +
---|---|---|---|
col_src | +str | ++ | Source column name e.g. ‘Nuclide’ | +
col_dst | +str | +None | +Destination column name | +
fn_transform | +Callable | +Transformation function | +
class LowerStripNameCB(Callback):
    "Convert values to lowercase and strip leading and trailing whitespace."
    def __init__(self,
                 col_src: str, # Source column name e.g. 'Nuclide'
                 col_dst: Optional[str]=None, # Destination column name (defaults to `col_src`)
                 fn_transform: Callable=lambda x: x.lower().strip() # Transformation function
                 ):
        fc.store_attr()
        # Resolve the destination first so that the description below names the real column
        if not col_dst: self.col_dst = col_src
        self.__doc__ = f"Convert values from '{col_src}' to lowercase, strip spaces, and store in '{self.col_dst}'."

    def _safe_transform(self, value):
        "Return NA values untouched; otherwise apply `fn_transform` to the value cast to `str`."
        return value if pd.isna(value) else self.fn_transform(str(value))

    def __call__(self, tfm):
        "Apply the transformation to `col_src` of every group and store it in `col_dst`."
        for key in tfm.dfs.keys():
            tfm.dfs[key][self.col_dst] = tfm.dfs[key][self.col_src].apply(self._safe_transform)
Let’s test the callback:
dfs = {'seawater': pd.DataFrame({'Nuclide': ['CS137', '226RA']})}

# Case 1: the cleaned names are written to a separate destination column
tfm = Transformer(
    dfs,
    cbs=[LowerStripNameCB(col_src='Nuclide', col_dst='NUCLIDE')])
fc.test_eq(tfm()['seawater']['NUCLIDE'].to_list(), ['cs137', '226ra'])

# Case 2: without `col_dst`, the source column itself is overwritten
tfm = Transformer(
    dfs,
    cbs=[LowerStripNameCB(col_src='Nuclide')])
fc.test_eq(tfm()['seawater']['Nuclide'].to_list(), ['cs137', '226ra'])
The point is when (semi-automatic) remapping names generally:
++++RemoveAllNAValuesCB (cols_to_check:Dict[str,str])
Remove rows with all NA values.
++ | Type | +Details | +
---|---|---|
cols_to_check | +Dict | +A dictionary with the sample type as key and the column name to check as value | +
class RemoveAllNAValuesCB(Callback):
    "Remove rows with all NA values."
    def __init__(self,
                 cols_to_check: Dict[str, Union[str, List[str]]] # A dictionary with the sample type as key and the column name(s) to check as value
                 ):
        fc.store_attr()

    def __call__(self, tfm):
        "Drop, for each group, the rows whose checked columns are all null."
        for k in tfm.dfs.keys():
            cols = self.cols_to_check[k]
            # A bare column name would select a Series, on which `.all(axis=1)` raises;
            # wrap it so that a single name and a list of names behave alike
            if isinstance(cols, str): cols = [cols]
            mask = tfm.dfs[k][cols].isnull().all(axis=1)
            tfm.dfs[k] = tfm.dfs[k][~mask]
Many data providers use a long format (e.g lat, lon, radionuclide, value, unc, ...
) to store their data. When encoding as netCDF
, it is often required to use a wide format (e.g lat, lon, nuclide1_value, nuclide1_unc, nuclide2_value, nuclide2_unc, ...
). The class ReshapeLongToWide
is designed to perform this transformation.
+++ReshapeLongToWide (columns:List[str]=['nuclide'], + values:List[str]=['value'], num_fill_value:int=-999, + str_fill_value='STR FILL VALUE')
Convert data from long to wide with renamed columns.
++ | Type | +Default | +Details | +
---|---|---|---|
columns | +List | +[‘nuclide’] | +Columns to use as index | +
values | +List | +[‘value’] | +Columns to use as values | +
num_fill_value | +int | +-999 | +Fill value for numeric columns | +
str_fill_value | +str | +STR FILL VALUE | ++ |
class ReshapeLongToWide(Callback):
    "Convert data from long to wide with renamed columns."
    def __init__(self,
                 columns: List[str]=['nuclide'], # Columns whose values become the new (wide) column names
                 values: List[str]=['value'], # Columns to use as values
                 num_fill_value: int=-999, # Fill value for numeric columns
                 str_fill_value: str='STR FILL VALUE' # Fill value for non-numeric columns
                 ):
        # NOTE: the list defaults are shared between instances; they are only read, never mutated
        fc.store_attr()
        self.derived_cols = self._get_derived_cols()

    def _get_derived_cols(self):
        "Retrieve all possible derived vars (e.g 'unc', 'dl', ...) from configs."
        return [value['name'] for value in cdl_cfg()['vars']['suffixes'].values()]

    def renamed_cols(self, cols):
        "Flatten the two-level `(outer, inner)` column names produced by the pivot."
        # ('value', 'cs137') -> 'cs137'; ('_unc', 'cs137') -> 'cs137_unc'; ('lat', '') -> 'lat'
        return [inner if outer == "value" else f'{inner}{outer}' if inner else outer
                for outer, inner in cols]

    def _get_unique_fill_value(self, df, idx):
        "Get a numeric fill value that does not already occur in the `idx` columns."
        fill_value = self.num_fill_value
        while (df[idx] == fill_value).any().any():
            fill_value -= 1
        return fill_value

    def _fill_nan_values(self, df, idx):
        "Fill NaN values in index columns (in place) and return the numeric fill value used."
        num_fill_value = self._get_unique_fill_value(df, idx)
        for col in idx:
            fill_value = num_fill_value if pd.api.types.is_numeric_dtype(df[col]) else self.str_fill_value
            df[col] = df[col].fillna(fill_value)
        return df, num_fill_value

    def pivot(self, df):
        "Pivot `df` to wide format, using every remaining column as compound index."
        derived_coi = [col for col in self.derived_cols if col in df.columns]
        # Everything that is neither pivoted, a value nor a derived column is part of the index
        idx = list(set(df.columns) - set(self.columns + derived_coi + self.values))

        # NaN in index columns are replaced by placeholders before pivoting and restored after
        # NOTE(review): presumably because `pivot_table` discards rows with NaN keys — confirm
        df, num_fill_value = self._fill_nan_values(df, idx)

        pivot_df = df.pivot_table(index=idx,
                                  columns=self.columns,
                                  values=self.values + derived_coi,
                                  fill_value=np.nan,
                                  aggfunc=lambda x: x).reset_index()

        pivot_df[idx] = pivot_df[idx].replace({self.str_fill_value: np.nan, num_fill_value: np.nan})
        pivot_df = self.set_index(pivot_df)
        return pivot_df

    def set_index(self, df):
        "Set the index of the dataframe."
        # TODO: Consider implementing a universal unique index
        # by hashing the compound index columns (lat, lon, time, depth, etc.)
        df.index.name = 'org_index'
        return df

    def __call__(self, tfm):
        "Pivot every group of `tfm.dfs` and flatten the resulting column names."
        for grp in tfm.dfs.keys():
            tfm.dfs[grp] = self.pivot(tfm.dfs[grp])
            tfm.dfs[grp].columns = self.renamed_cols(tfm.dfs[grp].columns)
Example of usage:
+When the `compound_idx` values (in our case made of `lon`, `lat`, `time`, `depth`, …) are unique:
dfs_test = {'seawater': pd.DataFrame({
+ 'compound_idx': ['a', 'b', 'c', 'd'],
+ 'nuclide': ['cs137', 'cs137', 'pu239_240_tot', 'pu239_240_tot'],
+ 'value': [1, 2, 3, 4],
+ '_unc': [0.1, 0.2, 0.3, 0.4]})}
+
+tfm = Transformer(dfs_test, cbs=[ReshapeLongToWide()])
+tfm()['seawater']
+ | compound_idx | +cs137_unc | +pu239_240_tot_unc | +cs137 | +pu239_240_tot | +
---|---|---|---|---|---|
org_index | ++ | + | + | + | + |
0 | +a | +0.1 | +NaN | +1.0 | +NaN | +
1 | +b | +0.2 | +NaN | +2.0 | +NaN | +
2 | +c | +NaN | +0.3 | +NaN | +3.0 | +
3 | +d | +NaN | +0.4 | +NaN | +4.0 | +
# Here `compound_idx` 'a' is shared by two different nuclides,
# so both measurements end up on the same wide row
dfs_test = {
    'seawater': pd.DataFrame({
        'compound_idx': ['a', 'a', 'c', 'd'],
        'nuclide': ['cs137', 'cs134', 'pu239_240_tot', 'pu239_240_tot'],
        'value': [1, 2, 3, 4],
        '_unc': [0.1, 0.2, 0.3, 0.4],
    })
}

tfm = Transformer(dfs_test, cbs=[ReshapeLongToWide()])
tfm()['seawater']
+ | compound_idx | +cs134_unc | +cs137_unc | +pu239_240_tot_unc | +cs134 | +cs137 | +pu239_240_tot | +
---|---|---|---|---|---|---|---|
org_index | ++ | + | + | + | + | + | + |
0 | +a | +0.2 | +0.1 | +NaN | +2.0 | +1.0 | +NaN | +
1 | +c | +NaN | +NaN | +0.3 | +NaN | +NaN | +3.0 | +
2 | +d | +NaN | +NaN | +0.4 | +NaN | +NaN | +4.0 | +
+++CompareDfsAndTfmCB (dfs:Dict[str,pandas.core.frame.DataFrame])
Create a dataframe of dropped data. Data included in the dfs
not in the tfm
.
+ | Type | +Details | +
---|---|---|
dfs | +Dict | +Original dataframes | +
class CompareDfsAndTfmCB(Callback):
    "Create a dataframe of dropped data. Data included in the `dfs` not in the `tfm`."
    def __init__(self,
                 dfs: Dict[str, pd.DataFrame] # Original dataframes
                 ):
        fc.store_attr()

    def __call__(self, tfm: Transformer) -> None:
        "Populate `tfm.dfs_dropped` and `tfm.compare_stats` for every group of `tfm.dfs`."
        self._initialize_tfm_attributes(tfm)
        for grp in tfm.dfs.keys():
            # The dropped rows must be stored first: the stats read them back from `tfm`
            tfm.dfs_dropped[grp] = self._get_dropped_data(grp, tfm)
            tfm.compare_stats[grp] = self._compute_stats(grp, tfm)

    def _initialize_tfm_attributes(self, tfm: Transformer) -> None:
        "Reset the two result dictionaries on the transformer."
        tfm.dfs_dropped = {}
        tfm.compare_stats = {}

    def _get_dropped_data(self,
                          grp: str, # The group key
                          tfm: Transformer # The transformation object containing `dfs`
                          ) -> pd.DataFrame: # Dataframe with dropped rows
        "Rows of `dfs[grp]` whose index label no longer exists in `tfm.dfs[grp]`."
        original = self.dfs[grp]
        missing_labels = original.index.difference(tfm.dfs[grp].index)
        return original.loc[missing_labels]

    def _compute_stats(self,
                       grp: str, # The group key
                       tfm: Transformer # The transformation object containing `dfs`
                       ) -> Dict[str, int]: # Dictionary with comparison statistics
        "Row counts of the original, transformed and dropped data for one group."
        n_original = len(self.dfs[grp].index)
        n_transformed = len(tfm.dfs[grp].index)
        n_dropped = len(tfm.dfs_dropped[grp].index)
        return {
            'Number of rows in dfs': n_original,
            'Number of rows in tfm.dfs': n_transformed,
            'Number of dropped rows': n_dropped,
            'Number of rows in tfm.dfs + Number of dropped rows': n_transformed + n_dropped
        }
CompareDfsAndTfmCB
compares the original dataframes to the transformed dataframe. A dictionary of dataframes, tfm.dfs_dropped
, is created to include the data present in the original dataset but absent from the transformed data. tfm.compare_stats
provides a quick overview of the number of rows in both the original dataframes and the transformed dataframe.
For instance:
# Two identical three-row groups (one independent dataframe per group)
dfs_test = {
    grp: pd.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]})
    for grp in ('seawater', 'sediment')
}

class TestTfmCB(Callback):
    "Drop rows 0 and 1 from 'seawater' and row 0 from any other group, in place."
    def __call__(self, tfm):
        for key, df in tfm.dfs.items():
            df.drop([0, 1] if key == 'seawater' else [0], inplace=True)

# `inplace=False`: the transformer works on copies, so `dfs_test` keeps all of its rows
tfm = Transformer(
    dfs_test,
    cbs=[TestTfmCB(), CompareDfsAndTfmCB(dfs_test)],
    inplace=False)

print(tfm())

fc.test_eq(tfm.compare_stats['seawater']['Number of dropped rows'], 2)
fc.test_eq(tfm.compare_stats['sediment']['Number of dropped rows'], 1)
{'seawater': a b
+2 3 6, 'sediment': a b
+1 2 5
+2 3 6}
+These callbacks are used to transform the time variable according to netCDF CF standards. For instance, the EncodeTimeCB
callback is used to encode the time variable as an integer representing seconds since a reference date as specified in configs.ipynb
CONFIGS_CDL
dictionary.
+++EncodeTimeCB (cfg:dict, verbose:bool=False)
Encode time as int
representing seconds since xxx.
+ | Type | +Default | +Details | +
---|---|---|---|
cfg | +dict | ++ | Configuration dictionary | +
verbose | +bool | +False | +Whether to print the number of invalid time entries | +
class EncodeTimeCB(Callback):
    "Encode time as `int` representing seconds since xxx."
    def __init__(self,
                 cfg: dict, # Configuration dictionary
                 verbose: bool=False # Whether to print the number of invalid time entries
                 ):
        fc.store_attr()

    def __call__(self, tfm):
        "Drop rows without a `time` value, then encode `time` using `cfg['units']['time']`."
        def format_time(x):
            return date2num(x, units=self.cfg['units']['time'])

        for k in tfm.dfs.keys():
            is_invalid = tfm.dfs[k]['time'].isna()
            # If invalid time entries.
            if is_invalid.any():
                if self.verbose:
                    print (f'{int(is_invalid.sum())} of {len(tfm.dfs[k].index)} entries for `time` are invalid for {k}.')
                # Filter nan values; take an explicit copy so that the column assignment
                # below does not write into a slice of the previous dataframe
                tfm.dfs[k] = tfm.dfs[k][~is_invalid].copy()

            tfm.dfs[k]['time'] = tfm.dfs[k]['time'].apply(format_time)
.toml
configuration files copied under /home/.marisco
folder and associated utility functions. These .toml
files can then be adapted to your specific needs if required.
+ +++base_path ()
Return the path to the .marisco
folder under your home directory.
By default, we create a folder named .marisco
under your home directory that will receive all configuration files as defined in BASE_PATH
:
# General settings: GitHub source, file names, local directories and encoding units
CONFIGS = {
    'gh': {'owner': 'franckalbinet', 'repo': 'marisco'},
    'names': {'nc_template': 'maris-template.nc'},
    'dirs': {
        'lut': str(base_path() / 'lut'),      # Look-up tables
        'cache': str(base_path() / 'cache'),  # Cache (e.g. WoRMS species)
        'tmp': str(base_path() / 'tmp')
    },
    'paths': {'luts': 'nbs/files/lut'},
    'units': {'time': 'seconds since 1970-01-01 00:00:00.0'},
    'zotero': {
        'api_key': os.getenv('ZOTERO_API_KEY'),  # `None` when the environment variable is not set
        'lib_id': '2432820'
    }
}
The CONFIGS
dictionary defines general settings:
key | +Value | +Description | +
---|---|---|
dirs/lut |
+/Users/franckalbinet/.marisco/lut |
+Location & name of the directory receiving lookup tables. | +
dirs/cache |
+/Users/franckalbinet/.marisco/cache |
+Location & name of the directory receiving cache files such as WoRMS species retrieved. | +
dirs/tmp |
+/Users/franckalbinet/.marisco/tmp |
+Location & name of temporary files. | +
gh/owner |
+franckalbinet |
+GitHub account owner. | +
gh/repo |
+marisco |
+GitHub repository used to download specific files (e.g. lookup tables) during installation. | +
names/nc_template |
+maris-template.nc |
+Name of the MARIS NetCDF4 template. | +
paths/luts |
+nbs/files/lut |
+GitHub repository directory name containing lookup tables. | +
units/time |
+seconds since 1970-01-01 00:00:00.0 |
+Reference date and time used for NetCDF time encoding. | +
zotero/api_key |
+your-zotero-api-key |
+Zotero API key (“ZOTERO_API_KEY” environment variable). | +
zotero/lib_id |
+2432820 |
+Zotero library ID. | +
The main CONFIGS_CDL
dictionary, used to generate a NetCDF CDL (Common Data Language) .toml
file. This file is then used to generate a template MARIS NetCDF file. For further details, refer to the configs.ipynb
file.
Below, the vars/defaults section printed:
++++cfg ()
Return the configuration as a dictionary.
++++nuc_lut_path ()
Return the path to the nuclide lookup table.
++++lut_path ()
Return the path to the lookup tables directory.
++++cache_path ()
Return the path to the cache directory.
# Blueprint of the MARIS NetCDF template: groups, global attributes, dimension,
# variables (defaults, group-specific, per-nuclide suffixes) and enumeration types.
CONFIGS_CDL = {
    'placeholder': '_to_be_filled_in_',
    'grps': {
        'sea': {'name': 'seawater', 'id': 1},
        'bio': {'name': 'biota', 'id': 2},
        'sed': {'name': 'sediment', 'id': 3},
        'sus': {'name': 'suspended-matter', 'id': 4}
    },
    'global_attrs': {
        # Do not update keys. Only values if required
        'id': '', # zotero?
        'title': '',
        'summary': '',
        'keywords': '',
        'keywords_vocabulary': 'GCMD Science Keywords',
        'keywords_vocabulary_url': 'https://gcmd.earthdata.nasa.gov/static/kms/',
        'record': '',
        'featureType': '',
        'cdm_data_type': '',

        # Conventions
        'Conventions': 'CF-1.10 ACDD-1.3',

        # Publisher [ACDD1.3]
        'publisher_name': 'Paul MCGINNITY, Iolanda OSVATH, Florence DESCROIX-COMANDUCCI',
        'publisher_email': 'p.mc-ginnity@iaea.org, i.osvath@iaea.org, F.Descroix-Comanducci@iaea.org',
        'publisher_url': 'https://maris.iaea.org',
        'publisher_institution': 'International Atomic Energy Agency - IAEA',

        # Creator info [ACDD1.3]
        'creator_name': '',
        'institution': '',
        'metadata_link': '',
        'creator_email': '',
        'creator_url': '',
        'references': '',
        'license': ' '.join(['Without prejudice to the applicable Terms and Conditions',
                             '(https://nucleus.iaea.org/Pages/Others/Disclaimer.aspx),',
                             'I hereby agree that any use of the data will contain appropriate',
                             'acknowledgement of the data source(s) and the IAEA Marine',
                             'Radioactivity Information System (MARIS).']),
        'comment': '',
        # Dataset info & coordinates [ACDD1.3]
        #'project': '', # Network long name
        #'platform': '', # Should be a long / full name
        'geospatial_lat_min': '',
        'geospatial_lon_min': '',
        'geospatial_lat_max': '',
        'geospatial_lon_max': '',
        'geospatial_vertical_min': '',
        'geospatial_vertical_max': '',
        'geospatial_bounds': '', # wkt representation
        'geospatial_bounds_crs': 'EPSG:4326',

        # Time information
        'time_coverage_start': '',
        'time_coverage_end': '',
        #'time_coverage_resolution': '',
        'local_time_zone': '',
        'date_created': '',
        'date_modified': '',
        #
        # -- Additional metadata (custom to MARIS)
        #
        'publisher_postprocess_logs': ''
    },
    'dim': {
        'name': 'sample',
        'attrs': {
            'long_name': 'Sample ID of measurement'
        },
        'dtype': 'u8'
    },
    'vars': {
        'defaults': {
            # CF conventions: longitude is expressed in `degrees_east` along the X axis
            'lon': {
                'name': 'lon',
                'attrs': {
                    'long_name': 'Measurement longitude',
                    'standard_name': 'longitude',
                    'units': 'degrees_east',
                    'axis': 'X',
                    '_CoordinateAxisType': 'Lon'
                },
                'dtype': 'f4'
            },
            # CF conventions: latitude is expressed in `degrees_north` along the Y axis
            'lat': {
                'name': 'lat',
                'attrs': {
                    'long_name': 'Measurement latitude',
                    'standard_name': 'latitude',
                    'units': 'degrees_north',
                    'axis': 'Y',
                    '_CoordinateAxisType': 'Lat'
                },
                'dtype': 'f4'
            },
            'smp_depth': {
                'name': 'smp_depth',
                'attrs': {
                    'long_name': 'Sample depth below sea level',
                    'standard_name': 'sample_depth_below_sea_floor',
                    'units': 'm',
                    'axis': 'Z'
                },
                'dtype': 'f4'
            },
            'tot_depth': {
                'name': 'tot_depth',
                'attrs': {
                    'long_name': 'Total depth below sea level',
                    'standard_name': 'total_depth_below_sea_floor',
                    'units': 'm',
                    'axis': 'Z'
                },
                'dtype': 'f4'
            },
            'time': {
                'name': 'time',
                'attrs': {
                    'long_name': 'Time of measurement',
                    'standard_name': 'time',
                    'units': 'seconds since 1970-01-01 00:00:00.0',
                    'time_origin': '1970-01-01 00:00:00',
                    'time_zone': 'UTC',
                    'abbreviation': 'Date/Time',
                    'axis': 'T',
                    'calendar': 'gregorian'
                },
                'dtype': 'u8',
            },
            'area': {
                'name': 'area',
                'attrs': {
                    'long_name': 'Marine area/region id',
                    'standard_name': 'area_id'
                },
                'dtype': 'area_t'
            },
        },
        'bio': {
            'bio_group': {
                'name': 'bio_group',
                'attrs': {
                    'long_name': 'Biota group',
                    'standard_name': 'biota_group_tbd'
                },
                'dtype': 'bio_group_t'
            },
            'species': {
                'name': 'species',
                'attrs': {
                    'long_name': 'Species',
                    'standard_name': 'species'
                },
                'dtype': 'species_t'
            },
            'body_part': {
                'name': 'body_part',
                'attrs': {
                    'long_name': 'Body part',
                    'standard_name': 'body_part_tbd'
                },
                'dtype': 'body_part_t'
            }
        },
        'sed': {
            'sed_type': {
                'name': 'sed_type',
                'attrs': {
                    'long_name': 'Sediment type',
                    'standard_name': 'sediment_type_tbd'
                },
                'dtype': 'sed_type_t'
            }
        },
        # Suffixes appended to a nuclide name to form derived variables (e.g. `cs137_unc`);
        # the leading space / underscore of the attributes is intentional
        'suffixes': {
            'uncertainty': {
                'name': '_unc',
                'attrs': {
                    'long_name': ' uncertainty',
                    'standard_name': '_uncertainty'
                },
                'dtype': 'f4'
            },
            'detection_limit': {
                'name': '_dl',
                'attrs': {
                    'long_name': ' detection limit',
                    'standard_name': '_detection_limit'
                },
                'dtype': 'dl_t'
            },
            'volume': {
                'name': '_vol',
                'attrs': {
                    'long_name': ' volume',
                    'standard_name': '_volume'
                },
                'dtype': 'f4'
            },
            'salinity': {
                'name': '_sal',
                'attrs': {
                    'long_name': ' salinity',
                    'standard_name': '_sal'
                },
                'dtype': 'f4'
            },
            'temperature': {
                'name': '_temp',
                'attrs': {
                    'long_name': ' temperature',
                    'standard_name': '_temp'
                },
                'dtype': 'f4'
            },
            'filtered': {
                'name': '_filt',
                'attrs': {
                    'long_name': ' filtered',
                    'standard_name': '_filtered'
                },
                'dtype': 'filt_t'
            },
            'counting_method': {
                'name': '_counmet',
                'attrs': {
                    'long_name': ' counting method',
                    'standard_name': '_counting_method'
                },
                'dtype': 'counmet_t'
            },
            'sampling_method': {
                'name': '_sampmet',
                'attrs': {
                    'long_name': ' sampling method',
                    'standard_name': '_sampling_method'
                },
                'dtype': 'sampmet_t'
            },
            'preparation_method': {
                'name': '_prepmet',
                'attrs': {
                    'long_name': ' preparation method',
                    'standard_name': '_preparation_method'
                },
                'dtype': 'prepmet_t'
            },
            'unit': {
                'name': '_unit',
                'attrs': {
                    'long_name': ' unit',
                    'standard_name': '_unit'
                },
                'dtype': 'unit_t'
            }
        }
    },
    # Enumeration types: lookup table file plus the columns used as key and value
    'enums': [
        {'name': 'area_t', 'fname': 'dbo_area.xlsx', 'key': 'displayName', 'value': 'areaId'},
        {'name': 'bio_group_t', 'fname': 'dbo_biogroup.xlsx', 'key': 'biogroup', 'value': 'biogroup_id'},
        {'name': 'body_part_t', 'fname': 'dbo_bodypar.xlsx', 'key': 'bodypar', 'value': 'bodypar_id'},
        {'name': 'species_t', 'fname': 'dbo_species_cleaned.xlsx', 'key': 'species', 'value': 'species_id'},
        {'name': 'sed_type_t', 'fname': 'dbo_sedtype.xlsx', 'key': 'sedtype', 'value': 'sedtype_id'},
        {'name': 'unit_t', 'fname': 'dbo_unit.xlsx', 'key': 'unit_sanitized', 'value': 'unit_id'},
        {'name': 'dl_t', 'fname': 'dbo_detectlimit.xlsx', 'key': 'name_sanitized', 'value': 'id'},
        {'name': 'filt_t', 'fname': 'dbo_filtered.xlsx', 'key': 'name', 'value': 'id'},
        {'name': 'counmet_t', 'fname': 'dbo_counmet.xlsx', 'key': 'counmet', 'value': 'counmet_id'},
        {'name': 'sampmet_t', 'fname': 'dbo_sampmet.xlsx', 'key': 'sampmet', 'value': 'sampmet_id'},
        {'name': 'prepmet_t', 'fname': 'dbo_prepmet.xlsx', 'key': 'prepmet', 'value': 'prepmet_id'}
    ]
}
{ 'area': { 'attrs': { 'long_name': 'Marine area/region id',
+ 'standard_name': 'area_id'},
+ 'dtype': 'area_t',
+ 'name': 'area'},
+ 'lat': { 'attrs': { '_CoordinateAxisType': 'Lat',
+ 'axis': 'X',
+ 'long_name': 'Measurement latitude',
+ 'standard_name': 'latitude',
+ 'units': 'degrees_east'},
+ 'dtype': 'f4',
+ 'name': 'lat'},
+ 'lon': { 'attrs': { '_CoordinateAxisType': 'Lon',
+ 'axis': 'Y',
+ 'long_name': 'Measurement longitude',
+ 'standard_name': 'longitude',
+ 'units': 'degrees_north'},
+ 'dtype': 'f4',
+ 'name': 'lon'},
+ 'smp_depth': { 'attrs': { 'axis': 'Z',
+ 'long_name': 'Sample depth below seal level',
+ 'standard_name': 'sample_depth_below_sea_floor',
+ 'units': 'm'},
+ 'dtype': 'f4',
+ 'name': 'smp_depth'},
+ 'time': { 'attrs': { 'abbreviation': 'Date/Time',
+ 'axis': 'T',
+ 'calendar': 'gregorian',
+ 'long_name': 'Time of measurement',
+ 'standard_name': 'time',
+ 'time_origin': '1970-01-01 00:00:00',
+ 'time_zone': 'UTC',
+ 'units': 'seconds since 1970-01-01 00:00:00.0'},
+ 'dtype': 'u8',
+ 'name': 'time'},
+ 'tot_depth': { 'attrs': { 'axis': 'Z',
+ 'long_name': 'Total depth below seal level',
+ 'standard_name': 'total_depth_below_sea_floor',
+ 'units': 'm'},
+ 'dtype': 'f4',
+ 'name': 'tot_depth'}}
+++cdl_cfg ()
Return the CDL (Common Data Language) configuration as a dictionary.
++++grp_names ()
Return the group names as defined in cdl.toml
.
+++species_lut_path ()
Return the path to the species lookup table.
++++bodyparts_lut_path ()
Return the path to the body parts lookup table.
++++biogroup_lut_path ()
Return the path to the biota group lookup table.
++++sediments_lut_path ()
Return the path to the sediment type lookup table.
++++unit_lut_path ()
Return the path to the unit lookup table.
++++detection_limit_lut_path ()
Return the path to the detection limit lookup table.
++++filtered_lut_path ()
Return the path to the filtered lookup table.
++++area_lut_path ()
Return the path to the area lookup table.
+ ++++name2grp (name:str, cdl:dict)
+ | Type | +Details | +
---|---|---|
name | +str | +Group name | +
cdl | +dict | +CDL configuration | +
Example:
+ ++++nc_tpl_name ()
Return the name of the MARIS NetCDF template as defined in configs.toml
+++nc_tpl_path ()
Return the path of the MARIS NetCDF template as defined in configs.toml
Enumeration types are used to avoid using strings as NetCDF4 variable values. Instead, enumeration types (lookup tables) such as {'Crustaceans': 2, 'Echinoderms': 3, ...}
are prepended to the NetCDF file template and associated ids (integers) are used as values.
+++sanitize (s:str|float)
Sanitize dictionary key to comply with NetCDF enumeration type:
+Remove `(`, `)` and `.`; replace `/` and `-` with a space.
+ | Type | +Details | +
---|---|---|
s | +str | float | +String or float to sanitize | +
Returns | +str | float | +Sanitized string or original float | +
def sanitize(
+ s: str|float # String or float to sanitize
+ ) -> str|float: # Sanitized string or original float
+ """
+ Sanitize dictionary key to comply with NetCDF enumeration type:
+
+ - Remove `(`, `)`, `.`, `/`, `-`
+ - Strip the string
+ - Return original value if it's not a string (e.g., NaN)
+ """
+ if isinstance(s, str):
+ s = re.sub(r'[().]', '', s)
+ return re.sub(r'[/-]', ' ', s).strip()
+ elif pd.isna(s): # This covers np.nan, None, and pandas NaT
+ return s
+ else:
+ return str(s).strip()
For example:
+NetCDF4 enumeration types do not seem to accept keys containing non-alphanumeric characters such as parentheses, dots, slashes, … As a result, the MARIS lookup tables need to be sanitized.
++++get_lut (src_dir:str, fname:str, key:str, value:str, + do_sanitize:bool=True, reverse:bool=False)
Convert MARIS db lookup table excel file to dictionary {'name': id, ...}
or {id: name, ...}
if reverse
is True.
+ | Type | +Default | +Details | +
---|---|---|---|
src_dir | +str | ++ | Directory containing lookup tables | +
fname | +str | ++ | Excel file lookup table name | +
key | +str | ++ | Excel file column name to be used as dict keys | +
value | +str | ++ | Excel file column name to be used as dict values | +
do_sanitize | +bool | +True | +Sanitization required? | +
reverse | +bool | +False | +Reverse lookup table (value, key) | +
Returns | +dict | ++ | MARIS lookup table (key, value) | +
def get_lut(
    src_dir: str, # Directory containing lookup tables
    fname: str, # Excel file lookup table name
    key: str, # Excel file column name to be used as dict keys
    value: str, # Excel file column name to be used as dict values
    do_sanitize: bool=True, # Sanitization required?
    reverse: bool=False # Reverse lookup table (value, key)
    ) -> dict: # MARIS lookup table (key, value)
    "Convert MARIS db lookup table excel file to dictionary `{'name': id, ...}` or `{id: name, ...}` if `reverse` is True."
    lut_path = Path(src_dir) / fname
    # Only the two requested columns are read; rows without a value are dropped
    table = pd.read_excel(lut_path, usecols=[key, value]).dropna(subset=value)
    table[value] = table[value].astype('int')
    raw = table.set_index(key)[value].to_dict()

    # Optionally sanitize each key, then cast keys and values with `try_int`
    lut = {}
    for k, v in raw.items():
        if do_sanitize:
            k = sanitize(k)
        lut[try_int(k)] = try_int(v)

    if reverse:
        return {v: k for k, v in lut.items()}
    return lut
For example:
+lut_src_dir = './files/lut'
+get_lut(lut_src_dir, 'dbo_biogroup.xlsx', key='biogroup', value='biogroup_id', reverse=False)
{'Not applicable': -1,
+ 'Not available': 0,
+ 'Birds': 1,
+ 'Crustaceans': 2,
+ 'Echinoderms': 3,
+ 'Fish': 4,
+ 'Mammals': 5,
+ 'Molluscs': 6,
+ 'Others': 7,
+ 'Plankton': 8,
+ 'Polychaete worms': 9,
+ 'Reptile': 10,
+ 'Seaweeds and plants': 11,
+ 'Cephalopods': 12,
+ 'Gastropods': 13,
+ 'Bivalves': 14}
++++Enums (lut_src_dir:str, cdl_enums:dict)
Return dictionaries of MARIS NetCDF’s enumeration types.
++ | Type | +Details | +
---|---|---|
lut_src_dir | +str | +Directory containing lookup tables | +
cdl_enums | +dict | +CDL configuration enumeration types | +
class Enums():
    "Return dictionaries of MARIS NetCDF's enumeration types."
    def __init__(self,
                 lut_src_dir:str, # Directory containing lookup tables
                 cdl_enums:dict # CDL configuration enumeration types
                 ):
        fc.store_attr()
        # Mapping {enum type name: {label: id, ...}} built once at construction
        self.types = self.lookup()

    def filter(self, name, values):
        "Return the `{label: id}` entries of enumeration type `name` whose id is in `values`."
        # Distinct loop names: the original comprehension rebound `name` and shadowed the builtin `id`
        return {label: enum_id for label, enum_id in self.types[name].items() if enum_id in values}

    def lookup(self):
        "Load one lookup table per entry of `cdl_enums`, keyed by enumeration type name."
        types = {}
        for enum in self.cdl_enums:
            # Each entry is unpacked positionally as (name, fname, key, value)
            name, fname, key, value = enum.values()
            types[name] = get_lut(self.lut_src_dir, fname, key=key, value=value)
        return types
{'Not applicable': -1,
+ 'Not available': 0,
+ 'Detected value': 1,
+ 'Detection limit': 2,
+ 'Not detected': 3,
+ 'Derived': 4}
+{'Not applicable': -1,
+ 'NOT AVAILABLE': 0,
+ 'Bq per m3': 1,
+ 'Bq per m2': 2,
+ 'Bq per kg': 3,
+ 'Bq per kgd': 4,
+ 'Bq per kgw': 5,
+ 'kg per kg': 6,
+ 'TU': 7,
+ 'DELTA per mill': 8,
+ 'atom per kg': 9,
+ 'atom per kgd': 10,
+ 'atom per kgw': 11,
+ 'atom per l': 12,
+ 'Bq per kgC': 13}
++++get_enum_dicts (lut_src_dir:str, cdl_enums:dict, **kwargs)
Return a dict of NetCDF enumeration types.
++ | Type | +Details | +
---|---|---|
lut_src_dir | +str | +Directory containing lookup tables | +
cdl_enums | +dict | +CDL configuration enumeration types | +
kwargs | ++ | + |
def get_enum_dicts(
    lut_src_dir:str, # Directory containing lookup tables
    cdl_enums:dict, # CDL configuration enumeration types
    **kwargs # Additional arguments
    ):
    "Return a dict of NetCDF enumeration types."
    def _load(enum):
        # Each CDL enum entry is unpacked positionally as (name, fname, key, value)
        name, fname, key, value = enum.values()
        return name, get_lut(lut_src_dir, fname, key=key, value=value, **kwargs)

    return dict(_load(enum) for enum in cdl_enums)
For example:
+lut_src_dir_test = './files/lut'
+cdl_enums_test = read_toml('./files/cdl.toml')['enums']
+
+enums = get_enum_dicts(lut_src_dir=lut_src_dir_test,
+ cdl_enums=cdl_enums_test)
+enums.keys()
dict_keys(['area_t', 'bio_group_t', 'body_part_t', 'species_t', 'sed_type_t', 'unit_t', 'dl_t', 'filt_t', 'counmet_t', 'sampmet_t', 'prepmet_t'])
++++write_toml (fname, cfg)
Write a TOML file from a dictionary.
++++read_toml (fname)
Read a TOML file into a dictionary.
+ + + ++++GlobAttrsFeeder (dfs:dict, cbs:list=[], logs:list=[])
Produce NetCDF global attributes as specified by the callbacks.
++ | Type | +Default | +Details | +
---|---|---|---|
dfs | +dict | ++ | Dictionary of NetCDF group DataFrames | +
cbs | +list | +[] | +Callbacks | +
logs | +list | +[] | +List of preprocessing steps taken | +
class GlobAttrsFeeder:
    "Produce NetCDF global attributes as specified by the callbacks."
    def __init__(self,
                 dfs:dict, # Dictionary of NetCDF group DataFrames
                 cbs:list|None=None, # Callbacks (no callback when omitted)
                 logs:list|None=None # List of preprocessing steps taken (empty when omitted)
                 ):
        fc.store_attr()
        # `[]` defaults would be single lists shared by every instance; give each instance its own
        self.cbs = [] if cbs is None else cbs
        self.logs = [] if logs is None else logs
        # Global attributes collected by the callbacks
        self.attrs = {}

    def callback(self):
        "Run the callbacks, passing this feeder to `run_cbs`."
        run_cbs(self.cbs, self)

    def __call__(self):
        "Run the callbacks and return the resulting global attributes."
        self.callback()
        return self.attrs
+++BboxCB ()
Compute dataset geographical bounding box
class BboxCB(Callback):
    "Compute dataset geographical bounding box"
    def __call__(self, obj):
        "Add the `geospatial_*` bounding box attributes to `obj.attrs`."
        bbox = get_bbox(pd.concat(obj.dfs))
        # `bounds` is ordered (minx, miny, maxx, maxy), i.e. (lon_min, lat_min, lon_max, lat_max);
        # unpacking it as (lon_min, lon_max, lat_min, lat_max) swapped `lon_max` and `lat_min`.
        lon_min, lat_min, lon_max, lat_max = [str(bound) for bound in bbox.bounds]
        obj.attrs.update({
            'geospatial_lat_min': lat_min,
            'geospatial_lat_max': lat_max,
            'geospatial_lon_min': lon_min,
            'geospatial_lon_max': lon_max,
            'geospatial_bounds': bbox.wkt})
+++DepthRangeCB (depth_col='depth')
Compute depth values range
class DepthRangeCB(Callback):
    "Compute depth values range"
    def __init__(self, depth_col='depth'): fc.store_attr()
    def __call__(self, obj):
        "Add the vertical extent attributes to `obj.attrs` when a depth column is available."
        all_rows = pd.concat(obj.dfs)
        depths = all_rows.get(self.depth_col, default=pd.Series([]))
        if depths.empty:
            return
        deepest, shallowest = depths.max(), depths.min()
        # Depths are negated; a zero minimum is written as '0' rather than the negated value
        vertical_max = '0' if shallowest == 0 else str(-shallowest)
        obj.attrs.update({
            'geospatial_vertical_max': vertical_max,
            'geospatial_vertical_min': str(-deepest)})
+++TimeRangeCB (cfg)
Compute time values range
class TimeRangeCB(Callback):
    "Compute time values range"
    def __init__(self, cfg): fc.store_attr()
    def __call__(self, obj):
        "Add ISO-formatted `time_coverage_start` and `time_coverage_end` to `obj.attrs`."
        time = pd.concat(obj.dfs)['time']

        def _iso(t):
            # Decode a numeric time using the units configured under cfg['units']['time']
            return num2date(t, units=self.cfg['units']['time']).isoformat()

        start = _iso(time.min())
        end = _iso(time.max())
        obj.attrs.update({
            'time_coverage_start': start,
            'time_coverage_end': end})
+++ZoteroItem (item_id, cfg)
Fetch a Zotero library item by id and expose its title, abstract and creators.
class ZoteroItem:
    "Fetch a Zotero library item by id and expose its title, abstract and creators."
    def __init__(self, item_id, cfg):
        # `cfg` must provide the 'lib_id' and 'api_key' entries read by `getItem`
        self.cfg = cfg
        self.item = self.getItem(item_id)

    def exist(self):
        "Return True when the item was found in the library."
        return self.item is not None

    def getItem(self, item_id):
        "Return the Zotero item `item_id` from the group library, or None (with a printed notice) if it does not exist."
        zot = zotero.Zotero(self.cfg['lib_id'], 'group', self.cfg['api_key'])
        try:
            return zot.item(item_id)
        except zotero_errors.ResourceNotFound:
            print(f'Item {item_id} does not exist in Zotero library')
            return None

    def title(self):
        "Return the item title."
        return self.item['data']['title']

    def summary(self):
        "Return the item abstract (`abstractNote`)."
        return self.item['data']['abstractNote']

    def creator_name(self):
        "Return the item creators serialized as a JSON string."
        return json.dumps(self.item['data']['creators'])

    def __repr__(self):
        return json.dumps(self.item, indent=4)
+++ZoteroCB (itemId, cfg)
Retrieve Zotero metadata.
# TBD: put it in callback module
class ZoteroCB(Callback):
    "Retrieve Zotero metadata."
    def __init__(self, itemId, cfg): fc.store_attr()
    def __call__(self, obj):
        "Copy title, summary and creator_name of the Zotero item into `obj.attrs` when the item exists."
        item = ZoteroItem(self.itemId, self.cfg['zotero'])
        if not item.exist():
            return
        # Each attribute name doubles as the `ZoteroItem` method providing its value
        for getter in ['title', 'summary', 'creator_name']:
            obj.attrs[getter] = getattr(item, getter)()
from marisco.configs import cfg
+
+GlobAttrsFeeder(None, cbs=[
+ ZoteroCB('26VMZZ2Q', cfg=cfg())
+ ])()
{'title': 'Environmental database - Helsinki Commission Monitoring of Radioactive Substances',
+ 'summary': 'MORS Environment database has been used to collate data resulting from monitoring of environmental radioactivity in the Baltic Sea based on HELCOM Recommendation 26/3.\n\nThe database is structured according to HELCOM Guidelines on Monitoring of Radioactive Substances (https://www.helcom.fi/wp-content/uploads/2019/08/Guidelines-for-Monitoring-of-Radioactive-Substances.pdf), which specifies reporting format, database structure, data types and obligatory parameters used for reporting data under Recommendation 26/3.\n\nThe database is updated and quality assured annually by HELCOM MORS EG.',
+ 'creator_name': '[{"creatorType": "author", "name": "HELCOM MORS"}]'}
+{'title': 'Radioactivity Monitoring of the Irish Marine Environment 1991 and 1992',
+ 'summary': '',
+ 'creator_name': '[{"creatorType": "author", "firstName": "A.", "lastName": "McGarry"}, {"creatorType": "author", "firstName": "S.", "lastName": "Lyons"}, {"creatorType": "author", "firstName": "C.", "lastName": "McEnri"}, {"creatorType": "author", "firstName": "T.", "lastName": "Ryan"}, {"creatorType": "author", "firstName": "M.", "lastName": "O\'Colmain"}, {"creatorType": "author", "firstName": "J.D.", "lastName": "Cunningham"}]'}
+Item x does not exist in Zotero library
+{}
++++KeyValuePairCB (k, v)
Callback adding the key/value pair (`k`, `v`) to the global attributes.
+ +kw = ['oceanography', 'Earth Science > Oceans > Ocean Chemistry> Radionuclides',
+ 'Earth Science > Human Dimensions > Environmental Impacts > Nuclear Radiation Exposure',
+ 'Earth Science > Oceans > Ocean Chemistry > Ocean Tracers, Earth Science > Oceans > Marine Sediments',
+ 'Earth Science > Oceans > Ocean Chemistry, Earth Science > Oceans > Sea Ice > Isotopes',
+ 'Earth Science > Oceans > Water Quality > Ocean Contaminants',
+ 'Earth Science > Biological Classification > Animals/Vertebrates > Fish',
+ 'Earth Science > Biosphere > Ecosystems > Marine Ecosystems',
+ 'Earth Science > Biological Classification > Animals/Invertebrates > Mollusks',
+ 'Earth Science > Biological Classification > Animals/Invertebrates > Arthropods > Crustaceans',
+ 'Earth Science > Biological Classification > Plants > Macroalgae (Seaweeds)']
feed = GlobAttrsFeeder(dfs, cbs=[
+ BboxCB(),
+ DepthRangeCB(),
+ TimeRangeCB(cfg=CONFIGS),
+ ZoteroCB('26VMZZ2Q', cfg=CONFIGS),
+ KeyValuePairCB('keywords', ', '.join(kw))
+ ])
+
+attrs = feed(); attrs
{'geospatial_lat_min': '29.05',
+ 'geospatial_lat_max': '65.35',
+ 'geospatial_lon_min': '9.6333',
+ 'geospatial_lon_max': '54.0',
+ 'geospatial_bounds': 'POLYGON ((9.6333 54, 29.05 54, 29.05 65.35, 9.6333 65.35, 9.6333 54))',
+ 'geospatial_vertical_max': '0',
+ 'geospatial_vertical_min': '-248.0',
+ 'time_coverage_start': '1984-01-10T00:00:00',
+ 'time_coverage_end': '1987-06-28T00:00:00',
+ 'title': 'Environmental database - Helsinki Commission Monitoring of Radioactive Substances',
+ 'summary': 'MORS Environment database has been used to collate data resulting from monitoring of environmental radioactivity in the Baltic Sea based on HELCOM Recommendation 26/3.\n\nThe database is structured according to HELCOM Guidelines on Monitoring of Radioactive Substances (https://www.helcom.fi/wp-content/uploads/2019/08/Guidelines-for-Monitoring-of-Radioactive-Substances.pdf), which specifies reporting format, database structure, data types and obligatory parameters used for reporting data under Recommendation 26/3.\n\nThe database is updated and quality assured annually by HELCOM MORS EG.',
+ 'creator_name': '[{"creatorType": "author", "name": "HELCOM MORS"}]',
+ 'keywords': 'oceanography, Earth Science > Oceans > Ocean Chemistry> Radionuclides, Earth Science > Human Dimensions > Environmental Impacts > Nuclear Radiation Exposure, Earth Science > Oceans > Ocean Chemistry > Ocean Tracers, Earth Science > Oceans > Marine Sediments, Earth Science > Oceans > Ocean Chemistry, Earth Science > Oceans > Sea Ice > Isotopes, Earth Science > Oceans > Water Quality > Ocean Contaminants, Earth Science > Biological Classification > Animals/Vertebrates > Fish, Earth Science > Biosphere > Ecosystems > Marine Ecosystems, Earth Science > Biological Classification > Animals/Invertebrates > Mollusks, Earth Science > Biological Classification > Animals/Invertebrates > Arthropods > Crustaceans, Earth Science > Biological Classification > Plants > Macroalgae (Seaweeds)'}
+.toml
config file.
+ Generate a NetCDF4 template from the configurable CDL.toml
file, itself generated in /api/configs.ipynb
.
+++NCTemplater (cdl:Dict, nuclide_vars_fname:str, tpl_fname:str, + enum_dicts:Dict, verbose=False)
MARIS NetCDF template generator.
++ | Type | +Default | +Details | +
---|---|---|---|
cdl | +Dict | ++ | “Pseudo CDL” (.toml ) |
+
nuclide_vars_fname | +str | ++ | File name and path of MARIS nuclide lookup table containing variable names | +
tpl_fname | +str | ++ | File name and path of NetCDF4 file to be generated | +
enum_dicts | +Dict | ++ | MARIS NetCDF enumeration types | +
verbose | +bool | +False | ++ |
class NCTemplater:
    "MARIS NetCDF template generator."
    def __init__(self,
                 cdl:Dict, # "Pseudo CDL" (`.toml`)
                 nuclide_vars_fname:str, # File name and path of MARIS nuclide lookup table containing variable names
                 tpl_fname:str, # File name and path of NetCDF4 file to be generated
                 enum_dicts:Dict, # MARIS NetCDF enumeration types
                 verbose=False
                 ):
        fc.store_attr()
        # Enumeration types registry, empty until populated
        self.enum_types = {}
        # Dimension definition taken from the `dim` section of the CDL
        self.dim = cdl['dim']
For example, provided the configuration cdl.toml
below, the templater gets access, among others, to its dim
definition section:
cdl_test = read_toml('./files/cdl.toml')
+lut_src_dir_test = './files/lut'
+cdl_enums_test = read_toml('./files/cdl.toml')['enums']
+enums = get_enum_dicts(lut_src_dir=lut_src_dir_test,
+ cdl_enums=cdl_enums_test)
+
+
+templater = NCTemplater(cdl=cdl_test,
+ nuclide_vars_fname='./files/lut/dbo_nuclide.xlsx',
+ tpl_fname='./files/nc/test.nc',
+ enum_dicts=enums)
+
+expected = {'name': 'sample',
+ 'dtype': 'u8',
+ 'attrs': {'long_name': 'Sample ID of measurement'}
+ }
+
+fc.test_eq(templater.dim, expected)
+++NCTemplater.nuclide_vars (col_varnames:str='nc_name', + col_stdnames:str='nusymbol', dtype:str='f4')
Return the name of the radionuclide variables analysed.
++ | Type | +Default | +Details | +
---|---|---|---|
col_varnames | +str | +nc_name | +Column name in the Excel lookup file containing the NetCDF variable names | +
col_stdnames | +str | +nusymbol | +Column name Excel lookup file containing the NetCDF standard names | +
dtype | +str | +f4 | +Default data type | +
Returns | +list | ++ | List of nuclide variables (including their names and attributes) | +
@patch
def nuclide_vars(
    self:NCTemplater,
    col_varnames:str='nc_name', # Column name in the Excel lookup file containing the NetCDF variable names
    col_stdnames:str='nusymbol', # Column name Excel lookup file containing the NetCDF standard names
    dtype:str='f4', # Default data type
    ) -> list[dict]: # List of nuclide variables (including their names and attributes)
    "Return the name of the radionuclide variables analysed."
    df = pd.read_excel(self.nuclide_vars_fname, index_col=0)

    # Discard the placeholder rows of the lookup table
    placeholders = ['NOT AVAILABLE', 'NOT APPLICABLE']
    df = df[~df.nuclide.isin(placeholders)]

    rows = zip(
        df[col_varnames],
        df['nuclide'].str.capitalize(),
        df['massnb'].astype(int),
        df[col_stdnames],
    )
    variables = []
    for var_name, nuclide, massnb, std_name in rows:
        attrs = {
            'long_name': f"{nuclide.capitalize()} {massnb}",
            'standard_name': std_name,
        }
        variables.append({'name': var_name, 'dtype': dtype, 'attrs': attrs})
    return variables
For example, to retrieve the NetCDF nuclide names and associated attributes:
+templater = NCTemplater(cdl=cdl_test,
+ nuclide_vars_fname='./files/lut/dbo_nuclide.xlsx',
+ tpl_fname='./files/nc/test.nc',
+ enum_dicts=enums)
+expected = [
+ {'name': 'h3', 'attrs': {'long_name': 'Tritium 3', 'standard_name': '3H'}, 'dtype': 'f4'},
+ {'name': 'be7', 'attrs': {'long_name': 'Beryllium 7', 'standard_name': '7Be'}, 'dtype': 'f4'}
+ ]
+
+fc.test_eq(templater.nuclide_vars()[:2], expected)
+++NCTemplater.derive (nuclide:dict, suffix:dict)
Derive NetCDF nuclide-dependent variable names & attributes as defined in CDL.
++ | Type | +Details | +
---|---|---|
nuclide | +dict | +Nuclide variable name and associated netcdf attributes | +
suffix | +dict | +Naming rules as described in CDL (e.g _unc ) |
+
Returns | +dict | +Derived variable name and associated attributes | +
@patch
def derive(
    self:NCTemplater,
    nuclide:dict, # Nuclide variable name and associated netcdf attributes
    suffix:dict, # Naming rules as described in CDL (e.g `_unc`)
) -> dict: # Derived variable name and associated attributes
    "Derive NetCDF nuclide-dependent variable names & attributes as defined in CDL."
    derived_name = nuclide['name'] + suffix['name']
    derived_dtype = suffix['dtype']  # The data type comes from the suffix, not from the nuclide
    # Every nuclide attribute is extended with the suffix attribute of the same key
    derived_attrs = {}
    for key, base in nuclide['attrs'].items():
        derived_attrs[key] = base + suffix['attrs'][key]
    return {'name': derived_name, 'dtype': derived_dtype, 'attrs': derived_attrs}
For example, among others, the cdl.toml
file defines the naming convention on variable names deriving from nuclides (e.g h3_unc
for measurement uncertainty on the h3
nuclide variable).
Here is below the defined Tritium NetCDF variable as specified in the .cdl
file:
{'name': 'h3',
+ 'dtype': 'f4',
+ 'attrs': {'long_name': 'Tritium 3', 'standard_name': '3H'}}
+# Example of suffix defined in the .cdl file
+suffix = {
+ 'name': '_unc',
+ 'attrs': {
+ 'long_name': ' uncertainty',
+ 'standard_name': '_uncertainty'
+ },
+ 'dtype': 'f4'
+ }
+
+# And what we expect
+expected = {
+ 'name': 'h3_unc',
+ 'attrs': {
+ 'long_name': 'Tritium 3 uncertainty',
+ 'standard_name': '3H_uncertainty'
+ },
+ 'dtype': 'f4'
+ }
+
+fc.test_eq(templater.derive(templater.nuclide_vars()[0], suffix=suffix), expected)
+++NCTemplater.create_enum_types ()
Create enumeration types
++++NCTemplater.create_groups ()
Create NetCDF groups
++++NCTemplater.create_variables (grp:netCDF4._netCDF4.Group)
Create variables
++ | Type | +Details | +
---|---|---|
grp | +Group | +NetCDF group | +
+++NCTemplater.create_default_variables (grp:netCDF4._netCDF4.Group)
Create Default variables
++ | Type | +Details | +
---|---|---|
grp | +Group | +NetCDF group | +
+++NCTemplater.create_group_specific_variables (grp:netCDF4._netCDF4.Group)
Create group specific variables
++ | Type | +Details | +
---|---|---|
grp | +Group | +NetCDF group | +
+++NCTemplater.create_analyte_variables (grp:netCDF4._netCDF4.Group)
Create analyte variables and their dependent ones, such as uncertainty, detection limit, …
++ | Type | +Details | +
---|---|---|
grp | +Group | +NetCDF group | +
@patch
def create_analyte_variables(self:NCTemplater,
                             grp:netCDF4.Group, # NetCDF group
                             ):
    "Create analyte variables and dependent one as uncertainty, detection limit, ..."
    for nuclide in self.nuclide_vars():
        # The nuclide variable itself comes first ...
        self.create_variable(grp, nuclide)
        # ... followed by one derived variable per suffix declared in the CDL
        derived_vars = (self.derive(nuclide, suffix)
                        for suffix in self.cdl['vars']['suffixes'].values())
        for derived in derived_vars:
            self.create_variable(grp, derived)
+++NCTemplater.create_variable (grp:netCDF4._netCDF4.Group, var:Dict)
Create NetCDF variable with proper types (standard and enums)
++ | Type | +Details | +
---|---|---|
grp | +Group | +NetCDF group | +
var | +Dict | +Variable specification dict with name , dtype and attrs keys |
+
@patch
def create_variable(self:NCTemplater,
                    grp:netCDF4.Group, # NetCDF group
                    var:Dict, # Variable specification dict with `name`, `dtype` and `attrs` keys
                    ):
    """Create NetCDF variable with proper types (standard and enums)

    The variable is created in `grp` along the template dimension (`self.dim['name']`).
    When `dtype` matches an entry of `self.enum_types`, that entry is used as the data type;
    otherwise `dtype` is passed as is.
    """
    # Read the fields by key: unpacking `var.values()` relied on insertion order and
    # silently swapped `dtype` and `attrs` for a dict built as name/attrs/dtype.
    name, dtype, attrs = var['name'], var['dtype'], var['attrs']
    nc_var = grp.createVariable(name,
                                self.enum_types.get(dtype) or dtype,
                                self.dim['name'])
    nc_var.setncatts(attrs)
+++NCTemplater.generate ()
Generate CDL
@patch
def generate(self:NCTemplater):
    "Generate CDL"
    # The dataset is bound to `self.nc` for the duration of the `with` block
    with Dataset(self.tpl_fname, 'w', format='NETCDF4') as self.nc:
        self.nc.setncatts(self.cdl['global_attrs'])
        self.create_enum_types()
        # `None` size: the dimension is created unlimited
        dim_name = self.dim['name']
        self.nc.createDimension(dim_name, None)
        self.create_groups()
So in summary, to produce a template MARIS NetCDF:
+templater = NCTemplater(cdl=cdl_test,
+ nuclide_vars_fname='./files/lut/dbo_nuclide.xlsx',
+ tpl_fname='./files/nc/template-test.nc',
+ enum_dicts=enums,
+ verbose=True)
+
+templater.generate()
Creating area_t enumeration type
+Creating bio_group_t enumeration type
+Creating body_part_t enumeration type
+Creating species_t enumeration type
+Creating sed_type_t enumeration type
+Creating unit_t enumeration type
+Creating dl_t enumeration type
+Creating filt_t enumeration type
+Creating counmet_t enumeration type
+Creating sampmet_t enumeration type
+Creating prepmet_t enumeration type
+NetCDF
, csv
, … formats.
+ +++NetCDFEncoder (dfs:dict[pandas.core.frame.DataFrame], src_fname:str, + dest_fname:str, global_attrs:Dict, enums_xtra:Dict={}, + verbose:bool=False)
MARIS NetCDF encoder.
++ | Type | +Default | +Details | +
---|---|---|---|
dfs | +dict | ++ | dict of Dataframes to encode with group name as key {‘sediment’: df_sed, …} | +
src_fname | +str | ++ | File name and path to the MARIS CDL template | +
dest_fname | +str | ++ | Name of output file to produce | +
global_attrs | +Dict | ++ | Global attributes | +
enums_xtra | +Dict | +{} | +Enumeration types to overwrite | +
verbose | +bool | +False | +Print currently written NetCDF group and variable names | +
class NetCDFEncoder:
    "MARIS NetCDF encoder."
    def __init__(self,
                 dfs:dict[str, pd.DataFrame], # dict of Dataframes to encode with group name as key {'sediment': df_sed, ...}
                 src_fname:str, # File name and path to the MARIS CDL template
                 dest_fname:str, # Name of output file to produce
                 global_attrs:Dict, # Global attributes
                 enums_xtra:Dict|None=None, # Enumeration types to overwrite (none when omitted)
                 verbose:bool=False, # Print currently written NetCDF group and variable names
                 ):
        store_attr()
        # A `{}` default would be a single dict shared by every instance; create a fresh one instead
        if enums_xtra is None: self.enums_xtra = {}
        # Enumeration types registry for the destination file, empty until populated
        self.enum_types = {}
df_seawater = pd.DataFrame({
+ 'sample': [0, 1, 5],
+ 'lon': [141, 142, 143],
+ 'lat': [37.3, 38.3, 39.3],
+ 'time': [1234, 1235, 1236],
+ 'i131': [1, 1.5, 2],
+ 'i131_dl': [0, 1, 2],
+ 'i131_unit': [1, 1, 2],
+ 'species': [134, 136, 137]
+ })
+
+df_biota = pd.DataFrame({
+ 'sample': [0, 1],
+ 'lon': [141, 142],
+ 'lat': [37.3, 38.3],
+ 'time': [1234, 1235],
+ 'i131': [1, 1.5],
+ 'i131_dl': [0, 1],
+ 'i131_unit': [1, 1],
+ 'species': [134, 136]
+ })
+
+dfs = {'seawater': df_seawater, 'biota': df_biota}
+attrs = {'id': '123', 'title': 'Test title', 'summary': 'Summary test'}
+src = './files/nc/template-test.nc'
+dest = './files/nc/encoding-test.nc'
+enums_xtra = {
+ 'species_t': {'Aristeus antennatus': 134, 'Apostichopus': 136}
+ }
+++NetCDFEncoder.copy_global_attributes ()
Update NetCDF template global attributes as specified by global_attrs
argument.
+++NetCDFEncoder.copy_dimensions ()
+++NetCDFEncoder.process_groups ()
+++NetCDFEncoder.process_group (group_name, df)
+++NetCDFEncoder.copy_variables (group_name, df, group_dest)
+++NetCDFEncoder.copy_variable (var_name, var_src, df, group_dest)
@patch
def copy_variable(self:NetCDFEncoder, var_name, var_src, df, group_dest):
    "Create `var_name` in `group_dest`, fill it from `df` and copy its attributes from the template variable."
    dtype_name = var_src.datatype.name
    template_enums = self.src.enumtypes
    if self.verbose:
        print(80*'-')
        print(f'Group: {group_dest.name}, Variable: {var_name}')
    # If the type of the var is an enum (meaning present in the template src) then create it
    if dtype_name in template_enums:
        self.copy_enum_type(dtype_name)
    self._create_and_copy_variable(var_name, var_src, df, group_dest, dtype_name)
    self.copy_variable_attributes(var_name, var_src, group_dest)
+++NetCDFEncoder.sanitize_if_enum_and_nan (values, fill_value=-1)
@patch
def _create_and_copy_variable(self:NetCDFEncoder, var_name, var_src, df, group_dest, dtype_name):
    "Create `var_name` in `group_dest` (zlib-compressed) and write the matching `df` column into it."
    # Prefer the enumeration type registered for `dtype_name`, else the template data type
    variable_type = self.enum_types.get(dtype_name, var_src.datatype)
    # Use the group_dest dimensions
    group_dest.createVariable(var_name, variable_type, group_dest.dimensions, compression='zlib', complevel=9)
    values = df[var_name].values
    # `isinstance` instead of comparing `type(...)` objects; enum values are sanitized before writing
    if isinstance(variable_type, netCDF4._netCDF4.EnumType):
        values = self.sanitize_if_enum_and_nan(values)
    group_dest[var_name][:] = values
+++NetCDFEncoder.copy_enum_type (dtype_name)
@patch
def copy_enum_type(self:NetCDFEncoder, dtype_name):
    "Create the enumeration type `dtype_name` in the destination file unless it was already created."
    # if enum type not already created
    if dtype_name in self.enum_types:
        return
    enum_info = self.src.enumtypes[dtype_name]
    # If a subset of an enum is defined in enums_xtra (typically for the lengthy species_t)
    if enum_info.name in self.enums_xtra:
        # Build a new dict holding the subset plus "Not applicable", so that the
        # caller's `enums_xtra` entry is no longer mutated in place.
        enum_info.enum_dict = {**self.enums_xtra[enum_info.name], 'Not applicable': -1} # TBD
    self.enum_types[dtype_name] = self.dest.createEnumType(enum_info.dtype,
                                                           enum_info.name,
                                                           enum_info.enum_dict)
+++NetCDFEncoder.copy_variable_attributes (var_name, var_src, group_dest)
# DEPRECATED
@patch
def cast_verbose_rf(self:NetCDFEncoder,
                    df,
                    col):
    """
    Try to cast df column to numeric type:
        - Silently coerce to nan if not possible
        - But log when it failed
    """
    column = df.reset_index()[col]
    n_before = sum(column.notna())
    casted = pd.to_numeric(column, errors='coerce', downcast=None)
    n_after = sum(casted.notna())
    # Values that became NaN during the cast are the ones that could not be converted
    n_failed = n_before - n_after
    if n_failed != 0:
        print(f'Failed to convert type of {col} in {n_failed} occurences')
    return casted
+++NetCDFEncoder.encode ()
Encode MARIS NetCDF based on template and dataframes.
@patch
def encode(self:NetCDFEncoder):
    "Encode MARIS NetCDF based on template and dataframes."
    # Template is opened first, then the destination file in write mode
    with Dataset(self.src_fname, format='NETCDF4') as self.src:
        with Dataset(self.dest_fname, 'w', format='NETCDF4') as self.dest:
            self.copy_global_attributes()
            self.copy_dimensions()
            self.process_groups()
# Test that correct variables are created in groups
+with Dataset(dest, 'r', format='NETCDF4') as nc:
+ fc.test_eq(nc['biota'].variables.keys(),
+ ['sample', 'lon', 'lat', 'time', 'species', 'i131', 'i131_dl', 'i131_unit'])
+
+ fc.test_eq(nc['seawater'].variables.keys(),
+ ['sample', 'lon', 'lat', 'time', 'i131', 'i131_dl', 'i131_unit'])
# Inspect the dimensions created at root level and in each group
+with Dataset(dest, 'r', format='NETCDF4') as nc:
+ print(nc.dimensions.items())
+ print(nc['biota'].dimensions.items())
+ print(nc['seawater'].dimensions.items())
dict_items([('sample', <class 'netCDF4._netCDF4.Dimension'> (unlimited): name = 'sample', size = 0)])
+dict_items([('biota', <class 'netCDF4._netCDF4.Dimension'>: name = 'biota', size = 2)])
+dict_items([('seawater', <class 'netCDF4._netCDF4.Dimension'>: name = 'seawater', size = 3)])
++++OpenRefineCsvEncoder (dfs:dict[pandas.core.frame.DataFrame], + dest_fname:str, ref_id=-1, verbose:bool=False)
OpenRefine CSV from NetCDF.
++ | Type | +Default | +Details | +
---|---|---|---|
dfs | +dict | ++ | dict of Dataframes to encode with group name as key {‘sediment’: df_sed, …} | +
dest_fname | +str | ++ | Name of output file to produce | +
ref_id | +int | +-1 | +ref_id to include | +
verbose | +bool | +False | +
class OpenRefineCsvEncoder:
    "OpenRefine CSV from NetCDF."
    def __init__(
        self,
        dfs:dict[pd.DataFrame], # dict of Dataframes to encode with group name as key {'sediment': df_sed, ...}
        dest_fname:str, # Name of output file to produce
        ref_id = -1, # ref_id to include
        verbose:bool=False, # Print
    ):
        # Keep every constructor argument as an attribute of the same name
        store_attr()
+++OpenRefineCsvEncoder.process_groups_to_csv ()
+++OpenRefineCsvEncoder.process_group_to_csv (group_name, df)
+++OpenRefineCsvEncoder.encode ()
Encode OpenRefine CSV based on dataframes from NetCDF.
+We define below useful constants throughout the package.
+ +Abstracting some common operations.
++++get_unique_across_dfs (dfs:dict, col_name:str='NUCLIDE', + as_df:bool=False, include_nchars:bool=False)
Get a list of unique column values across dataframes.
++ | Type | +Default | +Details | +
---|---|---|---|
dfs | +dict | ++ | Dictionary of dataframes | +
col_name | +str | +NUCLIDE | +Column name to extract unique values from | +
as_df | +bool | +False | +Return a DataFrame of unique values | +
include_nchars | +bool | +False | +Add a column with the number of characters in the value | +
Returns | +list | ++ | Returns a list of unique column values across dataframes | +
def get_unique_across_dfs(dfs:dict, # Dictionary of dataframes
+ col_name:str='NUCLIDE', # Column name to extract unique values from
+ as_df:bool=False, # Return a DataFrame of unique values
+ include_nchars:bool=False # Add a column with the number of characters in the value
+ ) -> list: # Returns a list of unique column values across dataframes
+ "Get a list of unique column values across dataframes."
+ unique_values = list(set().union(*(df[col_name].unique() for df in dfs.values() if col_name in df.columns)))
+ if not as_df:
+ return unique_values
+ else:
+ df_uniques = pd.DataFrame(unique_values, columns=['value']).reset_index()
+ if include_nchars: df_uniques['n_chars'] = df_uniques['value'].str.len()
+ return df_uniques
Example of use:
+dfs_test = {'seawater': pd.DataFrame({'NUCLIDE': ['cs137', 'cs134_137_tot', 'cs134_137_tot']}),
+ 'biota': pd.DataFrame({'NUCLIDE': ['cs137', 'cs134', 'cs134_137_tot']}),
+ 'sediment': pd.DataFrame({'NUCLIDE': ['cs134_137_tot', 'cs134_137_tot', 'cs134_137_tot']})}
+
+fc.test_eq(set(get_unique_across_dfs(dfs_test, col_name='NUCLIDE')),
+ set(['cs134', 'cs137', 'cs134_137_tot']))
What if the column name is not in one of the dataframes?
+dfs_test = {'seawater': pd.DataFrame({'NUCLIDE': ['cs137', 'cs134_137_tot', 'cs134_137_tot']}),
+ 'biota': pd.DataFrame({'NUCLIDE': ['cs137', 'cs134', 'cs134_137_tot']}),
+ 'sediment': pd.DataFrame({'NONUCLIDE': ['cs134_137_tot', 'cs134_137_tot', 'cs134_137_tot']})}
+
+fc.test_eq(set(get_unique_across_dfs(dfs_test, col_name='NUCLIDE')),
+ set(['cs134', 'cs137', 'cs134_137_tot']))
+ | index | +value | +n_chars | +
---|---|---|---|
0 | +0 | +cs137 | +5 | +
1 | +1 | +cs134_137_tot | +13 | +
2 | +2 | +cs134 | +5 | +
+++Remapper (provider_lut_df:pandas.core.frame.DataFrame, + maris_lut_fn:<built-infunctioncallable>, maris_col_id:str, + maris_col_name:str, provider_col_to_match:str, + provider_col_key, fname_cache)
Remap a data provider lookup table to a MARIS lookup table using fuzzy matching.
++ | Type | +Details | +
---|---|---|
provider_lut_df | +DataFrame | +Data provider lookup table to be remapped | +
maris_lut_fn | +callable | +Function that returns the MARIS lookup table path | +
maris_col_id | +str | +MARIS lookup table column name for the id | +
maris_col_name | +str | +MARIS lookup table column name for the name | +
provider_col_to_match | +str | +Data provider lookup table column name for the name to match | +
provider_col_key | ++ | Data provider lookup table column name for the key | +
fname_cache | ++ | Cache file name | +
class Remapper():
    "Remap a data provider lookup table to a MARIS lookup table using fuzzy matching."
    def __init__(self,
                 provider_lut_df:pd.DataFrame, # Data provider lookup table to be remapped
                 maris_lut_fn:callable, # Function that returns the MARIS lookup table path
                 maris_col_id:str, # MARIS lookup table column name for the id
                 maris_col_name:str, # MARIS lookup table column name for the name
                 provider_col_to_match:str, # Data provider lookup table column name for the name to match
                 provider_col_key, # Data provider lookup table column name for the key
                 fname_cache # Cache file name
                 ):
        fc.store_attr()
        self.cache_file = cache_path() / fname_cache
        self.maris_lut = maris_lut_fn()
        self.lut = {}
        # Sensible defaults so that `select_match` can be called before
        # `generate_lookup_table` without raising an `AttributeError`.
        self.fixes = {}
        self.as_df = True

    def generate_lookup_table(self,
                              fixes=None, # Lookup table fixes (provider name -> name to match instead); `None` means no fixes
                              as_df=True, # Whether to return a DataFrame
                              overwrite=True # Whether to rebuild the table even if a cached version exists
                              ):
        "Generate a lookup table from a data provider lookup table to a MARIS lookup table using fuzzy matching."
        # A fresh dict per call: a `{}` default would be shared across all calls/instances.
        self.fixes = {} if fixes is None else fixes
        self.as_df = as_df
        if overwrite or not self.cache_file.exists():
            self._create_lookup_table()
            fc.save_pickle(self.cache_file, self.lut)
        else:
            # NOTE: the cache is a pickle file; only load cache files produced by this class.
            self.lut = fc.load_pickle(self.cache_file)

        return self._format_output()

    def _create_lookup_table(self):
        "Fill `self.lut` by processing every row of the data provider lookup table."
        df = self.provider_lut_df
        for _, row in tqdm(df.iterrows(), total=len(df), desc="Processing"):
            self._process_row(row)

    def _process_row(self, row):
        "Store in `self.lut` the best match for `row`, keyed by the value of its `provider_col_key` column."
        value_to_match = row[self.provider_col_to_match]
        key = row[self.provider_col_key]
        if isinstance(value_to_match, str):  # Only process if value is a string
            # If value is in fixes, use the fixed value
            name_to_match = self.fixes.get(value_to_match, value_to_match)

            # First row returned by `match_maris_lut` is taken as the best candidate
            result = match_maris_lut(self.maris_lut, name_to_match,
                                     self.maris_col_id, self.maris_col_name).iloc[0]
            self.lut[key] = Match(result[self.maris_col_id], result[self.maris_col_name],
                                  value_to_match, result['score'])
        else:
            # Handle non-string values (e.g., NaN)
            self.lut[key] = Match(-1, "Unknown", value_to_match, 0)

    def select_match(self,
                     match_score_threshold:int=1 # Keep only entries whose `match_score` is >= this value
                     ):
        "Restrict `self.lut` (in place) to entries with `match_score >= match_score_threshold` and return it formatted."
        self.lut = {k: v for k, v in self.lut.items() if v.match_score >= match_score_threshold}
        return self._format_output()

    def _format_output(self):
        "Return `self.lut` as is, or as a DataFrame sorted by decreasing `match_score` when `self.as_df` is set."
        if not self.as_df: return self.lut
        df_lut = pd.DataFrame.from_dict(self.lut, orient='index',
                                        columns=['matched_maris_name', 'source_name', 'match_score'])
        df_lut.index.name = 'source_key'
        return df_lut.sort_values(by='match_score', ascending=False)
+++has_valid_varname (var_names:list, cdl_path:str, group=None)
Check that proposed variable names are in MARIS CDL
++ | Type | +Default | +Details | +
---|---|---|---|
var_names | +list | ++ | variable names | +
cdl_path | +str | ++ | Path to MARIS CDL file (point of truth) | +
group | +NoneType | +None | +Check if the variable names is contained in the group | +
def has_valid_varname(
    var_names:list, # variable names
    cdl_path:str, # Path to MARIS CDL file (point of truth)
    group = None, # If given, only the variables of this CDL group are allowed
):
    "Check that proposed variable names are in MARIS CDL"
    with Dataset(cdl_path) as nc:
        # Variable names declared in each group of the CDL
        cdl_vars = {grp.name: list(grp.variables.keys()) for grp in nc.groups.values()}

    if group is not None:
        # Raises `KeyError` when `group` is not a group of the CDL
        allowed_vars = set(cdl_vars[group])
    else:
        # Union of the variable names of all groups
        allowed_vars = {name for names in cdl_vars.values() for name in names}

    has_valid = True
    for name in var_names:
        if name not in allowed_vars:
            # Keep going so that every invalid name gets reported
            has_valid = False
            if group is not None:
                print(f'"{name}" variable name not found in group "{group}" of MARIS CDL')
            else:
                print(f'"{name}" variable name not found in MARIS CDL')
    return has_valid
+++get_bbox (df, coord_cols=('lon', 'lat'))
Get the bounding box of a DataFrame.
+'POLYGON ((-10 40, 5 40, 5 50, -10 50, -10 40))'
++++ddmm_to_dd (ddmmmm:float)
+ | Type | +Details | +
---|---|---|
ddmmmm | +float | +Coordinates in degrees/minutes decimal format | +
Returns | +float | +Coordinates in degrees decimal format | +
+++download_file (owner, repo, src_dir, dest_dir, fname)
def download_files_in_folder(owner:str, # GitHub account owning the repository
                             repo:str, # Repository name
                             src_dir:str, # Folder path inside the repository
                             dest_dir:str # Local folder passed on to `download_file`
                             ):
    "Make a GET request to the GitHub API to get the contents of the folder."
    url = f"https://api.github.com/repos/{owner}/{repo}/contents/{src_dir}"
    # Without a timeout `requests` may wait forever on an unresponsive server
    response = requests.get(url, timeout=30)

    if response.status_code != 200:
        print(f"Error: {response.status_code}")
        return

    # Iterate over the folder entries and download the files only
    for item in response.json():
        if item["type"] == "file":
            download_file(owner, repo, src_dir, dest_dir, item["name"])
+
def download_file(owner, repo, src_dir, dest_dir, fname):
    "Download `fname` from folder `src_dir` of the `master` branch of a GitHub repository into `dest_dir`."
    # Make a GET request to get the raw file contents
    url = f"https://raw.githubusercontent.com/{owner}/{repo}/master/{src_dir}/{fname}"
    # Without a timeout `requests` may wait forever on an unresponsive server
    response = requests.get(url, timeout=30)

    if response.status_code != 200:
        print(f"Error: {response.status_code}")
        return

    # Save the file locally
    with open(Path(dest_dir) / fname, "wb") as file:
        file.write(response.content)
    print(f"{fname} downloaded successfully.")
+++download_files_in_folder (owner:str, repo:str, src_dir:str, dest_dir:str)
Make a GET request to the GitHub API to get the contents of the folder.
+The World Register of Marine Species (WoRMS) is an authoritative classification and catalogue of marine names. It provides a REST API (among others) that allows you to “fuzzy” match any species name you might encounter in marine data sources against its own database. There are several types of matches as described here.
++++match_worms (name:str)
Lookup name
in WoRMS (fuzzy match).
+ | Type | +Details | +
---|---|---|
name | +str | +Name of species to look up in WoRMS | +
def match_worms(
    name:str # Name of species to look up in WoRMS
    ):
    "Lookup `name` in WoRMS (fuzzy match)."
    url = 'https://www.marinespecies.org/rest/AphiaRecordsByMatchNames'
    params = {
        'scientificnames[]': [name],
        'marine_only': 'true'
    }
    headers = {
        'accept': 'application/json'
    }

    # Without a timeout `requests` may wait forever on an unresponsive server
    response = requests.get(url, params=params, headers=headers, timeout=30)

    # Any status other than 200 is reported with the `-1` sentinel
    # (kept as is for backward compatibility with existing callers).
    if response.status_code != 200:
        return -1
    return response.json()
For instance:
+[[{'AphiaID': 107083,
+ 'url': 'https://www.marinespecies.org/aphia.php?p=taxdetails&id=107083',
+ 'scientificname': 'Aristeus antennatus',
+ 'authority': '(Risso, 1816)',
+ 'status': 'accepted',
+ 'unacceptreason': None,
+ 'taxonRankID': 220,
+ 'rank': 'Species',
+ 'valid_AphiaID': 107083,
+ 'valid_name': 'Aristeus antennatus',
+ 'valid_authority': '(Risso, 1816)',
+ 'parentNameUsageID': 106807,
+ 'kingdom': 'Animalia',
+ 'phylum': 'Arthropoda',
+ 'class': 'Malacostraca',
+ 'order': 'Decapoda',
+ 'family': 'Aristeidae',
+ 'genus': 'Aristeus',
+ 'citation': 'DecaNet eds. (2024). DecaNet. Aristeus antennatus (Risso, 1816). Accessed through: World Register of Marine Species at: https://www.marinespecies.org/aphia.php?p=taxdetails&id=107083 on 2024-06-10',
+ 'lsid': 'urn:lsid:marinespecies.org:taxname:107083',
+ 'isMarine': 1,
+ 'isBrackish': 0,
+ 'isFreshwater': 0,
+ 'isTerrestrial': 0,
+ 'isExtinct': 0,
+ 'match_type': 'exact',
+ 'modified': '2022-08-24T09:48:14.813Z'}]]
+Using https://jamesturk.github.io/jellyfish fuzzy matching distance metrics.
++++Match (matched_id:int, matched_maris_name:str, source_name:str, + match_score:int)
Match between a data provider name and a MARIS lookup table.
++++match_maris_lut (lut_path:str, data_provider_name:str, maris_id:str, + maris_name:str, dist_fn:collections.abc.Callable=<built- + in function levenshtein_distance>, nresults:int=10)
Fuzzy matching data provider and MARIS lookup tables (e.g biota species, sediments, …).
++ | Type | +Default | +Details | +
---|---|---|---|
lut_path | +str | ++ | Path to MARIS species authoritative species look-up table | +
data_provider_name | +str | ++ | Name of data provider nomenclature item to look up | +
maris_id | +str | ++ | Id of MARIS lookup table nomenclature item to match | +
maris_name | +str | ++ | Name of MARIS lookup table nomenclature item to match | +
dist_fn | +Callable | +levenshtein_distance | +Distance function | +
nresults | +int | +10 | +Maximum number of results to return | +
Returns | +DataFrame | ++ | + |
def match_maris_lut(
    lut_path: str, # Path to MARIS species authoritative species look-up table
    data_provider_name: str, # Name of data provider nomenclature item to look up
    maris_id: str, # Id of MARIS lookup table nomenclature item to match
    maris_name: str, # Name of MARIS lookup table nomenclature item to match
    dist_fn: Callable = jf.levenshtein_distance, # Distance function
    nresults: int = 10 # Maximum number of results to return
) -> pd.DataFrame:
    "Fuzzy matching data provider and MARIS lookup tables (e.g biota species, sediments, ...)."
    df = pd.read_excel(lut_path)
    # Rows without a name cannot be scored
    df = df.dropna(subset=[maris_name])
    df = df.astype({maris_id: 'int'})
    # Matching is case-insensitive; lower-case the searched name once, not once per row
    name_lower = data_provider_name.lower()
    df['score'] = df[maris_name].str.lower().apply(lambda x: dist_fn(name_lower, x))
    # Smallest distance first, i.e. best candidates at the top
    df = df.sort_values(by='score', ascending=True)[:nresults]
    return df[[maris_id, maris_name, 'score']]
Below is an example trying to match the name “PLANKTON” against the dbo_species_cleaned.xlsx
MARIS biota species lookup table:
lut_fname = '../files/lut/dbo_species_cleaned.xlsx'
+match_maris_lut(lut_fname, data_provider_name='PLANKTON',
+ maris_id='species_id', maris_name='species')
+ | species_id | +species | +score | +
---|---|---|---|
281 | +280 | +Plankton | +0 | +
696 | +695 | +Zooplankton | +3 | +
633 | +632 | +Palaemon | +4 | +
697 | +696 | +Phytoplankton | +5 | +
812 | +811 | +Chanos | +5 | +
160 | +159 | +Neuston | +5 | +
234 | +233 | +Penaeus | +6 | +
1458 | +1457 | +Lamnidae | +6 | +
1438 | +1437 | +Labrus | +6 | +
1527 | +1526 | +Favites | +6 | +
Below is an example trying to match the name “GLACIAL” against the dbo_sedtype.xlsx MARIS sediment lookup table:
+lut_fname = '../files/lut/dbo_sedtype.xlsx'
+match_maris_lut(lut_fname, data_provider_name='GLACIAL',
+ maris_id='sedtype_id', maris_name='sedtype')
+ | sedtype_id | +sedtype | +score | +
---|---|---|---|
26 | +25 | +Glacial | +0 | +
3 | +2 | +Gravel | +4 | +
2 | +1 | +Clay | +5 | +
51 | +50 | +Glacial clay | +5 | +
4 | +3 | +Marsh | +6 | +
7 | +6 | +Sand | +6 | +
13 | +12 | +Silt | +6 | +
15 | +14 | +Sludge | +6 | +
27 | +26 | +Soft | +7 | +
52 | +51 | +Soft clay | +7 | +
lut_fname = '../files/lut/dbo_nuclide.xlsx'
+match_maris_lut(lut_fname, data_provider_name='CS-137',
+ maris_id='nuclide_id', maris_name='nc_name')
+ | nuclide_id | +nc_name | +score | +
---|---|---|---|
31 | +33 | +cs137 | +1 | +
30 | +31 | +cs134 | +2 | +
99 | +102 | +cs136 | +2 | +
29 | +30 | +cs127 | +2 | +
111 | +114 | +ce139 | +3 | +
109 | +112 | +sb127 | +3 | +
8 | +7 | +co57 | +4 | +
28 | +29 | +i131 | +4 | +
71 | +74 | +cm243 | +4 | +
90 | +93 | +sn117m | +4 | +
+++get_bbox (df, coord_cols=('lon', 'lat'))
'POLYGON ((-10 40, 5 40, 5 50, -10 50, -10 40))'
++++download_file (owner, repo, src_dir, dest_dir, fname)
def download_files_in_folder(owner:str, # GitHub account owning the repository
                             repo:str, # Repository name
                             src_dir:str, # Folder path inside the repository
                             dest_dir:str # Local folder passed on to `download_file`
                             ):
    "Make a GET request to the GitHub API to get the contents of the folder"
    url = f"https://api.github.com/repos/{owner}/{repo}/contents/{src_dir}"
    # Without a timeout `requests` may wait forever on an unresponsive server
    response = requests.get(url, timeout=30)

    if response.status_code != 200:
        print(f"Error: {response.status_code}")
        return

    # Iterate over the folder entries and download the files only
    for item in response.json():
        if item["type"] == "file":
            download_file(owner, repo, src_dir, dest_dir, item["name"])
+
def download_file(owner, repo, src_dir, dest_dir, fname):
    "Download `fname` from folder `src_dir` of the `master` branch of a GitHub repository into `dest_dir`."
    # Make a GET request to get the raw file contents
    url = f"https://raw.githubusercontent.com/{owner}/{repo}/master/{src_dir}/{fname}"
    # Without a timeout `requests` may wait forever on an unresponsive server
    response = requests.get(url, timeout=30)

    if response.status_code != 200:
        print(f"Error: {response.status_code}")
        return

    # Save the file locally
    with open(Path(dest_dir) / fname, "wb") as file:
        file.write(response.content)
    print(f"{fname} downloaded successfully.")
+++download_files_in_folder (owner:str, repo:str, src_dir:str, dest_dir:str)
Make a GET request to the GitHub API to get the contents of the folder
+The World Register of Marine Species (WoRMS) is an authoritative classification and catalogue of marine names. It provides a REST API (among others) that allows you to “fuzzy” match any species name you might encounter in marine data sources against its own database. There are several types of matches as described here.
++++match_worms (name:str)
Lookup name
in WoRMS (fuzzy match)
+ | Type | +Details | +
---|---|---|
name | +str | +Name of species to look up in WoRMS | +
def match_worms(
    name:str # Name of species to look up in WoRMS
    ):
    "Lookup `name` in WoRMS (fuzzy match)"
    url = 'https://www.marinespecies.org/rest/AphiaRecordsByMatchNames'
    params = {
        'scientificnames[]': [name],
        'marine_only': 'true'
    }
    headers = {
        'accept': 'application/json'
    }

    # Without a timeout `requests` may wait forever on an unresponsive server
    response = requests.get(url, params=params, headers=headers, timeout=30)

    # Any status other than 200 is reported with the `-1` sentinel
    # (kept as is for backward compatibility with existing callers).
    if response.status_code != 200:
        return -1
    return response.json()
For instance:
+[[{'AphiaID': 107083,
+ 'url': 'https://www.marinespecies.org/aphia.php?p=taxdetails&id=107083',
+ 'scientificname': 'Aristeus antennatus',
+ 'authority': '(Risso, 1816)',
+ 'status': 'accepted',
+ 'unacceptreason': None,
+ 'taxonRankID': 220,
+ 'rank': 'Species',
+ 'valid_AphiaID': 107083,
+ 'valid_name': 'Aristeus antennatus',
+ 'valid_authority': '(Risso, 1816)',
+ 'parentNameUsageID': 106807,
+ 'kingdom': 'Animalia',
+ 'phylum': 'Arthropoda',
+ 'class': 'Malacostraca',
+ 'order': 'Decapoda',
+ 'family': 'Aristeidae',
+ 'genus': 'Aristeus',
+ 'citation': 'DecaNet eds. (2024). DecaNet. Aristeus antennatus (Risso, 1816). Accessed through: World Register of Marine Species at: https://www.marinespecies.org/aphia.php?p=taxdetails&id=107083 on 2024-06-10',
+ 'lsid': 'urn:lsid:marinespecies.org:taxname:107083',
+ 'isMarine': 1,
+ 'isBrackish': 0,
+ 'isFreshwater': 0,
+ 'isTerrestrial': 0,
+ 'isExtinct': 0,
+ 'match_type': 'exact',
+ 'modified': '2022-08-24T09:48:14.813Z'}]]
++++test_dfs (dfs1:dict, dfs2:dict)
Compare two dictionaries of DataFrames for equality (also ensuring that columns are in the same order).
++ | Type | +Details | +
---|---|---|
dfs1 | +dict | +First dictionary of DataFrames to compare | +
dfs2 | +dict | +Second dictionary of DataFrames to compare | +
Returns | +None | +It raises an AssertionError if the DataFrames are not equal |
+
def test_dfs(
    dfs1:dict, # First dictionary of DataFrames to compare
    dfs2:dict # Second dictionary of DataFrames to compare
    ) -> None: # It raises an `AssertionError` if the DataFrames are not equal
    "Check that each DataFrame of `dfs1` equals the one stored under the same key in `dfs2` (rows sorted by index, `dfs2` columns aligned on `dfs1` ones)."
    # Only the keys of `dfs1` drive the comparison
    for grp in dfs1:
        expected, actual = dfs1[grp], dfs2[grp]
        expected = expected.sort_index()
        actual = actual.sort_index()
        # Put the columns of the second frame in the order of the first one before comparing
        aligned = actual.reindex(columns=expected.columns)
        fc.test_eq(expected, aligned)
+++main (verbose:bool=False)
Create MARIS NetCDF template, optionally in verbose mode
++ | Type | +Default | +Details | +
---|---|---|---|
verbose | +bool | +False | +Verbose | +
The autoreload extension is already loaded. To reload it, use:
+ %reload_ext autoreload
++++main ()
Create configuration files & download lookup tables
+ + +
The autoreload extension is already loaded. To reload it, use:
+ %reload_ext autoreload
++++import_handler (handler_name, fn_name='encode')
+++main (handler_name:str, src:str, dest:str)
Encode MARIS dataset as NetCDF
++ | Type | +Details | +
---|---|---|
handler_name | +str | +Handler’s name (e.g helcom, …) | +
src | +str | +Path to dataset to encode | +
dest | +str | +Path to converted NetCDF4 | +
++This data pipeline, known as a “handler” in Marisco terminology, is designed to clean, standardize, and encode HELCOM data into
+NetCDF
format. The handler processes raw HELCOM data, applying various transformations and lookups to align it with MARIS
data standards.
Key functions of this handler:
+NetCDF
format compatible with MARIS requirements
This handler is a crucial component in the Marisco data processing workflow, ensuring HELCOM data is properly integrated into the MARIS database.
+Note: Additionally, an optional encoder (pipeline) is provided below to process data into a .csv
format compatible with the MARIS master database. This feature is maintained for legacy purposes, as data ingestion was previously performed using OpenRefine.
For new MARIS users, please refer to Understanding MARIS Data Formats (NetCDF and Open Refine) for detailed information.
+The present notebook aims to be an instance of Literate Programming in the sense that it is a narrative that includes code snippets interspersed with explanations. When a function or a class needs to be exported to a dedicated Python module (in our case marisco/handlers/helcom.py
) the code snippet is added to the module using #| exports
as provided by the wonderful nbdev library.
fname_in: path to the folder containing the HELCOM data in CSV format. The path can be defined as a relative path.
fname_out_nc: path and filename for the NetCDF output. The path can be defined as a relative path.
fname_out_csv: path and filename for the Open Refine csv output. The path can be defined as a relative path.
Zotero key: used to retrieve attributes related to the dataset from Zotero. The MARIS datasets include a library available on Zotero.
ref_id: refers to the location in Archive of the Zotero library.
Helcom MORS (Monitoring of Radioactive Substances in the Baltic Sea) data is provided as a Microsoft Access database. Mdbtools
can be used to convert the tables of the Microsoft Access database to .csv
files on Unix-like OS.
Example steps:
+Install mdbtools via VScode Terminal:
+sudo apt-get -y install mdbtools
Install unzip via VScode Terminal:
+sudo apt-get -y install unzip
In VS Code
terminal (for instance), navigate to the marisco data folder:
cd /home/marisco/downloads/marisco/_data/accdb/mors_19840101_20211231
Unzip MORS_ENVIRONMENT.zip
:
unzip MORS_ENVIRONMENT.zip
Run preprocess.sh
to generate the required data files:
./preprocess.sh MORS_ENVIRONMENT.zip
Content of preprocess.sh
script:
#!/bin/bash
+
+# Example of use: ./preprocess.sh MORS_ENVIRONMENT.zip
+unzip $1
+dbname=$(ls *.accdb)
+mkdir csv
+for table in $(mdb-tables -1 "$dbname"); do
+ echo "Export table $table"
+ mdb-export "$dbname" "$table" > "csv/$table.csv"
+done
Once converted to .csv
files, the data is ready to be loaded into a dictionary of dataframes.
+++load_data (src_dir:str|pathlib.Path, smp_types:list=[('SEA', 'seawater'), + ('SED', 'sediment'), ('BIO', 'biota')])
Load HELCOM data and return the data in a dictionary of dataframes with the dictionary key as the sample type.
++ | Type | +Default | +Details | +
---|---|---|---|
src_dir | +str | pathlib.Path | ++ | The directory where the source CSV files are located | +
smp_types | +list | +[(‘SEA’, ‘seawater’), (‘SED’, ‘sediment’), (‘BIO’, ‘biota’)] | +A list of tuples, each containing the file prefix and the corresponding sample type name | +
Returns | +Dict | ++ | A dictionary with sample types as keys and their corresponding dataframes as values | +
def load_data(src_dir: str|Path, # The directory where the source CSV files are located
              smp_types: list=default_smp_types # A list of tuples, each containing the file prefix and the corresponding sample type name
             ) -> Dict[str, pd.DataFrame]: # A dictionary with sample types as keys and their corresponding dataframes as values
    "Read the HELCOM CSV files of each sample type and return them as a dictionary of dataframes keyed by sample type."
    base_dir = Path(src_dir)

    def _read_and_join(prefix: str) -> pd.DataFrame:
        "Left-join the `<prefix>02.csv` table with the `<prefix>01.csv` table on `KEY`."
        try:
            measurements = pd.read_csv(base_dir / f'{prefix}02.csv')
            samples = pd.read_csv(base_dir / f'{prefix}01.csv')
        except FileNotFoundError as e:
            print(f"Error loading files for {prefix}: {e}")
            # A missing file yields an empty dataframe for that sample type
            return pd.DataFrame()
        return pd.merge(measurements, samples, on='KEY', how='left')

    loaded = {}
    for prefix, smp_type in smp_types:
        loaded[smp_type] = _read_and_join(prefix)
    return loaded
dfs
is a dictionary of dataframes created from the Helcom dataset located at the path fname_in
. The data to be included in each dataframe is sorted by sample type. Each dictionary is defined with a key equal to the sample type.
dfs = load_data(fname_in)
+
+#|eval: false
+dfs = load_data(fname_in)
+print('keys/sample types: ', dfs.keys())
+
+for key in dfs.keys():
+ print(f'{key} columns: ', dfs[key].columns)
keys/sample types: dict_keys(['seawater', 'sediment', 'biota'])
+seawater columns: Index(['KEY', 'NUCLIDE', 'METHOD', '< VALUE_Bq/m³', 'VALUE_Bq/m³', 'ERROR%_m³',
+ 'DATE_OF_ENTRY_x', 'COUNTRY', 'LABORATORY', 'SEQUENCE', 'DATE', 'YEAR',
+ 'MONTH', 'DAY', 'STATION', 'LATITUDE (ddmmmm)', 'LATITUDE (dddddd)',
+ 'LONGITUDE (ddmmmm)', 'LONGITUDE (dddddd)', 'TDEPTH', 'SDEPTH', 'SALIN',
+ 'TTEMP', 'FILT', 'MORS_SUBBASIN', 'HELCOM_SUBBASIN', 'DATE_OF_ENTRY_y'],
+ dtype='object')
+sediment columns: Index(['KEY', 'NUCLIDE', 'METHOD', '< VALUE_Bq/kg', 'VALUE_Bq/kg', 'ERROR%_kg',
+ '< VALUE_Bq/m²', 'VALUE_Bq/m²', 'ERROR%_m²', 'DATE_OF_ENTRY_x',
+ 'COUNTRY', 'LABORATORY', 'SEQUENCE', 'DATE', 'YEAR', 'MONTH', 'DAY',
+ 'STATION', 'LATITUDE (ddmmmm)', 'LATITUDE (dddddd)',
+ 'LONGITUDE (ddmmmm)', 'LONGITUDE (dddddd)', 'DEVICE', 'TDEPTH',
+ 'UPPSLI', 'LOWSLI', 'AREA', 'SEDI', 'OXIC', 'DW%', 'LOI%',
+ 'MORS_SUBBASIN', 'HELCOM_SUBBASIN', 'SUM_LINK', 'DATE_OF_ENTRY_y'],
+ dtype='object')
+biota columns: Index(['KEY', 'NUCLIDE', 'METHOD', '< VALUE_Bq/kg', 'VALUE_Bq/kg', 'BASIS',
+ 'ERROR%', 'NUMBER', 'DATE_OF_ENTRY_x', 'COUNTRY', 'LABORATORY',
+ 'SEQUENCE', 'DATE', 'YEAR', 'MONTH', 'DAY', 'STATION',
+ 'LATITUDE ddmmmm', 'LATITUDE dddddd', 'LONGITUDE ddmmmm',
+ 'LONGITUDE dddddd', 'SDEPTH', 'RUBIN', 'BIOTATYPE', 'TISSUE', 'NO',
+ 'LENGTH', 'WEIGHT', 'DW%', 'LOI%', 'MORS_SUBBASIN', 'HELCOM_SUBBASIN',
+ 'DATE_OF_ENTRY_y'],
+ dtype='object')
+The sample type (seawater
, biota
, sediment
, …) as defined in the configs.ipynb
are encoded as group names in the NetCDF produced. Sample type ids are added to the individual dataframes using the AddSampleTypeIdColumnCB
callback for legacy purposes (i.e. Open Refine output).
dfs = load_data(fname_in)
+tfm = Transformer(dfs, cbs=[AddSampleTypeIdColumnCB(),
+ CompareDfsAndTfmCB(dfs)
+ ])
+
+print(tfm()['seawater'][['KEY', 'samptype_id']].head())
+print(pd.DataFrame.from_dict(tfm.compare_stats) , '\n')
KEY samptype_id
+0 WKRIL2012003 1
+1 WKRIL2012004 1
+2 WKRIL2012005 1
+3 WKRIL2012006 1
+4 WKRIL2012007 1
+ seawater sediment biota
+Number of rows in dfs 21216 39817 15827
+Number of rows in tfm.dfs 21216 39817 15827
+Number of dropped rows 0 0 0
+Number of rows in tfm.dfs + Number of dropped rows 21216 39817 15827
+
+FEEDBACK TO DATA PROVIDER: Some nuclide names contain one or multiple trailing spaces.
+This is demonstrated below for the NUCLIDE
column:
df = get_unique_across_dfs(load_data(fname_in), 'NUCLIDE', as_df=True, include_nchars=True)
+df['stripped_chars'] = df['value'].str.strip().str.replace(' ', '').str.len()
+print(df[df['n_chars'] != df['stripped_chars']])
index value n_chars stripped_chars
+14 14 CS137 9 5
+20 20 SR90 6 4
+31 31 PU238 8 5
+34 34 CS137 6 5
+37 37 K40 8 3
+53 53 SR90 7 4
+54 54 SR90 5 4
+59 59 SR90 8 4
+62 62 CO60 8 4
+69 69 CS134 8 5
+73 73 TC99 7 4
+75 75 AM241 8 5
+91 91 CS137 8 5
+To fix this issue, we use the LowerStripNameCB
callback. For each dataframe in the dictionary of dataframes, it corrects the nuclide name by converting it to lowercase, stripping any leading or trailing whitespace(s) and ensuring the number comes before letters (e.g. 137cs
).
dfs = load_data(fname_in)
tfm = Transformer(dfs, cbs=[LowerStripNameCB(col_src='NUCLIDE')])

# Run the transformer once and reuse its output: calling `tfm()` in the loop
# header and body would re-run the whole pipeline at every access.
dfs_out = tfm()
for key in dfs_out.keys():
    print(f'{key} nuclides: ')
    print(dfs_out[key]['NUCLIDE'].unique())
seawater nuclides:
+['cs137' 'sr90' 'h3' 'cs134' 'pu238' 'pu239240' 'am241' 'cm242' 'cm244'
+ 'tc99' 'k40' 'ru103' 'sr89' 'sb125' 'nb95' 'ru106' 'zr95' 'ag110m'
+ 'cm243244' 'ba140' 'ce144' 'u234' 'u238' 'co60' 'pu239' 'pb210' 'po210'
+ 'np237' 'pu240' 'mn54']
+sediment nuclides:
+['ra226' 'cs137' 'ra228' 'k40' 'sr90' 'cs134137' 'cs134' 'pu239240'
+ 'pu238' 'co60' 'ru103' 'ru106' 'sb125' 'ag110m' 'ce144' 'am241' 'be7'
+ 'th228' 'pb210' 'co58' 'mn54' 'zr95' 'ba140' 'po210' 'ra224' 'nb95'
+ 'pu238240' 'pu241' 'pu239' 'eu155' 'ir192' 'th232' 'cd109' 'sb124' 'zn65'
+ 'th234' 'tl208' 'pb212' 'pb214' 'bi214' 'ac228' 'ra223' 'u235' 'bi212']
+biota nuclides:
+['cs134' 'k40' 'co60' 'cs137' 'sr90' 'ag108m' 'mn54' 'co58' 'ag110m'
+ 'zn65' 'sb125' 'pu239240' 'ru106' 'be7' 'ce144' 'pb210' 'po210' 'sb124'
+ 'sr89' 'zr95' 'te129m' 'ru103' 'nb95' 'ce141' 'la140' 'i131' 'ba140'
+ 'pu238' 'u235' 'bi214' 'pb214' 'pb212' 'tl208' 'ac228' 'ra223' 'eu155'
+ 'ra226' 'gd153' 'sn113' 'fe59' 'tc99' 'co57' 'sn117m' 'eu152' 'sc46'
+ 'rb86' 'ra224' 'th232' 'cs134137' 'am241' 'ra228' 'th228' 'k-40' 'cs138'
+ 'cs139' 'cs140' 'cs141' 'cs142' 'cs143' 'cs144' 'cs145' 'cs146']
+We below map nuclide names used by HELCOM to the MARIS standard nuclide names.
+Remapping data provider nomenclatures into MARIS standards is a recurrent operation and is done in a semi-automated manner according to the following pattern:
+From now on, we will use this pattern to remap the HELCOM data provider nomenclatures into MARIS standards and, for the sake of brevity, name it IMFA (Inspect, Match, Fix, Apply).
+The unique values of the data provider nuclide names. The get_unique_across_dfs
is a utility function that retrieves the unique values of a specific column across all dataframes (recall that we have one dataframe per sample type - biota, …).
dfs = load_data(fname_in)
+tfm = Transformer(dfs, cbs=[LowerStripNameCB(col_src='NUCLIDE')])
+dfs_output = tfm()
+
+get_unique_across_dfs(dfs_output, col_name='NUCLIDE', as_df=True).head(5)
+ | index | +value | +
---|---|---|
0 | +0 | +pu239240 | +
1 | +1 | +cs144 | +
2 | +2 | +cs141 | +
3 | +3 | +cs140 | +
4 | +4 | +sn117m | +
Let’s now create an instance of a fuzzy matching algorithm Remapper
:
And try to match HELCOM to MARIS nuclide names as automatically as possible. The match_score
column allows to assess the results:
Processing: 100%|██████████| 77/77 [00:02<00:00, 32.60it/s]
++ | matched_maris_name | +source_name | +match_score | +
---|---|---|---|
source_key | ++ | + | + |
pu239240 | +pu240 | +pu239240 | +3 | +
pu238240 | +pu240 | +pu238240 | +3 | +
cm243244 | +cm244 | +cm243244 | +3 | +
cs134137 | +cs137 | +cs134137 | +3 | +
cs142 | +ce144 | +cs142 | +2 | +
cs145 | +cs136 | +cs145 | +2 | +
cs143 | +cs127 | +cs143 | +2 | +
cs144 | +ce144 | +cs144 | +1 | +
cs141 | +ce141 | +cs141 | +1 | +
cs140 | +ce140 | +cs140 | +1 | +
cs138 | +cs137 | +cs138 | +1 | +
cs139 | +ce139 | +cs139 | +1 | +
cs146 | +cs136 | +cs146 | +1 | +
k-40 | +k40 | +k-40 | +1 | +
We then manually inspect the remaining unmatched names and create a fixes table to map them to the correct MARIS standards:
+fixes_nuclide_names = {
+ 'cs134137': 'cs134_137_tot',
+ 'cm243244': 'cm243_244_tot',
+ 'pu239240': 'pu239_240_tot',
+ 'pu238240': 'pu238_240_tot',
+ 'cs143': 'cs137',
+ 'cs145': 'cs137',
+ 'cs142': 'cs137',
+ 'cs141': 'cs137',
+ 'cs144': 'cs137',
+ 'k-40': 'k40',
+ 'cs140': 'cs137',
+ 'cs146': 'cs137',
+ 'cs139': 'cs137',
+ 'cs138': 'cs137'
+ }
Let’s try to match again but this time we use the fixes_nuclide_names
to map the nuclide names to the MARIS standards:
remapper.generate_lookup_table(as_df=True, fixes=fixes_nuclide_names)
+fc.test_eq(len(remapper.select_match(match_score_threshold=1)), 0)
Processing: 100%|██████████| 77/77 [00:01<00:00, 47.36it/s]
+Test passes! We can now create a callback RemapNuclideNameCB
to remap the nuclide names. Note that we pass overwrite=False
to the Remapper
generate_lookup_table method so that the cached version is used.
+++RemapNuclideNameCB (fn_lut:Callable)
Remap data provider nuclide names to MARIS nuclide names.
++ | Type | +Details | +
---|---|---|
fn_lut | +Callable | +Function that returns the lookup table dictionary | +
# Create a lookup table for nuclide names
def lut_nuclides(df):
    "Return the nuclide lookup table (as a dictionary) built from the provider names in `df`, reusing the cached version when available."
    remapper = Remapper(provider_lut_df=df,
                        maris_lut_fn=nuc_lut_path,
                        maris_col_id='nuclide_id',
                        maris_col_name='nc_name',
                        provider_col_to_match='value',
                        provider_col_key='value',
                        fname_cache='nuclides_helcom.pkl')
    return remapper.generate_lookup_table(fixes=fixes_nuclide_names,
                                          as_df=False, overwrite=False)
class RemapNuclideNameCB(Callback):
    "Replace the data provider nuclide names of the `NUCLIDE` column with their MARIS counterparts."
    def __init__(self,
                 fn_lut: Callable # Function that returns the lookup table dictionary
                 ):
        fc.store_attr()

    def __call__(self, tfm: Transformer):
        "Build the name mapping from the unique `NUCLIDE` values of all dataframes and apply it to each of them."
        uniques = get_unique_across_dfs(tfm.dfs, col_name='NUCLIDE', as_df=True)
        matches = self.fn_lut(uniques)
        # Keep only the matched MARIS name of each lookup table entry
        mapping = {src: m.matched_maris_name for src, m in matches.items()}
        for df in tfm.dfs.values():
            df['NUCLIDE'] = df['NUCLIDE'].replace(mapping)
Let’s see it in action, along with the LowerStripNameCB
callback:
dfs = load_data(fname_in)
+tfm = Transformer(dfs, cbs=[LowerStripNameCB(col_src='NUCLIDE'),
+ RemapNuclideNameCB(lut_nuclides)
+ ])
+dfs_out = tfm()
+
+# For instance
+dfs_out['biota'].NUCLIDE.unique()
array(['cs134', 'k40', 'co60', 'cs137', 'sr90', 'ag108m', 'mn54', 'co58',
+ 'ag110m', 'zn65', 'sb125', 'pu239_240_tot', 'ru106', 'be7',
+ 'ce144', 'pb210', 'po210', 'sb124', 'sr89', 'zr95', 'te129m',
+ 'ru103', 'nb95', 'ce141', 'la140', 'i131', 'ba140', 'pu238',
+ 'u235', 'bi214', 'pb214', 'pb212', 'tl208', 'ac228', 'ra223',
+ 'eu155', 'ra226', 'gd153', 'sn113', 'fe59', 'tc99', 'co57',
+ 'sn117m', 'eu152', 'sc46', 'rb86', 'ra224', 'th232',
+ 'cs134_137_tot', 'am241', 'ra228', 'th228'], dtype=object)
+The nuclide_id
column is added to the dataframe for legacy reasons (again Open Refine output).
dfs = load_data(fname_in)
+tfm = Transformer(dfs, cbs=[LowerStripNameCB(col_src='NUCLIDE'),
+ RemapNuclideNameCB(lut_nuclides),
+ AddNuclideIdColumnCB(col_value='NUCLIDE')
+ ])
+dfs_out = tfm()
+
+# For instance
+dfs_out['biota'][['NUCLIDE', 'nuclide_id']]
+ | NUCLIDE | +nuclide_id | +
---|---|---|
0 | +cs134 | +31 | +
1 | +k40 | +4 | +
2 | +co60 | +9 | +
3 | +cs137 | +33 | +
4 | +cs134 | +31 | +
... | +... | +... | +
15822 | +k40 | +4 | +
15823 | +cs137 | +33 | +
15824 | +be7 | +2 | +
15825 | +k40 | +4 | +
15826 | +cs137 | +33 | +
15827 rows × 2 columns
+FEEDBACK TO DATA PROVIDER: Time/date is provided in the DATE
, YEAR
, MONTH
, DAY
columns. Note that the DATE
contains missing values as indicated below. When missing, we fall back on the YEAR
, MONTH
, DAY
columns. Note also that sometimes DAY
and MONTH
contain 0. In this case we systematically set them to 1.
dfs = load_data(fname_in)
+for key in dfs.keys():
+ print(f'{key} DATE null values: ', dfs[key]['DATE'].isna().sum())
seawater DATE null values: 502
+sediment DATE null values: 741
+biota DATE null values: 72
++++ParseTimeCB ()
Parse and standardize time information in the dataframe.
class ParseTimeCB(Callback):
    "Derive standardized `time` and `begperiod` columns from the date columns of each dataframe."
    def __call__(self, tfm: Transformer):
        for frame in tfm.dfs.values():
            self._process_dates(frame)
            self._define_beg_period(frame)

    def _process_dates(self, df: pd.DataFrame) -> None:
        "Set the `time` column from `DATE`, falling back on `YEAR`, `MONTH` and `DAY` where it could not be parsed."
        df['time'] = self._parse_date(df)
        self._handle_missing_dates(df)
        self._fill_missing_time(df)

    def _parse_date(self, df: pd.DataFrame) -> pd.Series:
        "Convert the `DATE` column to datetimes; values that cannot be parsed become `NaT`."
        return pd.to_datetime(df['DATE'], format='%m/%d/%y %H:%M:%S', errors='coerce')

    def _handle_missing_dates(self, df: pd.DataFrame):
        "Replace `DAY`/`MONTH` equal to 0 by 1, and set both to 1 when both are missing while `YEAR` is known."
        for col in ("DAY", "MONTH"):
            df.loc[df[col] == 0, col] = 1

        year_only = df["YEAR"].notna() & df["DAY"].isna() & df["MONTH"].isna()
        df.loc[year_only, ["DAY", "MONTH"]] = 1

    def _fill_missing_time(self, df: pd.DataFrame) -> None:
        "Where `time` is still missing, rebuild it from the `YEAR`, `MONTH` and `DAY` columns."
        no_time = df['time'].isna()
        ymd = df.loc[no_time, ['YEAR', 'MONTH', 'DAY']]
        df.loc[no_time, 'time'] = pd.to_datetime(ymd, format='%Y%m%d', errors='coerce')

    def _define_beg_period(self, df: pd.DataFrame) -> None:
        "Copy `time` into `begperiod` (date representation for Open Refine)."
        df['begperiod'] = df['time']
Apply the transformer for callbacks ParseTimeCB
. Then, print the begperiod
and time
data for seawater
.
dfs = load_data(fname_in)
+tfm = Transformer(dfs, cbs=[ParseTimeCB(),
+ CompareDfsAndTfmCB(dfs)
+ ])
+tfm()
+print(pd.DataFrame.from_dict(tfm.compare_stats) , '\n')
+print(tfm.dfs['seawater'][['begperiod','time']])
seawater sediment biota
+Number of rows in dfs 21216 39817 15827
+Number of rows in tfm.dfs 21216 39817 15827
+Number of dropped rows 0 0 0
+Number of rows in tfm.dfs + Number of dropped rows 21216 39817 15827
+
+ begperiod time
+0 2012-05-23 2012-05-23
+1 2012-05-23 2012-05-23
+2 2012-06-17 2012-06-17
+3 2012-05-24 2012-05-24
+4 2012-05-24 2012-05-24
+... ... ...
+21211 2021-10-15 2021-10-15
+21212 2021-11-04 2021-11-04
+21213 2021-10-15 2021-10-15
+21214 2021-05-17 2021-05-17
+21215 2021-05-13 2021-05-13
+
+[21216 rows x 2 columns]
+NetCDF time format requires the time to be encoded as the number of seconds since a time of origin. In our case the time of origin is 1970-01-01
as indicated in configs.ipynb
CONFIGS['units']['time']
dictionary.
EncodeTimeCB
converts the HELCOM time
format to the MARIS NetCDF time
format.
dfs = load_data(fname_in)
+tfm = Transformer(dfs, cbs=[ParseTimeCB(),
+ EncodeTimeCB(cfg(), verbose=True),
+ CompareDfsAndTfmCB(dfs)
+ ])
+tfm()
+print(pd.DataFrame.from_dict(tfm.compare_stats) , '\n')
8 of 21216 entries for `time` are invalid for seawater.
+1 of 39817 entries for `time` are invalid for sediment.
+ seawater sediment biota
+Number of rows in dfs 21216 39817 15827
+Number of rows in tfm.dfs 21208 39816 15827
+Number of dropped rows 8 1 0
+Number of rows in tfm.dfs + Number of dropped rows 21216 39817 15827
+
++ | KEY | +NUCLIDE | +METHOD | +< VALUE_Bq/m³ | +VALUE_Bq/m³ | +ERROR%_m³ | +DATE_OF_ENTRY_x | +COUNTRY | +LABORATORY | +SEQUENCE | +... | +TDEPTH | +SDEPTH | +SALIN | +TTEMP | +FILT | +MORS_SUBBASIN | +HELCOM_SUBBASIN | +DATE_OF_ENTRY_y | +time | +begperiod | +
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | +WKRIL2012003 | +CS137 | +NaN | +NaN | +5.3 | +32.000000 | +08/20/14 00:00:00 | +90.0 | +KRIL | +2012003.0 | +... | +NaN | +0.0 | +NaN | +NaN | +NaN | +11.0 | +11.0 | +08/20/14 00:00:00 | +1337731200 | +2012-05-23 | +
1 | +WKRIL2012004 | +CS137 | +NaN | +NaN | +19.9 | +20.000000 | +08/20/14 00:00:00 | +90.0 | +KRIL | +2012004.0 | +... | +NaN | +29.0 | +NaN | +NaN | +NaN | +11.0 | +11.0 | +08/20/14 00:00:00 | +1337731200 | +2012-05-23 | +
2 | +WKRIL2012005 | +CS137 | +NaN | +NaN | +25.5 | +20.000000 | +08/20/14 00:00:00 | +90.0 | +KRIL | +2012005.0 | +... | +NaN | +0.0 | +NaN | +NaN | +NaN | +11.0 | +3.0 | +08/20/14 00:00:00 | +1339891200 | +2012-06-17 | +
3 | +WKRIL2012006 | +CS137 | +NaN | +NaN | +17.0 | +29.000000 | +08/20/14 00:00:00 | +90.0 | +KRIL | +2012006.0 | +... | +NaN | +0.0 | +NaN | +NaN | +NaN | +11.0 | +11.0 | +08/20/14 00:00:00 | +1337817600 | +2012-05-24 | +
4 | +WKRIL2012007 | +CS137 | +NaN | +NaN | +22.2 | +18.000000 | +08/20/14 00:00:00 | +90.0 | +KRIL | +2012007.0 | +... | +NaN | +39.0 | +NaN | +NaN | +NaN | +11.0 | +11.0 | +08/20/14 00:00:00 | +1337817600 | +2012-05-24 | +
... | +... | +... | +... | +... | +... | +... | +... | +... | +... | +... | +... | +... | +... | +... | +... | +... | +... | +... | +... | +... | +... | +
21211 | +WSSSM2021005 | +H3 | +SSM45 | +NaN | +1030.0 | +93.203883 | +09/06/22 00:00:00 | +77.0 | +SSSM | +202105.0 | +... | +NaN | +1.0 | +NaN | +NaN | +N | +1.0 | +8.0 | +09/06/22 00:00:00 | +1634256000 | +2021-10-15 | +
21212 | +WSSSM2021006 | +H3 | +SSM45 | +NaN | +2240.0 | +43.303571 | +09/06/22 00:00:00 | +77.0 | +SSSM | +202106.0 | +... | +NaN | +1.0 | +NaN | +NaN | +N | +10.0 | +10.0 | +09/06/22 00:00:00 | +1635984000 | +2021-11-04 | +
21213 | +WSSSM2021007 | +H3 | +SSM45 | +NaN | +2060.0 | +47.087379 | +09/06/22 00:00:00 | +77.0 | +SSSM | +202107.0 | +... | +NaN | +1.0 | +NaN | +NaN | +N | +12.0 | +12.0 | +09/06/22 00:00:00 | +1634256000 | +2021-10-15 | +
21214 | +WSSSM2021008 | +H3 | +SSM45 | +NaN | +2300.0 | +43.478261 | +09/06/22 00:00:00 | +77.0 | +SSSM | +202108.0 | +... | +NaN | +1.0 | +NaN | +NaN | +N | +12.0 | +12.0 | +09/06/22 00:00:00 | +1621209600 | +2021-05-17 | +
21215 | +WSSSM2021004 | +H3 | +SSM45 | +< | +NaN | +NaN | +09/06/22 00:00:00 | +77.0 | +SSSM | +202104.0 | +... | +NaN | +1.0 | +NaN | +NaN | +N | +15.0 | +18.0 | +09/06/22 00:00:00 | +1620864000 | +2021-05-13 | +
21208 rows × 29 columns
+We allocate each column containing measurement values (named differently across sample types, since units
are also included in the column names) into a single column value
and remove NA where needed.
+++SanitizeValue (coi:Dict[str,Dict[str,str]])
Sanitize value/measurement by removing blank entries and populating value
column.
+ | Type | +Details | +
---|---|---|
coi | +Dict | +Columns of interest. Format: {group_name: {‘val’: ‘column_name’}} | +
class SanitizeValue(Callback):
    "Drop rows without a measurement and copy the measurement into a `value` column."
    def __init__(self,
                 coi: Dict[str, Dict[str, str]] # Columns of interest. Format: {group_name: {'val': 'column_name'}}
                 ):
        fc.store_attr()

    def __call__(self, tfm: Transformer):
        for group in tfm.dfs:
            frame = tfm.dfs[group]
            source_col = self.coi[group]['val']
            # Rows are removed in place, so `tfm.dfs[group]` keeps pointing to the same object.
            frame.dropna(subset=[source_col], inplace=True)
            frame['value'] = frame[source_col]
dfs = load_data(fname_in)
+tfm = Transformer(dfs, cbs=[SanitizeValue(coi_val),
+ CompareDfsAndTfmCB(dfs)
+ ])
+
+tfm()
+print(pd.DataFrame.from_dict(tfm.compare_stats) , '\n')
seawater sediment biota
+Number of rows in dfs 21216 39817 15827
+Number of rows in tfm.dfs 21122 39532 15798
+Number of dropped rows 94 285 29
+Number of rows in tfm.dfs + Number of dropped rows 21216 39817 15827
+
+Function unc_rel2stan
converts uncertainty from relative uncertainty to standard uncertainty.
+++unc_rel2stan (df:pandas.core.frame.DataFrame, meas_col:str, unc_col:str)
Convert relative uncertainty to absolute uncertainty.
++ | Type | +Details | +
---|---|---|
df | +DataFrame | +DataFrame containing measurement and uncertainty columns | +
meas_col | +str | +Name of the column with measurement values | +
unc_col | +str | +Name of the column with relative uncertainty values (percentages) | +
Returns | +Series | +Series with calculated absolute uncertainties | +
def unc_rel2stan(
    df: pd.DataFrame, # DataFrame containing measurement and uncertainty columns
    meas_col: str, # Name of the column with measurement values
    unc_col: str # Name of the column with relative uncertainty values (percentages)
) -> pd.Series: # Series with calculated absolute uncertainties
    "Convert relative uncertainty (in %) to absolute uncertainty, expressed in the unit of the measurement."
    # Column-wise arithmetic rather than a row-wise `apply`: the operations and their
    # order are unchanged, but the result is always a Series (a row-wise `apply` on an
    # empty DataFrame hands back a DataFrame) and no Python-level loop over rows is needed.
    return df[unc_col] * df[meas_col] / 100
For each sample type in the Helcom dataset, the uncertainty is given as a relative uncertainty. The column names for both the value and the uncertainty vary by sample type. The coi_units_unc dictionary defines the column names for the Value and Uncertainty for each sample type.
+NormalizeUncCB callback normalizes the uncertainty by converting from relative uncertainty to standard uncertainty.
++++NormalizeUncCB (fn_convert_unc:Callable=<function unc_rel2stan>, + coi:List[Tuple[str,str,str]]=[('seawater', 'VALUE_Bq/m³', + 'ERROR%_m³'), ('biota', 'VALUE_Bq/kg', 'ERROR%'), + ('sediment', 'VALUE_Bq/kg', 'ERROR%_kg')])
Convert from relative error % to uncertainty of activity unit.
++ | Type | +Default | +Details | +
---|---|---|---|
fn_convert_unc | +Callable | +unc_rel2stan | +Function converting relative uncertainty to absolute uncertainty | +
coi | +List | +[(‘seawater’, ‘VALUE_Bq/m³’, ’ERROR%_m³’), (‘biota’, ‘VALUE_Bq/kg’, ‘ERROR%’), (‘sediment’, ‘VALUE_Bq/kg’, ’ERROR%_kg’)] | +List of columns of interest | +
class NormalizeUncCB(Callback):
    "Add an `uncertainty` column obtained by converting the relative error % of each group."
    def __init__(self,
                 fn_convert_unc: Callable=unc_rel2stan, # Function converting relative uncertainty to absolute uncertainty
                 coi: List[Tuple[str, str, str]]=coi_units_unc # List of columns of interest
                ):
        fc.store_attr()

    def __call__(self, tfm: Transformer):
        # Each entry of `coi` is (group name, value column, relative uncertainty column).
        for grp, val, unc in self.coi:
            if grp not in tfm.dfs:
                continue
            frame = tfm.dfs[grp]
            frame['uncertainty'] = self.fn_convert_unc(frame, val, unc)
Apply the transformer for callback NormalizeUncCB(). Then, print the value (i.e. activity per unit ) and standard uncertainty for each sample type.
dfs = load_data(fname_in)
tfm = Transformer(dfs, cbs=[NormalizeUncCB(),
                            SanitizeValue(coi_val)])

# Run the pipeline once and reuse its output instead of re-running it for every group.
dfs_out = tfm()
for grp in ['seawater', 'biota', 'sediment']:
    print(dfs_out[grp][['value', 'uncertainty']][:5])
value uncertainty
+0 5.3 1.696
+1 19.9 3.980
+2 25.5 5.100
+3 17.0 4.930
+4 22.2 3.996
+ value uncertainty
+0 0.010140 NaN
+1 135.300000 4.830210
+2 0.013980 NaN
+3 4.338000 0.150962
+4 0.009614 NaN
+ value uncertainty
+0 35.0 9.10
+1 36.0 7.92
+2 38.0 9.12
+3 36.0 9.00
+4 30.0 6.90
+In the following processing steps, we follow the same approach as for the remapping of nuclide names above.
+Let’s inspect the RUBIN_NAME.csv
file provided by HELCOM describing the biota species nomenclature.
+ | RUBIN_ID | +RUBIN | +SCIENTIFIC NAME | +ENGLISH NAME | +
---|---|---|---|---|
0 | +11 | +ABRA BRA | +ABRAMIS BRAMA | +BREAM | +
1 | +12 | +ANGU ANG | +ANGUILLA ANGUILLA | +EEL | +
2 | +13 | +ARCT ISL | +ARCTICA ISLANDICA | +ISLAND CYPRINE | +
3 | +14 | +ASTE RUB | +ASTERIAS RUBENS | +COMMON STARFISH | +
4 | +15 | +CARD EDU | +CARDIUM EDULE | +COCKLE | +
We try to remap the SCIENTIFIC NAME
column to the species
column of the MARIS nomenclature, again using a Remapper
object:
remapper = Remapper(provider_lut_df=pd.read_csv(Path(fname_in) / 'RUBIN_NAME.csv'),
+ maris_lut_fn=species_lut_path,
+ maris_col_id='species_id',
+ maris_col_name='species',
+ provider_col_to_match='SCIENTIFIC NAME',
+ provider_col_key='RUBIN',
+ fname_cache='species_helcom.pkl'
+ )
+
+remapper.generate_lookup_table(as_df=True)
+remapper.select_match(match_score_threshold=1)
Processing: 100%|██████████| 46/46 [00:07<00:00, 6.41it/s]
++ | matched_maris_name | +source_name | +match_score | +
---|---|---|---|
source_key | ++ | + | + |
STIZ LUC | +Sander lucioperca | +STIZOSTEDION LUCIOPERCA | +10 | +
LAMI SAC | +Laminaria japonica | +LAMINARIA SACCHARINA | +7 | +
CARD EDU | +Cardiidae | +CARDIUM EDULE | +6 | +
ENCH CIM | +Echinodermata | +ENCHINODERMATA CIM | +5 | +
PSET MAX | +Pinctada maxima | +PSETTA MAXIMA | +5 | +
MACO BAL | +Macoma balthica | +MACOMA BALTICA | +1 | +
STUC PEC | +Stuckenia pectinata | +STUCKENIA PECTINATE | +1 | +
We fix below some of the entries that are not properly matched by the Remapper
object:
And give it another try:
+remapper.generate_lookup_table(fixes=fixes_biota_species)
+remapper.select_match(match_score_threshold=1)
Processing: 100%|██████████| 46/46 [00:07<00:00, 5.79it/s]
++ | matched_maris_name | +source_name | +match_score | +
---|---|---|---|
source_key | ++ | + | + |
ENCH CIM | +Echinodermata | +ENCHINODERMATA CIM | +5 | +
MACO BAL | +Macoma balthica | +MACOMA BALTICA | +1 | +
STIZ LUC | +Sander lucioperca | +STIZOSTEDION LUCIOPERCA | +1 | +
STUC PEC | +Stuckenia pectinata | +STUCKENIA PECTINATE | +1 | +
Visual inspection of the remaining imperfectly matched entries suggests it is acceptable to proceed.
+We can now use the generic RemapCB
callback to perform the remapping of the RUBIN
column to the species
column after having defined the lookup table lut_biota
.
lut_biota = lambda: Remapper(provider_lut_df=pd.read_csv(Path(fname_in) / 'RUBIN_NAME.csv'),
+ maris_lut_fn=species_lut_path,
+ maris_col_id='species_id',
+ maris_col_name='species',
+ provider_col_to_match='SCIENTIFIC NAME',
+ provider_col_key='RUBIN',
+ fname_cache='species_helcom.pkl'
+ ).generate_lookup_table(fixes=fixes_biota_species, as_df=False, overwrite=False)
dfs = load_data(fname_in)
+tfm = Transformer(dfs, cbs=[
+ RemapCB(fn_lut=lut_biota, col_remap='species', col_src='RUBIN', dest_grps='biota')
+ ])
+
+# For instance:
+print(tfm()['biota']['species'].unique())
[ 99 243 50 139 270 192 191 284 84 269 122 96 287 279
+ 278 288 286 244 129 275 271 285 283 247 120 59 280 274
+ 273 290 289 272 277 276 21 282 110 281 245 704 1524 703
+ 1611 621 60]
+Let’s inspect the TISSUE.csv
file provided by HELCOM describing the tissue nomenclature. Biota tissue is known as body part
in the maris data set.
+ | TISSUE | +TISSUE_DESCRIPTION | +
---|---|---|
0 | +1 | +WHOLE FISH | +
1 | +2 | +WHOLE FISH WITHOUT ENTRAILS | +
2 | +3 | +WHOLE FISH WITHOUT HEAD AND ENTRAILS | +
3 | +4 | +FLESH WITH BONES | +
4 | +5 | +FLESH WITHOUT BONES (FILETS) | +
remapper = Remapper(provider_lut_df=pd.read_csv('../../_data/accdb/mors/csv/TISSUE.csv'),
+ maris_lut_fn=bodyparts_lut_path,
+ maris_col_id='bodypar_id',
+ maris_col_name='bodypar',
+ provider_col_to_match='TISSUE_DESCRIPTION',
+ provider_col_key='TISSUE',
+ fname_cache='tissues_helcom.pkl'
+ )
+
+remapper.generate_lookup_table(as_df=True)
+remapper.select_match(match_score_threshold=1)
Processing: 100%|██████████| 29/29 [00:00<00:00, 139.43it/s]
++ | matched_maris_name | +source_name | +match_score | +
---|---|---|---|
source_key | ++ | + | + |
3 | +Flesh without bones | +WHOLE FISH WITHOUT HEAD AND ENTRAILS | +20 | +
2 | +Flesh without bones | +WHOLE FISH WITHOUT ENTRAILS | +13 | +
8 | +Soft parts | +SKIN/EPIDERMIS | +10 | +
5 | +Flesh without bones | +FLESH WITHOUT BONES (FILETS) | +9 | +
1 | +Whole animal | +WHOLE FISH | +5 | +
12 | +Brain | +ENTRAILS | +5 | +
15 | +Stomach and intestine | +STOMACH + INTESTINE | +3 | +
41 | +Whole animal | +WHOLE ANIMALS | +1 | +
We fix below some of the entries that are not properly matched by the Remapper
object:
remapper.generate_lookup_table(as_df=True, fixes=fixes_biota_tissues)
+remapper.select_match(match_score_threshold=1)
Processing: 100%|██████████| 29/29 [00:00<00:00, 142.26it/s]
++ | matched_maris_name | +source_name | +match_score | +
---|---|---|---|
source_key | ++ | + | + |
2 | +Flesh without bones | +WHOLE FISH WITHOUT ENTRAILS | +13 | +
5 | +Flesh without bones | +FLESH WITHOUT BONES (FILETS) | +9 | +
1 | +Whole animal | +WHOLE FISH | +5 | +
15 | +Stomach and intestine | +STOMACH + INTESTINE | +3 | +
41 | +Whole animal | +WHOLE ANIMALS | +1 | +
Visual inspection of the remaining imperfectly matched entries suggests it is acceptable to proceed.
+We can now use the generic RemapCB
callback to perform the remapping of the TISSUE
column to the body_part
column after having defined the lookup table lut_tissues
.
lut_tissues = lambda: Remapper(provider_lut_df=pd.read_csv('../../_data/accdb/mors/csv/TISSUE.csv'),
+ maris_lut_fn=bodyparts_lut_path,
+ maris_col_id='bodypar_id',
+ maris_col_name='bodypar',
+ provider_col_to_match='TISSUE_DESCRIPTION',
+ provider_col_key='TISSUE',
+ fname_cache='tissues_helcom.pkl'
+ ).generate_lookup_table(fixes=fixes_biota_tissues, as_df=False, overwrite=False)
dfs = load_data(fname_in)
+tfm = Transformer(dfs, cbs=[
+ RemapCB(fn_lut=lut_biota, col_remap='species', col_src='RUBIN', dest_grps='biota'),
+ RemapCB(lut_tissues, 'body_part', 'TISSUE', 'biota')
+ ])
+
+print(tfm()['biota'][['TISSUE', 'body_part']][:5])
TISSUE body_part
+0 5 52
+1 5 52
+2 5 52
+3 5 52
+4 5 52
+get_biogroup_lut
reads the file at species_lut_path()
and from the contents of this file creates a dictionary linking species_id
to biogroup_id
.
dfs = load_data(fname_in)
+tfm = Transformer(dfs, cbs=[
+ RemapCB(fn_lut=lut_biota, col_remap='species', col_src='RUBIN', dest_grps='biota'),
+ RemapCB(lut_tissues, 'body_part', 'TISSUE', 'biota'),
+ RemapCB(lut_biogroup, 'bio_group', 'species', 'biota')
+ ])
+
+print(tfm()['biota']['bio_group'].unique())
[ 4 2 14 11 8 3]
+Currently, the details (Taxonname
, TaxonRepName
, Taxonrank
) are used for importing into the MARIS master database, but they are not included in the NetCDF encoding.
We first need to retrieve the taxon information from the dbo_species.xlsx
file.
+++get_taxon_info_lut (maris_lut:str)
Retrieve a lookup table for Taxonname from a MARIS lookup table.
++ | Type | +Details | +
---|---|---|
maris_lut | +str | +Path to the MARIS lookup table (Excel file) | +
Returns | +dict | +A nested dictionary mapping each taxon column name to a {species_id: value} dictionary | +
# TODO: Include Commonname field after next MARIS data reconciling process.
def get_taxon_info_lut(
    maris_lut:str # Path to the MARIS lookup table (Excel file)
) -> dict: # Nested dictionary {column name: {species_id: value}}
    "Retrieve taxon information, keyed by `species_id`, from a MARIS species lookup table."
    taxon_cols = ['Taxonname', 'Taxonrank', 'TaxonDB', 'TaxonDBID', 'TaxonDBURL']
    species = pd.read_excel(maris_lut)
    indexed = species[['species_id'] + taxon_cols].set_index('species_id')
    # Default `to_dict` orientation: one inner dictionary per taxon column.
    return indexed.to_dict()
+
+lut_taxon = lambda: get_taxon_info_lut(species_lut_path())
+++RemapTaxonInformationCB (fn_lut:Callable)
Update taxon information based on MARIS species LUT.
class RemapTaxonInformationCB(Callback):
    "Update taxon information of the `biota` group based on MARIS species LUT."
    def __init__(self,
                 fn_lut: Callable # Function returning the lookup table, indexed first by column name then by species id
                ):
        self.fn_lut = fn_lut

    def __call__(self, tfm: Transformer):
        lut = self.fn_lut()
        df = tfm.dfs['biota']

        # Reported taxon name: the provider's `RUBIN` code, or 'Unknown' when that column is absent.
        df['TaxonRepName'] = df.get('RUBIN', 'Unknown')

        taxon_columns = ['Taxonname', 'Taxonrank', 'TaxonDB', 'TaxonDBID', 'TaxonDBURL']
        for col in taxon_columns:
            # Species ids missing from the lookup table are flagged as 'Unknown'.
            df[col] = df['species'].map(lut[col]).fillna('Unknown')

        unmatched = df[df['Taxonname'] == 'Unknown']['species'].unique()
        if len(unmatched) > 0:
            # Species ids are not necessarily strings (integers after remapping):
            # convert them explicitly, `str.join` rejects non-string items.
            print(f"Unmatched species IDs: {', '.join(map(str, unmatched))}")
dfs = load_data(fname_in)
+tfm = Transformer(dfs, cbs=[
+ RemapCB(fn_lut=lut_biota, col_remap='species', col_src='RUBIN', dest_grps='biota'),
+ RemapCB(lut_tissues, 'body_part', 'TISSUE', 'biota'),
+ RemapCB(lut_biogroup, 'bio_group', 'species', 'biota'),
+ RemapTaxonInformationCB(lut_taxon)
+ ])
+tfm()
+print(tfm.dfs['biota'][['TaxonRepName', 'Taxonname', 'Taxonrank',
+ 'TaxonDB','TaxonDBID','TaxonDBURL']].drop_duplicates().head())
TaxonRepName Taxonname Taxonrank TaxonDB TaxonDBID \
+0 GADU MOR Gadus morhua species Wikidata Q199788
+40 SPRA SPR Sprattus sprattus species Wikidata Q506823
+44 CLUP HAR Clupea harengus species Wikidata Q2396858
+77 MERL MNG Merlangius merlangus species Wikidata Q273083
+78 LIMA LIM Limanda limanda species Wikidata Q1135526
+
+ TaxonDBURL
+0 https://www.wikidata.org/wiki/Q199788
+40 https://www.wikidata.org/wiki/Q506823
+44 https://www.wikidata.org/wiki/Q2396858
+77 https://www.wikidata.org/wiki/Q273083
+78 https://www.wikidata.org/wiki/Q1135526
+We use again the same IMFA (Inspect, Match, Fix, Apply) pattern to remap the HELCOM sediment types.
+Let’s inspect the SEDIMENT_TYPE.csv
file provided by HELCOM describing the sediment type nomenclature:
+ | SEDI | +SEDIMENT TYPE | +RECOMMENDED TO BE USED | +
---|---|---|---|
0 | +-99 | +NO DATA | +NaN | +
1 | +0 | +GRAVEL | +YES | +
2 | +1 | +SAND | +YES | +
3 | +2 | +FINE SAND | +NO | +
4 | +3 | +SILT | +YES | +
FEEDBACK TO DATA PROVIDER: The SEDI
values 56
and 73
are not found in the SEDIMENT_TYPE.csv
lookup table provided. Note also there are many nan
values in the SEDIMENT_TYPE.csv
file.
We reassign them to -99
for now but should be clarified/fixed. This is demonstrated below.
df_sed_lut = pd.read_csv(Path(fname_in) / 'SEDIMENT_TYPE.csv')
+dfs = load_data(fname_in)
+
+sediment_sedi = set(dfs['sediment'].SEDI.unique())
+lookup_sedi = set(df_sed_lut['SEDI'])
+missing = sediment_sedi - lookup_sedi
+print(f"Missing SEDI values: {missing if missing else 'None'}")
Missing SEDI values: {56.0, 73.0, nan}
+Let’s try to match as many as possible:
+remapper = Remapper(provider_lut_df=pd.read_csv(Path(fname_in)/'SEDIMENT_TYPE.csv'),
+ maris_lut_fn=sediments_lut_path,
+ maris_col_id='sedtype_id',
+ maris_col_name='sedtype',
+ provider_col_to_match='SEDIMENT TYPE',
+ provider_col_key='SEDI',
+ fname_cache='sediments_helcom.pkl'
+ )
+
+remapper.generate_lookup_table(as_df=True)
+remapper.select_match(match_score_threshold=1)
Processing: 100%|██████████| 47/47 [00:00<00:00, 132.73it/s]
++ | matched_maris_name | +source_name | +match_score | +
---|---|---|---|
source_key | ++ | + | + |
-99 | +Soft | +NO DATA | +5 | +
50 | +Mud and gravel | +MUD AND GARVEL | +2 | +
46 | +Glacial clay | +CLACIAL CLAY | +1 | +
remapper.generate_lookup_table(as_df=True, fixes=fixes_sediments)
+remapper.select_match(match_score_threshold=1)
Processing: 0%| | 0/47 [00:00<?, ?it/s]Processing: 100%|██████████| 47/47 [00:00<00:00, 126.59it/s]
++ | matched_maris_name | +source_name | +match_score | +
---|---|---|---|
source_key | ++ | + | + |
50 | +Mud and gravel | +MUD AND GARVEL | +2 | +
46 | +Glacial clay | +CLACIAL CLAY | +1 | +
+++RemapSedimentCB (fn_lut:Callable)
Update sediment id based on MARIS sediment type LUT (dbo_sedtype.xlsx).
++ | Type | +Details | +
---|---|---|
fn_lut | +Callable | +Function that returns the lookup table dictionary | +
class RemapSedimentCB(Callback):
    "Update sediment id based on MARIS sediment type LUT (dbo_sedtype.xlsx)."
    def __init__(self,
                 fn_lut: Callable, # Function that returns the lookup table dictionary
                ):
        fc.store_attr()

    def _fix_inconsistent_sedi(self, df:pd.DataFrame) -> pd.DataFrame:
        "Temporary fix for inconsistent SEDI values. Data provider to confirm and clarify."
        # 56, 73 and missing values are reassigned to -99.
        df['SEDI'] = df['SEDI'].replace({56: -99, 73: -99, np.nan: -99})
        return df

    def __call__(self, tfm: Transformer):
        "Remap sediment types in the DataFrame using the lookup table and handle specific replacements."
        lut = self.fn_lut()

        # Set SedRepName (TBC: what is it used for?) from the SEDI values as provided,
        # i.e. before the inconsistent ones are reassigned.
        tfm.dfs['sediment']['SedRepName'] = tfm.dfs['sediment']['SEDI']

        tfm.dfs['sediment'] = self._fix_inconsistent_sedi(tfm.dfs['sediment'])
        df = tfm.dfs['sediment']

        # Resolve each distinct SEDI code once: one lookup (and at most one
        # "Unmatched SEDI" message) per code instead of one per row.
        sed_type_by_sedi = {sedi: self._get_sediment_type(sedi, lut)
                            for sedi in df['SEDI'].unique()}
        df['sed_type'] = df['SEDI'].map(sed_type_by_sedi)

    def _get_sediment_type(self,
                           sedi_value: int, # The `SEDI` value from the DataFrame
                           lut: dict # The lookup table dictionary
                          ) -> int: # The `matched_id` of the lookup entry (fallback entry is built with -1)
        "Get the matched_id from the lookup table and print SEDI if the matched_id is -1."
        match = lut.get(sedi_value, Match(-1, None, None, None))

        if match.matched_id == -1:
            self._print_unmatched_sedi(sedi_value)
        return match.matched_id

    def _print_unmatched_sedi(self,
                              sedi_value: int # The `SEDI` value from the DataFrame
                             ) -> None:
        "Print the SEDI value if the matched_id is -1."
        print(f"Unmatched SEDI: {sedi_value}")
lut_sediments = lambda: Remapper(provider_lut_df=pd.read_csv(Path(fname_in) / 'SEDIMENT_TYPE.csv'),
+ maris_lut_fn=sediments_lut_path,
+ maris_col_id='sedtype_id',
+ maris_col_name='sedtype',
+ provider_col_to_match='SEDIMENT TYPE',
+ provider_col_key='SEDI',
+ fname_cache='sediments_helcom.pkl'
+ ).generate_lookup_table(fixes=fixes_sediments, as_df=False, overwrite=False)
Apply the transformer for callbacks RemapSedimentCB(lut_sediments)
. Then, print the unique sed_type
values for the sediment
dataframe.
dfs = load_data(fname_in)
+tfm = Transformer(dfs, cbs=[RemapSedimentCB(lut_sediments)])
+
+tfm()['sediment']['sed_type'].unique()
array([ 0, 2, 58, 30, 59, 55, 56, 36, 29, 47, 4, 54, 33, 6, 44, 42, 48,
+ 61, 57, 28, 49, 32, 45, 39, 46, 38, 31, 60, 62, 26, 53, 52, 1, 51,
+ 37, 34, 50, 7, 10, 41, 43, 35])
+FEEDBACK TO DATA PROVIDER: The handling of unit types varies between biota
and sediment
sample types. For consistency and ease of use, it would be beneficial to have dedicated unit columns for all sample types.
For seawater
and sediment
sample types, the HELCOM dataset refers to units directly in the name of certain columns, such as VALUE_Bq/m³
or VALUE_Bq/kg
. As for biota
, the units are included in the BASIS
column. This is shown below:
dfs = load_data(fname_in)
+for grp in ['biota', 'sediment', 'seawater']:
+ print(f"{grp}: {dfs[grp].columns}")
+
+dfs['biota']['BASIS'].unique()
biota: Index(['KEY', 'NUCLIDE', 'METHOD', '< VALUE_Bq/kg', 'VALUE_Bq/kg', 'BASIS',
+ 'ERROR%', 'NUMBER', 'DATE_OF_ENTRY_x', 'COUNTRY', 'LABORATORY',
+ 'SEQUENCE', 'DATE', 'YEAR', 'MONTH', 'DAY', 'STATION',
+ 'LATITUDE ddmmmm', 'LATITUDE dddddd', 'LONGITUDE ddmmmm',
+ 'LONGITUDE dddddd', 'SDEPTH', 'RUBIN', 'BIOTATYPE', 'TISSUE', 'NO',
+ 'LENGTH', 'WEIGHT', 'DW%', 'LOI%', 'MORS_SUBBASIN', 'HELCOM_SUBBASIN',
+ 'DATE_OF_ENTRY_y'],
+ dtype='object')
+sediment: Index(['KEY', 'NUCLIDE', 'METHOD', '< VALUE_Bq/kg', 'VALUE_Bq/kg', 'ERROR%_kg',
+ '< VALUE_Bq/m²', 'VALUE_Bq/m²', 'ERROR%_m²', 'DATE_OF_ENTRY_x',
+ 'COUNTRY', 'LABORATORY', 'SEQUENCE', 'DATE', 'YEAR', 'MONTH', 'DAY',
+ 'STATION', 'LATITUDE (ddmmmm)', 'LATITUDE (dddddd)',
+ 'LONGITUDE (ddmmmm)', 'LONGITUDE (dddddd)', 'DEVICE', 'TDEPTH',
+ 'UPPSLI', 'LOWSLI', 'AREA', 'SEDI', 'OXIC', 'DW%', 'LOI%',
+ 'MORS_SUBBASIN', 'HELCOM_SUBBASIN', 'SUM_LINK', 'DATE_OF_ENTRY_y'],
+ dtype='object')
+seawater: Index(['KEY', 'NUCLIDE', 'METHOD', '< VALUE_Bq/m³', 'VALUE_Bq/m³', 'ERROR%_m³',
+ 'DATE_OF_ENTRY_x', 'COUNTRY', 'LABORATORY', 'SEQUENCE', 'DATE', 'YEAR',
+ 'MONTH', 'DAY', 'STATION', 'LATITUDE (ddmmmm)', 'LATITUDE (dddddd)',
+ 'LONGITUDE (ddmmmm)', 'LONGITUDE (dddddd)', 'TDEPTH', 'SDEPTH', 'SALIN',
+ 'TTEMP', 'FILT', 'MORS_SUBBASIN', 'HELCOM_SUBBASIN', 'DATE_OF_ENTRY_y'],
+ dtype='object')
+array(['W', nan, 'D', 'F'], dtype=object)
+Given the inconsistent handling of units across sample types, we need to define custom mapping rules for standardizing the units. Below the MARIS unit types:
++ | unit_id | +unit | +unit_sanitized | +
---|---|---|---|
0 | +-1 | +Not applicable | +Not applicable | +
1 | +0 | +NOT AVAILABLE | +NOT AVAILABLE | +
2 | +1 | +Bq/m3 | +Bq per m3 | +
3 | +2 | +Bq/m2 | +Bq per m2 | +
4 | +3 | +Bq/kg | +Bq per kg | +
5 | +4 | +Bq/kgd | +Bq per kgd | +
6 | +5 | +Bq/kgw | +Bq per kgw | +
7 | +6 | +kg/kg | +kg per kg | +
8 | +7 | +TU | +TU | +
9 | +8 | +DELTA/mill | +DELTA per mill | +
10 | +9 | +atom/kg | +atom per kg | +
11 | +10 | +atom/kgd | +atom per kgd | +
12 | +11 | +atom/kgw | +atom per kgw | +
13 | +12 | +atom/l | +atom per l | +
14 | +13 | +Bq/kgC | +Bq per kgC | +
We define unit names renaming rules from HELCOM in an ad hoc way for now:
++++RemapUnitCB (lut_units:dict={'seawater': 1, 'sediment': 4, 'biota': {'D': + 4, 'W': 5, 'F': 5}})
Set the unit
id column in the DataFrames based on a lookup table.
+ | Type | +Default | +Details | +
---|---|---|---|
lut_units | +dict | +{‘seawater’: 1, ‘sediment’: 4, ‘biota’: {‘D’: 4, ‘W’: 5, ‘F’: 5}} | +Dictionary containing renaming rules for different unit categories | +
class RemapUnitCB(Callback):
    "Set the `unit` id column in the DataFrames based on a lookup table."
    def __init__(self,
                 lut_units: dict=lut_units # Dictionary containing renaming rules for different unit categories
                ):
        fc.store_attr()

    def __call__(self, tfm: Transformer):
        for grp in tfm.dfs.keys():
            if grp in ['seawater', 'sediment']:
                # A single unit id applies to the whole group.
                tfm.dfs[grp]['unit'] = self.lut_units[grp]
            else:
                # The unit depends on the `BASIS` code of each row; codes that are not
                # in the lookup table get 0. Use the instance's lookup table so that a
                # table passed to the constructor is honoured (not the module-level default).
                basis_lut = self.lut_units[grp]
                tfm.dfs[grp]['unit'] = tfm.dfs[grp]['BASIS'].apply(lambda x: basis_lut.get(x, 0))
Apply the transformer for callback RemapUnitCB()
. Then, print the unique unit
for the seawater
dataframe.
Detection limits are encoded as follows in MARIS:
++ | id | +name | +name_sanitized | +
---|---|---|---|
0 | +-1 | +Not applicable | +Not applicable | +
1 | +0 | +Not Available | +Not available | +
2 | +1 | += | +Detected value | +
3 | +2 | +< | +Detection limit | +
4 | +3 | +ND | +Not detected | +
5 | +4 | +DE | +Derived | +
Based on columns of interest for each sample type:
+We follow the following business logic to encode the detection limit:
+RemapDetectionLimitCB
creates a detection_limit
column with values determined as follows: 1. Perform a lookup of the appropriate value type (or detection limit) column (< VALUE_Bq/m³
or < VALUE_Bq/kg
) against the table returned from the function get_detectionlimit_lut
. 2. If < VALUE_Bq/m³
or < VALUE_Bq/kg
is NaN but both activity values (VALUE_Bq/m³
or VALUE_Bq/kg
) and standard uncertainty (ERROR%_m³
, ERROR%
, or ERROR%_kg
) are provided, then assign the ID of 1
(i.e. “Detected value”). 3. For other NaN values in the detection_limit
column, set them to 0
(i.e. Not Available
).
+++RemapDetectionLimitCB (coi:dict, fn_lut:Callable)
Remap value type to MARIS format.
++ | Type | +Details | +
---|---|---|
coi | +dict | +Configuration options for column names | +
fn_lut | +Callable | +Function that returns a lookup table | +
# TO BE REFACTORED
class RemapDetectionLimitCB(Callback):
    "Remap value type to MARIS format."
    def __init__(self,
                 coi: dict, # Configuration options for column names
                 fn_lut: Callable # Function that returns a lookup table
                ):
        fc.store_attr()

    def __call__(self, tfm: Transformer):
        "Add a `detection_limit` column to every group, using the lookup table."
        lut = self.fn_lut()

        for grp, df in tfm.dfs.items():
            self._update_detection_limit(df, grp, lut)

    def _update_detection_limit(self,
                                df: pd.DataFrame, # The DataFrame to modify
                                grp: str, # The group name to get the column configuration
                                lut: dict # The lookup table dictionary
                               ) -> None:
        "Derive the `detection_limit` column of `df` from the group's columns of interest and `lut`."
        cols = self.coi[grp]
        dl_col, val_col, unc_col = cols['dl'], cols['val'], cols['unc']

        # Work on a copy of the provider's detection limit column.
        detection = df[dl_col].copy()

        # Entries not in the lookup table but with both a value and an uncertainty become '='.
        has_measurement = df[val_col].notna() & df[unc_col].notna()
        detection[has_measurement & ~detection.isin(lut.keys())] = '='

        # Whatever is still not in the lookup table becomes 'Not Available'.
        detection[~detection.isin(lut.keys())] = 'Not Available'

        # Translate the codes through the lookup table.
        df['detection_limit'] = detection.map(lut)
dfs = load_data(fname_in)
tfm = Transformer(dfs, cbs=[
    NormalizeUncCB(),
    SanitizeValue(coi_val),
    RemapUnitCB(),
    RemapDetectionLimitCB(coi_dl, lut_dl)])

# Run the pipeline once and reuse its output instead of re-running it for every group.
dfs_out = tfm()
for grp in ['biota', 'sediment', 'seawater']:
    print(f"{grp}: {dfs_out[grp]['detection_limit'].unique()}")
biota: [2 1 0]
+sediment: [1 2 0]
+seawater: [1 2 0]
+HELCOM filtered status is encoded as follows in the FILT
column:
+ | index | +value | +
---|---|---|
0 | +0 | +n | +
1 | +1 | +NaN | +
2 | +2 | +N | +
3 | +3 | +F | +
While MARIS uses a different encoding for filtered status:
++ | id | +name | +
---|---|---|
0 | +-1 | +Not applicable | +
1 | +0 | +Not available | +
2 | +1 | +Yes | +
3 | +2 | +No | +
With only four categories to remap, the Remapper
is overkill. We can use a simple dictionary to map the values:
RemapFiltCB
converts the HELCOM FILT
format to the MARIS FILT
format.
+++RemapFiltCB (lut_filtered:dict={'N': 2, 'n': 2, 'F': 1})
Lookup FILT value in dataframe using the lookup table.
++ | Type | +Default | +Details | +
---|---|---|---|
lut_filtered | +dict | +{‘N’: 2, ‘n’: 2, ‘F’: 1} | +Dictionary mapping FILT codes to their corresponding names | +
class RemapFiltCB(Callback):
    "Lookup FILT value in dataframe using the lookup table."
    def __init__(self,
                 lut_filtered: dict=lut_filtered, # Dictionary mapping FILT codes to their corresponding names
                ):
        fc.store_attr()

    def __call__(self, tfm):
        # Any code absent from the lookup table (missing values included) maps to 0.
        def to_maris(code):
            return self.lut_filtered.get(code, 0)

        for df in tfm.dfs.values():
            if 'FILT' not in df.columns:
                continue
            df['FILT'] = df['FILT'].map(to_maris)
For instance:
+ +The sample laboratory code is currently stored in the MARIS master DB but not encoded as a NetCDF variable. The decision to include it in the NetCDF output is TBD.
++++AddSampleLabCodeCB ()
Remap KEY
column to samplabcode
in each DataFrame.
dfs = load_data(fname_in)
+tfm = Transformer(dfs, cbs=[
+ AddSampleLabCodeCB(),
+ CompareDfsAndTfmCB(dfs)
+ ])
+
+print(tfm()['seawater']['samplabcode'].unique())
+print(pd.DataFrame.from_dict(tfm.compare_stats) , '\n')
['WKRIL2012003' 'WKRIL2012004' 'WKRIL2012005' ... 'WSSSM2021006'
+ 'WSSSM2021007' 'WSSSM2021008']
+ seawater sediment biota
+Number of rows in dfs 21216 39817 15827
+Number of rows in tfm.dfs 21216 39817 15827
+Number of dropped rows 0 0 0
+Number of rows in tfm.dfs + Number of dropped rows 21216 39817 15827
+
+The HELCOM dataset includes a look-up table ANALYSIS_METHOD.csv
capturing the measurement method used as described by HELCOM. For instance:
+ | METHOD | +DESCRIPTION | +COUNTRY | +
---|---|---|---|
0 | +BFFG01 | +Gammaspectrometric analysis with Germanium det... | +6 | +
1 | +BFFG02 | +Sr-90, a) Y-90 extraction method dried ash and... | +6 | +
2 | +BFFG03 | +Pu238, Pu239241; Ashing and and drying the tra... | +6 | +
3 | +BFFG04 | +Am-241 (not to in use any more) | +6 | +
4 | +CLOR01 | +137Cs and 40K activity concentrations are dete... | +67 | +
+++AddMeasurementNoteCB (fn_lut:Callable)
Record measurement notes by adding a ‘measurenote’ column to DataFrames.
++ | Type | +Details | +
---|---|---|
fn_lut | +Callable | +Function that returns the lookup dictionary with METHOD as key and DESCRIPTION as value |
+
class AddMeasurementNoteCB(Callback):
    "Record measurement notes by adding a 'measurenote' column to DataFrames."
    def __init__(self,
                 fn_lut: Callable # Function that returns the lookup dictionary with `METHOD` as key and `DESCRIPTION` as value
                ):
        fc.store_attr()

    def __call__(self, tfm: Transformer):
        # The lookup table is built once per run and shared by all DataFrames.
        method_to_note = self.fn_lut()
        # NOTE: the column actually written is 'measurementnote';
        # methods missing from the lookup table get 0.
        for df in tfm.dfs.values():
            if 'METHOD' not in df.columns:
                continue
            df['measurementnote'] = df['METHOD'].map(lambda method: method_to_note.get(method, 0))
dfs = load_data(fname_in)
+tfm = Transformer(dfs, cbs=[
+ AddMeasurementNoteCB(lut_method),
+ CompareDfsAndTfmCB(dfs)])
+
+tfm()
+print(tfm.dfs['seawater']['measurementnote'].unique()[:5])
+print(pd.DataFrame.from_dict(tfm.compare_stats) , '\n')
[0
+ 'Radiochemical method Radiocaesium separation from seawater samples.134+137Cs was adsorbed on AMP mat, dissolved with NaOH and after purification precipitated as chloroplatinate (Cs2PtCl6).Counting with low background anticoincidence beta counter.'
+ 'Radiochem. meth of Sr90. Precipation with oxalate and separation of calcium, barium, radium and ytrium couting with low background anticoincidence beta counter. 1982-1994'
+ 'For tritium liquid scintialtion counting, combined with electrolytic enrichment of analysed water samples, double distilled, before and after electrolysis in cells. Liquid Scintillation spectrometer LKB Wallac model 1410'
+ 'Pretreatment drying (sediment, biota samples) and ashing (biota samples)or vaporization to 1000 ml (sea water samples), measured by gamma-spectrometry using HPGe detectors sediment, biota, sea water /Cs-137, Cs-134, K-40']
+ seawater sediment biota
+Number of rows in dfs 21216 39817 15827
+Number of rows in tfm.dfs 21216 39817 15827
+Number of dropped rows 0 0 0
+Number of rows in tfm.dfs + Number of dropped rows 21216 39817 15827
+
+For MARIS master DB import only (not included in the NetCDF output).
++++RemapStationIdCB ()
Remap Station ID to MARIS format.
class RemapStationIdCB(Callback):
    "Remap Station ID to MARIS format."
    def __init__(self):
        fc.store_attr()

    def __call__(self, tfm: Transformer):
        "Copy the `STATION` column into a `station` column in every DataFrame of the transformer."
        for df in tfm.dfs.values():
            df['station'] = df['STATION']
dfs = load_data(fname_in)
+tfm = Transformer(dfs, cbs=[
+ RemapStationIdCB(),
+ CompareDfsAndTfmCB(dfs)
+ ])
+tfm()
+print(pd.DataFrame.from_dict(tfm.compare_stats) , '\n')
seawater sediment biota
+Number of rows in dfs 21216 39817 15827
+Number of rows in tfm.dfs 21216 39817 15827
+Number of dropped rows 0 0 0
+Number of rows in tfm.dfs + Number of dropped rows 21216 39817 15827
+
+For MARIS master DB import only (not included in the NetCDF output).
++++RemapSedSliceTopBottomCB ()
Remap Sediment slice top and bottom to MARIS format.
class RemapSedSliceTopBottomCB(Callback):
    "Remap Sediment slice top and bottom to MARIS format."
    def __call__(self, tfm: Transformer):
        "Copy `UPPSLI` and `LOWSLI` of the `sediment` DataFrame into `top` and `bottom` respectively."
        sediment = tfm.dfs['sediment']
        for dest_col, src_col in (('top', 'UPPSLI'), ('bottom', 'LOWSLI')):
            sediment[dest_col] = sediment[src_col]
DW%
is not included in the NetCDF output currently.
HELCOM Description:
+Sediment: 1. DW%: DRY WEIGHT AS PERCENTAGE (%) OF FRESH WEIGHT. 2. VALUE_Bq/kg: Measured radioactivity concentration in Bq/kg dry wt. in scientific format(e.g. 123 = 1.23E+02, 0.076 = 7.6E-02)
+Biota: 1. WEIGHT: Average weight (in g) of specimen in the sample 2. DW%: DRY WEIGHT AS PERCENTAGE (%) OF FRESH WEIGHT
++++LookupDryWetRatio ()
Lookup dry-wet ratio and format for MARIS.
class LookupDryWetRatio(Callback):
    "Lookup dry-wet ratio and format for MARIS."
    def __call__(self, tfm: Transformer):
        "Add a `dry_wet_ratio` column to every DataFrame of the transformer that has a `DW%` column."
        for df in tfm.dfs.values():
            if 'DW%' in df.columns:
                self._apply_dry_wet_ratio(df)

    def _apply_dry_wet_ratio(self, df: pd.DataFrame) -> None:
        "Copy `DW%` into `dry_wet_ratio` (in place) and replace 0 with NaN."
        df['dry_wet_ratio'] = df['DW%']
        # Convert 'DW%' = 0% to NaN. `np.nan` is used because the `np.NaN`
        # alias was removed in NumPy 2.0.
        df.loc[df['dry_wet_ratio'] == 0, 'dry_wet_ratio'] = np.nan
dfs = load_data(fname_in)
+tfm = Transformer(dfs, cbs=[
+ LookupDryWetRatio(),
+ CompareDfsAndTfmCB(dfs)
+ ])
+
+tfm()
+print(pd.DataFrame.from_dict(tfm.compare_stats) , '\n')
+print(tfm.dfs['biota']['dry_wet_ratio'].head())
seawater sediment biota
+Number of rows in dfs 21216 39817 15827
+Number of rows in tfm.dfs 21216 39817 15827
+Number of dropped rows 0 0 0
+Number of rows in tfm.dfs + Number of dropped rows 21216 39817 15827
+
+0 18.453
+1 18.453
+2 18.453
+3 18.453
+4 18.458
+Name: dry_wet_ratio, dtype: float64
+FEEDBACK TO DATA PROVIDER: Column names for geographical coordinates are inconsistent across sample types (biota, sediment, seawater). Sometimes using parentheses, sometimes not.
+dfs = load_data(fname_in)
+for grp in dfs.keys():
+ print(f'{grp}: {[col for col in dfs[grp].columns if "LON" in col or "LAT" in col]}')
seawater: ['LATITUDE (ddmmmm)', 'LATITUDE (dddddd)', 'LONGITUDE (ddmmmm)', 'LONGITUDE (dddddd)']
+sediment: ['LATITUDE (ddmmmm)', 'LATITUDE (dddddd)', 'LONGITUDE (ddmmmm)', 'LONGITUDE (dddddd)']
+biota: ['LATITUDE ddmmmm', 'LATITUDE dddddd', 'LONGITUDE ddmmmm', 'LONGITUDE dddddd']
+FEEDBACK TO DATA PROVIDER: Geographical coordinates are provided in both decimal-degree and degree-minute formats. Some records lack the decimal-degree values, which obliges us to fall back on the less precise degree-minute format.
++++ParseCoordinates (fn_convert_cor:Callable)
Get geographical coordinates from columns expressed in degrees decimal format or from columns in degrees/minutes decimal format where degrees decimal format is missing.
++ | Type | +Details | +
---|---|---|
fn_convert_cor | +Callable | +Function that converts coordinates from degree-minute to decimal degree format | +
class ParseCoordinates(Callback):
    """
    Get geographical coordinates from columns expressed in degrees decimal format
    or from columns in degrees/minutes decimal format where degrees decimal format is missing.
    """
    def __init__(self,
                 fn_convert_cor: Callable # Function that converts coordinates from degree-minute to decimal degree format
                 ):
        self.fn_convert_cor = fn_convert_cor

    def __call__(self, tfm:Transformer):
        "Add `lat` and `lon` columns to every DataFrame of the transformer."
        for df in tfm.dfs.values():
            self._format_coordinates(df)

    def _format_coordinates(self, df:pd.DataFrame) -> None:
        """Write `lat`/`lon` columns into `df` in place, then drop rows lacking either one.

        Raises `KeyError` when one of the four expected coordinate columns
        cannot be found in `df`.
        """
        coord_cols = self._get_coord_columns(df.columns)

        # Fail with an explicit message rather than the opaque `KeyError: None`
        # that indexing the DataFrame with an unresolved column would produce.
        unresolved = [key for key, col in coord_cols.items() if col is None]
        if unresolved:
            raise KeyError(f"No coordinate column found for: {', '.join(unresolved)}")

        for coord in ['lat', 'lon']:
            decimal_col, minute_col = coord_cols[f'{coord}_d'], coord_cols[f'{coord}_m']

            # The degree-minute column is used only where the decimal one is missing or 0.
            condition = df[decimal_col].isna() | (df[decimal_col] == 0)
            df[coord] = np.where(condition,
                                 df[minute_col].apply(self._safe_convert),
                                 df[decimal_col])

        df.dropna(subset=['lat', 'lon'], inplace=True)

    def _get_coord_columns(self, columns) -> dict:
        "Map `lon_d`, `lat_d`, `lon_m`, `lat_m` to the matching column name (or `None`)."
        return {
            'lon_d': self._find_coord_column(columns, 'LON', 'dddddd'),
            'lat_d': self._find_coord_column(columns, 'LAT', 'dddddd'),
            'lon_m': self._find_coord_column(columns, 'LON', 'ddmmmm'),
            'lat_m': self._find_coord_column(columns, 'LAT', 'ddmmmm')
        }

    def _find_coord_column(self, columns, coord_type, coord_format):
        "Return the first column whose name contains `coord_type` followed by `coord_format` (case-insensitive), or `None`."
        # Column names vary between groups (with or without parentheses), hence the loose pattern.
        pattern = re.compile(f'{coord_type}.*{coord_format}', re.IGNORECASE)
        matching_columns = [col for col in columns if pattern.search(col)]
        return matching_columns[0] if matching_columns else None

    def _safe_convert(self, value):
        "Return `fn_convert_cor(value)`; missing values and values that fail to convert are returned unchanged."
        if pd.isna(value):
            return value
        try:
            return self.fn_convert_cor(value)
        except Exception as e:
            # Best effort: report the failure and keep the raw value.
            print(f"Error converting value {value}: {e}")
            return value
dfs = load_data(fname_in)
+tfm = Transformer(dfs, cbs=[
+ ParseCoordinates(ddmm_to_dd),
+ CompareDfsAndTfmCB(dfs)
+ ])
+tfm()
+print(pd.DataFrame.from_dict(tfm.compare_stats) , '\n')
+print(tfm.dfs['biota'][['lat','lon']])
seawater sediment biota
+Number of rows in dfs 21216 39817 15827
+Number of rows in tfm.dfs 21208 39816 15827
+Number of dropped rows 8 1 0
+Number of rows in tfm.dfs + Number of dropped rows 21216 39817 15827
+
+ lat lon
+0 54.283333 12.316667
+1 54.283333 12.316667
+2 54.283333 12.316667
+3 54.283333 12.316667
+4 54.283333 12.316667
+... ... ...
+15822 60.373333 18.395667
+15823 60.373333 18.395667
+15824 60.503333 18.366667
+15825 60.503333 18.366667
+15826 60.503333 18.366667
+
+[15827 rows x 2 columns]
+FEEDBACK TO DATA PROVIDER: Some samples have (lon, lat): (0, 0) or are outside lon/lat possible values.
+Sanitizing coordinates drops a row when both longitude & latitude equal 0 or when the longitude or latitude value is unrealistic (outside the possible range). It also converts the longitude & latitude ,
separator to the .
separator.
dfs = load_data(fname_in)
+tfm = Transformer(dfs, cbs=[
+ ParseCoordinates(ddmm_to_dd),
+ SanitizeLonLatCB(),
+ CompareDfsAndTfmCB(dfs)
+ ])
+
+tfm()
+print(pd.DataFrame.from_dict(tfm.compare_stats) , '\n')
+print(tfm.dfs['biota'][['lat','lon']])
seawater sediment biota
+Number of rows in dfs 21216 39817 15827
+Number of rows in tfm.dfs 21208 39816 15827
+Number of dropped rows 8 1 0
+Number of rows in tfm.dfs + Number of dropped rows 21216 39817 15827
+
+ lat lon
+0 54.283333 12.316667
+1 54.283333 12.316667
+2 54.283333 12.316667
+3 54.283333 12.316667
+4 54.283333 12.316667
+... ... ...
+15822 60.373333 18.395667
+15823 60.373333 18.395667
+15824 60.503333 18.366667
+15825 60.503333 18.366667
+15826 60.503333 18.366667
+
+[15827 rows x 2 columns]
+dfs = load_data(fname_in)
+tfm = Transformer(dfs, cbs=[
+ AddSampleTypeIdColumnCB(),
+ LowerStripNameCB(col_src='NUCLIDE'),
+ RemapNuclideNameCB(lut_nuclides),
+ AddNuclideIdColumnCB(col_value='NUCLIDE'),
+ ParseTimeCB(),
+ EncodeTimeCB(cfg()),
+ SanitizeValue(coi_val),
+ NormalizeUncCB(),
+ RemapCB(fn_lut=lut_biota, col_remap='species', col_src='RUBIN', dest_grps='biota'),
+ RemapCB(lut_tissues, 'body_part', 'TISSUE', 'biota'),
+ RemapCB(lut_biogroup, 'bio_group', 'species', 'biota'),
+ RemapTaxonInformationCB(lut_taxon),
+ RemapSedimentCB(lut_sediments),
+ RemapUnitCB(),
+ RemapDetectionLimitCB(coi_dl, lut_dl),
+ RemapFiltCB(lut_filtered),
+ AddSampleLabCodeCB(),
+ AddMeasurementNoteCB(lut_method),
+ RemapStationIdCB(),
+ RemapSedSliceTopBottomCB(),
+ LookupDryWetRatio(),
+ ParseCoordinates(ddmm_to_dd),
+ SanitizeLonLatCB(),
+ CompareDfsAndTfmCB(dfs)
+ ])
+
+tfm()
+print(pd.DataFrame.from_dict(tfm.compare_stats) , '\n')
seawater sediment biota
+Number of rows in dfs 21216 39817 15827
+Number of rows in tfm.dfs 21114 39531 15798
+Number of dropped rows 102 286 29
+Number of rows in tfm.dfs + Number of dropped rows 21216 39817 15827
+
+For instance, to inspect dropped rows:
++ | KEY | +NUCLIDE | +METHOD | +< VALUE_Bq/m³ | +VALUE_Bq/m³ | +ERROR%_m³ | +DATE_OF_ENTRY_x | +COUNTRY | +LABORATORY | +SEQUENCE | +... | +LONGITUDE (ddmmmm) | +LONGITUDE (dddddd) | +TDEPTH | +SDEPTH | +SALIN | +TTEMP | +FILT | +MORS_SUBBASIN | +HELCOM_SUBBASIN | +DATE_OF_ENTRY_y | +
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
13439 | +WRISO2001025 | +CS137 | +RISO02 | +NaN | +NaN | +10.0 | +NaN | +26.0 | +RISO | +2001025.0 | +... | +10.500 | +10.833333 | +22.0 | +20.0 | +0.00 | +NaN | +N | +5.0 | +5.0 | +NaN | +
14017 | +WLEPA2002001 | +CS134 | +LEPA02 | +< | +NaN | +NaN | +NaN | +93.0 | +LEPA | +2002001.0 | +... | +21.030 | +21.050000 | +16.0 | +0.0 | +3.77 | +14.40 | +N | +4.0 | +9.0 | +NaN | +
14020 | +WLEPA2002002 | +CS134 | +LEPA02 | +< | +NaN | +NaN | +NaN | +93.0 | +LEPA | +2002004.0 | +... | +20.574 | +20.956667 | +14.0 | +0.0 | +6.57 | +11.95 | +N | +4.0 | +9.0 | +NaN | +
14023 | +WLEPA2002003 | +CS134 | +LEPA02 | +< | +NaN | +NaN | +NaN | +93.0 | +LEPA | +2002007.0 | +... | +19.236 | +19.393333 | +73.0 | +0.0 | +7.00 | +9.19 | +N | +4.0 | +9.0 | +NaN | +
14026 | +WLEPA2002004 | +CS134 | +LEPA02 | +< | +NaN | +NaN | +NaN | +93.0 | +LEPA | +2002010.0 | +... | +20.205 | +20.341700 | +47.0 | +0.0 | +7.06 | +8.65 | +N | +4.0 | +9.0 | +NaN | +
5 rows × 27 columns
+++Column names are standardized to MARIS NetCDF format (i.e. PEP8 ).
+
+++get_common_rules (vars:dict, encoding_type:str)
Get common renaming rules for NetCDF and OpenRefine.
++ | Type | +Details | +
---|---|---|
vars | +dict | +Configuration dictionary | +
encoding_type | +str | +Encoding type (netcdf or openrefine ) |
+
Returns | +dict | +Common renaming rules for NetCDF and OpenRefine. | +
def get_common_rules(
    vars: dict, # Configuration dictionary
    encoding_type: str # Encoding type (`netcdf` or `openrefine`)
    ) -> dict: # Common renaming rules for NetCDF and OpenRefine.
    """Get common renaming rules for NetCDF and OpenRefine.

    Keys are source column names and values are destination names. Key order
    is kept as before. Any `encoding_type` other than `'openrefine'` yields
    the NetCDF rules, which are read from `vars['defaults']` and
    `vars['suffixes']`; the OpenRefine rules do not read `vars` at all.
    """
    if encoding_type == 'openrefine':
        return {
            'lat': 'latitude',
            'lon': 'longitude',
            'time': 'begperiod',
            'NUCLIDE': 'nuclide_id',
            'detection_limit': 'detection',
            'unit': 'unit_id',
            'value': 'activity',
            'uncertainty': 'uncertaint',
            'SDEPTH': 'sampdepth',
            'TDEPTH': 'totdepth',
            'samptype_id': 'samptype_id',
            'station': 'station',
            'samplabcode': 'samplabcode',
            'SALIN': 'salinity',
            'TTEMP': 'temperatur',
            'FILT': 'filtered',
            # `AddMeasurementNoteCB` writes its notes to 'measurementnote'; without
            # this rule the column never matched and was dropped from the output.
            'measurementnote': 'measurenote',
            # Kept for DataFrames that already carry a 'measurenote' column.
            'measurenote': 'measurenote',
        }

    defaults, suffixes = vars['defaults'], vars['suffixes']
    return {
        'lat': defaults['lat']['name'],
        'lon': defaults['lon']['name'],
        'time': defaults['time']['name'],
        'NUCLIDE': 'nuclide',
        'detection_limit': suffixes['detection_limit']['name'],
        'unit': suffixes['unit']['name'],
        'value': 'value',
        'uncertainty': suffixes['uncertainty']['name'],
        'SDEPTH': defaults['smp_depth']['name'],
        'TDEPTH': defaults['tot_depth']['name'],
        'counting_method': suffixes['counting_method']['name'],
        'sampling_method': suffixes['sampling_method']['name'],
        'preparation_method': suffixes['preparation_method']['name'],
        'SALIN': suffixes['salinity']['name'],
        'TTEMP': suffixes['temperature']['name'],
    }
+++get_specific_rules (vars:dict, encoding_type:str)
Get specific renaming rules for NetCDF and OpenRefine.
++ | Type | +Details | +
---|---|---|
vars | +dict | +Configuration dictionary | +
encoding_type | +str | +Encoding type (netcdf or openrefine ) |
+
Returns | +dict | +Specific renaming rules for NetCDF and OpenRefine. | +
def get_specific_rules(
    vars: dict, # Configuration dictionary
    encoding_type: str # Encoding type (`netcdf` or `openrefine`)
    ) -> dict: # Specific renaming rules for NetCDF and OpenRefine.
    "Get specific renaming rules for NetCDF and OpenRefine."
    if encoding_type == 'netcdf':
        # NetCDF names come from the configuration.
        bio, sed = vars['bio'], vars['sed']
        return {
            'biota': {key: bio[key]['name'] for key in ('species', 'body_part', 'bio_group')},
            'sediment': {'sed_type': sed['sed_type']['name']},
        }

    if encoding_type != 'openrefine':
        # Any other encoding type yields no rules.
        return None

    # OpenRefine names are fixed; the taxon columns keep their own name.
    biota_rules = {'species': 'species_id'}
    for taxon_col in ('Taxonname', 'TaxonRepName', 'Taxonrank', 'TaxonDB', 'TaxonDBID', 'TaxonDBURL'):
        biota_rules[taxon_col] = taxon_col
    biota_rules['body_part'] = 'bodypar_id'
    biota_rules['dry_wet_ratio'] = 'percentwt'

    sediment_rules = {
        'sed_type': 'sedtype_id',
        'top': 'sliceup',
        'bottom': 'slicedown',
        'SedRepName': 'SedRepName',
        'dry_wet_ratio': 'percentwt',
    }
    return {'biota': biota_rules, 'sediment': sediment_rules}
+++get_renaming_rules (encoding_type:str='netcdf')
Get renaming rules for NetCDF and OpenRefine.
++ | Type | +Default | +Details | +
---|---|---|---|
encoding_type | +str | +netcdf | +Encoding type (netcdf or openrefine ) |
+
Returns | +dict | ++ | Renaming rules for NetCDF and OpenRefine. | +
def get_renaming_rules(
    encoding_type: str = 'netcdf' # Encoding type (`netcdf` or `openrefine`)
    ) -> dict: # Renaming rules for NetCDF and OpenRefine.
    "Get renaming rules for NetCDF and OpenRefine."
    vars = cdl_cfg()['vars']

    if encoding_type not in ('netcdf', 'openrefine'):
        raise ValueError("Invalid encoding_type provided. Please use 'netcdf' or 'openrefine'.")

    shared = get_common_rules(vars, encoding_type)
    per_group = get_specific_rules(vars, encoding_type)

    # Every sample type starts from its own copy of the shared rules, then
    # takes the rules specific to it (none for seawater).
    return {
        sample_type: {**shared, **per_group.get(sample_type, {})}
        for sample_type in ('seawater', 'biota', 'sediment')
    }
+++SelectAndRenameColumnCB (fn_renaming_rules:Callable, + encoding_type:str='netcdf', verbose:bool=False)
Select and rename columns in a DataFrame based on renaming rules for a specified encoding type.
++ | Type | +Default | +Details | +
---|---|---|---|
fn_renaming_rules | +Callable | ++ | A function that returns an OrderedDict of renaming rules | +
encoding_type | +str | +netcdf | +The encoding type (netcdf or openrefine ) to determine which renaming rules to use |
+
verbose | +bool | +False | +Whether to print out renaming rules that were not applied | +
class SelectAndRenameColumnCB(Callback):
    "Select and rename columns in a DataFrame based on renaming rules for a specified encoding type."
    def __init__(self,
                 fn_renaming_rules: Callable, # A function that returns the renaming rules (a dict keyed by group) for an encoding type
                 encoding_type: str='netcdf', # The encoding type (`netcdf` or `openrefine`) to determine which renaming rules to use
                 verbose: bool=False # Whether to print out renaming rules that were not applied
                 ):
        fc.store_attr()

    def __call__(self, tfm: Transformer):
        "Apply column selection and renaming to DataFrames in the transformer, and identify unused rules."
        try:
            renaming_rules = self.fn_renaming_rules(self.encoding_type)
        except ValueError as e:
            # Best effort: report the problem and leave the DataFrames untouched.
            print(f"Error fetching renaming rules: {e}")
            return

        for group in tfm.dfs.keys():
            # Get relevant renaming rules for the current group
            group_rules = self._get_group_rules(renaming_rules, group)

            if not group_rules:
                continue

            # Apply renaming rules and track keys not found in the DataFrame
            tfm.dfs[group], not_found_keys = self._apply_renaming(tfm.dfs[group], group_rules)

            # Print any renaming rules that were not used
            if not_found_keys and self.verbose:
                print(f"\nGroup '{group}' has the following renaming rules not applied:")
                for old_col in not_found_keys:
                    print(f"Key '{old_col}' from renaming rules was not found in the DataFrame.")

    def _get_group_rules(self,
                         renaming_rules: OrderedDict, # Renaming rules
                         group: str # Group name to filter rules
                         ) -> OrderedDict: # Renaming rules applicable to the specified group
        "Retrieve and merge renaming rules for the specified group based on the encoding type."
        merged_rules = OrderedDict()
        # A rule set applies when `group in key` holds; later sets override earlier ones.
        for key, rules in renaming_rules.items():
            if group in key:
                merged_rules.update(rules)
        return merged_rules

    def _apply_renaming(self,
                        df: pd.DataFrame, # DataFrame to select from
                        rename_rules: OrderedDict # Renaming rules
                        ) -> tuple: # (Renamed and filtered df, Column names from renaming rules that were not found in the DataFrame)
        """
        Select columns based on renaming rules and apply renaming, only for existing columns
        while maintaining the order of the dictionary columns."""
        existing_columns = set(df.columns)
        valid_rules = OrderedDict((old_col, new_col) for old_col, new_col in rename_rules.items() if old_col in existing_columns)

        # Columns are kept in rule order: first the source columns, then any
        # destination name that already exists as a column.
        columns_to_keep = list(valid_rules)
        columns_to_keep += [new_col for new_col in valid_rules.values() if new_col in existing_columns]

        # Rename into a new DataFrame instead of renaming the column selection
        # in place, so the caller gets an independent frame.
        selected = df[list(OrderedDict.fromkeys(columns_to_keep))]
        renamed = selected.rename(columns=valid_rules)

        # Determine which keys were not found
        not_found_keys = set(rename_rules.keys()) - existing_columns
        return renamed, not_found_keys
dfs = load_data(fname_in)
+tfm = Transformer(dfs, cbs=[AddSampleTypeIdColumnCB(),
+ LowerStripNameCB(col_src='NUCLIDE'),
+ RemapNuclideNameCB(lut_nuclides),
+ AddNuclideIdColumnCB(col_value='NUCLIDE'),
+ ParseTimeCB(),
+ EncodeTimeCB(cfg()),
+ SanitizeValue(coi_val),
+ NormalizeUncCB(),
+ RemapBiotaSpeciesCB(lut_biota),
+ RemapBiotaBodyPartCB(lut_tissues),
+ RemapBiogroupCB(lut_biogroup),
+ RemapTaxonInformationCB(lut_taxon),
+ RemapSedimentCB(lut_sediments),
+ RemapUnitCB(),
+ RemapDetectionLimitCB(coi_dl, lut_dl),
+ RemapFiltCB(lut_filtered),
+ AddSampleLabCodeCB(),
+ AddMeasurementNoteCB(lut_method),
+ RemapStationIdCB(),
+ RemapSedSliceTopBottomCB(),
+ LookupDryWetRatio(),
+ ParseCoordinates(ddmm_to_dd),
+ SanitizeLonLatCB(),
+ CompareDfsAndTfmCB(dfs),
+ SelectAndRenameColumnCB(get_renaming_rules, encoding_type='netcdf'),
+ ])
+
+tfm()
+for grp in tfm.dfs.keys():
+ print(f'{grp} columns:')
+ print(tfm.dfs[grp].columns)
seawater columns:
+Index(['lat', 'lon', 'time', 'nuclide', '_dl', '_unit', 'value', '_unc',
+ 'smp_depth', 'tot_depth', '_sal', '_temp'],
+ dtype='object')
+sediment columns:
+Index(['lat', 'lon', 'time', 'nuclide', '_dl', '_unit', 'value', '_unc',
+ 'tot_depth', 'sed_type'],
+ dtype='object')
+biota columns:
+Index(['lat', 'lon', 'time', 'nuclide', '_dl', '_unit', 'value', '_unc',
+ 'smp_depth', 'species', 'body_part', 'bio_group'],
+ dtype='object')
+Convert data from long to wide and rename columns to comply with NetCDF format.
+dfs = load_data(fname_in)
+tfm = Transformer(dfs, cbs=[AddSampleTypeIdColumnCB(),
+ LowerStripNameCB(col_src='NUCLIDE'),
+ RemapNuclideNameCB(lut_nuclides),
+ AddNuclideIdColumnCB(col_value='NUCLIDE'),
+ ParseTimeCB(),
+ EncodeTimeCB(cfg()),
+ SanitizeValue(coi_val),
+ NormalizeUncCB(),
+ RemapCB(fn_lut=lut_biota, col_remap='species', col_src='RUBIN', dest_grps='biota'),
+ RemapCB(lut_tissues, 'body_part', 'TISSUE', 'biota'),
+ RemapCB(lut_biogroup, 'bio_group', 'species', 'biota'),
+ RemapTaxonInformationCB(lut_taxon),
+ RemapSedimentCB(lut_sediments),
+ RemapUnitCB(),
+ RemapDetectionLimitCB(coi_dl, lut_dl),
+ RemapFiltCB(lut_filtered),
+ AddSampleLabCodeCB(),
+ AddMeasurementNoteCB(lut_method),
+ RemapStationIdCB(),
+ RemapSedSliceTopBottomCB(),
+ LookupDryWetRatio(),
+ ParseCoordinates(ddmm_to_dd),
+ SanitizeLonLatCB(),
+ SelectAndRenameColumnCB(get_renaming_rules, encoding_type='netcdf'),
+ ReshapeLongToWide()
+ ])
+
+tfm()
+for grp in tfm.dfs.keys():
+ print(f'{grp} columns:')
+ print(tfm.dfs[grp].columns)
seawater columns:
+Index(['smp_depth', 'tot_depth', 'lon', 'time', 'lat', 'ag110m_dl', 'am241_dl',
+ 'ba140_dl', 'ce144_dl', 'cm242_dl',
+ ...
+ 'pu240', 'ru103', 'ru106', 'sb125', 'sr89', 'sr90', 'tc99', 'u234',
+ 'u238', 'zr95'],
+ dtype='object', length=175)
+sediment columns:
+Index(['tot_depth', 'lon', 'sed_type', 'time', 'lat', 'ac228_dl', 'ag110m_dl',
+ 'am241_dl', 'ba140_dl', 'be7_dl',
+ ...
+ 'sb124', 'sb125', 'sr90', 'th228', 'th232', 'th234', 'tl208', 'u235',
+ 'zn65', 'zr95'],
+ dtype='object', length=177)
+biota columns:
+Index(['body_part', 'smp_depth', 'lon', 'time', 'lat', 'bio_group', 'species',
+ 'ac228_dl', 'ag108m_dl', 'ag110m_dl',
+ ...
+ 'sr89', 'sr90', 'tc99', 'te129m', 'th228', 'th232', 'tl208', 'u235',
+ 'zn65', 'zr95'],
+ dtype='object', length=211)
+dfs = load_data(fname_in)
+
+tfm = Transformer(dfs, cbs=[AddSampleTypeIdColumnCB(),
+ LowerStripNameCB(col_src='NUCLIDE'),
+ RemapNuclideNameCB(lut_nuclides),
+ AddNuclideIdColumnCB(col_value='NUCLIDE'),
+ ParseTimeCB(),
+ EncodeTimeCB(cfg()),
+ SanitizeValue(coi_val),
+ NormalizeUncCB(),
+ RemapCB(fn_lut=lut_biota, col_remap='species', col_src='RUBIN', dest_grps='biota'),
+ RemapCB(lut_tissues, 'body_part', 'TISSUE', 'biota'),
+ RemapCB(lut_biogroup, 'bio_group', 'species', 'biota'),
+ RemapTaxonInformationCB(lut_taxon),
+ RemapSedimentCB(lut_sediments),
+ RemapUnitCB(),
+ RemapDetectionLimitCB(coi_dl, lut_dl),
+ RemapFiltCB(lut_filtered),
+ AddSampleLabCodeCB(),
+ AddMeasurementNoteCB(lut_method),
+ RemapStationIdCB(),
+ RemapSedSliceTopBottomCB(),
+ LookupDryWetRatio(),
+ ParseCoordinates(ddmm_to_dd),
+ SanitizeLonLatCB(),
+ SelectAndRenameColumnCB(get_renaming_rules, encoding_type='netcdf'),
+ ReshapeLongToWide()
+ ])
+
+tfm()
+tfm.logs
["Convert values from 'NUCLIDE' to lowercase, strip spaces, and store in 'None'.",
+ 'Parse and standardize time information in the dataframe.',
+ 'Encode time as `int` representing seconds since xxx',
+ 'Sanitize value/measurement by removing blank entries and populating `value` column.',
+ 'Convert from relative error % to uncertainty of activity unit.',
+ "Remap values from 'RUBIN' to 'species' for groups: b, i, o, t, a.",
+ "Remap values from 'TISSUE' to 'body_part' for groups: b, i, o, t, a.",
+ "Remap values from 'species' to 'bio_group' for groups: b, i, o, t, a.",
+ 'Update taxon information based on MARIS species LUT.',
+ 'Update sediment id based on MARIS species LUT (dbo_sedtype.xlsx).',
+ 'Set the `unit` id column in the DataFrames based on a lookup table.',
+ 'Remap value type to MARIS format.',
+ 'Lookup FILT value in dataframe using the lookup table.',
+ 'Remap `KEY` column to `samplabcode` in each DataFrame.',
+ "Record measurement notes by adding a 'measurenote' column to DataFrames.",
+ 'Remap Station ID to MARIS format.',
+ 'Remap Sediment slice top and bottom to MARIS format.',
+ 'Lookup dry-wet ratio and format for MARIS.',
+ '\n Get geographical coordinates from columns expressed in degrees decimal format \n or from columns in degrees/minutes decimal format where degrees decimal format is missing.\n ',
+ 'Drop row when both longitude & latitude equal 0. Drop unrealistic longitude & latitude values. Convert longitude & latitude `,` separator to `.` separator.',
+ 'Select and rename columns in a DataFrame based on renaming rules for a specified encoding type.']
++++get_attrs (tfm:marisco.callbacks.Transformer, zotero_key:str, + kw:list=['oceanography', 'Earth Science > Oceans > Ocean + Chemistry> Radionuclides', 'Earth Science > Human Dimensions > + Environmental Impacts > Nuclear Radiation Exposure', 'Earth + Science > Oceans > Ocean Chemistry > Ocean Tracers, Earth + Science > Oceans > Marine Sediments', 'Earth Science > Oceans + > Ocean Chemistry, Earth Science > Oceans > Sea Ice > + Isotopes', 'Earth Science > Oceans > Water Quality > Ocean + Contaminants', 'Earth Science > Biological Classification > + Animals/Vertebrates > Fish', 'Earth Science > Biosphere > + Ecosystems > Marine Ecosystems', 'Earth Science > Biological + Classification > Animals/Invertebrates > Mollusks', 'Earth + Science > Biological Classification > Animals/Invertebrates > + Arthropods > Crustaceans', 'Earth Science > Biological + Classification > Plants > Macroalgae (Seaweeds)'])
Retrieve all global attributes.
++ | Type | +Default | +Details | +
---|---|---|---|
tfm | +Transformer | ++ | Transformer object | +
zotero_key | +str | ++ | Zotero dataset record key | +
kw | +list | +[‘oceanography’, ‘Earth Science > Oceans > Ocean Chemistry> Radionuclides’, ‘Earth Science > Human Dimensions > Environmental Impacts > Nuclear Radiation Exposure’, ‘Earth Science > Oceans > Ocean Chemistry > Ocean Tracers, Earth Science > Oceans > Marine Sediments’, ‘Earth Science > Oceans > Ocean Chemistry, Earth Science > Oceans > Sea Ice > Isotopes’, ‘Earth Science > Oceans > Water Quality > Ocean Contaminants’, ‘Earth Science > Biological Classification > Animals/Vertebrates > Fish’, ‘Earth Science > Biosphere > Ecosystems > Marine Ecosystems’, ‘Earth Science > Biological Classification > Animals/Invertebrates > Mollusks’, ‘Earth Science > Biological Classification > Animals/Invertebrates > Arthropods > Crustaceans’, ‘Earth Science > Biological Classification > Plants > Macroalgae (Seaweeds)’] | +List of keywords | +
Returns | +dict | ++ | Global attributes | +
def get_attrs(
    tfm: Transformer, # Transformer object
    zotero_key: str, # Zotero dataset record key
    kw: list = kw # List of keywords
    ) -> dict: # Global attributes
    "Retrieve all global attributes."
    # Keywords and transformation logs are each stored as one comma-separated string.
    callbacks = [
        BboxCB(),
        DepthRangeCB(),
        TimeRangeCB(cfg()),
        ZoteroCB(zotero_key, cfg=cfg()),
        KeyValuePairCB('keywords', ', '.join(kw)),
        KeyValuePairCB('publisher_postprocess_logs', ', '.join(tfm.logs)),
    ]
    feeder = GlobAttrsFeeder(tfm.dfs, cbs=callbacks)
    return feeder()
{'geospatial_lat_min': '31.17',
+ 'geospatial_lat_max': '65.75',
+ 'geospatial_lon_min': '9.6333',
+ 'geospatial_lon_max': '53.5',
+ 'geospatial_bounds': 'POLYGON ((9.6333 53.5, 31.17 53.5, 31.17 65.75, 9.6333 65.75, 9.6333 53.5))',
+ 'time_coverage_start': '1984-01-10T00:00:00',
+ 'time_coverage_end': '2021-12-15T00:00:00',
+ 'title': 'Environmental database - Helsinki Commission Monitoring of Radioactive Substances',
+ 'summary': 'MORS Environment database has been used to collate data resulting from monitoring of environmental radioactivity in the Baltic Sea based on HELCOM Recommendation 26/3.\n\nThe database is structured according to HELCOM Guidelines on Monitoring of Radioactive Substances (https://www.helcom.fi/wp-content/uploads/2019/08/Guidelines-for-Monitoring-of-Radioactive-Substances.pdf), which specifies reporting format, database structure, data types and obligatory parameters used for reporting data under Recommendation 26/3.\n\nThe database is updated and quality assured annually by HELCOM MORS EG.',
+ 'creator_name': '[{"creatorType": "author", "name": "HELCOM MORS"}]',
+ 'keywords': 'oceanography, Earth Science > Oceans > Ocean Chemistry> Radionuclides, Earth Science > Human Dimensions > Environmental Impacts > Nuclear Radiation Exposure, Earth Science > Oceans > Ocean Chemistry > Ocean Tracers, Earth Science > Oceans > Marine Sediments, Earth Science > Oceans > Ocean Chemistry, Earth Science > Oceans > Sea Ice > Isotopes, Earth Science > Oceans > Water Quality > Ocean Contaminants, Earth Science > Biological Classification > Animals/Vertebrates > Fish, Earth Science > Biosphere > Ecosystems > Marine Ecosystems, Earth Science > Biological Classification > Animals/Invertebrates > Mollusks, Earth Science > Biological Classification > Animals/Invertebrates > Arthropods > Crustaceans, Earth Science > Biological Classification > Plants > Macroalgae (Seaweeds)',
+ 'publisher_postprocess_logs': "Convert values from 'NUCLIDE' to lowercase, strip spaces, and store in 'None'., Parse and standardize time information in the dataframe., Encode time as `int` representing seconds since xxx, Sanitize value/measurement by removing blank entries and populating `value` column., Convert from relative error % to uncertainty of activity unit., Remap values from 'RUBIN' to 'species' for groups: b, i, o, t, a., Remap values from 'TISSUE' to 'body_part' for groups: b, i, o, t, a., Remap values from 'species' to 'bio_group' for groups: b, i, o, t, a., Update taxon information based on MARIS species LUT., Update sediment id based on MARIS species LUT (dbo_sedtype.xlsx)., Set the `unit` id column in the DataFrames based on a lookup table., Remap value type to MARIS format., Lookup FILT value in dataframe using the lookup table., Remap `KEY` column to `samplabcode` in each DataFrame., Record measurement notes by adding a 'measurenote' column to DataFrames., Remap Station ID to MARIS format., Remap Sediment slice top and bottom to MARIS format., Lookup dry-wet ratio and format for MARIS., \n Get geographical coordinates from columns expressed in degrees decimal format \n or from columns in degrees/minutes decimal format where degrees decimal format is missing.\n , Drop row when both longitude & latitude equal 0. Drop unrealistic longitude & latitude values. Convert longitude & latitude `,` separator to `.` separator., Select and rename columns in a DataFrame based on renaming rules for a specified encoding type."}
++++enums_xtra (tfm:marisco.callbacks.Transformer, vars:list)
Retrieve a subset of the lengthy enum as species_t
for instance.
+ | Type | +Details | +
---|---|---|
tfm | +Transformer | +Transformer object | +
vars | +list | +List of variables to extract from the transformer | +
def enums_xtra(
    tfm: Transformer,  # Transformer object
    vars: list  # List of variables to extract from the transformer
    ):
    "Build a dict of `<var>_t` enums, each filtered to the values `tfm.unique` returns for `<var>`."
    enums = Enums(lut_src_dir=lut_path(), cdl_enums=cdl_cfg()['enums'])
    subset = {}
    for name in vars:
        observed = tfm.unique(name)
        # NOTE(review): `.any()` is a truthiness test, not an emptiness test;
        # presumably `observed` is a numpy array, in which case a variable whose
        # only values are falsy (e.g. 0) is skipped as well - confirm intended.
        if not observed.any():
            continue
        enum_key = f'{name}_t'
        subset[enum_key] = enums.filter(enum_key, observed)
    return subset
+++encode (fname_in:str, fname_out_nc:str, nc_tpl_path:str, **kwargs)
Encode data to NetCDF.
++ | Type | +Details | +
---|---|---|
fname_in | +str | +Input file name | +
fname_out_nc | +str | +Output file name | +
nc_tpl_path | +str | +NetCDF template file name | +
kwargs | ++ | +Additional arguments | +
Returns | +None | + |
def encode(
    fname_in: str,  # Input file name
    fname_out_nc: str,  # Output file name
    nc_tpl_path: str,  # NetCDF template file name
    **kwargs  # Additional arguments
    ) -> None:
    "Load `fname_in`, run the callback pipeline on it and write the result with `NetCDFEncoder`."
    dfs = load_data(fname_in)

    # Callbacks are handed to the Transformer in this exact order.
    pipeline = [
        AddSampleTypeIdColumnCB(),
        LowerStripNameCB(col_src='NUCLIDE'),
        RemapNuclideNameCB(lut_nuclides),
        AddNuclideIdColumnCB(col_value='NUCLIDE'),
        ParseTimeCB(),
        EncodeTimeCB(cfg()),
        SanitizeValue(coi_val),
        NormalizeUncCB(),
        RemapCB(fn_lut=lut_biota, col_remap='species', col_src='RUBIN', dest_grps='biota'),
        RemapCB(lut_tissues, 'body_part', 'TISSUE', 'biota'),
        RemapCB(lut_biogroup, 'bio_group', 'species', 'biota'),
        RemapTaxonInformationCB(lut_taxon),
        RemapSedimentCB(lut_sediments),
        RemapUnitCB(),
        RemapDetectionLimitCB(coi_dl, lut_dl),
        RemapFiltCB(lut_filtered),
        AddSampleLabCodeCB(),
        AddMeasurementNoteCB(lut_method),
        RemapStationIdCB(),
        RemapSedSliceTopBottomCB(),
        LookupDryWetRatio(),
        ParseCoordinates(ddmm_to_dd),
        SanitizeLonLatCB(),
        SelectAndRenameColumnCB(get_renaming_rules, encoding_type='netcdf'),
        ReshapeLongToWide(),
    ]
    tfm = Transformer(dfs, cbs=pipeline)
    tfm()

    # `zotero_key` and `kw` are not parameters: they are read from the
    # enclosing (module/notebook) scope.
    attrs = get_attrs(tfm, zotero_key=zotero_key, kw=kw)
    be_verbose = kwargs.get('verbose', False)
    xtras = enums_xtra(tfm, vars=['species', 'body_part'])

    encoder = NetCDFEncoder(
        tfm.dfs,
        src_fname=nc_tpl_path,
        dest_fname=fname_out_nc,
        global_attrs=attrs,
        verbose=be_verbose,
        enums_xtra=xtras,
    )
    encoder.encode()
# Run the callback chain with the 'openrefine' renaming rules, then print the
# statistics that `CompareDfsAndTfmCB` leaves on `tfm.compare_stats`.
dfs = load_data(fname_in)
tfm = Transformer(
    dfs,
    cbs=[
        AddSampleTypeIdColumnCB(),
        LowerStripNameCB(col_src='NUCLIDE'),
        RemapNuclideNameCB(lut_nuclides),
        AddNuclideIdColumnCB(col_value='NUCLIDE'),
        ParseTimeCB(),
        EncodeTimeCB(cfg()),
        SanitizeValue(coi_val),
        NormalizeUncCB(),
        RemapCB(fn_lut=lut_biota, col_remap='species', col_src='RUBIN', dest_grps='biota'),
        RemapCB(lut_tissues, 'body_part', 'TISSUE', 'biota'),
        RemapCB(lut_biogroup, 'bio_group', 'species', 'biota'),
        RemapTaxonInformationCB(lut_taxon),
        RemapSedimentCB(lut_sediments),
        RemapUnitCB(),
        RemapDetectionLimitCB(coi_dl, lut_dl),
        RemapFiltCB(lut_filtered),
        AddSampleLabCodeCB(),
        AddMeasurementNoteCB(lut_method),
        RemapStationIdCB(),
        RemapSedSliceTopBottomCB(),
        LookupDryWetRatio(),
        ParseCoordinates(ddmm_to_dd),
        SanitizeLonLatCB(),
        SelectAndRenameColumnCB(get_renaming_rules, encoding_type='openrefine', verbose=True),
        # Receives the untransformed `dfs` loaded above.
        CompareDfsAndTfmCB(dfs),
    ],
)

tfm()
print(pd.DataFrame.from_dict(tfm.compare_stats), '\n')
+Group 'seawater' has the following renaming rules not applied:
+Key 'measurenote' from renaming rules was not found in the DataFrame.
+
+Group 'sediment' has the following renaming rules not applied:
+Key 'SDEPTH' from renaming rules was not found in the DataFrame.
+Key 'measurenote' from renaming rules was not found in the DataFrame.
+Key 'TTEMP' from renaming rules was not found in the DataFrame.
+Key 'FILT' from renaming rules was not found in the DataFrame.
+Key 'SALIN' from renaming rules was not found in the DataFrame.
+
+Group 'biota' has the following renaming rules not applied:
+Key 'TDEPTH' from renaming rules was not found in the DataFrame.
+Key 'measurenote' from renaming rules was not found in the DataFrame.
+Key 'TTEMP' from renaming rules was not found in the DataFrame.
+Key 'FILT' from renaming rules was not found in the DataFrame.
+Key 'SALIN' from renaming rules was not found in the DataFrame.
+ seawater sediment biota
+Number of rows in dfs 21216 39817 15827
+Number of rows in tfm.dfs 21114 39531 15798
+Number of dropped rows 102 286 29
+Number of rows in tfm.dfs + Number of dropped rows 21216 39817 15827
+
+Example of data included in dfs_dropped.
+Main reasons for data to be dropped from dfs: - No activity value reported (e.g. VALUE_Bq/kg) - No time value reported.
++ | KEY | +NUCLIDE | +METHOD | +< VALUE_Bq/kg | +VALUE_Bq/kg | +ERROR%_kg | +< VALUE_Bq/m² | +VALUE_Bq/m² | +ERROR%_m² | +DATE_OF_ENTRY_x | +... | +LOWSLI | +AREA | +SEDI | +OXIC | +DW% | +LOI% | +MORS_SUBBASIN | +HELCOM_SUBBASIN | +SUM_LINK | +DATE_OF_ENTRY_y | +
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
11784 | +SLREB1998021 | +SR90 | +2 | +NaN | +NaN | +NaN | +NaN | +NaN | +NaN | +NaN | +... | +12.0 | +0.02100 | +55.0 | +O | +NaN | +NaN | +14.0 | +14.0 | +a | +NaN | +
11824 | +SLVDC1997023 | +CS137 | +1 | +NaN | +NaN | +NaN | +NaN | +NaN | +NaN | +NaN | +... | +14.0 | +0.02100 | +55.0 | +O | +NaN | +NaN | +9.0 | +9.0 | +a | +NaN | +
11832 | +SLVDC1997031 | +CS137 | +1 | +NaN | +NaN | +NaN | +NaN | +NaN | +NaN | +NaN | +... | +14.0 | +0.02100 | +55.0 | +O | +NaN | +NaN | +9.0 | +9.0 | +a | +NaN | +
11841 | +SLVDC1997040 | +CS137 | +1 | +NaN | +NaN | +NaN | +NaN | +NaN | +NaN | +NaN | +... | +16.0 | +0.02100 | +55.0 | +O | +NaN | +NaN | +9.0 | +9.0 | +a | +NaN | +
11849 | +SLVDC1998011 | +CS137 | +1 | +NaN | +NaN | +NaN | +NaN | +NaN | +NaN | +NaN | +... | +16.0 | +0.02100 | +55.0 | +O | +NaN | +NaN | +14.0 | +14.0 | +a | +NaN | +
... | +... | +... | +... | +... | +... | +... | +... | +... | +... | +... | +... | +... | +... | +... | +... | +... | +... | +... | +... | +... | +... | +
39769 | +SSSSM2021030 | +CO60 | +SSSM43 | +< | +NaN | +NaN | +< | +NaN | +NaN | +09/06/22 00:00:00 | +... | +2.0 | +0.01608 | +NaN | +NaN | +28.200000 | +15.0 | +12.0 | +12.0 | +NaN | +09/06/22 00:00:00 | +
39774 | +SSSSM2021030 | +RA226 | +SSSM43 | +< | +NaN | +NaN | +< | +NaN | +NaN | +09/06/22 00:00:00 | +... | +2.0 | +0.01608 | +NaN | +NaN | +28.200000 | +15.0 | +12.0 | +12.0 | +NaN | +09/06/22 00:00:00 | +
39775 | +SSSSM2021030 | +RA223 | +SSSM43 | +< | +NaN | +NaN | +< | +NaN | +NaN | +09/06/22 00:00:00 | +... | +2.0 | +0.01608 | +NaN | +NaN | +28.200000 | +15.0 | +12.0 | +12.0 | +NaN | +09/06/22 00:00:00 | +
39777 | +SSSSM2021031 | +CS137 | +SSSM43 | +< | +NaN | +NaN | +< | +0.0 | +NaN | +09/06/22 00:00:00 | +... | +2.0 | +0.01608 | +NaN | +NaN | +31.993243 | +NaN | +13.0 | +13.0 | +NaN | +09/06/22 00:00:00 | +
39779 | +SSSSM2021031 | +CO60 | +SSSM43 | +< | +NaN | +NaN | +< | +NaN | +NaN | +09/06/22 00:00:00 | +... | +2.0 | +0.01608 | +NaN | +NaN | +31.993243 | +NaN | +13.0 | +13.0 | +NaN | +09/06/22 00:00:00 | +
286 rows × 35 columns
def encode_or(
    fname_in: str,  # Input file name
    fname_out_csv: str,  # Output file name
    ref_id: str,  # Reference ID as defined in MARIS master DB
    **kwargs  # Additional arguments; `verbose` (default `True`) toggles diagnostic output
    ) -> None:
    "Encode data to Open Refine CSV."
    # `kwargs` used to be ignored and verbosity was hard-coded to True. It is
    # now read from `kwargs` (as `encode` does) while keeping True as the
    # default, so calls that do not pass `verbose` behave exactly as before.
    verbose = kwargs.get('verbose', True)

    dfs = load_data(fname_in)
    tfm = Transformer(dfs, cbs=[
        AddSampleTypeIdColumnCB(),
        LowerStripNameCB(col_src='NUCLIDE'),
        RemapNuclideNameCB(lut_nuclides),
        AddNuclideIdColumnCB(col_value='NUCLIDE'),
        ParseTimeCB(),
        EncodeTimeCB(cfg()),
        SanitizeValue(coi_val),
        NormalizeUncCB(),
        RemapCB(fn_lut=lut_biota, col_remap='species', col_src='RUBIN', dest_grps='biota'),
        RemapCB(lut_tissues, 'body_part', 'TISSUE', 'biota'),
        RemapCB(lut_biogroup, 'bio_group', 'species', 'biota'),
        RemapTaxonInformationCB(lut_taxon),
        RemapSedimentCB(lut_sediments),
        RemapUnitCB(),
        RemapDetectionLimitCB(coi_dl, lut_dl),
        RemapFiltCB(lut_filtered),
        AddSampleLabCodeCB(),
        AddMeasurementNoteCB(lut_method),
        RemapStationIdCB(),
        RemapSedSliceTopBottomCB(),
        LookupDryWetRatio(),
        ParseCoordinates(ddmm_to_dd),
        SanitizeLonLatCB(),
        SelectAndRenameColumnCB(get_renaming_rules, encoding_type='openrefine', verbose=verbose),
        # Receives the untransformed `dfs` loaded above.
        CompareDfsAndTfmCB(dfs),
    ])

    tfm()

    encoder = OpenRefineCsvEncoder(
        tfm.dfs,
        dest_fname=fname_out_csv,
        ref_id=ref_id,
        verbose=verbose,
    )
    encoder.encode()
Field name | +Full name | +HELCOM | +
---|---|---|
sampquality | +Sample quality | +N | +
lab_id | +Laboratory ID | +N | +
profile_id | +Profile ID | +N | +
transect_id | +Transect ID | +N | +
endperiod | +End period | +N | +
vartype | +Variable type | +N | +
freq | +Frequency | +N | +
rl_detection | +Range low detection | +N | +
rangelow | +Range low | +N | +
rangeupp | +Range upper | +N | +
Commonname | +Common name | +N | +
volume | +Volume | +N | +
filtpore | +Filter pore | +N | +
acid | +Acidified | +N | +
oxygen | +Oxygen | +N | +
samparea | +Sample area | +N | +
drywt | +Dry weight | +N | +
wetwt | +Wet weight | +N | +
sampmet_id | +Sampling method ID | +N | +
drymet_id | +Drying method ID | +N | +
prepmet_id | +Preparation method ID | +N | +
counmet_id | +Counting method ID | +N | +
refnote | +Reference note | +N | +
sampnote | +Sample note | +N | +
gfe | +Good for export | +? | +
TODO:
+TODO: Include FILT for NetCDF
+TODO: Check sediment ‘DW%’ data that is less than 1%. Is this realistic? Check the ‘DW%’ data that is 0%. Run below before SelectAndRenameColumnCB.
# Reload the data and apply a single callback on the 'NUCLIDE' column, so the
# remaining source columns (e.g. 'DW%') stay available for inspection.
# NOTE(review): every other cell uses `LowerStripNameCB`; confirm that
# `LowerStripRdnNameCB` is still the right name here.
dfs = load_data(fname_in)
tfm = Transformer(
    dfs,
    cbs=[
        LowerStripRdnNameCB(col_src='NUCLIDE'),
    ],
)
tfm()
{'seawater': KEY NUCLIDE METHOD < VALUE_Bq/m³ VALUE_Bq/m³ ERROR%_m³ \
+ 0 WKRIL2012003 cs137 NaN NaN 5.3 32.000000
+ 1 WKRIL2012004 cs137 NaN NaN 19.9 20.000000
+ 2 WKRIL2012005 cs137 NaN NaN 25.5 20.000000
+ 3 WKRIL2012006 cs137 NaN NaN 17.0 29.000000
+ 4 WKRIL2012007 cs137 NaN NaN 22.2 18.000000
+ ... ... ... ... ... ... ...
+ 21211 WSSSM2021005 h3 SSM45 NaN 1030.0 93.203883
+ 21212 WSSSM2021006 h3 SSM45 NaN 2240.0 43.303571
+ 21213 WSSSM2021007 h3 SSM45 NaN 2060.0 47.087379
+ 21214 WSSSM2021008 h3 SSM45 NaN 2300.0 43.478261
+ 21215 WSSSM2021004 h3 SSM45 < NaN NaN
+
+ DATE_OF_ENTRY_x COUNTRY LABORATORY SEQUENCE ... \
+ 0 08/20/14 00:00:00 90.0 KRIL 2012003.0 ...
+ 1 08/20/14 00:00:00 90.0 KRIL 2012004.0 ...
+ 2 08/20/14 00:00:00 90.0 KRIL 2012005.0 ...
+ 3 08/20/14 00:00:00 90.0 KRIL 2012006.0 ...
+ 4 08/20/14 00:00:00 90.0 KRIL 2012007.0 ...
+ ... ... ... ... ... ...
+ 21211 09/06/22 00:00:00 77.0 SSSM 202105.0 ...
+ 21212 09/06/22 00:00:00 77.0 SSSM 202106.0 ...
+ 21213 09/06/22 00:00:00 77.0 SSSM 202107.0 ...
+ 21214 09/06/22 00:00:00 77.0 SSSM 202108.0 ...
+ 21215 09/06/22 00:00:00 77.0 SSSM 202104.0 ...
+
+ LONGITUDE (ddmmmm) LONGITUDE (dddddd) TDEPTH SDEPTH SALIN TTEMP \
+ 0 29.2000 29.3333 NaN 0.0 NaN NaN
+ 1 29.2000 29.3333 NaN 29.0 NaN NaN
+ 2 23.0900 23.1500 NaN 0.0 NaN NaN
+ 3 27.5900 27.9833 NaN 0.0 NaN NaN
+ 4 27.5900 27.9833 NaN 39.0 NaN NaN
+ ... ... ... ... ... ... ...
+ 21211 18.2143 18.3572 NaN 1.0 NaN NaN
+ 21212 17.0000 17.0000 NaN 1.0 NaN NaN
+ 21213 11.5671 11.9452 NaN 1.0 NaN NaN
+ 21214 11.5671 11.9452 NaN 1.0 NaN NaN
+ 21215 11.1470 11.2450 NaN 1.0 NaN NaN
+
+ FILT MORS_SUBBASIN HELCOM_SUBBASIN DATE_OF_ENTRY_y
+ 0 NaN 11.0 11.0 08/20/14 00:00:00
+ 1 NaN 11.0 11.0 08/20/14 00:00:00
+ 2 NaN 11.0 3.0 08/20/14 00:00:00
+ 3 NaN 11.0 11.0 08/20/14 00:00:00
+ 4 NaN 11.0 11.0 08/20/14 00:00:00
+ ... ... ... ... ...
+ 21211 N 1.0 8.0 09/06/22 00:00:00
+ 21212 N 10.0 10.0 09/06/22 00:00:00
+ 21213 N 12.0 12.0 09/06/22 00:00:00
+ 21214 N 12.0 12.0 09/06/22 00:00:00
+ 21215 N 15.0 18.0 09/06/22 00:00:00
+
+ [21216 rows x 27 columns],
+ 'sediment': KEY NUCLIDE METHOD < VALUE_Bq/kg VALUE_Bq/kg ERROR%_kg \
+ 0 SKRIL2012048 ra226 NaN NaN 35.0 26.00
+ 1 SKRIL2012049 ra226 NaN NaN 36.0 22.00
+ 2 SKRIL2012050 ra226 NaN NaN 38.0 24.00
+ 3 SKRIL2012051 ra226 NaN NaN 36.0 25.00
+ 4 SKRIL2012052 ra226 NaN NaN 30.0 23.00
+ ... ... ... ... ... ... ...
+ 39812 SSSSM2020029 ac228 SSSM43 NaN 37.5 5.00
+ 39813 SSSSM2020030 k40 SSSM43 NaN 526.0 1.72
+ 39814 SSSSM2020030 cs137 SSSM43 NaN 17.2 2.21
+ 39815 SSSSM2020031 k40 SSSM43 NaN 1000.0 1.80
+ 39816 SSSSM2020031 cs137 SSSM43 NaN 64.0 1.20
+
+ < VALUE_Bq/m² VALUE_Bq/m² ERROR%_m² DATE_OF_ENTRY_x ... LOWSLI \
+ 0 NaN NaN NaN 08/20/14 00:00:00 ... 20.0
+ 1 NaN NaN NaN 08/20/14 00:00:00 ... 27.0
+ 2 NaN NaN NaN 08/20/14 00:00:00 ... 2.0
+ 3 NaN NaN NaN 08/20/14 00:00:00 ... 4.0
+ 4 NaN NaN NaN 08/20/14 00:00:00 ... 6.0
+ ... ... ... ... ... ... ...
+ 39812 NaN 255.0 28.0 04/22/22 00:00:00 ... 2.0
+ 39813 NaN 5690.0 2.0 04/22/22 00:00:00 ... 2.0
+ 39814 NaN 186.0 2.0 04/22/22 00:00:00 ... 2.0
+ 39815 NaN 16000.0 2.0 04/22/22 00:00:00 ... 2.0
+ 39816 NaN 1020.0 1.0 04/22/22 00:00:00 ... 2.0
+
+ AREA SEDI OXIC DW% LOI% MORS_SUBBASIN HELCOM_SUBBASIN SUM_LINK \
+ 0 0.006 NaN NaN NaN NaN 11.0 11.0 NaN
+ 1 0.006 NaN NaN NaN NaN 11.0 11.0 NaN
+ 2 0.006 NaN NaN NaN NaN 11.0 11.0 NaN
+ 3 0.006 NaN NaN NaN NaN 11.0 11.0 NaN
+ 4 0.006 NaN NaN NaN NaN 11.0 11.0 NaN
+ ... ... ... ... ... ... ... ... ...
+ 39812 0.019 0.0 O 28.73 14.0 13.0 13.0 NaN
+ 39813 0.019 0.0 O 32.03 NaN 12.0 12.0 NaN
+ 39814 0.019 0.0 O 32.03 NaN 12.0 12.0 NaN
+ 39815 0.017 0.0 O 48.77 NaN 1.0 8.0 NaN
+ 39816 0.017 0.0 O 48.77 NaN 1.0 8.0 NaN
+
+ DATE_OF_ENTRY_y
+ 0 08/20/14 00:00:00
+ 1 08/20/14 00:00:00
+ 2 08/20/14 00:00:00
+ 3 08/20/14 00:00:00
+ 4 08/20/14 00:00:00
+ ... ...
+ 39812 04/22/22 00:00:00
+ 39813 04/22/22 00:00:00
+ 39814 04/22/22 00:00:00
+ 39815 04/22/22 00:00:00
+ 39816 04/22/22 00:00:00
+
+ [39817 rows x 35 columns],
+ 'biota': KEY NUCLIDE METHOD < VALUE_Bq/kg VALUE_Bq/kg BASIS ERROR% \
+ 0 BVTIG2012041 cs134 VTIG01 < 0.010140 W NaN
+ 1 BVTIG2012041 k40 VTIG01 135.300000 W 3.57
+ 2 BVTIG2012041 co60 VTIG01 < 0.013980 W NaN
+ 3 BVTIG2012041 cs137 VTIG01 4.338000 W 3.48
+ 4 BVTIG2012040 cs134 VTIG01 < 0.009614 W NaN
+ ... ... ... ... ... ... ... ...
+ 15822 BSSSM2020016 k40 SSSM42 NaN 65.000000 D 10.20
+ 15823 BSSSM2020016 cs137 SSSM42 NaN 4.500000 D 6.20
+ 15824 BSSSM2020017 be7 SSSM42 NaN 94.000000 D 3.40
+ 15825 BSSSM2020017 k40 SSSM42 NaN 1100.000000 D 1.60
+ 15826 BSSSM2020017 cs137 SSSM42 NaN 13.000000 D 2.50
+
+ NUMBER DATE_OF_ENTRY_x COUNTRY ... BIOTATYPE TISSUE NO \
+ 0 NaN 02/27/14 00:00:00 6.0 ... F 5 16.0
+ 1 NaN 02/27/14 00:00:00 6.0 ... F 5 16.0
+ 2 NaN 02/27/14 00:00:00 6.0 ... F 5 16.0
+ 3 NaN 02/27/14 00:00:00 6.0 ... F 5 16.0
+ 4 NaN 02/27/14 00:00:00 6.0 ... F 5 17.0
+ ... ... ... ... ... ... ... ...
+ 15822 NaN 04/22/22 00:00:00 77.0 ... B 41 319.0
+ 15823 NaN 04/22/22 00:00:00 77.0 ... B 41 319.0
+ 15824 NaN 04/22/22 00:00:00 77.0 ... P 51 NaN
+ 15825 NaN 04/22/22 00:00:00 77.0 ... P 51 NaN
+ 15826 NaN 04/22/22 00:00:00 77.0 ... P 51 NaN
+
+ LENGTH WEIGHT DW% LOI% MORS_SUBBASIN HELCOM_SUBBASIN \
+ 0 45.7 948.0 18.453 92.9 2.0 16
+ 1 45.7 948.0 18.453 92.9 2.0 16
+ 2 45.7 948.0 18.453 92.9 2.0 16
+ 3 45.7 948.0 18.453 92.9 2.0 16
+ 4 45.9 964.0 18.458 92.9 2.0 16
+ ... ... ... ... ... ... ...
+ 15822 NaN NaN 41.000 0.0 1.0 8
+ 15823 NaN NaN 41.000 0.0 1.0 8
+ 15824 NaN NaN 21.000 0.0 1.0 8
+ 15825 NaN NaN 21.000 0.0 1.0 8
+ 15826 NaN NaN 21.000 0.0 1.0 8
+
+ DATE_OF_ENTRY_y
+ 0 02/27/14 00:00:00
+ 1 02/27/14 00:00:00
+ 2 02/27/14 00:00:00
+ 3 02/27/14 00:00:00
+ 4 02/27/14 00:00:00
+ ... ...
+ 15822 04/22/22 00:00:00
+ 15823 04/22/22 00:00:00
+ 15824 04/22/22 00:00:00
+ 15825 04/22/22 00:00:00
+ 15826 04/22/22 00:00:00
+
+ [15827 rows x 33 columns]}
# Select the rows of the chosen group whose 'DW%' is strictly below 1 and
# strictly above 0.001.
grp = 'sediment'
check_data_sediment = tfm.dfs[grp][
    tfm.dfs[grp]['DW%'].lt(1) & tfm.dfs[grp]['DW%'].gt(0.001)
]
check_data_sediment
+ | KEY | +NUCLIDE | +METHOD | +< VALUE_Bq/kg | +VALUE_Bq/kg | +ERROR%_kg | +< VALUE_Bq/m² | +VALUE_Bq/m² | +ERROR%_m² | +DATE_OF_ENTRY_x | +... | +LOWSLI | +AREA | +SEDI | +OXIC | +DW% | +LOI% | +MORS_SUBBASIN | +HELCOM_SUBBASIN | +SUM_LINK | +DATE_OF_ENTRY_y | +
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
30938 | +SLVEA2010001 | +cs137 | +LVEA01 | +NaN | +334.25 | +1.57 | +NaN | +131.886 | +41179.0 | +NaN | +... | +2.0 | +0.0151 | +5.0 | +O | +0.115 | +0.9 | +14.0 | +14.0 | +NaN | +11/11/11 00:00:00 | +
30939 | +SLVEA2010002 | +cs137 | +LVEA01 | +NaN | +343.58 | +1.49 | +NaN | +132.092 | +41179.0 | +NaN | +... | +4.0 | +0.0151 | +5.0 | +A | +0.159 | +0.8 | +14.0 | +14.0 | +NaN | +11/11/11 00:00:00 | +
30940 | +SLVEA2010003 | +cs137 | +LVEA01 | +NaN | +334.69 | +1.56 | +NaN | +134.390 | +41179.0 | +NaN | +... | +6.0 | +0.0151 | +5.0 | +A | +0.189 | +0.8 | +14.0 | +14.0 | +NaN | +11/11/11 00:00:00 | +
30941 | +SLVEA2010004 | +cs137 | +LVEA01 | +NaN | +348.50 | +1.56 | +NaN | +136.699 | +41179.0 | +NaN | +... | +8.0 | +0.0151 | +5.0 | +A | +0.194 | +0.8 | +14.0 | +14.0 | +NaN | +11/11/11 00:00:00 | +
30942 | +SLVEA2010005 | +cs137 | +LVEA01 | +NaN | +258.67 | +1.73 | +NaN | +104.894 | +41179.0 | +NaN | +... | +10.0 | +0.0151 | +5.0 | +A | +0.195 | +0.8 | +14.0 | +14.0 | +NaN | +11/11/11 00:00:00 | +
30943 | +SLVEA2010006 | +cs137 | +LVEA01 | +NaN | +182.02 | +2.05 | +NaN | +77.523 | +41179.0 | +NaN | +... | +12.0 | +0.0151 | +5.0 | +A | +0.221 | +0.8 | +14.0 | +14.0 | +NaN | +11/11/11 00:00:00 | +
30944 | +SLVEA2010007 | +cs137 | +LVEA01 | +NaN | +116.34 | +2.79 | +NaN | +46.946 | +41179.0 | +NaN | +... | +14.0 | +0.0151 | +5.0 | +A | +0.238 | +0.8 | +14.0 | +14.0 | +NaN | +11/11/11 00:00:00 | +
30945 | +SLVEA2010008 | +cs137 | +LVEA01 | +NaN | +94.07 | +2.61 | +NaN | +38.162 | +41179.0 | +NaN | +... | +16.0 | +0.0151 | +5.0 | +A | +0.234 | +0.8 | +14.0 | +14.0 | +NaN | +11/11/11 00:00:00 | +
30946 | +SLVEA2010009 | +cs137 | +LVEA01 | +NaN | +69.70 | +3.12 | +NaN | +27.444 | +41179.0 | +NaN | +... | +18.0 | +0.0151 | +5.0 | +A | +0.242 | +0.8 | +14.0 | +14.0 | +NaN | +11/11/11 00:00:00 | +
30947 | +SLVEA2010010 | +cs137 | +LVEA01 | +NaN | +59.63 | +3.40 | +NaN | +24.220 | +41179.0 | +NaN | +... | +20.0 | +0.0151 | +5.0 | +A | +0.257 | +0.7 | +14.0 | +14.0 | +NaN | +11/11/11 00:00:00 | +
30948 | +SLVEA2010011 | +cs137 | +LVEA01 | +< | +12.24 | +3.88 | +< | +5.035 | +41179.0 | +NaN | +... | +22.0 | +0.0151 | +5.0 | +A | +0.264 | +0.7 | +14.0 | +14.0 | +NaN | +11/11/11 00:00:00 | +
30949 | +SLVEA2010012 | +cs137 | +LVEA01 | +< | +0.83 | +NaN | +< | +0.330 | +41179.0 | +NaN | +... | +24.0 | +0.0151 | +5.0 | +A | +0.244 | +0.8 | +14.0 | +14.0 | +NaN | +11/11/11 00:00:00 | +
30950 | +SLVEA2010013 | +cs137 | +LVEA01 | +NaN | +331.61 | +1.40 | +NaN | +125.566 | +41179.0 | +NaN | +... | +2.0 | +0.0151 | +5.0 | +O | +0.115 | +0.9 | +14.0 | +14.0 | +NaN | +11/11/11 00:00:00 | +
30951 | +SLVEA2010014 | +cs137 | +LVEA01 | +NaN | +352.06 | +1.33 | +NaN | +144.516 | +41179.0 | +NaN | +... | +4.0 | +0.0151 | +5.0 | +A | +0.164 | +0.8 | +14.0 | +14.0 | +NaN | +11/11/11 00:00:00 | +
30952 | +SLVEA2010015 | +cs137 | +LVEA01 | +NaN | +367.11 | +1.36 | +NaN | +139.434 | +41179.0 | +NaN | +... | +6.0 | +0.0151 | +5.0 | +A | +0.191 | +0.8 | +14.0 | +14.0 | +NaN | +11/11/11 00:00:00 | +
30953 | +SLVEA2010016 | +cs137 | +LVEA01 | +NaN | +328.97 | +1.42 | +NaN | +124.348 | +41179.0 | +NaN | +... | +8.0 | +0.0151 | +5.0 | +A | +0.188 | +0.8 | +14.0 | +14.0 | +NaN | +11/11/11 00:00:00 | +
30954 | +SLVEA2010017 | +cs137 | +LVEA01 | +NaN | +356.30 | +1.37 | +NaN | +135.447 | +41179.0 | +NaN | +... | +10.0 | +0.0151 | +5.0 | +A | +0.179 | +0.8 | +14.0 | +14.0 | +NaN | +11/11/11 00:00:00 | +
30955 | +SLVEA2010018 | +cs137 | +LVEA01 | +NaN | +314.75 | +1.42 | +NaN | +118.765 | +41179.0 | +NaN | +... | +12.0 | +0.0151 | +5.0 | +A | +0.186 | +0.8 | +14.0 | +14.0 | +NaN | +11/11/11 00:00:00 | +
30956 | +SLVEA2010019 | +cs137 | +LVEA01 | +NaN | +261.64 | +1.52 | +NaN | +104.580 | +41179.0 | +NaN | +... | +14.0 | +0.0151 | +5.0 | +A | +0.194 | +0.8 | +14.0 | +14.0 | +NaN | +11/11/11 00:00:00 | +
30957 | +SLVEA2010020 | +cs137 | +LVEA01 | +NaN | +181.00 | +1.76 | +NaN | +74.058 | +41179.0 | +NaN | +... | +16.0 | +0.0151 | +5.0 | +A | +0.209 | +0.8 | +14.0 | +14.0 | +NaN | +11/11/11 00:00:00 | +
30958 | +SLVEA2010021 | +cs137 | +LVEA01 | +NaN | +143.65 | +2.02 | +NaN | +57.680 | +41179.0 | +NaN | +... | +18.0 | +0.0151 | +5.0 | +A | +0.214 | +0.8 | +14.0 | +14.0 | +NaN | +11/11/11 00:00:00 | +
30959 | +SLVEA2010022 | +cs137 | +LVEA01 | +NaN | +109.36 | +2.15 | +NaN | +42.153 | +41179.0 | +NaN | +... | +20.0 | +0.0151 | +5.0 | +A | +0.218 | +0.8 | +14.0 | +14.0 | +NaN | +11/11/11 00:00:00 | +
30960 | +SLVEA2010023 | +cs137 | +LVEA01 | +NaN | +94.12 | +1.39 | +NaN | +35.873 | +41179.0 | +NaN | +... | +22.0 | +0.0151 | +5.0 | +A | +0.212 | +0.8 | +14.0 | +14.0 | +NaN | +11/11/11 00:00:00 | +
30961 | +SLVEA2010024 | +cs137 | +LVEA01 | +NaN | +96.63 | +1.35 | +NaN | +38.864 | +41179.0 | +NaN | +... | +24.0 | +0.0151 | +5.0 | +A | +0.217 | +0.8 | +14.0 | +14.0 | +NaN | +11/11/11 00:00:00 | +
24 rows × 35 columns
++ | KEY | +NUCLIDE | +METHOD | +< VALUE_Bq/kg | +VALUE_Bq/kg | +ERROR%_kg | +< VALUE_Bq/m² | +VALUE_Bq/m² | +ERROR%_m² | +DATE_OF_ENTRY_x | +... | +LOWSLI | +AREA | +SEDI | +OXIC | +DW% | +LOI% | +MORS_SUBBASIN | +HELCOM_SUBBASIN | +SUM_LINK | +DATE_OF_ENTRY_y | +
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
9824 | +SERPC1997001 | +cs134 | +NaN | +NaN | +3.80 | +20.0 | +NaN | +5.75 | +NaN | +NaN | +... | +2.0 | +0.008 | +5.0 | +A | +0.0 | +0.0 | +11.0 | +11.0 | +a | +NaN | +
9825 | +SERPC1997001 | +cs137 | +NaN | +NaN | +389.00 | +4.0 | +NaN | +589.00 | +NaN | +NaN | +... | +2.0 | +0.008 | +5.0 | +A | +0.0 | +0.0 | +11.0 | +11.0 | +a | +NaN | +
9826 | +SERPC1997002 | +cs134 | +NaN | +NaN | +4.78 | +13.0 | +NaN | +12.00 | +NaN | +NaN | +... | +4.0 | +0.008 | +5.0 | +A | +0.0 | +0.0 | +11.0 | +11.0 | +a | +NaN | +
9827 | +SERPC1997002 | +cs137 | +NaN | +NaN | +420.00 | +4.0 | +NaN | +1060.00 | +NaN | +NaN | +... | +4.0 | +0.008 | +5.0 | +A | +0.0 | +0.0 | +11.0 | +11.0 | +a | +NaN | +
9828 | +SERPC1997003 | +cs134 | +NaN | +NaN | +3.12 | +17.0 | +NaN | +12.00 | +NaN | +NaN | +... | +6.0 | +0.008 | +5.0 | +A | +0.0 | +0.0 | +11.0 | +11.0 | +a | +NaN | +
... | +... | +... | +... | +... | +... | +... | +... | +... | +... | +... | +... | +... | +... | +... | +... | +... | +... | +... | +... | +... | +... | +
15257 | +SKRIL1999062 | +th228 | +1 | +NaN | +68.00 | +NaN | +NaN | +NaN | +NaN | +NaN | +... | +15.0 | +0.006 | +0.0 | +O | +0.0 | +0.0 | +11.0 | +11.0 | +a | +NaN | +
15258 | +SKRIL1999063 | +k40 | +1 | +NaN | +1210.00 | +NaN | +NaN | +NaN | +NaN | +NaN | +... | +21.5 | +0.006 | +0.0 | +O | +0.0 | +0.0 | +11.0 | +11.0 | +a | +NaN | +
15259 | +SKRIL1999063 | +ra226 | +KRIL01 | +NaN | +56.50 | +NaN | +NaN | +NaN | +NaN | +NaN | +... | +21.5 | +0.006 | +0.0 | +O | +0.0 | +0.0 | +11.0 | +11.0 | +a | +NaN | +
15260 | +SKRIL1999063 | +ra228 | +KRIL01 | +NaN | +72.20 | +NaN | +NaN | +NaN | +NaN | +NaN | +... | +21.5 | +0.006 | +0.0 | +O | +0.0 | +0.0 | +11.0 | +11.0 | +a | +NaN | +
15261 | +SKRIL1999063 | +th228 | +1 | +NaN | +74.20 | +NaN | +NaN | +NaN | +NaN | +NaN | +... | +21.5 | +0.006 | +0.0 | +O | +0.0 | +0.0 | +11.0 | +11.0 | +a | +NaN | +
302 rows × 35 columns
++ | KEY | +NUCLIDE | +METHOD | +< VALUE_Bq/kg | +VALUE_Bq/kg | +BASIS | +ERROR% | +NUMBER | +DATE_OF_ENTRY_x | +COUNTRY | +... | +BIOTATYPE | +TISSUE | +NO | +LENGTH | +WEIGHT | +DW% | +LOI% | +MORS_SUBBASIN | +HELCOM_SUBBASIN | +DATE_OF_ENTRY_y | +
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
5971 | +BERPC1997002 | +k40 | +NaN | +NaN | +116.00 | +W | +3.0 | +NaN | +NaN | +91.0 | +... | +F | +5 | +0.0 | +0.0 | +0.0 | +0.0 | +0.0 | +11.0 | +11 | +NaN | +
5972 | +BERPC1997002 | +cs137 | +NaN | +NaN | +12.60 | +W | +4.0 | +NaN | +NaN | +91.0 | +... | +F | +5 | +0.0 | +0.0 | +0.0 | +0.0 | +0.0 | +11.0 | +11 | +NaN | +
5973 | +BERPC1997002 | +cs134 | +NaN | +NaN | +0.14 | +W | +18.0 | +NaN | +NaN | +91.0 | +... | +F | +5 | +0.0 | +0.0 | +0.0 | +0.0 | +0.0 | +11.0 | +11 | +NaN | +
5974 | +BERPC1997001 | +k40 | +NaN | +NaN | +116.00 | +W | +4.0 | +NaN | +NaN | +91.0 | +... | +F | +5 | +0.0 | +0.0 | +0.0 | +0.0 | +0.0 | +11.0 | +11 | +NaN | +
5975 | +BERPC1997001 | +cs137 | +NaN | +NaN | +12.00 | +W | +4.0 | +NaN | +NaN | +91.0 | +... | +F | +5 | +0.0 | +0.0 | +0.0 | +0.0 | +0.0 | +11.0 | +11 | +NaN | +
5976 | +BERPC1997001 | +cs134 | +NaN | +NaN | +0.21 | +W | +24.0 | +NaN | +NaN | +91.0 | +... | +F | +5 | +0.0 | +0.0 | +0.0 | +0.0 | +0.0 | +11.0 | +11 | +NaN | +
6 rows × 33 columns
+++This notebook contains a data pipeline (handler) that converts the master MARIS database dump into
+NetCDF
format. It enables batch encoding of all legacy datasets into NetCDF.
Key functions of this handler:
+The result is a set of NetCDF files, one for each unique reference ID in the input data.
++++DataLoader (fname:str)
Load specific MARIS dataset through its ref_id.
++ | Type | +Details | +
---|---|---|
fname | +str | +Path to the MARIS global dump file | +
class DataLoader:
    "Load specific MARIS dataset through its ref_id."
    # Maps the `samptype` labels of the dump to the keys of the returned dictionary.
    LUT = {
        'Sediment': 'sediment', 'Seawater': 'seawater',
        'Suspended matter': 'suspended-matter', 'Biota': 'biota'}

    def __init__(self,
                 fname: str  # Path to the MARIS global dump file
                 ):
        self.fname = fname
        self.df = None  # Lazy loading

    def _load_data(self):
        "Read the tab-separated dump on first use; later calls reuse `self.df`."
        if self.df is None:
            self.df = pd.read_csv(self.fname, sep='\t', encoding='ISO-8859-1')

    def __call__(self,
                 ref_id: int  # Reference ID of interest
                 ) -> dict:  # Dictionary of dataframes
        self._load_data()
        filtered_df = self.df[self.df.ref_id == ref_id]
        # One dataframe per sample type; sample types absent from `LUT` are skipped.
        return {
            self.LUT[name]: grp
            for name, grp in filtered_df.groupby('samptype')
            if name in self.LUT
        }
+++get_zotero_key (dfs)
Retrieve Zotero key from MARIS dump.
++++get_fname (dfs)
Retrieve filename from MARIS dump.
+ +Let’s get a quick look at the input MARIS dump:
+df = pd.read_csv(fname_in, sep='\t', encoding='ISO-8859-1')
+
+print('# of unique refs: ', len(df.ref_id.unique()))
+print('columns: ', df.columns)
+df.head()
# of unique refs: 526
+columns: Index(['ref_id', 'displaytext', 'samptype', 'nuclide_id', 'latitude',
+ 'longitude', 'begperiod', 'endperiod', 'sampdepth', 'totdepth',
+ 'uncertaint', 'unit_id', 'detection', 'area_id', 'species_id',
+ 'biogroup_id', 'bodypar_id', 'sedtype_id', 'volume', 'salinity',
+ 'temperatur', 'sampmet_id', 'prepmet_id', 'counmet_id', 'activity',
+ 'zoterourl'],
+ dtype='object')
++ | ref_id | +displaytext | +samptype | +nuclide_id | +latitude | +longitude | +begperiod | +endperiod | +sampdepth | +totdepth | +... | +bodypar_id | +sedtype_id | +volume | +salinity | +temperatur | +sampmet_id | +prepmet_id | +counmet_id | +activity | +zoterourl | +
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | +182 | +Urban et al., 2015 | +Biota | +33 | +-35.140833 | +117.604444 | +2014-05-06 00:00:00 | +NaN | +-1.0 | +NaN | +... | +52 | +0 | +NaN | +NaN | +NaN | +0 | +6 | +20 | +0.387 | +https://www.zotero.org/groups/2432820/maris/it... | +
1 | +182 | +Urban et al., 2015 | +Biota | +47 | +-35.140833 | +117.604444 | +2014-05-06 00:00:00 | +NaN | +-1.0 | +NaN | +... | +52 | +0 | +NaN | +NaN | +NaN | +0 | +6 | +5 | +1.44 | +https://www.zotero.org/groups/2432820/maris/it... | +
2 | +182 | +Urban et al., 2015 | +Biota | +31 | +-16.466944 | +123.535833 | +2014-02-27 00:00:00 | +NaN | +-1.0 | +NaN | +... | +52 | +0 | +NaN | +NaN | +NaN | +0 | +6 | +20 | +0.042 | +https://www.zotero.org/groups/2432820/maris/it... | +
3 | +182 | +Urban et al., 2015 | +Biota | +33 | +-16.466944 | +123.535833 | +2014-02-27 00:00:00 | +NaN | +-1.0 | +NaN | +... | +52 | +0 | +NaN | +NaN | +NaN | +0 | +6 | +20 | +0.075 | +https://www.zotero.org/groups/2432820/maris/it... | +
4 | +182 | +Urban et al., 2015 | +Biota | +47 | +-16.466944 | +123.535833 | +2014-02-27 00:00:00 | +NaN | +-1.0 | +NaN | +... | +52 | +0 | +NaN | +NaN | +NaN | +0 | +6 | +5 | +0.069 | +https://www.zotero.org/groups/2432820/maris/it... | +
5 rows × 26 columns
+Let’s check that we retrieve the expected keys
(sample types) and associated dataframes:
dataloader = DataLoader(fname_in)
+ref_id = 100 # Some other ref_id examples: OSPAR: 191, HELCOM: 100, 717 (only seawater)
+
+dfs = dataloader(ref_id=ref_id)
+print(f'keys: {dfs.keys()}')
+dfs['sediment'].head()
keys: dict_keys(['biota', 'seawater', 'sediment'])
++ | ref_id | +displaytext | +samptype | +nuclide_id | +latitude | +longitude | +begperiod | +endperiod | +sampdepth | +totdepth | +... | +bodypar_id | +sedtype_id | +volume | +salinity | +temperatur | +sampmet_id | +prepmet_id | +counmet_id | +activity | +zoterourl | +
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
549778 | +100 | +HELCOM MORS, 2018 | +Sediment | +17 | +54.838333 | +9.9 | +1989-06-14 00:00:00 | +NaN | +-1.0 | +24.0 | +... | +0 | +59 | +NaN | +NaN | +NaN | +0 | +0 | +0 | +26.6 | +https://www.zotero.org/groups/2432820/maris/it... | +
549779 | +100 | +HELCOM MORS, 2018 | +Sediment | +24 | +54.838333 | +9.9 | +1989-06-14 00:00:00 | +NaN | +-1.0 | +24.0 | +... | +0 | +59 | +NaN | +NaN | +NaN | +0 | +0 | +0 | +134.0 | +https://www.zotero.org/groups/2432820/maris/it... | +
549780 | +100 | +HELCOM MORS, 2018 | +Sediment | +24 | +54.838333 | +9.9 | +1989-06-14 00:00:00 | +NaN | +-1.0 | +24.0 | +... | +0 | +59 | +NaN | +NaN | +NaN | +0 | +0 | +0 | +18.6 | +https://www.zotero.org/groups/2432820/maris/it... | +
549781 | +100 | +HELCOM MORS, 2018 | +Sediment | +31 | +54.838333 | +9.9 | +1989-06-14 00:00:00 | +NaN | +-1.0 | +24.0 | +... | +0 | +59 | +NaN | +NaN | +NaN | +0 | +0 | +0 | +42.5 | +https://www.zotero.org/groups/2432820/maris/it... | +
549782 | +100 | +HELCOM MORS, 2018 | +Sediment | +31 | +54.838333 | +9.9 | +1989-06-14 00:00:00 | +NaN | +-1.0 | +24.0 | +... | +0 | +59 | +NaN | +NaN | +NaN | +0 | +0 | +0 | +5.9 | +https://www.zotero.org/groups/2432820/maris/it... | +
5 rows × 26 columns
+Remap nuclide_id
to MARIS radionuclide standard names:
+++RemapRdnNameCB (fn_lut=<function <lambda>>)
Remap to MARIS radionuclide names.
+dfs = dataloader(ref_id=ref_id)
+tfm = Transformer(dfs, cbs=[RemapRdnNameCB()])
+
+print(tfm()['sediment']['nuclide_id'].unique())
['ru106' 'sb125' 'cs134' 'cs137' 'k40' 'co60' 'ag110m' 'ra226' 'th232'
+ 'pb212' 'pb214' 'pu238' 'am241' 'pu239_240_tot' 'zr95' 'mn54' 'ac228'
+ 'u235' 'tl208' 'be7' 'bi214' 'ra223' 'ru103' 'sr90' 'eu155' 'ba140'
+ 'co58' 'ra224' 'po210' 'ra228' 'th228' 'ce144' 'cs134_137_tot' 'pb210'
+ 'pu239' 'cd109' 'bi212' 'pu238_240_tot' 'nb95' 'ir192' 'sb124' 'zn65'
+ 'th234' 'pu241']
+Rename MARIS dump columns to MARIS netCDF standard names:
+Index(['ref_id', 'displaytext', 'samptype', 'nuclide_id', 'latitude',
+ 'longitude', 'begperiod', 'endperiod', 'sampdepth', 'totdepth',
+ 'uncertaint', 'unit_id', 'detection', 'area_id', 'species_id',
+ 'biogroup_id', 'bodypar_id', 'sedtype_id', 'volume', 'salinity',
+ 'temperatur', 'sampmet_id', 'prepmet_id', 'counmet_id', 'activity',
+ 'zoterourl'],
+ dtype='object')
++++renaming_rules ()
Rename MARIS dump columns to MARIS netCDF standard names.
def renaming_rules():
    "Rename MARIS dump columns to MARIS netCDF standard names."
    # Named `cdl_vars` (not `vars`) so the builtin is not shadowed.
    cdl_vars = cdl_cfg()['vars']
    defaults, suffixes = cdl_vars['defaults'], cdl_vars['suffixes']
    bio, sed = cdl_vars['bio'], cdl_vars['sed']
    # Keys are MARIS dump column names; values are the target names.
    # Insertion order is kept as-is since callers may rely on it for column order.
    return {
        'latitude': defaults['lat']['name'],
        'longitude': defaults['lon']['name'],
        'begperiod': defaults['time']['name'],
        'sampdepth': defaults['smp_depth']['name'],
        'totdepth': defaults['tot_depth']['name'],
        'uncertaint': suffixes['uncertainty']['name'],
        'unit_id': suffixes['unit']['name'],
        'detection': suffixes['detection_limit']['name'],
        'area_id': defaults['area']['name'],
        'species_id': bio['species']['name'],
        'biogroup_id': bio['bio_group']['name'],
        'bodypar_id': bio['body_part']['name'],
        'sedtype_id': sed['sed_type']['name'],
        'volume': suffixes['volume']['name'],
        'salinity': suffixes['salinity']['name'],
        'temperatur': suffixes['temperature']['name'],
        'sampmet_id': suffixes['sampling_method']['name'],
        'prepmet_id': suffixes['preparation_method']['name'],
        'counmet_id': suffixes['counting_method']['name'],
        'activity': 'value',
        'nuclide_id': 'nuclide'
    }
+++RenameColumnCB (renaming_rules=<function renaming_rules>)
Renaming variables to MARIS standard names.
class RenameColumnCB(Callback):
    "Renaming variables to MARIS standard names."
    def __init__(self, renaming_rules=renaming_rules): fc.store_attr()

    def __call__(self, tfm):
        # Call the rules stored on the instance, not the module-level function,
        # so a custom `renaming_rules` passed to the constructor takes effect.
        lut = self.renaming_rules()
        coi = list(lut)  # Columns of interest, in lookup-table order
        for k in tfm.dfs.keys():
            # Select then rename out-of-place: renaming a `.loc` selection in place
            # can raise pandas' SettingWithCopyWarning.
            tfm.dfs[k] = tfm.dfs[k].loc[:, coi].rename(columns=lut)
dfs = dataloader(ref_id=ref_id)
+tfm = Transformer(dfs, cbs=[
+ RemapRdnNameCB(),
+ RenameColumnCB()
+ ])
+
+print(tfm()['sediment'])
lat lon time smp_depth tot_depth \
+549778 54.838333 9.9 1989-06-14 00:00:00 -1.0 24.0
+549779 54.838333 9.9 1989-06-14 00:00:00 -1.0 24.0
+549780 54.838333 9.9 1989-06-14 00:00:00 -1.0 24.0
+549781 54.838333 9.9 1989-06-14 00:00:00 -1.0 24.0
+549782 54.838333 9.9 1989-06-14 00:00:00 -1.0 24.0
+... ... ... ... ... ...
+1532415 57.619722 23.621389 2005-12-02 00:00:00 -1.0 55.0
+1532416 57.619722 23.621389 2005-12-02 00:00:00 -1.0 55.0
+1532417 57.619722 23.621389 2005-12-02 00:00:00 -1.0 55.0
+1532418 57.619722 23.621389 2005-12-02 00:00:00 -1.0 55.0
+1532419 57.619722 23.621389 2005-12-02 00:00:00 -1.0 55.0
+
+ _unc _unit _dl area species ... body_part sed_type _vol _sal \
+549778 3.99 4 = 2374 0 ... 0 59 NaN NaN
+549779 NaN 2 = 2374 0 ... 0 59 NaN NaN
+549780 1.674 4 = 2374 0 ... 0 59 NaN NaN
+549781 NaN 2 = 2374 0 ... 0 59 NaN NaN
+549782 1.829 4 = 2374 0 ... 0 59 NaN NaN
+... ... ... .. ... ... ... ... ... ... ...
+1532415 86.2836 4 = 2409 0 ... 0 58 NaN NaN
+1532416 NaN 2 = 2409 0 ... 0 58 NaN NaN
+1532417 24.45552 4 = 2409 0 ... 0 58 NaN NaN
+1532418 NaN 2 = 2409 0 ... 0 58 NaN NaN
+1532419 123.2568 4 = 2409 0 ... 0 58 NaN NaN
+
+ _temp _sampmet _prepmet _counmet value nuclide
+549778 NaN 0 0 0 26.6 ru106
+549779 NaN 0 0 0 134.0 sb125
+549780 NaN 0 0 0 18.6 sb125
+549781 NaN 0 0 0 42.5 cs134
+549782 NaN 0 0 0 5.9 cs134
+... ... ... ... ... ... ...
+1532415 NaN 0 0 0 1106.2 k40
+1532416 NaN 0 0 0 991.023 cs137
+1532417 NaN 0 0 0 550.8 cs137
+1532418 NaN 0 0 0 2461.36 k40
+1532419 NaN 0 0 0 1368.0 k40
+
+[123196 rows x 21 columns]
++++DropNAColumnsCB (na_value=0)
Drop variable containing only NaN or ‘Not available’ (id=0 in MARIS lookup tables).
class DropNAColumnsCB(Callback):
    "Drop variable containing only NaN or 'Not available' (id=0 in MARIS lookup tables)."
    def __init__(self, na_value=0): fc.store_attr()

    def isMarisNA(self, col):
        "Return True when `col` holds a single distinct value equal to `na_value`."
        return len(col.unique()) == 1 and col.iloc[0] == self.na_value

    def dropMarisNA(self, df):
        "Return `df` without the columns flagged by `isMarisNA`."
        na_cols = [col for col in df.columns if self.isMarisNA(df[col])]
        return df.drop(labels=na_cols, axis=1)

    def __call__(self, tfm):
        for k in tfm.dfs.keys():
            # First remove all-NaN columns, then columns made only of `na_value`.
            tfm.dfs[k] = tfm.dfs[k].dropna(axis=1, how='all')
            tfm.dfs[k] = self.dropMarisNA(tfm.dfs[k])
dfs = dataloader(ref_id=ref_id)
+tfm = Transformer(dfs, cbs=[
+ RemapRdnNameCB(),
+ RenameColumnCB(),
+ DropNAColumnsCB()
+ ])
+
+print(tfm()['sediment'])
lat lon time smp_depth tot_depth \
+549778 54.838333 9.9 1989-06-14 00:00:00 -1.0 24.0
+549779 54.838333 9.9 1989-06-14 00:00:00 -1.0 24.0
+549780 54.838333 9.9 1989-06-14 00:00:00 -1.0 24.0
+549781 54.838333 9.9 1989-06-14 00:00:00 -1.0 24.0
+549782 54.838333 9.9 1989-06-14 00:00:00 -1.0 24.0
+... ... ... ... ... ...
+1532415 57.619722 23.621389 2005-12-02 00:00:00 -1.0 55.0
+1532416 57.619722 23.621389 2005-12-02 00:00:00 -1.0 55.0
+1532417 57.619722 23.621389 2005-12-02 00:00:00 -1.0 55.0
+1532418 57.619722 23.621389 2005-12-02 00:00:00 -1.0 55.0
+1532419 57.619722 23.621389 2005-12-02 00:00:00 -1.0 55.0
+
+ _unc _unit _dl area sed_type _sampmet _prepmet value nuclide
+549778 3.99 4 = 2374 59 0 0 26.6 ru106
+549779 NaN 2 = 2374 59 0 0 134.0 sb125
+549780 1.674 4 = 2374 59 0 0 18.6 sb125
+549781 NaN 2 = 2374 59 0 0 42.5 cs134
+549782 1.829 4 = 2374 59 0 0 5.9 cs134
+... ... ... .. ... ... ... ... ... ...
+1532415 86.2836 4 = 2409 58 0 0 1106.2 k40
+1532416 NaN 2 = 2409 58 0 0 991.023 cs137
+1532417 24.45552 4 = 2409 58 0 0 550.8 cs137
+1532418 NaN 2 = 2409 58 0 0 2461.36 k40
+1532419 123.2568 4 = 2409 58 0 0 1368.0 k40
+
+[123196 rows x 14 columns]
+{'Not applicable': -1, 'Not Available': 0, '=': 1, '<': 2, 'ND': 3, 'DE': 4}
++++SanitizeDetectionLimitCB (fn_lut=<function <lambda>>)
Assign Detection Limit name to its id based on MARIS nomenclature.
class SanitizeDetectionLimitCB(Callback):
    "Assign Detection Limit name to its id based on MARIS nomenclature."
    def __init__(self,
                 fn_lut=dl_name_to_id):
        fc.store_attr()
        # Name of the detection-limit column, read from the CDL configuration.
        self.var_name = cdl_cfg()['vars']['suffixes']['detection_limit']['name']

    def __call__(self, tfm):
        lut = self.fn_lut()
        for k in tfm.dfs.keys():
            # Values that are keys of `lut` are replaced; others are left unchanged.
            tfm.dfs[k][self.var_name] = tfm.dfs[k][self.var_name].replace(lut)
dfs = dataloader(ref_id=ref_id)
+tfm = Transformer(dfs, cbs=[
+ RemapRdnNameCB(),
+ RenameColumnCB(),
+ DropNAColumnsCB(),
+ SanitizeDetectionLimitCB()
+ ])
+
+print(tfm()['sediment']['_dl'])
549778 1
+549779 1
+549780 1
+549781 1
+549782 1
+ ..
+1532415 1
+1532416 1
+1532417 1
+1532418 1
+1532419 1
+Name: _dl, Length: 123196, dtype: int64
+Recall that in the netCDF
format, time needs to be encoded as an integer
representing the number of seconds since a reference time. In our case we chose 1970-01-01 00:00:00.0
as defined in configs.ipynb
.
+++ParseTimeCB ()
Parse time column from MARIS dump.
+dfs = dataloader(ref_id=ref_id)
+tfm = Transformer(dfs, cbs=[
+ RemapRdnNameCB(),
+ RenameColumnCB(),
+ DropNAColumnsCB(),
+ SanitizeDetectionLimitCB(),
+ ParseTimeCB(),
+ EncodeTimeCB(cfg())
+ ])
+
+print(tfm()['sediment'])
lat lon time smp_depth tot_depth _unc _unit \
+549778 54.838333 9.9 613785600 -1.0 24.0 3.99 4
+549779 54.838333 9.9 613785600 -1.0 24.0 NaN 2
+549780 54.838333 9.9 613785600 -1.0 24.0 1.674 4
+549781 54.838333 9.9 613785600 -1.0 24.0 NaN 2
+549782 54.838333 9.9 613785600 -1.0 24.0 1.829 4
+... ... ... ... ... ... ... ...
+1532415 57.619722 23.621389 1133481600 -1.0 55.0 86.2836 4
+1532416 57.619722 23.621389 1133481600 -1.0 55.0 NaN 2
+1532417 57.619722 23.621389 1133481600 -1.0 55.0 24.45552 4
+1532418 57.619722 23.621389 1133481600 -1.0 55.0 NaN 2
+1532419 57.619722 23.621389 1133481600 -1.0 55.0 123.2568 4
+
+ _dl area sed_type _sampmet _prepmet value nuclide
+549778 1 2374 59 0 0 26.6 ru106
+549779 1 2374 59 0 0 134.0 sb125
+549780 1 2374 59 0 0 18.6 sb125
+549781 1 2374 59 0 0 42.5 cs134
+549782 1 2374 59 0 0 5.9 cs134
+... ... ... ... ... ... ... ...
+1532415 1 2409 58 0 0 1106.2 k40
+1532416 1 2409 58 0 0 991.023 cs137
+1532417 1 2409 58 0 0 550.8 cs137
+1532418 1 2409 58 0 0 2461.36 k40
+1532419 1 2409 58 0 0 1368.0 k40
+
+[123196 rows x 14 columns]
+dfs = dataloader(ref_id=ref_id)
+tfm = Transformer(dfs, cbs=[
+ RemapRdnNameCB(),
+ RenameColumnCB(),
+ DropNAColumnsCB(),
+ SanitizeDetectionLimitCB(),
+ ParseTimeCB(),
+ EncodeTimeCB(cfg()),
+ ReshapeLongToWide()
+ ])
+
+print(tfm()['sediment'])
lon time area lat smp_depth tot_depth \
+org_index
+549834 9.633333 544838400 2374 54.850000 -1.0 16.0
+549835 9.633333 544838400 2374 54.850000 -1.0 16.0
+549836 9.633333 544838400 2374 54.850000 -1.0 16.0
+549837 9.633333 544838400 2374 54.850000 -1.0 16.0
+549838 9.633333 544838400 2374 54.850000 -1.0 16.0
+... ... ... ... ... ... ...
+1518808 29.833333 1128211200 2407 59.983333 -1.0 0.0
+1518809 29.833333 1128211200 2407 59.983333 -1.0 0.0
+1518810 29.833333 1128211200 2407 59.983333 -1.0 0.0
+1528756 29.833333 1128211200 2407 59.983333 -1.0 0.0
+1528757 29.833333 1128211200 2407 59.983333 -1.0 0.0
+
+ sed_type ac228_dl ag110m_dl am241_dl ... sb124 sb125 sr90 \
+org_index ...
+549834 58 NaN NaN NaN ... NaN NaN NaN
+549835 58 NaN NaN NaN ... NaN NaN NaN
+549836 58 NaN NaN NaN ... NaN NaN NaN
+549837 58 NaN NaN NaN ... NaN NaN NaN
+549838 58 NaN NaN NaN ... NaN NaN NaN
+... ... ... ... ... ... ... ... ...
+1518808 2 NaN NaN NaN ... NaN NaN NaN
+1518809 2 NaN NaN NaN ... NaN NaN NaN
+1518810 2 NaN NaN NaN ... NaN NaN NaN
+1528756 2 NaN NaN NaN ... NaN NaN NaN
+1528757 2 NaN NaN NaN ... NaN NaN NaN
+
+ th228 th232 th234 tl208 u235 zn65 zr95
+org_index
+549834 NaN NaN NaN NaN NaN NaN NaN
+549835 NaN NaN NaN NaN NaN NaN NaN
+549836 NaN NaN NaN NaN NaN NaN NaN
+549837 NaN NaN NaN NaN NaN NaN NaN
+549838 NaN NaN NaN NaN NaN NaN NaN
+... ... ... ... ... ... ... ...
+1518808 NaN NaN NaN NaN NaN NaN NaN
+1518809 NaN NaN NaN NaN NaN NaN NaN
+1518810 NaN NaN NaN NaN NaN NaN NaN
+1528756 NaN NaN NaN NaN NaN NaN NaN
+1528757 NaN NaN NaN NaN NaN NaN NaN
+
+[123196 rows x 270 columns]
+dfs = dataloader(ref_id=ref_id)
+tfm = Transformer(dfs, cbs=[
+ RemapRdnNameCB(),
+ RenameColumnCB(),
+ DropNAColumnsCB(),
+ SanitizeDetectionLimitCB(),
+ ParseTimeCB(),
+ EncodeTimeCB(cfg()),
+ ReshapeLongToWide(),
+ SanitizeLonLatCB()
+ ])
+
+tfm()['sediment']
+ | lon | +time | +area | +lat | +smp_depth | +tot_depth | +sed_type | +ac228_dl | +ag110m_dl | +am241_dl | +... | +sb124 | +sb125 | +sr90 | +th228 | +th232 | +th234 | +tl208 | +u235 | +zn65 | +zr95 | +
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
org_index | ++ | + | + | + | + | + | + | + | + | + | + | + | + | + | + | + | + | + | + | + | + |
549834 | +9.633333 | +544838400 | +2374 | +54.850000 | +-1.0 | +16.0 | +58 | +NaN | +NaN | +NaN | +... | +NaN | +NaN | +NaN | +NaN | +NaN | +NaN | +NaN | +NaN | +NaN | +NaN | +
549835 | +9.633333 | +544838400 | +2374 | +54.850000 | +-1.0 | +16.0 | +58 | +NaN | +NaN | +NaN | +... | +NaN | +NaN | +NaN | +NaN | +NaN | +NaN | +NaN | +NaN | +NaN | +NaN | +
549836 | +9.633333 | +544838400 | +2374 | +54.850000 | +-1.0 | +16.0 | +58 | +NaN | +NaN | +NaN | +... | +NaN | +NaN | +NaN | +NaN | +NaN | +NaN | +NaN | +NaN | +NaN | +NaN | +
549837 | +9.633333 | +544838400 | +2374 | +54.850000 | +-1.0 | +16.0 | +58 | +NaN | +NaN | +NaN | +... | +NaN | +NaN | +NaN | +NaN | +NaN | +NaN | +NaN | +NaN | +NaN | +NaN | +
549838 | +9.633333 | +544838400 | +2374 | +54.850000 | +-1.0 | +16.0 | +58 | +NaN | +NaN | +NaN | +... | +NaN | +NaN | +NaN | +NaN | +NaN | +NaN | +NaN | +NaN | +NaN | +NaN | +
... | +... | +... | +... | +... | +... | +... | +... | +... | +... | +... | +... | +... | +... | +... | +... | +... | +... | +... | +... | +... | +... | +
1518808 | +29.833333 | +1128211200 | +2407 | +59.983333 | +-1.0 | +0.0 | +2 | +NaN | +NaN | +NaN | +... | +NaN | +NaN | +NaN | +NaN | +NaN | +NaN | +NaN | +NaN | +NaN | +NaN | +
1518809 | +29.833333 | +1128211200 | +2407 | +59.983333 | +-1.0 | +0.0 | +2 | +NaN | +NaN | +NaN | +... | +NaN | +NaN | +NaN | +NaN | +NaN | +NaN | +NaN | +NaN | +NaN | +NaN | +
1518810 | +29.833333 | +1128211200 | +2407 | +59.983333 | +-1.0 | +0.0 | +2 | +NaN | +NaN | +NaN | +... | +NaN | +NaN | +NaN | +NaN | +NaN | +NaN | +NaN | +NaN | +NaN | +NaN | +
1528756 | +29.833333 | +1128211200 | +2407 | +59.983333 | +-1.0 | +0.0 | +2 | +NaN | +NaN | +NaN | +... | +NaN | +NaN | +NaN | +NaN | +NaN | +NaN | +NaN | +NaN | +NaN | +NaN | +
1528757 | +29.833333 | +1128211200 | +2407 | +59.983333 | +-1.0 | +0.0 | +2 | +NaN | +NaN | +NaN | +... | +NaN | +NaN | +NaN | +NaN | +NaN | +NaN | +NaN | +NaN | +NaN | +NaN | +
123196 rows × 270 columns
+dfs = dataloader(ref_id=ref_id)
+tfm = Transformer(dfs, cbs=[
+ RemapRdnNameCB(),
+ RenameColumnCB(),
+ DropNAColumnsCB(),
+ SanitizeDetectionLimitCB(),
+ ParseTimeCB(),
+ EncodeTimeCB(cfg()),
+ ReshapeLongToWide(),
+ SanitizeLonLatCB()
+ ])
+
+dfs_tfm = tfm()
+tfm.logs
['Remap to MARIS radionuclide names.',
+ 'Renaming variables to MARIS standard names.',
+ "Drop variable containing only NaN or 'Not available' (id=0 in MARIS lookup tables).",
+ 'Assign Detection Limit name to its id based on MARIS nomenclature.',
+ 'Encode time as `int` representing seconds since xxx',
+ 'Drop row when both longitude & latitude equal 0. Drop unrealistic longitude & latitude values. Convert longitude & latitude `,` separator to `.` separator.']
++++get_attrs (tfm, zotero_key, kw=['oceanography', 'Earth Science > Oceans > + Ocean Chemistry> Radionuclides', 'Earth Science > Human + Dimensions > Environmental Impacts > Nuclear Radiation + Exposure', 'Earth Science > Oceans > Ocean Chemistry > Ocean + Tracers, Earth Science > Oceans > Marine Sediments', 'Earth + Science > Oceans > Ocean Chemistry, Earth Science > Oceans > + Sea Ice > Isotopes', 'Earth Science > Oceans > Water Quality > + Ocean Contaminants', 'Earth Science > Biological + Classification > Animals/Vertebrates > Fish', 'Earth Science > + Biosphere > Ecosystems > Marine Ecosystems', 'Earth Science > + Biological Classification > Animals/Invertebrates > Mollusks', + 'Earth Science > Biological Classification > + Animals/Invertebrates > Arthropods > Crustaceans', 'Earth + Science > Biological Classification > Plants > Macroalgae + (Seaweeds)'])
Retrieve global attributes from MARIS dump.
# Default keywords; `get_attrs` joins them with ', ' into the `keywords` attribute.
kw = ['oceanography', 'Earth Science > Oceans > Ocean Chemistry> Radionuclides',
      'Earth Science > Human Dimensions > Environmental Impacts > Nuclear Radiation Exposure',
      'Earth Science > Oceans > Ocean Chemistry > Ocean Tracers, Earth Science > Oceans > Marine Sediments',
      'Earth Science > Oceans > Ocean Chemistry, Earth Science > Oceans > Sea Ice > Isotopes',
      'Earth Science > Oceans > Water Quality > Ocean Contaminants',
      'Earth Science > Biological Classification > Animals/Vertebrates > Fish',
      'Earth Science > Biosphere > Ecosystems > Marine Ecosystems',
      'Earth Science > Biological Classification > Animals/Invertebrates > Mollusks',
      'Earth Science > Biological Classification > Animals/Invertebrates > Arthropods > Crustaceans',
      'Earth Science > Biological Classification > Plants > Macroalgae (Seaweeds)']
def get_attrs(tfm, zotero_key, kw=kw):
    "Retrieve global attributes from MARIS dump."
    # Each callback contributes attributes computed from `tfm.dfs`; the feeder is
    # called immediately and its result returned.
    return GlobAttrsFeeder(tfm.dfs, cbs=[
        BboxCB(),
        DepthRangeCB(),
        TimeRangeCB(cfg()),
        ZoteroCB(zotero_key, cfg=cfg()),
        KeyValuePairCB('keywords', ', '.join(kw)),
        KeyValuePairCB('publisher_postprocess_logs', ', '.join(tfm.logs))
    ])()
{'geospatial_lat_min': '30.435833333333335',
+ 'geospatial_lat_max': '65.75',
+ 'geospatial_lon_min': '9.633333333333333',
+ 'geospatial_lon_max': '53.5',
+ 'geospatial_bounds': 'POLYGON ((9.633333333333333 53.5, 30.435833333333335 53.5, 30.435833333333335 65.75, 9.633333333333333 65.75, 9.633333333333333 53.5))',
+ 'time_coverage_start': '1984-01-10T00:00:00',
+ 'time_coverage_end': '2018-12-14T00:00:00',
+ 'title': 'Radioactivity Monitoring of the Irish Marine Environment 1991 and 1992',
+ 'summary': '',
+ 'creator_name': '[{"creatorType": "author", "firstName": "A.", "lastName": "McGarry"}, {"creatorType": "author", "firstName": "S.", "lastName": "Lyons"}, {"creatorType": "author", "firstName": "C.", "lastName": "McEnri"}, {"creatorType": "author", "firstName": "T.", "lastName": "Ryan"}, {"creatorType": "author", "firstName": "M.", "lastName": "O\'Colmain"}, {"creatorType": "author", "firstName": "J.D.", "lastName": "Cunningham"}]',
+ 'keywords': 'oceanography, Earth Science > Oceans > Ocean Chemistry> Radionuclides, Earth Science > Human Dimensions > Environmental Impacts > Nuclear Radiation Exposure, Earth Science > Oceans > Ocean Chemistry > Ocean Tracers, Earth Science > Oceans > Marine Sediments, Earth Science > Oceans > Ocean Chemistry, Earth Science > Oceans > Sea Ice > Isotopes, Earth Science > Oceans > Water Quality > Ocean Contaminants, Earth Science > Biological Classification > Animals/Vertebrates > Fish, Earth Science > Biosphere > Ecosystems > Marine Ecosystems, Earth Science > Biological Classification > Animals/Invertebrates > Mollusks, Earth Science > Biological Classification > Animals/Invertebrates > Arthropods > Crustaceans, Earth Science > Biological Classification > Plants > Macroalgae (Seaweeds)',
+ 'publisher_postprocess_logs': "Remap to MARIS radionuclide names., Renaming variables to MARIS standard names., Drop variable containing only NaN or 'Not available' (id=0 in MARIS lookup tables)., Assign Detection Limit name to its id based on MARIS nomenclature., Encode time as `int` representing seconds since xxx, Drop row when both longitude & latitude equal 0. Drop unrealistic longitude & latitude values. Convert longitude & latitude `,` separator to `.` separator."}
++++enums_xtra (tfm, vars)
Retrieve a subset of the lengthy enum as species_t
for instance.
def enums_xtra(tfm, vars):
    "Retrieve a subset of the lengthy enum as `species_t` for instance."
    # NOTE: `vars` shadows the builtin but is kept because callers pass it by keyword.
    enums = Enums(lut_src_dir=lut_path(), cdl_enums=cdl_cfg()['enums'])
    xtras = {}
    for var in vars:
        unique_vals = tfm.unique(var)
        # Skip variables whose unique values are all falsy (or absent).
        if unique_vals.any():
            xtras[f'{var}_t'] = enums.filter(f'{var}_t', unique_vals)
    return xtras
+++encode (fname_in, fname_out, nc_tpl_path, **kwargs)
def encode(fname_in, fname_out, nc_tpl_path, **kwargs):
    """Encode datasets of the MARIS dump `fname_in` as NetCDF files under `fname_out`.

    Optional keyword arguments:
    - `ref_ids`: iterable of reference IDs to encode; defaults to every `ref_id`
      present in the dump.
    - `verbose`: forwarded to `NetCDFEncoder` (default `False`).
    """
    dataloader = DataLoader(fname_in)
    ref_ids = kwargs.get('ref_ids')
    if ref_ids is None:
        # The default is read from the loader's own dataframe rather than from a
        # `df` name not defined in this function, and only when it is needed.
        dataloader._load_data()
        ref_ids = dataloader.df.ref_id.unique()
    print('Encoding ...')
    for ref_id in tqdm(ref_ids, leave=False):
        dfs = dataloader(ref_id=ref_id)
        dest_name = get_fname(dfs)
        print(dest_name)
        tfm = Transformer(dfs, cbs=[
            RemapRdnNameCB(),
            RenameColumnCB(),
            DropNAColumnsCB(),
            SanitizeDetectionLimitCB(),
            ParseTimeCB(),
            EncodeTimeCB(cfg()),
            ReshapeLongToWide(),
            SanitizeLonLatCB(verbose=True)
        ])

        tfm()
        encoder = NetCDFEncoder(tfm.dfs,
                                src_fname=nc_tpl_path,
                                dest_fname=Path(fname_out) / dest_name,
                                global_attrs=get_attrs(tfm, zotero_key=get_zotero_key(dfs), kw=kw),
                                verbose=kwargs.get('verbose', False),
                                enums_xtra=enums_xtra(tfm, vars=['species', 'body_part'])
                                )
        encoder.encode()
Encoding ...
+ 0%| | 0/1 [00:00<?, ?it/s]
+100-HELCOM-MORS-2018.nc
+--------------------------------------------------------------------------------
+Group: biota, Variable: lon
+--------------------------------------------------------------------------------
+Group: biota, Variable: lat
+--------------------------------------------------------------------------------
+Group: biota, Variable: smp_depth
+--------------------------------------------------------------------------------
+Group: biota, Variable: time
+--------------------------------------------------------------------------------
+Group: biota, Variable: area
+--------------------------------------------------------------------------------
+Group: biota, Variable: bio_group
+--------------------------------------------------------------------------------
+Group: biota, Variable: species
+--------------------------------------------------------------------------------
+Group: biota, Variable: body_part
+--------------------------------------------------------------------------------
+Group: biota, Variable: be7
+--------------------------------------------------------------------------------
+Group: biota, Variable: be7_unc
+--------------------------------------------------------------------------------
+Group: biota, Variable: be7_dl
+--------------------------------------------------------------------------------
+Group: biota, Variable: be7_counmet
+--------------------------------------------------------------------------------
+Group: biota, Variable: be7_prepmet
+--------------------------------------------------------------------------------
+Group: biota, Variable: be7_unit
+--------------------------------------------------------------------------------
+Group: biota, Variable: k40
+--------------------------------------------------------------------------------
+Group: biota, Variable: k40_unc
+--------------------------------------------------------------------------------
+Group: biota, Variable: k40_dl
+--------------------------------------------------------------------------------
+Group: biota, Variable: k40_counmet
+--------------------------------------------------------------------------------
+Group: biota, Variable: k40_prepmet
+--------------------------------------------------------------------------------
+Group: biota, Variable: k40_unit
+--------------------------------------------------------------------------------
+Group: biota, Variable: mn54
+--------------------------------------------------------------------------------
+Group: biota, Variable: mn54_unc
+--------------------------------------------------------------------------------
+Group: biota, Variable: mn54_dl
+--------------------------------------------------------------------------------
+Group: biota, Variable: mn54_counmet
+--------------------------------------------------------------------------------
+Group: biota, Variable: mn54_prepmet
+--------------------------------------------------------------------------------
+Group: biota, Variable: mn54_unit
+--------------------------------------------------------------------------------
+Group: biota, Variable: co57
+--------------------------------------------------------------------------------
+Group: biota, Variable: co57_unc
+--------------------------------------------------------------------------------
+Group: biota, Variable: co57_dl
+--------------------------------------------------------------------------------
+Group: biota, Variable: co57_counmet
+--------------------------------------------------------------------------------
+Group: biota, Variable: co57_prepmet
+--------------------------------------------------------------------------------
+Group: biota, Variable: co57_unit
+--------------------------------------------------------------------------------
+Group: biota, Variable: co58
+--------------------------------------------------------------------------------
+Group: biota, Variable: co58_unc
+--------------------------------------------------------------------------------
+Group: biota, Variable: co58_dl
+--------------------------------------------------------------------------------
+Group: biota, Variable: co58_counmet
+--------------------------------------------------------------------------------
+Group: biota, Variable: co58_prepmet
+--------------------------------------------------------------------------------
+Group: biota, Variable: co58_unit
+--------------------------------------------------------------------------------
+Group: biota, Variable: co60
+--------------------------------------------------------------------------------
+Group: biota, Variable: co60_unc
+--------------------------------------------------------------------------------
+Group: biota, Variable: co60_dl
+--------------------------------------------------------------------------------
+Group: biota, Variable: co60_counmet
+--------------------------------------------------------------------------------
+Group: biota, Variable: co60_prepmet
+--------------------------------------------------------------------------------
+Group: biota, Variable: co60_unit
+--------------------------------------------------------------------------------
+Group: biota, Variable: zn65
+--------------------------------------------------------------------------------
+Group: biota, Variable: zn65_unc
+--------------------------------------------------------------------------------
+Group: biota, Variable: zn65_dl
+--------------------------------------------------------------------------------
+Group: biota, Variable: zn65_counmet
+--------------------------------------------------------------------------------
+Group: biota, Variable: zn65_prepmet
+--------------------------------------------------------------------------------
+Group: biota, Variable: zn65_unit
+--------------------------------------------------------------------------------
+Group: biota, Variable: sr89
+--------------------------------------------------------------------------------
+Group: biota, Variable: sr89_unc
+--------------------------------------------------------------------------------
+Group: biota, Variable: sr89_dl
+--------------------------------------------------------------------------------
+Group: biota, Variable: sr89_counmet
+--------------------------------------------------------------------------------
+Group: biota, Variable: sr89_prepmet
+--------------------------------------------------------------------------------
+Group: biota, Variable: sr89_unit
+--------------------------------------------------------------------------------
+Group: biota, Variable: sr90
+--------------------------------------------------------------------------------
+Group: biota, Variable: sr90_unc
+--------------------------------------------------------------------------------
+Group: biota, Variable: sr90_dl
+--------------------------------------------------------------------------------
+Group: biota, Variable: sr90_counmet
+--------------------------------------------------------------------------------
+Group: biota, Variable: sr90_prepmet
+--------------------------------------------------------------------------------
+Group: biota, Variable: sr90_unit
+--------------------------------------------------------------------------------
+Group: biota, Variable: zr95
+--------------------------------------------------------------------------------
+Group: biota, Variable: zr95_unc
+--------------------------------------------------------------------------------
+Group: biota, Variable: zr95_dl
+--------------------------------------------------------------------------------
+Group: biota, Variable: zr95_counmet
+--------------------------------------------------------------------------------
+Group: biota, Variable: zr95_prepmet
+--------------------------------------------------------------------------------
+Group: biota, Variable: zr95_unit
+--------------------------------------------------------------------------------
+Group: biota, Variable: nb95
+--------------------------------------------------------------------------------
+Group: biota, Variable: nb95_unc
+--------------------------------------------------------------------------------
+Group: biota, Variable: nb95_dl
+--------------------------------------------------------------------------------
+Group: biota, Variable: nb95_counmet
+--------------------------------------------------------------------------------
+Group: biota, Variable: nb95_prepmet
+--------------------------------------------------------------------------------
+Group: biota, Variable: nb95_unit
+--------------------------------------------------------------------------------
+Group: biota, Variable: tc99
+--------------------------------------------------------------------------------
+Group: biota, Variable: tc99_unc
+--------------------------------------------------------------------------------
+Group: biota, Variable: tc99_dl
+--------------------------------------------------------------------------------
+Group: biota, Variable: tc99_counmet
+--------------------------------------------------------------------------------
+Group: biota, Variable: tc99_prepmet
+--------------------------------------------------------------------------------
+Group: biota, Variable: tc99_unit
+--------------------------------------------------------------------------------
+Group: biota, Variable: ru103
+--------------------------------------------------------------------------------
+Group: biota, Variable: ru103_unc
+--------------------------------------------------------------------------------
+Group: biota, Variable: ru103_dl
+--------------------------------------------------------------------------------
+Group: biota, Variable: ru103_counmet
+--------------------------------------------------------------------------------
+Group: biota, Variable: ru103_prepmet
+--------------------------------------------------------------------------------
+Group: biota, Variable: ru103_unit
+--------------------------------------------------------------------------------
+Group: biota, Variable: ru106
+--------------------------------------------------------------------------------
+Group: biota, Variable: ru106_unc
+--------------------------------------------------------------------------------
+Group: biota, Variable: ru106_dl
+--------------------------------------------------------------------------------
+Group: biota, Variable: ru106_counmet
+--------------------------------------------------------------------------------
+Group: biota, Variable: ru106_prepmet
+--------------------------------------------------------------------------------
+Group: biota, Variable: ru106_unit
+--------------------------------------------------------------------------------
+Group: biota, Variable: ag108m
+--------------------------------------------------------------------------------
+Group: biota, Variable: ag108m_unc
+--------------------------------------------------------------------------------
+Group: biota, Variable: ag108m_dl
+--------------------------------------------------------------------------------
+Group: biota, Variable: ag108m_counmet
+--------------------------------------------------------------------------------
+Group: biota, Variable: ag108m_prepmet
+--------------------------------------------------------------------------------
+Group: biota, Variable: ag108m_unit
+--------------------------------------------------------------------------------
+Group: biota, Variable: ag110m
+--------------------------------------------------------------------------------
+Group: biota, Variable: ag110m_unc
+--------------------------------------------------------------------------------
+Group: biota, Variable: ag110m_dl
+--------------------------------------------------------------------------------
+Group: biota, Variable: ag110m_counmet
+--------------------------------------------------------------------------------
+Group: biota, Variable: ag110m_prepmet
+--------------------------------------------------------------------------------
+Group: biota, Variable: ag110m_unit
+--------------------------------------------------------------------------------
+Group: biota, Variable: sb124
+--------------------------------------------------------------------------------
+Group: biota, Variable: sb124_unc
+--------------------------------------------------------------------------------
+Group: biota, Variable: sb124_dl
+--------------------------------------------------------------------------------
+Group: biota, Variable: sb124_counmet
+--------------------------------------------------------------------------------
+Group: biota, Variable: sb124_prepmet
+--------------------------------------------------------------------------------
+Group: biota, Variable: sb124_unit
+--------------------------------------------------------------------------------
+Group: biota, Variable: sb125
+--------------------------------------------------------------------------------
+Group: biota, Variable: sb125_unc
+--------------------------------------------------------------------------------
+Group: biota, Variable: sb125_dl
+--------------------------------------------------------------------------------
+Group: biota, Variable: sb125_counmet
+--------------------------------------------------------------------------------
+Group: biota, Variable: sb125_prepmet
+--------------------------------------------------------------------------------
+Group: biota, Variable: sb125_unit
+--------------------------------------------------------------------------------
+Group: biota, Variable: te129m
+--------------------------------------------------------------------------------
+Group: biota, Variable: te129m_dl
+--------------------------------------------------------------------------------
+Group: biota, Variable: te129m_counmet
+--------------------------------------------------------------------------------
+Group: biota, Variable: te129m_prepmet
+--------------------------------------------------------------------------------
+Group: biota, Variable: te129m_unit
+--------------------------------------------------------------------------------
+Group: biota, Variable: i131
+--------------------------------------------------------------------------------
+Group: biota, Variable: i131_dl
+--------------------------------------------------------------------------------
+Group: biota, Variable: i131_counmet
+--------------------------------------------------------------------------------
+Group: biota, Variable: i131_prepmet
+--------------------------------------------------------------------------------
+Group: biota, Variable: i131_unit
+--------------------------------------------------------------------------------
+Group: biota, Variable: cs134
+--------------------------------------------------------------------------------
+Group: biota, Variable: cs134_unc
+--------------------------------------------------------------------------------
+Group: biota, Variable: cs134_dl
+--------------------------------------------------------------------------------
+Group: biota, Variable: cs134_counmet
+--------------------------------------------------------------------------------
+Group: biota, Variable: cs134_prepmet
+--------------------------------------------------------------------------------
+Group: biota, Variable: cs134_unit
+--------------------------------------------------------------------------------
+Group: biota, Variable: cs137
+--------------------------------------------------------------------------------
+Group: biota, Variable: cs137_unc
+--------------------------------------------------------------------------------
+Group: biota, Variable: cs137_dl
+--------------------------------------------------------------------------------
+Group: biota, Variable: cs137_counmet
+--------------------------------------------------------------------------------
+Group: biota, Variable: cs137_prepmet
+--------------------------------------------------------------------------------
+Group: biota, Variable: cs137_unit
+--------------------------------------------------------------------------------
+Group: biota, Variable: ba140
+--------------------------------------------------------------------------------
+Group: biota, Variable: ba140_dl
+--------------------------------------------------------------------------------
+Group: biota, Variable: ba140_counmet
+--------------------------------------------------------------------------------
+Group: biota, Variable: ba140_prepmet
+--------------------------------------------------------------------------------
+Group: biota, Variable: ba140_unit
+--------------------------------------------------------------------------------
+Group: biota, Variable: la140
+--------------------------------------------------------------------------------
+Group: biota, Variable: la140_dl
+--------------------------------------------------------------------------------
+Group: biota, Variable: la140_counmet
+--------------------------------------------------------------------------------
+Group: biota, Variable: la140_prepmet
+--------------------------------------------------------------------------------
+Group: biota, Variable: la140_unit
+--------------------------------------------------------------------------------
+Group: biota, Variable: ce141
+--------------------------------------------------------------------------------
+Group: biota, Variable: ce141_unc
+--------------------------------------------------------------------------------
+Group: biota, Variable: ce141_dl
+--------------------------------------------------------------------------------
+Group: biota, Variable: ce141_counmet
+--------------------------------------------------------------------------------
+Group: biota, Variable: ce141_prepmet
+--------------------------------------------------------------------------------
+Group: biota, Variable: ce141_unit
+--------------------------------------------------------------------------------
+Group: biota, Variable: ce144
+--------------------------------------------------------------------------------
+Group: biota, Variable: ce144_unc
+--------------------------------------------------------------------------------
+Group: biota, Variable: ce144_dl
+--------------------------------------------------------------------------------
+Group: biota, Variable: ce144_counmet
+--------------------------------------------------------------------------------
+Group: biota, Variable: ce144_prepmet
+--------------------------------------------------------------------------------
+Group: biota, Variable: ce144_unit
+--------------------------------------------------------------------------------
+Group: biota, Variable: eu155
+--------------------------------------------------------------------------------
+Group: biota, Variable: eu155_unc
+--------------------------------------------------------------------------------
+Group: biota, Variable: eu155_dl
+--------------------------------------------------------------------------------
+Group: biota, Variable: eu155_counmet
+--------------------------------------------------------------------------------
+Group: biota, Variable: eu155_prepmet
+--------------------------------------------------------------------------------
+Group: biota, Variable: eu155_unit
+--------------------------------------------------------------------------------
+Group: biota, Variable: pb210
+--------------------------------------------------------------------------------
+Group: biota, Variable: pb210_unc
+--------------------------------------------------------------------------------
+Group: biota, Variable: pb210_dl
+--------------------------------------------------------------------------------
+Group: biota, Variable: pb210_counmet
+--------------------------------------------------------------------------------
+Group: biota, Variable: pb210_prepmet
+--------------------------------------------------------------------------------
+Group: biota, Variable: pb210_unit
+--------------------------------------------------------------------------------
+Group: biota, Variable: pb212
+--------------------------------------------------------------------------------
+Group: biota, Variable: pb212_unc
+--------------------------------------------------------------------------------
+Group: biota, Variable: pb212_dl
+--------------------------------------------------------------------------------
+Group: biota, Variable: pb212_counmet
+--------------------------------------------------------------------------------
+Group: biota, Variable: pb212_prepmet
+--------------------------------------------------------------------------------
+Group: biota, Variable: pb212_unit
+--------------------------------------------------------------------------------
+Group: biota, Variable: pb214
+--------------------------------------------------------------------------------
+Group: biota, Variable: pb214_unc
+--------------------------------------------------------------------------------
+Group: biota, Variable: pb214_dl
+--------------------------------------------------------------------------------
+Group: biota, Variable: pb214_counmet
+--------------------------------------------------------------------------------
+Group: biota, Variable: pb214_prepmet
+--------------------------------------------------------------------------------
+Group: biota, Variable: pb214_unit
+--------------------------------------------------------------------------------
+Group: biota, Variable: bi214
+--------------------------------------------------------------------------------
+Group: biota, Variable: bi214_unc
+--------------------------------------------------------------------------------
+Group: biota, Variable: bi214_dl
+--------------------------------------------------------------------------------
+Group: biota, Variable: bi214_counmet
+--------------------------------------------------------------------------------
+Group: biota, Variable: bi214_prepmet
+--------------------------------------------------------------------------------
+Group: biota, Variable: bi214_unit
+--------------------------------------------------------------------------------
+Group: biota, Variable: po210
+--------------------------------------------------------------------------------
+Group: biota, Variable: po210_unc
+--------------------------------------------------------------------------------
+Group: biota, Variable: po210_dl
+--------------------------------------------------------------------------------
+Group: biota, Variable: po210_counmet
+--------------------------------------------------------------------------------
+Group: biota, Variable: po210_prepmet
+--------------------------------------------------------------------------------
+Group: biota, Variable: po210_unit
+--------------------------------------------------------------------------------
+Group: biota, Variable: ra223
+--------------------------------------------------------------------------------
+Group: biota, Variable: ra223_unc
+--------------------------------------------------------------------------------
+Group: biota, Variable: ra223_dl
+--------------------------------------------------------------------------------
+Group: biota, Variable: ra223_counmet
+--------------------------------------------------------------------------------
+Group: biota, Variable: ra223_prepmet
+--------------------------------------------------------------------------------
+Group: biota, Variable: ra223_unit
+--------------------------------------------------------------------------------
+Group: biota, Variable: ra224
+--------------------------------------------------------------------------------
+Group: biota, Variable: ra224_unc
+--------------------------------------------------------------------------------
+Group: biota, Variable: ra224_dl
+--------------------------------------------------------------------------------
+Group: biota, Variable: ra224_counmet
+--------------------------------------------------------------------------------
+Group: biota, Variable: ra224_prepmet
+--------------------------------------------------------------------------------
+Group: biota, Variable: ra224_unit
+--------------------------------------------------------------------------------
+Group: biota, Variable: ra226
+--------------------------------------------------------------------------------
+Group: biota, Variable: ra226_unc
+--------------------------------------------------------------------------------
+Group: biota, Variable: ra226_dl
+--------------------------------------------------------------------------------
+Group: biota, Variable: ra226_counmet
+--------------------------------------------------------------------------------
+Group: biota, Variable: ra226_prepmet
+--------------------------------------------------------------------------------
+Group: biota, Variable: ra226_unit
+--------------------------------------------------------------------------------
+Group: biota, Variable: ra228
+--------------------------------------------------------------------------------
+Group: biota, Variable: ra228_unc
+--------------------------------------------------------------------------------
+Group: biota, Variable: ra228_dl
+--------------------------------------------------------------------------------
+Group: biota, Variable: ra228_counmet
+--------------------------------------------------------------------------------
+Group: biota, Variable: ra228_prepmet
+--------------------------------------------------------------------------------
+Group: biota, Variable: ra228_unit
+--------------------------------------------------------------------------------
+Group: biota, Variable: ac228
+--------------------------------------------------------------------------------
+Group: biota, Variable: ac228_unc
+--------------------------------------------------------------------------------
+Group: biota, Variable: ac228_dl
+--------------------------------------------------------------------------------
+Group: biota, Variable: ac228_counmet
+--------------------------------------------------------------------------------
+Group: biota, Variable: ac228_prepmet
+--------------------------------------------------------------------------------
+Group: biota, Variable: ac228_unit
+--------------------------------------------------------------------------------
+Group: biota, Variable: th228
+--------------------------------------------------------------------------------
+Group: biota, Variable: th228_unc
+--------------------------------------------------------------------------------
+Group: biota, Variable: th228_dl
+--------------------------------------------------------------------------------
+Group: biota, Variable: th228_counmet
+--------------------------------------------------------------------------------
+Group: biota, Variable: th228_prepmet
+--------------------------------------------------------------------------------
+Group: biota, Variable: th228_unit
+--------------------------------------------------------------------------------
+Group: biota, Variable: th232
+--------------------------------------------------------------------------------
+Group: biota, Variable: th232_unc
+--------------------------------------------------------------------------------
+Group: biota, Variable: th232_dl
+--------------------------------------------------------------------------------
+Group: biota, Variable: th232_counmet
+--------------------------------------------------------------------------------
+Group: biota, Variable: th232_prepmet
+--------------------------------------------------------------------------------
+Group: biota, Variable: th232_unit
+--------------------------------------------------------------------------------
+Group: biota, Variable: u235
+--------------------------------------------------------------------------------
+Group: biota, Variable: u235_unc
+--------------------------------------------------------------------------------
+Group: biota, Variable: u235_dl
+--------------------------------------------------------------------------------
+Group: biota, Variable: u235_counmet
+--------------------------------------------------------------------------------
+Group: biota, Variable: u235_prepmet
+--------------------------------------------------------------------------------
+Group: biota, Variable: u235_unit
+--------------------------------------------------------------------------------
+Group: biota, Variable: pu238
+--------------------------------------------------------------------------------
+Group: biota, Variable: pu238_unc
+--------------------------------------------------------------------------------
+Group: biota, Variable: pu238_dl
+--------------------------------------------------------------------------------
+Group: biota, Variable: pu238_counmet
+--------------------------------------------------------------------------------
+Group: biota, Variable: pu238_prepmet
+--------------------------------------------------------------------------------
+Group: biota, Variable: pu238_unit
+--------------------------------------------------------------------------------
+Group: biota, Variable: am241
+--------------------------------------------------------------------------------
+Group: biota, Variable: am241_unc
+--------------------------------------------------------------------------------
+Group: biota, Variable: am241_dl
+--------------------------------------------------------------------------------
+Group: biota, Variable: am241_counmet
+--------------------------------------------------------------------------------
+Group: biota, Variable: am241_prepmet
+--------------------------------------------------------------------------------
+Group: biota, Variable: am241_unit
+--------------------------------------------------------------------------------
+Group: biota, Variable: cs134_137_tot
+--------------------------------------------------------------------------------
+Group: biota, Variable: cs134_137_tot_unc
+--------------------------------------------------------------------------------
+Group: biota, Variable: cs134_137_tot_dl
+--------------------------------------------------------------------------------
+Group: biota, Variable: cs134_137_tot_counmet
+--------------------------------------------------------------------------------
+Group: biota, Variable: cs134_137_tot_prepmet
+--------------------------------------------------------------------------------
+Group: biota, Variable: cs134_137_tot_unit
+--------------------------------------------------------------------------------
+Group: biota, Variable: pu239_240_tot
+--------------------------------------------------------------------------------
+Group: biota, Variable: pu239_240_tot_unc
+--------------------------------------------------------------------------------
+Group: biota, Variable: pu239_240_tot_dl
+--------------------------------------------------------------------------------
+Group: biota, Variable: pu239_240_tot_counmet
+--------------------------------------------------------------------------------
+Group: biota, Variable: pu239_240_tot_prepmet
+--------------------------------------------------------------------------------
+Group: biota, Variable: pu239_240_tot_unit
+--------------------------------------------------------------------------------
+Group: biota, Variable: eu152
+--------------------------------------------------------------------------------
+Group: biota, Variable: eu152_unc
+--------------------------------------------------------------------------------
+Group: biota, Variable: eu152_dl
+--------------------------------------------------------------------------------
+Group: biota, Variable: eu152_counmet
+--------------------------------------------------------------------------------
+Group: biota, Variable: eu152_prepmet
+--------------------------------------------------------------------------------
+Group: biota, Variable: eu152_unit
+--------------------------------------------------------------------------------
+Group: biota, Variable: fe59
+--------------------------------------------------------------------------------
+Group: biota, Variable: fe59_unc
+--------------------------------------------------------------------------------
+Group: biota, Variable: fe59_dl
+--------------------------------------------------------------------------------
+Group: biota, Variable: fe59_counmet
+--------------------------------------------------------------------------------
+Group: biota, Variable: fe59_prepmet
+--------------------------------------------------------------------------------
+Group: biota, Variable: fe59_unit
+--------------------------------------------------------------------------------
+Group: biota, Variable: gd153
+--------------------------------------------------------------------------------
+Group: biota, Variable: gd153_unc
+--------------------------------------------------------------------------------
+Group: biota, Variable: gd153_dl
+--------------------------------------------------------------------------------
+Group: biota, Variable: gd153_counmet
+--------------------------------------------------------------------------------
+Group: biota, Variable: gd153_prepmet
+--------------------------------------------------------------------------------
+Group: biota, Variable: gd153_unit
+--------------------------------------------------------------------------------
+Group: biota, Variable: rb86
+--------------------------------------------------------------------------------
+Group: biota, Variable: rb86_unc
+--------------------------------------------------------------------------------
+Group: biota, Variable: rb86_dl
+--------------------------------------------------------------------------------
+Group: biota, Variable: rb86_counmet
+--------------------------------------------------------------------------------
+Group: biota, Variable: rb86_prepmet
+--------------------------------------------------------------------------------
+Group: biota, Variable: rb86_unit
+--------------------------------------------------------------------------------
+Group: biota, Variable: sc46
+--------------------------------------------------------------------------------
+Group: biota, Variable: sc46_unc
+--------------------------------------------------------------------------------
+Group: biota, Variable: sc46_dl
+--------------------------------------------------------------------------------
+Group: biota, Variable: sc46_counmet
+--------------------------------------------------------------------------------
+Group: biota, Variable: sc46_prepmet
+--------------------------------------------------------------------------------
+Group: biota, Variable: sc46_unit
+--------------------------------------------------------------------------------
+Group: biota, Variable: sn113
+--------------------------------------------------------------------------------
+Group: biota, Variable: sn113_unc
+--------------------------------------------------------------------------------
+Group: biota, Variable: sn113_dl
+--------------------------------------------------------------------------------
+Group: biota, Variable: sn113_counmet
+--------------------------------------------------------------------------------
+Group: biota, Variable: sn113_prepmet
+--------------------------------------------------------------------------------
+Group: biota, Variable: sn113_unit
+--------------------------------------------------------------------------------
+Group: biota, Variable: sn117m
+--------------------------------------------------------------------------------
+Group: biota, Variable: sn117m_unc
+--------------------------------------------------------------------------------
+Group: biota, Variable: sn117m_dl
+--------------------------------------------------------------------------------
+Group: biota, Variable: sn117m_counmet
+--------------------------------------------------------------------------------
+Group: biota, Variable: sn117m_prepmet
+--------------------------------------------------------------------------------
+Group: biota, Variable: sn117m_unit
+--------------------------------------------------------------------------------
+Group: biota, Variable: tl208
+--------------------------------------------------------------------------------
+Group: biota, Variable: tl208_unc
+--------------------------------------------------------------------------------
+Group: biota, Variable: tl208_dl
+--------------------------------------------------------------------------------
+Group: biota, Variable: tl208_counmet
+--------------------------------------------------------------------------------
+Group: biota, Variable: tl208_prepmet
+--------------------------------------------------------------------------------
+Group: biota, Variable: tl208_unit
+--------------------------------------------------------------------------------
+Group: seawater, Variable: lon
+--------------------------------------------------------------------------------
+Group: seawater, Variable: lat
+--------------------------------------------------------------------------------
+Group: seawater, Variable: smp_depth
+--------------------------------------------------------------------------------
+Group: seawater, Variable: tot_depth
+--------------------------------------------------------------------------------
+Group: seawater, Variable: time
+--------------------------------------------------------------------------------
+Group: seawater, Variable: area
+--------------------------------------------------------------------------------
+Group: seawater, Variable: h3
+--------------------------------------------------------------------------------
+Group: seawater, Variable: h3_unc
+--------------------------------------------------------------------------------
+Group: seawater, Variable: h3_dl
+--------------------------------------------------------------------------------
+Group: seawater, Variable: h3_sal
+--------------------------------------------------------------------------------
+Group: seawater, Variable: h3_temp
+--------------------------------------------------------------------------------
+Group: seawater, Variable: h3_counmet
+--------------------------------------------------------------------------------
+Group: seawater, Variable: h3_sampmet
+--------------------------------------------------------------------------------
+Group: seawater, Variable: h3_prepmet
+--------------------------------------------------------------------------------
+Group: seawater, Variable: h3_unit
+--------------------------------------------------------------------------------
+Group: seawater, Variable: k40
+--------------------------------------------------------------------------------
+Group: seawater, Variable: k40_unc
+--------------------------------------------------------------------------------
+Group: seawater, Variable: k40_dl
+--------------------------------------------------------------------------------
+Group: seawater, Variable: k40_sal
+--------------------------------------------------------------------------------
+Group: seawater, Variable: k40_temp
+--------------------------------------------------------------------------------
+Group: seawater, Variable: k40_counmet
+--------------------------------------------------------------------------------
+Group: seawater, Variable: k40_sampmet
+--------------------------------------------------------------------------------
+Group: seawater, Variable: k40_prepmet
+--------------------------------------------------------------------------------
+Group: seawater, Variable: k40_unit
+--------------------------------------------------------------------------------
+Group: seawater, Variable: mn54
+--------------------------------------------------------------------------------
+Group: seawater, Variable: mn54_dl
+--------------------------------------------------------------------------------
+Group: seawater, Variable: mn54_sal
+--------------------------------------------------------------------------------
+Group: seawater, Variable: mn54_temp
+--------------------------------------------------------------------------------
+Group: seawater, Variable: mn54_counmet
+--------------------------------------------------------------------------------
+Group: seawater, Variable: mn54_sampmet
+--------------------------------------------------------------------------------
+Group: seawater, Variable: mn54_prepmet
+--------------------------------------------------------------------------------
+Group: seawater, Variable: mn54_unit
+--------------------------------------------------------------------------------
+Group: seawater, Variable: co60
+--------------------------------------------------------------------------------
+Group: seawater, Variable: co60_unc
+--------------------------------------------------------------------------------
+Group: seawater, Variable: co60_dl
+--------------------------------------------------------------------------------
+Group: seawater, Variable: co60_sal
+--------------------------------------------------------------------------------
+Group: seawater, Variable: co60_temp
+--------------------------------------------------------------------------------
+Group: seawater, Variable: co60_counmet
+--------------------------------------------------------------------------------
+Group: seawater, Variable: co60_sampmet
+--------------------------------------------------------------------------------
+Group: seawater, Variable: co60_prepmet
+--------------------------------------------------------------------------------
+Group: seawater, Variable: co60_unit
+--------------------------------------------------------------------------------
+Group: seawater, Variable: sr89
+--------------------------------------------------------------------------------
+Group: seawater, Variable: sr89_dl
+--------------------------------------------------------------------------------
+Group: seawater, Variable: sr89_sal
+--------------------------------------------------------------------------------
+Group: seawater, Variable: sr89_temp
+--------------------------------------------------------------------------------
+Group: seawater, Variable: sr89_counmet
+--------------------------------------------------------------------------------
+Group: seawater, Variable: sr89_sampmet
+--------------------------------------------------------------------------------
+Group: seawater, Variable: sr89_prepmet
+--------------------------------------------------------------------------------
+Group: seawater, Variable: sr89_unit
+--------------------------------------------------------------------------------
+Group: seawater, Variable: sr90
+--------------------------------------------------------------------------------
+Group: seawater, Variable: sr90_unc
+--------------------------------------------------------------------------------
+Group: seawater, Variable: sr90_dl
+--------------------------------------------------------------------------------
+Group: seawater, Variable: sr90_sal
+--------------------------------------------------------------------------------
+Group: seawater, Variable: sr90_temp
+--------------------------------------------------------------------------------
+Group: seawater, Variable: sr90_counmet
+--------------------------------------------------------------------------------
+Group: seawater, Variable: sr90_sampmet
+--------------------------------------------------------------------------------
+Group: seawater, Variable: sr90_prepmet
+--------------------------------------------------------------------------------
+Group: seawater, Variable: sr90_unit
+--------------------------------------------------------------------------------
+Group: seawater, Variable: zr95
+--------------------------------------------------------------------------------
+Group: seawater, Variable: zr95_unc
+--------------------------------------------------------------------------------
+Group: seawater, Variable: zr95_dl
+--------------------------------------------------------------------------------
+Group: seawater, Variable: zr95_sal
+--------------------------------------------------------------------------------
+Group: seawater, Variable: zr95_counmet
+--------------------------------------------------------------------------------
+Group: seawater, Variable: zr95_sampmet
+--------------------------------------------------------------------------------
+Group: seawater, Variable: zr95_prepmet
+--------------------------------------------------------------------------------
+Group: seawater, Variable: zr95_unit
+--------------------------------------------------------------------------------
+Group: seawater, Variable: nb95
+--------------------------------------------------------------------------------
+Group: seawater, Variable: nb95_unc
+--------------------------------------------------------------------------------
+Group: seawater, Variable: nb95_dl
+--------------------------------------------------------------------------------
+Group: seawater, Variable: nb95_sal
+--------------------------------------------------------------------------------
+Group: seawater, Variable: nb95_temp
+--------------------------------------------------------------------------------
+Group: seawater, Variable: nb95_counmet
+--------------------------------------------------------------------------------
+Group: seawater, Variable: nb95_sampmet
+--------------------------------------------------------------------------------
+Group: seawater, Variable: nb95_prepmet
+--------------------------------------------------------------------------------
+Group: seawater, Variable: nb95_unit
+--------------------------------------------------------------------------------
+Group: seawater, Variable: tc99
+--------------------------------------------------------------------------------
+Group: seawater, Variable: tc99_unc
+--------------------------------------------------------------------------------
+Group: seawater, Variable: tc99_dl
+--------------------------------------------------------------------------------
+Group: seawater, Variable: tc99_sal
+--------------------------------------------------------------------------------
+Group: seawater, Variable: tc99_temp
+--------------------------------------------------------------------------------
+Group: seawater, Variable: tc99_counmet
+--------------------------------------------------------------------------------
+Group: seawater, Variable: tc99_sampmet
+--------------------------------------------------------------------------------
+Group: seawater, Variable: tc99_prepmet
+--------------------------------------------------------------------------------
+Group: seawater, Variable: tc99_unit
+--------------------------------------------------------------------------------
+Group: seawater, Variable: ru103
+--------------------------------------------------------------------------------
+Group: seawater, Variable: ru103_unc
+--------------------------------------------------------------------------------
+Group: seawater, Variable: ru103_dl
+--------------------------------------------------------------------------------
+Group: seawater, Variable: ru103_sal
+--------------------------------------------------------------------------------
+Group: seawater, Variable: ru103_temp
+--------------------------------------------------------------------------------
+Group: seawater, Variable: ru103_counmet
+--------------------------------------------------------------------------------
+Group: seawater, Variable: ru103_sampmet
+--------------------------------------------------------------------------------
+Group: seawater, Variable: ru103_prepmet
+--------------------------------------------------------------------------------
+Group: seawater, Variable: ru103_unit
+--------------------------------------------------------------------------------
+Group: seawater, Variable: ru106
+--------------------------------------------------------------------------------
+Group: seawater, Variable: ru106_unc
+--------------------------------------------------------------------------------
+Group: seawater, Variable: ru106_dl
+--------------------------------------------------------------------------------
+Group: seawater, Variable: ru106_sal
+--------------------------------------------------------------------------------
+Group: seawater, Variable: ru106_temp
+--------------------------------------------------------------------------------
+Group: seawater, Variable: ru106_counmet
+--------------------------------------------------------------------------------
+Group: seawater, Variable: ru106_sampmet
+--------------------------------------------------------------------------------
+Group: seawater, Variable: ru106_prepmet
+--------------------------------------------------------------------------------
+Group: seawater, Variable: ru106_unit
+--------------------------------------------------------------------------------
+Group: seawater, Variable: ag110m
+--------------------------------------------------------------------------------
+Group: seawater, Variable: ag110m_unc
+--------------------------------------------------------------------------------
+Group: seawater, Variable: ag110m_dl
+--------------------------------------------------------------------------------
+Group: seawater, Variable: ag110m_sal
+--------------------------------------------------------------------------------
+Group: seawater, Variable: ag110m_counmet
+--------------------------------------------------------------------------------
+Group: seawater, Variable: ag110m_sampmet
+--------------------------------------------------------------------------------
+Group: seawater, Variable: ag110m_prepmet
+--------------------------------------------------------------------------------
+Group: seawater, Variable: ag110m_unit
+--------------------------------------------------------------------------------
+Group: seawater, Variable: sb125
+--------------------------------------------------------------------------------
+Group: seawater, Variable: sb125_unc
+--------------------------------------------------------------------------------
+Group: seawater, Variable: sb125_dl
+--------------------------------------------------------------------------------
+Group: seawater, Variable: sb125_sal
+--------------------------------------------------------------------------------
+Group: seawater, Variable: sb125_temp
+--------------------------------------------------------------------------------
+Group: seawater, Variable: sb125_counmet
+--------------------------------------------------------------------------------
+Group: seawater, Variable: sb125_sampmet
+--------------------------------------------------------------------------------
+Group: seawater, Variable: sb125_prepmet
+--------------------------------------------------------------------------------
+Group: seawater, Variable: sb125_unit
+--------------------------------------------------------------------------------
+Group: seawater, Variable: cs134
+--------------------------------------------------------------------------------
+Group: seawater, Variable: cs134_unc
+--------------------------------------------------------------------------------
+Group: seawater, Variable: cs134_dl
+--------------------------------------------------------------------------------
+Group: seawater, Variable: cs134_sal
+--------------------------------------------------------------------------------
+Group: seawater, Variable: cs134_temp
+--------------------------------------------------------------------------------
+Group: seawater, Variable: cs134_counmet
+--------------------------------------------------------------------------------
+Group: seawater, Variable: cs134_sampmet
+--------------------------------------------------------------------------------
+Group: seawater, Variable: cs134_prepmet
+--------------------------------------------------------------------------------
+Group: seawater, Variable: cs134_unit
+--------------------------------------------------------------------------------
+Group: seawater, Variable: cs137
+--------------------------------------------------------------------------------
+Group: seawater, Variable: cs137_unc
+--------------------------------------------------------------------------------
+Group: seawater, Variable: cs137_dl
+--------------------------------------------------------------------------------
+Group: seawater, Variable: cs137_sal
+--------------------------------------------------------------------------------
+Group: seawater, Variable: cs137_temp
+--------------------------------------------------------------------------------
+Group: seawater, Variable: cs137_counmet
+--------------------------------------------------------------------------------
+Group: seawater, Variable: cs137_sampmet
+--------------------------------------------------------------------------------
+Group: seawater, Variable: cs137_prepmet
+--------------------------------------------------------------------------------
+Group: seawater, Variable: cs137_unit
+--------------------------------------------------------------------------------
+Group: seawater, Variable: ba140
+--------------------------------------------------------------------------------
+Group: seawater, Variable: ba140_unc
+--------------------------------------------------------------------------------
+Group: seawater, Variable: ba140_dl
+--------------------------------------------------------------------------------
+Group: seawater, Variable: ba140_sal
+--------------------------------------------------------------------------------
+Group: seawater, Variable: ba140_counmet
+--------------------------------------------------------------------------------
+Group: seawater, Variable: ba140_sampmet
+--------------------------------------------------------------------------------
+Group: seawater, Variable: ba140_prepmet
+--------------------------------------------------------------------------------
+Group: seawater, Variable: ba140_unit
+--------------------------------------------------------------------------------
+Group: seawater, Variable: ce144
+--------------------------------------------------------------------------------
+Group: seawater, Variable: ce144_unc
+--------------------------------------------------------------------------------
+Group: seawater, Variable: ce144_dl
+--------------------------------------------------------------------------------
+Group: seawater, Variable: ce144_sal
+--------------------------------------------------------------------------------
+Group: seawater, Variable: ce144_counmet
+--------------------------------------------------------------------------------
+Group: seawater, Variable: ce144_sampmet
+--------------------------------------------------------------------------------
+Group: seawater, Variable: ce144_prepmet
+--------------------------------------------------------------------------------
+Group: seawater, Variable: ce144_unit
+--------------------------------------------------------------------------------
+Group: seawater, Variable: pb210
+--------------------------------------------------------------------------------
+Group: seawater, Variable: pb210_unc
+--------------------------------------------------------------------------------
+Group: seawater, Variable: pb210_dl
+--------------------------------------------------------------------------------
+Group: seawater, Variable: pb210_sal
+--------------------------------------------------------------------------------
+Group: seawater, Variable: pb210_temp
+--------------------------------------------------------------------------------
+Group: seawater, Variable: pb210_counmet
+--------------------------------------------------------------------------------
+Group: seawater, Variable: pb210_sampmet
+--------------------------------------------------------------------------------
+Group: seawater, Variable: pb210_prepmet
+--------------------------------------------------------------------------------
+Group: seawater, Variable: pb210_unit
+--------------------------------------------------------------------------------
+Group: seawater, Variable: po210
+--------------------------------------------------------------------------------
+Group: seawater, Variable: po210_unc
+--------------------------------------------------------------------------------
+Group: seawater, Variable: po210_dl
+--------------------------------------------------------------------------------
+Group: seawater, Variable: po210_sal
+--------------------------------------------------------------------------------
+Group: seawater, Variable: po210_temp
+--------------------------------------------------------------------------------
+Group: seawater, Variable: po210_counmet
+--------------------------------------------------------------------------------
+Group: seawater, Variable: po210_sampmet
+--------------------------------------------------------------------------------
+Group: seawater, Variable: po210_prepmet
+--------------------------------------------------------------------------------
+Group: seawater, Variable: po210_unit
+--------------------------------------------------------------------------------
+Group: seawater, Variable: u234
+--------------------------------------------------------------------------------
+Group: seawater, Variable: u234_dl
+--------------------------------------------------------------------------------
+Group: seawater, Variable: u234_sal
+--------------------------------------------------------------------------------
+Group: seawater, Variable: u234_temp
+--------------------------------------------------------------------------------
+Group: seawater, Variable: u234_counmet
+--------------------------------------------------------------------------------
+Group: seawater, Variable: u234_sampmet
+--------------------------------------------------------------------------------
+Group: seawater, Variable: u234_prepmet
+--------------------------------------------------------------------------------
+Group: seawater, Variable: u234_unit
+--------------------------------------------------------------------------------
+Group: seawater, Variable: u238
+--------------------------------------------------------------------------------
+Group: seawater, Variable: u238_dl
+--------------------------------------------------------------------------------
+Group: seawater, Variable: u238_sal
+--------------------------------------------------------------------------------
+Group: seawater, Variable: u238_temp
+--------------------------------------------------------------------------------
+Group: seawater, Variable: u238_counmet
+--------------------------------------------------------------------------------
+Group: seawater, Variable: u238_sampmet
+--------------------------------------------------------------------------------
+Group: seawater, Variable: u238_prepmet
+--------------------------------------------------------------------------------
+Group: seawater, Variable: u238_unit
+--------------------------------------------------------------------------------
+Group: seawater, Variable: np237
+--------------------------------------------------------------------------------
+Group: seawater, Variable: np237_unc
+--------------------------------------------------------------------------------
+Group: seawater, Variable: np237_dl
+--------------------------------------------------------------------------------
+Group: seawater, Variable: np237_sal
+--------------------------------------------------------------------------------
+Group: seawater, Variable: np237_counmet
+--------------------------------------------------------------------------------
+Group: seawater, Variable: np237_sampmet
+--------------------------------------------------------------------------------
+Group: seawater, Variable: np237_prepmet
+--------------------------------------------------------------------------------
+Group: seawater, Variable: np237_unit
+--------------------------------------------------------------------------------
+Group: seawater, Variable: pu238
+--------------------------------------------------------------------------------
+Group: seawater, Variable: pu238_unc
+--------------------------------------------------------------------------------
+Group: seawater, Variable: pu238_dl
+--------------------------------------------------------------------------------
+Group: seawater, Variable: pu238_sal
+--------------------------------------------------------------------------------
+Group: seawater, Variable: pu238_temp
+--------------------------------------------------------------------------------
+Group: seawater, Variable: pu238_counmet
+--------------------------------------------------------------------------------
+Group: seawater, Variable: pu238_sampmet
+--------------------------------------------------------------------------------
+Group: seawater, Variable: pu238_prepmet
+--------------------------------------------------------------------------------
+Group: seawater, Variable: pu238_unit
+--------------------------------------------------------------------------------
+Group: seawater, Variable: pu239
+--------------------------------------------------------------------------------
+Group: seawater, Variable: pu239_unc
+--------------------------------------------------------------------------------
+Group: seawater, Variable: pu239_dl
+--------------------------------------------------------------------------------
+Group: seawater, Variable: pu239_sal
+--------------------------------------------------------------------------------
+Group: seawater, Variable: pu239_temp
+--------------------------------------------------------------------------------
+Group: seawater, Variable: pu239_counmet
+--------------------------------------------------------------------------------
+Group: seawater, Variable: pu239_sampmet
+--------------------------------------------------------------------------------
+Group: seawater, Variable: pu239_prepmet
+--------------------------------------------------------------------------------
+Group: seawater, Variable: pu239_unit
+--------------------------------------------------------------------------------
+Group: seawater, Variable: pu240
+--------------------------------------------------------------------------------
+Group: seawater, Variable: pu240_unc
+--------------------------------------------------------------------------------
+Group: seawater, Variable: pu240_dl
+--------------------------------------------------------------------------------
+Group: seawater, Variable: pu240_sal
+--------------------------------------------------------------------------------
+Group: seawater, Variable: pu240_counmet
+--------------------------------------------------------------------------------
+Group: seawater, Variable: pu240_sampmet
+--------------------------------------------------------------------------------
+Group: seawater, Variable: pu240_prepmet
+--------------------------------------------------------------------------------
+Group: seawater, Variable: pu240_unit
+--------------------------------------------------------------------------------
+Group: seawater, Variable: am241
+--------------------------------------------------------------------------------
+Group: seawater, Variable: am241_unc
+--------------------------------------------------------------------------------
+Group: seawater, Variable: am241_dl
+--------------------------------------------------------------------------------
+Group: seawater, Variable: am241_sal
+--------------------------------------------------------------------------------
+Group: seawater, Variable: am241_temp
+--------------------------------------------------------------------------------
+Group: seawater, Variable: am241_counmet
+--------------------------------------------------------------------------------
+Group: seawater, Variable: am241_sampmet
+--------------------------------------------------------------------------------
+Group: seawater, Variable: am241_prepmet
+--------------------------------------------------------------------------------
+Group: seawater, Variable: am241_unit
+--------------------------------------------------------------------------------
+Group: seawater, Variable: cm242
+--------------------------------------------------------------------------------
+Group: seawater, Variable: cm242_unc
+--------------------------------------------------------------------------------
+Group: seawater, Variable: cm242_dl
+--------------------------------------------------------------------------------
+Group: seawater, Variable: cm242_sal
+--------------------------------------------------------------------------------
+Group: seawater, Variable: cm242_temp
+--------------------------------------------------------------------------------
+Group: seawater, Variable: cm242_counmet
+--------------------------------------------------------------------------------
+Group: seawater, Variable: cm242_sampmet
+--------------------------------------------------------------------------------
+Group: seawater, Variable: cm242_prepmet
+--------------------------------------------------------------------------------
+Group: seawater, Variable: cm242_unit
+--------------------------------------------------------------------------------
+Group: seawater, Variable: cm244
+--------------------------------------------------------------------------------
+Group: seawater, Variable: cm244_unc
+--------------------------------------------------------------------------------
+Group: seawater, Variable: cm244_dl
+--------------------------------------------------------------------------------
+Group: seawater, Variable: cm244_sal
+--------------------------------------------------------------------------------
+Group: seawater, Variable: cm244_temp
+--------------------------------------------------------------------------------
+Group: seawater, Variable: cm244_counmet
+--------------------------------------------------------------------------------
+Group: seawater, Variable: cm244_sampmet
+--------------------------------------------------------------------------------
+Group: seawater, Variable: cm244_prepmet
+--------------------------------------------------------------------------------
+Group: seawater, Variable: cm244_unit
+--------------------------------------------------------------------------------
+Group: seawater, Variable: pu239_240_tot
+--------------------------------------------------------------------------------
+Group: seawater, Variable: pu239_240_tot_unc
+--------------------------------------------------------------------------------
+Group: seawater, Variable: pu239_240_tot_dl
+--------------------------------------------------------------------------------
+Group: seawater, Variable: pu239_240_tot_sal
+--------------------------------------------------------------------------------
+Group: seawater, Variable: pu239_240_tot_temp
+--------------------------------------------------------------------------------
+Group: seawater, Variable: pu239_240_tot_counmet
+--------------------------------------------------------------------------------
+Group: seawater, Variable: pu239_240_tot_sampmet
+--------------------------------------------------------------------------------
+Group: seawater, Variable: pu239_240_tot_prepmet
+--------------------------------------------------------------------------------
+Group: seawater, Variable: pu239_240_tot_unit
+--------------------------------------------------------------------------------
+Group: seawater, Variable: cm243_244_tot
+--------------------------------------------------------------------------------
+Group: seawater, Variable: cm243_244_tot_unc
+--------------------------------------------------------------------------------
+Group: seawater, Variable: cm243_244_tot_dl
+--------------------------------------------------------------------------------
+Group: seawater, Variable: cm243_244_tot_sal
+--------------------------------------------------------------------------------
+Group: seawater, Variable: cm243_244_tot_temp
+--------------------------------------------------------------------------------
+Group: seawater, Variable: cm243_244_tot_counmet
+--------------------------------------------------------------------------------
+Group: seawater, Variable: cm243_244_tot_sampmet
+--------------------------------------------------------------------------------
+Group: seawater, Variable: cm243_244_tot_prepmet
+--------------------------------------------------------------------------------
+Group: seawater, Variable: cm243_244_tot_unit
+--------------------------------------------------------------------------------
+Group: sediment, Variable: lon
+--------------------------------------------------------------------------------
+Group: sediment, Variable: lat
+--------------------------------------------------------------------------------
+Group: sediment, Variable: smp_depth
+--------------------------------------------------------------------------------
+Group: sediment, Variable: tot_depth
+--------------------------------------------------------------------------------
+Group: sediment, Variable: time
+--------------------------------------------------------------------------------
+Group: sediment, Variable: area
+--------------------------------------------------------------------------------
+Group: sediment, Variable: sed_type
+--------------------------------------------------------------------------------
+Group: sediment, Variable: be7
+--------------------------------------------------------------------------------
+Group: sediment, Variable: be7_unc
+--------------------------------------------------------------------------------
+Group: sediment, Variable: be7_dl
+--------------------------------------------------------------------------------
+Group: sediment, Variable: be7_sampmet
+--------------------------------------------------------------------------------
+Group: sediment, Variable: be7_prepmet
+--------------------------------------------------------------------------------
+Group: sediment, Variable: be7_unit
+--------------------------------------------------------------------------------
+Group: sediment, Variable: k40
+--------------------------------------------------------------------------------
+Group: sediment, Variable: k40_unc
+--------------------------------------------------------------------------------
+Group: sediment, Variable: k40_dl
+--------------------------------------------------------------------------------
+Group: sediment, Variable: k40_sampmet
+--------------------------------------------------------------------------------
+Group: sediment, Variable: k40_prepmet
+--------------------------------------------------------------------------------
+Group: sediment, Variable: k40_unit
+--------------------------------------------------------------------------------
+Group: sediment, Variable: mn54
+--------------------------------------------------------------------------------
+Group: sediment, Variable: mn54_unc
+--------------------------------------------------------------------------------
+Group: sediment, Variable: mn54_dl
+--------------------------------------------------------------------------------
+Group: sediment, Variable: mn54_sampmet
+--------------------------------------------------------------------------------
+Group: sediment, Variable: mn54_prepmet
+--------------------------------------------------------------------------------
+Group: sediment, Variable: mn54_unit
+--------------------------------------------------------------------------------
+Group: sediment, Variable: co58
+--------------------------------------------------------------------------------
+Group: sediment, Variable: co58_unc
+--------------------------------------------------------------------------------
+Group: sediment, Variable: co58_dl
+--------------------------------------------------------------------------------
+Group: sediment, Variable: co58_sampmet
+--------------------------------------------------------------------------------
+Group: sediment, Variable: co58_prepmet
+--------------------------------------------------------------------------------
+Group: sediment, Variable: co58_unit
+--------------------------------------------------------------------------------
+Group: sediment, Variable: co60
+--------------------------------------------------------------------------------
+Group: sediment, Variable: co60_unc
+--------------------------------------------------------------------------------
+Group: sediment, Variable: co60_dl
+--------------------------------------------------------------------------------
+Group: sediment, Variable: co60_sampmet
+--------------------------------------------------------------------------------
+Group: sediment, Variable: co60_prepmet
+--------------------------------------------------------------------------------
+Group: sediment, Variable: co60_unit
+--------------------------------------------------------------------------------
+Group: sediment, Variable: zn65
+--------------------------------------------------------------------------------
+Group: sediment, Variable: zn65_unc
+--------------------------------------------------------------------------------
+Group: sediment, Variable: zn65_dl
+--------------------------------------------------------------------------------
+Group: sediment, Variable: zn65_sampmet
+--------------------------------------------------------------------------------
+Group: sediment, Variable: zn65_prepmet
+--------------------------------------------------------------------------------
+Group: sediment, Variable: zn65_unit
+--------------------------------------------------------------------------------
+Group: sediment, Variable: sr90
+--------------------------------------------------------------------------------
+Group: sediment, Variable: sr90_unc
+--------------------------------------------------------------------------------
+Group: sediment, Variable: sr90_dl
+--------------------------------------------------------------------------------
+Group: sediment, Variable: sr90_sampmet
+--------------------------------------------------------------------------------
+Group: sediment, Variable: sr90_prepmet
+--------------------------------------------------------------------------------
+Group: sediment, Variable: sr90_unit
+--------------------------------------------------------------------------------
+Group: sediment, Variable: zr95
+--------------------------------------------------------------------------------
+Group: sediment, Variable: zr95_unc
+--------------------------------------------------------------------------------
+Group: sediment, Variable: zr95_dl
+--------------------------------------------------------------------------------
+Group: sediment, Variable: zr95_sampmet
+--------------------------------------------------------------------------------
+Group: sediment, Variable: zr95_prepmet
+--------------------------------------------------------------------------------
+Group: sediment, Variable: zr95_unit
+--------------------------------------------------------------------------------
+Group: sediment, Variable: nb95
+--------------------------------------------------------------------------------
+Group: sediment, Variable: nb95_unc
+--------------------------------------------------------------------------------
+Group: sediment, Variable: nb95_dl
+--------------------------------------------------------------------------------
+Group: sediment, Variable: nb95_sampmet
+--------------------------------------------------------------------------------
+Group: sediment, Variable: nb95_prepmet
+--------------------------------------------------------------------------------
+Group: sediment, Variable: nb95_unit
+--------------------------------------------------------------------------------
+Group: sediment, Variable: ru103
+--------------------------------------------------------------------------------
+Group: sediment, Variable: ru103_unc
+--------------------------------------------------------------------------------
+Group: sediment, Variable: ru103_dl
+--------------------------------------------------------------------------------
+Group: sediment, Variable: ru103_sampmet
+--------------------------------------------------------------------------------
+Group: sediment, Variable: ru103_prepmet
+--------------------------------------------------------------------------------
+Group: sediment, Variable: ru103_unit
+--------------------------------------------------------------------------------
+Group: sediment, Variable: ru106
+--------------------------------------------------------------------------------
+Group: sediment, Variable: ru106_unc
+--------------------------------------------------------------------------------
+Group: sediment, Variable: ru106_dl
+--------------------------------------------------------------------------------
+Group: sediment, Variable: ru106_sampmet
+--------------------------------------------------------------------------------
+Group: sediment, Variable: ru106_prepmet
+--------------------------------------------------------------------------------
+Group: sediment, Variable: ru106_unit
+--------------------------------------------------------------------------------
+Group: sediment, Variable: ag110m
+--------------------------------------------------------------------------------
+Group: sediment, Variable: ag110m_unc
+--------------------------------------------------------------------------------
+Group: sediment, Variable: ag110m_dl
+--------------------------------------------------------------------------------
+Group: sediment, Variable: ag110m_sampmet
+--------------------------------------------------------------------------------
+Group: sediment, Variable: ag110m_prepmet
+--------------------------------------------------------------------------------
+Group: sediment, Variable: ag110m_unit
+--------------------------------------------------------------------------------
+Group: sediment, Variable: sb124
+--------------------------------------------------------------------------------
+Group: sediment, Variable: sb124_unc
+--------------------------------------------------------------------------------
+Group: sediment, Variable: sb124_dl
+--------------------------------------------------------------------------------
+Group: sediment, Variable: sb124_sampmet
+--------------------------------------------------------------------------------
+Group: sediment, Variable: sb124_prepmet
+--------------------------------------------------------------------------------
+Group: sediment, Variable: sb124_unit
+--------------------------------------------------------------------------------
+Group: sediment, Variable: sb125
+--------------------------------------------------------------------------------
+Group: sediment, Variable: sb125_unc
+--------------------------------------------------------------------------------
+Group: sediment, Variable: sb125_dl
+--------------------------------------------------------------------------------
+Group: sediment, Variable: sb125_sampmet
+--------------------------------------------------------------------------------
+Group: sediment, Variable: sb125_prepmet
+--------------------------------------------------------------------------------
+Group: sediment, Variable: sb125_unit
+--------------------------------------------------------------------------------
+Group: sediment, Variable: cs134
+--------------------------------------------------------------------------------
+Group: sediment, Variable: cs134_unc
+--------------------------------------------------------------------------------
+Group: sediment, Variable: cs134_dl
+--------------------------------------------------------------------------------
+Group: sediment, Variable: cs134_sampmet
+--------------------------------------------------------------------------------
+Group: sediment, Variable: cs134_prepmet
+--------------------------------------------------------------------------------
+Group: sediment, Variable: cs134_unit
+--------------------------------------------------------------------------------
+Group: sediment, Variable: cs137
+--------------------------------------------------------------------------------
+Group: sediment, Variable: cs137_unc
+--------------------------------------------------------------------------------
+Group: sediment, Variable: cs137_dl
+--------------------------------------------------------------------------------
+Group: sediment, Variable: cs137_sampmet
+--------------------------------------------------------------------------------
+Group: sediment, Variable: cs137_prepmet
+--------------------------------------------------------------------------------
+Group: sediment, Variable: cs137_unit
+--------------------------------------------------------------------------------
+Group: sediment, Variable: ba140
+--------------------------------------------------------------------------------
+Group: sediment, Variable: ba140_unc
+--------------------------------------------------------------------------------
+Group: sediment, Variable: ba140_dl
+--------------------------------------------------------------------------------
+Group: sediment, Variable: ba140_sampmet
+--------------------------------------------------------------------------------
+Group: sediment, Variable: ba140_prepmet
+--------------------------------------------------------------------------------
+Group: sediment, Variable: ba140_unit
+--------------------------------------------------------------------------------
+Group: sediment, Variable: ce144
+--------------------------------------------------------------------------------
+Group: sediment, Variable: ce144_unc
+--------------------------------------------------------------------------------
+Group: sediment, Variable: ce144_dl
+--------------------------------------------------------------------------------
+Group: sediment, Variable: ce144_sampmet
+--------------------------------------------------------------------------------
+Group: sediment, Variable: ce144_prepmet
+--------------------------------------------------------------------------------
+Group: sediment, Variable: ce144_unit
+--------------------------------------------------------------------------------
+Group: sediment, Variable: eu155
+--------------------------------------------------------------------------------
+Group: sediment, Variable: eu155_unc
+--------------------------------------------------------------------------------
+Group: sediment, Variable: eu155_dl
+--------------------------------------------------------------------------------
+Group: sediment, Variable: eu155_sampmet
+--------------------------------------------------------------------------------
+Group: sediment, Variable: eu155_prepmet
+--------------------------------------------------------------------------------
+Group: sediment, Variable: eu155_unit
+--------------------------------------------------------------------------------
+Group: sediment, Variable: pb210
+--------------------------------------------------------------------------------
+Group: sediment, Variable: pb210_unc
+--------------------------------------------------------------------------------
+Group: sediment, Variable: pb210_dl
+--------------------------------------------------------------------------------
+Group: sediment, Variable: pb210_sampmet
+--------------------------------------------------------------------------------
+Group: sediment, Variable: pb210_prepmet
+--------------------------------------------------------------------------------
+Group: sediment, Variable: pb210_unit
+--------------------------------------------------------------------------------
+Group: sediment, Variable: pb212
+--------------------------------------------------------------------------------
+Group: sediment, Variable: pb212_unc
+--------------------------------------------------------------------------------
+Group: sediment, Variable: pb212_dl
+--------------------------------------------------------------------------------
+Group: sediment, Variable: pb212_sampmet
+--------------------------------------------------------------------------------
+Group: sediment, Variable: pb212_prepmet
+--------------------------------------------------------------------------------
+Group: sediment, Variable: pb212_unit
+--------------------------------------------------------------------------------
+Group: sediment, Variable: pb214
+--------------------------------------------------------------------------------
+Group: sediment, Variable: pb214_unc
+--------------------------------------------------------------------------------
+Group: sediment, Variable: pb214_dl
+--------------------------------------------------------------------------------
+Group: sediment, Variable: pb214_sampmet
+--------------------------------------------------------------------------------
+Group: sediment, Variable: pb214_prepmet
+--------------------------------------------------------------------------------
+Group: sediment, Variable: pb214_unit
+--------------------------------------------------------------------------------
+Group: sediment, Variable: bi214
+--------------------------------------------------------------------------------
+Group: sediment, Variable: bi214_unc
+--------------------------------------------------------------------------------
+Group: sediment, Variable: bi214_dl
+--------------------------------------------------------------------------------
+Group: sediment, Variable: bi214_sampmet
+--------------------------------------------------------------------------------
+Group: sediment, Variable: bi214_prepmet
+--------------------------------------------------------------------------------
+Group: sediment, Variable: bi214_unit
+--------------------------------------------------------------------------------
+Group: sediment, Variable: po210
+--------------------------------------------------------------------------------
+Group: sediment, Variable: po210_unc
+--------------------------------------------------------------------------------
+Group: sediment, Variable: po210_dl
+--------------------------------------------------------------------------------
+Group: sediment, Variable: po210_sampmet
+--------------------------------------------------------------------------------
+Group: sediment, Variable: po210_prepmet
+--------------------------------------------------------------------------------
+Group: sediment, Variable: po210_unit
+--------------------------------------------------------------------------------
+Group: sediment, Variable: ra223
+--------------------------------------------------------------------------------
+Group: sediment, Variable: ra223_unc
+--------------------------------------------------------------------------------
+Group: sediment, Variable: ra223_dl
+--------------------------------------------------------------------------------
+Group: sediment, Variable: ra223_sampmet
+--------------------------------------------------------------------------------
+Group: sediment, Variable: ra223_prepmet
+--------------------------------------------------------------------------------
+Group: sediment, Variable: ra223_unit
+--------------------------------------------------------------------------------
+Group: sediment, Variable: ra224
+--------------------------------------------------------------------------------
+Group: sediment, Variable: ra224_unc
+--------------------------------------------------------------------------------
+Group: sediment, Variable: ra224_dl
+--------------------------------------------------------------------------------
+Group: sediment, Variable: ra224_sampmet
+--------------------------------------------------------------------------------
+Group: sediment, Variable: ra224_prepmet
+--------------------------------------------------------------------------------
+Group: sediment, Variable: ra224_unit
+--------------------------------------------------------------------------------
+Group: sediment, Variable: ra226
+--------------------------------------------------------------------------------
+Group: sediment, Variable: ra226_unc
+--------------------------------------------------------------------------------
+Group: sediment, Variable: ra226_dl
+--------------------------------------------------------------------------------
+Group: sediment, Variable: ra226_sampmet
+--------------------------------------------------------------------------------
+Group: sediment, Variable: ra226_prepmet
+--------------------------------------------------------------------------------
+Group: sediment, Variable: ra226_unit
+--------------------------------------------------------------------------------
+Group: sediment, Variable: ra228
+--------------------------------------------------------------------------------
+Group: sediment, Variable: ra228_unc
+--------------------------------------------------------------------------------
+Group: sediment, Variable: ra228_dl
+--------------------------------------------------------------------------------
+Group: sediment, Variable: ra228_sampmet
+--------------------------------------------------------------------------------
+Group: sediment, Variable: ra228_prepmet
+--------------------------------------------------------------------------------
+Group: sediment, Variable: ra228_unit
+--------------------------------------------------------------------------------
+Group: sediment, Variable: ac228
+--------------------------------------------------------------------------------
+Group: sediment, Variable: ac228_unc
+--------------------------------------------------------------------------------
+Group: sediment, Variable: ac228_dl
+--------------------------------------------------------------------------------
+Group: sediment, Variable: ac228_sampmet
+--------------------------------------------------------------------------------
+Group: sediment, Variable: ac228_prepmet
+--------------------------------------------------------------------------------
+Group: sediment, Variable: ac228_unit
+--------------------------------------------------------------------------------
+Group: sediment, Variable: th228
+--------------------------------------------------------------------------------
+Group: sediment, Variable: th228_unc
+--------------------------------------------------------------------------------
+Group: sediment, Variable: th228_dl
+--------------------------------------------------------------------------------
+Group: sediment, Variable: th228_sampmet
+--------------------------------------------------------------------------------
+Group: sediment, Variable: th228_prepmet
+--------------------------------------------------------------------------------
+Group: sediment, Variable: th228_unit
+--------------------------------------------------------------------------------
+Group: sediment, Variable: th232
+--------------------------------------------------------------------------------
+Group: sediment, Variable: th232_unc
+--------------------------------------------------------------------------------
+Group: sediment, Variable: th232_dl
+--------------------------------------------------------------------------------
+Group: sediment, Variable: th232_sampmet
+--------------------------------------------------------------------------------
+Group: sediment, Variable: th232_prepmet
+--------------------------------------------------------------------------------
+Group: sediment, Variable: th232_unit
+--------------------------------------------------------------------------------
+Group: sediment, Variable: th234
+--------------------------------------------------------------------------------
+Group: sediment, Variable: th234_unc
+--------------------------------------------------------------------------------
+Group: sediment, Variable: th234_dl
+--------------------------------------------------------------------------------
+Group: sediment, Variable: th234_sampmet
+--------------------------------------------------------------------------------
+Group: sediment, Variable: th234_prepmet
+--------------------------------------------------------------------------------
+Group: sediment, Variable: th234_unit
+--------------------------------------------------------------------------------
+Group: sediment, Variable: u235
+--------------------------------------------------------------------------------
+Group: sediment, Variable: u235_unc
+--------------------------------------------------------------------------------
+Group: sediment, Variable: u235_dl
+--------------------------------------------------------------------------------
+Group: sediment, Variable: u235_sampmet
+--------------------------------------------------------------------------------
+Group: sediment, Variable: u235_prepmet
+--------------------------------------------------------------------------------
+Group: sediment, Variable: u235_unit
+--------------------------------------------------------------------------------
+Group: sediment, Variable: pu238
+--------------------------------------------------------------------------------
+Group: sediment, Variable: pu238_unc
+--------------------------------------------------------------------------------
+Group: sediment, Variable: pu238_dl
+--------------------------------------------------------------------------------
+Group: sediment, Variable: pu238_sampmet
+--------------------------------------------------------------------------------
+Group: sediment, Variable: pu238_prepmet
+--------------------------------------------------------------------------------
+Group: sediment, Variable: pu238_unit
+--------------------------------------------------------------------------------
+Group: sediment, Variable: pu239
+--------------------------------------------------------------------------------
+Group: sediment, Variable: pu239_unc
+--------------------------------------------------------------------------------
+Group: sediment, Variable: pu239_dl
+--------------------------------------------------------------------------------
+Group: sediment, Variable: pu239_sampmet
+--------------------------------------------------------------------------------
+Group: sediment, Variable: pu239_prepmet
+--------------------------------------------------------------------------------
+Group: sediment, Variable: pu239_unit
+--------------------------------------------------------------------------------
+Group: sediment, Variable: pu241
+--------------------------------------------------------------------------------
+Group: sediment, Variable: pu241_unc
+--------------------------------------------------------------------------------
+Group: sediment, Variable: pu241_dl
+--------------------------------------------------------------------------------
+Group: sediment, Variable: pu241_sampmet
+--------------------------------------------------------------------------------
+Group: sediment, Variable: pu241_prepmet
+--------------------------------------------------------------------------------
+Group: sediment, Variable: pu241_unit
+--------------------------------------------------------------------------------
+Group: sediment, Variable: am241
+--------------------------------------------------------------------------------
+Group: sediment, Variable: am241_unc
+--------------------------------------------------------------------------------
+Group: sediment, Variable: am241_dl
+--------------------------------------------------------------------------------
+Group: sediment, Variable: am241_sampmet
+--------------------------------------------------------------------------------
+Group: sediment, Variable: am241_prepmet
+--------------------------------------------------------------------------------
+Group: sediment, Variable: am241_unit
+--------------------------------------------------------------------------------
+Group: sediment, Variable: cs134_137_tot
+--------------------------------------------------------------------------------
+Group: sediment, Variable: cs134_137_tot_unc
+--------------------------------------------------------------------------------
+Group: sediment, Variable: cs134_137_tot_dl
+--------------------------------------------------------------------------------
+Group: sediment, Variable: cs134_137_tot_sampmet
+--------------------------------------------------------------------------------
+Group: sediment, Variable: cs134_137_tot_prepmet
+--------------------------------------------------------------------------------
+Group: sediment, Variable: cs134_137_tot_unit
+--------------------------------------------------------------------------------
+Group: sediment, Variable: pu239_240_tot
+--------------------------------------------------------------------------------
+Group: sediment, Variable: pu239_240_tot_unc
+--------------------------------------------------------------------------------
+Group: sediment, Variable: pu239_240_tot_dl
+--------------------------------------------------------------------------------
+Group: sediment, Variable: pu239_240_tot_sampmet
+--------------------------------------------------------------------------------
+Group: sediment, Variable: pu239_240_tot_prepmet
+--------------------------------------------------------------------------------
+Group: sediment, Variable: pu239_240_tot_unit
+--------------------------------------------------------------------------------
+Group: sediment, Variable: cd109
+--------------------------------------------------------------------------------
+Group: sediment, Variable: cd109_unc
+--------------------------------------------------------------------------------
+Group: sediment, Variable: cd109_dl
+--------------------------------------------------------------------------------
+Group: sediment, Variable: cd109_sampmet
+--------------------------------------------------------------------------------
+Group: sediment, Variable: cd109_prepmet
+--------------------------------------------------------------------------------
+Group: sediment, Variable: cd109_unit
+--------------------------------------------------------------------------------
+Group: sediment, Variable: ir192
+--------------------------------------------------------------------------------
+Group: sediment, Variable: ir192_unc
+--------------------------------------------------------------------------------
+Group: sediment, Variable: ir192_dl
+--------------------------------------------------------------------------------
+Group: sediment, Variable: ir192_sampmet
+--------------------------------------------------------------------------------
+Group: sediment, Variable: ir192_prepmet
+--------------------------------------------------------------------------------
+Group: sediment, Variable: ir192_unit
+--------------------------------------------------------------------------------
+Group: sediment, Variable: pu238_240_tot
+--------------------------------------------------------------------------------
+Group: sediment, Variable: pu238_240_tot_dl
+--------------------------------------------------------------------------------
+Group: sediment, Variable: pu238_240_tot_sampmet
+--------------------------------------------------------------------------------
+Group: sediment, Variable: pu238_240_tot_prepmet
+--------------------------------------------------------------------------------
+Group: sediment, Variable: pu238_240_tot_unit
+--------------------------------------------------------------------------------
+Group: sediment, Variable: tl208
+--------------------------------------------------------------------------------
+Group: sediment, Variable: tl208_unc
+--------------------------------------------------------------------------------
+Group: sediment, Variable: tl208_dl
+--------------------------------------------------------------------------------
+Group: sediment, Variable: tl208_sampmet
+--------------------------------------------------------------------------------
+Group: sediment, Variable: tl208_prepmet
+--------------------------------------------------------------------------------
+Group: sediment, Variable: tl208_unit
+--------------------------------------------------------------------------------
+Group: sediment, Variable: bi212
+--------------------------------------------------------------------------------
+Group: sediment, Variable: bi212_unc
+--------------------------------------------------------------------------------
+Group: sediment, Variable: bi212_dl
+--------------------------------------------------------------------------------
+Group: sediment, Variable: bi212_sampmet
+--------------------------------------------------------------------------------
+Group: sediment, Variable: bi212_prepmet
+--------------------------------------------------------------------------------
+Group: sediment, Variable: bi212_unit
+
+Get the current working directory (cwd). .
+ +++ ++netcdf4_to_df (fname_in)
+ | lon | +lat | +smp_depth | +tot_depth | +time | +h3 | +h3_unc | +h3_dl | +h3_sal | +h3_temp | +... | +pu239_240_tot_dl | +pu239_240_tot_sal | +pu239_240_tot_temp | +pu239_240_tot_unit | +cm243_244_tot | +cm243_244_tot_unc | +cm243_244_tot_dl | +cm243_244_tot_sal | +cm243_244_tot_temp | +cm243_244_tot_unit | +
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
sample | ++ | + | + | + | + | + | + | + | + | + | + | + | + | + | + | + | + | + | + | + | + |
0 | +14.257800 | +53.942200 | +0.0 | +10.0 | +1339545600 | +NaN | +NaN | +-1 | +NaN | +NaN | +... | +-1 | +NaN | +NaN | +-1 | +NaN | +NaN | +-1 | +NaN | +NaN | +-1 | +
1 | +14.257800 | +53.942200 | +8.0 | +10.0 | +1339545600 | +NaN | +NaN | +-1 | +NaN | +NaN | +... | +-1 | +NaN | +NaN | +-1 | +NaN | +NaN | +-1 | +NaN | +NaN | +-1 | +
2 | +14.257800 | +53.942200 | +0.0 | +10.0 | +1339545600 | +NaN | +NaN | +-1 | +NaN | +NaN | +... | +-1 | +NaN | +NaN | +-1 | +NaN | +NaN | +-1 | +NaN | +NaN | +-1 | +
3 | +14.257800 | +53.942200 | +8.0 | +10.0 | +1339545600 | +NaN | +NaN | +-1 | +NaN | +NaN | +... | +-1 | +NaN | +NaN | +-1 | +NaN | +NaN | +-1 | +NaN | +NaN | +-1 | +
4 | +14.257800 | +53.942200 | +0.0 | +9.0 | +1370390400 | +NaN | +NaN | +-1 | +NaN | +NaN | +... | +-1 | +NaN | +NaN | +-1 | +NaN | +NaN | +-1 | +NaN | +NaN | +-1 | +
... | +... | +... | +... | +... | +... | +... | +... | +... | +... | +... | +... | +... | +... | +... | +... | +... | +... | +... | +... | +... | +... | +
20237 | +24.334999 | +65.634697 | +0.0 | +17.0 | +773971200 | +NaN | +NaN | +-1 | +NaN | +NaN | +... | +-1 | +NaN | +NaN | +-1 | +NaN | +NaN | +-1 | +NaN | +NaN | +-1 | +
20238 | +24.334999 | +65.634697 | +0.0 | +17.0 | +773971200 | +NaN | +NaN | +-1 | +NaN | +NaN | +... | +-1 | +NaN | +NaN | +-1 | +NaN | +NaN | +-1 | +NaN | +NaN | +-1 | +
20239 | +24.334999 | +65.634697 | +0.0 | +17.0 | +773971200 | +NaN | +NaN | +-1 | +NaN | +NaN | +... | +-1 | +NaN | +NaN | +-1 | +NaN | +NaN | +-1 | +NaN | +NaN | +-1 | +
20240 | +24.334999 | +65.634697 | +0.0 | +17.0 | +841190400 | +NaN | +NaN | +-1 | +NaN | +NaN | +... | +-1 | +NaN | +NaN | +-1 | +NaN | +NaN | +-1 | +NaN | +NaN | +-1 | +
20241 | +24.334999 | +65.634697 | +0.0 | +17.0 | +841190400 | +NaN | +NaN | +-1 | +NaN | +NaN | +... | +-1 | +NaN | +NaN | +-1 | +NaN | +NaN | +-1 | +NaN | +NaN | +-1 | +
20242 rows × 175 columns
++ | lon | +lat | +smp_depth | +time | +bio_group | +species | +body_part | +be7 | +be7_unc | +be7_dl | +... | +sn113_dl | +sn113_unit | +sn117m | +sn117m_unc | +sn117m_dl | +sn117m_unit | +tl208 | +tl208_unc | +tl208_dl | +tl208_unit | +
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
sample | ++ | + | + | + | + | + | + | + | + | + | + | + | + | + | + | + | + | + | + | + | + |
0 | +14.300000 | +53.500000 | +NaN | +1443052800 | +4 | +247 | +52 | +NaN | +NaN | +-1 | +... | +-1 | +-1 | +NaN | +NaN | +-1 | +-1 | +NaN | +NaN | +-1 | +-1 | +
1 | +14.300000 | +53.500000 | +NaN | +1443052800 | +4 | +247 | +52 | +NaN | +NaN | +-1 | +... | +-1 | +-1 | +NaN | +NaN | +-1 | +-1 | +NaN | +NaN | +-1 | +-1 | +
2 | +14.300000 | +53.500000 | +NaN | +1443052800 | +4 | +247 | +52 | +NaN | +NaN | +-1 | +... | +-1 | +-1 | +NaN | +NaN | +-1 | +-1 | +NaN | +NaN | +-1 | +-1 | +
3 | +14.300000 | +53.500000 | +NaN | +1443052800 | +4 | +247 | +52 | +NaN | +NaN | +-1 | +... | +-1 | +-1 | +NaN | +NaN | +-1 | +-1 | +NaN | +NaN | +-1 | +-1 | +
4 | +14.300000 | +53.500000 | +NaN | +1443052800 | +4 | +247 | +52 | +NaN | +NaN | +-1 | +... | +-1 | +-1 | +NaN | +NaN | +-1 | +-1 | +NaN | +NaN | +-1 | +-1 | +
... | +... | +... | +... | +... | +... | +... | +... | +... | +... | +... | +... | +... | +... | +... | +... | +... | +... | +... | +... | +... | +... | +
14868 | +23.000000 | +65.716698 | +0.0 | +1127606400 | +4 | +50 | +52 | +NaN | +NaN | +-1 | +... | +-1 | +-1 | +NaN | +NaN | +-1 | +-1 | +NaN | +NaN | +-1 | +-1 | +
14869 | +23.049999 | +65.716698 | +0.0 | +1064016000 | +4 | +50 | +52 | +NaN | +NaN | +-1 | +... | +-1 | +-1 | +NaN | +NaN | +-1 | +-1 | +NaN | +NaN | +-1 | +-1 | +
14870 | +23.049999 | +65.716698 | +0.0 | +1064016000 | +4 | +50 | +52 | +NaN | +NaN | +-1 | +... | +-1 | +-1 | +NaN | +NaN | +-1 | +-1 | +NaN | +NaN | +-1 | +-1 | +
14871 | +23.000000 | +65.750000 | +0.0 | +1284940800 | +4 | +50 | +52 | +NaN | +NaN | +-1 | +... | +-1 | +-1 | +NaN | +NaN | +-1 | +-1 | +NaN | +NaN | +-1 | +-1 | +
14872 | +23.000000 | +65.750000 | +0.0 | +1284940800 | +4 | +50 | +52 | +NaN | +NaN | +-1 | +... | +-1 | +-1 | +NaN | +NaN | +-1 | +-1 | +NaN | +NaN | +-1 | +-1 | +
14873 rows × 211 columns
++++ReshapeWideToLong (columns='nuclide', values=['value'])
Convert data from wide to long with renamed columns.
+{'seawater': lon lat smp_depth tot_depth time sample \
+ 0 14.199800 54.006001 0.0 11.0 1497744000 78
+ 1 14.202300 54.006199 0.0 12.0 1339632000 83
+ 2 14.199500 54.006302 0.0 12.0 1402876800 86
+ 3 14.201000 54.006500 0.0 12.0 1278460800 91
+ 4 14.200500 54.006668 0.0 12.0 1309910400 101
+ ... ... ... ... ... ... ...
+ 20237 20.030001 57.343300 0.0 236.0 524620800 14175
+ 20238 21.500000 59.433300 0.0 156.0 555033600 15712
+ 20239 21.525801 59.439800 0.0 160.0 585446400 15840
+ 20240 21.525801 59.439800 150.0 160.0 585446400 15847
+ 20241 23.555000 65.239998 0.0 73.0 587001600 20130
+
+ nuclide value _unc _dl _sal _temp _unit
+ 0 h3 850.0000 59.669998 1 7.50 NaN 1
+ 1 h3 970.0000 29.100000 1 6.77 NaN 1
+ 2 h3 910.0000 24.570000 1 6.80 NaN 1
+ 3 h3 1070.0000 21.400000 1 5.82 NaN 1
+ 4 h3 1020.0000 20.400000 1 5.40 NaN 1
+ ... ... ... ... ... ... ... ...
+ 20237 cm243_244_tot 0.0064 0.001280 1 6.90 NaN 1
+ 20238 cm243_244_tot 0.0045 0.000900 1 6.75 NaN 1
+ 20239 cm243_244_tot 0.0022 0.000660 1 5.83 20.4 1
+ 20240 cm243_244_tot 0.0064 0.001920 1 9.77 3.9 1
+ 20241 cm243_244_tot 0.0039 0.001170 1 3.10 15.6 1
+
+ [20242 rows x 13 columns],
+ 'sediment': lon lat tot_depth time sed_type sample nuclide \
+ 0 10.850000 54.049999 22.0 866592000 58 842 be7
+ 1 10.203300 54.415001 13.0 811641600 58 4064 be7
+ 2 10.203300 54.415001 13.0 811641600 58 4069 be7
+ 3 10.203300 54.415001 13.0 811641600 58 4074 be7
+ 4 11.750000 54.416698 24.0 838512000 58 4535 be7
+ ... ... ... ... ... ... ... ...
+ 37084 23.391199 65.277496 90.0 1283299200 2 37039 bi212
+ 37085 23.391199 65.277496 90.0 1283299200 2 37048 bi212
+ 37086 23.391199 65.277496 90.0 1283299200 2 37065 bi212
+ 37087 23.391199 65.277496 90.0 1283299200 2 37074 bi212
+ 37088 23.391199 65.277496 90.0 1283299200 2 37083 bi212
+
+ value _unc _dl _unit
+ 0 24.299999 7.7760 1 4
+ 1 45.500000 4.5500 1 4
+ 2 7.000000 NaN 2 4
+ 3 4.800000 NaN 2 4
+ 4 6.900000 1.9320 1 4
+ ... ... ... ... ...
+ 37084 42.900002 6.1347 1 4
+ 37085 58.400002 6.1904 1 4
+ 37086 51.400002 5.9624 1 4
+ 37087 41.799999 5.4758 1 4
+ 37088 43.700001 3.4523 1 4
+
+ [37089 rows x 11 columns],
+ 'biota': lon lat smp_depth time bio_group species \
+ 0 11.5000 54.080002 0.0 908755200 11 96
+ 1 11.5000 54.080002 0.0 900547200 11 96
+ 2 11.5000 54.080002 0.0 874540800 14 129
+ 3 11.5000 54.080002 0.0 865900800 14 129
+ 4 11.5000 54.080002 0.0 874886400 11 96
+ ... ... ... ... ... ... ...
+ 14868 12.0742 57.335201 0.0 1253145600 11 96
+ 14869 12.0742 57.335201 0.0 1225670400 11 96
+ 14870 12.0742 57.335201 0.0 1160352000 11 96
+ 14871 12.0742 57.335201 0.0 1380240000 11 96
+ 14872 12.0742 57.335201 0.0 1409788800 11 96
+
+ body_part sample nuclide value _unc _dl _unit
+ 0 54 150 be7 46.500 1.813500 1 4
+ 1 54 159 be7 66.500 6.317500 1 4
+ 2 1 168 be7 5.430 1.574700 1 4
+ 3 1 177 be7 13.700 4.384000 1 4
+ 4 54 183 be7 11.300 0.000000 2 4
+ ... ... ... ... ... ... ... ...
+ 14868 54 11586 tl208 0.880 0.079200 1 4
+ 14869 54 11598 tl208 0.770 0.069300 1 4
+ 14870 54 11620 tl208 1.310 0.142790 1 4
+ 14871 54 11766 tl208 0.668 0.057448 1 4
+ 14872 54 11775 tl208 0.684 0.072504 1 4
+
+ [14873 rows x 13 columns]}
++ | lon | +lat | +tot_depth | +time | +sed_type | +sample | +nuclide | +value | +_unc | +_dl | +_unit | +
---|---|---|---|---|---|---|---|---|---|---|---|
0 | +10.850000 | +54.049999 | +22.0 | +866592000 | +58 | +842 | +be7 | +24.299999 | +7.7760 | +1 | +4 | +
1 | +10.203300 | +54.415001 | +13.0 | +811641600 | +58 | +4064 | +be7 | +45.500000 | +4.5500 | +1 | +4 | +
2 | +10.203300 | +54.415001 | +13.0 | +811641600 | +58 | +4069 | +be7 | +7.000000 | +NaN | +2 | +4 | +
3 | +10.203300 | +54.415001 | +13.0 | +811641600 | +58 | +4074 | +be7 | +4.800000 | +NaN | +2 | +4 | +
4 | +11.750000 | +54.416698 | +24.0 | +838512000 | +58 | +4535 | +be7 | +6.900000 | +1.9320 | +1 | +4 | +
... | +... | +... | +... | +... | +... | +... | +... | +... | +... | +... | +... | +
37084 | +23.391199 | +65.277496 | +90.0 | +1283299200 | +2 | +37039 | +bi212 | +42.900002 | +6.1347 | +1 | +4 | +
37085 | +23.391199 | +65.277496 | +90.0 | +1283299200 | +2 | +37048 | +bi212 | +58.400002 | +6.1904 | +1 | +4 | +
37086 | +23.391199 | +65.277496 | +90.0 | +1283299200 | +2 | +37065 | +bi212 | +51.400002 | +5.9624 | +1 | +4 | +
37087 | +23.391199 | +65.277496 | +90.0 | +1283299200 | +2 | +37074 | +bi212 | +41.799999 | +5.4758 | +1 | +4 | +
37088 | +23.391199 | +65.277496 | +90.0 | +1283299200 | +2 | +37083 | +bi212 | +43.700001 | +3.4523 | +1 | +4 | +
37089 rows × 11 columns
++++LookupTimeFromEncodedTime (cfg)
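Callback (derived from the `Callback` base class) that adds `Sampling start date` and `Sampling start time` columns decoded from the encoded `time` column.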
+dfs = netcdf4_to_df(fname_in)
+tfm = Transformer(dfs, cbs=[ReshapeWideToLong(),
+ LookupTimeFromEncodedTime(cfg())])
+tfm()
{'seawater': lon lat smp_depth tot_depth time sample \
+ 0 14.199800 54.006001 0.0 11.0 1497744000 78
+ 1 14.202300 54.006199 0.0 12.0 1339632000 83
+ 2 14.199500 54.006302 0.0 12.0 1402876800 86
+ 3 14.201000 54.006500 0.0 12.0 1278460800 91
+ 4 14.200500 54.006668 0.0 12.0 1309910400 101
+ ... ... ... ... ... ... ...
+ 20237 20.030001 57.343300 0.0 236.0 524620800 14175
+ 20238 21.500000 59.433300 0.0 156.0 555033600 15712
+ 20239 21.525801 59.439800 0.0 160.0 585446400 15840
+ 20240 21.525801 59.439800 150.0 160.0 585446400 15847
+ 20241 23.555000 65.239998 0.0 73.0 587001600 20130
+
+ nuclide value _unc _dl _sal _temp _unit \
+ 0 h3 850.0000 59.669998 1 7.50 NaN 1
+ 1 h3 970.0000 29.100000 1 6.77 NaN 1
+ 2 h3 910.0000 24.570000 1 6.80 NaN 1
+ 3 h3 1070.0000 21.400000 1 5.82 NaN 1
+ 4 h3 1020.0000 20.400000 1 5.40 NaN 1
+ ... ... ... ... ... ... ... ...
+ 20237 cm243_244_tot 0.0064 0.001280 1 6.90 NaN 1
+ 20238 cm243_244_tot 0.0045 0.000900 1 6.75 NaN 1
+ 20239 cm243_244_tot 0.0022 0.000660 1 5.83 20.4 1
+ 20240 cm243_244_tot 0.0064 0.001920 1 9.77 3.9 1
+ 20241 cm243_244_tot 0.0039 0.001170 1 3.10 15.6 1
+
+ Sampling start date Sampling start time
+ 0 18-Jun-2017 00:00:00
+ 1 14-Jun-2012 00:00:00
+ 2 16-Jun-2014 00:00:00
+ 3 07-Jul-2010 00:00:00
+ 4 06-Jul-2011 00:00:00
+ ... ... ...
+ 20237 17-Aug-1986 00:00:00
+ 20238 04-Aug-1987 00:00:00
+ 20239 21-Jul-1988 00:00:00
+ 20240 21-Jul-1988 00:00:00
+ 20241 08-Aug-1988 00:00:00
+
+ [20242 rows x 15 columns],
+ 'sediment': lon lat tot_depth time sed_type sample nuclide \
+ 0 10.850000 54.049999 22.0 866592000 58 842 be7
+ 1 10.203300 54.415001 13.0 811641600 58 4064 be7
+ 2 10.203300 54.415001 13.0 811641600 58 4069 be7
+ 3 10.203300 54.415001 13.0 811641600 58 4074 be7
+ 4 11.750000 54.416698 24.0 838512000 58 4535 be7
+ ... ... ... ... ... ... ... ...
+ 37084 23.391199 65.277496 90.0 1283299200 2 37039 bi212
+ 37085 23.391199 65.277496 90.0 1283299200 2 37048 bi212
+ 37086 23.391199 65.277496 90.0 1283299200 2 37065 bi212
+ 37087 23.391199 65.277496 90.0 1283299200 2 37074 bi212
+ 37088 23.391199 65.277496 90.0 1283299200 2 37083 bi212
+
+ value _unc _dl _unit Sampling start date Sampling start time
+ 0 24.299999 7.7760 1 4 18-Jun-1997 00:00:00
+ 1 45.500000 4.5500 1 4 21-Sep-1995 00:00:00
+ 2 7.000000 NaN 2 4 21-Sep-1995 00:00:00
+ 3 4.800000 NaN 2 4 21-Sep-1995 00:00:00
+ 4 6.900000 1.9320 1 4 28-Jul-1996 00:00:00
+ ... ... ... ... ... ... ...
+ 37084 42.900002 6.1347 1 4 01-Sep-2010 00:00:00
+ 37085 58.400002 6.1904 1 4 01-Sep-2010 00:00:00
+ 37086 51.400002 5.9624 1 4 01-Sep-2010 00:00:00
+ 37087 41.799999 5.4758 1 4 01-Sep-2010 00:00:00
+ 37088 43.700001 3.4523 1 4 01-Sep-2010 00:00:00
+
+ [37089 rows x 13 columns],
+ 'biota': lon lat smp_depth time bio_group species \
+ 0 11.5000 54.080002 0.0 908755200 11 96
+ 1 11.5000 54.080002 0.0 900547200 11 96
+ 2 11.5000 54.080002 0.0 874540800 14 129
+ 3 11.5000 54.080002 0.0 865900800 14 129
+ 4 11.5000 54.080002 0.0 874886400 11 96
+ ... ... ... ... ... ... ...
+ 14868 12.0742 57.335201 0.0 1253145600 11 96
+ 14869 12.0742 57.335201 0.0 1225670400 11 96
+ 14870 12.0742 57.335201 0.0 1160352000 11 96
+ 14871 12.0742 57.335201 0.0 1380240000 11 96
+ 14872 12.0742 57.335201 0.0 1409788800 11 96
+
+ body_part sample nuclide value _unc _dl _unit \
+ 0 54 150 be7 46.500 1.813500 1 4
+ 1 54 159 be7 66.500 6.317500 1 4
+ 2 1 168 be7 5.430 1.574700 1 4
+ 3 1 177 be7 13.700 4.384000 1 4
+ 4 54 183 be7 11.300 0.000000 2 4
+ ... ... ... ... ... ... ... ...
+ 14868 54 11586 tl208 0.880 0.079200 1 4
+ 14869 54 11598 tl208 0.770 0.069300 1 4
+ 14870 54 11620 tl208 1.310 0.142790 1 4
+ 14871 54 11766 tl208 0.668 0.057448 1 4
+ 14872 54 11775 tl208 0.684 0.072504 1 4
+
+ Sampling start date Sampling start time
+ 0 19-Oct-1998 00:00:00
+ 1 16-Jul-1998 00:00:00
+ 2 18-Sep-1997 00:00:00
+ 3 10-Jun-1997 00:00:00
+ 4 22-Sep-1997 00:00:00
+ ... ... ...
+ 14868 17-Sep-2009 00:00:00
+ 14869 03-Nov-2008 00:00:00
+ 14870 09-Oct-2006 00:00:00
+ 14871 27-Sep-2013 00:00:00
+ 14872 04-Sep-2014 00:00:00
+
+ [14873 rows x 15 columns]}
+0 18-Jun-2017
+1 14-Jun-2012
+2 16-Jun-2014
+3 07-Jul-2010
+4 06-Jul-2011
+ ...
+20237 17-Aug-1986
+20238 04-Aug-1987
+20239 21-Jul-1988
+20240 21-Jul-1988
+20241 08-Aug-1988
+Name: Sampling start date, Length: 20242, dtype: object
++++GetSampleTypeCB ()
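Callback (derived from the `Callback` base class) that adds a `Sample type` column holding the upper-cased group name (e.g. `BIOTA`).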
+dfs = netcdf4_to_df(fname_in)
+tfm = Transformer(dfs, cbs=[ReshapeWideToLong(),
+ LookupTimeFromEncodedTime(cfg()),
+ GetSampleTypeCB()])
+tfm()['biota']['Sample type']
0 BIOTA
+1 BIOTA
+2 BIOTA
+3 BIOTA
+4 BIOTA
+ ...
+14868 BIOTA
+14869 BIOTA
+14870 BIOTA
+14871 BIOTA
+14872 BIOTA
+Name: Sample type, Length: 14873, dtype: object
++++get_nucnames_lut ()
+++LookupNuclideByIdCB (fn_lut=<function get_nucnames_lut>)
Lookup MARIS nuclide_id.
+dfs = netcdf4_to_df(fname_in)
+tfm = Transformer(dfs, cbs=[ReshapeWideToLong(),
+ LookupTimeFromEncodedTime(cfg()),
+ GetSampleTypeCB(),
+ LookupNuclideByIdCB(),
+ ])
+tfm()
{'seawater': lon lat smp_depth tot_depth time sample \
+ 0 14.199800 54.006001 0.0 11.0 1497744000 78
+ 1 14.202300 54.006199 0.0 12.0 1339632000 83
+ 2 14.199500 54.006302 0.0 12.0 1402876800 86
+ 3 14.201000 54.006500 0.0 12.0 1278460800 91
+ 4 14.200500 54.006668 0.0 12.0 1309910400 101
+ ... ... ... ... ... ... ...
+ 20237 20.030001 57.343300 0.0 236.0 524620800 14175
+ 20238 21.500000 59.433300 0.0 156.0 555033600 15712
+ 20239 21.525801 59.439800 0.0 160.0 585446400 15840
+ 20240 21.525801 59.439800 150.0 160.0 585446400 15847
+ 20241 23.555000 65.239998 0.0 73.0 587001600 20130
+
+ nuclide value _unc _dl _sal _temp _unit \
+ 0 h3 850.0000 59.669998 1 7.50 NaN 1
+ 1 h3 970.0000 29.100000 1 6.77 NaN 1
+ 2 h3 910.0000 24.570000 1 6.80 NaN 1
+ 3 h3 1070.0000 21.400000 1 5.82 NaN 1
+ 4 h3 1020.0000 20.400000 1 5.40 NaN 1
+ ... ... ... ... ... ... ... ...
+ 20237 cm243_244_tot 0.0064 0.001280 1 6.90 NaN 1
+ 20238 cm243_244_tot 0.0045 0.000900 1 6.75 NaN 1
+ 20239 cm243_244_tot 0.0022 0.000660 1 5.83 20.4 1
+ 20240 cm243_244_tot 0.0064 0.001920 1 9.77 3.9 1
+ 20241 cm243_244_tot 0.0039 0.001170 1 3.10 15.6 1
+
+ Sampling start date Sampling start time Sample type Nuclide
+ 0 18-Jun-2017 00:00:00 SEAWATER 3H
+ 1 14-Jun-2012 00:00:00 SEAWATER 3H
+ 2 16-Jun-2014 00:00:00 SEAWATER 3H
+ 3 07-Jul-2010 00:00:00 SEAWATER 3H
+ 4 06-Jul-2011 00:00:00 SEAWATER 3H
+ ... ... ... ... ...
+ 20237 17-Aug-1986 00:00:00 SEAWATER 243_244Cm
+ 20238 04-Aug-1987 00:00:00 SEAWATER 243_244Cm
+ 20239 21-Jul-1988 00:00:00 SEAWATER 243_244Cm
+ 20240 21-Jul-1988 00:00:00 SEAWATER 243_244Cm
+ 20241 08-Aug-1988 00:00:00 SEAWATER 243_244Cm
+
+ [20242 rows x 17 columns],
+ 'sediment': lon lat tot_depth time sed_type sample nuclide \
+ 0 10.850000 54.049999 22.0 866592000 58 842 be7
+ 1 10.203300 54.415001 13.0 811641600 58 4064 be7
+ 2 10.203300 54.415001 13.0 811641600 58 4069 be7
+ 3 10.203300 54.415001 13.0 811641600 58 4074 be7
+ 4 11.750000 54.416698 24.0 838512000 58 4535 be7
+ ... ... ... ... ... ... ... ...
+ 37084 23.391199 65.277496 90.0 1283299200 2 37039 bi212
+ 37085 23.391199 65.277496 90.0 1283299200 2 37048 bi212
+ 37086 23.391199 65.277496 90.0 1283299200 2 37065 bi212
+ 37087 23.391199 65.277496 90.0 1283299200 2 37074 bi212
+ 37088 23.391199 65.277496 90.0 1283299200 2 37083 bi212
+
+ value _unc _dl _unit Sampling start date Sampling start time \
+ 0 24.299999 7.7760 1 4 18-Jun-1997 00:00:00
+ 1 45.500000 4.5500 1 4 21-Sep-1995 00:00:00
+ 2 7.000000 NaN 2 4 21-Sep-1995 00:00:00
+ 3 4.800000 NaN 2 4 21-Sep-1995 00:00:00
+ 4 6.900000 1.9320 1 4 28-Jul-1996 00:00:00
+ ... ... ... ... ... ... ...
+ 37084 42.900002 6.1347 1 4 01-Sep-2010 00:00:00
+ 37085 58.400002 6.1904 1 4 01-Sep-2010 00:00:00
+ 37086 51.400002 5.9624 1 4 01-Sep-2010 00:00:00
+ 37087 41.799999 5.4758 1 4 01-Sep-2010 00:00:00
+ 37088 43.700001 3.4523 1 4 01-Sep-2010 00:00:00
+
+ Sample type Nuclide
+ 0 SEDIMENT 7Be
+ 1 SEDIMENT 7Be
+ 2 SEDIMENT 7Be
+ 3 SEDIMENT 7Be
+ 4 SEDIMENT 7Be
+ ... ... ...
+ 37084 SEDIMENT 212Bi
+ 37085 SEDIMENT 212Bi
+ 37086 SEDIMENT 212Bi
+ 37087 SEDIMENT 212Bi
+ 37088 SEDIMENT 212Bi
+
+ [37089 rows x 15 columns],
+ 'biota': lon lat smp_depth time bio_group species \
+ 0 11.5000 54.080002 0.0 908755200 11 96
+ 1 11.5000 54.080002 0.0 900547200 11 96
+ 2 11.5000 54.080002 0.0 874540800 14 129
+ 3 11.5000 54.080002 0.0 865900800 14 129
+ 4 11.5000 54.080002 0.0 874886400 11 96
+ ... ... ... ... ... ... ...
+ 14868 12.0742 57.335201 0.0 1253145600 11 96
+ 14869 12.0742 57.335201 0.0 1225670400 11 96
+ 14870 12.0742 57.335201 0.0 1160352000 11 96
+ 14871 12.0742 57.335201 0.0 1380240000 11 96
+ 14872 12.0742 57.335201 0.0 1409788800 11 96
+
+ body_part sample nuclide value _unc _dl _unit \
+ 0 54 150 be7 46.500 1.813500 1 4
+ 1 54 159 be7 66.500 6.317500 1 4
+ 2 1 168 be7 5.430 1.574700 1 4
+ 3 1 177 be7 13.700 4.384000 1 4
+ 4 54 183 be7 11.300 0.000000 2 4
+ ... ... ... ... ... ... ... ...
+ 14868 54 11586 tl208 0.880 0.079200 1 4
+ 14869 54 11598 tl208 0.770 0.069300 1 4
+ 14870 54 11620 tl208 1.310 0.142790 1 4
+ 14871 54 11766 tl208 0.668 0.057448 1 4
+ 14872 54 11775 tl208 0.684 0.072504 1 4
+
+ Sampling start date Sampling start time Sample type Nuclide
+ 0 19-Oct-1998 00:00:00 BIOTA 7Be
+ 1 16-Jul-1998 00:00:00 BIOTA 7Be
+ 2 18-Sep-1997 00:00:00 BIOTA 7Be
+ 3 10-Jun-1997 00:00:00 BIOTA 7Be
+ 4 22-Sep-1997 00:00:00 BIOTA 7Be
+ ... ... ... ... ...
+ 14868 17-Sep-2009 00:00:00 BIOTA 208Tl
+ 14869 03-Nov-2008 00:00:00 BIOTA 208Tl
+ 14870 09-Oct-2006 00:00:00 BIOTA 208Tl
+ 14871 27-Sep-2013 00:00:00 BIOTA 208Tl
+ 14872 04-Sep-2014 00:00:00 BIOTA 208Tl
+
+ [14873 rows x 17 columns]}
+array(['7Be', '40K', '54Mn', '57Co', '58Co', '60Co', '65Zn', '89Sr',
+ '90Sr', '95Zr', '95Nb', '99Tc', '103Ru', '106Ru', '108mAg',
+ '110mAg', '124Sb', '125Sb', '129mTe', '131I', '134Cs', '137Cs',
+ '140Ba', '140La', '141Ce', '144Ce', '155Eu', '210Pb', '212Pb',
+ '214Pb', '214Bi', '210Po', '223Ra', '224Ra', '226Ra', '228Ra',
+ '228Ac', '228Th', '232Th', '235U', '238Pu', '241Am', '134_137Cs',
+ '239_240Pu', '152Eu', '59Fe', '153Gd', '86Rb', '46Sc', '113Sn',
+ '117mSn', '208Tl'], dtype=object)
+Convert from Longitude and Latitude DDD.DDDDD° to degrees, minutes, seconds and direction.
++++deg_to_dms (deg, coordinate='lat')
Convert from decimal degrees to degrees, minutes, seconds.
++++ConvertLonLatCB (fn_convert=<function deg_to_dms>)
Convert from Longitude and Latitude DDD.DDDDD° to degrees, minutes, seconds and direction.
+dfs = netcdf4_to_df(fname_in)
+tfm = Transformer(dfs, cbs=[ReshapeWideToLong(),
+ LookupTimeFromEncodedTime(cfg()),
+ GetSampleTypeCB(),
+ LookupNuclideByIdCB(),
+ ConvertLonLatCB()
+ ])
+tfm()
{'seawater': lon lat smp_depth tot_depth time sample \
+ 0 14.199800 54.006001 0.0 11.0 1497744000 78
+ 1 14.202300 54.006199 0.0 12.0 1339632000 83
+ 2 14.199500 54.006302 0.0 12.0 1402876800 86
+ 3 14.201000 54.006500 0.0 12.0 1278460800 91
+ 4 14.200500 54.006668 0.0 12.0 1309910400 101
+ ... ... ... ... ... ... ...
+ 20237 20.030001 57.343300 0.0 236.0 524620800 14175
+ 20238 21.500000 59.433300 0.0 156.0 555033600 15712
+ 20239 21.525801 59.439800 0.0 160.0 585446400 15840
+ 20240 21.525801 59.439800 150.0 160.0 585446400 15847
+ 20241 23.555000 65.239998 0.0 73.0 587001600 20130
+
+ nuclide value _unc _dl ... Sample type Nuclide \
+ 0 h3 850.0000 59.669998 1 ... SEAWATER 3H
+ 1 h3 970.0000 29.100000 1 ... SEAWATER 3H
+ 2 h3 910.0000 24.570000 1 ... SEAWATER 3H
+ 3 h3 1070.0000 21.400000 1 ... SEAWATER 3H
+ 4 h3 1020.0000 20.400000 1 ... SEAWATER 3H
+ ... ... ... ... ... ... ... ...
+ 20237 cm243_244_tot 0.0064 0.001280 1 ... SEAWATER 243_244Cm
+ 20238 cm243_244_tot 0.0045 0.000900 1 ... SEAWATER 243_244Cm
+ 20239 cm243_244_tot 0.0022 0.000660 1 ... SEAWATER 243_244Cm
+ 20240 cm243_244_tot 0.0064 0.001920 1 ... SEAWATER 243_244Cm
+ 20241 cm243_244_tot 0.0039 0.001170 1 ... SEAWATER 243_244Cm
+
+ Latitude degrees Latitude minutes Latitude seconds Latitude direction \
+ 0 54 0 21.601868 N
+ 1 54 0 22.315979 N
+ 2 54 0 22.686768 N
+ 3 54 0 23.400879 N
+ 4 54 0 24.005127 N
+ ... ... ... ... ...
+ 20237 57 20 35.879517 N
+ 20238 59 25 59.880066 N
+ 20239 59 26 23.280945 N
+ 20240 59 26 23.280945 N
+ 20241 65 14 23.992310 N
+
+ Longitude degrees Longitude minutes Longitude seconds \
+ 0 14 11 59.278336
+ 1 14 12 8.280258
+ 2 14 11 58.200302
+ 3 14 12 3.600769
+ 4 14 12 1.798325
+ ... ... ... ...
+ 20237 20 1 48.002472
+ 20238 21 30 0.000000
+ 20239 21 31 32.882538
+ 20240 21 31 32.882538
+ 20241 23 33 18.001099
+
+ Longitude direction
+ 0 E
+ 1 E
+ 2 E
+ 3 E
+ 4 E
+ ... ...
+ 20237 E
+ 20238 E
+ 20239 E
+ 20240 E
+ 20241 E
+
+ [20242 rows x 25 columns],
+ 'sediment': lon lat tot_depth time sed_type sample nuclide \
+ 0 10.850000 54.049999 22.0 866592000 58 842 be7
+ 1 10.203300 54.415001 13.0 811641600 58 4064 be7
+ 2 10.203300 54.415001 13.0 811641600 58 4069 be7
+ 3 10.203300 54.415001 13.0 811641600 58 4074 be7
+ 4 11.750000 54.416698 24.0 838512000 58 4535 be7
+ ... ... ... ... ... ... ... ...
+ 37084 23.391199 65.277496 90.0 1283299200 2 37039 bi212
+ 37085 23.391199 65.277496 90.0 1283299200 2 37048 bi212
+ 37086 23.391199 65.277496 90.0 1283299200 2 37065 bi212
+ 37087 23.391199 65.277496 90.0 1283299200 2 37074 bi212
+ 37088 23.391199 65.277496 90.0 1283299200 2 37083 bi212
+
+ value _unc _dl ... Sample type Nuclide Latitude degrees \
+ 0 24.299999 7.7760 1 ... SEDIMENT 7Be 54
+ 1 45.500000 4.5500 1 ... SEDIMENT 7Be 54
+ 2 7.000000 NaN 2 ... SEDIMENT 7Be 54
+ 3 4.800000 NaN 2 ... SEDIMENT 7Be 54
+ 4 6.900000 1.9320 1 ... SEDIMENT 7Be 54
+ ... ... ... ... ... ... ... ...
+ 37084 42.900002 6.1347 1 ... SEDIMENT 212Bi 65
+ 37085 58.400002 6.1904 1 ... SEDIMENT 212Bi 65
+ 37086 51.400002 5.9624 1 ... SEDIMENT 212Bi 65
+ 37087 41.799999 5.4758 1 ... SEDIMENT 212Bi 65
+ 37088 43.700001 3.4523 1 ... SEDIMENT 212Bi 65
+
+ Latitude minutes Latitude seconds Latitude direction \
+ 0 2 59.997253 N
+ 1 24 54.003296 N
+ 2 24 54.003296 N
+ 3 24 54.003296 N
+ 4 25 0.114441 N
+ ... ... ... ...
+ 37084 16 38.986816 N
+ 37085 16 38.986816 N
+ 37086 16 38.986816 N
+ 37087 16 38.986816 N
+ 37088 16 38.986816 N
+
+ Longitude degrees Longitude minutes Longitude seconds \
+ 0 10 51 0.001373
+ 1 10 12 11.881714
+ 2 10 12 11.881714
+ 3 10 12 11.881714
+ 4 11 45 0.000000
+ ... ... ... ...
+ 37084 23 23 28.316803
+ 37085 23 23 28.316803
+ 37086 23 23 28.316803
+ 37087 23 23 28.316803
+ 37088 23 23 28.316803
+
+ Longitude direction
+ 0 E
+ 1 E
+ 2 E
+ 3 E
+ 4 E
+ ... ...
+ 37084 E
+ 37085 E
+ 37086 E
+ 37087 E
+ 37088 E
+
+ [37089 rows x 23 columns],
+ 'biota': lon lat smp_depth time bio_group species \
+ 0 11.5000 54.080002 0.0 908755200 11 96
+ 1 11.5000 54.080002 0.0 900547200 11 96
+ 2 11.5000 54.080002 0.0 874540800 14 129
+ 3 11.5000 54.080002 0.0 865900800 14 129
+ 4 11.5000 54.080002 0.0 874886400 11 96
+ ... ... ... ... ... ... ...
+ 14868 12.0742 57.335201 0.0 1253145600 11 96
+ 14869 12.0742 57.335201 0.0 1225670400 11 96
+ 14870 12.0742 57.335201 0.0 1160352000 11 96
+ 14871 12.0742 57.335201 0.0 1380240000 11 96
+ 14872 12.0742 57.335201 0.0 1409788800 11 96
+
+ body_part sample nuclide value ... Sample type Nuclide \
+ 0 54 150 be7 46.500 ... BIOTA 7Be
+ 1 54 159 be7 66.500 ... BIOTA 7Be
+ 2 1 168 be7 5.430 ... BIOTA 7Be
+ 3 1 177 be7 13.700 ... BIOTA 7Be
+ 4 54 183 be7 11.300 ... BIOTA 7Be
+ ... ... ... ... ... ... ... ...
+ 14868 54 11586 tl208 0.880 ... BIOTA 208Tl
+ 14869 54 11598 tl208 0.770 ... BIOTA 208Tl
+ 14870 54 11620 tl208 1.310 ... BIOTA 208Tl
+ 14871 54 11766 tl208 0.668 ... BIOTA 208Tl
+ 14872 54 11775 tl208 0.684 ... BIOTA 208Tl
+
+ Latitude degrees Latitude minutes Latitude seconds Latitude direction \
+ 0 54 4 48.006592 N
+ 1 54 4 48.006592 N
+ 2 54 4 48.006592 N
+ 3 54 4 48.006592 N
+ 4 54 4 48.006592 N
+ ... ... ... ... ...
+ 14868 57 20 6.724548 N
+ 14869 57 20 6.724548 N
+ 14870 57 20 6.724548 N
+ 14871 57 20 6.724548 N
+ 14872 57 20 6.724548 N
+
+ Longitude degrees Longitude minutes Longitude seconds \
+ 0 11 30 0.000000
+ 1 11 30 0.000000
+ 2 11 30 0.000000
+ 3 11 30 0.000000
+ 4 11 30 0.000000
+ ... ... ... ...
+ 14868 12 4 27.118835
+ 14869 12 4 27.118835
+ 14870 12 4 27.118835
+ 14871 12 4 27.118835
+ 14872 12 4 27.118835
+
+ Longitude direction
+ 0 E
+ 1 E
+ 2 E
+ 3 E
+ 4 E
+ ... ...
+ 14868 E
+ 14869 E
+ 14870 E
+ 14871 E
+ 14872 E
+
+ [14873 rows x 25 columns]}
+Index(['lon', 'lat', 'smp_depth', 'tot_depth', 'time', 'sample', 'nuclide',
+ 'value', '_unc', '_dl', '_sal', '_temp', '_unit', 'Sampling start date',
+ 'Sampling start time', 'Sample type', 'Nuclide', 'Latitude degrees',
+ 'Latitude minutes', 'Latitude seconds', 'Latitude direction',
+ 'Longitude degrees', 'Longitude minutes', 'Longitude seconds',
+ 'Longitude direction'],
+ dtype='object')
++++get_unitnames_lut ()
+++LookupUnitByIdCB (fn_lut=<function get_unitnames_lut>)
Lookup MARIS unit by unit_id.
+dfs = netcdf4_to_df(fname_in)
+tfm = Transformer(dfs, cbs=[ReshapeWideToLong(),
+ LookupTimeFromEncodedTime(cfg()),
+ GetSampleTypeCB(),
+ LookupNuclideByIdCB(),
+ ConvertLonLatCB(),
+ LookupUnitByIdCB()
+ ])
+tfm()
{'seawater': lon lat smp_depth tot_depth time sample \
+ 0 14.199800 54.006001 0.0 11.0 1497744000 78
+ 1 14.202300 54.006199 0.0 12.0 1339632000 83
+ 2 14.199500 54.006302 0.0 12.0 1402876800 86
+ 3 14.201000 54.006500 0.0 12.0 1278460800 91
+ 4 14.200500 54.006668 0.0 12.0 1309910400 101
+ ... ... ... ... ... ... ...
+ 20237 20.030001 57.343300 0.0 236.0 524620800 14175
+ 20238 21.500000 59.433300 0.0 156.0 555033600 15712
+ 20239 21.525801 59.439800 0.0 160.0 585446400 15840
+ 20240 21.525801 59.439800 150.0 160.0 585446400 15847
+ 20241 23.555000 65.239998 0.0 73.0 587001600 20130
+
+ nuclide value _unc _dl ... Nuclide \
+ 0 h3 850.0000 59.669998 1 ... 3H
+ 1 h3 970.0000 29.100000 1 ... 3H
+ 2 h3 910.0000 24.570000 1 ... 3H
+ 3 h3 1070.0000 21.400000 1 ... 3H
+ 4 h3 1020.0000 20.400000 1 ... 3H
+ ... ... ... ... ... ... ...
+ 20237 cm243_244_tot 0.0064 0.001280 1 ... 243_244Cm
+ 20238 cm243_244_tot 0.0045 0.000900 1 ... 243_244Cm
+ 20239 cm243_244_tot 0.0022 0.000660 1 ... 243_244Cm
+ 20240 cm243_244_tot 0.0064 0.001920 1 ... 243_244Cm
+ 20241 cm243_244_tot 0.0039 0.001170 1 ... 243_244Cm
+
+ Latitude degrees Latitude minutes Latitude seconds Latitude direction \
+ 0 54 0 21.601868 N
+ 1 54 0 22.315979 N
+ 2 54 0 22.686768 N
+ 3 54 0 23.400879 N
+ 4 54 0 24.005127 N
+ ... ... ... ... ...
+ 20237 57 20 35.879517 N
+ 20238 59 25 59.880066 N
+ 20239 59 26 23.280945 N
+ 20240 59 26 23.280945 N
+ 20241 65 14 23.992310 N
+
+ Longitude degrees Longitude minutes Longitude seconds \
+ 0 14 11 59.278336
+ 1 14 12 8.280258
+ 2 14 11 58.200302
+ 3 14 12 3.600769
+ 4 14 12 1.798325
+ ... ... ... ...
+ 20237 20 1 48.002472
+ 20238 21 30 0.000000
+ 20239 21 31 32.882538
+ 20240 21 31 32.882538
+ 20241 23 33 18.001099
+
+ Longitude direction Unit
+ 0 E Bq/m3
+ 1 E Bq/m3
+ 2 E Bq/m3
+ 3 E Bq/m3
+ 4 E Bq/m3
+ ... ... ...
+ 20237 E Bq/m3
+ 20238 E Bq/m3
+ 20239 E Bq/m3
+ 20240 E Bq/m3
+ 20241 E Bq/m3
+
+ [20242 rows x 26 columns],
+ 'sediment': lon lat tot_depth time sed_type sample nuclide \
+ 0 10.850000 54.049999 22.0 866592000 58 842 be7
+ 1 10.203300 54.415001 13.0 811641600 58 4064 be7
+ 2 10.203300 54.415001 13.0 811641600 58 4069 be7
+ 3 10.203300 54.415001 13.0 811641600 58 4074 be7
+ 4 11.750000 54.416698 24.0 838512000 58 4535 be7
+ ... ... ... ... ... ... ... ...
+ 37084 23.391199 65.277496 90.0 1283299200 2 37039 bi212
+ 37085 23.391199 65.277496 90.0 1283299200 2 37048 bi212
+ 37086 23.391199 65.277496 90.0 1283299200 2 37065 bi212
+ 37087 23.391199 65.277496 90.0 1283299200 2 37074 bi212
+ 37088 23.391199 65.277496 90.0 1283299200 2 37083 bi212
+
+ value _unc _dl ... Nuclide Latitude degrees Latitude minutes \
+ 0 24.299999 7.7760 1 ... 7Be 54 2
+ 1 45.500000 4.5500 1 ... 7Be 54 24
+ 2 7.000000 NaN 2 ... 7Be 54 24
+ 3 4.800000 NaN 2 ... 7Be 54 24
+ 4 6.900000 1.9320 1 ... 7Be 54 25
+ ... ... ... ... ... ... ... ...
+ 37084 42.900002 6.1347 1 ... 212Bi 65 16
+ 37085 58.400002 6.1904 1 ... 212Bi 65 16
+ 37086 51.400002 5.9624 1 ... 212Bi 65 16
+ 37087 41.799999 5.4758 1 ... 212Bi 65 16
+ 37088 43.700001 3.4523 1 ... 212Bi 65 16
+
+ Latitude seconds Latitude direction Longitude degrees \
+ 0 59.997253 N 10
+ 1 54.003296 N 10
+ 2 54.003296 N 10
+ 3 54.003296 N 10
+ 4 0.114441 N 11
+ ... ... ... ...
+ 37084 38.986816 N 23
+ 37085 38.986816 N 23
+ 37086 38.986816 N 23
+ 37087 38.986816 N 23
+ 37088 38.986816 N 23
+
+ Longitude minutes Longitude seconds Longitude direction Unit
+ 0 51 0.001373 E Bq/kgd
+ 1 12 11.881714 E Bq/kgd
+ 2 12 11.881714 E Bq/kgd
+ 3 12 11.881714 E Bq/kgd
+ 4 45 0.000000 E Bq/kgd
+ ... ... ... ... ...
+ 37084 23 28.316803 E Bq/kgd
+ 37085 23 28.316803 E Bq/kgd
+ 37086 23 28.316803 E Bq/kgd
+ 37087 23 28.316803 E Bq/kgd
+ 37088 23 28.316803 E Bq/kgd
+
+ [37089 rows x 24 columns],
+ 'biota': lon lat smp_depth time bio_group species \
+ 0 11.5000 54.080002 0.0 908755200 11 96
+ 1 11.5000 54.080002 0.0 900547200 11 96
+ 2 11.5000 54.080002 0.0 874540800 14 129
+ 3 11.5000 54.080002 0.0 865900800 14 129
+ 4 11.5000 54.080002 0.0 874886400 11 96
+ ... ... ... ... ... ... ...
+ 14868 12.0742 57.335201 0.0 1253145600 11 96
+ 14869 12.0742 57.335201 0.0 1225670400 11 96
+ 14870 12.0742 57.335201 0.0 1160352000 11 96
+ 14871 12.0742 57.335201 0.0 1380240000 11 96
+ 14872 12.0742 57.335201 0.0 1409788800 11 96
+
+ body_part sample nuclide value ... Nuclide Latitude degrees \
+ 0 54 150 be7 46.500 ... 7Be 54
+ 1 54 159 be7 66.500 ... 7Be 54
+ 2 1 168 be7 5.430 ... 7Be 54
+ 3 1 177 be7 13.700 ... 7Be 54
+ 4 54 183 be7 11.300 ... 7Be 54
+ ... ... ... ... ... ... ... ...
+ 14868 54 11586 tl208 0.880 ... 208Tl 57
+ 14869 54 11598 tl208 0.770 ... 208Tl 57
+ 14870 54 11620 tl208 1.310 ... 208Tl 57
+ 14871 54 11766 tl208 0.668 ... 208Tl 57
+ 14872 54 11775 tl208 0.684 ... 208Tl 57
+
+ Latitude minutes Latitude seconds Latitude direction Longitude degrees \
+ 0 4 48.006592 N 11
+ 1 4 48.006592 N 11
+ 2 4 48.006592 N 11
+ 3 4 48.006592 N 11
+ 4 4 48.006592 N 11
+ ... ... ... ... ...
+ 14868 20 6.724548 N 12
+ 14869 20 6.724548 N 12
+ 14870 20 6.724548 N 12
+ 14871 20 6.724548 N 12
+ 14872 20 6.724548 N 12
+
+ Longitude minutes Longitude seconds Longitude direction Unit
+ 0 30 0.000000 E Bq/kgd
+ 1 30 0.000000 E Bq/kgd
+ 2 30 0.000000 E Bq/kgd
+ 3 30 0.000000 E Bq/kgd
+ 4 30 0.000000 E Bq/kgd
+ ... ... ... ... ...
+ 14868 4 27.118835 E Bq/kgd
+ 14869 4 27.118835 E Bq/kgd
+ 14870 4 27.118835 E Bq/kgd
+ 14871 4 27.118835 E Bq/kgd
+ 14872 4 27.118835 E Bq/kgd
+
+ [14873 rows x 26 columns]}
+Index(['lon', 'lat', 'smp_depth', 'tot_depth', 'time', 'sample', 'nuclide',
+ 'value', '_unc', '_dl', '_sal', '_temp', '_unit', 'Sampling start date',
+ 'Sampling start time', 'Sample type', 'Nuclide', 'Latitude degrees',
+ 'Latitude minutes', 'Latitude seconds', 'Latitude direction',
+ 'Longitude degrees', 'Longitude minutes', 'Longitude seconds',
+ 'Longitude direction', 'Unit'],
+ dtype='object')
++++get_detectionlimitnames_lut ()
+++LookupValueTypeByIdCB (fn_lut=<function get_detectionlimitnames_lut>)
Lookup MARIS Value Type.
+dfs = netcdf4_to_df(fname_in)
+tfm = Transformer(dfs, cbs=[ReshapeWideToLong(),
+ LookupTimeFromEncodedTime(cfg()),
+ GetSampleTypeCB(),
+ LookupNuclideByIdCB(),
+ ConvertLonLatCB(),
+ LookupUnitByIdCB(),
+ LookupValueTypeByIdCB()
+ ])
+tfm()
{'seawater': lon lat smp_depth tot_depth time sample \
+ 0 14.199800 54.006001 0.0 11.0 1497744000 78
+ 1 14.202300 54.006199 0.0 12.0 1339632000 83
+ 2 14.199500 54.006302 0.0 12.0 1402876800 86
+ 3 14.201000 54.006500 0.0 12.0 1278460800 91
+ 4 14.200500 54.006668 0.0 12.0 1309910400 101
+ ... ... ... ... ... ... ...
+ 20237 20.030001 57.343300 0.0 236.0 524620800 14175
+ 20238 21.500000 59.433300 0.0 156.0 555033600 15712
+ 20239 21.525801 59.439800 0.0 160.0 585446400 15840
+ 20240 21.525801 59.439800 150.0 160.0 585446400 15847
+ 20241 23.555000 65.239998 0.0 73.0 587001600 20130
+
+ nuclide value _unc _dl ... Latitude degrees \
+ 0 h3 850.0000 59.669998 1 ... 54
+ 1 h3 970.0000 29.100000 1 ... 54
+ 2 h3 910.0000 24.570000 1 ... 54
+ 3 h3 1070.0000 21.400000 1 ... 54
+ 4 h3 1020.0000 20.400000 1 ... 54
+ ... ... ... ... ... ... ...
+ 20237 cm243_244_tot 0.0064 0.001280 1 ... 57
+ 20238 cm243_244_tot 0.0045 0.000900 1 ... 59
+ 20239 cm243_244_tot 0.0022 0.000660 1 ... 59
+ 20240 cm243_244_tot 0.0064 0.001920 1 ... 59
+ 20241 cm243_244_tot 0.0039 0.001170 1 ... 65
+
+ Latitude minutes Latitude seconds Latitude direction \
+ 0 0 21.601868 N
+ 1 0 22.315979 N
+ 2 0 22.686768 N
+ 3 0 23.400879 N
+ 4 0 24.005127 N
+ ... ... ... ...
+ 20237 20 35.879517 N
+ 20238 25 59.880066 N
+ 20239 26 23.280945 N
+ 20240 26 23.280945 N
+ 20241 14 23.992310 N
+
+ Longitude degrees Longitude minutes Longitude seconds \
+ 0 14 11 59.278336
+ 1 14 12 8.280258
+ 2 14 11 58.200302
+ 3 14 12 3.600769
+ 4 14 12 1.798325
+ ... ... ... ...
+ 20237 20 1 48.002472
+ 20238 21 30 0.000000
+ 20239 21 31 32.882538
+ 20240 21 31 32.882538
+ 20241 23 33 18.001099
+
+ Longitude direction Unit Value type
+ 0 E Bq/m3 =
+ 1 E Bq/m3 =
+ 2 E Bq/m3 =
+ 3 E Bq/m3 =
+ 4 E Bq/m3 =
+ ... ... ... ...
+ 20237 E Bq/m3 =
+ 20238 E Bq/m3 =
+ 20239 E Bq/m3 =
+ 20240 E Bq/m3 =
+ 20241 E Bq/m3 =
+
+ [20242 rows x 27 columns],
+ 'sediment': lon lat tot_depth time sed_type sample nuclide \
+ 0 10.850000 54.049999 22.0 866592000 58 842 be7
+ 1 10.203300 54.415001 13.0 811641600 58 4064 be7
+ 2 10.203300 54.415001 13.0 811641600 58 4069 be7
+ 3 10.203300 54.415001 13.0 811641600 58 4074 be7
+ 4 11.750000 54.416698 24.0 838512000 58 4535 be7
+ ... ... ... ... ... ... ... ...
+ 37084 23.391199 65.277496 90.0 1283299200 2 37039 bi212
+ 37085 23.391199 65.277496 90.0 1283299200 2 37048 bi212
+ 37086 23.391199 65.277496 90.0 1283299200 2 37065 bi212
+ 37087 23.391199 65.277496 90.0 1283299200 2 37074 bi212
+ 37088 23.391199 65.277496 90.0 1283299200 2 37083 bi212
+
+ value _unc _dl ... Latitude degrees Latitude minutes \
+ 0 24.299999 7.7760 1 ... 54 2
+ 1 45.500000 4.5500 1 ... 54 24
+ 2 7.000000 NaN 2 ... 54 24
+ 3 4.800000 NaN 2 ... 54 24
+ 4 6.900000 1.9320 1 ... 54 25
+ ... ... ... ... ... ... ...
+ 37084 42.900002 6.1347 1 ... 65 16
+ 37085 58.400002 6.1904 1 ... 65 16
+ 37086 51.400002 5.9624 1 ... 65 16
+ 37087 41.799999 5.4758 1 ... 65 16
+ 37088 43.700001 3.4523 1 ... 65 16
+
+ Latitude seconds Latitude direction Longitude degrees \
+ 0 59.997253 N 10
+ 1 54.003296 N 10
+ 2 54.003296 N 10
+ 3 54.003296 N 10
+ 4 0.114441 N 11
+ ... ... ... ...
+ 37084 38.986816 N 23
+ 37085 38.986816 N 23
+ 37086 38.986816 N 23
+ 37087 38.986816 N 23
+ 37088 38.986816 N 23
+
+ Longitude minutes Longitude seconds Longitude direction Unit \
+ 0 51 0.001373 E Bq/kgd
+ 1 12 11.881714 E Bq/kgd
+ 2 12 11.881714 E Bq/kgd
+ 3 12 11.881714 E Bq/kgd
+ 4 45 0.000000 E Bq/kgd
+ ... ... ... ... ...
+ 37084 23 28.316803 E Bq/kgd
+ 37085 23 28.316803 E Bq/kgd
+ 37086 23 28.316803 E Bq/kgd
+ 37087 23 28.316803 E Bq/kgd
+ 37088 23 28.316803 E Bq/kgd
+
+ Value type
+ 0 =
+ 1 =
+ 2 <
+ 3 <
+ 4 =
+ ... ...
+ 37084 =
+ 37085 =
+ 37086 =
+ 37087 =
+ 37088 =
+
+ [37089 rows x 25 columns],
+ 'biota': lon lat smp_depth time bio_group species \
+ 0 11.5000 54.080002 0.0 908755200 11 96
+ 1 11.5000 54.080002 0.0 900547200 11 96
+ 2 11.5000 54.080002 0.0 874540800 14 129
+ 3 11.5000 54.080002 0.0 865900800 14 129
+ 4 11.5000 54.080002 0.0 874886400 11 96
+ ... ... ... ... ... ... ...
+ 14868 12.0742 57.335201 0.0 1253145600 11 96
+ 14869 12.0742 57.335201 0.0 1225670400 11 96
+ 14870 12.0742 57.335201 0.0 1160352000 11 96
+ 14871 12.0742 57.335201 0.0 1380240000 11 96
+ 14872 12.0742 57.335201 0.0 1409788800 11 96
+
+ body_part sample nuclide value ... Latitude degrees \
+ 0 54 150 be7 46.500 ... 54
+ 1 54 159 be7 66.500 ... 54
+ 2 1 168 be7 5.430 ... 54
+ 3 1 177 be7 13.700 ... 54
+ 4 54 183 be7 11.300 ... 54
+ ... ... ... ... ... ... ...
+ 14868 54 11586 tl208 0.880 ... 57
+ 14869 54 11598 tl208 0.770 ... 57
+ 14870 54 11620 tl208 1.310 ... 57
+ 14871 54 11766 tl208 0.668 ... 57
+ 14872 54 11775 tl208 0.684 ... 57
+
+ Latitude minutes Latitude seconds Latitude direction \
+ 0 4 48.006592 N
+ 1 4 48.006592 N
+ 2 4 48.006592 N
+ 3 4 48.006592 N
+ 4 4 48.006592 N
+ ... ... ... ...
+ 14868 20 6.724548 N
+ 14869 20 6.724548 N
+ 14870 20 6.724548 N
+ 14871 20 6.724548 N
+ 14872 20 6.724548 N
+
+ Longitude degrees Longitude minutes Longitude seconds \
+ 0 11 30 0.000000
+ 1 11 30 0.000000
+ 2 11 30 0.000000
+ 3 11 30 0.000000
+ 4 11 30 0.000000
+ ... ... ... ...
+ 14868 12 4 27.118835
+ 14869 12 4 27.118835
+ 14870 12 4 27.118835
+ 14871 12 4 27.118835
+ 14872 12 4 27.118835
+
+ Longitude direction Unit Value type
+ 0 E Bq/kgd =
+ 1 E Bq/kgd =
+ 2 E Bq/kgd =
+ 3 E Bq/kgd =
+ 4 E Bq/kgd <
+ ... ... ... ...
+ 14868 E Bq/kgd =
+ 14869 E Bq/kgd =
+ 14870 E Bq/kgd =
+ 14871 E Bq/kgd =
+ 14872 E Bq/kgd =
+
+ [14873 rows x 27 columns]}
+Biogroup is in the NetCDF file but not in the OpenRefine CSV format. Should we include this in the NetCDF?
++++get_species_lut ()
+++LookupSpeciesByIdCB (fn_lut=<function get_species_lut>)
Lookup MARIS species by species_id.
# Pipeline whose last callback is LookupSpeciesByIdCB; in the output printed
# below only the 'biota' frame shows a 'Species' column (e.g. 'Fucus vesiculosus').
# NOTE(review): netcdf4_to_df presumably returns one DataFrame per group — confirm.
+dfs = netcdf4_to_df(fname_in)
+tfm = Transformer(dfs, cbs=[ReshapeWideToLong(),
+ LookupTimeFromEncodedTime(cfg()),
+ GetSampleTypeCB(),
+ LookupNuclideByIdCB(),
+ ConvertLonLatCB(),
+ LookupUnitByIdCB(),
+ LookupValueTypeByIdCB(),
+ LookupSpeciesByIdCB()
+ ])
+tfm()
{'seawater': lon lat smp_depth tot_depth time sample \
+ 0 14.199800 54.006001 0.0 11.0 1497744000 78
+ 1 14.202300 54.006199 0.0 12.0 1339632000 83
+ 2 14.199500 54.006302 0.0 12.0 1402876800 86
+ 3 14.201000 54.006500 0.0 12.0 1278460800 91
+ 4 14.200500 54.006668 0.0 12.0 1309910400 101
+ ... ... ... ... ... ... ...
+ 20237 20.030001 57.343300 0.0 236.0 524620800 14175
+ 20238 21.500000 59.433300 0.0 156.0 555033600 15712
+ 20239 21.525801 59.439800 0.0 160.0 585446400 15840
+ 20240 21.525801 59.439800 150.0 160.0 585446400 15847
+ 20241 23.555000 65.239998 0.0 73.0 587001600 20130
+
+ nuclide value _unc _dl ... Latitude degrees \
+ 0 h3 850.0000 59.669998 1 ... 54
+ 1 h3 970.0000 29.100000 1 ... 54
+ 2 h3 910.0000 24.570000 1 ... 54
+ 3 h3 1070.0000 21.400000 1 ... 54
+ 4 h3 1020.0000 20.400000 1 ... 54
+ ... ... ... ... ... ... ...
+ 20237 cm243_244_tot 0.0064 0.001280 1 ... 57
+ 20238 cm243_244_tot 0.0045 0.000900 1 ... 59
+ 20239 cm243_244_tot 0.0022 0.000660 1 ... 59
+ 20240 cm243_244_tot 0.0064 0.001920 1 ... 59
+ 20241 cm243_244_tot 0.0039 0.001170 1 ... 65
+
+ Latitude minutes Latitude seconds Latitude direction \
+ 0 0 21.601868 N
+ 1 0 22.315979 N
+ 2 0 22.686768 N
+ 3 0 23.400879 N
+ 4 0 24.005127 N
+ ... ... ... ...
+ 20237 20 35.879517 N
+ 20238 25 59.880066 N
+ 20239 26 23.280945 N
+ 20240 26 23.280945 N
+ 20241 14 23.992310 N
+
+ Longitude degrees Longitude minutes Longitude seconds \
+ 0 14 11 59.278336
+ 1 14 12 8.280258
+ 2 14 11 58.200302
+ 3 14 12 3.600769
+ 4 14 12 1.798325
+ ... ... ... ...
+ 20237 20 1 48.002472
+ 20238 21 30 0.000000
+ 20239 21 31 32.882538
+ 20240 21 31 32.882538
+ 20241 23 33 18.001099
+
+ Longitude direction Unit Value type
+ 0 E Bq/m3 =
+ 1 E Bq/m3 =
+ 2 E Bq/m3 =
+ 3 E Bq/m3 =
+ 4 E Bq/m3 =
+ ... ... ... ...
+ 20237 E Bq/m3 =
+ 20238 E Bq/m3 =
+ 20239 E Bq/m3 =
+ 20240 E Bq/m3 =
+ 20241 E Bq/m3 =
+
+ [20242 rows x 27 columns],
+ 'sediment': lon lat tot_depth time sed_type sample nuclide \
+ 0 10.850000 54.049999 22.0 866592000 58 842 be7
+ 1 10.203300 54.415001 13.0 811641600 58 4064 be7
+ 2 10.203300 54.415001 13.0 811641600 58 4069 be7
+ 3 10.203300 54.415001 13.0 811641600 58 4074 be7
+ 4 11.750000 54.416698 24.0 838512000 58 4535 be7
+ ... ... ... ... ... ... ... ...
+ 37084 23.391199 65.277496 90.0 1283299200 2 37039 bi212
+ 37085 23.391199 65.277496 90.0 1283299200 2 37048 bi212
+ 37086 23.391199 65.277496 90.0 1283299200 2 37065 bi212
+ 37087 23.391199 65.277496 90.0 1283299200 2 37074 bi212
+ 37088 23.391199 65.277496 90.0 1283299200 2 37083 bi212
+
+ value _unc _dl ... Latitude degrees Latitude minutes \
+ 0 24.299999 7.7760 1 ... 54 2
+ 1 45.500000 4.5500 1 ... 54 24
+ 2 7.000000 NaN 2 ... 54 24
+ 3 4.800000 NaN 2 ... 54 24
+ 4 6.900000 1.9320 1 ... 54 25
+ ... ... ... ... ... ... ...
+ 37084 42.900002 6.1347 1 ... 65 16
+ 37085 58.400002 6.1904 1 ... 65 16
+ 37086 51.400002 5.9624 1 ... 65 16
+ 37087 41.799999 5.4758 1 ... 65 16
+ 37088 43.700001 3.4523 1 ... 65 16
+
+ Latitude seconds Latitude direction Longitude degrees \
+ 0 59.997253 N 10
+ 1 54.003296 N 10
+ 2 54.003296 N 10
+ 3 54.003296 N 10
+ 4 0.114441 N 11
+ ... ... ... ...
+ 37084 38.986816 N 23
+ 37085 38.986816 N 23
+ 37086 38.986816 N 23
+ 37087 38.986816 N 23
+ 37088 38.986816 N 23
+
+ Longitude minutes Longitude seconds Longitude direction Unit \
+ 0 51 0.001373 E Bq/kgd
+ 1 12 11.881714 E Bq/kgd
+ 2 12 11.881714 E Bq/kgd
+ 3 12 11.881714 E Bq/kgd
+ 4 45 0.000000 E Bq/kgd
+ ... ... ... ... ...
+ 37084 23 28.316803 E Bq/kgd
+ 37085 23 28.316803 E Bq/kgd
+ 37086 23 28.316803 E Bq/kgd
+ 37087 23 28.316803 E Bq/kgd
+ 37088 23 28.316803 E Bq/kgd
+
+ Value type
+ 0 =
+ 1 =
+ 2 <
+ 3 <
+ 4 =
+ ... ...
+ 37084 =
+ 37085 =
+ 37086 =
+ 37087 =
+ 37088 =
+
+ [37089 rows x 25 columns],
+ 'biota': lon lat smp_depth time bio_group species \
+ 0 11.5000 54.080002 0.0 908755200 11 96
+ 1 11.5000 54.080002 0.0 900547200 11 96
+ 2 11.5000 54.080002 0.0 874540800 14 129
+ 3 11.5000 54.080002 0.0 865900800 14 129
+ 4 11.5000 54.080002 0.0 874886400 11 96
+ ... ... ... ... ... ... ...
+ 14868 12.0742 57.335201 0.0 1253145600 11 96
+ 14869 12.0742 57.335201 0.0 1225670400 11 96
+ 14870 12.0742 57.335201 0.0 1160352000 11 96
+ 14871 12.0742 57.335201 0.0 1380240000 11 96
+ 14872 12.0742 57.335201 0.0 1409788800 11 96
+
+ body_part sample nuclide value ... Latitude minutes \
+ 0 54 150 be7 46.500 ... 4
+ 1 54 159 be7 66.500 ... 4
+ 2 1 168 be7 5.430 ... 4
+ 3 1 177 be7 13.700 ... 4
+ 4 54 183 be7 11.300 ... 4
+ ... ... ... ... ... ... ...
+ 14868 54 11586 tl208 0.880 ... 20
+ 14869 54 11598 tl208 0.770 ... 20
+ 14870 54 11620 tl208 1.310 ... 20
+ 14871 54 11766 tl208 0.668 ... 20
+ 14872 54 11775 tl208 0.684 ... 20
+
+ Latitude seconds Latitude direction Longitude degrees \
+ 0 48.006592 N 11
+ 1 48.006592 N 11
+ 2 48.006592 N 11
+ 3 48.006592 N 11
+ 4 48.006592 N 11
+ ... ... ... ...
+ 14868 6.724548 N 12
+ 14869 6.724548 N 12
+ 14870 6.724548 N 12
+ 14871 6.724548 N 12
+ 14872 6.724548 N 12
+
+ Longitude minutes Longitude seconds Longitude direction Unit \
+ 0 30 0.000000 E Bq/kgd
+ 1 30 0.000000 E Bq/kgd
+ 2 30 0.000000 E Bq/kgd
+ 3 30 0.000000 E Bq/kgd
+ 4 30 0.000000 E Bq/kgd
+ ... ... ... ... ...
+ 14868 4 27.118835 E Bq/kgd
+ 14869 4 27.118835 E Bq/kgd
+ 14870 4 27.118835 E Bq/kgd
+ 14871 4 27.118835 E Bq/kgd
+ 14872 4 27.118835 E Bq/kgd
+
+ Value type Species
+ 0 = Fucus vesiculosus
+ 1 = Fucus vesiculosus
+ 2 = Mytilus edulis
+ 3 = Mytilus edulis
+ 4 < Fucus vesiculosus
+ ... ... ...
+ 14868 = Fucus vesiculosus
+ 14869 = Fucus vesiculosus
+ 14870 = Fucus vesiculosus
+ 14871 = Fucus vesiculosus
+ 14872 = Fucus vesiculosus
+
+ [14873 rows x 28 columns]}
++++get_bodypart_lut ()
+++LookupBodypartByIdCB (fn_lut=<function get_bodypart_lut>)
Lookup MARIS bodypart by bodypart_id.
# Pipeline whose last callback is LookupBodypartByIdCB; in the output printed
# below the 'biota' frame shows a 'Body part' column (e.g. 'Whole animal').
# NOTE(review): netcdf4_to_df presumably returns one DataFrame per group — confirm.
+dfs = netcdf4_to_df(fname_in)
+tfm = Transformer(dfs, cbs=[ReshapeWideToLong(),
+ LookupTimeFromEncodedTime(cfg()),
+ GetSampleTypeCB(),
+ LookupNuclideByIdCB(),
+ ConvertLonLatCB(),
+ LookupUnitByIdCB(),
+ LookupValueTypeByIdCB(),
+ LookupSpeciesByIdCB(),
+ LookupBodypartByIdCB()
+ ])
+tfm()
{'seawater': lon lat smp_depth tot_depth time sample \
+ 0 14.199800 54.006001 0.0 11.0 1497744000 78
+ 1 14.202300 54.006199 0.0 12.0 1339632000 83
+ 2 14.199500 54.006302 0.0 12.0 1402876800 86
+ 3 14.201000 54.006500 0.0 12.0 1278460800 91
+ 4 14.200500 54.006668 0.0 12.0 1309910400 101
+ ... ... ... ... ... ... ...
+ 20237 20.030001 57.343300 0.0 236.0 524620800 14175
+ 20238 21.500000 59.433300 0.0 156.0 555033600 15712
+ 20239 21.525801 59.439800 0.0 160.0 585446400 15840
+ 20240 21.525801 59.439800 150.0 160.0 585446400 15847
+ 20241 23.555000 65.239998 0.0 73.0 587001600 20130
+
+ nuclide value _unc _dl ... Latitude degrees \
+ 0 h3 850.0000 59.669998 1 ... 54
+ 1 h3 970.0000 29.100000 1 ... 54
+ 2 h3 910.0000 24.570000 1 ... 54
+ 3 h3 1070.0000 21.400000 1 ... 54
+ 4 h3 1020.0000 20.400000 1 ... 54
+ ... ... ... ... ... ... ...
+ 20237 cm243_244_tot 0.0064 0.001280 1 ... 57
+ 20238 cm243_244_tot 0.0045 0.000900 1 ... 59
+ 20239 cm243_244_tot 0.0022 0.000660 1 ... 59
+ 20240 cm243_244_tot 0.0064 0.001920 1 ... 59
+ 20241 cm243_244_tot 0.0039 0.001170 1 ... 65
+
+ Latitude minutes Latitude seconds Latitude direction \
+ 0 0 21.601868 N
+ 1 0 22.315979 N
+ 2 0 22.686768 N
+ 3 0 23.400879 N
+ 4 0 24.005127 N
+ ... ... ... ...
+ 20237 20 35.879517 N
+ 20238 25 59.880066 N
+ 20239 26 23.280945 N
+ 20240 26 23.280945 N
+ 20241 14 23.992310 N
+
+ Longitude degrees Longitude minutes Longitude seconds \
+ 0 14 11 59.278336
+ 1 14 12 8.280258
+ 2 14 11 58.200302
+ 3 14 12 3.600769
+ 4 14 12 1.798325
+ ... ... ... ...
+ 20237 20 1 48.002472
+ 20238 21 30 0.000000
+ 20239 21 31 32.882538
+ 20240 21 31 32.882538
+ 20241 23 33 18.001099
+
+ Longitude direction Unit Value type
+ 0 E Bq/m3 =
+ 1 E Bq/m3 =
+ 2 E Bq/m3 =
+ 3 E Bq/m3 =
+ 4 E Bq/m3 =
+ ... ... ... ...
+ 20237 E Bq/m3 =
+ 20238 E Bq/m3 =
+ 20239 E Bq/m3 =
+ 20240 E Bq/m3 =
+ 20241 E Bq/m3 =
+
+ [20242 rows x 27 columns],
+ 'sediment': lon lat tot_depth time sed_type sample nuclide \
+ 0 10.850000 54.049999 22.0 866592000 58 842 be7
+ 1 10.203300 54.415001 13.0 811641600 58 4064 be7
+ 2 10.203300 54.415001 13.0 811641600 58 4069 be7
+ 3 10.203300 54.415001 13.0 811641600 58 4074 be7
+ 4 11.750000 54.416698 24.0 838512000 58 4535 be7
+ ... ... ... ... ... ... ... ...
+ 37084 23.391199 65.277496 90.0 1283299200 2 37039 bi212
+ 37085 23.391199 65.277496 90.0 1283299200 2 37048 bi212
+ 37086 23.391199 65.277496 90.0 1283299200 2 37065 bi212
+ 37087 23.391199 65.277496 90.0 1283299200 2 37074 bi212
+ 37088 23.391199 65.277496 90.0 1283299200 2 37083 bi212
+
+ value _unc _dl ... Latitude degrees Latitude minutes \
+ 0 24.299999 7.7760 1 ... 54 2
+ 1 45.500000 4.5500 1 ... 54 24
+ 2 7.000000 NaN 2 ... 54 24
+ 3 4.800000 NaN 2 ... 54 24
+ 4 6.900000 1.9320 1 ... 54 25
+ ... ... ... ... ... ... ...
+ 37084 42.900002 6.1347 1 ... 65 16
+ 37085 58.400002 6.1904 1 ... 65 16
+ 37086 51.400002 5.9624 1 ... 65 16
+ 37087 41.799999 5.4758 1 ... 65 16
+ 37088 43.700001 3.4523 1 ... 65 16
+
+ Latitude seconds Latitude direction Longitude degrees \
+ 0 59.997253 N 10
+ 1 54.003296 N 10
+ 2 54.003296 N 10
+ 3 54.003296 N 10
+ 4 0.114441 N 11
+ ... ... ... ...
+ 37084 38.986816 N 23
+ 37085 38.986816 N 23
+ 37086 38.986816 N 23
+ 37087 38.986816 N 23
+ 37088 38.986816 N 23
+
+ Longitude minutes Longitude seconds Longitude direction Unit \
+ 0 51 0.001373 E Bq/kgd
+ 1 12 11.881714 E Bq/kgd
+ 2 12 11.881714 E Bq/kgd
+ 3 12 11.881714 E Bq/kgd
+ 4 45 0.000000 E Bq/kgd
+ ... ... ... ... ...
+ 37084 23 28.316803 E Bq/kgd
+ 37085 23 28.316803 E Bq/kgd
+ 37086 23 28.316803 E Bq/kgd
+ 37087 23 28.316803 E Bq/kgd
+ 37088 23 28.316803 E Bq/kgd
+
+ Value type
+ 0 =
+ 1 =
+ 2 <
+ 3 <
+ 4 =
+ ... ...
+ 37084 =
+ 37085 =
+ 37086 =
+ 37087 =
+ 37088 =
+
+ [37089 rows x 25 columns],
+ 'biota': lon lat smp_depth time bio_group species \
+ 0 11.5000 54.080002 0.0 908755200 11 96
+ 1 11.5000 54.080002 0.0 900547200 11 96
+ 2 11.5000 54.080002 0.0 874540800 14 129
+ 3 11.5000 54.080002 0.0 865900800 14 129
+ 4 11.5000 54.080002 0.0 874886400 11 96
+ ... ... ... ... ... ... ...
+ 14868 12.0742 57.335201 0.0 1253145600 11 96
+ 14869 12.0742 57.335201 0.0 1225670400 11 96
+ 14870 12.0742 57.335201 0.0 1160352000 11 96
+ 14871 12.0742 57.335201 0.0 1380240000 11 96
+ 14872 12.0742 57.335201 0.0 1409788800 11 96
+
+ body_part sample nuclide value ... Latitude seconds \
+ 0 54 150 be7 46.500 ... 48.006592
+ 1 54 159 be7 66.500 ... 48.006592
+ 2 1 168 be7 5.430 ... 48.006592
+ 3 1 177 be7 13.700 ... 48.006592
+ 4 54 183 be7 11.300 ... 48.006592
+ ... ... ... ... ... ... ...
+ 14868 54 11586 tl208 0.880 ... 6.724548
+ 14869 54 11598 tl208 0.770 ... 6.724548
+ 14870 54 11620 tl208 1.310 ... 6.724548
+ 14871 54 11766 tl208 0.668 ... 6.724548
+ 14872 54 11775 tl208 0.684 ... 6.724548
+
+ Latitude direction Longitude degrees Longitude minutes \
+ 0 N 11 30
+ 1 N 11 30
+ 2 N 11 30
+ 3 N 11 30
+ 4 N 11 30
+ ... ... ... ...
+ 14868 N 12 4
+ 14869 N 12 4
+ 14870 N 12 4
+ 14871 N 12 4
+ 14872 N 12 4
+
+ Longitude seconds Longitude direction Unit Value type \
+ 0 0.000000 E Bq/kgd =
+ 1 0.000000 E Bq/kgd =
+ 2 0.000000 E Bq/kgd =
+ 3 0.000000 E Bq/kgd =
+ 4 0.000000 E Bq/kgd <
+ ... ... ... ... ...
+ 14868 27.118835 E Bq/kgd =
+ 14869 27.118835 E Bq/kgd =
+ 14870 27.118835 E Bq/kgd =
+ 14871 27.118835 E Bq/kgd =
+ 14872 27.118835 E Bq/kgd =
+
+ Species Body part
+ 0 Fucus vesiculosus Whole haptophytic plants
+ 1 Fucus vesiculosus Whole haptophytic plants
+ 2 Mytilus edulis Whole animal
+ 3 Mytilus edulis Whole animal
+ 4 Fucus vesiculosus Whole haptophytic plants
+ ... ... ...
+ 14868 Fucus vesiculosus Whole haptophytic plants
+ 14869 Fucus vesiculosus Whole haptophytic plants
+ 14870 Fucus vesiculosus Whole haptophytic plants
+ 14871 Fucus vesiculosus Whole haptophytic plants
+ 14872 Fucus vesiculosus Whole haptophytic plants
+
+ [14873 rows x 29 columns]}
++++get_sediments_lut ()
+++LookupSedimentTypeByIdCB (fn_lut=<function get_sediments_lut>)
Lookup MARIS sedtype by sedtype_id.
# Pipeline whose last callback is LookupSedimentTypeByIdCB; in the output printed
# below the 'sediment' frame shows a 'Sediment type' column (e.g. 'Pure mud', 'Gravel').
# NOTE(review): netcdf4_to_df presumably returns one DataFrame per group — confirm.
+dfs = netcdf4_to_df(fname_in)
+tfm = Transformer(dfs, cbs=[ReshapeWideToLong(),
+ LookupTimeFromEncodedTime(cfg()),
+ GetSampleTypeCB(),
+ LookupNuclideByIdCB(),
+ ConvertLonLatCB(),
+ LookupUnitByIdCB(),
+ LookupValueTypeByIdCB(),
+ LookupSpeciesByIdCB(),
+ LookupBodypartByIdCB(),
+ LookupSedimentTypeByIdCB()
+ ])
+tfm()
{'seawater': lon lat smp_depth tot_depth time sample \
+ 0 14.199800 54.006001 0.0 11.0 1497744000 78
+ 1 14.202300 54.006199 0.0 12.0 1339632000 83
+ 2 14.199500 54.006302 0.0 12.0 1402876800 86
+ 3 14.201000 54.006500 0.0 12.0 1278460800 91
+ 4 14.200500 54.006668 0.0 12.0 1309910400 101
+ ... ... ... ... ... ... ...
+ 20237 20.030001 57.343300 0.0 236.0 524620800 14175
+ 20238 21.500000 59.433300 0.0 156.0 555033600 15712
+ 20239 21.525801 59.439800 0.0 160.0 585446400 15840
+ 20240 21.525801 59.439800 150.0 160.0 585446400 15847
+ 20241 23.555000 65.239998 0.0 73.0 587001600 20130
+
+ nuclide value _unc _dl ... Latitude degrees \
+ 0 h3 850.0000 59.669998 1 ... 54
+ 1 h3 970.0000 29.100000 1 ... 54
+ 2 h3 910.0000 24.570000 1 ... 54
+ 3 h3 1070.0000 21.400000 1 ... 54
+ 4 h3 1020.0000 20.400000 1 ... 54
+ ... ... ... ... ... ... ...
+ 20237 cm243_244_tot 0.0064 0.001280 1 ... 57
+ 20238 cm243_244_tot 0.0045 0.000900 1 ... 59
+ 20239 cm243_244_tot 0.0022 0.000660 1 ... 59
+ 20240 cm243_244_tot 0.0064 0.001920 1 ... 59
+ 20241 cm243_244_tot 0.0039 0.001170 1 ... 65
+
+ Latitude minutes Latitude seconds Latitude direction \
+ 0 0 21.601868 N
+ 1 0 22.315979 N
+ 2 0 22.686768 N
+ 3 0 23.400879 N
+ 4 0 24.005127 N
+ ... ... ... ...
+ 20237 20 35.879517 N
+ 20238 25 59.880066 N
+ 20239 26 23.280945 N
+ 20240 26 23.280945 N
+ 20241 14 23.992310 N
+
+ Longitude degrees Longitude minutes Longitude seconds \
+ 0 14 11 59.278336
+ 1 14 12 8.280258
+ 2 14 11 58.200302
+ 3 14 12 3.600769
+ 4 14 12 1.798325
+ ... ... ... ...
+ 20237 20 1 48.002472
+ 20238 21 30 0.000000
+ 20239 21 31 32.882538
+ 20240 21 31 32.882538
+ 20241 23 33 18.001099
+
+ Longitude direction Unit Value type
+ 0 E Bq/m3 =
+ 1 E Bq/m3 =
+ 2 E Bq/m3 =
+ 3 E Bq/m3 =
+ 4 E Bq/m3 =
+ ... ... ... ...
+ 20237 E Bq/m3 =
+ 20238 E Bq/m3 =
+ 20239 E Bq/m3 =
+ 20240 E Bq/m3 =
+ 20241 E Bq/m3 =
+
+ [20242 rows x 27 columns],
+ 'sediment': lon lat tot_depth time sed_type sample nuclide \
+ 0 10.850000 54.049999 22.0 866592000 58 842 be7
+ 1 10.203300 54.415001 13.0 811641600 58 4064 be7
+ 2 10.203300 54.415001 13.0 811641600 58 4069 be7
+ 3 10.203300 54.415001 13.0 811641600 58 4074 be7
+ 4 11.750000 54.416698 24.0 838512000 58 4535 be7
+ ... ... ... ... ... ... ... ...
+ 37084 23.391199 65.277496 90.0 1283299200 2 37039 bi212
+ 37085 23.391199 65.277496 90.0 1283299200 2 37048 bi212
+ 37086 23.391199 65.277496 90.0 1283299200 2 37065 bi212
+ 37087 23.391199 65.277496 90.0 1283299200 2 37074 bi212
+ 37088 23.391199 65.277496 90.0 1283299200 2 37083 bi212
+
+ value _unc _dl ... Latitude minutes Latitude seconds \
+ 0 24.299999 7.7760 1 ... 2 59.997253
+ 1 45.500000 4.5500 1 ... 24 54.003296
+ 2 7.000000 NaN 2 ... 24 54.003296
+ 3 4.800000 NaN 2 ... 24 54.003296
+ 4 6.900000 1.9320 1 ... 25 0.114441
+ ... ... ... ... ... ... ...
+ 37084 42.900002 6.1347 1 ... 16 38.986816
+ 37085 58.400002 6.1904 1 ... 16 38.986816
+ 37086 51.400002 5.9624 1 ... 16 38.986816
+ 37087 41.799999 5.4758 1 ... 16 38.986816
+ 37088 43.700001 3.4523 1 ... 16 38.986816
+
+ Latitude direction Longitude degrees Longitude minutes \
+ 0 N 10 51
+ 1 N 10 12
+ 2 N 10 12
+ 3 N 10 12
+ 4 N 11 45
+ ... ... ... ...
+ 37084 N 23 23
+ 37085 N 23 23
+ 37086 N 23 23
+ 37087 N 23 23
+ 37088 N 23 23
+
+ Longitude seconds Longitude direction Unit Value type \
+ 0 0.001373 E Bq/kgd =
+ 1 11.881714 E Bq/kgd =
+ 2 11.881714 E Bq/kgd <
+ 3 11.881714 E Bq/kgd <
+ 4 0.000000 E Bq/kgd =
+ ... ... ... ... ...
+ 37084 28.316803 E Bq/kgd =
+ 37085 28.316803 E Bq/kgd =
+ 37086 28.316803 E Bq/kgd =
+ 37087 28.316803 E Bq/kgd =
+ 37088 28.316803 E Bq/kgd =
+
+ Sediment type
+ 0 Pure mud
+ 1 Pure mud
+ 2 Pure mud
+ 3 Pure mud
+ 4 Pure mud
+ ... ...
+ 37084 Gravel
+ 37085 Gravel
+ 37086 Gravel
+ 37087 Gravel
+ 37088 Gravel
+
+ [37089 rows x 26 columns],
+ 'biota': lon lat smp_depth time bio_group species \
+ 0 11.5000 54.080002 0.0 908755200 11 96
+ 1 11.5000 54.080002 0.0 900547200 11 96
+ 2 11.5000 54.080002 0.0 874540800 14 129
+ 3 11.5000 54.080002 0.0 865900800 14 129
+ 4 11.5000 54.080002 0.0 874886400 11 96
+ ... ... ... ... ... ... ...
+ 14868 12.0742 57.335201 0.0 1253145600 11 96
+ 14869 12.0742 57.335201 0.0 1225670400 11 96
+ 14870 12.0742 57.335201 0.0 1160352000 11 96
+ 14871 12.0742 57.335201 0.0 1380240000 11 96
+ 14872 12.0742 57.335201 0.0 1409788800 11 96
+
+ body_part sample nuclide value ... Latitude seconds \
+ 0 54 150 be7 46.500 ... 48.006592
+ 1 54 159 be7 66.500 ... 48.006592
+ 2 1 168 be7 5.430 ... 48.006592
+ 3 1 177 be7 13.700 ... 48.006592
+ 4 54 183 be7 11.300 ... 48.006592
+ ... ... ... ... ... ... ...
+ 14868 54 11586 tl208 0.880 ... 6.724548
+ 14869 54 11598 tl208 0.770 ... 6.724548
+ 14870 54 11620 tl208 1.310 ... 6.724548
+ 14871 54 11766 tl208 0.668 ... 6.724548
+ 14872 54 11775 tl208 0.684 ... 6.724548
+
+ Latitude direction Longitude degrees Longitude minutes \
+ 0 N 11 30
+ 1 N 11 30
+ 2 N 11 30
+ 3 N 11 30
+ 4 N 11 30
+ ... ... ... ...
+ 14868 N 12 4
+ 14869 N 12 4
+ 14870 N 12 4
+ 14871 N 12 4
+ 14872 N 12 4
+
+ Longitude seconds Longitude direction Unit Value type \
+ 0 0.000000 E Bq/kgd =
+ 1 0.000000 E Bq/kgd =
+ 2 0.000000 E Bq/kgd =
+ 3 0.000000 E Bq/kgd =
+ 4 0.000000 E Bq/kgd <
+ ... ... ... ... ...
+ 14868 27.118835 E Bq/kgd =
+ 14869 27.118835 E Bq/kgd =
+ 14870 27.118835 E Bq/kgd =
+ 14871 27.118835 E Bq/kgd =
+ 14872 27.118835 E Bq/kgd =
+
+ Species Body part
+ 0 Fucus vesiculosus Whole haptophytic plants
+ 1 Fucus vesiculosus Whole haptophytic plants
+ 2 Mytilus edulis Whole animal
+ 3 Mytilus edulis Whole animal
+ 4 Fucus vesiculosus Whole haptophytic plants
+ ... ... ...
+ 14868 Fucus vesiculosus Whole haptophytic plants
+ 14869 Fucus vesiculosus Whole haptophytic plants
+ 14870 Fucus vesiculosus Whole haptophytic plants
+ 14871 Fucus vesiculosus Whole haptophytic plants
+ 14872 Fucus vesiculosus Whole haptophytic plants
+
+ [14873 rows x 29 columns]}
++++get_renaming_rules_netcdf2OpenRefine ()
class SelectAndRenameColumnCB(Callback):
    """Keep only the columns listed in the renaming rules, then rename them.

    `fn_renaming_rules` is called with no arguments and must return a mapping
    whose values are `{current column name: new column name}` mappings. For
    each group in `tfm.dfs`, every rule set whose key contains the group name
    is applied.
    """
    def __init__(self,
                 fn_renaming_rules,
                 ):
        fc.store_attr()

    def __call__(self, tfm):
        rules = self.fn_renaming_rules()
        for grp in tfm.dfs.keys():
            # Merge all rule sets that concern this group (e.g. 'biota');
            # on duplicate column names the later rule set wins.
            col_map = {}
            for rule_key, rule_set in rules.items():
                if grp in rule_key:
                    col_map.update(rule_set.items())
            # Restrict the frame to the mapped columns, in mapping order.
            tfm.dfs[grp] = tfm.dfs[grp].loc[:, list(col_map)]
            # Apply the new column labels on the stored frame.
            tfm.dfs[grp].rename(columns=col_map, inplace=True)
# Pipeline whose last callback is SelectAndRenameColumnCB; the output printed
# below contains only the selected columns under their renamed labels
# (e.g. 'Activity or MDA', 'Uncertainty', 'Latitude decimal').
# NOTE(review): netcdf4_to_df presumably returns one DataFrame per group — confirm.
dfs = netcdf4_to_df(fname_in)
+tfm = Transformer(dfs, cbs=[ReshapeWideToLong(),
+ LookupTimeFromEncodedTime(cfg()),
+ GetSampleTypeCB(),
+ LookupNuclideByIdCB(),
+ ConvertLonLatCB(),
+ LookupUnitByIdCB(),
+ LookupValueTypeByIdCB(),
+ LookupSpeciesByIdCB(),
+ LookupBodypartByIdCB(),
+ LookupSedimentTypeByIdCB(),
+ SelectAndRenameColumnCB(get_renaming_rules_netcdf2OpenRefine)
+ ])
+tfm()
{'seawater': Sample type Latitude degrees Latitude minutes Latitude seconds \
+ 0 SEAWATER 54 0 21.601868
+ 1 SEAWATER 54 0 22.315979
+ 2 SEAWATER 54 0 22.686768
+ 3 SEAWATER 54 0 23.400879
+ 4 SEAWATER 54 0 24.005127
+ ... ... ... ... ...
+ 20237 SEAWATER 57 20 35.879517
+ 20238 SEAWATER 59 25 59.880066
+ 20239 SEAWATER 59 26 23.280945
+ 20240 SEAWATER 59 26 23.280945
+ 20241 SEAWATER 65 14 23.992310
+
+ Latitude direction Longitude degrees Longitude minutes \
+ 0 N 14 11
+ 1 N 14 12
+ 2 N 14 11
+ 3 N 14 12
+ 4 N 14 12
+ ... ... ... ...
+ 20237 N 20 1
+ 20238 N 21 30
+ 20239 N 21 31
+ 20240 N 21 31
+ 20241 N 23 33
+
+ Longitude seconds Longitude direction Latitude decimal ... \
+ 0 59.278336 E 54.006001 ...
+ 1 8.280258 E 54.006199 ...
+ 2 58.200302 E 54.006302 ...
+ 3 3.600769 E 54.006500 ...
+ 4 1.798325 E 54.006668 ...
+ ... ... ... ... ...
+ 20237 48.002472 E 57.343300 ...
+ 20238 0.000000 E 59.433300 ...
+ 20239 32.882538 E 59.439800 ...
+ 20240 32.882538 E 59.439800 ...
+ 20241 18.001099 E 65.239998 ...
+
+ Sampling start time Nuclide Value type Unit Activity or MDA \
+ 0 00:00:00 3H = Bq/m3 850.0000
+ 1 00:00:00 3H = Bq/m3 970.0000
+ 2 00:00:00 3H = Bq/m3 910.0000
+ 3 00:00:00 3H = Bq/m3 1070.0000
+ 4 00:00:00 3H = Bq/m3 1020.0000
+ ... ... ... ... ... ...
+ 20237 00:00:00 243_244Cm = Bq/m3 0.0064
+ 20238 00:00:00 243_244Cm = Bq/m3 0.0045
+ 20239 00:00:00 243_244Cm = Bq/m3 0.0022
+ 20240 00:00:00 243_244Cm = Bq/m3 0.0064
+ 20241 00:00:00 243_244Cm = Bq/m3 0.0039
+
+ Uncertainty Total depth Sampling depth Salinity Temperature
+ 0 59.669998 11.0 0.0 7.50 NaN
+ 1 29.100000 12.0 0.0 6.77 NaN
+ 2 24.570000 12.0 0.0 6.80 NaN
+ 3 21.400000 12.0 0.0 5.82 NaN
+ 4 20.400000 12.0 0.0 5.40 NaN
+ ... ... ... ... ... ...
+ 20237 0.001280 236.0 0.0 6.90 NaN
+ 20238 0.000900 156.0 0.0 6.75 NaN
+ 20239 0.000660 160.0 0.0 5.83 20.4
+ 20240 0.001920 160.0 150.0 9.77 3.9
+ 20241 0.001170 73.0 0.0 3.10 15.6
+
+ [20242 rows x 22 columns],
+ 'sediment': Sample type Latitude degrees Latitude minutes Latitude seconds \
+ 0 SEDIMENT 54 2 59.997253
+ 1 SEDIMENT 54 24 54.003296
+ 2 SEDIMENT 54 24 54.003296
+ 3 SEDIMENT 54 24 54.003296
+ 4 SEDIMENT 54 25 0.114441
+ ... ... ... ... ...
+ 37084 SEDIMENT 65 16 38.986816
+ 37085 SEDIMENT 65 16 38.986816
+ 37086 SEDIMENT 65 16 38.986816
+ 37087 SEDIMENT 65 16 38.986816
+ 37088 SEDIMENT 65 16 38.986816
+
+ Latitude direction Longitude degrees Longitude minutes \
+ 0 N 10 51
+ 1 N 10 12
+ 2 N 10 12
+ 3 N 10 12
+ 4 N 11 45
+ ... ... ... ...
+ 37084 N 23 23
+ 37085 N 23 23
+ 37086 N 23 23
+ 37087 N 23 23
+ 37088 N 23 23
+
+ Longitude seconds Longitude direction Latitude decimal \
+ 0 0.001373 E 54.049999
+ 1 11.881714 E 54.415001
+ 2 11.881714 E 54.415001
+ 3 11.881714 E 54.415001
+ 4 0.000000 E 54.416698
+ ... ... ... ...
+ 37084 28.316803 E 65.277496
+ 37085 28.316803 E 65.277496
+ 37086 28.316803 E 65.277496
+ 37087 28.316803 E 65.277496
+ 37088 28.316803 E 65.277496
+
+ Longitude decimal Sampling start date Sampling start time Nuclide \
+ 0 10.850000 18-Jun-1997 00:00:00 7Be
+ 1 10.203300 21-Sep-1995 00:00:00 7Be
+ 2 10.203300 21-Sep-1995 00:00:00 7Be
+ 3 10.203300 21-Sep-1995 00:00:00 7Be
+ 4 11.750000 28-Jul-1996 00:00:00 7Be
+ ... ... ... ... ...
+ 37084 23.391199 01-Sep-2010 00:00:00 212Bi
+ 37085 23.391199 01-Sep-2010 00:00:00 212Bi
+ 37086 23.391199 01-Sep-2010 00:00:00 212Bi
+ 37087 23.391199 01-Sep-2010 00:00:00 212Bi
+ 37088 23.391199 01-Sep-2010 00:00:00 212Bi
+
+ Value type Unit Activity or MDA Uncertainty Total depth \
+ 0 = Bq/kgd 24.299999 7.7760 22.0
+ 1 = Bq/kgd 45.500000 4.5500 13.0
+ 2 < Bq/kgd 7.000000 NaN 13.0
+ 3 < Bq/kgd 4.800000 NaN 13.0
+ 4 = Bq/kgd 6.900000 1.9320 24.0
+ ... ... ... ... ... ...
+ 37084 = Bq/kgd 42.900002 6.1347 90.0
+ 37085 = Bq/kgd 58.400002 6.1904 90.0
+ 37086 = Bq/kgd 51.400002 5.9624 90.0
+ 37087 = Bq/kgd 41.799999 5.4758 90.0
+ 37088 = Bq/kgd 43.700001 3.4523 90.0
+
+ Sediment type
+ 0 Pure mud
+ 1 Pure mud
+ 2 Pure mud
+ 3 Pure mud
+ 4 Pure mud
+ ... ...
+ 37084 Gravel
+ 37085 Gravel
+ 37086 Gravel
+ 37087 Gravel
+ 37088 Gravel
+
+ [37089 rows x 20 columns],
+ 'biota': Sample type Latitude degrees Latitude minutes Latitude seconds \
+ 0 BIOTA 54 4 48.006592
+ 1 BIOTA 54 4 48.006592
+ 2 BIOTA 54 4 48.006592
+ 3 BIOTA 54 4 48.006592
+ 4 BIOTA 54 4 48.006592
+ ... ... ... ... ...
+ 14868 BIOTA 57 20 6.724548
+ 14869 BIOTA 57 20 6.724548
+ 14870 BIOTA 57 20 6.724548
+ 14871 BIOTA 57 20 6.724548
+ 14872 BIOTA 57 20 6.724548
+
+ Latitude direction Longitude degrees Longitude minutes \
+ 0 N 11 30
+ 1 N 11 30
+ 2 N 11 30
+ 3 N 11 30
+ 4 N 11 30
+ ... ... ... ...
+ 14868 N 12 4
+ 14869 N 12 4
+ 14870 N 12 4
+ 14871 N 12 4
+ 14872 N 12 4
+
+ Longitude seconds Longitude direction Latitude decimal ... \
+ 0 0.000000 E 54.080002 ...
+ 1 0.000000 E 54.080002 ...
+ 2 0.000000 E 54.080002 ...
+ 3 0.000000 E 54.080002 ...
+ 4 0.000000 E 54.080002 ...
+ ... ... ... ... ...
+ 14868 27.118835 E 57.335201 ...
+ 14869 27.118835 E 57.335201 ...
+ 14870 27.118835 E 57.335201 ...
+ 14871 27.118835 E 57.335201 ...
+ 14872 27.118835 E 57.335201 ...
+
+ Sampling start date Sampling start time Nuclide Value type Unit \
+ 0 19-Oct-1998 00:00:00 7Be = Bq/kgd
+ 1 16-Jul-1998 00:00:00 7Be = Bq/kgd
+ 2 18-Sep-1997 00:00:00 7Be = Bq/kgd
+ 3 10-Jun-1997 00:00:00 7Be = Bq/kgd
+ 4 22-Sep-1997 00:00:00 7Be < Bq/kgd
+ ... ... ... ... ... ...
+ 14868 17-Sep-2009 00:00:00 208Tl = Bq/kgd
+ 14869 03-Nov-2008 00:00:00 208Tl = Bq/kgd
+ 14870 09-Oct-2006 00:00:00 208Tl = Bq/kgd
+ 14871 27-Sep-2013 00:00:00 208Tl = Bq/kgd
+ 14872 04-Sep-2014 00:00:00 208Tl = Bq/kgd
+
+ Activity or MDA Uncertainty Species \
+ 0 46.500 1.813500 Fucus vesiculosus
+ 1 66.500 6.317500 Fucus vesiculosus
+ 2 5.430 1.574700 Mytilus edulis
+ 3 13.700 4.384000 Mytilus edulis
+ 4 11.300 0.000000 Fucus vesiculosus
+ ... ... ... ...
+ 14868 0.880 0.079200 Fucus vesiculosus
+ 14869 0.770 0.069300 Fucus vesiculosus
+ 14870 1.310 0.142790 Fucus vesiculosus
+ 14871 0.668 0.057448 Fucus vesiculosus
+ 14872 0.684 0.072504 Fucus vesiculosus
+
+ Body part bio_group
+ 0 Whole haptophytic plants 11
+ 1 Whole haptophytic plants 11
+ 2 Whole animal 14
+ 3 Whole animal 14
+ 4 Whole haptophytic plants 11
+ ... ... ...
+ 14868 Whole haptophytic plants 11
+ 14869 Whole haptophytic plants 11
+ 14870 Whole haptophytic plants 11
+ 14871 Whole haptophytic plants 11
+ 14872 Whole haptophytic plants 11
+
+ [14873 rows x 21 columns]}
+++ ++encode (fname_in, fname_out, ref_id=-1, **kwargs)
TODO: Review the MARIS nuclides LUT. Cs127?
+TODO: Should the variable be called ‘detection limit’, or is ‘value type’ more appropriate?
+TODO: Biogroup is not used in the OpenRefine CSV format. Confirm this.
+TODO: Ask about the Species dbo. Paul said there is a larger one.
+TODO: Maintain sample (i.e. index) in the output.
+ + +EXle}^cd#aD zUljcfE$T4=p(!w^32i+7D>_|zHj6B8=S%fZx0qU4gLIT@XZ|S+(as%`5Tlnj0#Vqc z+xggTe}KT!4gQBBk_XW1OTgtk2hhAigQVLHtuFb>13&Wq{B02yjO=DNced`5j>8>R zlI6C8(Hp4D!^89Tug*z+>INd5x?908M3Ywc%08lYi#UzSfFj&m9`?Xf-ofXa#N{>r zb|5%6&f!IcTOXOx+=EaxNbsCc{IYw`;{{PkrF~1~Y6q?xG0^85zLE}XO+Z8l1hr}O znImK)GYO})t`3v%>9TUO@h)~lR+I>-T`V)2U2irmlr=hvzdp!V- zlixjq%bkw@ZK88UHPCZ2A|UgKE8H6y2m~vssF2^EuLwN(KR;{|xa;O#fZr|33F$SN zD MZeO0P$=IXl;liQ(PspM6kUHx+k?OGi^ zk_#_>{7G~cvHL-5@Fn v>wy;qC6Hq*m{I z^RQ{Z`f|5q06Mmjl}UZdh EuO=L+3;dCLUeK=&Fv!4*KR#WYG>Kca$u%Up9rVLJ(b-ZIwjs zlDih{mXwqv{5k)Z-M-|{P T2`RqEAtL{*nS6;TXg~%R4;vTXi+KhxH`>YtxhjL&v;vF9cT57UYnokVtr lIDtE!K-x~LDM5Zw z#YU0ztp}c@Fo}tgJr5_!hHK!ZaPtXoAM>8ifZtA_AYLnQm(mt1fQw5XH?!T^{;DK2 zG_=j@?gWDZ&u2k0d6Jj^(}N)+bG4R#haJ~psaAd|KAVCI6-^Bli}%aAx(6OQExA${ zTH{j?xe{OH@#11R>*8MtFeYJOJw%$E!4D=kLKo4-s|}xf%SuE5XG7QJR*e%4Qc)Y9 zfvh0UOP #)iHn@)Q9N_9qMG~mjtT-ZAljo@eh)D) z15_t9_=yf1!K `3p zjqZIpt*}a@ohjfC4sI;HYdbaEr|>1G_~6^5qz8@tK>fQ*u=|DLW~ju6@2pl?Oft`X zO_`@yAsd%=n<8BRbUSXh#!c1VVn&YEyZbnZl7kYap{bIl`OgI)V`ymTGLgYids6>! 
zFg3iyG|c<4C+PdkKb5v9gZo$Y6l7ZNW^hB=z`S3DREFe&k$p@X+c!gbyX9GSCe8ai z0(5MF9C!-I-8}#h$XX2{LQERFOwPIL_XaS~(JFbio?>K>0^e~#LAX2$)!;4s z<)w6(?$1f!JXk$&KDFWA`@~PaMwV8p_r5xb#fgRr=Az2uOI;nSY$JwUoA-I@4VAvj zjKqSSs>Hv+I@7TX*Q5D+BR%-+KQ%i9fH|8g$Ewa 85X!gVNAsHX#c;`UG9VV^GWAddAK-{Dx(WnhNQo Y@7k>GQRqV3~%(5n)>E_UVR>W{od+RHw}q@3+^36`$bq(f;`4I*gl*X7x5j zaUSv =gsOG%fK(dd6=&^^^Vt7RlPII3&Q_uwv)O@ufadS{LIW|Ve zGb?Gt|16QJ=OS9Y8|zOX?DA|N3(23L{9XFUsgfOI7AtyRkTO}8Q>IMieH+co38|G^ zNR{Al6x}O;$f%6mSucT8+esH^znBxPHTGsHvD(54R_!uyg&KAa-r=tA7+-Hm1M_T+ z$MCF};o|<}ZqjUUmTP7@ev+p3ee2U+65*5a8VkYtyqq=NWpY?M0m$iI%B8v2kReRg z9BUj>$=1Fncsj^M<1%6KCiVWc+p;L6;}Gx~{&oWB>t}Y+)fh_@sZpX+Z&JC@ScCl< z<)9Aj_F?6AoSR+$Kqdoi_O+c!QexLoYw^?;#JRbVC$=P^?y0Xn^>jVP og1C*4=e@9a>K$ bUHm)N6Z{ zJUoz)hj-&krs||AA&3t~EA3Rj_%r5XNk9)0ZC{h-zSd*^$!>P80Z)mD%T7I Mj-ui~nXE{U)O~AM_@?Pks?Z z#Gu9b@fvg3xo3c>f+KFQWd_9|519@_cy*#ho2O8YENO~M(Kk&@7?pRX$4qH-rRg)9 zf|yC9(_}g}OE&Oc10Bs+KKGw*KHjc)8oZ#UJxMUcb+>Bei4I;;TDmRtQ7zWL{a092 z>hoZ0iPEG;IdW>xBBe4&rqm;6PByu}>35)L+0-wAt)AV3>M=$HNV>$K?FZt=1b|z9 zt=tFA?yO^R*sYm4yVgUFylQl{zV&+D;l7d)#x=o*_u&`yjmEMD(ShjCtjC~H r`0o?b znHs8D_Y5*@=P8l7C<6-#ZLsIr?i;FMa?7mWEWVYgD|0%^9Q6$TSO5c^cI2~-sei*z z7R>kaKt_xSs@L@vqw%fo0aPX2AIrU@b}!#@I@*RR^*grj-rrs-)Ph}5zNiZaz;=8m zo}q#8_kG$QkDB7YNs6^y nMzkH|DA6AAGR1k?Al%)E)rf)6 K~Eq3V=R1kuwxZdiTae3SVSxa|}~&7{%D(Cj41$r`<&J@y7e-^iMhMs!895?Li7kdLtSsWV%$nzC!0Cl^QYHId%Ooz@q3#+ zW)A+3bPZjk4m5_|rplO)I0=ohNOpb$saqUM9o8~2%2u$(k->#=H}5%dt27Ieii1sH z-97-#WJ!WY`|U)2{xTULgZ(M5m7QOT7y0;kHRcmVJtNH3o@q@r@Z-5Dx2}FOmSN|W zH*PnTB&P?3Hp&sGpVBDUJ$gzg-UYjUu}v?t8k@g?vc$iLP~pxAFSq>=Ms!rec)F~a z9?~cI)daL|!x6K$bLTR~rL-4mVDa*x+bu4?M} wI3Z`_tWW z7&N4}mH&f>nPwI;E;wl|E5ytSj{ZqSvw1?R!_GF}MA31a-EaZN9AU$$=AG#Dr6kvH zwfP_;cOB(>IN?n53Ndbsn51Ob_o!<)Q~%=NH$MLg6z9yOpm7XxVH4a2H4U?47i!68^S~tKl lJJchCHtTwq?oTu_ 0IObQ)l8SpmxIuA1!wg lU|KWT+zVd37Om0?R?qHzZ^Co(uBd!e#`$OQ6A3>C2t%vNg z5GXH-oI!-*HE>>t=4Dy`gO!bSpQen)y+|MfvVW(@ap{iG>(oB(?Mzw> PPUWePAi!?UflChpMdcK$Jaf7Lk}jcPSUIlH-nk!TjjJ 
z5jBW5x?Fo%WYT&!1L=3TEhUu6Oq7QHXHI)X$|wLvxf9vmb|0O-A9>CjS{|{-JdEPw zN%-r@A$UcybY)SX_va+(ev?-DK4z05Iuh{+Tac#8Efhp2dAziZPP4-{aJ`M|e2>4) z p`AN{0u^V8$$zo~jk0C84NCWqB(zGI@a znWQz!G%7Y9GJ8k3ZCi|so_ZKyC3+M!-nfP+*}DeRZxaDWM>2%YVGv7n2+X2)b)biL znPkpBRdC3D(5QSiRBN(ZFXwo@`imZ~P|Eh-K&0oDw}c(U5#_oXJ)CP~xejU}TgbbJ z?ozCdvBc9dviFdN-2XHP*E*2iOYKL{_9i|?h0AhYrVU_lBrqSCTyHzD(e1jL^Lu+f zd#wc_#LMSLBxI81P46OTeifKjW{XBM{L?HRANKkosC7Du&g)0SjRLK{JsP66&;L6@ ziTPVK9UOmrcT&S>9zp|)({6V=BE 60#p7miO^G9z0WJl)0n4L#04qUEL zMxGt9IN#I_E`$St>L7F>A$hLMnT+s}6I9XhNSa&Hbo}!E`JxDuPtn;mKAsD}8T+>e zXSEJnv1zy+ofQ@9e7SK11nP6Gco(Li_ow6H5v%UJKbLH|JD=y?%ZE?PAm3jIKO$hj zqSqjwYl0>me2gRyDDUg$B;+196hwulx8u9FqRa89D=j^W`Ua0_Gdw!FdSe{_TF!bp zmf{PlHXdU9mQm~~E$i)PTFV;Fg>e@9`H}J4nn?KA{l-No5#rOjHhjQj=G#9G0-)*U zzeV9V<4JjqG6vIJT4vpa3RsA8HQgYox85vE+&s*J{{)78o6REX^^ca)Uk z!FZ=+#`y!(0U9$b*BOox*z&sRUK(#cz8_Kj`Udd{?@E%RJ`T+(a(z5;EX5vl+mDaJ zO7O!JIU)_O_j1Wp;LfKZNseISTKu9w8!;iNYsFrLcnhMN-OXmxofPj?_>mFzG|@;9 zT3w}GyVEhJD3pB~m^|L)d2_&VvEH^M*9y!AzZg;!b<~CG#{@?|D?VWi?XV=(6oeHs zd<*dOj&x(tYx7%o0$9Keif7ubx4L5>@M359 s-`?f1S;GPJp@W?*C pfV2DzoV}`C4GAWW+BqAmlv1@+B!I15XfG$uJ zT8!tdH}ve}y$xh>?s_yjp3W0 )P9|G3+dU z`6Ydtix2AX4xf^b$9{BM`{w6WD%{1f78I%wa4CW&rR;YZ1IG`*>_|!UG?cZ?A4mG% zvy6cLdvVctPcWA~cGGp6X>N}H`%_`Dz_1Uf#_R5C0@UasH8&n>CBl@8%h7`V07o!& z)i=K5SaCb? 
ow!9fk)oTbYALPhltyxL;cEepxgRXui*+(8g4m^vJxFpYsKkFT7rac@RG5@ z%^$;l`XyiErv!?rTk!e|osgk)c7I7RUWYB{Sq{u)q4w)23^k6fq*qO}I!*Qm1xF0z zYVFWxdVSk @;dom@GjLs{=M5(X7PkABB1~kb953;h5 z gf3q1yAU|^69bml)~*8+bNw;lG-^b0 zdi5b3EG&63oCMDrjCi>wi`=>3rGRNU1{hX(J#hx}?ZG>|>ymfx#gc~&Z-TNVWJ9;{ zOfIIT!z;Z_g!Jtj^ N>KCV1j)OkK z{MP6_lH1*u!<@0sBXf-nBDl~CPL_!xg9Fq#qp^lwCW|nAP7$2xzk{DWrp;@M4%`=^ z*}Xje442=AIZ_SY_et1872b%x9F@BD0haXEi%r3`qBUNyqk@wTFO<|*Ar3;%Zrd7M zcV~Px+L2v O|A5D3HxB-^ zoee4BZ?*E#4c1*kK*xf?2Qw2heaP-Q0?UK$X%R$xa+kOfl%_3exqs0aXzBkjnQ^~~ z!S@-iyXsjl4ytc%P6Kr=i2UfWRfNVq(XJ2F$uOg_jtMAp`7lGXvD1T*1<{Tk74krn zSeA*S)39=PezdD*wQo9CU(>e#)IdgJ`m~wchF{402?`12_NE~j3)-$V*QqC(Rhh8_ zaAqCLn_9CK_5D~?uO-!F#~d)CnWfYA%66{0Y#WH0I$x+*+UfJy^JCJgE9^eqc$+n+ zAM03U2dww7)?lxlhij9R6~Op# ?Ol_Ih2bZ>b@GDrQZE`@!TIBo zEw Zk{Sa944zZ(+3FE=}^M7} ^K%_752r2!~osuA9`j{geF zxw9~r0zU9hX}7(H!_!2cT}m0>9C+eP;JI~ZTC9@g{5q={7m(e$78+SV=)h9rUFpt= zeIhVv=cPpN-UPw`(T7-QBPF7S%uVD!1dM-7x;+-CJOIf>tG$igmKpqPulMXebJi#F zDM#=8V8j${R2qE^8P^;kx*)G Z5)$jK9_5w`S#!uL^ zlR=yPMU+d_45d8mYtH!8lp35){T?oFab!#foAa}|1|kkCxk#VaBkYfh4JWgocEMM% zyKd&V=D1u5`t+=+U0Gzb$<$4{DluxfxixSX^7p0IZ@Sih+lz%%(@0YagN4t5RzX zw7jJTRKD?hOnMsTIOMxGoiR)X`xx=wI^p75llz!m46)gueyju$H9^*=6IxWmmFs;v zyO*7=k2j 8}jNwIyx!Yh;hg5~N_m>)ywIWcI=VNu_f3(CZHTeWLk@gcS1$MkN` zdMjS;jvo#T`Rhhg|KkvdxG(E1GwiZ 9&z3%E z?Sn3LKiKSM AW2sXMlKk99C?pf8QNHwO#LI*yfcbzUu(4w^)|@fXOs79XMfE?_UKHun8Mu zfmCg|&E(WygQ7mOGEuhZWIBzi&T{X=NqE1n(*z5QjG8rn9jEjKQt)u?17gjag~61o z#ovk_Q>2&~k7|DRGp40I3=@5LI04kMK&`IetC&nq8VCFmyhgFb)0Ek~QQYtoe{!q_ zR1q41_nCScB3++C*SR>_BiFryHg_9*wQA)=bKRjcSp3~@7~{&?>2g?24Rfd}FXbs3SrPwGEhPMd2qi26T Mf}|q;JcfXe)1?QaPv3T~{t82$ z+7b&{w`Tv#nc*wf!OhRRk@#ht@RIktakc`h+3{MVfyj;QHkb56I7N+yxdoq|2hILa z!*h3pit7-1s8-9l;u%|=2CqU%;a61t{z$Ai*751w081?uTEzpSpBT#y=Ub8}hluYn z1A2SK@M7>S5Ojp-I5^t?9Zs9(R*6ic+z(jd7iIT&Sb>;uBHMbh^Y)uaY3f_~pWInd z2Tm?QeOc@AN}7!QWg$SL@!)>U@7P-WK{q?u-tL=RhiygZOgP*%+%Y5-wZIOw%lm*a zrnXzx#Hhv$-J}`swRpk_>FBZq9?sFatmOj$9<=+B(!VHH*<&m{uPc~h!<>l#$DxLV 
z^d0)C_>BwJXr%1NX}V445=C$S6ZKR*x0QQbD7F})dc(dj=sRaNyW!#SQFGlXl@LOc zPzWt5f&xYHP;vGEnlv0JvBO_{EQ&A>>xh~D&9$~5eYI`om_ztaIVAG=VXptiV}Br& z!S0ty2|Rk $AU3=5emT+DTT%3BPtn2)Kk62L7qJ !27n%` zTMqx|4bH Cov~*gUT+ zIXF0Y{FzU-((z>9eQ7pjc)td(EM<8Qwru`{!VmX}`XuoxNyH}o6_#T7*?uIr&X_`C zRd!MWzO;@WUJQwO^y)m%z^E$n&$`sZSWzFSVRPAKrN_s|<9u%+At8kp7IoIsv*YCe z+#vM1MMmP&p+)q65+rBP@a@e~_Je!WKT+%adu(c9c`yv7XmxT!(1&_>DwUl{Q&e z@RD|_kh@NXjZ3r=#<{z9qkUJo;Z37@`Jl$S@81^!-_LwDPH=E&56<~u+;ZB!JD{xh zPC-(o`#S2n+X^}Gq5d{&FypXw{FsJ93 >0OE5V=}As+T4X^OGScb!r`PE zbQ{ekGTJ|Ju`W9;Obzv|Bvt%LB6-%N8U~qDMz1699L;}T6D J Uq1l7h(*N>_v+&&4UDDG3aBUy>? z+M|r5=_?G|Q_YeU^o<Rav9Xqw+A7OWR2=%sY z0B(G{@o&iRlk$jZ2epM)EYx=l%rDx*2%Z*E4*XM)Q*zD3m46H+-LWnh7#SkYtmaE+ z?n6wg=oo3V^1#o?phg>-EEe<01A|Scu}LvXACl!>Q#!w;D(j_i7M|Ql=AYrRU&5lC zwVE?_Tkb2TKD?h-%yVOs!LchW_8aX7p97P*@2T%SR)?Ly9mY-?QQ_f&(Ha(V&K>)E z&`7c4ZZNZpR}DrGe@toE_LCo*!bHU5Jf}}vVxyyL #tQP^mr^oXysPtrO{5T?GsaszjF&l39_-`rwRB18*to`PS@G;^%H-72=29bkDCz* zG?#z+TMdjdUYCs$=Yx8F 8Dm2Z2@!8h}xa>A>7DZs)1#c()?r<4HRIOfLpgA7bjfRuCMqE zgIn35Q3q=-y>P+z_+y>Z|0;++^Ny3D+q4Z-tf8|uxApAgL^jDKSHwQ;!t8d@iZ7xM zuQeR>eMg5#Sh!0m^sfh*@GtjgZRcrj2N1{~UROJd1grI}MScn$a$kj06K2ua=*WUe z@_v=8oaVYbSCBU_Y&6>R49%x&0_2lp2B0VaG``OvsOradMK^yV5oU$^Xib1%) c@+oZ>k?6QDpG0EAX?vRyDWB=DHu>^@v zK>J|8_^JeV?_fi}R fe-LTNKXX3)`ULqH9M Uq?9RnHgBVoLBBFb%*xz6H0B2q5dJ(^ WsqQa^} zeJy{&i`2t8KSe_JOA3SqNvPAYf8MKMkU=YL%~xW4EO}KJP@T!9Yc~8+t=svswui{g zVpW%5g4=XM1M=r@S>>XXn$K4>pR)5FEGzN*r4>O~_b0sTv4~eIS&qEV1e%*UrpCvP z-B2qd@56BXFjpu;$l{N)cnps$)>Il>0jL|&JjONDyV06YTU=&RMH?=`b}N@P7)O?p zdc| $V*Qqs0S;OhNtI{nrRjPfr;mrWmOqNzwRt%?5>-IFcf2-2jYZYPwdi=N7E}Q}mR% zzEe-j;bNpXLtXRKeXm9ar=<%At-ua-k$K1~qOuYO{hwuu8Jl;z?fl8Nmxt6)Z?jis zvaEk}C5mlG1e0SUlZ5obxAW{_R}1C6_+j9uh}bV^k}(nTr90+g@SHjxLO8(W{6*>P zrJgXv3v?7N{mf@i;rDX($}AX1lK$KEwx*4hX2%Nk26c)K!@ex(0%!puF0lhZ7TC!> z*cJSre61w>2>}gG=N(!!3@!!ZAp|8ZtPiKl9Kj#kr4as8PkGFm01;p{lh+rvej`M3 z{n70V*heVwI|?ksm8M-jBUb*cmgse~MIK^!^x G!1Iq_Kk zqtZ$aEmV&%rU9*DqGW}sA+0wTN~Wt}u01oU$EUG>u;+&wBiVI`@ct(|dtI1TIKHsZ 
zWZxR0S@Gyg_r2~sdpooHWR{MVYT67kn9E=_Un{Z61(3*3V|?g)$JHs@2E6q$=Vts# zirS^YbsOTaO_c$>Tja6t;X$_4-31 qtG<6S2ygPG; tK({|r~y!xia zwc$2E2L$U!PO^@UROPc(BxtH)b%n%?(8xPLL@wH|x@ax^tP-rU%@(7vvuF2mCdYrK zlguu2DQ(rA6c|z`Toj~{qheU0GBrlzuY;0dfQ5nY?NcdwR)b?~*0=lpnxO%yOn!Dr z6aTbtvlZCYT7CU+0cI^iH!aHyCoZyLkF8xtE%Q?3MzYyde;m69WD03z4jMfzHk|RX z+qV6NGzpF(iTgr#MoIAWC2N{TK@!knM`Mhjeq<<<;b$K7gtbQ`#o;Tw_FL)?ksvn{ z@ntuCP$pf{fihIEaW<(b6$f-+tKI?6gZf#Z7Q{u=l<>&Uor@`SK;I_JqEFn}>Ob=- zDAi(l&vPY$7b~Oo%Lf#O6#>$Oc!5C2$sloY8RxGVVLpkf%u{3gxtIK7|Ii=8N&P*u zYJC!Bt&OIwws6FcI3Sz=0!I6-t^(Q(76O@;f1)NGX8X9*W*kfT^TtBavUmPq33%KU zl~AtvRT9+D?u0t-9U>6=+`;+22-=-!2mf;9;{N$`4X%HIXV;I~Bm9vA2TQWBha!DO z?Ftj2tHg`I3FlE&hBqsNq`DVIy6ro=z2jUcejE<}&2}Svehj^pylZ~qM* ZvI_%XH$wNYo4-z#x~liXNSowg (jD+$1ow!CVp%qDDg6BwWb~X#Y pQ%3g>IwxuYycdS_hE*7nlTgq@1oYt7ulvN4 zys;Rw48XlyEYVfwQ#t-fp`z|uDMfj-1OP&s1E?)l=2`cw8#sL }9A_mLcnPJ6u44@2 @ z3hKEw9j*VP29m8dXs{SlYsM$nDkzMZ6(mexulZ40U6*>Vms^wkz)!BamtUn3) W`-|Ff}ZA>y7*5Im-JsM zFWYtZ8<0>q< NM8?hjsez*B&BVp03PPLLa+bVn*mXj2`Eyea0WSOS<8gPO9xu_- z*~Y2Ep6F058*`9on9y^GzCNqQS>;i6JbB6%F0!0dMRyu)LLw5;$lV=X-kE^CGgI zs`r;yCPR?jF!P>AYmS2e3BK4awK2TGb&5A9$gQ~;+zM``UIm9$104lE>G|Mj*&jTp zD&+1 riED>Nl#-<7Td zy~KeGYcd77GT=O)6;&R<)EL4iX1+Sxkjcrg+3YvBIO}mt=+IDVCaUY~qG)oQVFrS> z&>nVMkN$0G)?hJ{e|68GgL`)YYKl1X&|@D|y0)}*x-{NRN}`%c3@KKd?AgJor@~P^ z2ZaVpEVyIJHR8S9no=^&eI0T(y< dsbo@ovXfR$L=hq%a2H#~j z01wR@P3l#iGG#j=#MZu%z+A?SyzVE2%@2sS#p?AM #7F_KOrx++c-aE9oP{i2#erue~H UJ{~;1lGp^YCSUY#qJVt$|;rV=43Ra@kf+{#&pW{ clPaXLAJKWvthxf*O&PGupukur63RlcXEqr!^bCBca=hz>QD z$R5>6dX3h`Ij1NdaWfI%pq0w7!C|UL3>dMjv&();)vVjj>k mYI+7Rn*m4Wa| zj(~*HZ}A9hyuJsOV1KX0 BcXNA5J)>c(h*`%XR&mRf(Z+I?=($V ~R z?o`@8f4-VxeWMtpGnQ4FVldLhd$}^tee*r~d!lH~<9agxXQKCv-_G4)uGMylaL834 z6V9<5!UCnn>je2b2YC `4((t+Bb>} z=(z&N!-~WAyKA{F9M9c@6a1zi%emH^kHj0lZ$S5-(o1b5peaO7p8u-_(5n3^&OAXU zJJy+)d(EtIfAdMuAFU7;h71T&Hi&k5L}$RxN2MkWlF+f8^2TaftxQrmANmOhp*H=b zV7FKI1%Pzc#e<)4>+m>ET%NlFpYZ#Y`j|zro5Q{mau4PK#ba@prxBqCltJ+3*j*Dw 
zr}mwzUA%@uJKna-b^GI=5a7E`vz>qz^ALhaZ=_7g%sl>&N8(#>_m%Js5~lSHzvk^R z6;Te0$?%_iGCSDyt+Y>(xu5)Yb!<31=fz7hvI}_n7#RmQ(NH(-ge5Rk@vR>ZTYDgs z6?Qj26iMs|(b_uWAKO8!_dAn+>!F2=cQ~j|ic+D{AEe$#75*)&>bLStpo>MX#^#FD z_ur*xu504G#vfe!eGfzH9wQV3;V)}qE$3Wuf*IZ?-GLLWjh0-)#-~9kRoX6)0babq zv2^0YzsB7~T*xmY*_E_4s+X}`x>YUBk~46;aTU3L;ihR0Obrkm1;_v3>3jbUL)NNW z>v5Q)$HbV?cGN-R{Lywc@cR0gS1-iPB_y=DRsoK&Z$D_2<7==L=yeuHo^th+luo@j zCDGl|^pL>t2u({2_(P}?5<3~*83N1<8;1*kG4oAaxn$0R A88?>=4I{Yt$RQT}7*FiN z;#jGvERVgZ_E=x#=xB2V-}foQWD`25eQIlj$}b7uGKH>Y N{2%HxaJKfkB6l&WM+j zGuV6iTq$Y*_+)tfV!D0JB7`CeQ$z>)Q?h$8m^opPj}ok!sY)UY8kzQ45f!%AA^bGL zfo6{8NvqMp_>{Sj>v4-Yy+G2t&}CCsL&sf;j@{VEIIo6rW$IzGWdC%bEsykC5U1Zk z_&W8aQk5s=N+8&MzsBMvHc>;MUV#IJuEr)v#|eV^(%!A5)pdb+wrUE30F(Y*YaX(hHtMmY-9DMhLLcjQdK>MYz8H zb`cu!M8KC^bGBiK@H3(?7J` 0m5b1m^AFG=P2)^1kYJwKV~3G?lr7h{qo*+Gd8NsOp_ zF6GlLR^ctQ>9rlk!j7M`rRjN33E-66p0D%UA5H79imq16|L75j&PE!>q0oR#u@(`R zZPVqkAlWufA)10LHlERdkWwkJ=j}m6w9?rd6s{U# 5pUqezlZr(MU9Godak-QT=Bu~!bS_w{+!YYw{S=zr*U&U({&C!Z fkH6X}HwY_s+AVO2rTm(Xjog6pn0}`4_&E3Rx}E60x&AcHHZ`B&wPvyL zsgq8}i;B9_z10~WudnYFZL0V4HS6Eqo`XR;@j0=i+FWb2&BTZkdc8Dm0AymwgM!>Q z&s?4cu(~E7Vw+63%OQbPoY79`25@ GHRPs0q1J}vjK#C zXW_M7vFpigxy(1#U!>WNvX$C!RAcEMRK8dnU79>hY#H&@3PMNNH`th#3QilZ(yb@| z7#{qRTC4Lx!KN{yhdr%_bw@DyT(D%|9mYba^pS^y!T$MQ)YO|~rq1gF( PFOyHHT|)X1<0CmN^g9=}! y#P;no_Rxc zezh5}^bza*#K& KLFT3C%-|1*475RsSSsR01^7( z;Z|#|Diq?rOn^V@nC}D$!?vY*6s0;%!?YhLBPodv?VD0{sxe%IcTfH7v$KZ|8dR%N ziw3|x__uNA|Mt#1CWQd9Wkqu-VU#CyX$#~GqPG3@v(G+z$$huqUfT-Fu&u?#Z)b#! 
zOT#)!)^aPNGS2y#=v-kzNJpC1r8I@FQWQpYyws#9?*U2ZXq%QA3UnMm(Hy{X;$Vc( zVU5cwaHt57>TiU~6Kj?&|9;Grsnh24>fWv4+2; yOEIgy({dD)w+#ZaxySo_tuw zaSPBvH?<}IdH?;BYFjLKFz52~$L2t&v_ULt=L*dW0q8sw3gb@7ay*f6HFwire)F3e z@y|G%1qMC&ixT!ogKK{e8(hbvQF>}-sY(c4*i1_DmM3=XD%^2Ri>6IKKAdOMHMIi) zBJ|oZ4u{nWR2BZ}p@*IY%f2DYG*psDjCbM;5~zU1IA5%U#gQ=lELf wvzhehx**= zJb&W*@7KThf(yRT0lI4FU509OrX@4Y$*gAFQ{?G_Ds7p-8sn(82Jo|XCLIECSEk|T zl27G`$}zT`1TY+9%x#YeQ@G}D0%XO^sK#NPCm_ocUJHYBP1@cuY)Rh7lMw@0J~qgC zj+-ZZFUeh|yj9uoFkd(lyvoj}h~LwFuW84FFgo(dY )(9i z?K5t=`to(viSv=(>!X=7TW9Asy1Gl3j;9)i-aMph4Lt#2bOetjESKwMTzH~bGuT5p zjQ#sapLaB`|9ruM#{G})vsB~I$UtqKFQEs|#NUuyCW4jU;WA7zZrctsj5{(JBRl%h z9H66oZzA-g4Divs{jg!fwC4&6J~seP#qcLZ=>7sLeU~`6!j|>64NW=6pwQ=Xu(lN! zk8YEb^Y^3Kz9YH!gO83o|ABk&m}Gb;p$CNS$7-h#x{bzTfG@^B^~yz~@A!GGDxmT5 z2OspmV#tsW7(f$_;e(j;1AX0PE^T#Sag$ggE{t$wrz>EtC7_vL(B5E>YdgR~%G567 z)sdZpm~|P)&IBZ4aT!vEs&ZXdp=B`(-=ou=sv@SwU6D9S(MbHoCB=}Hl_AyYm|hj_ z-N}-!e%@4c)Wb5eIFFD3EZj^e#ef%cp(v!m%9!J<%hdHg!gV{EoH2~3h{a-HhB4zE zQ=0(1htSa FRase?9%n+3(&te0Z(%I| K?r zmnat(d(h>=8nZpn48C(`!nvkJ*!b`$G^<)}IeN@L2mSu;QK)ins0iJ+|6%f#B%w3u z#qTlr?#XALx%9pfBUV?-!>j2gFTDHiuq!UQ_&E+ PXJg1R)E@f|UG)B5BlKNcFg; zOB=n+HIGuiWkbaDtL_OQWBSqi8PCO-WKB0zRzaDOO_P8kDMvChR2ZEjfay{*B4PmX zs3T^-^zzH&c5UDNU5l0tw_W$MpWRo_G)KF(qoZj-mNUiBNym~J2?LWHVJcTWRPx4~ zlW)5Dn!Gow3F5=L4QjT2amU>kHEx*wR}RoV5x4b7Sm9$?E|{iYPF4Cmc)(={n38Ha zK#2gD_xhwscioVeS1Zj`+lotX&CQHF0iHk?#pEUs`xg*8zQ;L&@eF9HL5+(I@Y-J< ze)!tj>hyP5gZ(4BhzPw75BP`kmNq}7-*Kx$o)gi9T7+b}k0klwUK U--ZoC--}hgV0e`UbeWRn5%wzGK7eMvvlqHKfe3Y3x7ym zOke-}>vPUK{lu#^fKx&M?S+64=9ibb#eG}14vv)IRXX3G+Bn`h^}V0x)K{^m=2w@5 zj++6DS(X|E%piuYZJ0(^P8oE%Ps9P%Zz|mV^;h%fPriBZPu@+PH}lu5KfOo$)>F8E zT$xQBJaCu`SR_alQc;PGY)9C07cKhT*(dkIn)WHlR%><^PM-GhRp+1gld&d;Mye!F z*-;*~^4}$6Eo6}wSm6SU$}(83nR7vFzg+m;pgz62zYU(K=hv)J`Syb}#Qv%uB%G4I z7&!i%&;EH{-kG&EEU#u~N!@%J5qjMmH`(eb^A-*~qhIftrswc-gzihUGHzSAAUrIm zs)44PknjNF0>1pi v%R~TgthUtWjKfZ@|Riv8bAJD8bBTg!O9alvKf~}Nqh}M@e30t|7_$n!=_bi 
zKj!HdB_-`I`RRpyyLIe%f4%ICPC7s+;kpcy(x5D!RKuqxB;OCAqlzDoWCc2(bft&7 zj4@Y4E{mLSi0vvaUAlPrvd2FB^goM=Eo)6>$!pM8;KH9?u*h_wHTt|ujXfqg@*J6r z1|t%blYQ-ZgF9xMLC?_0+_Poo^UE`~v{!?ARVeFv=k1@V8l&P9wG5 zQQ8&H!(SC!TV%%{ykp9gHzwuf9kb#&6&w4^Yp?aVV(8^>WrU2bxHVH@o@IoN=PQ$q z+GqvgpO)~}ytd$*ZX^2luQefswdMC*SK2{@UROq{l6JoL^*7fKI _1Cb?Dz zWh!L`H4k(-!f3xmsZb@Fi2+Re^OH|r{pc@$xx2P@KmWPcTa3Ey$^z37r)dnmt f?1 z&K9;TzG7-7U`Qoqm9aT|I`aP>&U-FDe^*V <_T}*`_rafb@hc8&adhI?C*JF#*S?@YUIcQ4lsbBQ==sG zAVHDeXMdf*d&t5%Ttg|n=<8+6TMp{cqt=Gz)fS9vYdeV0YwO5Vp `GJhM|_?sO10{`hvVWN1b}CB)^J%dhNqmJuW`~v{?+G9%k?> zTUgbZ0ik1Jn4NL=i?$K{w5qD1um9z}&DynV-s^#T@0#cVG^z~G%B6OtdJm*?w8Y5s zFzXyfMVLBf5G>gfkKg+KlqsLyke65GFu+xXMK8CipM7lzH6~U91DruH?4=H_V*zTA z92-E37#pU{{NlVT2A#URD)XQ^&-rrk@?V_Lv+ENYK*WkW5DDvk3f_vlpZyU!w#Jv} zQL;s?5r7*N;_d%To_tMSUS5r)tUVYe6DCfq|BK qW6hh4x z`cXIl_U!rdk2|M-|61jXa }%0E r(-wYqnug{A*1H+VQQU%xv3o^s1+8S~Y<~Kp7 zEyrWgnRDixeBq$eYDIFH1?#t7)vjgJmqQ$MBy@CM3l>$muwn!P;}G>2>@1E=>|8(N znyO62U;piIjV?LooS`T7>2odj;51pmHN_lVvGz53)>lgSQr!x`gF19Nng9Uymc~o< z% |ukhOWEy_UFR}A1;Gk%FVqBz7-688*EAF=+E)>qDAMQ)~8Rc^9!g6 z+|uD02c5s^(8+Y{!@YkQ-|V;d{_@Q*xM!H?i7Qp^N*$dlp?hvZs#MX_Sa1VMU4Vrv ze)#R_Jv)x6?YK;M|NZ(`UvR-knt&70aETsa@^FRBi04_#`&t?=HGpl!@n`=0*z>=h zkbnCzR~gP-vuSL{wk_@mabWu+bR0Fbqi-*c|G7<0 OeZ+1I# z>>k;XaE3I49>BLusooDl=;$pk6TrEkVPu37*yE*+w|e82O@Dg+l~<F~#zoPQ{_{(SiG`I-mq zF_{jQ6~ ww4@&5= z%qrJquA-eqB_o^XW z0 c$)tIRy>H&N*XWosMoPOf-HQM0Adl=gkOFYydY>rk|UKMMP ptq$wat0bKMa$-q^Lcy-7@S NYm}Uo4xv0M@R})v1*n?*zo#NyNayuyBmBtgY zLK-^U#XX1%4s%|3<&Ec}j`!vLzqoqM{@S>B tB5Q@jZt()vNca#cS4%?$W;PpJXVjEbyRw_JS&fNv%W>At5zJ zGA{4$k3RhHZ);;Z=;}o%J?}<@UPs6Ep2sKj`t7f7o16h|=a38)!X>Co`zn91=uD>k zm1S}n!3?l84*&UL!DSbn+V7pZ(DlE(@XG1eUv=f{IU&6zF2YDOo@2|RAu_|6hK_!o zEYE3UoU{k8Hqh90$Fgtv`T6>7zk6tQM1zwsx*6lgF|kY;>H5x9AoBv0Jox7^gCGCH z=-Cz9eBQP9WabSYe8m-m&w0=QXbm11fm60E9GRnEg({+3Q$ +)l_`2zWEhk)UH@BS(ORvGNAY|I3Rno|}Ke4K+r7Q@{NB!P8GV@j(NGCNS>W zG-QQ4%~0{@spzGa8%^zJvRYy4$r$p5g=#p=zUMdhT`}RWf333L!O@(fGZ!u#(f|0~ 
zPvPc8t^pMYJ$QCdYzGqyTZ}<8VZHkLzn;EnR4w+eIhwfD^?MVc*U=HY@3C?He}C5< zZ-oT3L9&o}&M{*)sy3ywyK<0|Rj@o)+R 4j&X{83%#`sk;hZhrqQ zx4bPZI3>dbscpbYWN4p7GlGS=!$TPmvs}P>+%X3Vr+oU^fNKX2T6L`Ym;dC;P42w) z+E39F-So4(VNG#K9MTa@5xS%N8CKqL_vlL}jeDeA+0&;BrnEVK@Sr;zW@X-FGRQ*x z3X(Re&@lrW{*&cPK}aVwOw+1}sVupSi&^2A3h%?`qg3v=7U+I>8YZa0axcEY1{?=5 z<$&2jQL@)f28o0VMo5ztUpxk$E5x*EA5I-NZSv%qFTVI 5Fj{42T6UY5!V|BLuaPRfe*YmGF zt?vo1V*Xm?IvQ;1xE}#grsu&vdyL_R7|;6PuM;L*`Ty))2Y6J~*4?+xOcIK{J)gZi zlp?)K6#)Se0wMxZqzlqJGAK=u-aAs2Nbk}@?;_nU>a&B?WM*zV|G&<;69Nw~2_z&q z zyyq8y5e9Ugaz* dKpgfb)<=CCjn zMao$v(=bWVJY+xzR#sTRX3OTpEpFAJL9JhJ#y$3!K0Q~L#*J6_h#t`)&f=P@2*3@u zN)Z{dL lvxwxI?G zr!?2>1uAi2tT8P{nkw1BkoBeJQBO*aT}YCd4?H?NGooIs3Q-wHLi(Rf!y;V+`#Y%v zl1x*8I{-gqnU2rv3H*8T&y8z0Z|TwK$dTjZ_d9SS8_~D#2h22EMQfUjEDDz+=6V6* zyWk6ia8*_MdHDD#ue{Ty`QD6Jp9Y;fM~@ylbfS;wb?m1QlUqiRtLR;lj&K28NaG~) z3{N)5D*-wj<(Q@%Zr;6n)miVq|6@kn)8AcV-Tu#OKA%7DLZN>Et0s}Lg9I=CZFp1Z zn8Ndcx3o%RO0;GC*cq`s+co*~@2(N~_sll~5kSv;Ym%Z7bsjXdT>tK!KT&MDEj577 z6wO()c~6 L96#4&Qt2SXP0^;dYA(ZGkKKG-$^#w*E}3BWL4uYE{EvC zk0;M|e)6uIH^pe8&%|lPI=5)D&_{Hi3P4Aa1o;z0%E<&@Y)NA08O62l(5~6sF=NgR znlSab@-LKc^weVyziNg;_kbtBmBg8y)NTf?bn!_RVYwTHpj#K_cB&j>=!`_Pa_Op7 z!@Nqi-z&aYs*Pb<&7w8+E&v-OTD@+{*)&4jLKT|^Xuw;BP~pQT05cX}SSKE}a9(k@ zgAO~gZgYI!`4c9rz;RPPTF^ ve0{ zY+Xh?UI6q7Lx)Zv=F}(4mb|hoB$AB~$sV}Vp;X2A#v`duKRhs==DIk_D^UzX79&TM z_O W{f*>&Fh %`Wr z$K&JUd2<@fd>VJ?mFLN)p6Gwy?NQ}rBHgka$*YR<5_eV$|DMPQnVh4bXfkgRv?L;s z!jXOtfydy{fCWa-w6}Ka)T3poa>WbOsZnE2C=}3i?&c>9=M9LlRTj7-Z)c~C-y1jDi-sfwSF1#Zv z-FnXvWAk@u-v&|d!+0TEp#XOX!v`r(yo`naJE7MZ;9;9YahvzNR5A zYrWdHp6p{3BYNsC0y?B>#RpNL `$s(N@7unfY15lA3O!^ywhS^H zcox|B1rq}l<%>#sJdl!ym>g2Fs*oP!Ajs?L4wp}5r_Y>Ub^6SyukOG9{ KxdvQ95|5Zw;2V${QA32&*ge-BR}#9001BWNkl 8peDvt+&ph$u z }mP_>Ro%Q zlq*tj<>e{& x?nSFB))Tuvi|EjZNi$>nv~2u!TCN-U&dr^H2%z8G`A_rJcYgUz z<=l@w8s{M?8m}aWTX9{ip@#uo1RFM&fJKDv$hy5UerNGl$`{|0=8NB?FMfIU+^~DH zWqUhXA|;fNK+#drTxJxJsqpCb^6)8Sm?VQBXWH!Kx$`3*yX&@&89C%()91EoQunn{ 
zF)AQK06;iG2cUD;u5ilJ21ue0T&xeAzHCll1GKvBZf-6DpvqhK!fLM|W4YNUOEet7OuIkXo1`vo_gs1W)@Kn<}k_YI>On8AuL!Wna?qPAKok( zrpN|>#Eut8;)O(R#R}K1@bl)=Je;$^!Uipl!i{y1#)pHiVbJhWZCmu(KY4P7=!X0K zes8-zeYZ!;lpjPr9VAg0{lH0$Pnb|@54QncO*&UFO~pL&^aSK`ElOCrYITdIFISnH zk #MAU%iy~$;UrV4WJ{MvL(%4vS~-DIu%On%v3j&g2B}9?tgsj zpuV$Jn~FrsB!z_TI0ZvOD8x1`GGPrGEr4E9RXQ6C{qXr0U)L-DTyETNihwQ!L%G%= z(ynh#bjtteLxVL0eF%Gas@=xRQlvLsKnG|bDhy}N0V=^@kbK@SV;u^3$P5c`2cUE3 zJol4e*fx`tp96+7`u#DZ7mn=F;Y4c3UcE_^Y`uE)uJoVBAE|3Is;Eg6%?lQ`Lpm7m zW{9`5K{C_?3*Pky$5@aZ-~l^W $>uJ#%d}So1;tQ!wV@Kp>w5k-ijl0(F}lv!!AbX zNVzZDw4+?D@+GoP4L#XRZ9Moz^@7hnHPh?RZJ>Vx@DSmGosE_&uGeCiB%#k3?NX|W zn(}|gOr2V%ce7@@u4~d$@Z~aP%BagWZF lW{&d*t~Y{kH?EgD_7_I7Q*UvFHc#w$e& z0S{QuoL- zsI=pq!4s+u?vZf@L1k*x$T@V#h(XUj@$j265*^5PaASJcxP*`U%G!HLFUoLc#a)X; ziB86?*;Kbt<#HQR@OWg{A*=R&R=Hfk=T@qy#Br#G*pJX9;8hy9=YaLj^(pXN!de-} z(4kS0lzWD)(~Z7R1ki8vjAqdJ{$lKPpG`A#yeSd?UDmc1{=wz-R>f6|La(VKJImvD zl+CJu?jJEDXPefI=H}F?k{MvKtbrWHy+9}w2$IK(RwN=T7$h~yO9oz8vcmpK3@w~F z k@7@slSfEy#BG@d2TyS@UbHR) zEu7j#p;zG%lyly64*h=e&$y0lx=z}#c-D=)A8F>Q4Ud+8zEnG}SFRePC=c__Y~If< zobJM;JP;7k-o_CcAP5} E0{uZyf0|@*Z)m6+8C7Shh(1)wruG0lI9XULqoN-U13#SGn}>(b1D<#`bK{ zIK$0zGJ1|~#p^`?J&UgDUyRjr0bRcqpodxRcqt?LFdWrcvw3@&8Wl=rnYOjdmuTed z1y8+Er*@o2g@q_dz;I3>U_?e@0#^a!La~gA*hJPSgdmt`{_FMIy|sGom~-j1>?NvK z&o*y<+(-9D=~ZM?q@?k}bYuv%iqs~L%sE;(WH1 +5J83z?*PU=d4% zZJR-q7$Vx*tzW;lruqHpDx5Q7?t*-c>eU|SSXQBERn jS8g-FS=UFT}lS%LJ?3RN0!m ` zDTf7sFbv8TC0y8e_=I0Pa+E1xTBPV63Ni}DZ{FSKz4_Ber572iM*H?Trw$*nG8<8G z9&ckPh+ZYeQ|_W=2obvLU8+cA*dfvtkjPLr0I>58GEqJN07_&A9QxtJuYU}i88>g~ z$POJcq*mK~#E6Gquk%{9`|rK0b(AbWV%e6*r=WR`cx@dKH}?o|vj~=h7AX7@Dt4%u zqX(UT9Ens#c&?-NZW9Ie9z0T^^mDn> Du4*oO4^LL&sYGMpw0X1q{hKxWKFvSG z>G;K!yAHfup-8^P;gmc7v5+p3;=+I)PPy}TO0hm)jGppgZ1?7kQtY&sj<*pxbt-2d z0_drnu*`D!X~vp(ZJMm}w%L>ey2WIB)#g1#UoKl@UuL !t0_K zJ;y>O>F^jwrobh{g#lf4uWTzqh28D`<_vw0u2DjQK~d2 M&(=}IZ&%9Y!CO4lt>u1UjJ2YHEd zVgKP%L@EbaA3Q=JfvX;(^FfDnuMFLQA6ISJG4`8H>qcc}1Cu53JFC4=vgi^oIG4QF 
z-2DY^4fc|5gpTKqZ&*Nb1PPMp%h403#rA6T=1qClZuHONKka-GK+mEpewwky9*a%! z$Ow_H4(P~;;^oRLlcz^brrMwG*jK#T^M$fp+uG$z^?FzTzxwy?(f{^r`s)fK6|o>Q zAzg~dQKYYJq_9roxFYl2FX?RI!$#H-thkFVj)OBAgNK dM2-SqRZIKSx8%cP&> z+xAweSfb!E)fRF0l>j|SQpXjRabyV&dq;
A)JH4lO%CcpTyjrc=Rz~yy-oRmtRm(I< z (tWL( zH}9wsHIa#K$HqkP1Voxz*tNQM3a7QOS&Xc4(j3Y@f;cDB&wu ar^N$;G z4=BaF-TR#mxt@5kdv>3G2Tz+zP^Pm^?cBe=Z?Cp(Chy)$&@ E7;E9NX 6mQRN@k?fI-> z)glGbL=Z%3w^@&O@)pfssM+0j##ARpccb^*BNLek0n&IsIav0ng9}A6;XO{IxfKEI zK+c~}q?jC0?1Jh1`QJbOSiS$q(P<+WE!dS_b^hyJR2ay#?K`M+v4VM9Xtq !lB8lk&fhvb0HH3HCiV1u~HQ|^aGjhPwS=k176T$(2SCVwdc=r?%*(t7#7 z7;72D*jNn>q{&k53IxNGd81<^E+vc5L$+e9h~H8)_Jz_1(t7R8_`Qt>kJrlg?9+36 zGI=GNxQZG2xw(TF)Z{pq8Hox+?n1>Z jzx!MtPVr1JUFHakYg)mHuB;89P@bo15Gl6Ks?$3{I{_dCGKkfWd zd1#j3a=^Z5)7Gk0%9LE9VGqlRec>owY;wXIEuK+<6C?f(DR)Mnj~+WYwokhj*R5@k z6`tW+ZHy5>&!Q`O%CH*yh)vODMM1q7VG$amWB>&hzd*W~3q_D^Tjb%o-`It##?lQt zO4h1S;+9CcCoP}fFK?_-zgMxmkG*9DoV#PZ;{HwB;DRRs4k8*Uq>=##Fhv(V(K0z9 zn@)WE$>Nt=)vcK#B2ed{6Yg%`vQgXHW4tXjo9>VpNg%xeoH(F~mT_VdZ_g40F;2d> zIr*N;sLI?CEyNW1?6YIvE?mB9-Rut%em{AmYWl7H{+M8p?$#XhcA3dOS#~7DvI9)< z{(AQNqrb*o%KA?8PXXBEJsLH8_W5E3-&TogsiqT?O_TY3;+cf#D4sh!mwXOf7Q7Ij zNm8g6>TxFfE@)a~nl^uX*2;M);|#pswr%#NuQhnPc(Fq7Kqm&9CN!1-usqPh=Y}&X zR}SQRUs<9=%W`y8wGxRoY&r0Kt6KRV{UOa0kV(I|V$1fIDwHU`Oo#1z5}> zJd=F0<)+*X?yf2w9UJLG&!nsR_rr<+`rpq{raa;tV{gU~O+*%x*DQISQwWj>KXli0 z!6n6oU9xSG&1D;Ql&+ZtrC z(-npeJ(S<+)`Fzp*>xd40 zs)q}tpdT8fDU&h!{jYNin>KCLGJg5|lh?XV*RJnp>(amTaLsWVM60R?Y;|B2XR(WS zBVP*6hvR}W?u(P_XETBvm!v8<-nn4g%ikR^wEfRVKl|xM0qpzJmS%gge4z^ey62u+ z*}QTtc&ghDMZ+h>BDz3CCysvk&EA8Dj?DS_*uew-8^rW) A{?&ns9H{ZwS#lP $(baj8i#NRV=P(a2eLSn`R%W8a=$20FnJURXe zQ=e_puu-cRuTsS)(}Q?LTc#s{VIJ*+=QmLZ>3#IQJG?_FU=nS&h?e#rIdble5=Z$n z5S)@tlHZhJX&a@JA)MejpmUWu$c-X0bmvEL+3$fsfOL tlN9w LlHpFBny$+VVt&Dlk5tcL=N7FDT~m#IfkOy%i?#Jsadw@w$xrTs~lgqYuEc* zG<;)f!Dn(+vI7Arhu4R2T-c84f^7|b^nei}ud0#58NOT~)A{z7Q{&sWZ`FH!T-@2@ z_uGH+^y1AMH=Y!u(le5cb}dG{EzLy|6Qr=PRtz^K5&gZ~UCnj!LWC|-(Gq~pJZqXQ z?(_L(T<5$>{U=T>)viU;4`ibJQ2B+>4r<6CsD;g3Ob`LlHx6KTY~k#t>6B>M=MzK5 zjFA(^zc;LN=WAvsQZmlp&j%*o;qx@ZbuT1bAU!6EbKX@OLa>(tI ozp6zi0cN C1s89?+(%7iL*YA?es?7uTpxg$Z?rx1|oo-dDbFTBRR)dlPKm)k{FW8DF<{W zYtFJwSy#(FrIX~JGUZ?G-+Xh9kEoP+J}5`ST9xNwEv`O;msrR)36Z3(dMISsT=!k` zYSy_B+t9Jg+i%VtGlsLS2Thz>scnm 1 2ZmZ 
zhSf02F{gPnugXPKcq$WS3xb`68am#YA+FX1|7QfL7CDlKE#0)W)T 1k(pC8xyci|h;r;Dq$RJJUGWxt&{y%{3_F~)ooDL~)PJ@w&Bk}A&NGAK7X|}sh z*T<)I&IV1KU8#MG`jf1X^>437u=qovAVqnE$qV-}R|FXnEIeUj8rCV7h2V>#at;7M z{m=8k4@V9k*KL?T?UDLlA$wL1MQ(r+2d_QoVq7RqBv1V&wX>lMA&^KKHe8NFh6Qy! zofFtZdz;Kmhc0~c?YBb~tywj?Yn!&MWQldxB`HV12uYILy`b|59U4ADp^oe3y2j<9 zkWFfo!p<9Xc*>lQyL4^(#d@L}uM(Knbxe=%t=_%6SgCvkH~SRW7vbaZ2weQ`A}J2* zz%67U(F^xNqJxvB&57+80rd1(mUKU71klrciqq!;XBn#(?a)k#NV^=+1 *MMn!`cLb4Z{o*H=24u# +g-J<}7#DzywV9unJdl zKZ z+d)5?NvbT9hJ+QrfcO!>5;2t=lj!if!^Sro+_}xI)!lKGMOnY+K%vrw@@+?iel0*p zri9z*z^9%E0QA`h!^g(<@6t9?YSgFX8T=<769M!ry834stM9X%=?V-)FN@Hx2I#0& z%9_VnxA#!R%7ydZ(kaK}%Q|ks(#PvnuRcAShe|rgMl+i%$0S|VNrDTQB=b71=FvIO zAW8(d7_f3 P?P};RfPOYU$pKv;m_$sGIG~?4?N$%! zYN}K25VYwwXn5 }; z?pOnCthm1p*n2^Pun}`xrri<(H+F^MpU9MO4I9CH!#Qz_NFiH9c8JlgF_UJseW%qM zpWli-=T~}#E#0>5sVXH(9R52%51X@~9mK?D)uO$FhmDSXzx&(h&yRev*bGDfJ&P?^ zc({LA?DZTnkI=b2SMrp57|{7^o@+=r1b#PI?xbkWvdw!+)+|%xmL&vTX-P-TTJ&s# z2Cq!;5ETy_X#r|bskrz<;7Vb-l9*wV+r^$~(e8wAgyqTs>WFM0?2UQm!ZILfxfv1Y zV*_ASX!W@fIsiRLWSuqbX8+RFnW=pQm+jhLv{=FX^KvTm2=`TYtyo ({^``Yj!)j1V{~fAh2wuYADDbkw6~dKVzU5USeb%G zkfKyC&mM5|85iV(uq@TlPA2;HQ6fNEo@W<8lv7o{SK-07JK@4UUT_4$cg9JFC?~)M z6(ld$rZvD=K#`;cqo>aq)Te2KR0{{C<^D6vcb06~lB;6LQU^T_SLRKYa);xZC{1wh zB3b67Q1C~1J0;P+zQe}F4(t5ZE!Uf$S=J^!#}NVa^qk#vJoh=q8vATEL6Id`UZn=m z?Il}w7K$xh{FYLROTko2{yB3VX#M)@vvi_TlFd|)A_-NuWM5_+D8j%l{viw|!YcIQ z(EyMD>a$)2k*y&U{bAcqbd)0B=I}N-ydXu5@(O@%o-ynu4|>!&DH!`Dhx`;|ZSVBZ za2Z)`cQ_b#xGf3e6>ukt0QiXff;;-;x@g$sVWXkoEze5 =sq^UZ|uGl-W2#MS%$IInk5QK+pCh3v{>=z_9a1IoaUBK`J zguC$V-Y;iqbKVFxDU1`l6p-nVks#vcHh-KwKlhC`tvc?WJNH~_?<1=pzii9S{4bU& zx*J;VR|0g9LHYdJ$c(5Q&`H;z+PByL?)$N${M{oFdKP>LB7mL+7Y#$cfRsC tY|e<}CblTmHJ0N*&Ciw|-s2ZZ&Acqctm6=v5@|v-Ks5>1gi?YmW=W z6Ut^BM3M+($sELz!eG~sL>QA?n6q%{*l)i1_M sdo*j zZ#fKz7vgsxX!KIi{0mb%*}t4OrraH+y@~xnAQ&LoqmTmYQ~)(s-cf`Do-DI~X?>!2 z)F&*WJhDSF()4`G a5vOYd =W9< fAY)^ zAGquG8k$UMazN*@p~*;F7o+oVgexLm0H`eyb0j+V%bA4fO`CUpCq8ccg&l{E49}nY 
znGT?4;mEKQDfsl9<&D!T5(Okeg#P-!JZfC39>LnRmA@7&*sPjP2`;gE8QO*a$kVEq zo%o7?j$_cu$4di};HT<~001BWNkl _COI|&=$T|8Qag;lELJZYu@4n) zG=1%qdvZW0MGq~G-=4Qlxsv~=n&Rd2`*t_~ziZd3F}zTLysyZNw*@49gg1G(Ml1dr z?C>O?Z=QzdfPQB2+AaN8e>7v<%$YN}*Y=j9$KNTG>*;=h^A{*bHtxU9BYpUS^9U8B zIOw=bWSka&{z {q_ Lz3(RhYsc{k}uy^(1RcU zcK4!ray{}sV^kOpf{6(iC|h(47XcFSWq2dy5&(H5kCsd!Ok+X2-4_)|uxP~m`HM&0 zC`;B ^ zy$sN?VgaJynUG!Q0k66|Rrg({jQZE6O@Cx3x=n^!ic1E&{2bviigvYdw0`L(1se^< zBq8p?1z;dVBT;}e#if}sQop9D?bkjPuqS0# g&izlFy>Y4*G~->CoEDBCcjd{Fd5mw`)g6QJ(4yGf2)CK19kSR0!pN(`9a ze0K2I3uTKv2SV4?_BpAV?yP$7C!2OWUFG>=hdeCYws!3Z-E=!DO`=162aT&eyysgH zrMN7(_z^(Qf{TVBgXJD&J0I#A`hHSXLpK(0*jcd73&js-(ObXXVN1KeI#K1RhaMUk zqtoLe-G1@iA1;yG$?E7h0ueSoLg7q&EJ^w+{@|B$KH0sw^W+vSz{jF($G>^wnWr9` zp;*x5MUOR-@8~Uq9I1F~A{8BA6g+F#weIt(t5S7CWy+|3ZrJpD4vB8pxbhl6_~Ix0 zr=)BGNApX8Tx>JK-%>b;i|vO1O_0hGZJK 2Fn;sfsYwsretUBW#qmCHawv#s zUDi4HBDJfk=(J-$yz a~2Eq3tA9s6=uC{|$q-$m#Q8& Uk`7fC4vgZomjgPlJG)?xlzI5~ zh;A)Oq68u*$Y}M<>2umcF7wm Ni)pF-Ozg#z-C`c@HI^@(&ON4%I z)25?dhn_^+U2?EXJ~3DP!FGW?oPEIYh;Gq$&pqPdc80!uMRF1x8gi#1^-L9@GbWu3 z()ioG)Y1xBNFI zr74n-fpyvLxX*CfU2Lq}K7aTB5(#-%zLW}mzHHr&CgTV8I+9j_oa=hXGQpRZZr=M` zrRNKR<<5m=!hYzye$J~F*v5(H8D$V`wBU1q2)%dz(XpfZyd6ooXM%N1?GPe>p4u79 zB*&k!*sC#)JzGTqjMsC~Q0 DF#tim-qXH~r4r!#HvXSP9x> zn2x<~z`*_;Ua3~?$>;JsH_Hb>UXuqs$#dTP54H?p89Q_)WYvDa>rGwCok%{%*a|zq zD&_RLI}Kj*$A%5{Q1K&egly71VGjHiiVES#9gSn~!v@1o5qjQ1BIB${Z64O8si|F@ zA5SKX`PUs$Z!yT}q7t9f`W~h!2>Ds&NX(IGrlHXN5>ZKoC|ZL|0?(`CS$0`{m}_(e zgo5Ppc!lDf>l%kvHT3yogMw&YC6$ZPD;haLgT7w9X=|rim7ZUj+IzSaj$giMXPFA6 zi>}k%K6EC*4_ Rvcv%E zKoq}1rg= {svic$TE{5tM#6*`Vwyb%q>0 zAltTrgk}dsvlb*#At93t%fsjALOGXQ49aStqvIb`Iu|fV_34yI^v!#NN7o(Q #ckCww=4>(@k-H z|38ZHzG3g7$|VctUWPt&QF1})0t|QA1$0;lIiiJ7Rd^pdSnd+h7o(@lkL}*PUIfrn z^YCYqV 4Z%3=o*1N&-XNLHbZtB3c@cQM;3t2+o#+fbUrqB69frc z2*&WChM!^EBAZF1bAAF6@ebd}K#T?@giuIq%phFj5dFIU*mqq^KJ(c8RFw}>d*hkx z_+^{66)j(?#8xkqB(DJI5>rK&fXl;h6q&0XT0}?t4H_0ZtY_zcY+Gw4Tgp@oCj#iH zny$=q@L9%QhdGPPNEN=@BIS-3CjJXY=y 6Nm?XI?ItY?7Ny)BuF+nep4Z#>y6&w((A5z0mwF?D# 
zfmqj7UM|6PWWJUnDFR=3S%upjeY@tm2k=(5~WIL)w(TD)OfLUs%1U1*i=cofB0|gGyi_zeGQSh39}LaG9o_L zr3+EItF^{8z eDR(=viW=FCAnA(6iv8VaR70l;UVMg2hWC^s56pB6O3~ zz|wW`dFoUudrP;awCMA0vG?EY6Xzv*02WfnP=v{E$srk)MAXAgZa>OdzQBgRK0p3d z|6@mweqHnB>hHv8N`BWrghw?h0Eju LV&Vo&%vK6ZqJH{F)Seq zKzFUWEh6ho$gcH(S6!B-N3eEn<;Qso+UHcMHxcDT6sB|g6^EDuD^bO*j{`` H-z7eN#VmuS;>dk-FJ|6+l>sU_+q-NH$mjgOY86;ip_|0HdGk^#EAr*Tv4*a}^& zv4=rj^u&u#$Tm%{q8-|`Z{M)o&p!LONA=WKRp|*(8BurVOgNAkK?Zankb`}R%Jc19 zLmwwqi4qL@DSp?UJ_k0fp6mDfMSA8I{jA<|pitRD`L?U@WpY#Qj>K(jM0o=dI<}!U zdSi%3=$McY(Sd&b2FH%@_xPugW^ainAp+=GbQw>D*sD>@p5sv!6)9lbm32hQU9_zU z@)lZ$IQY1%ZG)-7Me8>etXHYrElYaoGJJTcw_COxbBE8DSB8!~j}VDqm;#wlkjFp* zl3}yyA}l9ef!(GkCysvqecuL6n#7%mj}MLhXz` j@{^}~1*inf5P38Y1wsMRHLuWy52xs{`$9XHL)IDdIsl!BDmG{k{pK4@Tf9`h z^Z?6dUR?px8@lmeL^?#4dGWe ioBN zdpvUW;?Ea*y-w_O)uD$y5=llVz=>VLMpXb5m&i#n-hm;E0rL?iY0~MvM~=5Hy>LMs z`TdtS=x5Ql!T$~$N^xb172fMZKi@R~J>2Ub;AwXqkMq${<7e0I)289^TV^u;K{pZs z^enpAXDk*QBhh?OL%%wpi--)G*DlMQDZynMw-k?kq4X{3w)RGs-j9tM^4=#ZQC=is zA?^YhQXa7aSY0BV;n*C|(ZT~@Gl`rK(eDSo{_d@Ey}B<=o|gS!-ts%^zFKuyc9~vL ziLwDUByKPw`gw(0nM@Gr;A(u_r`sUi ebg( zWBw%}Fz~i0Z#NA=plv0&rg5;o r^k>TVYfXQUdrpa2a5|ip>IFa}5#=?ki4$nw+q?wTX_b-@dEsD zHeebj7ix>P%;}N<@MqDjImlTD9NV`K=hUnPPkyB&)cW4#?^vt}#R~cIb z(6iv8VaR`kSnb;snyV0L*8+5|BM!$e%iwifThc;H)^93N=f!fVYDvlp)6}|u{|EbZ z>^VkPoa)iKBH31m`*vI;umO7!sR|}^k}o%?-74)r_{~qfdNymg;Ywm8U8gP0R;Oa| zYES;_&LOHp_j_fM&>{t9Jbzi*wh%$V=E}CYzPRSmc}p82RAIe}jEQh}vn;X`EUJHd zl=RW{&TWb6)w6Z(()+dYB@6W>hwk;rBy-0y*R{;uW02*b3}BPTtB@tkWo(1!`vYHm z-*NTcZ5uym*f7 UkM^k5$h0M06NEsElMRs|$7UkzX6M!( z+!KJM&K=uDi< H2kFUKXX$qn6=FUY$F> z3oyrK203@vxFzI3=mr)={FR56{gZLa*Szq?%aw0Q3FM{Qwmnt8c!|S4ydQWn9UB=G zmo81Y%h+(DofYo}G4~Oc`}O`n-o)=m0R1LT{`Fk?ti|d_OEd#%V8lLGOS$7365i~v z^284=Q_bZYw-=ACSaRR>TtBmY*Y6(})3QhFQPC3Bi`K{+Oh_bOG~!hv!!~$*JP-^~ zls87CVnG&k2wXXI@}Jv({D*oaay@)a5t3ZBYwP2B4SJ<$-pAV6Au8hakYbyNz8NRn z0mQ+`hA9bv3{koavKrTQM}){ol~5Al*JeSBmQA0(_=AtuY~8;zZbX_$XyJ7zP@sU; zW%#(VFFar5HDdIlSE75vG_(*h@3IiNack6u;n4R?rnMV(9$qxQXS?lbR#fx*<#D;5 
zd$e}7>Tg7Osg6YSnCv*%O$nC2E;t07IG9Yvm3IrE6e204NT~b^W_rM6Cz&S2EnK;N zOtY#l{B4vtZTW_ZRV$V2n_Z>i47g>O E2f?^k&RMrZ+C3DCJDG1o;PvSf)&hYr8{{_xo0Jv;rQ zbSg62`z)>Fi2!<9XYM9{@1(&R`7Cp)*Mku8?`!B`fs{l;ves_fRjSqtC3oHATexAD z_fJ|L)vkHf_Wy4T-`7i|Lv~ctWwMPRF%=rDs464+KzbbEt3;A5vaw92?+Y;-dY7a?Vu-Rwc{nNsg8}k+eH07@MPv%#a*ah$ zE9Pb|OhO&q4l=s%%};+W?eb2~t{di0{o{Ijr5`R+##_7QYY!ADRIqxUT)8U6#CZSh zl_?tU>ky+er_LmNdE)z@;x=vCvSsz|_2;g~VzgTG=23(C4Cwa2J$JVBYIGYH-E%$7 z-6}oVrX%{uv5DcEOR?;t(k*x%2_X}n=8h@T?&IJ7+^cv0-aF%6_qr53B+}TA7Clk7 z_A5(cWO`D=-ogEnZ5uqzEGrO`6{>(9)k`**@v=q+BlDMYf$gz%-l(yEecag;Tr0~Q zvUJO?JQYh7*{}Z{pmPB#H$s;s-iJPD*x1-Xo!dtG(6h{HUmR!z(6iv8VaVqhYaUHB z9$h-u0(3-0=tI|`0wr-KE8DR5Q0YoV@@~zdw|>d67c5wy6t7;Rd3IIn=~L(d+2*2c zWSb$b84loJ5_)XG{1$rUT#Z%WN#MxWzjkc#fq&Zm$& (zei$G&RINKc^HF;)i&lZg%fSy$rJJ~oRfSv^x4MRR@ zu-4fn8UxZcx|&7G9WS!~Wx2x-o%64mObLeLY}~Q`xtDJd`_$zdca$$%rq}{aqT2u+ zjQ5+1TDEYKk}1)IdYX@uqLMXmojP#*ME@qSUB{=H)$RAo{`((!{H 2@`nbeP;7rI`->G}gzVk*7O_Q)odOyw$ zd-S;D{dWeoktJ%K-9vXN;L(E ;Px-Wd22Psybi3xSEeeI~r(!%|<^-ODu$Yi=v%!DNbjVR83gS-+ zT{v|<&~N(iA;bN*2=jf@zC%x!ERg%K5{}ThJ0N}nME)WS=$xR*9I_>ur`#Q)qhlg~ zp4Lb3rhhL2=r?@{(l&rIjJ1lgXzZnc4yjVKVqI)oLxj#vTR5oj2;Et`{j Maf^*{f9ptfYu^B#$^BlT>6X|Bm!;3RBeDkVkeDpz#lA0aA7 ziKc (-~QcYHVK`|_++pOz|Cph%PZ@6KLFCGyA&T?A;s z6Fh=&IvpEGY+6L!9Doj@8Y5Nbs%=U2@(Z@TcKPb >aNh1WySzQ;=+XoKVB7@c1Y>jF>@Sdal${`<-DwJ$fQUg@*6UOY2>Z`0m` z&lby<=QEEiNOAn<0C@^m%Uwvnx*JWTPgK!|ZV??EJ8e!ZoC7n{{Y1u+vKfc~ddlW0 z(;ao5v9}b{8R_NyShkRU<(1+vpu19|kOJj-Pzw%MP>M5GY}o!>Y^9P}x(~ff?b Q{@{Z&GtjU1;jPiAea?6K_2~ZOzwhn@(LtXk1YJTw(K(^R(1vTr z8(drm8tz Q@b`do>;xfV){ zJ)}EK_HaW3i{MzpWn?ui$wwa96H{bz-~k^$ZN}`TU79ug{zi`}qtCbP^P|rc$&>4V z%B~IQ9Om&eqG@43|9rya4`aKwX!K8zhR*0^N%!kT06pEOIDIZ~im|3qmOaS>M+$VB zyS=8!z$f=Z2cs8&&cG^WI8F~NS{I+|waR5rq|eAQ da4j)j^r zaX?3FA;6ET%K^k4zEVVvL9(ur5eo6Xb@Wnm#X4I;3m0Oi#9#h6Klkvr-^?4{vt!!X z=xMpxGG$bMjk?9VyxpQZbEt$WQ+C9;s87Rq#^V)T1Q6_jYcEO&ysY5*tm6)aRuAl1 z;hH8QbPn)DwCnT3Q_4S=r+He2cCGLD{eJcJF5Ro#8RZ!i#ppjekz@zKsqlm|&OGf8 
z`W#r^V84g>hC?z=C)(2oP8;8TOuMYD`Mz=6-ls|yD|lE#+Zq@5;cODuK2>yDhzMQc zKReK8;7tNVpHH3=0rVUFOlH*iB7mMzGkt@v^QU3F8KX!OJy3(-VjSXC$zwg9))fq5 zF4860+y)dq*e25w7p;kZs9v?QCvWhOGVFAn-y2$?clXY5x=c9%F5m~K*t9rkPC`-8 z;3-6ejkcsvBA0cQ?Es_Ilc&vpvsbfv>DH#S?!b}uMe{u~!h> e zDo;g&*8nb_Bl^mC#4eo4xMl!U)3A#CrAzGCSKo~vJ$cH=IU@$Adpfk@E3e$W^pizn zREer9j6|57>}?TtNQ1;HZYX31NOembARv0>s3Zq9+#b^Lh7}V1__E5M%dH2FjVYJ^ znRe;*;B5Q$M3sDxJ~T&nD5r|9O U8r!W| zv;U^w=ptuMuNjB{dV0-lI-eIcbj=Em@hM&x&;@u12Q5UY7Zt&v6a_#BoLG{UxOCOV zC+k!z|8qJIF MH>?U=V< zd5ezqsvo;S_1Y`m-=~L;HY%1o*L1~nBoFG!AdA5R-HlGefUen`!bgrBBnho`vf}o| z!}3(+8HTX(S XO!a(4Yd`t$gLNLf z=dKqNPF#02-dtxi0SrPQN5*5tJzOo`=WK9QLY+*LaeG?sx@Izx6{yDHnUuMxO~_zu z-_b7zmM!oMwCL02)7 d~oZgL`ANzFtQ6;WmOs0(pHNIv-4+C~p+kGDk)vsfEsB zj-*N(ckbWc^o4>2e@vHwWxz?d>_42Rc>ZVi%kV?L2B3q;$Rl@BIiSm`eBk3nD`T6~ zsv6O9&wzEh(U*t-`i-8^3_2g$)>OwB?~(LW0lK4TfrYDg ?;4eo{pVj1@rf=*_@8wy4_;}TPPd>dQ zr%no)mas8(0bNA3T*`w~=$iszTo=|*C`5{;C#l*6Ly6?|cuDx2dk91`fI2KLf%ORi z+A(U}#4Ppw^#A}M07*naR5k; Y6Y z4Zt^#U|FK(??DeUlD0^zJ7gO&`E(5x1>$%qIb>QPa%6=RStr9G=irw=G%ud(;b}K& z{8u~Aucw2ZqBLbt_9zuLY~(oDe4vd{5W`@-s%jnzL0})*XOBXs1NPiO-8wWKGiJ -QWeR5W+)9Z_(pxeB239~H7mf bg7qi<$F?*E8G=+11cvI>BM(9Fi!{MYjL?O1U8cRRjnK$;u;#sQC zw07;<$}7!UH?H HK*&$4KNw${o#U5H~@DE=X}Mprb5;mNgr-bI~7<4{tH#wf2h{ zZh*bV;~@(&orvV2sAiC(>GajtC;nKybk(X^b5<-ko)#VUT)ArN?|slZf1W(~UcKku zoUy7z_k)6feFiV)xTZclCg=4Vlybytg9Ey7LgDG$pp{6vin==T5HeY)d?ZQ8+dSni zYq(d10+wU!JaDi|<-++lq+)v0bZ8g ELk$B70k5Vjq-#O~mERAzB`A-; z4G)_p8!d*?A8R*kZdCWhGHcQ^qm!l^O2L=59y(s2Xzr(XAwuUZgjYzp !H3Hf=c#s1Z~{yG^of(F z-+K10+vlfnTCe-CO~=1~xX{xNe Zc(6M#l_lx`H;xlw~ z;>`K6og3H7QXBpZp59D*xd@ b_VF%kKp-3E1NZh?vnF|@nU&u!r)ec^|b z=lXmwbmFw>L%N;MsAZnDcJ19SSFE^AHQ5t7#>tt*4(BN{nM=wDvKW?xIiZjwhZKck zxZVsC8x^sj=;aRUm@;yVuogeq`!Z=>m2cQo*Da1D3jBFCxar&PzW;d9vNbD4_&fe} zwR 8GVzz4#*JZZqU_@V_U^ zJ#yb26Fm|=?t~1<>(zPIKvmI^AVhEgZmO Uvwe*OAqHNhFA+)EeAx8v_3 zbZ`_=azH5r3x-z7;p0Axedn!4S-Pz?LmsG1c##O8XTn8E$&mgs&F(QO4OASSQTtm! 
z53}4IMKhMI-(ET^X}K@iyd!t%QpMK!B)Uh)cM@;&@^EQYQ)q>gObH>2R5+Xw(Wyjx zdH)WrYfVn2q-#p%{#wVBsnICs;>9yo>Lx25#UpgLWr(Mxu~eew&-YlivwO|fts{$- zEZscXBb7=tY}KPG#B(l=&7*YQZU`GvjhnU#b5vpPF3iNZ)GA13CaGQxGP3YuS=_x} zzoFy&{{PvuE#0>Fsj?*s4q!yph>;$JbGc?UID5d=p_frzbrsgc1D2fPE`IOGM2YA> zCo;eL_Qaylg|l-C<;^>vY!;)!=))vJG%UI;K$(MYJldmVcGhMq>b~|?z4*BC*DdUm z(lzt@{c@!SjVk4S {-H(qPH!0TNt|iE$kxv$zfe4^yu>}hc_hcy4 zBU)htb;-m33c@moR>Lg!u kj9bcr*OJy^qwCLr_(bn(J-oJ0( zF)C35koCDvIzY#Vb|XNyKqTn3+i341qB9{n@!#M7XkO@{d)8(2^5&`7AbQK%S#xwo zFDs@cA$^W `^Agm{T|Sklmb-mwNhNh67LqCWbBI=d^W6{A z`*!NJwRz*0pLxA@?TutRUgU2*ye=X(9I^t =X@O7p<&&>b-K%_dx=~p 3kj2)ex6XPE-DSz8G z&Evc>JqkiQQqPF~V7dWGPyT_y2SZDq46_Z~dhhoA`VGHcZSb_*SAhb}wGI0w4U|o% zW0V5pPLZePz36Vcmk{+pY^WR>v?acPzie4 (`kD52HSM%3i>j!uU84kgp z99&FI#h4AANddpXmgG<(QO`U2^&3eyK|6iHh8=~glqoh_m+48KEpfRg3@mS#xQKc> z>N zu%w_4V_N^;YfBd*KK@IiT~O-0 09^=9aw~ z#sT6yxRdmPzYr=^z%DJ-+lSt-{u_u;;WAv zP9FPJa?9CSYuApdRk31Qd`ncM#V^aS$OAiD=)bBuoeddhrp{l`xLe}}OVWESu5^AV z$-UXJd!_6?&qu1tq9w;9&Go4d&y52U7gdO@BpQzM-``Gldw0*~5pi|ugvIV|#?VuI z&3#9|Dw5|vPi$2kAuN*|(8W_2HaUQ{A7^Wz6oO6Lv8C&`#MXVGbR
Nt_pX(4M~yHXp*xBmTC^^{VBHrpe}rD3Kmjd&-@ZLgh~@Js z5|8vmz$;pyAY%_R8iL=sM06^_fb&NGyF60AR88g$KDhJX;r04;?J&v1DCSy#Zj+oa z?W2XoTi1&{bomfo>(uGa_un1V_x32Qc|t-!%NgzCE^d%q^?CIqNmUUuBb_Z>0_Xzf zlax+Lc96M2=>^a&bmhTbH^4cEVF$G3c}74)ZpZ*2Naa}tndrPtOi}3QA0fNf(_VG? z6 D z-mcd>>%1zJ0bx{^;eaQLZUWpmvLYPhz*3Yj=FJ~2UR>*~nl;l!GC0KpPvL<8^ymKb z#8%lBfbPf_mEtY|^zgIe-m9caU# {T4y(qbsJI}~Updq<0eMJkcSiiGSjTed^&g{27ZRzUjwW?NHs9KI1t;u8>qTH%^ zgheM;twint5hd8VM7uVBcC10A0?(woH0rhHuJwC^Dh})2eXeBFogV%ZCL;831dVu_ z_W(<>6>z9+i@J5D#Kpz^Pvlp*a%E4$E&E}g_2VyLsE486<=CN1fD7|T!qPNZWR9pj!6Z}F>EM>nkH;-v6F+4_k1M;s zRcqeq&e@Ybm?hgzC7&Woymip+rRUeTO`gt93>hT*qP`n4cJfR9wk>XITK6nt?R27{ z60>z|yim#kpkr>K)@Rw+#i&Bi$8vu8?DIpfmo4;Md}jZ-!T1gw|E74}ryq-#tc!py zGDPBYG3_pt;$#V8KU~r9*veJwW9wFZArhfyg@wLsj1fT3f{TVBpE84;bE@)CazsXr zyRAtMm*eD7m~>Igoy%l6cu_HuG>^Gt!`6ba6`uc}s4%ll#gc7%#=cmhP#Z4Hi9X>_ zkTkEC6S<&n#l6d56)O>4Fo+#Ea->h`!S4=Eo(8@d_w-i(_X`g0)pfCl=wG~Bi|etA z*0pdA8t86IvJ-G<%G-^)becW;>TYWAAb+Fw>lJh7dU`;Nt~{tAm?V c2WQ4Is|-I7yg8y-z^33VBKZkb2_xUMUaREY&vf%w6J-{9s~b8u zWl}V6xgj2d{QMI zp %3+J(64g>Z`Rk(+Gal=v))l8uj`^F z0G*TKt^(*596nU9weZue1?pAK+<@NP|G}N_z0>JqHE5N~?t||pSjaXh9+fQ1;y%^@ zb4_TRQ^28L1`HokWJtGeH{DdC>-(es)u(IQPrXF{xf-A&Znj8*W#`t$&6~cqZ2I(H z{_a^CKX=7##R@)M`l%-#>n=0Ouj2OIFTpVkpK!1EMV+}zZN`x)5FY{T=!@qYLPY&& zgVVTh&H0n(jD;KHHw|d;awSNB{;gMe|H-q9cW7FFZInbY07py3c?2~cIRh0kKMVjb zk)tZ>hR&Q>-QT!z@bB(9J^vkP_h#+e54<}^jxL;sAv4}EfCyayA 7O$)*~ z@__}jXP0W&u;ER!vQ6oH>^=C!Q~95H>agrEk#biADK5PF;RqcvxNuuT%N_uoDeCd{ z8@I;RsPKFQ&{I0undrC(pl71RNX<~r+NR%Q8+}nj7q+MuBXrU5BqCSa#)}Gu8BEk! 
z6)m(jes9s2$`;PlZEN+q^nPOOkoP{(9Lf_N_FI8T3EbZi{c+E2D1R_gREYwNHkL0` zu+skh`(5gLYG(7s9_#nd@OJLoXsc{G1$|sy<$oh|06K_kA)^0d?>oSxsIu>G#SRmQ z0tQ4>Pyt0i1OyZj1yn?32K|{`cQFS*1=N*hL0v&HAS$e);+h!6EP@~?Nf3;PfFwy0 zB+pED#rOZ8`>JOK1Q}qM>2bThZwNirRj=NA_3pd(o_o%UTl)0x{@N3dmNo^VJTqoY zv&(w)d^9r}?PwzL8${1{HR#39#2Cq-;u<73QZQ7SeBt<9s^y{rB9LL(>Cl%Se)#eA z`y `jddU7HOBr$+*AdVsF;JsMS2S8giXUO%v_{kU@F87CZF z?@Pr8ZqOqG FGr#>dr&IIhX@h=XQ>sG0l{V-V+WiN9^R13GFsdmr zT^M^c=#t|dUYpFJ2O)QB{AaUT_dKs{N*na6?itX2 AyMs$DclJ-K0@xMwc^l3umfjZ8Kj6)Gb^n=KQ*0 zXYPr0vftm=)lm{RdTig;^^QKObJfgfOW}!*s)}N=U6f6WE`#ys@ Sjot+4iO>K;EXT7F8ChV|!@=gA-uPdN#kEIxp%@woZ`0zd)v?V>r~ zFU)CoYU4ChT HBi$UOjxvG_9xN!UMfnS#`H ze28OM?|(d_QI8ATr!@1`U9pER9&-PHHzEp!nWi4b2wp}|8x4bwRn`Vor%qd+e*UFv z?!EbMAEnB0ezkPPQ;q5$b6XT?Y*5s)=DAXoM|as{mI;#eo8osITPym^-r8RKlv6Tl zRIi@(>hq)fHa`BiT%AN+O;Z%taTKV8;egp>qs{BK48*hUHwb7LudR^fSQIfrT>6Ln zHn0ewqvf?exOuVku8O_DZ%bCL`SgQHlLt@y{FC3O<>evNyhlT}e4G73?M&l_DCV3? zq#ANg`oblH!FH&3Ho)bQ_+{