diff --git a/marisco/configs.py b/marisco/configs.py index df7e22c..18b5781 100644 --- a/marisco/configs.py +++ b/marisco/configs.py @@ -14,7 +14,7 @@ import os import re from functools import partial -from typing import Union +from typing import List, Dict, Callable, Tuple, Any, Optional from .inout import read_toml, write_toml import pandas as pd @@ -27,7 +27,9 @@ MARISCO_CFG_DIRNAME = '.marisco' # %% ../nbs/api/configs.ipynb 5 -def base_path(): return Path.home() / MARISCO_CFG_DIRNAME +def base_path(): + "Return the path to the `.marisco` folder under your home directory." + return Path.home() / MARISCO_CFG_DIRNAME # %% ../nbs/api/configs.ipynb 8 CONFIGS = { @@ -56,16 +58,24 @@ def base_path(): return Path.home() / MARISCO_CFG_DIRNAME } # %% ../nbs/api/configs.ipynb 12 -def cfg(): return read_toml(base_path() / CFG_FNAME) +def cfg(): + "Return the configuration as a dictionary." + return read_toml(base_path() / CFG_FNAME) # %% ../nbs/api/configs.ipynb 13 -def nuc_lut_path(): return Path(cfg()['dirs']['lut']) / NUCLIDE_LOOKUP_FNAME +def nuc_lut_path(): + "Return the path to the nuclide lookup table." + return Path(cfg()['dirs']['lut']) / NUCLIDE_LOOKUP_FNAME # %% ../nbs/api/configs.ipynb 14 -def lut_path(): return Path(cfg()['dirs']['lut']) +def lut_path(): + "Return the path to the lookup tables directory." + return Path(cfg()['dirs']['lut']) # %% ../nbs/api/configs.ipynb 15 -def cache_path(): return Path(cfg()['dirs']['cache']) +def cache_path(): + "Return the path to the cache directory." + return Path(cfg()['dirs']['cache']) # %% ../nbs/api/configs.ipynb 16 CONFIGS_CDL = { @@ -411,7 +421,7 @@ def cache_path(): return Path(cfg()['dirs']['cache']) # %% ../nbs/api/configs.ipynb 19 def cdl_cfg(): - "Return the CDL configuration as a dictionary." + "Return the CDL (Common Data Language) configuration as a dictionary." try: return read_toml(base_path() / CDL_FNAME) except FileNotFoundError: @@ -419,53 +429,61 @@ def cdl_cfg(): # %% ../nbs/api/configs.ipynb 20 def grp_names(): - "Return the group names as defined in `cdl.toml`" + "Return the group names as defined in `cdl.toml`." return [v['name'] for v in cdl_cfg()['grps'].values()] # %% ../nbs/api/configs.ipynb 21 def species_lut_path(): + "Return the path to the species lookup table." src_dir = lut_path() fname = [enum for enum in cdl_cfg()['enums'] if enum['name'] == 'species_t'][0]['fname'] return src_dir / fname # %% ../nbs/api/configs.ipynb 22 def bodyparts_lut_path(): + "Return the path to the body parts lookup table." src_dir = lut_path() fname = [enum for enum in cdl_cfg()['enums'] if enum['name'] == 'body_part_t'][0]['fname'] return src_dir / fname # %% ../nbs/api/configs.ipynb 23 def biogroup_lut_path(): + "Return the path to the biota group lookup table." src_dir = lut_path() fname = [enum for enum in cdl_cfg()['enums'] if enum['name'] == 'bio_group_t'][0]['fname'] return src_dir / fname # %% ../nbs/api/configs.ipynb 24 def sediments_lut_path(): + "Return the path to the sediment type lookup table." src_dir = lut_path() fname = [enum for enum in cdl_cfg()['enums'] if enum['name'] == 'sed_type_t'][0]['fname'] return src_dir / fname # %% ../nbs/api/configs.ipynb 25 def unit_lut_path(): + "Return the path to the unit lookup table." src_dir = lut_path() fname = [enum for enum in cdl_cfg()['enums'] if enum['name'] == 'unit_t'][0]['fname'] return src_dir / fname # %% ../nbs/api/configs.ipynb 26 def detection_limit_lut_path(): + "Return the path to the detection limit lookup table." src_dir = lut_path() fname = [enum for enum in cdl_cfg()['enums'] if enum['name'] == 'dl_t'][0]['fname'] return src_dir / fname # %% ../nbs/api/configs.ipynb 27 def filtered_lut_path(): + "Return the path to the filtered lookup table." src_dir = lut_path() fname = [enum for enum in cdl_cfg()['enums'] if enum['name'] == 'filt_t'][0]['fname'] return src_dir / fname # %% ../nbs/api/configs.ipynb 28 def area_lut_path(): + "Return the path to the area lookup table." src_dir = lut_path() fname = [enum for enum in cdl_cfg()['enums'] if enum['name'] == 'area_t'][0]['fname'] return src_dir / fname @@ -478,31 +496,34 @@ def area_lut_path(): # %% ../nbs/api/configs.ipynb 31 def name2grp( - name:str, # Name of the group - cdl:dict, # CDL configuration + name: str, # Group name + cdl: dict, # CDL configuration ): # Reverse `cdl.toml` config group dict so that group config key can be retrieve based on its name return {v['name']:k for k, v in cdl['grps'].items()}[name] # %% ../nbs/api/configs.ipynb 34 def nc_tpl_name(): + "Return the name of the MARIS NetCDF template as defined in `configs.toml`" p = base_path() return read_toml(p / 'configs.toml')['names']['nc_template'] # %% ../nbs/api/configs.ipynb 35 def nc_tpl_path(): - "Return the name of the MARIS NetCDF template as defined in `configs.toml`" + "Return the path of the MARIS NetCDF template as defined in `configs.toml`" p = base_path() return p / read_toml(p / 'configs.toml')['names']['nc_template'] # %% ../nbs/api/configs.ipynb 37 -def sanitize(s:Union[str, float] # String to sanitize - ) -> str: # Sanitized string +def sanitize( + s: str|float # String or float to sanitize + ) -> str|float: # Sanitized string or original float """ - Sanitize dictionary key to comply with NetCDF enumeration type: + Sanitize dictionary key to comply with NetCDF enumeration type: - - remove `(`, `)`, `.`, `/`, `-` - - strip the string + - Remove `(`, `)`, `.`, `/`, `-` + - Strip the string + - Return original value if it's not a string (e.g., NaN) """ if isinstance(s, str): s = re.sub(r'[().]', '', s) @@ -521,13 +542,14 @@ def try_int(x): return x # %% ../nbs/api/configs.ipynb 42 -def get_lut(src_dir: str, # Directory containing lookup tables - fname: str, # Excel file lookup table name - key: str, # Excel file column name to be used as dict keys - value: str, # Excel file column name to be used as dict values - do_sanitize: bool=True, # Sanitization required? - reverse: bool=False # Reverse lookup table (value, key) - ) -> dict: # MARIS lookup table (key, value) +def get_lut( + src_dir: str, # Directory containing lookup tables + fname: str, # Excel file lookup table name + key: str, # Excel file column name to be used as dict keys + value: str, # Excel file column name to be used as dict values + do_sanitize: bool=True, # Sanitization required? + reverse: bool=False # Reverse lookup table (value, key) + ) -> dict: # MARIS lookup table (key, value) "Convert MARIS db lookup table excel file to dictionary `{'name': id, ...}` or `{id: name, ...}` if `reverse` is True." fname = Path(src_dir) / fname df = pd.read_excel(fname, usecols=[key, value]).dropna(subset=value) @@ -543,10 +565,10 @@ def get_lut(src_dir: str, # Directory containing lookup tables # %% ../nbs/api/configs.ipynb 45 class Enums(): - "Return dictionaries of MARIS NetCDF's enumeration types" + "Return dictionaries of MARIS NetCDF's enumeration types." def __init__(self, - lut_src_dir:str, - cdl_enums:dict + lut_src_dir:str, # Directory containing lookup tables + cdl_enums:dict # CDL configuration enumeration types ): fc.store_attr() self.types = self.lookup() @@ -564,11 +586,11 @@ def lookup(self): # %% ../nbs/api/configs.ipynb 49 def get_enum_dicts( - lut_src_dir:str, - cdl_enums:dict, - **kwargs + lut_src_dir:str, # Directory containing lookup tables + cdl_enums:dict, # CDL configuration enumeration types + **kwargs # Additional arguments ): - "Return a dict of NetCDF enumeration types" + "Return a dict of NetCDF enumeration types." enum_types = {} for enum in cdl_enums: name, fname, key, value = enum.values() diff --git a/nbs/api/configs.ipynb b/nbs/api/configs.ipynb index 92d8e89..619d9f6 100644 --- a/nbs/api/configs.ipynb +++ b/nbs/api/configs.ipynb @@ -32,7 +32,7 @@ "import os\n", "import re\n", "from functools import partial\n", - "from typing import Union\n", + "from typing import List, Dict, Callable, Tuple, Any, Optional\n", "\n", "from marisco.inout import read_toml, write_toml\n", "import pandas as pd\n", @@ -69,7 +69,9 @@ "outputs": [], "source": [ "#| exports\n", - "def base_path(): return Path.home() / MARISCO_CFG_DIRNAME" + "def base_path(): \n", + " \"Return the path to the `.marisco` folder under your home directory.\"\n", + " return Path.home() / MARISCO_CFG_DIRNAME" ] }, { @@ -180,7 +182,9 @@ "outputs": [], "source": [ "#| exports\n", - "def cfg(): return read_toml(base_path() / CFG_FNAME)" + "def cfg(): \n", + " \"Return the configuration as a dictionary.\"\n", + " return read_toml(base_path() / CFG_FNAME)" ] }, { @@ -191,7 +195,9 @@ "outputs": [], "source": [ "#| exports\n", - "def nuc_lut_path(): return Path(cfg()['dirs']['lut']) / NUCLIDE_LOOKUP_FNAME" + "def nuc_lut_path(): \n", + " \"Return the path to the nuclide lookup table.\"\n", + " return Path(cfg()['dirs']['lut']) / NUCLIDE_LOOKUP_FNAME" ] }, { @@ -202,7 +208,9 @@ "outputs": [], "source": [ "#| exports\n", - "def lut_path(): return Path(cfg()['dirs']['lut'])" + "def lut_path(): \n", + " \"Return the path to the lookup tables directory.\"\n", + " return Path(cfg()['dirs']['lut'])" ] }, { @@ -213,7 +221,9 @@ "outputs": [], "source": [ "#| exports\n", - "def cache_path(): return Path(cfg()['dirs']['cache'])" + "def cache_path(): \n", + " \"Return the path to the cache directory.\"\n", + " return Path(cfg()['dirs']['cache'])" ] }, { @@ -697,7 +707,7 @@ "source": [ "#| exports\n", "def cdl_cfg():\n", - " \"Return the CDL configuration as a dictionary.\"\n", + " \"Return the CDL (Common Data Language) configuration as a dictionary.\"\n", " try:\n", " return read_toml(base_path() / CDL_FNAME)\n", " except FileNotFoundError:\n", @@ -713,7 +723,7 @@ "source": [ "#| exports\n", "def grp_names(): \n", - " \"Return the group names as defined in `cdl.toml`\"\n", + " \"Return the group names as defined in `cdl.toml`.\"\n", " return [v['name'] for v in cdl_cfg()['grps'].values()]" ] }, @@ -726,6 +736,7 @@ "source": [ "#| exports\n", "def species_lut_path():\n", + " \"Return the path to the species lookup table.\"\n", " src_dir = lut_path()\n", " fname = [enum for enum in cdl_cfg()['enums'] if enum['name'] == 'species_t'][0]['fname']\n", " return src_dir / fname" @@ -740,6 +751,7 @@ "source": [ "#| exports\n", "def bodyparts_lut_path():\n", + " \"Return the path to the body parts lookup table.\"\n", " src_dir = lut_path()\n", " fname = [enum for enum in cdl_cfg()['enums'] if enum['name'] == 'body_part_t'][0]['fname']\n", " return src_dir / fname" @@ -754,6 +766,7 @@ "source": [ "#| exports\n", "def biogroup_lut_path():\n", + " \"Return the path to the biota group lookup table.\"\n", " src_dir = lut_path()\n", " fname = [enum for enum in cdl_cfg()['enums'] if enum['name'] == 'bio_group_t'][0]['fname']\n", " return src_dir / fname" @@ -768,6 +781,7 @@ "source": [ "#| exports\n", "def sediments_lut_path():\n", + " \"Return the path to the sediment type lookup table.\"\n", " src_dir = lut_path()\n", " fname = [enum for enum in cdl_cfg()['enums'] if enum['name'] == 'sed_type_t'][0]['fname']\n", " return src_dir / fname" @@ -782,6 +796,7 @@ "source": [ "#| exports\n", "def unit_lut_path():\n", + " \"Return the path to the unit lookup table.\"\n", " src_dir = lut_path()\n", " fname = [enum for enum in cdl_cfg()['enums'] if enum['name'] == 'unit_t'][0]['fname']\n", " return src_dir / fname" @@ -796,6 +811,7 @@ "source": [ "#| exports\n", "def detection_limit_lut_path():\n", + " \"Return the path to the detection limit lookup table.\"\n", " src_dir = lut_path()\n", " fname = [enum for enum in cdl_cfg()['enums'] if enum['name'] == 'dl_t'][0]['fname']\n", " return src_dir / fname" @@ -810,6 +826,7 @@ "source": [ "#| exports\n", "def filtered_lut_path():\n", + " \"Return the path to the filtered lookup table.\"\n", " src_dir = lut_path()\n", " fname = [enum for enum in cdl_cfg()['enums'] if enum['name'] == 'filt_t'][0]['fname']\n", " return src_dir / fname" @@ -824,6 +841,7 @@ "source": [ "#| exports\n", "def area_lut_path():\n", + " \"Return the path to the area lookup table.\"\n", " src_dir = lut_path()\n", " fname = [enum for enum in cdl_cfg()['enums'] if enum['name'] == 'area_t'][0]['fname']\n", " return src_dir / fname" @@ -860,8 +878,8 @@ "source": [ "#| exports\n", "def name2grp(\n", - " name:str, # Name of the group\n", - " cdl:dict, # CDL configuration\n", + " name: str, # Group name\n", + " cdl: dict, # CDL configuration\n", " ):\n", " # Reverse `cdl.toml` config group dict so that group config key can be retrieve based on its name\n", " return {v['name']:k for k, v in cdl['grps'].items()}[name]" @@ -906,6 +924,7 @@ "source": [ "#| exports\n", "def nc_tpl_name():\n", + " \"Return the name of the MARIS NetCDF template as defined in `configs.toml`\"\n", " p = base_path()\n", " return read_toml(p / 'configs.toml')['names']['nc_template']" ] @@ -919,7 +938,7 @@ "source": [ "#| exports\n", "def nc_tpl_path():\n", - " \"Return the name of the MARIS NetCDF template as defined in `configs.toml`\"\n", + " \"Return the path of the MARIS NetCDF template as defined in `configs.toml`\"\n", " p = base_path()\n", " return p / read_toml(p / 'configs.toml')['names']['nc_template']" ] @@ -942,13 +961,15 @@ "outputs": [], "source": [ "#| exports\n", - "def sanitize(s:Union[str, float] # String to sanitize\n", - " ) -> str: # Sanitized string\n", + "def sanitize(\n", + " s: str|float # String or float to sanitize\n", + " ) -> str|float: # Sanitized string or original float\n", " \"\"\"\n", - " Sanitize dictionary key to comply with NetCDF enumeration type: \n", + " Sanitize dictionary key to comply with NetCDF enumeration type:\n", " \n", - " - remove `(`, `)`, `.`, `/`, `-` \n", - " - strip the string\n", + " - Remove `(`, `)`, `.`, `/`, `-`\n", + " - Strip the string\n", + " - Return original value if it's not a string (e.g., NaN)\n", " \"\"\"\n", " if isinstance(s, str):\n", " s = re.sub(r'[().]', '', s)\n", @@ -1012,13 +1033,14 @@ "outputs": [], "source": [ "#| exports\n", - "def get_lut(src_dir: str, # Directory containing lookup tables\n", - " fname: str, # Excel file lookup table name\n", - " key: str, # Excel file column name to be used as dict keys \n", - " value: str, # Excel file column name to be used as dict values \n", - " do_sanitize: bool=True, # Sanitization required?\n", - " reverse: bool=False # Reverse lookup table (value, key)\n", - " ) -> dict: # MARIS lookup table (key, value)\n", + "def get_lut(\n", + " src_dir: str, # Directory containing lookup tables\n", + " fname: str, # Excel file lookup table name\n", + " key: str, # Excel file column name to be used as dict keys \n", + " value: str, # Excel file column name to be used as dict values \n", + " do_sanitize: bool=True, # Sanitization required?\n", + " reverse: bool=False # Reverse lookup table (value, key)\n", + " ) -> dict: # MARIS lookup table (key, value)\n", " \"Convert MARIS db lookup table excel file to dictionary `{'name': id, ...}` or `{id: name, ...}` if `reverse` is True.\"\n", " fname = Path(src_dir) / fname\n", " df = pd.read_excel(fname, usecols=[key, value]).dropna(subset=value)\n", @@ -1088,10 +1110,10 @@ "source": [ "#| exports\n", "class Enums():\n", - " \"Return dictionaries of MARIS NetCDF's enumeration types\"\n", + " \"Return dictionaries of MARIS NetCDF's enumeration types.\"\n", " def __init__(self, \n", - " lut_src_dir:str,\n", - " cdl_enums:dict\n", + " lut_src_dir:str, # Directory containing lookup tables\n", + " cdl_enums:dict # CDL configuration enumeration types\n", " ):\n", " fc.store_attr()\n", " self.types = self.lookup()\n", @@ -1195,11 +1217,11 @@ "source": [ "#| exports\n", "def get_enum_dicts(\n", - " lut_src_dir:str,\n", - " cdl_enums:dict,\n", - " **kwargs\n", + " lut_src_dir:str, # Directory containing lookup tables\n", + " cdl_enums:dict, # CDL configuration enumeration types\n", + " **kwargs # Additional arguments\n", " ):\n", - " \"Return a dict of NetCDF enumeration types\"\n", + " \"Return a dict of NetCDF enumeration types.\"\n", " enum_types = {}\n", " for enum in cdl_enums:\n", " name, fname, key, value = enum.values()\n",