docs: add some docstrings, update to python3.10 typehints
schmidni committed Aug 16, 2023
1 parent f7b4c3c commit 29c9204
Showing 7 changed files with 508 additions and 26 deletions.
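
The typehint part of this commit replaces typing.Optional annotations with the PEP 604 union syntax (X | None), which Python 3.10 supports natively. A minimal before/after sketch; the function name and parameter here are illustrative only, not taken from the repository:

from datetime import datetime
from typing import Optional


# Before: typing-module unions
def get_events_before(start_time: Optional[datetime] = None) -> None: ...


# After: PEP 604 unions, valid at runtime on Python 3.10+
def get_events_after(start_time: datetime | None = None) -> None: ...
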
34 changes: 30 additions & 4 deletions catalog_tools/catalog.py
@@ -6,7 +6,8 @@
'time', 'magnitude', 'magnitude_type']


def _check_required_cols(df, required_cols=REQUIRED_COLS):
def _check_required_cols(df: pd.DataFrame,
required_cols: list[str] = REQUIRED_COLS):
if not set(required_cols).issubset(set(df.columns)):
return False
return True
@@ -44,7 +45,22 @@ def wrapper_require(self, *args, **kwargs):


class Catalog(pd.DataFrame):

"""
A subclass of pandas DataFrame that represents an earthquake event catalog.
Args:
data : array-like, Iterable, dict, or DataFrame, optional
Data to initialize the catalog with.
name : str, optional
Name of the catalog.
*args, **kwargs : optional
Additional arguments and keyword arguments to pass to pandas
DataFrame constructor.
Notes:
The Catalog class is a subclass of pandas DataFrame, and inherits
all of its methods and attributes.
"""
_metadata = ['name']

def __init__(self, data=None, *args, name=None, **kwargs):
@@ -57,8 +73,18 @@ def _constructor(self):
return _catalog_constructor_with_fallback

@require_cols
def strip(self, inplace=False):
"""Remove all columns except the required ones.
def strip(self, inplace: bool = False):
"""
Remove all columns except the required ones.
Args:
inplace : bool, optional
If True, do operation inplace.
Returns:
Catalog or None
If inplace is True, returns None. Otherwise, returns a new
Catalog with the stripped columns.
"""
df = self.drop(columns=set(self.columns).difference(set(REQUIRED_COLS)),
inplace=inplace)
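
To make the new Catalog and strip() docstrings concrete, a minimal usage sketch follows. Only 'time', 'magnitude' and 'magnitude_type' are visible in REQUIRED_COLS within this diff, so the columns below are illustrative; a real catalog must carry every required column for the require_cols check to pass.

import pandas as pd

from catalog_tools.catalog import Catalog

# Illustrative columns: only 'time', 'magnitude' and 'magnitude_type' are
# confirmed members of REQUIRED_COLS in this diff.
cat = Catalog(
    {
        'time': pd.to_datetime(['2023-08-16T12:00:00']),
        'magnitude': [4.2],
        'magnitude_type': ['Mw'],
        'agency': ['ABC'],  # not a required column, removed by strip()
    },
    name='demo',
)

stripped = cat.strip()   # new Catalog containing only the required columns
cat.strip(inplace=True)  # or drop the extra columns in place (returns None)
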
Empty file added catalog_tools/io/__init__.py
Empty file.
37 changes: 19 additions & 18 deletions catalog_tools/io/client.py
@@ -1,5 +1,4 @@
from datetime import datetime
from typing import Optional
from xml.sax import handler, make_parser

import pandas as pd
@@ -10,24 +9,24 @@


class FDSNWSEventClient():
def __init__(self, url: str):
"""
Args:
url: base url of the FDSNWS event service
(eg. 'https://earthquake.usgs.gov/fdsnws/event/1/query')
"""
"""
Args:
url: base url of the FDSNWS event service
(eg. 'https://earthquake.usgs.gov/fdsnws/event/1/query')
"""

def __init__(self, url: str):
self.url = url

def get_events(self, start_time: Optional[datetime] = None,
end_time: Optional[datetime] = None,
min_latitude: Optional[float] = None,
max_latitude: Optional[float] = None,
min_longitude: Optional[float] = None,
max_longitude: Optional[float] = None,
min_magnitude: Optional[float] = None,
include_all_magnitudes: Optional[bool] = None,
event_type: Optional[str] = None,
def get_events(self, start_time: datetime | None = None,
end_time: datetime | None = None,
min_latitude: float | None = None,
max_latitude: float | None = None,
min_longitude: float | None = None,
max_longitude: float | None = None,
min_magnitude: float | None = None,
include_all_magnitudes: bool | None = None,
event_type: str | None = None,
delta_m: float = 0.1,
include_uncertainty: bool = False) -> pd.DataFrame:
"""Downloads an earthquake catalog based on a URL.
@@ -45,9 +44,10 @@ def get_events(self, start_time: Optional[datetime] = None,
event_type: type of event to download.
delta_m: magnitude bin size. if >0, then events of
magnitude >= (min_magnitude - delta_m/2) will be downloaded.
include_uncertainty: whether to include uncertainty columns.
Returns:
The catalog as a pandas DataFrame.
The catalog as a Catalog object.
"""
request_url = self.url + '?'
@@ -75,6 +75,7 @@ def get_events(self, start_time: Optional[datetime] = None,
request_url += f'&eventtype={event_type}'

catalog = []

parser = make_parser()
parser.setFeature(handler.feature_namespaces, False)
parser.setContentHandler(QuakeMLHandler(
@@ -89,7 +90,7 @@ def get_events(self, start_time: Optional[datetime] = None,
if not include_uncertainty:
rgx = "(_uncertainty|_lowerUncertainty|" \
"_upperUncertainty|_confidenceLevel)$"
# df = df.filter(regex=rgx)

cols = df.filter(regex=rgx).columns
df = df.drop(columns=cols)

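
A short sketch of calling the updated get_events signature. The class, module path, parameter names and the USGS endpoint come from the hunks above; the filter values themselves are made up.

from datetime import datetime

from catalog_tools.io.client import FDSNWSEventClient

client = FDSNWSEventClient('https://earthquake.usgs.gov/fdsnws/event/1/query')

# With the python3.10 typehints, unset filters simply stay None
# (datetime | None, float | None, ...). Values below are illustrative.
cat = client.get_events(
    start_time=datetime(2023, 1, 1),
    end_time=datetime(2023, 2, 1),
    min_magnitude=3.0,
    delta_m=0.1,
    include_uncertainty=False,
)
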
37 changes: 33 additions & 4 deletions catalog_tools/io/parser.py
@@ -1,6 +1,5 @@
import xml.sax
from datetime import datetime
from typing import Union


def get_realvalue(key: str, value: str) -> dict:
@@ -41,7 +40,7 @@ def get_realvalue(key: str, value: str) -> dict:
}


def get_preferred_magnitude(magnitudes: list, id: Union[str, None]) \
def get_preferred_magnitude(magnitudes: list, id: str | None) \
-> tuple[dict, list]:
preferred = next((m for m in magnitudes if id
== m['magnitudepublicID']), DUMMY_MAGNITUDE)
@@ -119,6 +118,25 @@ def extract_secondary_magnitudes(magnitudes: list) -> dict:

def parse_to_dict(event: dict, origins: list, magnitudes: list,
includeallmagnitudes: bool = True) -> dict:
"""
Parse earthquake event information dictionaries as produced by the
QuakeMLHandler and return a dictionary of event parameters.
Args:
event : dict
A dictionary representing the earthquake event.
origins : list
A list of dictionaries representing the earthquake origins.
magnitudes : list
A list of dictionaries representing the earthquake magnitudes.
includeallmagnitudes : bool, optional
If True, include all magnitudes in the output dictionary.
Otherwise, only include the preferred magnitude.
Returns:
dict
A dictionary of earthquake event parameters.
"""
preferred_origin = \
get_preferred_origin(origins,
event.get('preferredOriginID', None))
@@ -143,8 +161,19 @@ def parse_to_dict(event: dict, origins: list, magnitudes: list,

class QuakeMLHandler(xml.sax.ContentHandler):
"""
Custom ContentHandler class that extends ContenHandler to
stream parse QuakeML files.
A SAX ContentHandler that is used to parse QuakeML files and extract
earthquake event information.
Args:
catalog : Catalog
A Catalog object to store the extracted earthquake events.
includeallmagnitudes : bool, optional
If True, include all magnitudes in the catalog. Otherwise,
only include the preferred magnitude.
Notes:
This class is a SAX ContentHandler, and is used in conjunction
with an xml.sax parser to extract earthquake event information
from QuakeML files.
"""

def __init__(self, catalog, includeallmagnitudes=True):
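
The QuakeMLHandler docstring, combined with the make_parser and setContentHandler lines visible in client.py, suggests the following standalone usage. Collecting events into a plain list of dicts and the local file name are assumptions drawn from the client code, not documented API.

import pandas as pd
from xml.sax import handler, make_parser

from catalog_tools.io.parser import QuakeMLHandler

# client.py passes a plain list that the handler fills with one dict per event
events: list[dict] = []

parser = make_parser()
parser.setFeature(handler.feature_namespaces, False)
parser.setContentHandler(QuakeMLHandler(events, includeallmagnitudes=False))
parser.parse('catalog.xml')  # hypothetical local QuakeML file

df = pd.DataFrame(events)
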
