Skip to content

Commit

Permalink
feat: adapt and add tests, improve Catalog.to_quakeml functionality a…
Browse files Browse the repository at this point in the history
…nd do some connected refactoring
  • Loading branch information
schmidni committed Sep 28, 2023
1 parent 6932299 commit adbd44c
Show file tree
Hide file tree
Showing 6 changed files with 153 additions and 28 deletions.
4 changes: 2 additions & 2 deletions catalog_tools/analysis/tests/test_estimate_beta.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ def test_estimate_beta_tinti(n: int, beta: float, mc: float, delta_m: float,
precision: float):
mags = simulate_magnitudes_w_offset(n, beta, mc, delta_m)
beta_estimate = estimate_beta_tinti(mags, mc, delta_m)
print((beta_estimate - beta) / beta)

assert abs(beta - beta_estimate) / beta <= precision


Expand All @@ -46,7 +46,7 @@ def test_estimate_b_tinti(n: int, b: float, mc: float, delta_m: float,
b_parameter: str, precision: float):
mags = simulate_magnitudes_w_offset(n, b, mc, delta_m)
b_estimate = estimate_b_tinti(mags, mc, delta_m, b_parameter=b_parameter)
print((b_estimate - b) / b)

assert abs(b - b_estimate) / b <= precision


Expand Down
10 changes: 2 additions & 8 deletions catalog_tools/io/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,16 +91,10 @@ def get_events(self, start_time: datetime | None = None,
df = Catalog.from_dict(catalog)

if not include_uncertainty:
rgx = "(_uncertainty|_lowerUncertainty|" \
"_upperUncertainty|_confidenceLevel)$"

cols = df.filter(regex=rgx).columns
df = df.drop(columns=cols)
df = df.drop_uncertainties()

if not include_ids:
rgx = "(eventid|originid|magnitudeid)$"
cols = df.filter(regex=rgx).columns
df = df.drop(columns=cols)
df = df.drop_ids()

df['magnitude'] = pd.to_numeric(df['magnitude'])
df['latitude'] = pd.to_numeric(df['latitude'])
Expand Down
2 changes: 1 addition & 1 deletion catalog_tools/io/tests/test_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,4 +84,4 @@ def test_download_catalog():
min_magnitude=min_mag,
)

assert len(cat.columns) == 22
assert len(cat.columns) == 21
111 changes: 95 additions & 16 deletions catalog_tools/seismicity/catalog.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,9 @@ def __init__(self, data=None, *args, name=None, **kwargs):
self.name = name

@classmethod
def from_quakeml(cls, quakeml: str) -> Catalog:
def from_quakeml(cls, quakeml: str,
include_uncertainties: bool = False,
include_ids: bool = False) -> Catalog:
"""
Create a Catalog from a QuakeML file.
Expand All @@ -68,8 +70,36 @@ def from_quakeml(cls, quakeml: str) -> Catalog:
Catalog
"""
catalog = parse_quakeml_file(quakeml)
df = cls.from_dict(catalog)

return cls.from_dict(catalog)
if not include_uncertainties:
df = df.drop_uncertainties()
if not include_ids:
df = df.drop_ids()

return df

def drop_uncertainties(self):
"""
Drop uncertainty columns from the catalog.
"""

rgx = "(_uncertainty|_lowerUncertainty|" \
"_upperUncertainty|_confidenceLevel)$"

cols = self.filter(regex=rgx).columns
df = self.drop(columns=cols)
return df

def drop_ids(self):
"""
Drop event, origin, and magnitude IDs from the catalog.
"""

rgx = "(eventid|originid|magnitudeid)$"
cols = self.filter(regex=rgx).columns
df = self.drop(columns=cols)
return df

@property
def _constructor(self):
Expand Down Expand Up @@ -119,26 +149,72 @@ def bin_magnitudes(
if not inplace:
return df

@require_cols(require=_required_cols)
def to_quakeml(self, agencyID=' ', author=' ') -> str:
df = self.copy()
if 'eventid' not in df.columns:
df['eventid'] = uuid.uuid4()
if 'originid' not in df.columns:
df['originid'] = uuid.uuid4()
if 'magnitudeid' not in df.columns:
df['magnitudeid'] = uuid.uuid4()
def _secondary_magnitudekeys(self) -> list[str]:
"""
Get a list of secondary magnitude keys in the catalog.
This will always include also the preferred magnitude type.
"""

vals = ['_uncertainty',
'_lowerUncertainty',
'_upperUncertainty',
'_confidenceLevel']
'_confidenceLevel',
'_type']

secondary_mags = [mag for mag in df.columns if
secondary_mags = [mag for mag in self.columns if
'magnitude_' in mag
and not mag == 'magnitude_type'
and not any(['magnitude' + val
in mag for val in vals])]
return secondary_mags

def _create_ids(self):
"""
Create missing event, origin, and magnitude IDs for the catalog.
Will fill in missing IDs with UUIDs.
"""

if 'eventid' not in self.columns:
self['eventid'] = uuid.uuid4()
if 'originid' not in self.columns:
self['originid'] = uuid.uuid4()
if 'magnitudeid' not in self.columns:
self['magnitudeid'] = uuid.uuid4()

mag_types = set([mag.split('_')[1]
for mag in self._secondary_magnitudekeys()])

for mag_type in mag_types:
if f'magnitude_{mag_type}_magnitudeid' not in self.columns:
self[f'magnitude_{mag_type}_magnitudeid'] = \
self.apply(
lambda x: uuid.uuid4()
if not mag_type == x['magnitude_type']
else x['magnitudeid'], axis=1)

return self

@require_cols(require=_required_cols)
def to_quakeml(self, agencyID=' ', author=' ') -> str:
"""
Convert the catalog to QuakeML format.
Args:
agencyID : str, optional
Agency ID.
author : str, optional
Author of the catalog.
Returns:
str
The catalog in QuakeML format.
"""

df = self.copy()
df = df._create_ids()

secondary_mags = self._secondary_magnitudekeys()

data = dict(events=df.to_dict(orient='records'),
agencyID=agencyID, author=author)
Expand Down Expand Up @@ -185,5 +261,8 @@ class ForecastCatalog(Catalog):
_required_cols = REQUIRED_COLS_CATALOG + ['catalog_id']

@require_cols(require=_required_cols)
def to_quakeml(self) -> str:
raise NotImplementedError
def to_quakeml(self, agencyID=' ', author=' ') -> str:
catalogs = []
for _, group in self.groupby('catalog_id'):
catalogs.append(Catalog(group).to_quakeml(agencyID, author))
return catalogs
52 changes: 52 additions & 0 deletions catalog_tools/seismicity/tests/test_catalog.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import os
import re
import uuid

import pandas as pd

Expand Down Expand Up @@ -94,8 +95,59 @@ def test_catalog_bin():
def test_to_quakeml():
xml_file = os.path.join(PATH_RESOURCES, 'quakeml_data.xml')

catalog = Catalog.from_quakeml(
xml_file, include_uncertainties=True, include_ids=True)
catalog_xml = catalog.to_quakeml(agencyID='SED', author='catalog-tools')
catalog_xml = re.sub(r"[\n\t\s]*", "", catalog_xml)

with open(xml_file, 'r') as file:
xml = file.read()
xml = re.sub(r"[\n\t\s]*", "", xml)

assert catalog_xml == xml


def test_to_quakeml_without():
xml_file = os.path.join(PATH_RESOURCES, 'quakeml_data.xml')

catalog = Catalog.from_quakeml(xml_file)

rgx = "(eventid|originid|magnitudeid)$"
cols = catalog.filter(regex=rgx).columns
assert len(cols) == 0

rgx = "(_uncertainty|_lowerUncertainty|" \
"_upperUncertainty|_confidenceLevel)$"
cols = catalog.filter(regex=rgx).columns
assert len(cols) == 0

catalog = catalog._create_ids()
event = catalog.iloc[0]
assert uuid.UUID(str(event['magnitudeid']))
assert uuid.UUID(str(event['originid']))
assert uuid.UUID(str(event['eventid']))
assert event['magnitudeid'] == event['magnitude_MLhc_magnitudeid']
assert event['magnitudeid'] != event['magnitude_MLv_magnitudeid']
assert uuid.UUID(str(event['magnitude_MLv_magnitudeid']))


def test_to_quakeml_forecast():
xml_file = os.path.join(PATH_RESOURCES, 'quakeml_data.xml')

catalog = Catalog.from_quakeml(
xml_file, include_uncertainties=True, include_ids=True)
catalog2 = catalog.copy()

catalog['catalog_id'] = 1
catalog2['catalog_id'] = 2

catalog = pd.concat([catalog, catalog2]).reset_index(drop=True)

catalog_xml = catalog.to_quakeml(agencyID='SED', author='catalog-tools')

assert len(catalog_xml) == 2

catalog_xml = catalog_xml[0]
catalog_xml = re.sub(r"[\n\t\s]*", "", catalog_xml)

with open(xml_file, 'r') as file:
Expand Down
2 changes: 1 addition & 1 deletion catalog_tools/utils/tests/test_coordinate_transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ def test_bedretto_transformation(self):
MOCK_INJ_EASTING,
MOCK_INJ_NORTHING,
altitude=MOCK_INJ_ALTITUDE_LOCAL)
print(lon, lat, altitude)

self.assertAlmostEqual(lat, MOCK_INJ_LAT)
self.assertAlmostEqual(lon, MOCK_INJ_LON)
self.assertAlmostEqual(altitude, MOCK_INJ_ALTITUDE)

0 comments on commit adbd44c

Please sign in to comment.