From 2662025710c9a3a5ea9ca28d4b7c698746fa548f Mon Sep 17 00:00:00 2001 From: Athanasios Papadopoulos Date: Tue, 1 Aug 2023 14:59:43 +0200 Subject: [PATCH 1/6] implemented Weichert (1980) --- catalog_tools/analysis/estimate_beta.py | 154 ++++++++++++++++++ .../analysis/tests/test_estimate_beta.py | 73 ++++++++- catalog_tools/utils/simulate_distributions.py | 20 ++- 3 files changed, 241 insertions(+), 6 deletions(-) diff --git a/catalog_tools/analysis/estimate_beta.py b/catalog_tools/analysis/estimate_beta.py index 998881b..402b08a 100644 --- a/catalog_tools/analysis/estimate_beta.py +++ b/catalog_tools/analysis/estimate_beta.py @@ -2,6 +2,9 @@ """ from typing import Optional, Tuple, Union import numpy as np +import pandas as pd +from scipy.optimize import minimize +import warnings def estimate_b_tinti(magnitudes: np.ndarray, @@ -230,6 +233,157 @@ def estimate_b_laplace( error=error) +def estimate_b_weichert( + magnitudes: np.ndarray, + years: np.ndarray, + completeness_table, + mag_max, + tend: int = None, + delta_m: float = 0.1, + b_parameter: str = 'b_value' + ) -> [float, float, float, float, float]: + """ applies the Weichert (1980) algorithm for estimation of the + Gutenberg-Richter magnitude-frequency distribution parameters in + the case of unequal completeness periods for different magnitude + values. + + Source: + Weichert (1980), Estimation of the earthquake recurrence parameters + for unequal observation periods for different magnitudes, + Bulletin of the Seismological Society of America, + Vol 70, No. 4, pp. 1337-1346 + + Args: + magnitudes: vector of earthquake magnitudes + years: vector of years of occurrence of each earthquake + completeness_table: Nx2 array, where the first column + contains the leftmost edge of magnitude bins and + the second column the associated year of completeness, i.e. + the year after which all earthquakes larger than the value in + the first column are considered detected. 
An example is given + below: + + np.array([[ 3.95, 1980], + [ 4.95, 1920], + [ 5.95, 1810], + [ 6.95, 1520]]) + + mag_max: maximum possible magnitude + tend: last year of observation (the default is None, in which case + it is set to the latest year in years). + delta_m: magnitude resolution, the default is 0.1. + b_parameter:either 'b-value', then the corresponding value of the + Gutenberg-Richter law is returned, otherwise 'beta' + from the exponential distribution [p(M) = exp(-beta*(M-mc))] + + Returns:( + b_parameter: maximum likelihood point estimate of 'b-value' or 'beta' + std_b_parameter: standard error of b_parameter + rate_at_lmc: maximum likelihood point estimate of earthquake rate + at the lower magnitude of completeness + std_rate_at_lmc: standard error of rate_at_lmc + a_val: maximum likelihood point estimate of a-value ( = log10(rate at mag=0) ) + of Gutenberg-Richter magnitude frequency distribution + """ + assert len(magnitudes) == len(years), \ + "the magnitudes and years arrays have different lengths" + assert completeness_table.shape[1] == 2 + assert np.all(np.ediff1d(completeness_table[:, 0]) >= 0),\ + "magnitudes in completeness table not in ascending order" + assert [i-delta_m in np.arange(completeness_table[0, 0], mag_max+0.001, delta_m) + for i in np.unique(magnitudes)],\ + "magnitude bins not aligned with completeness edges" + if not np.all(magnitudes >= completeness_table[:, 0].min()): + warnings.warn("magnitudes below %.2f are not covered by the " + "completeness table and are discarded" % completeness_table[0, 0]) + assert b_parameter == 'b_value' or b_parameter == 'beta', \ + "please choose either 'b_value' or 'beta' as b_parameter" + factor = 1/np.log(10) if b_parameter == 'b_value' else 1 + tend = tend if tend else np.max(years) + + # Get the magnitudes and completeness years as separate arrays + completeness_magnitudes = completeness_table[:, 0] + completeness_years = completeness_table[:, 1] + + # Obtain the completeness start year 
for each value in magnitudes + insertion_indices = np.searchsorted(completeness_magnitudes, magnitudes) + cmpl_yrs = np.array( + [completeness_years[idx-1] if idx not in [0, len(completeness_years)] + else {0: -1, len(completeness_years): completeness_years[-1]}[idx] + for i, idx in enumerate(insertion_indices)] + ) + + # filter out events outside completeness window and + # get number of "complete" events in each magnitude bin + # and associated year of completeness + idxcomp = ((cmpl_yrs > 0) & (years - cmpl_yrs >= 0)) + complete_events = pd.DataFrame.groupby( + pd.DataFrame(data={ + 'mag_left_edge': np.array([i.left for i in pd.cut(magnitudes[idxcomp], + bins=np.arange(completeness_magnitudes[0], + mag_max+0.01, delta_m), right=False)] + ), + 'completeness_start': cmpl_yrs[idxcomp] + }), by=['mag_left_edge', 'completeness_start'])\ + .size()\ + .to_frame('num')\ + .reset_index() + assert np.all(complete_events.completeness_start > 0) # should be the case by design + + # minimization + beta = np.log(10) # initialization of beta + output = minimize(weichert_minimization, + beta, + args=(tend, complete_events, delta_m), + method='Nelder-Mead', + options={'maxiter': 5000, 'disp': True}, + tol=1e5 * np.finfo(float).eps) + b_parameter = output.x[0]*factor + + # compute rate at lower magnitude of completeness bin + wf = np.sum(np.exp(-output.x[0] * + (complete_events.mag_left_edge + delta_m * 0.5))) /\ + np.sum((tend-complete_events.completeness_start.values) * + np.exp(-output.x[0] * (complete_events.mag_left_edge + delta_m * 0.5))) + rate_at_lmc = complete_events.num.sum() * wf + + # compute a-value ( a_val = log10(rate at M=0) ) + a_val = np.log10(rate_at_lmc) + (output.x[0]/np.log(10)) * \ + complete_events.mag_left_edge.values[0] + + # compute uncertainty in b-parameter according to Weichert (1980) + nominator = np.sum((tend-complete_events.completeness_start.values) * + np.exp(-output.x[0]*(complete_events.mag_left_edge + delta_m * 0.5)))**2 + denominator_term1 = 
np.sum((tend-complete_events.completeness_start.values) * + (complete_events.mag_left_edge + delta_m * 0.5) * + np.exp(-output.x[0]*(complete_events.mag_left_edge + + delta_m * 0.5)))**2 + denominator_term2 = np.sqrt(nominator) * \ + np.sum((tend-complete_events.completeness_start.values) * + ((complete_events.mag_left_edge + delta_m * 0.5)**2) * + np.exp(-output.x[0] * (complete_events.mag_left_edge + + delta_m * 0.5)) + ) + var_beta = -(1/complete_events.num.sum())*nominator/(denominator_term1-denominator_term2) + std_b_parameter = np.sqrt(var_beta)*factor + + # compute uncertainty in rate at lower magnitude of completeness + std_rate_at_lmc = rate_at_lmc/np.sqrt(complete_events.num.sum()) + + return b_parameter, std_b_parameter, rate_at_lmc, std_rate_at_lmc, a_val + + +def weichert_minimization(beta, tend, complete_events, delta_m): + magbins = complete_events.mag_left_edge + delta_m * 0.5 + nom = np.sum((tend-complete_events.completeness_start.values) * + magbins * np.exp(-beta * magbins)) + denom = np.sum((tend-complete_events.completeness_start.values) * + np.exp(-beta * magbins)) + left = nom/denom + right = np.sum(complete_events.num.values*magbins)/complete_events.num.sum() + return np.abs(left-right) + + def shi_bolt_confidence( magnitudes: np.ndarray, b_value: Optional[float] = None, diff --git a/catalog_tools/analysis/tests/test_estimate_beta.py b/catalog_tools/analysis/tests/test_estimate_beta.py index 27c17c0..0c69e6b 100644 --- a/catalog_tools/analysis/tests/test_estimate_beta.py +++ b/catalog_tools/analysis/tests/test_estimate_beta.py @@ -12,14 +12,23 @@ estimate_b_utsu,\ estimate_b_elst,\ estimate_b_laplace,\ + estimate_b_weichert,\ differences,\ shi_bolt_confidence def simulate_magnitudes_w_offset(n: int, beta: float, mc: float, - delta_m: float) -> np.ndarray: + delta_m: float, mag_max: float = None) -> np.ndarray: """ This function simulates the magnitudes with the correct offset""" - mags = simulate_magnitudes(n, beta, mc - delta_m / 2) + mags 
= simulate_magnitudes(n, beta, mc - delta_m / 2, mag_max) + if delta_m > 0: + mags = bin_to_precision(mags, delta_m) + return mags + +def simulate_magnitudes_w_offset2(n: int, beta: float, mc: float, + delta_m: float, mag_max: float = None) -> np.ndarray: + """ This function simulates the magnitudes with the correct offset""" + mags = simulate_magnitudes(n, beta, mc, mag_max) if delta_m > 0: mags = bin_to_precision(mags, delta_m) return mags @@ -97,6 +106,66 @@ def test_estimate_b_laplace(n: int, b: float, mc: float, delta_m: float, b_parameter=b_parameter) assert abs(b - b_estimate) / b <= precision +@pytest.mark.parametrize( + "a_val_true,b_val_true,precision", + [(7, 1, 0.01)] +) +def test_estimate_b_weichert(a_val_true: float, + b_val_true: float, + precision: float): + + # annual expected rates: + r45 = 10**(a_val_true-b_val_true*3.95) - 10**(a_val_true-b_val_true*4.95) + r56 = 10**(a_val_true-b_val_true*4.95) - 10**(a_val_true-b_val_true*5.95) + r67 = 10**(a_val_true-b_val_true*5.95) - 10**(a_val_true-b_val_true*6.95) + r78 = 10**(a_val_true-b_val_true*6.95) - 10**(a_val_true-b_val_true*7.95) + + # assume a catalogue from year 1000 to end of 1999 with completeness as follows: + # 3.95 - 1940 / 4.95 - 1880 / 5.95 - 1500 / 6.95 - 1000 + + # sample earthquakes over 1,000 year period + n45 = np.random.poisson(r45*(2000-1940)) + mags45 = simulate_magnitudes_w_offset(n=n45, beta=np.log(10), mc=4, delta_m=0.1, mag_max=4.95) + years45 = np.random.randint(1940, 2000, n45) + + n56 = np.random.poisson(r56*(2000-1880)) + mags56 = simulate_magnitudes_w_offset(n=n56, beta=np.log(10), mc=5, delta_m=0.1, mag_max=5.95) + years56 = np.random.randint(1880, 2000, n56) + + n67 = np.random.poisson(r67*(2000-1500)) + mags67 = simulate_magnitudes_w_offset(n=n67, beta=np.log(10), mc=6, delta_m=0.1, mag_max=6.95) + years67 = np.random.randint(1500, 2000, n67) + + n78 = np.random.poisson(r78*(2000-1000)) + mags78 = simulate_magnitudes_w_offset(n=n78, beta=np.log(10), mc=7, delta_m=0.1, 
mag_max=7.95) + years78 = np.random.randint(1000, 2000, n78) + + # add some earthquakes in incomplete years + mags_inc = np.concatenate([ + np.random.randint(40, 50, 100) / 10, + np.random.randint(50, 60, 10) / 10, + np.random.randint(60, 70, 1) / 10 + ]) + years_inc = np.concatenate([ + np.random.randint(1000, 1940, 100), + np.random.randint(1000, 1880, 10), + np.random.randint(1000, 1500, 1) + ]) + + mags = np.concatenate([mags45,mags56,mags67,mags78, mags_inc]) + years = np.concatenate([years45, years56, years67,years78, years_inc]) + + b_val, std_b_val, rate_at_mref, std_rate_at_mref, a_val = estimate_b_weichert( + magnitudes=mags, + years=years, + completeness_table=np.array([[3.95, 1940], [4.95, 1880], [5.95, 1500], [6.95, 1000]]), + mag_max=7.95, + tend=2000, + delta_m=0.1, + b_parameter='b_value') + + assert abs(b_val_true - b_val) / b_val_true <= precision + assert abs(a_val_true - a_val) / a_val_true <= precision @pytest.mark.parametrize( "magnitudes,b_value,std_b_value,std_beta", diff --git a/catalog_tools/utils/simulate_distributions.py b/catalog_tools/utils/simulate_distributions.py index e60aa04..10c8b35 100644 --- a/catalog_tools/utils/simulate_distributions.py +++ b/catalog_tools/utils/simulate_distributions.py @@ -1,20 +1,32 @@ import numpy as np +from scipy import stats -def simulate_magnitudes(n: int, beta: float, mc: float) -> np.ndarray: +def simulate_magnitudes(n: int, beta: float, mc: float, + mag_max: float = None) -> np.ndarray: """ Generates a vector of n elements drawn from an exponential distribution exp(-beta*M) Args: n: number of sample magnitudes beta: scale factor of the exponential distribution - mc: completeness magnitude + mc: cut-off magnitude + mag_max: maximum magnitude. If it is not None, the exponential + distribution is truncated at mag_max. 
Returns: mags: vector of length n of magnitudes drawn from an exponential distribution """ - - mags = np.random.exponential(1 / beta, n) + mc + if mag_max: + quantile1 = stats.expon.cdf(mc, loc=0, scale=1/beta) + quantile2 = stats.expon.cdf(mag_max, loc=0, scale=1/beta) + mags = stats.expon.ppf( + np.random.uniform(quantile1, quantile2, size=n), + loc=0, + scale=1/beta, + ) + else: + mags = np.random.exponential(1 / beta, n) + mc return mags From f9322e3bceb27c23ad62f8ceefafcc40ab567f5f Mon Sep 17 00:00:00 2001 From: Athanasios Papadopoulos Date: Tue, 1 Aug 2023 17:55:40 +0200 Subject: [PATCH 2/6] formatted code according to PEP 8 --- catalog_tools/analysis/estimate_beta.py | 94 ++++++++++--------- .../analysis/tests/test_estimate_beta.py | 62 ++++++------ catalog_tools/utils/simulate_distributions.py | 6 +- 3 files changed, 89 insertions(+), 73 deletions(-) diff --git a/catalog_tools/analysis/estimate_beta.py b/catalog_tools/analysis/estimate_beta.py index 402b08a..156a19e 100644 --- a/catalog_tools/analysis/estimate_beta.py +++ b/catalog_tools/analysis/estimate_beta.py @@ -241,7 +241,7 @@ def estimate_b_weichert( tend: int = None, delta_m: float = 0.1, b_parameter: str = 'b_value' - ) -> [float, float, float, float, float]: +) -> [float, float, float, float, float]: """ applies the Weichert (1980) algorithm for estimation of the Gutenberg-Richter magnitude-frequency distribution parameters in the case of unequal completeness periods for different magnitude @@ -282,23 +282,27 @@ def estimate_b_weichert( rate_at_lmc: maximum likelihood point estimate of earthquake rate at the lower magnitude of completeness std_rate_at_lmc: standard error of rate_at_lmc - a_val: maximum likelihood point estimate of a-value ( = log10(rate at mag=0) ) - of Gutenberg-Richter magnitude frequency distribution + a_val: maximum likelihood point estimate of a-value + ( =log10(rate at mag=0) ) of Gutenberg-Richter + magnitude frequency distribution """ assert len(magnitudes) == len(years), 
\ "the magnitudes and years arrays have different lengths" assert completeness_table.shape[1] == 2 assert np.all(np.ediff1d(completeness_table[:, 0]) >= 0),\ "magnitudes in completeness table not in ascending order" - assert [i-delta_m in np.arange(completeness_table[0, 0], mag_max+0.001, delta_m) + assert [i - delta_m in np.arange(completeness_table[0, 0], + mag_max + 0.001, delta_m) for i in np.unique(magnitudes)],\ "magnitude bins not aligned with completeness edges" if not np.all(magnitudes >= completeness_table[:, 0].min()): - warnings.warn("magnitudes below %.2f are not covered by the " - "completeness table and are discarded" % completeness_table[0, 0]) + warnings.warn( + "magnitudes below %.2f are not covered by the " + "completeness table and are discarded" % + completeness_table[0, 0]) assert b_parameter == 'b_value' or b_parameter == 'beta', \ "please choose either 'b_value' or 'beta' as b_parameter" - factor = 1/np.log(10) if b_parameter == 'b_value' else 1 + factor = 1 / np.log(10) if b_parameter == 'b_value' else 1 tend = tend if tend else np.max(years) # Get the magnitudes and completeness years as separate arrays @@ -308,9 +312,9 @@ def estimate_b_weichert( # Obtain the completeness start year for each value in magnitudes insertion_indices = np.searchsorted(completeness_magnitudes, magnitudes) cmpl_yrs = np.array( - [completeness_years[idx-1] if idx not in [0, len(completeness_years)] - else {0: -1, len(completeness_years): completeness_years[-1]}[idx] - for i, idx in enumerate(insertion_indices)] + [completeness_years[idx - 1] if idx not in [0, len(completeness_years)] + else {0: -1, len(completeness_years): completeness_years[-1]}[idx] + for i, idx in enumerate(insertion_indices)] ) # filter out events outside completeness window and @@ -319,16 +323,17 @@ def estimate_b_weichert( idxcomp = ((cmpl_yrs > 0) & (years - cmpl_yrs >= 0)) complete_events = pd.DataFrame.groupby( pd.DataFrame(data={ - 'mag_left_edge': np.array([i.left for i in 
pd.cut(magnitudes[idxcomp], - bins=np.arange(completeness_magnitudes[0], - mag_max+0.01, delta_m), right=False)] - ), + 'mag_left_edge': np.array( + [i.left for i in pd.cut(magnitudes[idxcomp], + bins=np.arange(completeness_magnitudes[0], + mag_max + 0.01, delta_m), right=False)]), 'completeness_start': cmpl_yrs[idxcomp] }), by=['mag_left_edge', 'completeness_start'])\ .size()\ .to_frame('num')\ .reset_index() - assert np.all(complete_events.completeness_start > 0) # should be the case by design + assert np.all( + complete_events.completeness_start > 0) # should be the case by design # minimization beta = np.log(10) # initialization of beta @@ -338,50 +343,55 @@ def estimate_b_weichert( method='Nelder-Mead', options={'maxiter': 5000, 'disp': True}, tol=1e5 * np.finfo(float).eps) - b_parameter = output.x[0]*factor + b_parameter = output.x[0] * factor # compute rate at lower magnitude of completeness bin - wf = np.sum(np.exp(-output.x[0] * - (complete_events.mag_left_edge + delta_m * 0.5))) /\ - np.sum((tend-complete_events.completeness_start.values) * - np.exp(-output.x[0] * (complete_events.mag_left_edge + delta_m * 0.5))) + wf = np.sum(np.exp(-output.x[0] + * (complete_events.mag_left_edge + delta_m * 0.5)))\ + / np.sum((tend - complete_events.completeness_start.values) + * np.exp(-output.x[0] + * (complete_events.mag_left_edge + delta_m * 0.5))) rate_at_lmc = complete_events.num.sum() * wf # compute a-value ( a_val = log10(rate at M=0) ) - a_val = np.log10(rate_at_lmc) + (output.x[0]/np.log(10)) * \ - complete_events.mag_left_edge.values[0] + a_val = np.log10(rate_at_lmc) + \ + (output.x[0] / np.log(10)) * complete_events.mag_left_edge.values[0] # compute uncertainty in b-parameter according to Weichert (1980) - nominator = np.sum((tend-complete_events.completeness_start.values) * - np.exp(-output.x[0]*(complete_events.mag_left_edge + delta_m * 0.5)))**2 - denominator_term1 = np.sum((tend-complete_events.completeness_start.values) * - 
(complete_events.mag_left_edge + delta_m * 0.5) * - np.exp(-output.x[0]*(complete_events.mag_left_edge + - delta_m * 0.5)))**2 + nominator = np.sum((tend - complete_events.completeness_start.values) + * np.exp(-output.x[0] * (complete_events.mag_left_edge + + delta_m * 0.5)))**2 + denominator_term1 = np.sum( + (tend - complete_events.completeness_start.values) + * (complete_events.mag_left_edge + delta_m * 0.5) + * np.exp(- output.x[0] * (complete_events.mag_left_edge + + delta_m * 0.5)))**2 denominator_term2 = np.sqrt(nominator) * \ - np.sum((tend-complete_events.completeness_start.values) * - ((complete_events.mag_left_edge + delta_m * 0.5)**2) * - np.exp(-output.x[0] * (complete_events.mag_left_edge + - delta_m * 0.5)) + np.sum((tend - complete_events.completeness_start.values) + * ((complete_events.mag_left_edge + delta_m * 0.5)**2) + * np.exp(-output.x[0] * (complete_events.mag_left_edge + + delta_m * 0.5)) ) - var_beta = -(1/complete_events.num.sum())*nominator/(denominator_term1-denominator_term2) - std_b_parameter = np.sqrt(var_beta)*factor + var_beta = -(1 / complete_events.num.sum()) * nominator\ + / (denominator_term1 - denominator_term2) + std_b_parameter = np.sqrt(var_beta) * factor # compute uncertainty in rate at lower magnitude of completeness - std_rate_at_lmc = rate_at_lmc/np.sqrt(complete_events.num.sum()) + std_rate_at_lmc = rate_at_lmc / np.sqrt(complete_events.num.sum()) return b_parameter, std_b_parameter, rate_at_lmc, std_rate_at_lmc, a_val def weichert_minimization(beta, tend, complete_events, delta_m): magbins = complete_events.mag_left_edge + delta_m * 0.5 - nom = np.sum((tend-complete_events.completeness_start.values) * - magbins * np.exp(-beta * magbins)) - denom = np.sum((tend-complete_events.completeness_start.values) * - np.exp(-beta * magbins)) - left = nom/denom - right = np.sum(complete_events.num.values*magbins)/complete_events.num.sum() - return np.abs(left-right) + nom = np.sum((tend - complete_events.completeness_start.values 
+ ) * magbins * np.exp(-beta * magbins)) + denom = np.sum((tend - complete_events.completeness_start.values + ) * np.exp(-beta * magbins)) + left = nom / denom + right = np.sum(complete_events.num.values * magbins)\ + / complete_events.num.sum() + return np.abs(left - right) def shi_bolt_confidence( diff --git a/catalog_tools/analysis/tests/test_estimate_beta.py b/catalog_tools/analysis/tests/test_estimate_beta.py index 0c69e6b..3bdc301 100644 --- a/catalog_tools/analysis/tests/test_estimate_beta.py +++ b/catalog_tools/analysis/tests/test_estimate_beta.py @@ -17,22 +17,15 @@ shi_bolt_confidence -def simulate_magnitudes_w_offset(n: int, beta: float, mc: float, - delta_m: float, mag_max: float = None) -> np.ndarray: +def simulate_magnitudes_w_offset( + n: int, beta: float, mc: float, + delta_m: float, mag_max: float = None) -> np.ndarray: """ This function simulates the magnitudes with the correct offset""" mags = simulate_magnitudes(n, beta, mc - delta_m / 2, mag_max) if delta_m > 0: mags = bin_to_precision(mags, delta_m) return mags -def simulate_magnitudes_w_offset2(n: int, beta: float, mc: float, - delta_m: float, mag_max: float = None) -> np.ndarray: - """ This function simulates the magnitudes with the correct offset""" - mags = simulate_magnitudes(n, beta, mc, mag_max) - if delta_m > 0: - mags = bin_to_precision(mags, delta_m) - return mags - @pytest.mark.parametrize( "n,beta,mc,delta_m,precision", @@ -106,6 +99,7 @@ def test_estimate_b_laplace(n: int, b: float, mc: float, delta_m: float, b_parameter=b_parameter) assert abs(b - b_estimate) / b <= precision + @pytest.mark.parametrize( "a_val_true,b_val_true,precision", [(7, 1, 0.01)] @@ -115,29 +109,38 @@ def test_estimate_b_weichert(a_val_true: float, precision: float): # annual expected rates: - r45 = 10**(a_val_true-b_val_true*3.95) - 10**(a_val_true-b_val_true*4.95) - r56 = 10**(a_val_true-b_val_true*4.95) - 10**(a_val_true-b_val_true*5.95) - r67 = 10**(a_val_true-b_val_true*5.95) - 
10**(a_val_true-b_val_true*6.95) - r78 = 10**(a_val_true-b_val_true*6.95) - 10**(a_val_true-b_val_true*7.95) - - # assume a catalogue from year 1000 to end of 1999 with completeness as follows: + r45 = 10 ** (a_val_true - b_val_true * 3.95)\ + - 10 ** (a_val_true - b_val_true * 4.95) + r56 = 10 ** (a_val_true - b_val_true * 4.95)\ + - 10 ** (a_val_true - b_val_true * 5.95) + r67 = 10 ** (a_val_true - b_val_true * 5.95)\ + - 10 ** (a_val_true - b_val_true * 6.95) + r78 = 10 ** (a_val_true - b_val_true * 6.95)\ + - 10 ** (a_val_true - b_val_true * 7.95) + + # assume a catalogue from year 1000 to end of 1999 + # with completeness as follows: # 3.95 - 1940 / 4.95 - 1880 / 5.95 - 1500 / 6.95 - 1000 # sample earthquakes over 1,000 year period - n45 = np.random.poisson(r45*(2000-1940)) - mags45 = simulate_magnitudes_w_offset(n=n45, beta=np.log(10), mc=4, delta_m=0.1, mag_max=4.95) + n45 = np.random.poisson(r45 * (2000 - 1940)) + mags45 = simulate_magnitudes_w_offset( + n=n45, beta=np.log(10), mc=4, delta_m=0.1, mag_max=4.95) years45 = np.random.randint(1940, 2000, n45) - n56 = np.random.poisson(r56*(2000-1880)) - mags56 = simulate_magnitudes_w_offset(n=n56, beta=np.log(10), mc=5, delta_m=0.1, mag_max=5.95) + n56 = np.random.poisson(r56 * (2000 - 1880)) + mags56 = simulate_magnitudes_w_offset( + n=n56, beta=np.log(10), mc=5, delta_m=0.1, mag_max=5.95) years56 = np.random.randint(1880, 2000, n56) - n67 = np.random.poisson(r67*(2000-1500)) - mags67 = simulate_magnitudes_w_offset(n=n67, beta=np.log(10), mc=6, delta_m=0.1, mag_max=6.95) + n67 = np.random.poisson(r67 * (2000 - 1500)) + mags67 = simulate_magnitudes_w_offset( + n=n67, beta=np.log(10), mc=6, delta_m=0.1, mag_max=6.95) years67 = np.random.randint(1500, 2000, n67) - n78 = np.random.poisson(r78*(2000-1000)) - mags78 = simulate_magnitudes_w_offset(n=n78, beta=np.log(10), mc=7, delta_m=0.1, mag_max=7.95) + n78 = np.random.poisson(r78 * (2000 - 1000)) + mags78 = simulate_magnitudes_w_offset( + n=n78, beta=np.log(10), 
mc=7, delta_m=0.1, mag_max=7.95) years78 = np.random.randint(1000, 2000, n78) # add some earthquakes in incomplete years @@ -152,13 +155,15 @@ def test_estimate_b_weichert(a_val_true: float, np.random.randint(1000, 1500, 1) ]) - mags = np.concatenate([mags45,mags56,mags67,mags78, mags_inc]) - years = np.concatenate([years45, years56, years67,years78, years_inc]) + mags = np.concatenate([mags45, mags56, mags67, mags78, mags_inc]) + years = np.concatenate([years45, years56, years67, years78, years_inc]) - b_val, std_b_val, rate_at_mref, std_rate_at_mref, a_val = estimate_b_weichert( + b_val, std_b_val, rate_at_mref, std_rate_at_mref, a_val = \ + estimate_b_weichert( magnitudes=mags, years=years, - completeness_table=np.array([[3.95, 1940], [4.95, 1880], [5.95, 1500], [6.95, 1000]]), + completeness_table=np.array([[3.95, 1940], [4.95, 1880], + [5.95, 1500], [6.95, 1000]]), mag_max=7.95, tend=2000, delta_m=0.1, @@ -167,6 +172,7 @@ def test_estimate_b_weichert(a_val_true: float, assert abs(b_val_true - b_val) / b_val_true <= precision assert abs(a_val_true - a_val) / a_val_true <= precision + @pytest.mark.parametrize( "magnitudes,b_value,std_b_value,std_beta", [(np.array([0.20990507, 0.04077336, 0.27906596, 0.57406287, 0.64256544, diff --git a/catalog_tools/utils/simulate_distributions.py b/catalog_tools/utils/simulate_distributions.py index 10c8b35..c2696e9 100644 --- a/catalog_tools/utils/simulate_distributions.py +++ b/catalog_tools/utils/simulate_distributions.py @@ -19,12 +19,12 @@ def simulate_magnitudes(n: int, beta: float, mc: float, distribution """ if mag_max: - quantile1 = stats.expon.cdf(mc, loc=0, scale=1/beta) - quantile2 = stats.expon.cdf(mag_max, loc=0, scale=1/beta) + quantile1 = stats.expon.cdf(mc, loc=0, scale=1 / beta) + quantile2 = stats.expon.cdf(mag_max, loc=0, scale=1 / beta) mags = stats.expon.ppf( np.random.uniform(quantile1, quantile2, size=n), loc=0, - scale=1/beta, + scale=1 / beta, ) else: mags = np.random.exponential(1 / beta, n) + mc From 
89a9158c8fe8db330320098082d2cf5fdac7539f Mon Sep 17 00:00:00 2001 From: thpap Date: Fri, 4 Aug 2023 20:25:22 +0200 Subject: [PATCH 3/6] minor changes --- catalog_tools/analysis/estimate_beta.py | 68 ++++++++++--------- .../analysis/tests/test_estimate_beta.py | 12 +--- 2 files changed, 40 insertions(+), 40 deletions(-) diff --git a/catalog_tools/analysis/estimate_beta.py b/catalog_tools/analysis/estimate_beta.py index 156a19e..bdff1c4 100644 --- a/catalog_tools/analysis/estimate_beta.py +++ b/catalog_tools/analysis/estimate_beta.py @@ -236,12 +236,12 @@ def estimate_b_laplace( def estimate_b_weichert( magnitudes: np.ndarray, years: np.ndarray, - completeness_table, - mag_max, - tend: int = None, + completeness_table: np.ndarray, + mag_max: Union[int, float], + last_year: Union[int, float] = None, delta_m: float = 0.1, b_parameter: str = 'b_value' -) -> [float, float, float, float, float]: +) -> Tuple[float, float, float, float, float]: """ applies the Weichert (1980) algorithm for estimation of the Gutenberg-Richter magnitude-frequency distribution parameters in the case of unequal completeness periods for different magnitude @@ -269,7 +269,7 @@ def estimate_b_weichert( [ 6.95, 1520]]) mag_max: maximum possible magnitude - tend: last year of observation (the default is None, in which case + last_year: last year of observation (the default is None, in which case it is set to the latest year in years). delta_m: magnitude resolution, the default is 0.1. 
b_parameter:either 'b-value', then the corresponding value of the @@ -300,10 +300,11 @@ def estimate_b_weichert( "magnitudes below %.2f are not covered by the " "completeness table and are discarded" % completeness_table[0, 0]) + assert delta_m > 0, "delta_m cannot be zero" assert b_parameter == 'b_value' or b_parameter == 'beta', \ "please choose either 'b_value' or 'beta' as b_parameter" factor = 1 / np.log(10) if b_parameter == 'b_value' else 1 - tend = tend if tend else np.max(years) + last_year = last_year if last_year else np.max(years) # Get the magnitudes and completeness years as separate arrays completeness_magnitudes = completeness_table[:, 0] @@ -337,39 +338,40 @@ def estimate_b_weichert( # minimization beta = np.log(10) # initialization of beta - output = minimize(weichert_minimization, - beta, - args=(tend, complete_events, delta_m), - method='Nelder-Mead', - options={'maxiter': 5000, 'disp': True}, - tol=1e5 * np.finfo(float).eps) - b_parameter = output.x[0] * factor + solution = minimize(_weichert_objective_function, + beta, + args=(last_year, complete_events, delta_m), + method='Nelder-Mead', + options={'maxiter': 5000, 'disp': True}, + tol=1e5 * np.finfo(float).eps) + beta = solution.x[0] + b_parameter = beta * factor # compute rate at lower magnitude of completeness bin - wf = np.sum(np.exp(-output.x[0] - * (complete_events.mag_left_edge + delta_m * 0.5)))\ - / np.sum((tend - complete_events.completeness_start.values) - * np.exp(-output.x[0] - * (complete_events.mag_left_edge + delta_m * 0.5))) - rate_at_lmc = complete_events.num.sum() * wf + weichert_multiplier = np.sum( + np.exp(-beta * (complete_events.mag_left_edge + delta_m * 0.5)))\ + / np.sum( + (last_year - complete_events.completeness_start.values) + * np.exp(-beta * (complete_events.mag_left_edge + delta_m * 0.5))) + rate_at_lmc = complete_events.num.sum() * weichert_multiplier # compute a-value ( a_val = log10(rate at M=0) ) - a_val = np.log10(rate_at_lmc) + \ - (output.x[0] / 
np.log(10)) * complete_events.mag_left_edge.values[0] + a_val = np.log10(rate_at_lmc)\ + + (beta / np.log(10)) * complete_events.mag_left_edge.values[0] # compute uncertainty in b-parameter according to Weichert (1980) - nominator = np.sum((tend - complete_events.completeness_start.values) - * np.exp(-output.x[0] * (complete_events.mag_left_edge - + delta_m * 0.5)))**2 + nominator = np.sum((last_year - complete_events.completeness_start.values) + * np.exp(-beta * (complete_events.mag_left_edge + + delta_m * 0.5)))**2 denominator_term1 = np.sum( - (tend - complete_events.completeness_start.values) + (last_year - complete_events.completeness_start.values) * (complete_events.mag_left_edge + delta_m * 0.5) - * np.exp(- output.x[0] * (complete_events.mag_left_edge + * np.exp(- beta * (complete_events.mag_left_edge + delta_m * 0.5)))**2 denominator_term2 = np.sqrt(nominator) * \ - np.sum((tend - complete_events.completeness_start.values) + np.sum((last_year - complete_events.completeness_start.values) * ((complete_events.mag_left_edge + delta_m * 0.5)**2) - * np.exp(-output.x[0] * (complete_events.mag_left_edge + * np.exp(-beta * (complete_events.mag_left_edge + delta_m * 0.5)) ) var_beta = -(1 / complete_events.num.sum()) * nominator\ @@ -382,11 +384,15 @@ def estimate_b_weichert( return b_parameter, std_b_parameter, rate_at_lmc, std_rate_at_lmc, a_val -def weichert_minimization(beta, tend, complete_events, delta_m): +def _weichert_objective_function(beta, last_year, complete_events, delta_m): + """ + function to be minimized for estimation of GR parameters as per + Weichert (1980). Used internally within estimate_b_weichert function. 
+ """ magbins = complete_events.mag_left_edge + delta_m * 0.5 - nom = np.sum((tend - complete_events.completeness_start.values + nom = np.sum((last_year - complete_events.completeness_start.values ) * magbins * np.exp(-beta * magbins)) - denom = np.sum((tend - complete_events.completeness_start.values + denom = np.sum((last_year - complete_events.completeness_start.values ) * np.exp(-beta * magbins)) left = nom / denom right = np.sum(complete_events.num.values * magbins)\ diff --git a/catalog_tools/analysis/tests/test_estimate_beta.py b/catalog_tools/analysis/tests/test_estimate_beta.py index 3bdc301..122c7a5 100644 --- a/catalog_tools/analysis/tests/test_estimate_beta.py +++ b/catalog_tools/analysis/tests/test_estimate_beta.py @@ -159,15 +159,9 @@ def test_estimate_b_weichert(a_val_true: float, years = np.concatenate([years45, years56, years67, years78, years_inc]) b_val, std_b_val, rate_at_mref, std_rate_at_mref, a_val = \ - estimate_b_weichert( - magnitudes=mags, - years=years, - completeness_table=np.array([[3.95, 1940], [4.95, 1880], - [5.95, 1500], [6.95, 1000]]), - mag_max=7.95, - tend=2000, - delta_m=0.1, - b_parameter='b_value') + estimate_b_weichert(magnitudes=mags, years=years, completeness_table=np.array([[3.95, 1940], [4.95, 1880], + [5.95, 1500], [6.95, 1000]]), + mag_max=7.95, last_year=2000, delta_m=0.1, b_parameter='b_value') assert abs(b_val_true - b_val) / b_val_true <= precision assert abs(a_val_true - a_val) / a_val_true <= precision From 2b1e0ae3d64a8e503f33367a7c6fedfd7b286472 Mon Sep 17 00:00:00 2001 From: thpap Date: Fri, 4 Aug 2023 22:31:17 +0200 Subject: [PATCH 4/6] minor changes --- catalog_tools/analysis/estimate_beta.py | 28 +++++++++++-------- catalog_tools/utils/simulate_distributions.py | 3 +- 2 files changed, 19 insertions(+), 12 deletions(-) diff --git a/catalog_tools/analysis/estimate_beta.py b/catalog_tools/analysis/estimate_beta.py index bdff1c4..685751e 100644 --- a/catalog_tools/analysis/estimate_beta.py +++ 
b/catalog_tools/analysis/estimate_beta.py @@ -307,28 +307,31 @@ def estimate_b_weichert( last_year = last_year if last_year else np.max(years) # Get the magnitudes and completeness years as separate arrays - completeness_magnitudes = completeness_table[:, 0] - completeness_years = completeness_table[:, 1] + completeness_table_magnitudes = completeness_table[:, 0] + completeness_table_years = completeness_table[:, 1] # Obtain the completeness start year for each value in magnitudes - insertion_indices = np.searchsorted(completeness_magnitudes, magnitudes) - cmpl_yrs = np.array( - [completeness_years[idx - 1] if idx not in [0, len(completeness_years)] - else {0: -1, len(completeness_years): completeness_years[-1]}[idx] + insertion_indices = np.searchsorted( + completeness_table_magnitudes, magnitudes) + completeness_starts = np.array( + [completeness_table_years[idx - 1] if + idx not in [0, len(completeness_table_years)] + else {0: -1, + len(completeness_table_years): completeness_table_years[-1]}[idx] for i, idx in enumerate(insertion_indices)] ) # filter out events outside completeness window and # get number of "complete" events in each magnitude bin # and associated year of completeness - idxcomp = ((cmpl_yrs > 0) & (years - cmpl_yrs >= 0)) + idxcomp = ((completeness_starts > 0) & (years - completeness_starts >= 0)) complete_events = pd.DataFrame.groupby( pd.DataFrame(data={ 'mag_left_edge': np.array( [i.left for i in pd.cut(magnitudes[idxcomp], - bins=np.arange(completeness_magnitudes[0], - mag_max + 0.01, delta_m), right=False)]), - 'completeness_start': cmpl_yrs[idxcomp] + bins=np.arange(completeness_table_magnitudes[0], + mag_max + 0.01, delta_m), right=False)]), + 'completeness_start': completeness_starts[idxcomp] }), by=['mag_left_edge', 'completeness_start'])\ .size()\ .to_frame('num')\ @@ -384,7 +387,10 @@ def estimate_b_weichert( return b_parameter, std_b_parameter, rate_at_lmc, std_rate_at_lmc, a_val -def _weichert_objective_function(beta, last_year, 
complete_events, delta_m): +def _weichert_objective_function(beta: float, + last_year: Union[float, int], + complete_events: pd.DataFrame, + delta_m: float) -> float: """ function to be minimized for estimation of GR parameters as per Weichert (1980). Used internally within estimate_b_weichert function. diff --git a/catalog_tools/utils/simulate_distributions.py b/catalog_tools/utils/simulate_distributions.py index c2696e9..c45e82c 100644 --- a/catalog_tools/utils/simulate_distributions.py +++ b/catalog_tools/utils/simulate_distributions.py @@ -1,9 +1,10 @@ import numpy as np from scipy import stats +from typing import Optional, Tuple, Union def simulate_magnitudes(n: int, beta: float, mc: float, - mag_max: float = None) -> np.ndarray: + mag_max: Union[float, None] = None) -> np.ndarray: """ Generates a vector of n elements drawn from an exponential distribution exp(-beta*M) From f1a016a743d24c1e165c16c85cc4cf0658b11e19 Mon Sep 17 00:00:00 2001 From: thpap Date: Fri, 4 Aug 2023 22:46:23 +0200 Subject: [PATCH 5/6] minor changes --- catalog_tools/analysis/estimate_beta.py | 2 +- catalog_tools/utils/simulate_distributions.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/catalog_tools/analysis/estimate_beta.py b/catalog_tools/analysis/estimate_beta.py index 685751e..7c4e387 100644 --- a/catalog_tools/analysis/estimate_beta.py +++ b/catalog_tools/analysis/estimate_beta.py @@ -238,7 +238,7 @@ def estimate_b_weichert( years: np.ndarray, completeness_table: np.ndarray, mag_max: Union[int, float], - last_year: Union[int, float] = None, + last_year: Optional[Union[int, float]] = None, delta_m: float = 0.1, b_parameter: str = 'b_value' ) -> Tuple[float, float, float, float, float]: diff --git a/catalog_tools/utils/simulate_distributions.py b/catalog_tools/utils/simulate_distributions.py index c45e82c..01f2fbd 100644 --- a/catalog_tools/utils/simulate_distributions.py +++ b/catalog_tools/utils/simulate_distributions.py @@ -4,7 +4,7 @@ def 
simulate_magnitudes(n: int, beta: float, mc: float, - mag_max: Union[float, None] = None) -> np.ndarray: + mag_max: Optional[float] = None) -> np.ndarray: """ Generates a vector of n elements drawn from an exponential distribution exp(-beta*M) From 077654242b51feeb4ab10be116dbef3a261cffad Mon Sep 17 00:00:00 2001 From: thpap Date: Fri, 4 Aug 2023 23:19:13 +0200 Subject: [PATCH 6/6] minor changes --- catalog_tools/analysis/estimate_beta.py | 13 +++++++++---- catalog_tools/analysis/tests/test_estimate_beta.py | 9 +++++++-- 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/catalog_tools/analysis/estimate_beta.py b/catalog_tools/analysis/estimate_beta.py index 7c4e387..bca7662 100644 --- a/catalog_tools/analysis/estimate_beta.py +++ b/catalog_tools/analysis/estimate_beta.py @@ -1,6 +1,6 @@ """This module contains functions for the estimation of beta and the b-value. """ -from typing import Optional, Tuple, Union +from typing import Optional, Tuple, Union, List import numpy as np import pandas as pd from scipy.optimize import minimize @@ -235,7 +235,7 @@ def estimate_b_laplace( def estimate_b_weichert( magnitudes: np.ndarray, - years: np.ndarray, + dates: List[np.datetime64], completeness_table: np.ndarray, mag_max: Union[int, float], last_year: Optional[Union[int, float]] = None, @@ -255,7 +255,7 @@ def estimate_b_weichert( Args: magnitudes: vector of earthquake magnitudes - years: vector of years of occurrence of each earthquake + dates: list of datetime objects of occurrence of each earthquake completeness_table: Nx2 array, where the first column contains the leftmost edge of magnitude bins and the second column the associated year of completeness, i.e. 
@@ -286,7 +286,7 @@ def estimate_b_weichert( ( =log10(rate at mag=0) ) of Gutenberg-Richter magnitude frequency distribution """ - assert len(magnitudes) == len(years), \ + assert len(magnitudes) == len(dates), \ "the magnitudes and years arrays have different lengths" assert completeness_table.shape[1] == 2 assert np.all(np.ediff1d(completeness_table[:, 0]) >= 0),\ @@ -304,6 +304,11 @@ def estimate_b_weichert( assert b_parameter == 'b_value' or b_parameter == 'beta', \ "please choose either 'b_value' or 'beta' as b_parameter" factor = 1 / np.log(10) if b_parameter == 'b_value' else 1 + + # convert datetime to integer calendar year + years = np.array(dates).astype('datetime64[Y]').astype(int) + 1970 + + # get last year of catalogue if last_year not defined last_year = last_year if last_year else np.max(years) # Get the magnitudes and completeness years as separate arrays diff --git a/catalog_tools/analysis/tests/test_estimate_beta.py b/catalog_tools/analysis/tests/test_estimate_beta.py index 122c7a5..9c7244d 100644 --- a/catalog_tools/analysis/tests/test_estimate_beta.py +++ b/catalog_tools/analysis/tests/test_estimate_beta.py @@ -127,21 +127,25 @@ def test_estimate_b_weichert(a_val_true: float, mags45 = simulate_magnitudes_w_offset( n=n45, beta=np.log(10), mc=4, delta_m=0.1, mag_max=4.95) years45 = np.random.randint(1940, 2000, n45) + dates45 = np.array(['%d-06-15' % i for i in years45], dtype='datetime64') n56 = np.random.poisson(r56 * (2000 - 1880)) mags56 = simulate_magnitudes_w_offset( n=n56, beta=np.log(10), mc=5, delta_m=0.1, mag_max=5.95) years56 = np.random.randint(1880, 2000, n56) + dates56 = np.array(['%d-06-15' % i for i in years56], dtype='datetime64') n67 = np.random.poisson(r67 * (2000 - 1500)) mags67 = simulate_magnitudes_w_offset( n=n67, beta=np.log(10), mc=6, delta_m=0.1, mag_max=6.95) years67 = np.random.randint(1500, 2000, n67) + dates67 = np.array(['%d-06-15' % i for i in years67], dtype='datetime64') n78 = np.random.poisson(r78 * (2000 - 
1000))
     mags78 = simulate_magnitudes_w_offset(
         n=n78, beta=np.log(10), mc=7, delta_m=0.1, mag_max=7.95)
     years78 = np.random.randint(1000, 2000, n78)
+    dates78 = np.array(['%d-06-15' % i for i in years78], dtype='datetime64')
 
     # add some earthquakes in incomplete years
     mags_inc = np.concatenate([
@@ -154,12 +158,13 @@ def test_estimate_b_weichert(a_val_true: float,
         np.random.randint(1000, 1880, 10),
         np.random.randint(1000, 1500, 1)
     ])
+    dates_inc = np.array(['%d-06-15' % i for i in years_inc], dtype='datetime64')
 
     mags = np.concatenate([mags45, mags56, mags67, mags78, mags_inc])
-    years = np.concatenate([years45, years56, years67, years78, years_inc])
+    dates = np.concatenate([dates45, dates56, dates67, dates78, dates_inc])
 
     b_val, std_b_val, rate_at_mref, std_rate_at_mref, a_val = \
-        estimate_b_weichert(magnitudes=mags, years=years, completeness_table=np.array([[3.95, 1940], [4.95, 1880],
+        estimate_b_weichert(magnitudes=mags, dates=dates, completeness_table=np.array([[3.95, 1940], [4.95, 1880],
                             [5.95, 1500], [6.95, 1000]]),
                             mag_max=7.95, last_year=2000, delta_m=0.1, b_parameter='b_value')