Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Harmonic oscillation reduction #285

Closed
134 changes: 116 additions & 18 deletions cesium/features/lomb_scargle.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,19 @@


def lomb_scargle_model(
time, signal, error, sys_err=0.05, nharm=8, nfreq=3, tone_control=5.0
time,
signal,
error,
sys_err=0.05,
nharm=8,
nfreq=3,
tone_control=5.0,
opt_normalize=False,
):
"""Simultaneous fit of a sum of sinusoids by weighted least squares:
y(t) = Sum_k Ck*t^k + Sum_i Sum_j A_ij sin(2*pi*j*fi*(t-t0)+phi_j),
i=[1,nfreq], j=[1,nharm]
"""Simultaneous fit of a sum of sinusoids by weighted least squares.

y(t) = Sum_k Ck*t^k + Sum_i Sum_j A_ij sin(2*pi*j*fi*(t-t0)+phi_j),
i=[1,nfreq], j=[1,nharm]

Parameters
----------
Expand All @@ -21,11 +29,22 @@ def lomb_scargle_model(
error : array_like
Array containing measurement error values.

nharm : int
Number of harmonics to fit for each frequency.
sys_err : float, optional
Defaults to 0.05

nharm : int, optional
Number of harmonics to fit for each frequency. Defaults to 8.

nfreq : int, optional
Number of frequencies to fit. Defaults to 3.

nfreq : int
Number of frequencies to fit.
tone_control : float, optional
Defaults to 5.0

opt_normalize : boolean, optional
Normalize the timeseries before fitting? This can
help with instabilities seen at large values of `nharm`
Defaults to False.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What does opt_normalize do?

We also need tests for these new flags.

Copy link
Author

@sarajamal57 sarajamal57 May 31, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The normalization resolves the numerical overflow observed for instance in issue #284

Modified/Added functions

  • lomb_scargle_model()
  • rescale_lomb_model()

Procedure

If opt_normalize==False, this refers to the original Lomb-Scargle periodogram function.

Otherwise:
Step 1 - the time-serie (TS) is normalized, and the (mean, std scale) are stored
Step 2 - Lomb-Scargle periodogram & the best fitted model are computed on the normalized (signal, error)
Step 3 - Results are scaled back to the original/raw format using the stored (mean, scale)

Step 1
In function lomb_scargle_model():

### Normalization ###
    mmean  = np.nanmean(signal)
    mscale = np.nanstd(signal)
    if opt_normalize:
        signal = (signal-mmean)/mscale
        error  = error/mscale

Step 2
The lomb-scargle (LS) periodogram is estimated on the normalized time-serie (signal, associated error).

Step 3
The best fitted Lomb-Scargle model is rescaled back.
In function lomb_scargle_model() :

current_fit = rescale_lomb_model(fit, mmean, mscale) if (opt_normalize&(i==0)) else fit 
## function rescale_lomb_model() is used to rescale the best fitted model (computed on normalized TS)

The first component (i==0) of var freq_fits requires to re-inject the scale

if (opt_normalize&(i==0)):
            model_dict['freq_fits'][-1]['resid']*=mscale


Returns
-------
Expand All @@ -36,13 +55,18 @@ def lomb_scargle_model(
fit_lomb_scargle(...)

"""

dy0 = np.sqrt(error**2 + sys_err**2)

wt = 1.0 / dy0**2
time = time.copy() - min(time) # speeds up lomb_scargle code to have min(time)==0
signal = signal.copy()

# Normalization
mmean = np.nanmean(signal)
mscale = np.nanstd(signal)
if opt_normalize:
signal = (signal - mmean) / mscale
error = error / mscale

dy0 = np.sqrt(error**2 + sys_err**2)
wt = 1.0 / dy0**2
chi0 = np.dot(signal**2, wt)

# TODO parametrize?
Expand Down Expand Up @@ -71,6 +95,7 @@ def lomb_scargle_model(
detrend_order=1,
)
model_dict["trend"] = fit["trend_coef"][1]
#
else:
fit = fit_lomb_scargle(
time,
Expand All @@ -84,22 +109,75 @@ def lomb_scargle_model(
nharm=nharm,
detrend_order=0,
)
model_dict["freq_fits"].append(fit)
signal -= fit["model"]
model_dict["freq_fits"][-1]["resid"] = signal.copy()

# remove current estim_freq for next run [**NORM]
norm_residual = signal - fit["model"]
signal = norm_residual

# store current estim_freq results, [**RESCALED for first fund freq]
current_fit = (
rescale_lomb_model(fit, mmean, mscale)
if (opt_normalize & (i == 0))
else fit
)
model_dict["freq_fits"].append(current_fit)
model_dict["freq_fits"][-1]["resid"] = norm_residual

if opt_normalize & (i == 0):
model_dict["freq_fits"][-1]["resid"] *= mscale

if i == 0:
model_dict["varrat"] = np.dot(signal**2, wt) / chi0
model_dict["varrat"] = (
np.dot(norm_residual**2, wt) / chi0
) # no need to rescale

model_dict["nfreq"] = nfreq
model_dict["nharm"] = nharm
model_dict["chi2"] = fit["chi2"]
model_dict["chi2"] = current_fit["chi2"]
model_dict["f0"] = f0
model_dict["df"] = df
model_dict["numf"] = numf

return model_dict


def rescale_lomb_model(norm_model, mmean, mscale):
"""Rescale Lomb-Scargle model if compiled on normalized data.

Parameters
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

docstring needs formatting

----------
norm_model : dict
Lomb-Scargle model computed on normalized data

mmean : float64
mean of input_signal

mscale : float64
standard-deviation of input_signal

Returns
-------
rescaled_model: dict
acrellin marked this conversation as resolved.
Show resolved Hide resolved
rescaled Lomb-Scargle model (in adequacy with the input_signal)

"""
rescaled_model = norm_model.copy()
# unchanged : 'psd', 'chi0', 'freq', 'chi2', 'trace', 'nu0', 'nu', 'npars',
# 'rel_phase', 'rel_phase_error', 'time0', 'signif'
rescaled_model["s0"] = rescaled_model["s0"] / mscale**2
rescaled_model["lambda"] = rescaled_model["lambda"] / mscale**2
rescaled_model["model"] = rescaled_model["model"] * mscale + mmean
rescaled_model["model_error"] = rescaled_model["model_error"] * mscale
rescaled_model["trend"] = rescaled_model["trend"] * mscale + mmean
rescaled_model["trend_error"] = rescaled_model["trend_error"] * mscale
rescaled_model["trend_coef"] = rescaled_model["trend_coef"] * mscale + mmean
rescaled_model["amplitude"] = rescaled_model["amplitude"] * mscale
rescaled_model["amplitude_error"] = rescaled_model["amplitude_error"] * mscale
rescaled_model["y_offset"] = rescaled_model["y_offset"] * mscale

return rescaled_model


def lprob2sigma(lprob):
"""Translate a log_e(probability) to units of Gaussian sigmas."""
if lprob > -36.0:
Expand Down Expand Up @@ -131,7 +209,8 @@ def fit_lomb_scargle(
lomb_scargle_model in order to produce a fit with multiple distinct
frequencies.

Inputs:
Parameters
----------
time : array_like
Array containing time values.

Expand Down Expand Up @@ -304,6 +383,12 @@ def fit_lomb_scargle(
out_dict["nu"] = ntime - npars
out_dict["npars"] = npars

allfreqs = [
f0 + df * ii + (ifreq / freq_zoom - 1 / 2.0) * df for ii in range(len(psd))
]
out_dict["freqs_vector"] = np.asarray(allfreqs)
out_dict["psd_vector"] = psd

A0, B0 = soln[0:nharm], soln[nharm:]
hat_hat /= np.outer(
np.hstack(((1.0 + ii) ** 2, (1.0 + ii) ** 2)),
Expand Down Expand Up @@ -439,3 +524,16 @@ def get_lomb_trend(lomb_model):
def get_lomb_y_offset(lomb_model):
"""Get the y-intercept of a fitted Lomb-Scargle model."""
return lomb_model["freq_fits"][0]["y_offset"]


def get_lomb_psd(lomb_model):
"""
Get the power spectrum of a fitted Lomb-Scargle model per fitted frequency.
"""
dict_psd = {}
for i in range(lomb_model["nfreq"]):
dict_psd[i] = {
"freqs_vector": lomb_model["freq_fits"][i]["freqs_vector"],
"psd_vector": lomb_model["freq_fits"][i]["psd_vector"],
}
return dict_psd
35 changes: 35 additions & 0 deletions cesium/features/tests/test_lomb_scargle_features.py
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,41 @@ def test_lomb_scargle_regular_multi_freq():
npt.assert_array_less(10.0, all_lomb["freq1_signif"])


def test_lomb_scargle_irregular_multi_freq_opt_normalize():
"""Use the opt_normalize parameter on irregularly sampled data
and make sure that we still get back the frequencies that
we expect.
"""
frequencies = WAVE_FREQS
amplitudes = np.zeros((len(frequencies), 4))
amplitudes[:, 0] = [4, 2, 1]
phase = 0.1
times, values, errors = irregular_periodic(frequencies, amplitudes, phase)

sys_err = 0.15
nfreq = len(frequencies)
tone_control = 20.0
for nharm in range(1, 21):
model_cesium = lomb_scargle.lomb_scargle_model(
times - min(times),
values,
errors,
sys_err=sys_err,
nharm=nharm,
nfreq=nfreq,
tone_control=tone_control,
opt_normalize=True,
)
for i, frequency in enumerate(frequencies):
npt.assert_allclose(
frequency, model_cesium["freq_fits"][i]["freq"], rtol=1e-1
)
# check to see if the power spectrum is returned
assert len(model_cesium["freq_fits"][i]["freqs_vector"]) == len(
model_cesium["freq_fits"][i]["psd_vector"]
)


def test_lomb_scargle_irregular_multi_freq():
"""Test Lomb-Scargle model features on irregularly-sampled periodic data
with multiple frequencies, each with a single harmonic. More difficult than
Expand Down