Skip to content

Commit

Permalink
Merge pull request #232 from jpreszler/issue_129_docstring_additions
Browse files Browse the repository at this point in the history
Issue 129: increase docstring coverage
  • Loading branch information
drbenvincent authored Sep 15, 2023
2 parents d7a12cb + c80d78e commit 234a0cd
Show file tree
Hide file tree
Showing 24 changed files with 1,108 additions and 75 deletions.
4 changes: 4 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,10 @@ jobs:
uses: actions/setup-python@v3
with:
python-version: ${{ matrix.python-version }}
- name: Run doctests
run: |
pip install -e .[test]
pytest --doctest-modules causalpy/
- name: Run tests
run: |
pip install -e .[test]
Expand Down
14 changes: 14 additions & 0 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,20 @@ We recommend that your contribution complies with the following guidelines befor

- All public methods must have informative docstrings with sample usage when appropriate.

- Example usage in docstrings is tested via doctest, which can be run via

```bash
make doctest
```

- Doctest can also be run directly via pytest, which can be helpful to run only specific tests during development. The following commands run all doctests, only doctests in the pymc_models module, and only the doctests for the `ModelBuilder` class in pymc_models:

```bash
pytest --doctest-modules causalpy/
pytest --doctest-modules causalpy/pymc_models.py
pytest --doctest-modules causalpy/pymc_models.py::causalpy.pymc_models.ModelBuilder
```

- To indicate a work in progress please mark the PR as `draft`. Drafts may be useful to (1) indicate you are working on something to avoid duplicated work, (2) request broad review of functionality or API, or (3) seek collaborators.

- All other tests pass when everything is rebuilt from scratch. Tests can be run with:
Expand Down
4 changes: 4 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,10 @@ check_lint:
nbqa isort --check-only .
interrogate .

doctest:
pip install causalpy[test]
pytest --doctest-modules causalpy/

test:
pip install causalpy[test]
pytest
Expand Down
11 changes: 8 additions & 3 deletions causalpy/custom_exceptions.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,26 @@
"""
Custom Exceptions for CausalPy.
"""


class BadIndexException(Exception):
"""Custom exception used when we have a mismatch in types between the dataframe
index and an event, typically a treatment or intervention."""

def __init__(self, message):
def __init__(self, message: str):
self.message = message


class FormulaException(Exception):
"""Exception raised when there is an error in a user-provided model
formula"""

def __init__(self, message):
def __init__(self, message: str):
self.message = message


class DataException(Exception):
"""Exception raised when there is an error in a user-provided dataframe"""

def __init__(self, message):
def __init__(self, message: str):
self.message = message
3 changes: 3 additions & 0 deletions causalpy/data/datasets.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
"""
Functions to load example datasets
"""
import pathlib

import pandas as pd
Expand Down
102 changes: 92 additions & 10 deletions causalpy/data/simulate_data.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
"""
Functions that generate data sets used in examples
"""
import numpy as np
import pandas as pd
from scipy.stats import dirichlet, gamma, norm, uniform
Expand All @@ -11,6 +14,18 @@
def _smoothed_gaussian_random_walk(
gaussian_random_walk_mu, gaussian_random_walk_sigma, N, lowess_kwargs
):
"""
Generates Gaussian random walk data and applies LOWESS
:param gaussian_random_walk_mu:
Mean of the random walk
:param gaussian_random_walk_sigma:
Standard deviation of the random walk
:param N:
Length of the random walk
:param lowess_kwargs:
Keyword argument dictionary passed to statsmodels lowess
"""
x = np.arange(N)
y = norm(gaussian_random_walk_mu, gaussian_random_walk_sigma).rvs(N).cumsum()
filtered = lowess(y, x, **lowess_kwargs)
Expand All @@ -26,12 +41,25 @@ def generate_synthetic_control_data(
lowess_kwargs=default_lowess_kwargs,
):
"""
Example:
>> import pathlib
>> df, weightings_true = generate_synthetic_control_data(
treatment_time=treatment_time
)
>> df.to_csv(pathlib.Path.cwd() / 'synthetic_control.csv', index=False)
Generates data for synthetic control example.
:param N:
Number of data points
:param treatment_time:
Index where treatment begins in the generated dataframe
:param grw_mu:
Mean of Gaussian Random Walk
:param grw_sigma:
Standard deviation of Gaussian Random Walk
:param lowess_kwargs:
Keyword argument dictionary passed to statsmodels lowess
Example
--------
>>> from causalpy.data.simulate_data import generate_synthetic_control_data
>>> df, weightings_true = generate_synthetic_control_data(
... treatment_time=70
... )
"""

# 1. Generate non-treated variables
Expand Down Expand Up @@ -70,6 +98,21 @@ def generate_synthetic_control_data(
def generate_time_series_data(
N=100, treatment_time=70, beta_temp=-1, beta_linear=0.5, beta_intercept=3
):
"""
Generates interrupted time series example data
:param N:
Length of the time series
:param treatment_time:
Index of when treatment begins
:param beta_temp:
The temperature coefficient
:param beta_linear:
The linear coefficient
:param beta_intercept:
The intercept
"""
x = np.arange(0, 100, 1)
df = pd.DataFrame(
{
Expand Down Expand Up @@ -99,6 +142,9 @@ def generate_time_series_data(


def generate_time_series_data_seasonal(treatment_time):
"""
Generates 10 years of monthly data with seasonality
"""
dates = pd.date_range(
start=pd.to_datetime("2010-01-01"), end=pd.to_datetime("2020-01-01"), freq="M"
)
Expand Down Expand Up @@ -146,6 +192,14 @@ def generate_time_series_data_simple(treatment_time, slope=0.0):


def generate_did():
"""
Generate Difference in Differences data
Example
--------
>>> from causalpy.data.simulate_data import generate_did
>>> df = generate_did()
"""
# true parameters
control_intercept = 1
treat_intercept_delta = 0.25
Expand All @@ -157,6 +211,7 @@ def generate_did():
def outcome(
t, control_intercept, treat_intercept_delta, trend, Δ, group, post_treatment
):
"""Compute the outcome of each unit"""
return (
control_intercept
+ (treat_intercept_delta * group)
Expand Down Expand Up @@ -191,16 +246,23 @@ def generate_regression_discontinuity_data(
N=100, true_causal_impact=0.5, true_treatment_threshold=0.0
):
"""
Example use:
>> import pathlib
>> df = generate_regression_discontinuity_data(true_treatment_threshold=0.5)
>> df.to_csv(pathlib.Path.cwd() / 'regression_discontinuity.csv', index=False)
Generate regression discontinuity example data
Example
--------
>>> import pathlib
>>> from causalpy.data.simulate_data import generate_regression_discontinuity_data
>>> df = generate_regression_discontinuity_data(true_treatment_threshold=0.5)
>>> df.to_csv(pathlib.Path.cwd() / 'regression_discontinuity.csv',
... index=False) # doctest: +SKIP
"""

def is_treated(x):
"""Check if x was treated"""
return np.greater_equal(x, true_treatment_threshold)

def impact(x):
"""Assign true_causal_impact to all treated entries"""
y = np.zeros(len(x))
y[is_treated(x)] = true_causal_impact
return y
Expand All @@ -214,6 +276,22 @@ def impact(x):
def generate_ancova_data(
N=200, pre_treatment_means=np.array([10, 12]), treatment_effect=2, sigma=1
):
"""
Generate ANCOVA example data
Example
--------
>>> import pathlib
>>> from causalpy.data.simulate_data import generate_ancova_data
>>> df = generate_ancova_data(
... N=200,
... pre_treatment_means=np.array([10, 12]),
... treatment_effect=2,
... sigma=1
... )
>>> df.to_csv(pathlib.Path.cwd() / 'ancova_data.csv',
... index=False) # doctest: +SKIP
"""
group = np.random.choice(2, size=N)
pre = np.random.normal(loc=pre_treatment_means[group])
post = pre + treatment_effect * group + np.random.normal(size=N) * sigma
Expand All @@ -233,6 +311,10 @@ def generate_geolift_data():
causal_impact = 0.2

def create_series(n=52, amplitude=1, length_scale=2):
"""
Returns numpy tile with generated seasonality data repeated over
multiple years
"""
return np.tile(
generate_seasonality(n=n, amplitude=amplitude, length_scale=2) + 3, n_years
)
Expand Down
21 changes: 20 additions & 1 deletion causalpy/plot_utils.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
"""
Plotting utility functions.
"""

from typing import Any, Dict, Optional, Tuple, Union

import arviz as az
Expand All @@ -17,7 +21,22 @@ def plot_xY(
hdi_prob: float = 0.94,
label: Union[str, None] = None,
) -> Tuple[Line2D, PolyCollection]:
"""Utility function to plot HDI intervals."""
"""
Utility function to plot HDI intervals.
:param x:
Pandas datetime index or numpy array of x-axis values
:param y:
Xarray data array of y-axis data
:param ax:
Matplotlib ax object
:param plot_hdi_kwargs:
Dictionary of keyword arguments passed to ax.plot()
:param hdi_prob:
The size of the HDI, default is 0.94
:param label:
The plot label
"""

if plot_hdi_kwargs is None:
plot_hdi_kwargs = {}
Expand Down
Loading

0 comments on commit 234a0cd

Please sign in to comment.