Skip to content

Commit

Permalink
Merge pull request #87 from pymc-labs/quickstart-fixes-and-general-im…
Browse files Browse the repository at this point in the history
…provement

Quickstart fixes and general improvement
  • Loading branch information
drbenvincent authored Nov 23, 2022
2 parents cce4909 + 960c8f2 commit 4a23838
Show file tree
Hide file tree
Showing 17 changed files with 302 additions and 249 deletions.
23 changes: 22 additions & 1 deletion CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -72,10 +72,31 @@ If there are autodoc issues/errors in remote builds of the docs, we need to add

## New releases [work in progress]

### Test release to `test.pypi.org` (manual)

1. Bump the release version in `causalpy/version.py`. This is automatically read by `setup.py` and `docs/config.py`.
2. Upload to test.pypi.org. _Note that this requires a username and password for test.pypi.org_. In the root directory, run the following:
```bash
rm -rf dist
python setup.py sdist
twine upload --repository testpypi dist/*
```
3. At this point the updated build is available on test.pypi.org. We can test that this is working as expected by installing (into a test environment) from test.pypi.org with

```bash
conda create -n causalpy-test python
conda activate causalpy-test
python3 -m pip install --index-url https://test.pypi.org/simple/ --extra-index-url https://pypi.org/simple/ causalpy
```

4. Now start a Python or IPython session and follow the quickstart instructions to confirm that things work.

### Actual release to `pypi.org` (manual)

1. If not done in the previous step, bump the release version in `causalpy/version.py`. This is automatically read by `setup.py` and `docs/config.py`.
2. Update on pypi.org. In the root directory:
- `python setup.py sdist`
- update to pypi.org with `twine upload dist/*`
- upload to pypi.org with `twine upload dist/*`. Note that this requires a username and password for pypi.org.
3. Readthedocs:
- Docs should be built remotely every time there is a pull request
- See here https://docs.readthedocs.io/en/stable/tutorial/#versioning-documentation for versioning the docs
12 changes: 4 additions & 8 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,27 +37,23 @@ pip install git+https://github.com/pymc-labs/CausalPy.git
## Quickstart

```python
from causalpy.pymc_experiments import RegressionDiscontinuity
from causalpy.pymc_models import LinearRegression
import pandas as pd
import pathlib
import causalpy as cp


# Import and process data
rd_data_path = pathlib.Path.cwd().parents[1] / "causalpy" / "data" / "drinking.csv"
df = (
pd.read_csv(rd_data_path)[["agecell", "all", "mva", "suicide"]]
cp.load_data("drinking")
.rename(columns={"agecell": "age"})
.assign(treated=lambda df_: df_.age > 21)
.dropna(axis=0)
)

# Run the analysis
result = RegressionDiscontinuity(
result = cp.pymc_experiments.RegressionDiscontinuity(
df,
formula="all ~ 1 + age + treated",
running_variable_name="age",
prediction_model=LinearRegression(),
prediction_model=cp.pymc_models.LinearRegression(),
treatment_threshold=21,
)

Expand Down
6 changes: 6 additions & 0 deletions causalpy/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
import causalpy.pymc_experiments
import causalpy.pymc_models
import causalpy.skl_experiments
import causalpy.skl_models

from .data import load_data
4 changes: 4 additions & 0 deletions causalpy/data/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
"""Code for loading datasets."""
from .datasets import load_data

__all__ = ["load_data"]
33 changes: 33 additions & 0 deletions causalpy/data/datasets.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
import os
import pathlib
from typing import Optional

import pandas as pd

import causalpy as cp

# Registry of the example datasets that `load_data` can return.
# Each key is the short name a caller passes to `load_data`; the "filename"
# entry is the CSV file name that is joined onto the directory returned by
# `get_data_home`.
DATASETS = {
"banks": {"filename": "banks.csv"},
"did": {"filename": "did.csv"},
"drinking": {"filename": "drinking.csv"},
"its": {"filename": "its.csv"},
"its simple": {"filename": "its_simple.csv"},
"rd": {"filename": "regression_discontinuity.csv"},
"sc": {"filename": "synthetic_control.csv"},
}


def get_data_home():
    """Return the path of the directory that holds the bundled CSV datasets.

    The location is derived from this module's own ``__file__`` rather than
    from the top-level package object, so it does not depend on the package
    being fully imported (no circular-import reliance) and does not hard-code
    the package directory name.

    Returns
    -------
    pathlib.Path
        The directory containing this module, i.e. ``<package>/data``.
    """
    # NOTE: this module lives in ``causalpy/data/`` next to the CSV files, so
    # the module's parent directory *is* the data directory.
    return pathlib.Path(__file__).parent


def load_data(dataset: Optional[str] = None) -> pd.DataFrame:
    """Load one of the example datasets registered in ``DATASETS``.

    Parameters
    ----------
    dataset : str, optional
        Short name of the dataset; must be one of the keys of ``DATASETS``.
        The default ``None`` is not a registered name and therefore raises.

    Returns
    -------
    pandas.DataFrame
        The contents of the dataset's CSV file as read by ``pandas.read_csv``.

    Raises
    ------
    ValueError
        If ``dataset`` is not a key of ``DATASETS``.
    """
    # Guard clause: reject unknown names up front and tell the caller which
    # names are valid, so a typo is easy to correct.
    if dataset not in DATASETS:
        raise ValueError(
            f"Dataset {dataset} not found! "
            f"Available datasets: {', '.join(sorted(DATASETS))}"
        )

    file_path = get_data_home() / DATASETS[dataset]["filename"]
    return pd.read_csv(file_path)
2 changes: 1 addition & 1 deletion causalpy/version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.0.2"
__version__ = "0.0.3"
12 changes: 4 additions & 8 deletions docs/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -29,27 +29,23 @@ Quickstart

.. code-block:: python
from causalpy.pymc_experiments import RegressionDiscontinuity
from causalpy.pymc_models import LinearRegression
import pandas as pd
import pathlib
import causalpy as cp
# Import and process data
rd_data_path = pathlib.Path.cwd().parents[1] / "causalpy" / "data" / "drinking.csv"
df = (
pd.read_csv(rd_data_path)[["agecell", "all", "mva", "suicide"]]
cp.load_data("drinking")
.rename(columns={"agecell": "age"})
.assign(treated=lambda df_: df_.age > 21)
.dropna(axis=0)
)
# Run the analysis
result = RegressionDiscontinuity(
result = cp.pymc_experiments.RegressionDiscontinuity(
df,
formula="all ~ 1 + age + treated",
running_variable_name="age",
prediction_model=LinearRegression(),
prediction_model=cp.pymc_models.LinearRegression(),
treatment_threshold=21,
)
Expand Down
28 changes: 11 additions & 17 deletions docs/notebooks/did_pymc.ipynb

Large diffs are not rendered by default.

50 changes: 30 additions & 20 deletions docs/notebooks/did_pymc_banks.ipynb

Large diffs are not rendered by default.

31 changes: 3 additions & 28 deletions docs/notebooks/did_skl.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,7 @@
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import pathlib\n",
"import causalpy as cp\n",
"import arviz as az"
]
},
Expand All @@ -27,28 +26,13 @@
"az.style.use(\"arviz-darkgrid\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Load data"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"did_data_path = pathlib.Path.cwd().parents[1] / \"causalpy\" / \"data\" / \"did.csv\"\n",
"data = pd.read_csv(did_data_path)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Run the analysis"
"data = cp.load_data(\"did\")"
]
},
{
Expand All @@ -57,26 +41,17 @@
"metadata": {},
"outputs": [],
"source": [
"from causalpy.skl_experiments import DifferenceInDifferences\n",
"from sklearn.linear_model import LinearRegression\n",
"\n",
"# NOTE: `treated` is a deterministic function of `t` and `group`. So add this function into the formula.\n",
"\n",
"result = DifferenceInDifferences(\n",
"result = cp.skl_experiments.DifferenceInDifferences(\n",
" data,\n",
" formula=\"y ~ 1 + group + t + treated:group\",\n",
" time_variable_name=\"t\",\n",
" prediction_model=LinearRegression(),\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Examine the results"
]
},
{
"cell_type": "code",
"execution_count": 5,
Expand Down
45 changes: 12 additions & 33 deletions docs/notebooks/rd_pymc.ipynb

Large diffs are not rendered by default.

80 changes: 41 additions & 39 deletions docs/notebooks/rd_pymc_drinking.ipynb

Large diffs are not rendered by default.

107 changes: 88 additions & 19 deletions docs/notebooks/rd_skl.ipynb

Large diffs are not rendered by default.

25 changes: 4 additions & 21 deletions docs/notebooks/rd_skl_drinking.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,8 @@
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import pathlib\n",
"import arviz as az"
"import arviz as az\n",
"import causalpy as cp"
]
},
{
Expand All @@ -31,45 +30,29 @@
"az.style.use(\"arviz-darkgrid\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Load data"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"rd_data_path = pathlib.Path.cwd().parents[1] / \"causalpy\" / \"data\" / \"drinking.csv\"\n",
"df = (\n",
" pd.read_csv(rd_data_path)[[\"agecell\", \"all\", \"mva\", \"suicide\"]]\n",
" cp.load_data(\"drinking\")\n",
" .rename(columns={\"agecell\": \"age\"})\n",
" .assign(treated=lambda df_: df_.age > 21)\n",
" .dropna(axis=0)\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Linear model"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"from causalpy.skl_experiments import RegressionDiscontinuity\n",
"from sklearn.linear_model import LinearRegression\n",
"\n",
"result = RegressionDiscontinuity(\n",
"result = cp.skl_experiments.RegressionDiscontinuity(\n",
" df,\n",
" formula=\"all ~ 1 + age + treated\",\n",
" running_variable_name=\"age\",\n",
Expand Down
73 changes: 33 additions & 40 deletions docs/notebooks/sc_pymc.ipynb

Large diffs are not rendered by default.

18 changes: 5 additions & 13 deletions docs/notebooks/sc_skl.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,7 @@
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import pathlib\n",
"import causalpy as cp\n",
"import arviz as az"
]
},
Expand All @@ -40,10 +39,7 @@
"metadata": {},
"outputs": [],
"source": [
"sc_data_path = (\n",
" pathlib.Path.cwd().parents[1] / \"causalpy\" / \"data\" / \"synthetic_control.csv\"\n",
")\n",
"df = pd.read_csv(sc_data_path)\n",
"df = cp.load_data(\"sc\")\n",
"treatment_time = 70"
]
},
Expand All @@ -60,15 +56,12 @@
"metadata": {},
"outputs": [],
"source": [
"from causalpy.skl_models import WeightedProportion\n",
"from causalpy.skl_experiments import SyntheticControl\n",
"\n",
"# Note, we do not want an intercept in this model\n",
"result = SyntheticControl(\n",
"result = cp.skl_experiments.SyntheticControl(\n",
" df,\n",
" treatment_time,\n",
" formula=\"actual ~ 0 + a + b + c + d + e + f + g\",\n",
" prediction_model=WeightedProportion(),\n",
" prediction_model=cp.skl_models.WeightedProportion(),\n",
")"
]
},
Expand Down Expand Up @@ -132,11 +125,10 @@
"metadata": {},
"outputs": [],
"source": [
"from causalpy.skl_experiments import SyntheticControl\n",
"from sklearn.linear_model import LinearRegression\n",
"\n",
"# Note, we do not want an intercept in this model\n",
"result = SyntheticControl(\n",
"result = cp.skl_experiments.SyntheticControl(\n",
" df,\n",
" treatment_time,\n",
" formula=\"actual ~ 0 + a + b + c + d + e + f + g\",\n",
Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
arviz>=0.13.0rc1
arviz>=0.14.0
graphviz
matplotlib>=3.5.3
numpy
Expand Down

0 comments on commit 4a23838

Please sign in to comment.