Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add BetaGeoBetaBinom Distribution Block #431

Merged
merged 35 commits into from
May 27, 2024
Merged
Show file tree
Hide file tree
Changes from 18 commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
81c6e54
init commit
ColtAllen Nov 11, 2023
882018e
removed scan
ColtAllen Nov 11, 2023
a96929b
Fix logp
ricardoV94 Nov 14, 2023
30c39a0
Remove print statement
ricardoV94 Nov 14, 2023
777f715
Add test for logp notimplemented errors
ricardoV94 Nov 14, 2023
9b59449
Merge branch 'pymc-labs:main' into bgbb_dist
ColtAllen Dec 11, 2023
0aac157
docstrings
ColtAllen Dec 11, 2023
2bac71d
dev notebook added
ColtAllen Dec 11, 2023
7472fee
updated to vectorize_graph
ColtAllen Dec 16, 2023
6a0239d
Merge branch 'pymc-labs:main' into bgbb_dist
ColtAllen Jan 7, 2024
e70caa6
Merge branch 'pymc-labs:main' into bgbb_dist
ColtAllen Jan 11, 2024
2eb60ae
import order
ColtAllen Jan 12, 2024
3c07ad4
update oldest pymc_version
ColtAllen Jan 12, 2024
6cec635
Update ci.yml pymc version
ColtAllen Jan 12, 2024
c2d44b0
Update pyproject.toml pymc version
ColtAllen Jan 12, 2024
bd0fe40
Merge branch 'bgbb_dist' of https://github.com/ColtAllen/pymc-marketi…
ColtAllen Jan 12, 2024
d2d02cd
WIP sample prior testing
ColtAllen Jan 30, 2024
348691a
Merge branch 'pymc-labs:main' into bgbb_dist
ColtAllen Jan 30, 2024
4329380
sample prior compared against lifetimes
ColtAllen Feb 3, 2024
2423c12
Merge branch 'pymc-labs:main' into bgbb_dist
ColtAllen Feb 9, 2024
4040ffc
Merge branch 'pymc-labs:main' into bgbb_dist
ColtAllen Feb 24, 2024
d4d3455
increase rtol
ColtAllen Feb 24, 2024
bb6b893
Merge branch 'main' into bgbb_dist
ColtAllen Mar 25, 2024
22d1269
Merge branch 'pymc-labs:main' into bgbb_dist
ColtAllen Apr 12, 2024
618f0b3
Merge branch 'main' into bgbb_dist
ColtAllen Apr 19, 2024
f0d9588
Merge branch 'pymc-labs:main' into bgbb_dist
ColtAllen Apr 25, 2024
d839bee
Merge branch 'pymc-labs:main' into bgbb_dist
ColtAllen Apr 29, 2024
50158a9
Merge branch 'pymc-labs:main' into bgbb_dist
ColtAllen May 1, 2024
5366087
Merge branch 'pymc-labs:main' into bgbb_dist
ColtAllen May 4, 2024
455b7a9
Merge branch 'pymc-labs:main' into bgbb_dist
ColtAllen May 22, 2024
c63e94e
remove commented code, add logp reference
ColtAllen May 26, 2024
60a1f55
fix latex docstring
ColtAllen May 26, 2024
783d6f5
notebook testing and misc edits
ColtAllen May 26, 2024
512aea6
revert latex in docstring
ColtAllen May 26, 2024
c376695
add ruff ignore comment
ColtAllen May 27, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ on:
branches: [main]

env:
OLDEST_PYMC_VERSION: "5.8.2"
OLDEST_PYMC_VERSION: "5.10.0"

jobs:
lint:
Expand Down
383 changes: 383 additions & 0 deletions docs/source/notebooks/clv/dev/beta_geo_beta_binom.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,383 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "5e06e043-4631-47ae-a658-a9a928ff15e5",
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import pandas as pd\n",
"from lifetimes import BetaGeoBetaBinomFitter\n",
"from lifetimes.datasets import load_donations, load_cdnow_summary_data_with_monetary_value"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "8ff42e71-fc43-4d45-8446-7205e3d37bce",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([ -3.94031398, -10.25427751, -6.82582822])"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"value = np.array([[1.5, 1], [5.3, 4], [6, 2]])\n",
"alpha = 0.55\n",
"beta = 10.58\n",
"gamma = 0.61\n",
"delta = 11.67\n",
"T = 12\n",
"\n",
"BetaGeoBetaBinomFitter._loglikelihood((alpha, beta, gamma, delta), value[..., 1], value[..., 0], T)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "371bb7a1-9f5c-4bdf-81b1-a9504261badb",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<lifetimes.BetaGeoBetaBinomFitter: fitted with 22 subjects, alpha: 1.20, beta: 0.75, delta: 2.78, gamma: 0.66>"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"discrete_noncontract_df = load_donations()\n",
"\n",
"periods = 6\n",
"bgbb = BetaGeoBetaBinomFitter().fit(discrete_noncontract_df['frequency'].values,\n",
" discrete_noncontract_df['recency'].values,\n",
" discrete_noncontract_df['periods'].values,\n",
" discrete_noncontract_df['weights'].values)\n",
"bgbb"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "f8052f01-5aca-48fd-917e-1f2bbeb6326f",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['conditional_expected_number_of_purchases_up_to_time', 'conditional_probability_alive', 'expected_number_of_transactions_in_first_n_periods', 'fit', 'load_model', 'save_model', 'summary']\n"
]
}
],
"source": [
"method_list = [method for method in dir(BetaGeoBetaBinomFitter) if not method.startswith('_')]\n",
"print(method_list)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "f4f56b83-f830-4610-8f4f-e67ff242967e",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0 0.072863\n",
"1 0.085696\n",
"2 0.314238\n",
"3 0.593853\n",
"4 0.839396\n",
"5 1.021689\n",
"6 1.147885\n",
"7 0.119121\n",
"8 0.536111\n",
"9 1.057604\n",
"10 1.443042\n",
"11 1.668817\n",
"12 0.223595\n",
"13 1.034572\n",
"14 1.804703\n",
"15 2.189749\n",
"16 0.583192\n",
"17 2.030024\n",
"18 2.710681\n",
"19 1.812942\n",
"20 3.231612\n",
"21 3.752544\n",
"dtype: float64"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# equation 13 in paper\n",
"bgbb.conditional_expected_number_of_purchases_up_to_time(5,\n",
" discrete_noncontract_df['frequency'],\n",
" discrete_noncontract_df['recency'],\n",
" discrete_noncontract_df['periods'])"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "7ea5dc42-160f-4f96-9e16-a97f01dd4bdc",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0 0.070072\n",
"1 0.045012\n",
"2 0.165056\n",
"3 0.311927\n",
"4 0.440900\n",
"5 0.536651\n",
"6 0.602936\n",
"7 0.043038\n",
"8 0.193695\n",
"9 0.382108\n",
"10 0.521365\n",
"11 0.602936\n",
"12 0.061566\n",
"13 0.284864\n",
"14 0.496916\n",
"15 0.602936\n",
"16 0.129719\n",
"17 0.451538\n",
"18 0.602936\n",
"19 0.338249\n",
"20 0.602936\n",
"21 0.602936\n",
"dtype: float64"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# equation 11 in paper\n",
"bgbb.conditional_probability_alive(10,\n",
" discrete_noncontract_df['frequency'],\n",
" discrete_noncontract_df['recency'],\n",
" discrete_noncontract_df['periods'])"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "96bcab46-7279-400a-82c1-e8b509ece774",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>model</th>\n",
" </tr>\n",
" <tr>\n",
" <th>frequency</th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>3195.925987</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1560.549020</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>964.135361</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>668.795916</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>497.960966</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>389.113685</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>314.983874</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" model\n",
"frequency \n",
"0 3195.925987\n",
"1 1560.549020\n",
"2 964.135361\n",
"3 668.795916\n",
"4 497.960966\n",
"5 389.113685\n",
"6 314.983874"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# TODO: write and test (8) as a replacement. Compare against just aggregating means across the exploded DF \n",
"# TODO: Can the arviz functions in the BetaGeoBetaBinom distribution block preclude the need for this?\n",
"# TODO: Replace this with (9) or (10) in a future PR, since that expression can predict interval ranges\n",
"\n",
"# equation 7 in paper, but that's for probabilities. should it be 8 for predicting mean n?\n",
"# yeah, this function should be renamed for clarity. \n",
"# it distributes customers in the dataset across n transaction opportunities\n",
"# it works better as an evaluation function, since it assumes a fixed customer population size\n",
"# if n > n_periods, it will keep right on predicting. This may be a bug\n",
"bgbb.expected_number_of_transactions_in_first_n_periods(n=50)"
]
},
{
"cell_type": "markdown",
"id": "9d55e986-d1f2-4c0d-8c25-3e289e90d5fe",
"metadata": {},
"source": [
"### Expected transactions in N periods\n",
"Evaluated directly, this expression overflows to `inf` for large values of n (n=167 in this example) because the gamma-function terms become too large. Recalculating on the log scale allows for larger values, but the full expression cannot simply be log-transformed if gamma < 1, because term1 is then negative."
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "2e82f5b4-1b4a-4477-843b-58cbd411d348",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"average of 1.938137499995133 purchases expected in 5 opportunities\n"
]
}
],
"source": [
"from scipy import special\n",
"from numpy import expm1\n",
"\n",
"n = 5\n",
"alpha,beta,delta,gamma = bgbb._unload_params('alpha','beta','delta','gamma')\n",
"\n",
"# add a larger gamma value for testing\n",
"#gamma = .9\n",
"\n",
"# Set to True to evaluate the gamma-function ratio via gammaln instead of gamma.\n",
"log_scale = False\n",
"\n",
"# Prefactor of the expectation. It is negative whenever gamma < 1, so it is kept\n",
"# on the linear scale; only the gamma-function ratio is moved to the log scale.\n",
"term1 = alpha/(alpha+beta)*delta/(gamma-1)\n",
"\n",
"if not log_scale:\n",
"    # Direct evaluation: special.gamma returns inf once its argument gets large enough.\n",
"    term2 = 1-(special.gamma(gamma+delta))/special.gamma(gamma+delta+n)*(special.gamma(1+delta+n))/special.gamma(1+delta)\n",
"else:\n",
"    # Log of the same gamma-function ratio; gammaln stays finite for large arguments.\n",
"    log_ratio = special.gammaln(gamma+delta) - special.gammaln(gamma+delta+n) + special.gammaln(1+delta+n) - special.gammaln(1+delta)\n",
"    # 1 - exp(log_ratio), written with expm1 for accuracy when log_ratio is near zero.\n",
"    term2 = -expm1(log_ratio)\n",
"\n",
"expected_purchases_n_periods = term1 * term2\n",
"\n",
"print(f'average of {expected_purchases_n_periods} purchases expected in {n} opportunities')"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "5186cf4d-710d-4e85-bef9-b66ccced5586",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[1.2035223936080357,\n",
" 0.7497163581757648,\n",
" 2.7834419828877737,\n",
" 0.6567181695499797]"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"bgbb._unload_params('alpha','beta','delta','gamma')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "80d11cc8-98fb-426e-89b2-693f0a8d22fa",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.18"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Loading
Loading