-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
10 changed files
with
324 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,83 @@ | ||
from .._computation._bridge import glmlg1 | ||
|
||
import numpy as np | ||
|
||
class _Prediction: | ||
def __init__(self, model, xp): | ||
self.model_ = model | ||
self.xp_ = xp | ||
|
||
def pdf(self, p): | ||
"""Probability density function for the prediction posterior in p""" | ||
return glmlg1.prediction_pdf(self.model_, self.xp_, p) | ||
|
||
def cdf(self, p): | ||
"""Cumulative density function for the prediction posterior in p""" | ||
return glmlg1.prediction_cdf(self.model_, self.xp_, p) | ||
|
||
def ppf(self, t): | ||
"""Percentage point function for the prediction posterior in p""" | ||
return glmlg1.prediction_ppf(self.model_, self.xp_, t) | ||
|
||
class BayesianLogisticRegression1: | ||
"""Model logistic regression with a single unknown weight variable and the reference prior. | ||
Note: Here the reference prior works out to be the same as Jeffreys prior. | ||
Additionally, using the parameters w_min and w_max the prior can be restricted to an | ||
interval. | ||
Parameters | ||
---------- | ||
w_min : default='-∞' | ||
A minimum weight restriction for the prior. | ||
w_max : default='∞' | ||
A maximum weight restriction for the prior. | ||
Example | ||
-------- | ||
>>> from bbai.glm import BayesianLogisticRegression1 | ||
>>> | ||
>>> x = [-5, 2, 8, 1] | ||
>>> y = [0, 1, 0, 1] | ||
>>> model = BayesianLogisticRegression1() | ||
>>> | ||
>>> # Fit a posterior distribution for w with the logistic | ||
>>> # regression reference prior | ||
>>> model.fit(x, y) | ||
>>> | ||
>>> # Print the probability that w < 0.123 | ||
>>> print(model.cdf(0.123)) | ||
""" | ||
def __init__(self, w_min=-np.inf, w_max=np.inf): | ||
self.w_min_ = w_min | ||
self.w_max_ = w_max | ||
|
||
def fit(self, x, y): | ||
yp = np.zeros(len(y)) | ||
for i, yi in enumerate(y): | ||
assert yi == 0 or yi == 1 | ||
yp[i] = -1.0 if yi == 0 else 1.0 | ||
self.model_ = glmlg1.logistic1_fit(x, yp, self.w_min_, self.w_max_) | ||
|
||
def prior_pdf(self, w): | ||
"""Probability density function for the prior in w""" | ||
return glmlg1.prior_pdf(self.model_, w) | ||
|
||
def pdf(self, w): | ||
"""Probability density function for the posterior in w""" | ||
return glmlg1.pdf(self.model_, w) | ||
|
||
def cdf(self, w): | ||
"""Cumulative density function for the posterior in w""" | ||
return glmlg1.cdf(self.model_, w) | ||
|
||
def ppf(self, t): | ||
"""Percentage point function for the posterior in w""" | ||
return glmlg1.ppf(self.model_, t) | ||
|
||
def predict(self, xp): | ||
"""Return the posterior distribution for p where p denotes the probability 1 / (1 + exp(-xp * w))""" | ||
assert xp != 0.0 | ||
return _Prediction(self.model_, xp) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,10 @@ | ||
from ._delta_binomial import DeltaBinomialModel | ||
from ._bounded_normal import BoundedNormal | ||
from ._restricted_t import RestrictedT | ||
|
||
__all__ = [ | ||
'DeltaBinomialModel', | ||
'BoundedNormal', | ||
'RestrictedT', | ||
] | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
from bbai._computation._bridge import mdlbn | ||
|
||
import numpy as np | ||
|
||
class BoundedNormal: | ||
def __init__(self, a, b): | ||
assert a < b | ||
self.a_ = a | ||
self.b_ = b | ||
|
||
def fit_mean_var(self, n, mean, variance): | ||
self.model_ = mdlbn.bounded_normal_fit(self.a_, self.b_, n, mean, variance) | ||
|
||
def fit(self, y): | ||
n = len(y) | ||
mean = np.mean(y) | ||
sigma = np.std(y) | ||
self.fit_mean_var(n, mean, sigma**2) | ||
|
||
def pdf(self, x): | ||
assert self.model_ is not None | ||
return mdlbn.pdf(self.model_, x) | ||
|
||
def cdf(self, x): | ||
assert self.model_ is not None | ||
return mdlbn.cdf(self.model_, x) | ||
|
||
def ppf(self, t): | ||
assert 0 <= t and t <= 1 | ||
assert self.model_ is not None | ||
return mdlbn.ppf(self.model_, t) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
import numpy as np | ||
import scipy | ||
|
||
class RestrictedT: | ||
def __init__(self, a, b): | ||
assert a < b | ||
self.a_ = a | ||
self.b_ = b | ||
|
||
def fit_mean_var(self, n, mean, variance): | ||
s = np.sqrt(variance * n / (n - 1)) | ||
s /= np.sqrt(n) | ||
self.model_ = scipy.stats.t(n-1, loc=mean, scale=s) | ||
|
||
self.cdf_a_ = self.model_.cdf(self.a_) | ||
self.norm_ = self.model_.cdf(self.b_) - self.cdf_a_ | ||
|
||
def fit(self, y): | ||
mean = np.mean(y) | ||
var = np.std(y)**2 | ||
self.fit_mean_var(len(y), mean, var) | ||
|
||
def pdf(self, x): | ||
if x < self.a_ or x > self.b_: | ||
return 0 | ||
return self.model_.pdf(x) / self.norm_ | ||
|
||
def cdf(self, x): | ||
if x < self.a_: | ||
return 0 | ||
if x > self.b_: | ||
return 1 | ||
return (self.model_.cdf(x) - self.cdf_a) / self.norm_ | ||
|
||
def ppf(self, t): | ||
assert 0 <= t and t <= 1 | ||
t *= self.norm_ | ||
t += self.cdf_a_ | ||
return self.model_.ppf(t) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,115 @@ | ||
import pytest | ||
import numpy as np | ||
from bbai.glm import BayesianLogisticRegression1 | ||
import scipy | ||
|
||
def log_prior(x, w): | ||
s = 0 | ||
for xi in x: | ||
p = 1 / (1 + np.exp(-xi * w)) | ||
q = 1 - p | ||
s += xi**2 * p * q | ||
return 0.5 * np.log(s) | ||
|
||
def log_like(x, y, w): | ||
res = 0 | ||
for xi, yi in zip(x, y): | ||
mult = 2 * yi - 1 | ||
res += np.log(1 / (1 + np.exp(-xi * mult * w))) | ||
return res | ||
|
||
def log_post(x, y, w): | ||
return log_like(x, y, w) + log_prior(x, w) | ||
|
||
def test_bayesian_logistic_regression1(): | ||
x = [1.2, -3.4, 5.1] | ||
y = [1.0, 1.0, 0.0] | ||
model = BayesianLogisticRegression1() | ||
model.fit(x, y) | ||
|
||
# test relative pdfs | ||
w1 = 0.123 | ||
w2 = 0.321 | ||
relative_pdf = model.pdf(w2) / model.pdf(w1) | ||
expected = np.exp(log_post(x, y, w2) - log_post(x, y, w1)) | ||
assert relative_pdf == pytest.approx(expected) | ||
|
||
# pdf integrates to 1 | ||
N = 1000 | ||
wmin = -10 | ||
wmax = 10 | ||
step = (wmax - wmin) / N | ||
integral = 0 | ||
for i in range(N): | ||
integral += model.pdf(wmin + i * step) * step | ||
assert integral == pytest.approx(1.0) | ||
|
||
# cdf matches the integral of pdf | ||
N = 2000 | ||
wmin = -10 | ||
wmax = 0.123 | ||
step = (wmax - wmin) / N | ||
integral = 0 | ||
for i in range(N): | ||
integral += model.pdf(wmin + i * step) * step | ||
assert integral == pytest.approx(model.cdf(wmax), rel=1.0e-3) | ||
|
||
# cdf and ppf match up | ||
assert 0.123 == pytest.approx(model.ppf(model.cdf(0.123)), rel=1.0e-3) | ||
|
||
# prediction pdf matches the transformed posterior | ||
xp = 0.67 | ||
pred = model.predict(xp) | ||
def phi(p): | ||
return np.log(p / (1 - p)) / xp | ||
eps = 1.0e-9 | ||
p = 0.123 | ||
w = phi(p) | ||
phi_dot = (phi(p + eps) - phi(p)) / eps | ||
assert pred.pdf(p) == pytest.approx(model.pdf(w) * phi_dot) | ||
|
||
# prediction cdf matches w cdf | ||
p = 0.123 | ||
xp = 0.67 | ||
pred = model.predict(xp) | ||
w = np.log(p / (1 - p)) / xp | ||
assert p == pytest.approx(1 / (1 + np.exp(-xp * w))) | ||
median = pred.cdf(p) | ||
expected = model.cdf(w) | ||
assert median == pytest.approx(expected) | ||
|
||
# prediction cdf and ppf match up | ||
xp = 0.67 | ||
pred = model.predict(xp) | ||
p = pred.ppf(0.123) | ||
assert pred.cdf(p) == pytest.approx(0.123) | ||
|
||
# we properly handle endpoints | ||
assert pred.pdf(0.0) == 0.0 | ||
assert pred.pdf(1.0) == 0.0 | ||
|
||
# prediction pdf integrates to 1 | ||
N = 1000 | ||
pred = model.predict(0.123) | ||
pmin = 0.0 | ||
pmax = 1.0 | ||
step = (pmax - pmin) / N | ||
integral = 0 | ||
for i in range(N): | ||
integral += pred.pdf(pmin + i * step) * step | ||
assert integral == pytest.approx(1.0) | ||
|
||
def test_bayesian_logistic_regression1_separable(): | ||
x = [1.2, -3.4, 5.1] | ||
y = [1.0, 0.0, 1.0] | ||
model = BayesianLogisticRegression1() | ||
model.fit(x, y) | ||
|
||
xp = 0.67 | ||
pred = model.predict(xp) | ||
|
||
assert pred.pdf(0.0) == 0.0 | ||
assert pred.pdf(1.0) == np.inf | ||
|
||
if __name__ == "__main__": | ||
raise SystemExit(pytest.main([__file__])) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
import pytest | ||
import numpy as np | ||
from bbai.model import BoundedNormal | ||
import scipy | ||
|
||
def test_bounded_normal_model(): | ||
a = -0.123 | ||
b = 4.5 | ||
model = BoundedNormal(a, b) | ||
y = [0, 3, 4] | ||
model.fit(y) | ||
|
||
# PDF goes to zero at the endpoints | ||
assert model.pdf(a) == 0.0 | ||
assert model.pdf(b) == 0.0 | ||
|
||
# PDF is zero outside of the boundaries | ||
assert model.pdf(a - 0.01) == 0.0 | ||
assert model.pdf(b + 0.01) == 0.0 | ||
|
||
# PDF integrates to 1 | ||
S = 0 | ||
N = 2000 | ||
step = (b - a) / N | ||
for i in range(N): | ||
S += model.pdf(a + i * step) * step | ||
assert S == pytest.approx(1.0) | ||
|
||
# cdf matches up with numeric integration | ||
t = 1.23 | ||
S = 0 | ||
step = (t - a) / N | ||
for i in range(N): | ||
S += model.pdf(a + i * step) * step | ||
assert S == pytest.approx(model.cdf(t), rel=1.0e-3) | ||
|
||
# cdf and ppf match up | ||
median = model.ppf(0.5) | ||
assert model.cdf(median) == pytest.approx(0.5, rel=1.0e-3) | ||
|
||
if __name__ == "__main__": | ||
raise SystemExit(pytest.main([__file__])) | ||
|
Binary file renamed
BIN
+18.1 MB
...bbai-1.7.1-py3-none-manylinux1_x86_64.whl → ...bbai-1.8.0-py3-none-manylinux1_x86_64.whl
Binary file not shown.