Skip to content

Commit

Permalink
- statistics.arr: add more descriptive doc strings
Browse files Browse the repository at this point in the history
-- multiple-regrssion output type made more specific
-- multiple-regression: predictor fn raises exception on wrong # of args
-- chi-square: simplify
-- use simpler is-roughly rather than is%(within(0.01))
- test-statistics.arr: add check for mult reg predictor arg# mismatch
- test-matrices.arr: add additional tests for .to-vector()
  • Loading branch information
ds26gte committed Jul 5, 2024
1 parent bddb03f commit d36171f
Show file tree
Hide file tree
Showing 3 changed files with 31 additions and 21 deletions.
33 changes: 20 additions & 13 deletions src/arr/trove/statistics.arr
Original file line number Diff line number Diff line change
Expand Up @@ -224,20 +224,26 @@ end

# please see: https://online.stat.psu.edu/stat462/

fun multiple-regression(x_s_s :: List<List<Number>>, y_s :: List<Number>) -> (Any -> Number):
doc: "multiple-regression"
fun multiple-regression(x_s_s :: List<List<Number>>, y_s :: List<Number>) -> (List<Number> -> Number):
doc: "returns a predictor function given a list of list of independent inputs and the correspoinding list of outputs"
intercepts = MX.col-matrix.make(raw-array-of(1, y_s.length()))
mx_X = intercepts.augment(MX.lists-to-matrix(x_s_s))
mx_Y = MX.col-matrix.make(raw-array-from-list(y_s))
B = MX.mtx-to-vector(MX.mtx-least-squares-solve(mx_X, mx_Y))
B_n = B.length() - 1
fun B_pred_fn(x_s :: List<Number>) -> Number:
B.dot(MX.list-to-vector(x_s.push(1)))
x_s_n = x_s.length()
if B_n <> x_s_n:
raise(E.message-exception("multiple-regression: the regression expected " + tostring(B_n) + " inputs, but received " + tostring(x_s_n) + " instead"))
else:
B.dot(MX.list-to-vector(x_s.push(1)))
end
end
B_pred_fn
end

fun linear-regression(x-s :: List<Number>, y-s :: List<Number>) -> (Number -> Number):
doc: "returns a linear predictor function for a single independent variable"
doc: "returns a linear predictor function given a list of single inputs and the corresponding list of outputs"
x-s-n = x-s.length()
if x-s-n <> y-s.length():
raise(E.message-exception("linear-regression: input lists must have equal lengths"))
Expand All @@ -253,6 +259,7 @@ fun linear-regression(x-s :: List<Number>, y-s :: List<Number>) -> (Number -> Nu
end

fun r-squared(x :: List<Number>, y :: List<Number>, f :: (Number -> Number)) -> Number:
doc: "given a list of inputs, a list of outputs, and a model predictor, finds the r-squared of the model"
shadow x = map(num-to-roughnum, x)
shadow y = map(num-to-roughnum, y)
y-mean = mean(y)
Expand All @@ -268,15 +275,15 @@ fun r-squared(x :: List<Number>, y :: List<Number>, f :: (Number -> Number)) ->
end

fun z-test(sample-list :: List, population-sd :: Number, population-mean :: Number) -> Number:
doc: "z-test"
doc: "given a sample and the population SD and mean, find the z-score of the sample's mean"
sample-size = sample-list.length()
sample-mean = mean(sample-list)
sd-of-mean = population-sd / num-sqrt(sample-size)
(sample-mean - population-mean) / sd-of-mean
end

fun t-test(sample-list :: List, population-mean :: Number) -> Number:
doc: "t-test"
doc: "given a sample and the population mean, find the t-score of the sample's mean"
sample-size = sample-list.length()
sample-mean = mean(sample-list)
estimated-population-variance = variance-sample(sample-list)
Expand All @@ -290,7 +297,7 @@ end
# (this has a typo for the pooled t-test, corrected here)

fun t-test-paired(l1 :: List, l2 :: List) -> Number:
doc: "t-test-paired"
doc: "given two paired samples, find the t-score of the difference of their means"
n = l1.length()
n2 = l2.length()
if n <> n2:
Expand All @@ -306,7 +313,7 @@ fun t-test-paired(l1 :: List, l2 :: List) -> Number:
end

fun t-test-pooled(sample-list-1 :: List, sample-list-2 :: List) -> Number:
doc: "t-test-pooled"
doc: "given two independent samples of different sizes or variances, find the t-score of the difference of their means"
n1 = sample-list-1.length()
n2 = sample-list-2.length()
if (n1 == 0) or (n2 == 0):
Expand All @@ -322,7 +329,7 @@ fun t-test-pooled(sample-list-1 :: List, sample-list-2 :: List) -> Number:
end

fun t-test-independent(sample-list-1 :: List, sample-list-2 :: List) -> Number:
doc: "t-test-independent"
doc: "given two independent samples of similar size or variance, find the t-score of the difference of their means"
n1 = sample-list-1.length()
n2 = sample-list-2.length()
if (n1 == 0) or (n2 == 0):
Expand All @@ -337,8 +344,8 @@ fun t-test-independent(sample-list-1 :: List, sample-list-2 :: List) -> Number:
end

fun chi-square(observed-values :: List, predicted-values :: List) -> Number:
doc: "chi-square"
math.sum(map2(lam(observed-value, predicted-value):
num-expt(observed-value - predicted-value, 2) / predicted-value end,
observed-values, predicted-values))
doc: "given a list of observed and predicted values, returns the chi-square statistic"
for fold2(sum from 0, obs from observed-values, pred from predicted-values):
sum + (num-sqr(obs - pred) / pred)
end
end
2 changes: 2 additions & 0 deletions tests/pyret/tests/test-matrices.arr
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,8 @@ check "Basic Matrix Accessors":
M.mtx-row(mtx1, 0).to-vector() is [vector: 1, 2, 3]
M.mtx-col(mtx1, 1).to-vector() is [vector: 2, 5]

[row-matrix: 1, 2, 3].to-vector() is [vector: 1, 2, 3]
[col-matrix: 1, 2, 3].to-vector() is [vector: 1, 2, 3]
end

check "Submatrices":
Expand Down
17 changes: 9 additions & 8 deletions tests/pyret/tests/test-statistics.arr
Original file line number Diff line number Diff line change
Expand Up @@ -69,25 +69,25 @@ check "numeric helpers":
stdev-sample([list: 1, 1, 1, 1]) is-roughly ~0
stdev-sample([list:]) raises "empty"

z-test([list: 1, 2, 3], 1.58, 2.58) is%(within(0.01)) -0.6358
z-test([list: 1, 2, 3], 1.58, 2.58) is-roughly ~-0.635816119

t-test([list: 1, 2, 3], 2.58) is%(within(0.01)) -1.0046
t-test([list: 1, 2, 3], 2.58) is-roughly ~-1.004589468

t-test-paired([list: 1], [list: 2, 3]) raises "lists must have equal lengths"
t-test-paired([list:], [list:]) raises "lists must have at least one element"
t-test-paired([list: 1, 2, 3], [list: 4, 6, 8]) is%(within(0.01)) -6.928
t-test-paired([list: 1, 2, 3], [list: 4, 6, 8]) is-roughly ~-6.928203230

t-test-pooled([list:], [list: 1, 2, 3]) raises "lists must have at least one element"
t-test-pooled([list: 1, 2, 3], [list: 4, 5, 6]) is%(within(0.01)) -3.674
t-test-pooled([list: 1, 2, 3], [list: 4, 5, 6, 7]) is%(within(0.01)) -3.873
t-test-pooled([list: 1, 2, 3], [list: 4, 5, 6]) is%(within(0.01)) -3.674234614
t-test-pooled([list: 1, 2, 3], [list: 4, 5, 6, 7]) is-roughly ~-3.872983346

t-test-independent([list:], [list: 1, 2, 3]) raises "lists must have at least one element"
t-test-independent([list: 1, 2, 3], [list: 4, 5, 6]) is%(within(0.01)) -3.674
t-test-independent([list: 1, 2, 3], [list: 4, 5, 6, 7]) is%(within(0.01)) -4.041
t-test-independent([list: 1, 2, 3], [list: 4, 5, 6]) is-roughly ~-3.674234614
t-test-independent([list: 1, 2, 3], [list: 4, 5, 6, 7]) is-roughly ~-4.041451884


chi-square([list: 1, 2, 3, 4], [list: 1, 2, 3, 4]) is 0
chi-square([list: 1, 2, 3, 4], [list: 0.9, 1.8, 3.5, 4.7]) is%(within(0.01)) 0.209
chi-square([list: 1, 2, 3, 4], [list: 0.9, 1.8, 3.5, 4.7]) is-roughly ~0.2090172239
end

check "polymorphic modes":
Expand Down Expand Up @@ -136,6 +136,7 @@ check "multiple regression":
y-s = [list: 33, 42, 45, 51, 53, 61, 62]
pf1 = multiple-regression(x-s-s, y-s)
pf1([list: 8]) is-roughly 73.3214
pf1([list: 8, 9]) raises "received"
#
# check it matches linear-regression function on the same single variable
x-s = [list: 4, 4.5, 5, 5.5, 6, 6.5, 7]
Expand Down

0 comments on commit d36171f

Please sign in to comment.