From a52ef2d3f4c1bc92b74eb1fb4ce4a2c408cc07ac Mon Sep 17 00:00:00 2001 From: Dorai Sitaram Date: Fri, 12 Apr 2024 16:48:28 -0400 Subject: [PATCH 01/14] statistics.arr: added variance, variance-sample, t-test-paired, t-test-pooled, t-test-independent, chi-square #1732 --- src/arr/trove/statistics.arr | 80 +++++++++++++++++++++++++++++++----- 1 file changed, 70 insertions(+), 10 deletions(-) diff --git a/src/arr/trove/statistics.arr b/src/arr/trove/statistics.arr index 2532a39401..01db5d7abe 100644 --- a/src/arr/trove/statistics.arr +++ b/src/arr/trove/statistics.arr @@ -8,10 +8,16 @@ provide { mode-smallest: mode-smallest, mode-largest: mode-largest, mode-any: mode-any, + variance: variance, stdev: stdev, + variance-sample: variance-sample, stdev-sample: stdev-sample, linear-regression: linear-regression, - r-squared: r-squared + r-squared: r-squared, + t-test-paired: t-test-paired, + t-test-pooled: t-test-pooled, + t-test-independent: t-test-independent, + chi-square: chi-square } end provide-types * import global as _ @@ -143,24 +149,36 @@ fun mode-any(l :: List) -> Number: ms.get(num-random(ms.length())) end end - -fun stdev(l :: List) -> Number: - doc: ```returns the standard deviation of the list + +fun variance(l :: List) -> Number: + doc: ```returns the variance of the list of numbers, or raises an error if the list is empty``` reg-mean = mean(l) sq-diff = l.map(lam(k): num-expt((k - reg-mean), 2) end) sq-mean = mean(sq-diff) - num-sqrt(sq-mean) + sq-mean end -fun stdev-sample(l :: List) -> Number: - doc: ```returns the standard deviation of the list +fun stdev(l :: List) -> Number: + doc: ```returns the standard deviation of the list + of numbers, or raises an error if the list is empty``` + num-sqrt(variance(l)) +end + +fun variance-sample(l :: List) -> Number: + doc: ```returns the variance of the list of numbers, or raises an error if the list is empty``` len = l.length() reg-mean = mean(l) sq-diff = l.map(lam(k): num-expt((k - reg-mean), 2) end) sq-mean = math.sum(sq-diff) / (len - 1) - num-sqrt(sq-mean) + sq-mean +end + +fun stdev-sample(l :: List) -> Number: + doc: ```returns the standard deviation of the list + of numbers, or raises an error if the list is empty``` + num-sqrt(variance-sample(l)) end fun linear-regression(x :: List, y :: List) -> (Number -> Number): @@ -177,8 +195,8 @@ fun linear-regression(x :: List, y :: List) -> (Number -> Number covariance = xpt-xy - xpt-x-xpt-y v1 = math.sum(map(lam(n): n * n end, x)) v2 = (math.sum(x) * math.sum(x)) / x.length() - variance = v1 - v2 - beta = covariance / variance + variance1 = v1 - v2 + beta = covariance / variance1 alpha = mean(y) - (beta * mean(x)) fun predictor(in :: Number) -> Number: @@ -204,3 +222,45 @@ fun r-squared(x :: List, y :: List, f :: (Number -> Number)) -> end end +fun t-test-paired(l1 :: List, l2 :: List) -> Number: + doc: "t-test-paired" + n1 = l1.length() + n2 = l2.length() + if n1 <> n2: + raise(E.message-exception("t-test-paired: input lists must have equal lengths")) + else if n1 == 0: + raise(E.message-exception("t-test-paired: input lists should have at least one element")) + else: + diffs = map2(lam(x1, x2): x1 - x2 end, l1, l2) + diffs-mean = mean(diffs) + s-hat = stdev-sample(diffs) + diffs-mean / (s-hat / num-sqrt(n1)) + end +end + +fun t-test-pooled(l1 :: List, l2 :: List) -> Number: + doc: "t-test-pooled" + n1 = l1.length() + n2 = l2.length() + m1 = mean(l1) + m2 = mean(l2) + v1 = variance-sample(l1) + v2 = variance-sample(l2) + (m1 - m2) / (((((n1 - 1) * num-expt(v1, 2)) + ((n2 - 1) * num-expt(v2, 2))) / ((n1 + n2) - 2)) * num-sqrt((1 / n1) + (1 / n2))) +end + +fun t-test-independent(l1 :: List, l2 :: List) -> Number: + doc: "t-test-independent" + n1 = l1.length() + n2 = l2.length() + m1 = mean(l1) + m2 = mean(l2) + v1 = variance-sample(l1) + v2 = variance-sample(l2) + (m1 - m2) / num-sqrt((v1 / n1) + (v2 / n2)) +end + +fun chi-square(obs :: List, exp :: List) -> Number: + doc: "chi-square" + math.sum(map2(lam(o, e): num-expt(o - e, 2) / e end, obs, exp)) +end From 2ba6dd10f8957e78ca49793c9728cb07b61c2586 Mon Sep 17 00:00:00 2001 From: Dorai Sitaram Date: Sat, 13 Apr 2024 09:14:53 -0400 Subject: [PATCH 02/14] - test-statistics.arr: added tests for the new stats functions #1732 - statistics.arr: added exceptions for t-test-{pooled, independent} --- src/arr/trove/statistics.arr | 30 +++++++++++++++++---------- tests/pyret/tests/test-statistics.arr | 28 ++++++++++++++++++++++--- 2 files changed, 44 insertions(+), 14 deletions(-) diff --git a/src/arr/trove/statistics.arr b/src/arr/trove/statistics.arr index 01db5d7abe..7354a277d9 100644 --- a/src/arr/trove/statistics.arr +++ b/src/arr/trove/statistics.arr @@ -229,7 +229,7 @@ fun t-test-paired(l1 :: List, l2 :: List) -> Number: if n1 <> n2: raise(E.message-exception("t-test-paired: input lists must have equal lengths")) else if n1 == 0: - raise(E.message-exception("t-test-paired: input lists should have at least one element")) + raise(E.message-exception("t-test-paired: input lists must have at least one element")) else: diffs = map2(lam(x1, x2): x1 - x2 end, l1, l2) diffs-mean = mean(diffs) @@ -242,22 +242,30 @@ fun t-test-pooled(l1 :: List, l2 :: List) -> Number: doc: "t-test-pooled" n1 = l1.length() n2 = l2.length() - m1 = mean(l1) - m2 = mean(l2) - v1 = variance-sample(l1) - v2 = variance-sample(l2) - (m1 - m2) / (((((n1 - 1) * num-expt(v1, 2)) + ((n2 - 1) * num-expt(v2, 2))) / ((n1 + n2) - 2)) * num-sqrt((1 / n1) + (1 / n2))) + if (n1 == 0) or (n2 == 0): + raise(E.message-exception("t-test-pooled: input lists must have at least one element")) + else: + m1 = mean(l1) + m2 = mean(l2) + v1 = variance-sample(l1) + v2 = variance-sample(l2) + (m1 - m2) / (((((n1 - 1) * num-expt(v1, 2)) + ((n2 - 1) * num-expt(v2, 2))) / ((n1 + n2) - 2)) * num-sqrt((1 / n1) + (1 / n2))) + end end fun t-test-independent(l1 :: List, l2 :: List) -> Number: doc: "t-test-independent" n1 = l1.length() n2 = l2.length() - m1 = mean(l1) - m2 = mean(l2) - v1 = variance-sample(l1) - v2 = variance-sample(l2) - (m1 - m2) / num-sqrt((v1 / n1) + (v2 / n2)) + if (n1 == 0) or (n2 == 0): + raise(E.message-exception("t-test-independent: input lists must have at least one element")) + else: + m1 = mean(l1) + m2 = mean(l2) + v1 = variance-sample(l1) + v2 = variance-sample(l2) + (m1 - m2) / num-sqrt((v1 / n1) + (v2 / n2)) + end end fun chi-square(obs :: List, exp :: List) -> Number: diff --git a/tests/pyret/tests/test-statistics.arr b/tests/pyret/tests/test-statistics.arr index 46e4502f67..fad4226c40 100644 --- a/tests/pyret/tests/test-statistics.arr +++ b/tests/pyret/tests/test-statistics.arr @@ -49,18 +49,40 @@ check "numeric helpers": modes([list: -1, 2, -1, 2, -1]) is [list: -1] modes([list: 1, 1, 2, 2, 3, 3, 3]) is [list: 3] + variance([list:]) raises "empty" + variance([list: 5]) is 0 + variance([list: 3, 4, 5, 6, 7]) is%(within(0.01)) 2.0 + variance([list: 1, 1, 1, 1]) is-roughly ~0 + stdev([list:]) raises "empty" stdev([list: 5]) is 0 - stdev([list: 3, 4, 5, 6, 7]) is%(within(0.01)) 1.41 stdev([list: 1, 1, 1, 1]) is-roughly ~0 - stdev([list:]) raises "empty" - + + variance-sample([list: 3, 4, 5, 6, 7]) is%(within(0.01)) (10 / 4) + variance-sample([list: 3]) raises "division by zero" + variance-sample([list: 1, 1, 1, 1]) is-roughly ~0 + variance-sample([list:]) raises "empty" + stdev-sample([list: 3, 4, 5, 6, 7]) is%(within(0.01)) num-sqrt(10 / 4) stdev-sample([list: 3]) raises "division by zero" stdev-sample([list: 1, 1, 1, 1]) is-roughly ~0 stdev-sample([list:]) raises "empty" + t-test-paired([list: 1], [list: 2, 3]) raises "lists must have equal lengths" + t-test-paired([list:], [list:]) raises "lists must have at least one element" + t-test-paired([list: 1, 2, 3], [list: 4, 6, 8]) is%(within(0.01)) -6.928 + + t-test-pooled([list:], [list: 1, 2, 3]) raises "lists must have at least one element" + t-test-pooled([list: 1, 2, 3], [list: 4, 5, 6]) is%(within(0.01)) -3.674 + t-test-pooled([list: 1, 2, 3], [list: 4, 5, 6, 7]) is%(within(0.01)) -2.217 + + t-test-independent([list:], [list: 1, 2, 3]) raises "lists must have at least one element" + t-test-independent([list: 1, 2, 3], [list: 4, 5, 6]) is%(within(0.01)) -3.674 + t-test-independent([list: 1, 2, 3], [list: 4, 5, 6, 7]) is%(within(0.01)) -4.041 + + chi-square([list: 1, 2, 3, 4], [list: 1, 2, 3, 4]) is 0 + chi-square([list: 1, 2, 3, 4], [list: 0.9, 1.8, 3.5, 4.7]) is%(within(0.01)) 0.209 end check "linear regression": From 363038c81a89d9515154ab898712a6238a02b286 Mon Sep 17 00:00:00 2001 From: Dorai Sitaram Date: Mon, 15 Apr 2024 09:56:29 -0400 Subject: [PATCH 03/14] Added z-test impl & test #1732 --- src/arr/trove/statistics.arr | 10 ++++++++++ tests/pyret/tests/test-statistics.arr | 2 ++ 2 files changed, 12 insertions(+) diff --git a/src/arr/trove/statistics.arr b/src/arr/trove/statistics.arr index 7354a277d9..84c49dbfba 100644 --- a/src/arr/trove/statistics.arr +++ b/src/arr/trove/statistics.arr @@ -17,6 +17,7 @@ provide { t-test-paired: t-test-paired, t-test-pooled: t-test-pooled, t-test-independent: t-test-independent, + z-test: z-test, chi-square: chi-square } end provide-types * @@ -268,6 +269,15 @@ fun t-test-independent(l1 :: List, l2 :: List) -> Number: end end +fun z-test(l1 :: List, l2 :: List, sd1 :: Number, sd2 :: Number) -> Number: + doc: "z-test" + n1 = l1.length() + n2 = l2.length() + x-bar-1 = mean(l1) + x-bar-2 = mean(l2) + (x-bar-1 - x-bar-2) / num-sqrt((num-expt(sd1, 2) / n1) + (num-expt(sd2, 2) / n2)) +end + fun chi-square(obs :: List, exp :: List) -> Number: doc: "chi-square" math.sum(map2(lam(o, e): num-expt(o - e, 2) / e end, obs, exp)) diff --git a/tests/pyret/tests/test-statistics.arr b/tests/pyret/tests/test-statistics.arr index fad4226c40..b55fddfbc7 100644 --- a/tests/pyret/tests/test-statistics.arr +++ b/tests/pyret/tests/test-statistics.arr @@ -81,6 +81,8 @@ check "numeric helpers": t-test-independent([list: 1, 2, 3], [list: 4, 5, 6]) is%(within(0.01)) -3.674 t-test-independent([list: 1, 2, 3], [list: 4, 5, 6, 7]) is%(within(0.01)) -4.041 + z-test([list: 1, 2, 3], [list: 4, 5, 6], 1.58, 2.58) is%(within(0.01)) -1.718 + chi-square([list: 1, 2, 3, 4], [list: 1, 2, 3, 4]) is 0 chi-square([list: 1, 2, 3, 4], [list: 0.9, 1.8, 3.5, 4.7]) is%(within(0.01)) 0.209 end From bcdfd0ee4c0c515d526473527bf19eac757e6bbe Mon Sep 17 00:00:00 2001 From: Dorai Sitaram Date: Thu, 16 May 2024 16:16:49 -0400 Subject: [PATCH 04/14] - arr/trove/statistics.arr: added multiple-regression(): takes a list of list of known x-inputs, and a list of the corresponding known y-outputs, and returns a predictor function that takes a list of x-inputs and returns its estimated y-output #1732 - js/trove/multiple-regression.js contains the JS implementation of multiple-regression and all its matrix subroutines - tests/test-statistics.arr: added a basic test (can add more from curriculum examples, when these are added) --- src/arr/trove/statistics.arr | 7 + src/js/trove/multiple-regression.js | 182 ++++++++++++++++++++++++++ tests/pyret/tests/test-statistics.arr | 9 ++ 3 files changed, 198 insertions(+) create mode 100644 src/js/trove/multiple-regression.js diff --git a/src/arr/trove/statistics.arr b/src/arr/trove/statistics.arr index 84c49dbfba..3b9d684279 100644 --- a/src/arr/trove/statistics.arr +++ b/src/arr/trove/statistics.arr @@ -13,6 +13,7 @@ provide { variance-sample: variance-sample, stdev-sample: stdev-sample, linear-regression: linear-regression, + multiple-regression: multiple-regression, r-squared: r-squared, t-test-paired: t-test-paired, t-test-pooled: t-test-pooled, @@ -26,6 +27,7 @@ include lists import error as E import math as math import string-dict as SD +import multiple-regression as MR fun mean(l :: List) -> Number: doc: "Find the average of a list of numbers" @@ -208,6 +210,11 @@ fun linear-regression(x :: List, y :: List) -> (Number -> Number end end +fun multiple-regression(x_s_s :: List>, y_s :: List) -> (List -> Number): + doc: "multiple-regression" + MR.multiple-regression(x_s_s, y_s) +end + fun r-squared(x :: List, y :: List, f :: (Number -> Number)) -> Number: shadow x = map(num-to-roughnum, x) shadow y = map(num-to-roughnum, y) diff --git a/src/js/trove/multiple-regression.js b/src/js/trove/multiple-regression.js new file mode 100644 index 0000000000..ae9300fa85 --- /dev/null +++ b/src/js/trove/multiple-regression.js @@ -0,0 +1,182 @@ +({ + requires: [], + nativeRequires: ["pyret-base/js/namespace"], + provides: { + values: { + "multiple-regression": "tany" + } + }, + theModule: function(runtime, namespace, uri) { + + function makeColumnMatrix(y_s) { + let n = y_s.length; + let M = new Array(n); + for (let i = 0; i < n; i++) { + M[i][0] = y_s[i]; + } + return M; + } + + function matrixTranspose(m) { + let numrows = m.length; + let numcols = m[0].length; + let mT = new Array(numcols); + for (let i = 0; i < numcols; i++) { + mT[i] = new Array(m); + for (let j = 0; j < numrows; j++) { + mT[i][j] = m[j][i]; + } + } + return mT; + } + + function matrixDeterminant(m) { + let size = m.length; + if (size === 0) { + return 0; + } + if (m[0].length !== size) { + return 0; + } + if (size === 1) { + return m[0][0]; + } + if (size === 2) { + return (m[0][0] * m[1][1] - m[1][0] * m[0][1]); + } + let det = 0; + for (let i = 0; i < size; i++) { + det += m[0][i] * matrixCofactor(m, 0, i); + } + return det; + } + + function matrixCofactor(m, i, j) { + let size = m.length; + if (size === 0) { + return 0; + } + if (m[0].length !== size) { + return 0; + } + let mm = new Array(size - 1); + for (let ii = 0; ii < size; ii++) { + if (ii === i) { continue; } + let iii = (ii > i ? ii - 1 : ii) + mm[iii] = new Array(size - 1); + for (let jj = 0; jj < size; jj++) { + if (jj === j) { continue; } + let jjj = (jj > j ? jj - 1 : jj) + mm[iii][jjj] = m[ii][jj]; + } + } + let sign = (i + j) % 2; + sign = (sign === 0) ? 1 : -1; + return sign * matrixDeterminant(mm) + } + + function matrixAdjoint(m) { + let size = m.length; + let mc = new Array(size); + for (let i = 0; i < size; i++) { + mc[i] = new Array(size); + for (let j = 0; j < size; j++) { + mc[i][j] = matrixCofactor(m, i, j); + } + } + return matrixTranspose(mc) + } + + function matrixInverse(m) { + let det = matrixDeterminant(m); + let mI = matrixAdjoint(m); + let k = m.length; + for (let i = 0; i < k; i++) { + for (let j = 0; j < k; j++) { + let x = mI[i][j] + mI[i][j] = x/det + } + } + return mI; + } + + function matrixMultiply2(m1, m2) { + let m1r = m1.length; + let m1c = m1[0].length; + let m2c = m2[0].length; + let mP = new Array(m1r); + for (let i = 0; i < m1r; i++) { + mP[i] = new Array(m2c); + for (let j = 0; j < m2c; j++) { + let x = 0; + for (let k = 0; k < m1c; k++) { + x += m1[i][k] * m2[k][j]; + } + mP[i][j] = x; + } + } + return mP; + } + + function multipleRegression(x_s_s, y_s) { + runtime.ffi.checkArity(2, arguments, "multiple-regression", false); + runtime.checkList(x_s_s); + runtime.checkList(y_s); + let js_x_s_s = runtime.ffi.toArray(x_s_s); + let js_y_s = runtime.ffi.toArray(y_s); + let num_mappings = js_x_s_s.length; + if (js_y_s.length !== num_mappings) { + throw runtime.ffi.throwMessageException("multiple-regression: number of mappings incorrect"); + } + let X = new Array(num_mappings); + let Y = new Array(num_mappings); + let x_s_len = false; + js_x_s_s.forEach(function(x_s, i) { + runtime.checkList(x_s); + let js_x_s = runtime.ffi.toArray(x_s); + let x_s_n = js_x_s.length; + if (x_s_len === false) { + x_s_len = x_s_n; + } else if (x_s_n !== x_s_len) { + throw runtime.ffi.throwMessageException("multiple-regression: bad mapping"); + } + X[i] = new Array(x_s_len + 1) + let Xi = X[i]; + Xi[0] = 1; + js_x_s.forEach(function(x, j) { + runtime.checkNumber(x); + Xi[j+1] = runtime.num_to_fixnum(x); + }); + }); + js_y_s.forEach(function(y, i) { + runtime.checkNumber(y); + Y[i] = [runtime.num_to_fixnum(y)]; + }); + + let XT = matrixTranspose(X); + let B = matrixMultiply2(matrixInverse(matrixMultiply2(XT, X)), matrixMultiply2(XT, Y)); + let Bfunc = function(x_s) { + runtime.checkList(x_s); + let js_x_s = runtime.ffi.toArray(x_s); + if (js_x_s.length !== x_s_len) { + throw runtime.ffi.throwMessageException("predictor: wrong number of arguments"); + } + let result = B[0][0]; + for (let i = 0; i < x_s_len; i++) { + result += runtime.num_to_fixnum(js_x_s[i]) * B[i+1][0] + } + return runtime.makeNumber(result); + } + return runtime.makeFunction(Bfunc, "predictor"); + } + + let vals = { + + "multiple-regression": runtime.makeFunction(multipleRegression, "multiple-regression") + + }; + + return runtime.makeModuleReturn(vals, {}); + } +}) + diff --git a/tests/pyret/tests/test-statistics.arr b/tests/pyret/tests/test-statistics.arr index b55fddfbc7..6d6f86441e 100644 --- a/tests/pyret/tests/test-statistics.arr +++ b/tests/pyret/tests/test-statistics.arr @@ -113,3 +113,12 @@ check "linear regression": end +check "multiple regression": + x-s-s = [list: [list: 4], [list: 4.5], [list: 5], [list: 5.5], [list: 6], [list: 6.5], [list: 7]] + y-s = [list: 33, 42, 45, 51, 53, 61, 62] + pf = multiple-regression(x-s-s, y-s) + pf([list: 8]) is%(within-abs(0.001)) 73.3214 +end + + + From 7cf32b95e5090e97fb7502240d046369cc28293f Mon Sep 17 00:00:00 2001 From: Dorai Sitaram Date: Fri, 17 May 2024 10:03:23 -0400 Subject: [PATCH 05/14] multiple-regression(): first arg is now a list of N-tuples (each N-tuple representing one input (setting of indep vars to values). The returned predictor fn also takes an N-tuple #1732 --- src/js/trove/multiple-regression.js | 8 ++++---- tests/pyret/tests/test-statistics.arr | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/js/trove/multiple-regression.js b/src/js/trove/multiple-regression.js index ae9300fa85..1484e12791 100644 --- a/src/js/trove/multiple-regression.js +++ b/src/js/trove/multiple-regression.js @@ -132,8 +132,8 @@ let Y = new Array(num_mappings); let x_s_len = false; js_x_s_s.forEach(function(x_s, i) { - runtime.checkList(x_s); - let js_x_s = runtime.ffi.toArray(x_s); + runtime.checkTuple(x_s); + let js_x_s = x_s.vals; let x_s_n = js_x_s.length; if (x_s_len === false) { x_s_len = x_s_n; @@ -156,8 +156,8 @@ let XT = matrixTranspose(X); let B = matrixMultiply2(matrixInverse(matrixMultiply2(XT, X)), matrixMultiply2(XT, Y)); let Bfunc = function(x_s) { - runtime.checkList(x_s); - let js_x_s = runtime.ffi.toArray(x_s); + runtime.checkTuple(x_s); + let js_x_s = x_s.vals; if (js_x_s.length !== x_s_len) { throw runtime.ffi.throwMessageException("predictor: wrong number of arguments"); } diff --git a/tests/pyret/tests/test-statistics.arr b/tests/pyret/tests/test-statistics.arr index 6d6f86441e..b3c227376c 100644 --- a/tests/pyret/tests/test-statistics.arr +++ b/tests/pyret/tests/test-statistics.arr @@ -114,10 +114,10 @@ check "linear regression": end check "multiple regression": - x-s-s = [list: [list: 4], [list: 4.5], [list: 5], [list: 5.5], [list: 6], [list: 6.5], [list: 7]] + x-s-s = [list: {4}, {4.5}, {5}, {5.5}, {6}, {6.5}, {7}] y-s = [list: 33, 42, 45, 51, 53, 61, 62] pf = multiple-regression(x-s-s, y-s) - pf([list: 8]) is%(within-abs(0.001)) 73.3214 + pf({8}) is%(within-abs(0.001)) 73.3214 end From 7501262b2a8ec1459d43d42e6bf8d026cad2ca14 Mon Sep 17 00:00:00 2001 From: Dorai Sitaram Date: Fri, 17 May 2024 13:42:53 -0400 Subject: [PATCH 06/14] statistics.arr: Correct type of multiple-regression() #1732 multiple-regression.js: clean-up w/ better row/col indexing names --- src/arr/trove/statistics.arr | 7 +- src/js/trove/multiple-regression.js | 100 ++++++++++++++-------------- 2 files changed, 55 insertions(+), 52 deletions(-) diff --git a/src/arr/trove/statistics.arr b/src/arr/trove/statistics.arr index 3b9d684279..34a2a5138e 100644 --- a/src/arr/trove/statistics.arr +++ b/src/arr/trove/statistics.arr @@ -210,7 +210,7 @@ fun linear-regression(x :: List, y :: List) -> (Number -> Number end end -fun multiple-regression(x_s_s :: List>, y_s :: List) -> (List -> Number): +fun multiple-regression(x_s_s :: List, y_s :: List) -> (Any -> Number): doc: "multiple-regression" MR.multiple-regression(x_s_s, y_s) end @@ -257,7 +257,10 @@ fun t-test-pooled(l1 :: List, l2 :: List) -> Number: m2 = mean(l2) v1 = variance-sample(l1) v2 = variance-sample(l2) - (m1 - m2) / (((((n1 - 1) * num-expt(v1, 2)) + ((n2 - 1) * num-expt(v2, 2))) / ((n1 + n2) - 2)) * num-sqrt((1 / n1) + (1 / n2))) + v1-squared = num-sqr(v1) + v2-squared = num-sqr(v2) + (m1 - m2) / (((((n1 - 1) * v1-squared) + ((n2 - 1) * v2-squared)) / ((n1 + n2) - 2)) * + num-sqrt((1 / n1) + (1 / n2))) end end diff --git a/src/js/trove/multiple-regression.js b/src/js/trove/multiple-regression.js index 1484e12791..c0b58192c5 100644 --- a/src/js/trove/multiple-regression.js +++ b/src/js/trove/multiple-regression.js @@ -9,22 +9,22 @@ theModule: function(runtime, namespace, uri) { function makeColumnMatrix(y_s) { - let n = y_s.length; - let M = new Array(n); - for (let i = 0; i < n; i++) { - M[i][0] = y_s[i]; + let size = y_s.length; + let M = new Array(size); + for (let r = 0; r < size; r++) { + M[r][0] = y_s[r]; } return M; } function matrixTranspose(m) { - let numrows = m.length; - let numcols = m[0].length; - let mT = new Array(numcols); - for (let i = 0; i < numcols; i++) { - mT[i] = new Array(m); - for (let j = 0; j < numrows; j++) { - mT[i][j] = m[j][i]; + let numRows = m.length; + let numCols = m[0].length; + let mT = new Array(numCols); + for (let r = 0; r < numCols; r++) { + mT[r] = new Array(numRows); + for (let c = 0; c < numRows; c++) { + mT[r][c] = m[c][r]; } } return mT; @@ -45,13 +45,13 @@ return (m[0][0] * m[1][1] - m[1][0] * m[0][1]); } let det = 0; - for (let i = 0; i < size; i++) { - det += m[0][i] * matrixCofactor(m, 0, i); + for (let c = 0; c < size; c++) { + det += m[0][c] * matrixCofactor(m, 0, c); } return det; } - function matrixCofactor(m, i, j) { + function matrixCofactor(m, r, c) { let size = m.length; if (size === 0) { return 0; @@ -60,17 +60,17 @@ return 0; } let mm = new Array(size - 1); - for (let ii = 0; ii < size; ii++) { - if (ii === i) { continue; } - let iii = (ii > i ? ii - 1 : ii) - mm[iii] = new Array(size - 1); - for (let jj = 0; jj < size; jj++) { - if (jj === j) { continue; } - let jjj = (jj > j ? jj - 1 : jj) - mm[iii][jjj] = m[ii][jj]; + for (let ri = 0; ri < size; ri++) { + if (ri === r) { continue; } + let rii = (ri > r ? ri - 1 : ri) + mm[rii] = new Array(size - 1); + for (let ci = 0; ci < size; ci++) { + if (ci === c) { continue; } + let cii = (ci > c ? ci - 1 : ci) + mm[rii][cii] = m[ri][ci]; } } - let sign = (i + j) % 2; + let sign = (r + c) % 2; sign = (sign === 0) ? 1 : -1; return sign * matrixDeterminant(mm) } @@ -78,10 +78,10 @@ function matrixAdjoint(m) { let size = m.length; let mc = new Array(size); - for (let i = 0; i < size; i++) { - mc[i] = new Array(size); - for (let j = 0; j < size; j++) { - mc[i][j] = matrixCofactor(m, i, j); + for (let r = 0; r < size; r++) { + mc[r] = new Array(size); + for (let c = 0; c < size; c++) { + mc[r][c] = matrixCofactor(m, r, c); } } return matrixTranspose(mc) @@ -90,29 +90,29 @@ function matrixInverse(m) { let det = matrixDeterminant(m); let mI = matrixAdjoint(m); - let k = m.length; - for (let i = 0; i < k; i++) { - for (let j = 0; j < k; j++) { - let x = mI[i][j] - mI[i][j] = x/det + let size = m.length; + for (let r = 0; r < size; r++) { + for (let c = 0; c < size; c++) { + let x = mI[r][c] + mI[r][c] = x/det } } return mI; } function matrixMultiply2(m1, m2) { - let m1r = m1.length; - let m1c = m1[0].length; - let m2c = m2[0].length; - let mP = new Array(m1r); - for (let i = 0; i < m1r; i++) { - mP[i] = new Array(m2c); - for (let j = 0; j < m2c; j++) { + let m1numRows = m1.length; + let m1numCols = m1[0].length; // this will be === m2numRows + let m2numCols = m2[0].length; + let mP = new Array(m1numRows); + for (let r = 0; r < m1numRows; r++) { + mP[r] = new Array(m2numCols); + for (let c = 0; c < m2numCols; c++) { let x = 0; - for (let k = 0; k < m1c; k++) { - x += m1[i][k] * m2[k][j]; + for (let k = 0; k < m1numCols; k++) { + x += m1[r][k] * m2[k][c]; } - mP[i][j] = x; + mP[r][c] = x; } } return mP; @@ -131,7 +131,7 @@ let X = new Array(num_mappings); let Y = new Array(num_mappings); let x_s_len = false; - js_x_s_s.forEach(function(x_s, i) { + js_x_s_s.forEach(function(x_s, r) { runtime.checkTuple(x_s); let js_x_s = x_s.vals; let x_s_n = js_x_s.length; @@ -140,17 +140,17 @@ } else if (x_s_n !== x_s_len) { throw runtime.ffi.throwMessageException("multiple-regression: bad mapping"); } - X[i] = new Array(x_s_len + 1) - let Xi = X[i]; - Xi[0] = 1; - js_x_s.forEach(function(x, j) { + X[r] = new Array(x_s_len + 1) + let Xr = X[r]; + Xr[0] = 1; + js_x_s.forEach(function(x, c) { runtime.checkNumber(x); - Xi[j+1] = runtime.num_to_fixnum(x); + Xr[c+1] = runtime.num_to_fixnum(x); }); }); - js_y_s.forEach(function(y, i) { + js_y_s.forEach(function(y, r) { runtime.checkNumber(y); - Y[i] = [runtime.num_to_fixnum(y)]; + Y[r] = [runtime.num_to_fixnum(y)]; }); let XT = matrixTranspose(X); From d341e3392060286432b1fa689b1834f2636e2849 Mon Sep 17 00:00:00 2001 From: Dorai Sitaram Date: Fri, 17 May 2024 14:45:51 -0400 Subject: [PATCH 07/14] test-statistics.arr: #1732 - check mulreg test on 1 var matches our linreg on same var - add mulreg test for 2 vars statistics.arr: add pointers to docs for formulas used --- src/arr/trove/statistics.arr | 6 ++++++ tests/pyret/tests/test-statistics.arr | 19 ++++++++++++++----- 2 files changed, 20 insertions(+), 5 deletions(-) diff --git a/src/arr/trove/statistics.arr b/src/arr/trove/statistics.arr index 34a2a5138e..933b76f012 100644 --- a/src/arr/trove/statistics.arr +++ b/src/arr/trove/statistics.arr @@ -210,6 +210,8 @@ fun linear-regression(x :: List, y :: List) -> (Number -> Number end end +# please see: https://online.stat.psu.edu/stat462/ + fun multiple-regression(x_s_s :: List, y_s :: List) -> (Any -> Number): doc: "multiple-regression" MR.multiple-regression(x_s_s, y_s) @@ -246,6 +248,10 @@ fun t-test-paired(l1 :: List, l2 :: List) -> Number: end end +# please see: +# https://en.wikipedia.org/wiki/Student's_t-test +# https://www.investopedia.com/terms/t/t-test.asp + fun t-test-pooled(l1 :: List, l2 :: List) -> Number: doc: "t-test-pooled" n1 = l1.length() diff --git a/tests/pyret/tests/test-statistics.arr b/tests/pyret/tests/test-statistics.arr index b3c227376c..b2eb5bb826 100644 --- a/tests/pyret/tests/test-statistics.arr +++ b/tests/pyret/tests/test-statistics.arr @@ -114,11 +114,20 @@ check "linear regression": end check "multiple regression": + # multiple-regression function for single independent variable x-s-s = [list: {4}, {4.5}, {5}, {5.5}, {6}, {6.5}, {7}] y-s = [list: 33, 42, 45, 51, 53, 61, 62] - pf = multiple-regression(x-s-s, y-s) - pf({8}) is%(within-abs(0.001)) 73.3214 + pf1 = multiple-regression(x-s-s, y-s) + pf1({8}) is-roughly 73.3214 + # + # check it matches linear-regression function on the same single variable + x-s = [list: 4, 4.5, 5, 5.5, 6, 6.5, 7] + pf2 = linear-regression(x-s, y-s) + pf2(8) is-roughly 73.3214 + # + # multiple-regression with two independent variables + x-s-s-i = [list: {4; 3}, {4.5; 2}, {5; 1.2}, {5.5; 4.5}, {6; 3.3}, {6.5; 10}, {7; 0}] + y-s-i = [list: 33, 42, 45, 51, 53, 61, 62] + pf-i = multiple-regression(x-s-s-i, y-s-i) + pf-i({8; 9}) is-roughly 74.52888 end - - - From 6f09230cd8fa26dafbb0f8be6f6043d883d0a93a Mon Sep 17 00:00:00 2001 From: Dorai Sitaram Date: Fri, 17 May 2024 14:52:15 -0400 Subject: [PATCH 08/14] multiple-regression.js: gen'd predictor function shd check that all its arg tuple's elts are numbers #1732 --- src/js/trove/multiple-regression.js | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/js/trove/multiple-regression.js b/src/js/trove/multiple-regression.js index c0b58192c5..6c95bd4916 100644 --- a/src/js/trove/multiple-regression.js +++ b/src/js/trove/multiple-regression.js @@ -163,7 +163,9 @@ } let result = B[0][0]; for (let i = 0; i < x_s_len; i++) { - result += runtime.num_to_fixnum(js_x_s[i]) * B[i+1][0] + let x = js_x_s[i]; + runtime.checkNumber(x); + result += runtime.num_to_fixnum(x) * B[i+1][0] } return runtime.makeNumber(result); } From cb49a3f750971b55ee6f45824bb06c2b81a3dbd3 Mon Sep 17 00:00:00 2001 From: Dorai Sitaram Date: Fri, 17 May 2024 15:01:59 -0400 Subject: [PATCH 09/14] multiple-regression.js: better exception msgs #1732 --- src/js/trove/multiple-regression.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/js/trove/multiple-regression.js b/src/js/trove/multiple-regression.js index 6c95bd4916..2e4740899d 100644 --- a/src/js/trove/multiple-regression.js +++ b/src/js/trove/multiple-regression.js @@ -126,7 +126,7 @@ let js_y_s = runtime.ffi.toArray(y_s); let num_mappings = js_x_s_s.length; if (js_y_s.length !== num_mappings) { - throw runtime.ffi.throwMessageException("multiple-regression: number of mappings incorrect"); + throw runtime.ffi.throwMessageException("multiple-regression: number of inputs doesn't match number of outputs"); } let X = new Array(num_mappings); let Y = new Array(num_mappings); @@ -138,7 +138,7 @@ if (x_s_len === false) { x_s_len = x_s_n; } else if (x_s_n !== x_s_len) { - throw runtime.ffi.throwMessageException("multiple-regression: bad mapping"); + throw runtime.ffi.throwMessageException("multiple-regression: lengths of input tuples are different"); } X[r] = new Array(x_s_len + 1) let Xr = X[r]; From b9ea62d21bac290262e1014855ed765cb1114d86 Mon Sep 17 00:00:00 2001 From: Dorai Sitaram Date: Sun, 19 May 2024 11:50:49 -0400 Subject: [PATCH 10/14] redefine linear-regression() as a special case of multiple-regression() #1732 --- src/arr/trove/statistics.arr | 42 +++++++++++------------------ src/js/trove/multiple-regression.js | 15 ++++++----- 2 files changed, 25 insertions(+), 32 deletions(-) diff --git a/src/arr/trove/statistics.arr b/src/arr/trove/statistics.arr index 933b76f012..4aabb05c58 100644 --- a/src/arr/trove/statistics.arr +++ b/src/arr/trove/statistics.arr @@ -184,32 +184,6 @@ fun stdev-sample(l :: List) -> Number: num-sqrt(variance-sample(l)) end -fun linear-regression(x :: List, y :: List) -> (Number -> Number): - doc: "returns a linear predictor function calculated with ordinary least squares regression" - if x.length() <> y.length(): - raise(E.message-exception("linear-regression: input lists must have equal lengths")) - else if x.length() < 2: - raise(E.message-exception("linear-regression: input lists must have at least 2 elements each")) - else: - shadow y = map(num-to-roughnum, y) - shadow x = map(num-to-roughnum, x) - xpt-xy = math.sum(map2(lam(xi, yi): xi * yi end, x, y)) - xpt-x-xpt-y = (math.sum(x) * math.sum(y)) / x.length() - covariance = xpt-xy - xpt-x-xpt-y - v1 = math.sum(map(lam(n): n * n end, x)) - v2 = (math.sum(x) * math.sum(x)) / x.length() - variance1 = v1 - v2 - beta = covariance / variance1 - alpha = mean(y) - (beta * mean(x)) - - fun predictor(in :: Number) -> Number: - (beta * in) + alpha - end - - predictor - end -end - # please see: https://online.stat.psu.edu/stat462/ fun multiple-regression(x_s_s :: List, y_s :: List) -> (Any -> Number): @@ -217,6 +191,22 @@ fun multiple-regression(x_s_s :: List, y_s :: List) -> (Any -> Numb MR.multiple-regression(x_s_s, y_s) end +fun linear-regression(x-s :: List, y-s :: List) -> (Number -> Number): + doc: "returns a linear predictor function for a single independent variable" + x-s-n = x-s.length() + if x-s-n <> y-s.length(): + raise(E.message-exception("linear-regression: input lists must have equal lengths")) + else if x-s-n < 2: + raise(E.message-exception("linear-regression: input lists must have at least 2 elements each")) + else: + predictor1 = MR.multiple-regression(x-s.map(lam(x1 :: Number): {x1} end), y-s) + fun predictor2(x2 :: Number) -> Number: + predictor1({x2}) + end + predictor2 + end +end + fun r-squared(x :: List, y :: List, f :: (Number -> Number)) -> Number: shadow x = map(num-to-roughnum, x) shadow y = map(num-to-roughnum, y) diff --git a/src/js/trove/multiple-regression.js b/src/js/trove/multiple-regression.js index 2e4740899d..dccd0f55b6 100644 --- a/src/js/trove/multiple-regression.js +++ b/src/js/trove/multiple-regression.js @@ -72,7 +72,7 @@ } let sign = (r + c) % 2; sign = (sign === 0) ? 1 : -1; - return sign * matrixDeterminant(mm) + return sign * matrixDeterminant(mm); } function matrixAdjoint(m) { @@ -84,7 +84,7 @@ mc[r][c] = matrixCofactor(m, r, c); } } - return matrixTranspose(mc) + return matrixTranspose(mc); } function matrixInverse(m) { @@ -93,8 +93,8 @@ let size = m.length; for (let r = 0; r < size; r++) { for (let c = 0; c < size; c++) { - let x = mI[r][c] - mI[r][c] = x/det + let x = mI[r][c]; + mI[r][c] = x/det; } } return mI; @@ -126,7 +126,10 @@ let js_y_s = runtime.ffi.toArray(y_s); let num_mappings = js_x_s_s.length; if (js_y_s.length !== num_mappings) { - throw runtime.ffi.throwMessageException("multiple-regression: number of inputs doesn't match number of outputs"); + throw runtime.ffi.throwMessageException("multiple-regression: lists must have equal lengths"); + if (num_mappings < 2) { + throw runtime.ffi.throwMessageException("multiple-regression: lists must have at least 2 elements each"); + } } let X = new Array(num_mappings); let Y = new Array(num_mappings); @@ -140,7 +143,7 @@ } else if (x_s_n !== x_s_len) { throw runtime.ffi.throwMessageException("multiple-regression: lengths of input tuples are different"); } - X[r] = new Array(x_s_len + 1) + X[r] = new Array(x_s_len + 1); let Xr = X[r]; Xr[0] = 1; js_x_s.forEach(function(x, c) { From 207d18b3410d66ed36285f2e14cb0007e5cc373c Mon Sep 17 00:00:00 2001 From: Dorai Sitaram Date: Fri, 14 Jun 2024 13:02:23 -0400 Subject: [PATCH 11/14] statistics.arr: Corrected t-test, z-test, t-test-pooled Simplified def of t-test-paired, t-test-independent, chi-square (Still think the `test` in these names should be `score` or `value`) --- src/arr/trove/statistics.arr | 86 +++++++++++++++------------ tests/pyret/tests/test-statistics.arr | 7 ++- 2 files changed, 53 insertions(+), 40 deletions(-) diff --git a/src/arr/trove/statistics.arr b/src/arr/trove/statistics.arr index 4aabb05c58..fafcd880da 100644 --- a/src/arr/trove/statistics.arr +++ b/src/arr/trove/statistics.arr @@ -15,10 +15,11 @@ provide { linear-regression: linear-regression, multiple-regression: multiple-regression, r-squared: r-squared, + z-test: z-test, + t-test: t-test, t-test-paired: t-test-paired, t-test-pooled: t-test-pooled, t-test-independent: t-test-independent, - z-test: z-test, chi-square: chi-square } end provide-types * @@ -222,69 +223,78 @@ fun r-squared(x :: List, y :: List, f :: (Number -> Number)) -> end end +fun z-test(sample-list :: List, population-sd :: Number, population-mean :: Number) -> Number: + doc: "z-test" + sample-size = sample-list.length() + sample-mean = mean(sample-list) + sd-of-mean = population-sd / num-sqrt(sample-size) + (sample-mean - population-mean) / sd-of-mean +end + +fun t-test(sample-list :: List, population-mean :: Number) -> Number: + doc: "t-test" + sample-size = sample-list.length() + sample-mean = mean(sample-list) + estimated-population-variance = variance-sample(sample-list) + variance-of-mean = estimated-population-variance / sample-size + (sample-mean - population-mean) / num-sqrt(variance-of-mean) +end + +# please see: +# https://en.wikipedia.org/wiki/Student's_t-test +# https://www.investopedia.com/terms/t/t-test.asp +# (this has a typo for the pooled t-test, corrected here) + fun t-test-paired(l1 :: List, l2 :: List) -> Number: doc: "t-test-paired" - n1 = l1.length() + n = l1.length() n2 = l2.length() - if n1 <> n2: + if n <> n2: raise(E.message-exception("t-test-paired: input lists must have equal lengths")) - else if n1 == 0: + else if n == 0: raise(E.message-exception("t-test-paired: input lists must have at least one element")) else: diffs = map2(lam(x1, x2): x1 - x2 end, l1, l2) diffs-mean = mean(diffs) s-hat = stdev-sample(diffs) - diffs-mean / (s-hat / num-sqrt(n1)) + diffs-mean / (s-hat / num-sqrt(n)) end end -# please see: -# https://en.wikipedia.org/wiki/Student's_t-test -# https://www.investopedia.com/terms/t/t-test.asp - -fun t-test-pooled(l1 :: List, l2 :: List) -> Number: +fun t-test-pooled(sample-list-1 :: List, sample-list-2 :: List) -> Number: doc: "t-test-pooled" - n1 = l1.length() - n2 = l2.length() + n1 = sample-list-1.length() + n2 = sample-list-2.length() if (n1 == 0) or (n2 == 0): raise(E.message-exception("t-test-pooled: input lists must have at least one element")) else: - m1 = mean(l1) - m2 = mean(l2) - v1 = variance-sample(l1) - v2 = variance-sample(l2) - v1-squared = num-sqr(v1) - v2-squared = num-sqr(v2) - (m1 - m2) / (((((n1 - 1) * v1-squared) + ((n2 - 1) * v2-squared)) / ((n1 + n2) - 2)) * - num-sqrt((1 / n1) + (1 / n2))) + m1 = mean(sample-list-1) + m2 = mean(sample-list-2) + v1 = variance-sample(sample-list-1) + v2 = variance-sample(sample-list-2) + v = (((n1 - 1) * v1) + ((n2 - 1) * v2)) / ((n1 + n2) - 2) + (m1 - m2) / num-sqrt((v / n1) + (v / n2)) end end -fun t-test-independent(l1 :: List, l2 :: List) -> Number: +fun t-test-independent(sample-list-1 :: List, sample-list-2 :: List) -> Number: doc: "t-test-independent" - n1 = l1.length() - n2 = l2.length() + n1 = sample-list-1.length() + n2 = sample-list-2.length() if (n1 == 0) or (n2 == 0): raise(E.message-exception("t-test-independent: input lists must have at least one element")) else: - m1 = mean(l1) - m2 = mean(l2) - v1 = variance-sample(l1) - v2 = variance-sample(l2) + m1 = mean(sample-list-1) + m2 = mean(sample-list-2) + v1 = variance-sample(sample-list-1) + v2 = variance-sample(sample-list-2) (m1 - m2) / num-sqrt((v1 / n1) + (v2 / n2)) end end -fun z-test(l1 :: List, l2 :: List, sd1 :: Number, sd2 :: Number) -> Number: - doc: "z-test" - n1 = l1.length() - n2 = l2.length() - x-bar-1 = mean(l1) - x-bar-2 = mean(l2) - (x-bar-1 - x-bar-2) / num-sqrt((num-expt(sd1, 2) / n1) + (num-expt(sd2, 2) / n2)) -end - -fun chi-square(obs :: List, exp :: List) -> Number: +fun chi-square(observed-values :: List, predicted-values :: List) -> Number: doc: "chi-square" - math.sum(map2(lam(o, e): num-expt(o - e, 2) / e end, obs, exp)) + math.sum(map2(lam(observed-value, predicted-value): + num-expt(observed-value - predicted-value, 2) / predicted-value end, + observed-values, predicted-values)) end diff --git a/tests/pyret/tests/test-statistics.arr b/tests/pyret/tests/test-statistics.arr index b2eb5bb826..b2f158ddaf 100644 --- a/tests/pyret/tests/test-statistics.arr +++ b/tests/pyret/tests/test-statistics.arr @@ -69,19 +69,22 @@ check "numeric helpers": stdev-sample([list: 1, 1, 1, 1]) is-roughly ~0 stdev-sample([list:]) raises "empty" + z-test([list: 1, 2, 3], 1.58, 2.58) is%(within(0.01)) -0.6358 + + t-test([list: 1, 2, 3], 2.58) is%(within(0.01)) -1.0046 + t-test-paired([list: 1], [list: 2, 3]) raises "lists must have equal lengths" t-test-paired([list:], [list:]) raises "lists must have at least one element" t-test-paired([list: 1, 2, 3], [list: 4, 6, 8]) is%(within(0.01)) -6.928 t-test-pooled([list:], [list: 1, 2, 3]) raises "lists must have at least one element" t-test-pooled([list: 1, 2, 3], [list: 4, 5, 6]) is%(within(0.01)) -3.674 - t-test-pooled([list: 1, 2, 3], [list: 4, 5, 6, 7]) is%(within(0.01)) -2.217 + t-test-pooled([list: 1, 2, 3], [list: 4, 5, 6, 7]) is%(within(0.01)) -3.873 t-test-independent([list:], [list: 1, 2, 3]) raises "lists must have at least one element" t-test-independent([list: 1, 2, 3], [list: 4, 5, 6]) is%(within(0.01)) -3.674 t-test-independent([list: 1, 2, 3], [list: 4, 5, 6, 7]) is%(within(0.01)) -4.041 - z-test([list: 1, 2, 3], [list: 4, 5, 6], 1.58, 2.58) is%(within(0.01)) -1.718 chi-square([list: 1, 2, 3, 4], [list: 1, 2, 3, 4]) is 0 chi-square([list: 1, 2, 3, 4], [list: 0.9, 1.8, 3.5, 4.7]) is%(within(0.01)) 0.209 From c5326d6394f2e080dab8e69d2bdcfd8c368628a6 Mon Sep 17 00:00:00 2001 From: Dorai Sitaram Date: Thu, 4 Jul 2024 11:20:58 -0400 Subject: [PATCH 12/14] - matrices.arr: Add fun list-to-vector -- method to-vector: typo fix (<> shd be or) - statistics.arr: use vector dot product instead of doing list manip - test-matrices.arr: add test for list-to-vector --- src/arr/trove/matrices.arr | 8 +++++++- src/arr/trove/statistics.arr | 7 ++++--- tests/pyret/tests/test-matrices.arr | 2 ++ 3 files changed, 13 insertions(+), 4 deletions(-) diff --git a/src/arr/trove/matrices.arr b/src/arr/trove/matrices.arr index 3d9293f13b..ba7c97d18c 100644 --- a/src/arr/trove/matrices.arr +++ b/src/arr/trove/matrices.arr @@ -61,6 +61,7 @@ provide: make-matrix, build-matrix, vector-to-matrix, + list-to-vector, list-to-matrix, list-to-row-matrix, list-to-col-matrix, @@ -351,7 +352,7 @@ sharing: method to-vector(self) -> Vector: doc: "Returns the one-row/one-column matrix as a vector" - if ((self.cols == 1) <> (self.rows == 1)): + if ((self.cols == 1) or (self.rows == 1)): vector-of-raw-array(self.elts) else: raise("Cannot convert non-vector matrix to vector") @@ -974,6 +975,11 @@ where: [mk-mtx(1, 3): 1, 2, 3] end +fun list-to-vector(lst :: List): + doc: "Converts the given list of numbers into a vector" + list-to-row-matrix(lst).to-vector() +end + fun list-to-matrix(rows :: NonZeroNat, cols :: NonZeroNat, lst :: List) block: doc: "Converts the given list of numbers into a matrix of the given size" when not(lst.length() == (rows * cols)): diff --git a/src/arr/trove/statistics.arr b/src/arr/trove/statistics.arr index f9d873b1f3..8123559a89 100644 --- a/src/arr/trove/statistics.arr +++ b/src/arr/trove/statistics.arr @@ -226,11 +226,12 @@ end fun multiple-regression(x_s_s :: List>, y_s :: List) -> (Any -> Number): doc: "multiple-regression" - mx_X = MX.lists-to-matrix(x_s_s.map(lam(x_s): x_s.push(1) end)) + intercepts = MX.col-matrix.make(raw-array-of(1, y_s.length())) + mx_X = intercepts.augment(MX.lists-to-matrix(x_s_s)) mx_Y = MX.lists-to-matrix(y_s.map(lam(y): [list: y] end)) - B = MX.mtx-to-list(MX.mtx-least-squares-solve(mx_X, mx_Y)) + B = MX.mtx-to-vector(MX.mtx-least-squares-solve(mx_X, mx_Y)) fun B_pred_fn(x_s :: List) -> Number: - math.sum(map2(lam(b_i, x_i): b_i * x_i end, B, x_s.push(1))) + B.dot(MX.list-to-vector(x_s.push(1))) end B_pred_fn end diff --git a/tests/pyret/tests/test-matrices.arr b/tests/pyret/tests/test-matrices.arr index 7752cb7ae7..12b5685c99 100644 --- a/tests/pyret/tests/test-matrices.arr +++ b/tests/pyret/tests/test-matrices.arr @@ -57,6 +57,8 @@ check "Vector Operations": M.vec-normalize([vector: 1, 2, 3]) is%(vector-within(0.001)) [vector: (1 / num-sqrt(14)), (2 / num-sqrt(14)), (3 / num-sqrt(14))] + + M.list-to-vector([list: 1, 2, 3]) is [vector: 1, 2, 3] end check "Basic Matrix Accessors": From bddb03f9408c4da48c070002dd3a75c538288625 Mon Sep 17 00:00:00 2001 From: Dorai Sitaram Date: Thu, 4 Jul 2024 23:50:50 -0400 Subject: [PATCH 13/14] efficiencies suggested by @blerner added --- src/arr/trove/matrices.arr | 2 +- src/arr/trove/statistics.arr | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/arr/trove/matrices.arr b/src/arr/trove/matrices.arr index ba7c97d18c..01415df976 100644 --- a/src/arr/trove/matrices.arr +++ b/src/arr/trove/matrices.arr @@ -977,7 +977,7 @@ end fun list-to-vector(lst :: List): doc: "Converts the given list of numbers into a vector" - list-to-row-matrix(lst).to-vector() + vector(raw-array-from-list(lst)) end fun list-to-matrix(rows :: NonZeroNat, cols :: NonZeroNat, lst :: List) block: diff --git a/src/arr/trove/statistics.arr b/src/arr/trove/statistics.arr index 8123559a89..26e667e138 100644 --- a/src/arr/trove/statistics.arr +++ b/src/arr/trove/statistics.arr @@ -228,7 +228,7 @@ fun multiple-regression(x_s_s :: List>, y_s :: List) -> (An doc: "multiple-regression" intercepts = MX.col-matrix.make(raw-array-of(1, y_s.length())) mx_X = intercepts.augment(MX.lists-to-matrix(x_s_s)) - mx_Y = MX.lists-to-matrix(y_s.map(lam(y): [list: y] end)) + mx_Y = MX.col-matrix.make(raw-array-from-list(y_s)) B = MX.mtx-to-vector(MX.mtx-least-squares-solve(mx_X, mx_Y)) fun B_pred_fn(x_s :: List) -> Number: B.dot(MX.list-to-vector(x_s.push(1))) From d36171ff510cba6d68c1de03fbb4d7894ee94a57 Mon Sep 17 00:00:00 2001 From: Dorai Sitaram Date: Fri, 5 Jul 2024 10:40:53 -0400 Subject: [PATCH 14/14] - statistics.arr: add more descriptive doc strings -- multiple-regrssion output type made more specific -- multiple-regression: predictor fn raises exception on wrong # of args -- chi-square: simplify -- use simpler is-roughly rather than is%(within(0.01)) - test-statistics.arr: add check for mult reg predictor arg# mismatch - test-matrices.arr: add additional tests for .to-vector() --- src/arr/trove/statistics.arr | 33 ++++++++++++++++----------- tests/pyret/tests/test-matrices.arr | 2 ++ tests/pyret/tests/test-statistics.arr | 17 +++++++------- 3 files changed, 31 insertions(+), 21 deletions(-) diff --git a/src/arr/trove/statistics.arr b/src/arr/trove/statistics.arr index 26e667e138..98f65a0294 100644 --- a/src/arr/trove/statistics.arr +++ b/src/arr/trove/statistics.arr @@ -224,20 +224,26 @@ end # please see: https://online.stat.psu.edu/stat462/ -fun multiple-regression(x_s_s :: List>, y_s :: List) -> (Any -> Number): - doc: "multiple-regression" +fun multiple-regression(x_s_s :: List>, y_s :: List) -> (List -> Number): + doc: "returns a predictor function given a list of list of independent inputs and the correspoinding list of outputs" intercepts = MX.col-matrix.make(raw-array-of(1, y_s.length())) mx_X = intercepts.augment(MX.lists-to-matrix(x_s_s)) mx_Y = MX.col-matrix.make(raw-array-from-list(y_s)) B = MX.mtx-to-vector(MX.mtx-least-squares-solve(mx_X, mx_Y)) + B_n = B.length() - 1 fun B_pred_fn(x_s :: List) -> Number: - B.dot(MX.list-to-vector(x_s.push(1))) + x_s_n = x_s.length() + if B_n <> x_s_n: + raise(E.message-exception("multiple-regression: the regression expected " + tostring(B_n) + " inputs, but received " + tostring(x_s_n) + " instead")) + else: + B.dot(MX.list-to-vector(x_s.push(1))) + end end B_pred_fn end fun linear-regression(x-s :: List, y-s :: List) -> (Number -> Number): - doc: "returns a linear predictor function for a single independent variable" + doc: "returns a linear predictor function given a list of single inputs and the corresponding list of outputs" x-s-n = x-s.length() if x-s-n <> y-s.length(): raise(E.message-exception("linear-regression: input lists must have equal lengths")) @@ -253,6 +259,7 @@ fun linear-regression(x-s :: List, y-s :: List) -> (Number -> Nu end fun r-squared(x :: List, y :: List, f :: (Number -> Number)) -> Number: + doc: "given a list of inputs, a list of outputs, and a model predictor, finds the r-squared of the model" shadow x = map(num-to-roughnum, x) shadow y = map(num-to-roughnum, y) y-mean = mean(y) @@ -268,7 +275,7 @@ fun r-squared(x :: List, y :: List, f :: (Number -> Number)) -> end fun z-test(sample-list :: List, population-sd :: Number, population-mean :: Number) -> Number: - doc: "z-test" + doc: "given a sample and the population SD and mean, find the z-score of the sample's mean" sample-size = sample-list.length() sample-mean = mean(sample-list) sd-of-mean = population-sd / num-sqrt(sample-size) @@ -276,7 +283,7 @@ fun z-test(sample-list :: List, population-sd :: Number, population-mean :: Numb end fun t-test(sample-list :: List, population-mean :: Number) -> Number: - doc: "t-test" + doc: "given a sample and the population mean, find the t-score of the sample's mean" sample-size = sample-list.length() sample-mean = mean(sample-list) estimated-population-variance = variance-sample(sample-list) @@ -290,7 +297,7 @@ end # (this has a typo for the pooled t-test, corrected here) fun t-test-paired(l1 :: List, l2 :: List) -> Number: - doc: "t-test-paired" + doc: "given two paired samples, find the t-score of the difference of their means" n = l1.length() n2 = l2.length() if n <> n2: @@ -306,7 +313,7 @@ fun t-test-paired(l1 :: List, l2 :: List) -> Number: end fun t-test-pooled(sample-list-1 :: List, sample-list-2 :: List) -> Number: - doc: "t-test-pooled" + doc: "given two independent samples of different sizes or variances, find the t-score of the difference of their means" n1 = sample-list-1.length() n2 = sample-list-2.length() if (n1 == 0) or (n2 == 0): @@ -322,7 +329,7 @@ fun t-test-pooled(sample-list-1 :: List, sample-list-2 :: List) -> Number: end fun t-test-independent(sample-list-1 :: List, sample-list-2 :: List) -> Number: - doc: "t-test-independent" + doc: "given two independent samples of similar size or variance, find the t-score of the difference of their means" n1 = sample-list-1.length() n2 = sample-list-2.length() if (n1 == 0) or (n2 == 0): @@ -337,8 +344,8 @@ fun t-test-independent(sample-list-1 :: List, sample-list-2 :: List) -> Number: end fun chi-square(observed-values :: List, predicted-values :: List) -> Number: - doc: "chi-square" - math.sum(map2(lam(observed-value, predicted-value): - num-expt(observed-value - predicted-value, 2) / predicted-value end, - observed-values, predicted-values)) + doc: "given a list of observed and predicted values, returns the chi-square statistic" + for fold2(sum from 0, obs from observed-values, pred from predicted-values): + sum + (num-sqr(obs - pred) / pred) + end end diff --git a/tests/pyret/tests/test-matrices.arr b/tests/pyret/tests/test-matrices.arr index 12b5685c99..7a25792abb 100644 --- a/tests/pyret/tests/test-matrices.arr +++ b/tests/pyret/tests/test-matrices.arr @@ -97,6 +97,8 @@ check "Basic Matrix Accessors": M.mtx-row(mtx1, 0).to-vector() is [vector: 1, 2, 3] M.mtx-col(mtx1, 1).to-vector() is [vector: 2, 5] + [row-matrix: 1, 2, 3].to-vector() is [vector: 1, 2, 3] + [col-matrix: 1, 2, 3].to-vector() is [vector: 1, 2, 3] end check "Submatrices": diff --git a/tests/pyret/tests/test-statistics.arr b/tests/pyret/tests/test-statistics.arr index 13e685ea2e..79581ba2df 100644 --- a/tests/pyret/tests/test-statistics.arr +++ b/tests/pyret/tests/test-statistics.arr @@ -69,25 +69,25 @@ check "numeric helpers": stdev-sample([list: 1, 1, 1, 1]) is-roughly ~0 stdev-sample([list:]) raises "empty" - z-test([list: 1, 2, 3], 1.58, 2.58) is%(within(0.01)) -0.6358 + z-test([list: 1, 2, 3], 1.58, 2.58) is-roughly ~-0.635816119 - t-test([list: 1, 2, 3], 2.58) is%(within(0.01)) -1.0046 + t-test([list: 1, 2, 3], 2.58) is-roughly ~-1.004589468 t-test-paired([list: 1], [list: 2, 3]) raises "lists must have equal lengths" t-test-paired([list:], [list:]) raises "lists must have at least one element" - t-test-paired([list: 1, 2, 3], [list: 4, 6, 8]) is%(within(0.01)) -6.928 + t-test-paired([list: 1, 2, 3], [list: 4, 6, 8]) is-roughly ~-6.928203230 t-test-pooled([list:], [list: 1, 2, 3]) raises "lists must have at least one element" - t-test-pooled([list: 1, 2, 3], [list: 4, 5, 6]) is%(within(0.01)) -3.674 - t-test-pooled([list: 1, 2, 3], [list: 4, 5, 6, 7]) is%(within(0.01)) -3.873 + t-test-pooled([list: 1, 2, 3], [list: 4, 5, 6]) is%(within(0.01)) -3.674234614 + t-test-pooled([list: 1, 2, 3], [list: 4, 5, 6, 7]) is-roughly ~-3.872983346 t-test-independent([list:], [list: 1, 2, 3]) raises "lists must have at least one element" - t-test-independent([list: 1, 2, 3], [list: 4, 5, 6]) is%(within(0.01)) -3.674 - t-test-independent([list: 1, 2, 3], [list: 4, 5, 6, 7]) is%(within(0.01)) -4.041 + t-test-independent([list: 1, 2, 3], [list: 4, 5, 6]) is-roughly ~-3.674234614 + t-test-independent([list: 1, 2, 3], [list: 4, 5, 6, 7]) is-roughly ~-4.041451884 chi-square([list: 1, 2, 3, 4], [list: 1, 2, 3, 4]) is 0 - chi-square([list: 1, 2, 3, 4], [list: 0.9, 1.8, 3.5, 4.7]) is%(within(0.01)) 0.209 + chi-square([list: 1, 2, 3, 4], [list: 0.9, 1.8, 3.5, 4.7]) is-roughly ~0.2090172239 end check "polymorphic modes": @@ -136,6 +136,7 @@ check "multiple regression": y-s = [list: 33, 42, 45, 51, 53, 61, 62] pf1 = multiple-regression(x-s-s, y-s) pf1([list: 8]) is-roughly 73.3214 + pf1([list: 8, 9]) raises "received" # # check it matches linear-regression function on the same single variable x-s = [list: 4, 4.5, 5, 5.5, 6, 6.5, 7]