From d83b89b7d308a5435d467d4aefa1aff10d57caa9 Mon Sep 17 00:00:00 2001 From: Simon Garnier Date: Wed, 7 Aug 2024 18:18:32 +0200 Subject: [PATCH] First pass --- .Rbuildignore | 5 + .github/.gitignore | 1 + .github/workflows/R-CMD-check.yaml | 52 ++++++++ .github/workflows/pkgdown.yaml | 50 ++++++++ .github/workflows/test-coverage.yaml | 61 ++++++++++ .gitignore | 2 + DESCRIPTION | 10 +- NAMESPACE | 4 + R/RcppExports.R | 33 ++++++ R/wcec-package.R | 57 +++++++++ R/wcec.R | 171 +++++++++++++++++++++++++++ README.md | 31 ++++- _pkgdown.yml | 4 + data/four_Gaussians.rda | Bin 0 -> 6289 bytes data/three_Gaussians.rda | Bin 0 -> 4757 bytes man/dot-wcov.Rd | 35 ++++++ man/four_Gaussians.Rd | 15 +++ man/three_Gaussians.Rd | 15 +++ man/wcec.Rd | 35 ++++++ src/.gitignore | 3 + src/Makevars | 8 ++ src/Makevars.win | 8 ++ src/RcppExports.cpp | 35 ++++++ src/fast.cpp | 47 ++++++++ 24 files changed, 680 insertions(+), 2 deletions(-) create mode 100644 .github/.gitignore create mode 100644 .github/workflows/R-CMD-check.yaml create mode 100644 .github/workflows/pkgdown.yaml create mode 100644 .github/workflows/test-coverage.yaml create mode 100644 R/RcppExports.R create mode 100644 R/wcec-package.R create mode 100644 R/wcec.R create mode 100644 _pkgdown.yml create mode 100644 data/four_Gaussians.rda create mode 100644 data/three_Gaussians.rda create mode 100644 man/dot-wcov.Rd create mode 100644 man/four_Gaussians.Rd create mode 100644 man/three_Gaussians.Rd create mode 100644 man/wcec.Rd create mode 100644 src/.gitignore create mode 100644 src/Makevars create mode 100644 src/Makevars.win create mode 100644 src/RcppExports.cpp create mode 100644 src/fast.cpp diff --git a/.Rbuildignore b/.Rbuildignore index 8fa460c..0d639f6 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -1,2 +1,7 @@ ^wcec\.Rproj$ ^\.Rproj\.user$ +^_pkgdown\.yml$ +^docs$ +^pkgdown$ +^\.github$ +^sandbox$ diff --git a/.github/.gitignore b/.github/.gitignore new file mode 100644 index 0000000..2d19fc7 --- /dev/null +++ b/.github/.gitignore @@ -0,0 +1 @@ +*.html diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml new file mode 100644 index 0000000..d46a617 --- /dev/null +++ b/.github/workflows/R-CMD-check.yaml @@ -0,0 +1,52 @@ +# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples +# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help +on: + push: + branches: [main, master] + pull_request: + branches: [main, master] + +name: R-CMD-check.yaml + +permissions: read-all + +jobs: + R-CMD-check: + runs-on: ${{ matrix.config.os }} + + name: ${{ matrix.config.os }} (${{ matrix.config.r }}) + + strategy: + fail-fast: false + matrix: + config: + - {os: macos-latest, r: 'release'} + - {os: windows-latest, r: 'release'} + - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'} + - {os: ubuntu-latest, r: 'release'} + - {os: ubuntu-latest, r: 'oldrel-1'} + + env: + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + R_KEEP_PKG_SOURCE: yes + + steps: + - uses: actions/checkout@v4 + + - uses: r-lib/actions/setup-pandoc@v2 + + - uses: r-lib/actions/setup-r@v2 + with: + r-version: ${{ matrix.config.r }} + http-user-agent: ${{ matrix.config.http-user-agent }} + use-public-rspm: true + + - uses: r-lib/actions/setup-r-dependencies@v2 + with: + extra-packages: any::rcmdcheck + needs: check + + - uses: r-lib/actions/check-r-package@v2 + with: + upload-snapshots: true + build_args: 'c("--no-manual","--compact-vignettes=gs+qpdf")' diff --git a/.github/workflows/pkgdown.yaml b/.github/workflows/pkgdown.yaml new file mode 100644 index 0000000..4bbce75 --- /dev/null +++ b/.github/workflows/pkgdown.yaml @@ -0,0 +1,50 @@ +# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples +# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help +on: + push: + branches: [main, master] + pull_request: + branches: [main, master] + release: + types: [published] + workflow_dispatch: + +name: pkgdown.yaml + +permissions: read-all + +jobs: + pkgdown: + runs-on: ubuntu-latest + # Only restrict concurrency for non-PR jobs + concurrency: + group: pkgdown-${{ github.event_name != 'pull_request' || github.run_id }} + env: + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + permissions: + contents: write + steps: + - uses: actions/checkout@v4 + + - uses: r-lib/actions/setup-pandoc@v2 + + - uses: r-lib/actions/setup-r@v2 + with: + use-public-rspm: true + + - uses: r-lib/actions/setup-r-dependencies@v2 + with: + extra-packages: any::pkgdown, local::. + needs: website + + - name: Build site + run: pkgdown::build_site_github_pages(new_process = FALSE, install = FALSE) + shell: Rscript {0} + + - name: Deploy to GitHub pages 🚀 + if: github.event_name != 'pull_request' + uses: JamesIves/github-pages-deploy-action@v4.5.0 + with: + clean: false + branch: gh-pages + folder: docs diff --git a/.github/workflows/test-coverage.yaml b/.github/workflows/test-coverage.yaml new file mode 100644 index 0000000..9882260 --- /dev/null +++ b/.github/workflows/test-coverage.yaml @@ -0,0 +1,61 @@ +# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples +# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help +on: + push: + branches: [main, master] + pull_request: + branches: [main, master] + +name: test-coverage.yaml + +permissions: read-all + +jobs: + test-coverage: + runs-on: ubuntu-latest + env: + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + + steps: + - uses: actions/checkout@v4 + + - uses: r-lib/actions/setup-r@v2 + with: + use-public-rspm: true + + - uses: r-lib/actions/setup-r-dependencies@v2 + with: + extra-packages: any::covr, any::xml2 + needs: coverage + + - name: Test coverage + run: | + cov <- covr::package_coverage( + quiet = FALSE, + clean = FALSE, + install_path = file.path(normalizePath(Sys.getenv("RUNNER_TEMP"), winslash = "/"), "package") + ) + covr::to_cobertura(cov) + shell: Rscript {0} + + - uses: codecov/codecov-action@v4 + with: + fail_ci_if_error: ${{ github.event_name != 'pull_request' && true || false }} + file: ./cobertura.xml + plugin: noop + disable_search: true + token: ${{ secrets.CODECOV_TOKEN }} + + - name: Show testthat output + if: always() + run: | + ## -------------------------------------------------------------------- + find '${{ runner.temp }}/package' -name 'testthat.Rout*' -exec cat '{}' \; || true + shell: bash + + - name: Upload test results + if: failure() + uses: actions/upload-artifact@v4 + with: + name: coverage-test-failures + path: ${{ runner.temp }}/package diff --git a/.gitignore b/.gitignore index accfb7b..5bbbb81 100644 --- a/.gitignore +++ b/.gitignore @@ -6,3 +6,5 @@ .httr-oauth .DS_Store .quarto +docs +sandbox diff --git a/DESCRIPTION b/DESCRIPTION index 5e1cb57..117164f 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -10,5 +10,13 @@ License: `use_mit_license()`, `use_gpl3_license()` or friends to pick a Encoding: UTF-8 Roxygen: list(markdown = TRUE) RoxygenNote: 7.3.2 -URL: https://github.com/swarm-lab/wcec +URL: https://github.com/swarm-lab/wcec, https://swarm-lab.github.io/wcec/ BugReports: https://github.com/swarm-lab/wcec/issues +LazyData: true +LinkingTo: + Rcpp, + RcppEigen +Imports: + Rcpp, + RcppEigen, + Rfast diff --git a/NAMESPACE b/NAMESPACE index 6ae9268..42825af 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,2 +1,6 @@ # Generated by roxygen2: do not edit by hand +export(wcec) +import(RcppEigen) +importFrom(Rcpp,evalCpp) +useDynLib(wcec, .registration = TRUE) diff --git a/R/RcppExports.R b/R/RcppExports.R new file mode 100644 index 0000000..2f49a37 --- /dev/null +++ b/R/RcppExports.R @@ -0,0 +1,33 @@ +# Generated by using Rcpp::compileAttributes() -> do not edit by hand +# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 + +#' @name .wcov +#' +#' @title Weighted Covariance Matrix +#' +#' @description \code{wcov} computes the estimates of the weighted covariance +#' matrix and the weighted mean of the data. +#' +#' @param x A matrix with \eqn{m} columns and \eqn{n} rows, where each column +#' represents a different variable and each row a different observation. +#' +#' @param w A non-negative and non-zero vector of weights for each observation. +#' Its length must equal the number of rows of x. +#' +#' @return A list with two components: +#' \itemize{ +#' \item \code{center}: an estimate for the center (mean) of the data. +#' \item \code{cov}: the estimated (weighted) covariance matrix. +#' } +#' +#' @author Simon Garnier, \email{garnier@@njit.edu} +#' +#' @examples +#' m <- matrix(c(rnorm(500, 6), rnorm(500, 11, 3)), ncol = 2) +#' w <- runif(500) +#' gravitree:::.wcov(m, w) +#' +.wcov <- function(x, w) { + .Call('_wcec_wcov', PACKAGE = 'wcec', x, w) +} + diff --git a/R/wcec-package.R b/R/wcec-package.R new file mode 100644 index 0000000..a06bcaa --- /dev/null +++ b/R/wcec-package.R @@ -0,0 +1,57 @@ +#' wcec: ... +#' +#' The \code{wcec} package provides ... +#' +#' @author Simon Garnier +#' @author Jason Graham +#' +#' Maintainer: Simon Garnier +#' +#' @details +#' \tabular{ll}{ +#' Package: \tab wcec\cr +#' Type: \tab Package\cr +#' Version: \tab 0.1\cr +#' Date: \tab 2024-08-07\cr +#' License: \tab GPL-3\cr +#' } +#' +#' @name wcec +#' @import RcppEigen +#' @importFrom Rcpp evalCpp +#' @useDynLib wcec, .registration = TRUE +"_PACKAGE" + + +#' @title Four Gaussian Clusters +#' +#' @description Matrix of 2-dimensional points forming four Gaussian clusters. +#' +#' @name four_Gaussians +#' +#' @docType data +#' +#' @keywords datasets +#' +#' @examples +#' data(four_Gaussians) +#' plot(four_Gaussians, pch = 19) +#' +NULL + + +#' @title Three Gaussian Clusters +#' +#' @description Matrix of 2-dimensional points forming three Gaussian clusters. +#' +#' @name three_Gaussians +#' +#' @docType data +#' +#' @keywords datasets +#' +#' @examples +#' data(three_Gaussians) +#' plot(three_Gaussians, pch = 19) +#' +NULL diff --git a/R/wcec.R b/R/wcec.R new file mode 100644 index 0000000..d97d38b --- /dev/null +++ b/R/wcec.R @@ -0,0 +1,171 @@ +.init_density <- function(x, k, w) { + n_blocks <- k + 2 + block_list <- apply(x, 2, function(y) { + breaks <- seq( + from = min(y, na.rm = TRUE), + to = max(y, na.rm = TRUE), + length.out = n_blocks + ) + cut(y, breaks, labels = FALSE, include.lowest = TRUE, right = FALSE) + }, simplify = FALSE) + fq <- table(block_list) + ord <- order(fq, decreasing = TRUE) + idx <- arrayInd(ord, dim(fq), dimnames(fq)) + init <- matrix(NA, nrow = k, ncol = ncol(x)) + blocks <- simplify2array(block_list) + + for (i in seq_len(k)) { + ix <- Rfast::rowAll(sweep(blocks, 2, idx[1, ], "==")) + init[i, ] <- apply(x[ix, , drop = FALSE], 2, weighted.mean, w = w[ix]) + d <- Rfast::rowMaxs(sweep(idx, 2, idx[1, ])^2, TRUE) + idx <- idx[d > sqrt(n_blocks), , drop = FALSE] + } + + Rfast::rowMins( + Rfast::dista(x, init) + ) +} + +.array_split <- function(x, k) { + nr <- nrow(x) + n <- rep(floor(nr / k), k) + r <- nr - sum(n) + + for (i in seq_len(r)) { + n[i] <- n[i] + 1 + } + + end <- cumsum(n) + start <- c(1, end[1:(k - 1)] + 1) + + l <- lapply(1:k, function(i) { + x[start[i]:end[i], , drop = FALSE] + }) + names(l) <- paste0(start, "-", end) + l +} + +.init_sharding <- function(x, k) { + ord <- order(Rfast::rowsums(x)) + xsplit <- .array_split(x[ord, ], k) + Rfast::rowMins( + Rfast::dista(x, t(sapply(xsplit, Rfast::colmeans))) + ) +} + +.init_clusters <- function(x, k, w, method) { + n <- nrow(x) + + if (n != length(w)) { + stop("The number of elements in `w` is not the same as the number of rows in `x`.") + } + + if (length(k) > 1) { + if (n != length(k)) { + stop("The number of elements in `k` is not the same as the number of rows in `x`.") + } + + if (!all((floor(k) - k) == 0)) { + stop("Not all values in `k` are integers.") + } + + suk <- sort(unique(k)) + lookup <- cbind(suk, as.numeric(as.factor(suk))) + match(k, lookup) + } else if (length(k) == 1) { + if (k > 1) { + if (method == "sharding") { + .init_sharding(x, k) + } else if (method == "density") { + .init_density(x, k, w) + } else if (method == "random") { + sample(1:k, n, TRUE) + } else { + stop("Invalid initialization method.") + } + } else { + rep(1, n) + } + } else { + stop("Invalid 'k'.") + } +} + +.ce_gaussian <- function(n, cov) { + (n * log(2.0 * pi * exp(1)) + log(det(cov))) / 2 +} + +.cost_gaussian <- function(p, n, params) { + sum( + sapply(1:length(p), function(i) { + .ce_gaussian(p[i] * n, params[[i]]$cov) + }) + ) +} + +.dist_gaussian <- function(x, p, mu, sigma) { + -log(p) - log(mvtnorm::dmvnorm(x, mean = mu, sigma = sigma, checkSymmetry = FALSE)) +} + +.wcec <- function(x, k, w, iter_max) { + n <- nrow(x) + out <- list( + clustering = k, + probability = table(k) / n, + params = lapply(unique(k), function(clust) { + idx <- k == clust + wcec:::.wcov( + x[idx, , drop = FALSE], + w[idx] + ) + }), + iter = 0 + ) + d <- matrix(Inf, nrow = n, ncol = length(out$params)) + out$cost <- wcec:::.cost_gaussian(out$probability, n, out$params) + + for (i in 1:iter_max) { + for (j in seq_along(out$params)) { + d[, j] <- wcec:::.dist_gaussian(x, out$probability[j], out$params[[j]]$center, out$params[[j]]$cov) + } + + new_clustering <- Rfast::rowMins(d) + new_clustering <- match(new_clustering, sort(unique(new_clustering))) + test <- all(new_clustering == out$clustering) + out$clustering <- new_clustering + + if (test == TRUE) { + break + } else { + out$params <- lapply(unique(out$clustering), function(clust) { + idx <- out$clustering == clust + wcec:::.wcov( + x[idx, , drop = FALSE], + w[idx] + ) + }) + + d <- d * Inf + out$probability <- table(out$clustering) / n + out$cost <- wcec:::.cost_gaussian(out$probability, n, out$params) + } + } + + out$iter <- i + out +} + +#' @export +wcec <- function( + x, k, w = rep(1 / nrow(x), nrow(x)), iter_max = 10, + init_method = "sharding") { + # CHECKS + + # INITIALIZATION + k <- .init_clusters(x, k, w, method = init_method) + + # FIRST PASS + out <- .wcec(x = x, k = k, w = w, iter_max = iter_max) + + out +} diff --git a/README.md b/README.md index 779d4d7..7061fbe 100644 --- a/README.md +++ b/README.md @@ -1 +1,30 @@ -# wcec \ No newline at end of file + +# wcec + + +[![CRAN status](https://www.r-pkg.org/badges/version/wcec)](https://CRAN.R-project.org/package=wcec) +[![R-CMD-check](https://github.com/swarm-lab/wcec/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/swarm-lab/wcec/actions/workflows/R-CMD-check.yaml) +[![Codecov test coverage](https://codecov.io/gh/swarm-lab/wcec/graph/badge.svg)](https://app.codecov.io/gh/swarm-lab/wcec) +[![test-coverage](https://github.com/swarm-lab/wcec/actions/workflows/test-coverage.yaml/badge.svg)](https://github.com/swarm-lab/wcec/actions/workflows/test-coverage.yaml) + + +The goal of wcec is to ... + +## Installation + +You can install the development version of wcec from [GitHub](https://github.com/) with: + +``` r +# install.packages("pak") +pak::pak("swarm-lab/wcec") +``` + +## Example + +This is a basic example which shows you how to solve a common problem: + +``` r +library(wcec) +## basic example code +``` + diff --git a/_pkgdown.yml b/_pkgdown.yml new file mode 100644 index 0000000..60eea50 --- /dev/null +++ b/_pkgdown.yml @@ -0,0 +1,4 @@ +url: https://swarm-lab.github.io/wcec/ +template: + bootstrap: 5 + diff --git a/data/four_Gaussians.rda b/data/four_Gaussians.rda new file mode 100644 index 0000000000000000000000000000000000000000..2a95fb66ce1685bac9a8858af11b696113ff635d GIT binary patch literal 6289 zcmV;C7;fhuiwFP!0000016^5ZG}T|!zQ$|3?#0EO2Sa5n8B+LdN=O+}Y0#jgQc=p# zfDDOHNF+n1l!RnTDkqW%A)-OZl&HUX9^U(|_uKnnuYLAe>+H4nv-aBiIp^%$vUk1M zUNIaFCx8ED-MhAGY{20J`FRE(P7o)`(*zfHPmjaqPM#MooN{uzfWz?< zc&FG{U+tgwRRYEpT~0M=)~^<^@!*=Ur&a~nXn^k2IkPZqP(iOiP|2y{-5>RYVNrz5 zoi|91O1pr~tT{^xy)A+L-5vk7H%1(U!I6yqcgY-GAtVqkaR3l*}IGl*=3&Fo@0ZJ4b1JB zeUgOr++*%8y3m2Ox!5bYS0!Of2i0rve(Jz4!ET#IK3I36_Rnqi?qPr2(#=i_zQQKz z!mOGiPl8b7;j4!SZeX+C>ctlwyDOG&NJrW8{FNp|+}x3R68p_Z|8P+^1^8ZAd%Mlu z!j^Ei8arhdv1Xar2?^q3?PzO+n|dQF8az$d0!arH{jlwf+J&v%qfKX_bkM9v%4za!S~OXpZPQ zL)nU9vj()>R@B-za9_g>0uLUsn;GDpRV}kG3;Vs;f!^D zke&%yh^wg9ix~U;^BFd=(@5jWYiVrU=2k1;C3;1{;6tNDl_+fD#SzNNaxykM!u*}G zmWz#f4v&L!-+1(XAtF=A^xQK!19`*AeJ_Ld;@zj7n z^B{gqUxz-)#s*7tyYy}sfv^lmps4f})-C2wHYx4K#`;c5r|2nTfA1f|Q|z0tN$cAV zy(LcAydX`hG%*QrUB zJM(tL6;X+wmA>ZiBhH(hO^2^sM$Cp=LuUQo5#y}oz`>r6h_I?;v&Rz-lD|pa*laI` z=v&{9-tkICvR)ZAIrp}PTb{)jehig}KxIXg4=L{kyjHQ`_?;vN zhar~5ROPmP1VkMi**S@8LUf@Y>uh^F5eL1#zUz80qO0>$L@p^HE?imF`ky;eFr`SG zdv*%3x4NECkGqA~37T^KZF>;=?#IF|jVZ(lOu5Qxa6wXeLhpLal@Q}iVN2`WJfisK zH@^O!g4kGh?LUPQ4(<5^)5Tly^f{Ka*Vm9~N$`uzNw$|&WfBm|M;+JYxv#=S_ zk}BV`3Um=oM-l%wo@Y-uw7l~4MpVb`+8Hv-kEbvZVLf1zjErQo1P-s+xy-l$wbHw#S^VQjvH5K?uHX=&G?3E`jvqmaJ%Je)Pb49ElPf;!f89h3Ok6GI zfGlG3duQJvzvAKW=f_$`IFgaSU3xOt7jd?X81=+t%XIVA_Hq$K^V?@H;U9{WC2llVXD1*wqaehrdNzf1peBt-S1ZLW1L6?9wyOB^=JSX?+HTradLD7g49|EQEFnr;;`+`WQ$z_|^wO>( zB8G|OLRG{(l8?~U{HIcb7@It6>zwlut;zGMm5K-A#%;fIo#>2cd@Th(ejNgm#e?2~ z833x%)k^&&1)x+}t-KBs1hU`8b@5;gB&+v{&QT#CBslzKv(*ex4o!5`ir)i#uHAnM zl8PYq)lASguoBoVlx^m!B_OSqZa;3U0`fk$2hIe!fYg-)wT#O*fW}v-W)go07}g3K z%?_A@jNt~$#xu`A+P>c5#>TV21k8CZDhX)MM!!1h^aFQAP=s=?5vY<)(;j_)K;D|| zbt|t0@Ol{|N^0TwqUJv#IP4}_r6@~;z+$cF9ZP5-g|7<0why@(q$>6>Eyu9=9 z9cOcjeF}9ma8ot>%3Je5G(yiUf58+e2W*(XEdB!NNn5B{{<-} zJs*KhUO>nYW0bL}!0JADO!dK2ARbWb{ulQZWOuB}e4}$2$cGFEOWN*%?8NE}&PP6w zFwT6js!ajda-JPd7h{1^^X1Rsl01-4okh|6qd;hv`Xkt^$&nF(WFs z2t=DMsPC$<14?tJ)J{V#h@-0YeY-XR@zzh9%Ft9`?x|^7Y?=gy-#fd#!kIwpd6xgu z;vI<7bZQ1T*Fa)%lPE=K1Z3~}-C4R33kt2b&DuINfpk%OY|vT^otb&5=4f`Jox@epR60i2>rY7DQg$b0Ka zwVY*Ozjt>tA3FzhhtwCQFZKgtvcbXqxhk(dw{PQ@5e7E*pvO#?8_<-mXn3Xz0_$Mw zRHQ~c(9fOK*{byuI6j1~z+_`kHu|qv>#7+riT^T9j2wX(y}dENi~)@Orxlk4UIC-n z-(z>>7EtmDK)-h8-L|{sK;LZde4$AiR8j*J!>$(rNpnr;+K(|ntG(&E`erPs2)?@( zeDo%8n>uGFXpNw(b6|tbz6f9i#pZr)umx7ZQ>~}~3^CL0qoJ5 zhZT0GK_LtblimW*OMH{P1v$Vu6yp@4bq01!!I+Z#Azpqzbr>QXV2`An zPjsY{b^jngQ9w=&gj!Cs_0M6aW z+nk`SAXTVt-tZ|H%nQ+flOL0a&^3{g<&3^oxu!2*>> zy%A++U4U`Ccbd<%9^|v{92AK74NBPwH-Xga-Dk-P@ADm4j(=^_@U6 zkn(&`cnC!9t%;ny5dehdBSuzbCqag9B&lLP2Z*6>gRhPffEXw`{B(F3n-<2M3U>Gb zc*&jU&R*qlBzPk z)?teccA4h+^&npNN9y0bY2bS@bR%Si1%hblS6bFM5UXwUer|IEeDlZH=lQ*WXNQwl zj{XIa5EkvfyDC6-#!LsaG=OA6k$Jb9fPMGx#}0Oe0v;!JS;=56Hol#mDZT87Ev($@ zwT~(Ufxiy7U2?jydC3AR=T=)F;zWrs#|8fQl1o*>CP0*LKk4J<3W6TF*jAn#KTnA}|0>LVn&m$udh*+f1c@iC`Cat z-CaQJBIj?`eg-14`n{J(P9U&O|7Vcxer(CBVBykJ6;NPhIWL`Hg8T&OCcV9=qBlre zGSQVc7h}K2bIYCq(FXsvN$mxYif`R&4YL3tBSPk#r77&g2356l#!c9G`71}#q1_ep zuJ2U${_F$cEoV1}+;Rh)FU&NuQ?Nd#uUa|-McABOy!zuOTR~Xx$8?pj3gC+)DyqCc z0ipO8jy{KUU0%+f(gGNUI&;2mrZIl9H2f{_W zC*^>AlpB+H^%{uvzxKhcj|cHLg~B^nEg*cRHB=^`28cU)-#)JF1DOl+KepbN#b$Fl zwd=z^0Ac^liTAE@AZoo&`+0OY@M#_0+}>FVxP#2tZz_WzF+O7XM_U@3N;@E?*iHpO z@xQH~@_6%BGCS|sx^BSPtjNctz5-%g-bVjZ*&rQrvfl8QItW!hwb=A482IzH=`ohb zfVX%(fAF6Y2pn`lY@JeUnH9KnMuZC@HD~n0Uh-j!+qX@6MJ9nL^Ju<<#VR0N?VlU8 zSq91aR_cxFa1aO$saeZD3StFb&#T7^LBugsgL`lY2+`)v`*6JedOwGnNBa(9clHMt z`||4iW#EqTZU6z_HKxqoOb|%d&Kjs&0fNipZ+o<#VGDo%sy&R(1K}6}k@UT8*s^|` z!`6{L5L-J}?csj{TU2Y6ZeI}q+@PFY2u>D+j+4@zWbi;ZY>~Ga{II1*4M(@w`2d+I z!^o1M1JP$vSXZtdaC4{DMl*DP#2>MaasDC*zqIN1h;IdgZqClj7H>g3|FDn1@+}a` zHoQ6FU;wgyA&!@9eqdv5UM-dj&w!6?W#R9?14KW|4EusWK*dlsVL7CS_FlNKE?h?SM-s-q;Mg>TzsohfqX&_qeeJP)s z%;O)GOGmZ_fKaWmBJ&0f#6uo1yq_h5z^ZKa)|b(MpLqvE6Y5`0FLC z4Gsi=poZierP&w+H-;R}3p<2OeBXCIROC7c43gu{jAa5o?u>~1f)t30MrmgY2LZ7* zBki!V3lJ4sj;vSh2T7vS#$j3`w(O^|3ET4lBpHTp?@{ak=Ut{;PC$S!{PJM(YbZ!* z9v>tabbxr5l=|g$Z$QF2&y^%P0z#d6+vAc}Km>31zJ6H&q@>%4fzL@G^}Fjr4vB!R z1ef}d{n^-P>O^BmMl#5>^Hm6mGO+p0g2{E6F(8sY=)0yeABcTRqautcknaD`)Z_{v zl4_9irPUvlzP_)Sp-_S8xdE%|s0Q|)l?RW?mx1fJrRXee0NAV3x7%J20fuN=Q)hV< zFdAR(it8!_uC4bIiR&y-9yp)0v&FC_cNXx%JXtP@LLxI5J=W7}UTR<_5|7hgHpTK?_r-X#l_}vUDxuq)!Qg1;K=Tm;G&JEZVJF}uvc>Mg` z!XH)l9ANgnSbxs@DJbz7%W7_~1La=5l+rJsfm6HxN(QR|Xco(={ z2{>PtV*AbS0r!5&)9|^s!1?~oWAIQtP@;_eb?S70%?SGH{eTIqX}(C45=CGN6Oa6) zGy?Z%`%FjZ0y3Nu->AVHLVMGL!i2{3@hKoTpBF=Ss zQ53j_U9Q%HIUv6;!bn-K5|kH9)+Z@U09S8iNpA#sCl7jdpMB3a%|$o;z)$>eVP!uRAjB3xPXEzS2Gk}vyoKYeHb zu}wQtB0jH0;)w!sgJ#bV!(Yb8`1u1Q`X=iz^^^}{nkO};#?~O&z57nh#*QQE{Z~7> z3LFueE0oo5w-!#mjf92|tG`pfA$^*0gq@Q}2@&M!zNMtRK( zzj(yAXTOQ@`Z`3C`=F*v*p3uh-+9Cx8Lb$r<%`Pg=HYQ`FuI%>Suypvrrz=rFRq^J zCr$pSA(`JlJr&kJLkeC?+bd@ykyP5D(d{%NMB-oflhCY+@JH?ESE+d+X@{rBaJ$zd z{KmR{`LG_zE%#Zn?28c*-&Mx;YCxomp-y>6t|HRdg1TN(2VxIhs7lhjg`_!_??n#y zBl-jWf^kn_B=xJF9%C7TSTVc88dCoEeJn^_< z%a-tdBu!CF3mI=jq$|R2$~WmCTFm$V=B-i@rTjz}YqKjNk5Ak?En|ZSCf%wp5)u)a zbM3)(UrnUYuJW+4J_pGeJMs(X9YXlNws28+jU+xlRCDNWLk#}K>^)0NB-YaH(dprW z7-7z8Haf0I*10Ht;uQhOCj@*>535EbvLH+Oj2jYM$ltQ~{w0$9@7k%?UmX$ciMREO z*&ZaGyCyBsf1EEn=+I#R#X}^s8o~+ zl~hP%PN575uaY8}66GL?B)mj}kSQ5n^E`aM_5J&PJnK1o|JZBoXPxuxy^ifxhxMWk zq67j#fFLL!KoAgGJ`#j>?=oGzkw6gSUk>;Pf&|hsOI|$be*E|$Gp9=zE}U|5y+9!F zODw|ybyfZSP%dCt(&kk8*s^#YjrvyxJ-5h4!#8y%&Y1+EUU}VYL1m|s_kT4W^ph}} zIj@%x@%RFoT6LBh7*0lmyJJc_qb1RkWNFo34l*0<78-r=N^jRQ3G~Zxeb@QXeI;MsnnbPSH!u0M zoh@uIqJc(iZ@%%}TT{|=zx>PBXCi2Lim*|dZ3P12j)kfYi>S{g_1umOD>TwGvvc}c zJnD#L?asZ>f|{J|l-zYLW8m*BBqZgU7)U|PG9 z8%~4J58bi_}K|oV;oF-d~R5W~K;1kFyqd8IrX`Q}INxxx2 zTBLzK>d#75pIVy(gwE>SJxbbWr1|&L{!Cpork-@;k>Tl*@9zE1dgjMa>qn`nfVt?B z3f+*AKfhk0FFb5|s=J4C`qHmV8#bGAdh~gvOdC_%~%z>#RFlF&DqRwdIuyCP4s4ecE*yKLhn1wlri&NPDA6&ET&(}s(t$-5p&k= zn%FFqi)C5arw3+UV`c^a1yxmh%&PCUmd(q?oHcLjhPrhy-6zSUa&8M|#Fu?wXX{{w zwi0nLW;q_SZz#Uxi4`2{hmMCz-qo@Vy_fcqnE!(~18Ep2JGuyI8TTvwT?`+X>}h`Id`E@ALa& zh3^+l()dTQQhjMXA^RYvf2Jkt9Y2hfI($FqW1JFF5()8s=^tFzC3~jyb1G%T=?N^)qqgt~49PjAvVkBlb~PQOitw zqq_s<>eL%+gt}wtoFP??q9tY|SaF9mdN6a6wZMvag}I$CLLdHY$Bcc`ZXU%!n0s_i zt;(5#+2ruF;Jy^hHs0dhTqJ@SK6~xR*8{N%`F4Fp`UA{iW(SxQO<{&^qYGEw9n<-b zUy9cF2U9GDyZHshF#S@Fp+Zv}W~k5_I*$ZnWySI1j;F)1VzuAH6XDk}yW_Zcwz?Fi zdY6f9yBUoYtIG5K-Etl)4mTUO6`aT1Lj5zIdJCA|6t}*$!x+*cRn^C`Fr)60w}q-Z=0)#_x8XxeW}y9Cf3ZgTS5~S>E>~C*63oJm#W+LK&@pAZ^qjop&mIOuUv9+KeUN`C9^%vwUIUZq|4M5aQ z8Bp#>1WJ&b{s)y9V9oKn$?OdS+R1izi{s1ni~fnMu6O|o9g>l!{!Rm9r-}DjJiKhJ z+uv1E13_+jI^E;ZK49oPC0%8bfqXlEzm$11NDo<)6)+pP*T(%FHZ1p@No+ZrF$r>% z4<8a0wg4|_wNFuFCXhmOU9;wlfxh30_1k<9sLz@L4N8Q7?nceno?{N&BqP-+LwTTr z7bGypK(>`8^G`blW>s^?ZM}XVb&GXZ6f@DZXkWblQXtRXfIw`)%r~Uv+{XQUEUx_D(iX)Z1w<& z6j5d&M*-OF2ac*edJdHR>g_|(-#}*Piqv=7SAcdN%v_NG=o8<`1g&>ihhuN@OePk zIvVI}a~ADQQ-FPX*y7N?s=&F|U)W%+2jsbtaqr3r;E-xDxpf6lMGct|FLHrYcR^!U zi4D-}TP1Dvc_4<%*LUsO43w~6R%L-nz}iz;H(xgnOrQ5Q4#KIx=y;j+#{4~qF|;dt zxHo`2znMfA8UUI5K2Zy|?}2<{xJgrsCQvVGjr3ZIf}HWZy;-mz$m)GtC-Tz(hC=)0Tf?S|MUJuG$~9t)Q934G7`ZU-!}K@czeew;Q6 zlnN`|U)x=QSpVtXtE^5Sa)N0~M+QM8fX(>tzADg;A>*DktAT1nmwvxn0{yt&jSjR1 z0+AqkMOkkx8r{K3m0EN}b4#(Eb`d!sFlZm{oY9VE#j`DrHCh9OK%&455BOv9FP94& z0Y$F)q?fA;2)e&O_gAk&b1%vQ-OM>4+e7@4S=<0(n&$O71z$n1*;MX#x+_SBTWek} zoCT`xV17o<6%h9|I{bh<3DTK=9z8Jr3DmI?>Pr7|5V1R_?3q6UMD@Om*Ao>$@qA)9 zbJQIK{*(QB!o3#c^qJzr#)3e-(f)bec}ozZ|F~2XmjogQvpPx#_MnCSm=nGp-avkw zJK|Sc3G(@~qR)=Bqv^9v%^r;|K8B+g=d%3n@BN736}|+D6|uBV{WVbY!onJZ%t6RNA~kAZ z0)5=5rhd+_4viKSJ5mqsE}3J->iN-qB_wgB}oBI<>STayOb=B%ij~R)favj7u-BHU|N` z_sIc+BS7SR9P>0t1kwG$r1o)HpdI1ejq|<%qTO%32|KuVPIdzbpJ@z~zF7&B zot>pm%ep}N!t77ehcak7qgAUW=p#t%yEFE|MHWbwd$nFg1_R%kBU_qV3xIHdb?>`s zFOWwEw*A$TLKBboiz+oMfS}l5w97id5;0%NErkUIAe}>0a|4HE}RkJfk@?<4MA`C(EN_= z;~t^$Kw=%qvNvA=65ic2y;h4LUel;>yCN6_0s|`7a*lv#w#TcA(HszQ3|!4S&<7Hc zX7zgsxj=oGp^(Y=0isd+{PV6Y-}5)VJB!)@1g@kCOb(J zLE?~k<`(cl3r{{D*=pkjG?p|oOdk1+M-XS~m&Vd< zfZ$oEQY3+anDh0~__sii)I8oRq1OUpZIT*S*1ZF=Wu^<2Gyp=anLDE6mq3JQ^Py%@ z9weojDZa0$Ao-{5LIza=E%_IC(XMmQaMD<9KuQ8gH}jPUk(g+9i(o=^>Rk{??!C6E zEDI=I3&SGJ36SdkSXbu)Ad;k)@wM?fD1ZA zj2_^uOx|IAK?Inj$91hm<-n|cvn#qS2YA+=&&apfpwe?b-nQ@q~E~w+&xh7Hh8(Ofk<5;eo!5lo?2zT9k}^a!s49V^ zC=tAh!m3??Q(~JIk+{5nyr284=9U4huGj0&c|HeaK0_JJ9o3-Hshe2v^M>Tt{`s z;+94pkfSGjB`|jCFSW%dY{q!?|6We)L zZ|$-^H(w;hPZoko@aSiQrhT9)yVRGms+IsoGfRr{ypAKQqZQc-eNlp26T9#eCjRs`kq z=ETnfBA`MMaH#9g1GSK7m&th&@bueUEPFFRZf}Tzif$RG%;&F@6BT_*8mET{Q2{*MWZ|BL?tp&TAYE)xI%a#A;a literal 0 HcmV?d00001 diff --git a/man/dot-wcov.Rd b/man/dot-wcov.Rd new file mode 100644 index 0000000..00eb3c3 --- /dev/null +++ b/man/dot-wcov.Rd @@ -0,0 +1,35 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/RcppExports.R +\name{.wcov} +\alias{.wcov} +\title{Weighted Covariance Matrix} +\usage{ +.wcov(x, w) +} +\arguments{ +\item{x}{A matrix with \eqn{m} columns and \eqn{n} rows, where each column +represents a different variable and each row a different observation.} + +\item{w}{A non-negative and non-zero vector of weights for each observation. +Its length must equal the number of rows of x.} +} +\value{ +A list with two components: +\itemize{ +\item \code{center}: an estimate for the center (mean) of the data. +\item \code{cov}: the estimated (weighted) covariance matrix. +} +} +\description{ +\code{wcov} computes the estimates of the weighted covariance +matrix and the weighted mean of the data. +} +\examples{ +m <- matrix(c(rnorm(500, 6), rnorm(500, 11, 3)), ncol = 2) +w <- runif(500) +gravitree:::.wcov(m, w) + +} +\author{ +Simon Garnier, \email{garnier@njit.edu} +} diff --git a/man/four_Gaussians.Rd b/man/four_Gaussians.Rd new file mode 100644 index 0000000..dc45bf9 --- /dev/null +++ b/man/four_Gaussians.Rd @@ -0,0 +1,15 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/wcec-package.R +\docType{data} +\name{four_Gaussians} +\alias{four_Gaussians} +\title{Four Gaussian Clusters} +\description{ +Matrix of 2-dimensional points forming four Gaussian clusters. +} +\examples{ +data(four_Gaussians) +plot(four_Gaussians, pch = 19) + +} +\keyword{datasets} diff --git a/man/three_Gaussians.Rd b/man/three_Gaussians.Rd new file mode 100644 index 0000000..ea252fe --- /dev/null +++ b/man/three_Gaussians.Rd @@ -0,0 +1,15 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/wcec-package.R +\docType{data} +\name{three_Gaussians} +\alias{three_Gaussians} +\title{Three Gaussian Clusters} +\description{ +Matrix of 2-dimensional points forming three Gaussian clusters. +} +\examples{ +data(three_Gaussians) +plot(three_Gaussians, pch = 19) + +} +\keyword{datasets} diff --git a/man/wcec.Rd b/man/wcec.Rd new file mode 100644 index 0000000..e808a7e --- /dev/null +++ b/man/wcec.Rd @@ -0,0 +1,35 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/wcec-package.R +\docType{package} +\name{wcec} +\alias{wcec-package} +\alias{wcec} +\title{wcec: ...} +\description{ +The \code{wcec} package provides ... +} +\details{ +\tabular{ll}{ +Package: \tab wcec\cr +Type: \tab Package\cr +Version: \tab 0.1\cr +Date: \tab 2024-08-07\cr +License: \tab GPL-3\cr +} +} +\seealso{ +Useful links: +\itemize{ + \item \url{https://github.com/swarm-lab/wcec} + \item \url{https://swarm-lab.github.io/wcec/} + \item Report bugs at \url{https://github.com/swarm-lab/wcec/issues} +} + +} +\author{ +Simon Garnier \href{mailto:garnier@njit.edu}{garnier@njit.edu} + +Jason Graham \href{mailto:jason.graham@scranton.edu}{jason.graham@scranton.edu} + +Maintainer: Simon Garnier \href{mailto:garnier@njit.edu}{garnier@njit.edu} +} diff --git a/src/.gitignore b/src/.gitignore new file mode 100644 index 0000000..22034c4 --- /dev/null +++ b/src/.gitignore @@ -0,0 +1,3 @@ +*.o +*.so +*.dll diff --git a/src/Makevars b/src/Makevars new file mode 100644 index 0000000..6868846 --- /dev/null +++ b/src/Makevars @@ -0,0 +1,8 @@ +## With Rcpp 0.11.0 and later, we no longer need to set PKG_LIBS as there is +## no user-facing library. The include path to headers is already set by R. +PKG_LIBS = $(SHLIB_OPENMP_CXXFLAGS) +PKG_CXXFLAGS = $(SHLIB_OPENMP_CXXFLAGS) + +## With R 3.1.0 or later, you can uncomment the following line to tell R to +## enable compilation with C++11 (or even C++14) where available +#CXX_STD = CXX11 diff --git a/src/Makevars.win b/src/Makevars.win new file mode 100644 index 0000000..6868846 --- /dev/null +++ b/src/Makevars.win @@ -0,0 +1,8 @@ +## With Rcpp 0.11.0 and later, we no longer need to set PKG_LIBS as there is +## no user-facing library. The include path to headers is already set by R. +PKG_LIBS = $(SHLIB_OPENMP_CXXFLAGS) +PKG_CXXFLAGS = $(SHLIB_OPENMP_CXXFLAGS) + +## With R 3.1.0 or later, you can uncomment the following line to tell R to +## enable compilation with C++11 (or even C++14) where available +#CXX_STD = CXX11 diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp new file mode 100644 index 0000000..3bc4627 --- /dev/null +++ b/src/RcppExports.cpp @@ -0,0 +1,35 @@ +// Generated by using Rcpp::compileAttributes() -> do not edit by hand +// Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 + +#include +#include + +using namespace Rcpp; + +#ifdef RCPP_USE_GLOBAL_ROSTREAM +Rcpp::Rostream& Rcpp::Rcout = Rcpp::Rcpp_cout_get(); +Rcpp::Rostream& Rcpp::Rcerr = Rcpp::Rcpp_cerr_get(); +#endif + +// wcov +Rcpp::List wcov(Eigen::MatrixXd& x, Eigen::VectorXd& w); +RcppExport SEXP _wcec_wcov(SEXP xSEXP, SEXP wSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< Eigen::MatrixXd& >::type x(xSEXP); + Rcpp::traits::input_parameter< Eigen::VectorXd& >::type w(wSEXP); + rcpp_result_gen = Rcpp::wrap(wcov(x, w)); + return rcpp_result_gen; +END_RCPP +} + +static const R_CallMethodDef CallEntries[] = { + {"_wcec_wcov", (DL_FUNC) &_wcec_wcov, 2}, + {NULL, NULL, 0} +}; + +RcppExport void R_init_wcec(DllInfo *dll) { + R_registerRoutines(dll, NULL, CallEntries, NULL, NULL); + R_useDynamicSymbols(dll, FALSE); +} diff --git a/src/fast.cpp b/src/fast.cpp new file mode 100644 index 0000000..55830b6 --- /dev/null +++ b/src/fast.cpp @@ -0,0 +1,47 @@ +#include +// [[Rcpp::depends(RcppEigen)]] + +//' @name .wcov +//' +//' @title Weighted Covariance Matrix +//' +//' @description \code{wcov} computes the estimates of the weighted covariance +//' matrix and the weighted mean of the data. +//' +//' @param x A matrix with \eqn{m} columns and \eqn{n} rows, where each column +//' represents a different variable and each row a different observation. +//' +//' @param w A non-negative and non-zero vector of weights for each observation. +//' Its length must equal the number of rows of x. +//' +//' @return A list with two components: +//' \itemize{ +//' \item \code{center}: an estimate for the center (mean) of the data. +//' \item \code{cov}: the estimated (weighted) covariance matrix. +//' } +//' +//' @author Simon Garnier, \email{garnier@@njit.edu} +//' +//' @examples +//' m <- matrix(c(rnorm(500, 6), rnorm(500, 11, 3)), ncol = 2) +//' w <- runif(500) +//' gravitree:::.wcov(m, w) +//' +// [[Rcpp::export(.wcov)]] +Rcpp::List wcov(Eigen::MatrixXd &x, Eigen::VectorXd &w) { + Eigen::VectorXd center(x.cols()); + double ws = w.sum(); + for (int i = 0; i < x.cols(); i++) { + center(i) = (x.col(i).array() * w.array()).sum() / ws; + } + + int p = x.cols(); + Eigen::VectorXd sqw = (w.array() / ws).cwiseSqrt(); + Eigen::MatrixXd X = x.array().rowwise() - center.transpose().array(); + Eigen::MatrixXd cov = Eigen::MatrixXd(p, p) + .setZero() + .selfadjointView() + .rankUpdate(X.transpose() * sqw.asDiagonal()); + + return Rcpp::List::create(Rcpp::_["center"] = center, Rcpp::_["cov"] = cov); +}