From 887578081faffb289f333585211ec436d494ba97 Mon Sep 17 00:00:00 2001 From: Zhian Kamvar Date: Thu, 13 Apr 2017 09:46:22 -0500 Subject: [PATCH] force mlg.filter threads to be 1 to fix #138 I also updated documentation, added tests, and updated the NEWS file --- DESCRIPTION | 2 +- NEWS | 4 ++++ R/filter_stats.R | 5 +++-- R/internal_methods.R | 13 ++++++++----- R/methods.r | 29 +++++++++++++---------------- R/msn_handlers.R | 2 ++ man/filter_stats.Rd | 5 +++-- man/mlg.filter.Rd | 13 +++++-------- tests/testthat/test-filter.R | 33 ++++++++++++++++++++++++++------- 9 files changed, 65 insertions(+), 41 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 48257d49..94ba9389 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -2,7 +2,7 @@ Package: poppr Type: Package Title: Genetic Analysis of Populations with Mixed Reproduction Version: 2.4.1 -Date: 2017-04-10 +Date: 2017-04-13 Authors@R: c(person(c("Zhian", "N."), "Kamvar", role = c("cre", "aut"), email = "zkamvar@gmail.com"), person(c("Javier", "F."), "Tabima", role = "aut", diff --git a/NEWS b/NEWS index 0817c89b..76294ab2 100644 --- a/NEWS +++ b/NEWS @@ -7,6 +7,10 @@ BUG FIX * A corner case where repeat length vectors out of order would be erroneously subset with `test_replen()` and `fix_replen()` has been fixed. See https://github.com/grunwaldlab/poppr/issues/136 for details. +* All functions that perform filtering will now run serially due to a bug on + Fedora machines with at least two threads. Details can be found at + https://github.com/grunwaldlab/poppr/issues/138. + poppr 2.4.0 =========== diff --git a/R/filter_stats.R b/R/filter_stats.R index 06015fae..5e886da6 100644 --- a/R/filter_stats.R +++ b/R/filter_stats.R @@ -65,7 +65,8 @@ #' select a method here. Available methods are "sturges", "fd", or "scott" #' (default) as documented in \code{\link[graphics]{hist}}. If you don't want #' to plot the histogram, set \code{hist = NULL}. -#' @param threads the number of threads to use. Passed on to \code{\link{mlg.filter}}. +#' @param threads (unused) Previously the number of threads to be used. As of +#' poppr version 2.4.1, this is by default set to 1. #' @param ... extra parameters passed on to the distance function. #' #' @return a list of results from mlg.filter from the three @@ -96,7 +97,7 @@ filter_stats <- function(x, distance = bitwise.dist, threshold = 1e6 + .Machine$double.eps^0.5, stats = "All", missing = "ignore", plot = FALSE, cols = NULL, nclone = NULL, hist = "Scott", - threads = 0L, ...){ + threads = 1L, ...){ if (!inherits(distance, "dist")){ DIST <- match.fun(distance) if (inherits(x, "genind")){ diff --git a/R/internal_methods.R b/R/internal_methods.R index a04b42c9..3e2c149f 100644 --- a/R/internal_methods.R +++ b/R/internal_methods.R @@ -220,9 +220,13 @@ mll.levels.internal <- function(x, set = TRUE, value){ #==============================================================================# mlg.filter.internal <- function(gid, threshold = 0.0, missing = "asis", memory = FALSE, algorithm = "farthest_neighbor", - distance = "diss.dist", threads = 0, + distance = "diss.dist", threads = 1L, stats = "MLGs", the_call = match.call(), ...){ + if (threads != 1L){ + warning(paste("As of poppr version 2.4.1, mlg.filter can no longer run in", + "parallel. This function will run serially."), call. = FALSE) + } # This will return a vector indicating the multilocus genotypes after applying # a minimum required distance threshold between multilocus genotypes. if (is.character(distance) || is.function(distance)) { @@ -296,6 +300,9 @@ mlg.filter.internal <- function(gid, threshold = 0.0, missing = "asis", stop("Distance matrix must be a square matrix of numeric or integer values.", call. = FALSE) } + if (any(dis < 0 - .Machine$double.eps^0.5)){ + stop("Distance matrix must not contain negative distances.", call. = FALSE) + } if (nrow(dis) != ncol(dis)){ stop("The distance matrix must be a square matrix", call. = FALSE) } @@ -313,10 +320,6 @@ mlg.filter.internal <- function(gid, threshold = 0.0, missing = "asis", # Threshold must be something that can cast to numeric if (!is.numeric(threshold) && !is.integer(threshold)){ stop("Threshold must be a numeric or integer value", .call = FALSE) - } - # Threads must be something that can cast to integer - if (!is.numeric(threads) && !is.integer(threads) && threads >= 0){ - stop("Threads must be a non-negative numeric or integer value", .call = FALSE) } # Stats must be logical STATARGS <- c("MLGS", "THRESHOLDS", "DISTANCES", "SIZES", "ALL") diff --git a/R/methods.r b/R/methods.r index 27e4191a..e4be0bca 100644 --- a/R/methods.r +++ b/R/methods.r @@ -1237,12 +1237,9 @@ setMethod( #' \code{\link{bitwise.dist}} for snpclone objects. A matrix or table #' containing distances between individuals (such as the output of #' \code{\link{rogers.dist}}) is also accepted for this parameter. -#' @param threads The maximum number of parallel threads to be used within this -#' function. A value of 0 (default) will attempt to use as many threads as -#' there are available cores/CPUs. In most cases this is ideal. A value of 1 -#' will force the function to run serially, which may increase stability on -#' some systems. Other values may be specified, but should be used with -#' caution. +#' @param threads (unused) Previously, this was the maximum number of parallel +#' threads to be used within this function. Default is 1 indicating that this +#' function will run serially. Any other number will result in a warning. #' @param stats a character vector specifying which statistics should be #' returned (details below). Choices are "MLG", "THRESHOLDS", "DISTANCES", #' "SIZES", or "ALL". If choosing "ALL" or more than one, a named list will be @@ -1394,7 +1391,7 @@ setMethod( #==============================================================================# mlg.filter <- function(pop, threshold=0.0, missing="asis", memory=FALSE, algorithm="farthest_neighbor", - distance="diss.dist", threads=0, stats="MLGs", ...){ + distance="diss.dist", threads=1L, stats="MLGs", ...){ standardGeneric("mlg.filter") } @@ -1406,7 +1403,7 @@ setMethod( signature(pop = "genind"), definition = function(pop, threshold=0.0, missing="asis", memory=FALSE, algorithm="farthest_neighbor", distance="diss.dist", - threads=0, stats="MLGs", ...){ + threads=1L, stats="MLGs", ...){ the_call <- match.call() mlg.filter.internal(pop, threshold, missing, memory, algorithm, distance, threads, stats, the_call, ... ) @@ -1418,7 +1415,7 @@ setMethod( signature(pop = "genlight"), definition = function(pop, threshold=0.0, missing="asis", memory=FALSE, algorithm="farthest_neighbor", distance="bitwise.dist", - threads=0, stats="MLGs", ...){ + threads=1, stats="MLGs", ...){ the_call <- match.call() mlg.filter.internal(pop, threshold, missing, memory, algorithm, distance, threads, stats, the_call, ...) @@ -1430,7 +1427,7 @@ setMethod( signature(pop = "genclone"), definition = function(pop, threshold=0.0, missing="asis", memory=FALSE, algorithm="farthest_neighbor", distance="diss.dist", - threads=0, stats="MLGs", ...){ + threads=1L, stats="MLGs", ...){ the_call <- match.call() mlg.filter.internal(pop, threshold, missing, memory, algorithm, distance, threads, stats, the_call, ...) } @@ -1441,7 +1438,7 @@ setMethod( signature(pop = "snpclone"), definition = function(pop, threshold=0.0, missing="asis", memory=FALSE, algorithm="farthest_neighbor", distance="bitwise.dist", - threads=0, stats="MLGs", ...){ + threads=1L, stats="MLGs", ...){ the_call <- match.call() mlg.filter.internal(pop, threshold, missing, memory, algorithm, distance, threads, stats, the_call, ...) @@ -1460,7 +1457,7 @@ setMethod( #==============================================================================# "mlg.filter<-" <- function(pop, missing = "asis", memory = FALSE, algorithm = "farthest_neighbor", distance = "diss.dist", - threads = 0, ..., value){ + threads = 1L, ..., value){ standardGeneric("mlg.filter<-") } @@ -1473,7 +1470,7 @@ setMethod( signature(pop = "genind"), definition = function(pop, missing = "asis", memory = FALSE, algorithm = "farthest_neighbor", distance = "diss.dist", - threads = 0, ..., value){ + threads = 1L, ..., value){ if (!is.genclone(pop)){ the_warning <- paste("mlg.filter<- only has an effect on genclone", "objects.\n", "If you want to utilize this", @@ -1491,7 +1488,7 @@ setMethod( signature(pop = "genlight"), definition = function(pop, missing = "asis", memory = FALSE, algorithm = "farthest_neighbor", distance = "bitwise.dist", - threads = 0, ..., value){ + threads = 1L, ..., value){ if (!is.snpclone(pop)){ the_warning <- paste("mlg.filter<- only has an effect on snpclone", "objects.\n", "If you want to utilize this", @@ -1510,7 +1507,7 @@ setMethod( signature(pop = "genclone"), definition = function(pop, missing = "asis", memory = FALSE, algorithm = "farthest_neighbor", distance = "diss.dist", - threads = 0, ..., value){ + threads = 1L, ..., value){ pop <- callNextMethod() the_call <- match.call() callnames <- names(the_call) @@ -1587,7 +1584,7 @@ setMethod( signature(pop = "snpclone"), definition = function(pop, missing = "mean", memory = FALSE, algorithm = "farthest_neighbor", distance = "bitwise.dist", - threads = 0, ..., value){ + threads = 1L, ..., value){ pop <- callNextMethod() the_call <- match.call() callnames <- names(the_call) diff --git a/R/msn_handlers.R b/R/msn_handlers.R index 2ee63f9d..182c7106 100644 --- a/R/msn_handlers.R +++ b/R/msn_handlers.R @@ -66,6 +66,7 @@ filter_at_threshold <- function(gid, threshold, indist, clustering.algorithm, distance = bruvo.dist, algorithm = clustering.algorithm, stats="ALL", + threads = 1L, replen = bruvo_args$replen, add = bruvo_args$add, loss = bruvo_args$loss) @@ -73,6 +74,7 @@ filter_at_threshold <- function(gid, threshold, indist, clustering.algorithm, filter.stats <- mlg.filter(gid, threshold, distance = indist, + threads = 1L, algorithm = clustering.algorithm, stats="ALL") } diff --git a/man/filter_stats.Rd b/man/filter_stats.Rd index 2c504e2f..4cc3e0db 100644 --- a/man/filter_stats.Rd +++ b/man/filter_stats.Rd @@ -7,7 +7,7 @@ filter_stats(x, distance = bitwise.dist, threshold = 1e+06 + .Machine$double.eps^0.5, stats = "All", missing = "ignore", plot = FALSE, cols = NULL, nclone = NULL, hist = "Scott", - threads = 0L, ...) + threads = 1L, ...) } \arguments{ \item{x}{a \code{\link{genind}}, \code{\link{genclone}}, @@ -37,7 +37,8 @@ select a method here. Available methods are "sturges", "fd", or "scott" (default) as documented in \code{\link[graphics]{hist}}. If you don't want to plot the histogram, set \code{hist = NULL}.} -\item{threads}{the number of threads to use. Passed on to \code{\link{mlg.filter}}.} +\item{threads}{(unused) Previously the number of threads to be used. As of +poppr version 2.4.1, this is by default set to 1.} \item{...}{extra parameters passed on to the distance function.} } diff --git a/man/mlg.filter.Rd b/man/mlg.filter.Rd index f19d0dfe..ce26cea4 100644 --- a/man/mlg.filter.Rd +++ b/man/mlg.filter.Rd @@ -15,11 +15,11 @@ \title{MLG definitions based on genetic distance} \usage{ mlg.filter(pop, threshold = 0, missing = "asis", memory = FALSE, - algorithm = "farthest_neighbor", distance = "diss.dist", threads = 0, + algorithm = "farthest_neighbor", distance = "diss.dist", threads = 1L, stats = "MLGs", ...) mlg.filter(pop, missing = "asis", memory = FALSE, - algorithm = "farthest_neighbor", distance = "diss.dist", threads = 0, + algorithm = "farthest_neighbor", distance = "diss.dist", threads = 1L, ...) <- value } \arguments{ @@ -55,12 +55,9 @@ to pop. Defaults to \code{\link{diss.dist}} for genclone objects and containing distances between individuals (such as the output of \code{\link{rogers.dist}}) is also accepted for this parameter.} -\item{threads}{The maximum number of parallel threads to be used within this -function. A value of 0 (default) will attempt to use as many threads as -there are available cores/CPUs. In most cases this is ideal. A value of 1 -will force the function to run serially, which may increase stability on -some systems. Other values may be specified, but should be used with -caution.} +\item{threads}{(unused) Previously, this was the maximum number of parallel +threads to be used within this function. Default is 1 indicating that this +function will run serially. Any other number will result in a warning.} \item{stats}{a character vector specifying which statistics should be returned (details below). Choices are "MLG", "THRESHOLDS", "DISTANCES", diff --git a/tests/testthat/test-filter.R b/tests/testthat/test-filter.R index b1f24387..9f91e6ca 100644 --- a/tests/testthat/test-filter.R +++ b/tests/testthat/test-filter.R @@ -11,7 +11,8 @@ xd <- dist(grid_example) xdm <- as.matrix(xd) set.seed(999) -gc <- as.snpclone(glSim(100, 0, n.snp.struc = 1e3, ploidy = 2, parallel = FALSE), parallel = FALSE) +gc <- as.snpclone(glSim(100, 0, n.snp.struc = 1e3, ploidy = 2, parallel = FALSE), + parallel = FALSE, n.cores = 1L) test_that("multilocus genotype filtering algorithms work", { @@ -169,9 +170,32 @@ assign("x20160810_mon20", monpop[1:20], envir = .GlobalEnv) assign("x20160810_let", matrix(letters[1:9], 3, 3), envir = .GlobalEnv) assign("x20160810_neifun", function(x) nei.dist(genind2genpop(x, quiet = TRUE)), envir = .GlobalEnv) + +test_that("internal filtering will throw an error for negative distances", { + skip_on_cran() + xdn <- xd + xdn[1] <- -3 + expect_error(mlg.filter(x, distance = xdn, threshold = 4.51), "Distance matrix must not contain negative distances") + xdn[1] <- -0.3 + expect_warning(.Call("neighbor_clustering", as.matrix(xdn), mll(x), 4.51, "f", 1L), "The data resulted in a negative or invalid distance or cluster id") +}) + +test_that("a warning is thrown if the user specifies more than one thread.", { + skip_on_cran() + expect_warning(mlg.filter(x, distance = xd, threshold = 4.51, threads = 2L)) +}) + +test_that("Infinite distances will produce an error", { + skip_on_cran() + xdn <- xd + xdn[1] <- Inf + expect_error(mlg.filter(x, distance = xdn, threshold = 4.51), "Data set contains missing or invalid distances") +}) + + test_that("mlg.filter errors when distance matrix is not square", { skip_on_cran() - mat <- matrix(rnorm(nInd(monpop)*10), nInd(monpop), 10) + mat <- matrix(runif(nInd(monpop)*10), nInd(monpop), 10) msg <- "must be a square matrix" expect_error(mlg.filter(monpop, distance = mat) <- 0, msg) }) @@ -197,11 +221,6 @@ test_that("mlg.filter throws an error if the threshold is not a number", { expect_error(mlg.filter(x) <- "A", "Threshold must be") }) -test_that("mlg.filter throws an error if the threads is not a number", { - skip_on_cran() - expect_error(mlg.filter(x, threads = "A") <- 1L, "Threads must be") -}) - test_that("mlg.filter throws an error if the distance is not numeric", { skip_on_cran() expect_error(mlg.filter(x, distance = xdm > 4) <- 1, "Distance matrix must be")