Skip to content

Commit

Permalink
force mlg.filter threads to be 1 to fix #138
Browse files Browse the repository at this point in the history
I also updated documentation, added tests, and
updated the NEWS file
  • Loading branch information
zkamvar committed Apr 13, 2017
1 parent b5984dd commit 8875780
Show file tree
Hide file tree
Showing 9 changed files with 65 additions and 41 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ Package: poppr
Type: Package
Title: Genetic Analysis of Populations with Mixed Reproduction
Version: 2.4.1
Date: 2017-04-10
Date: 2017-04-13
Authors@R: c(person(c("Zhian", "N."), "Kamvar", role = c("cre", "aut"),
email = "[email protected]"),
person(c("Javier", "F."), "Tabima", role = "aut",
Expand Down
4 changes: 4 additions & 0 deletions NEWS
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@ BUG FIX
* A corner case where repeat length vectors out of order would be erroneously
subset with `test_replen()` and `fix_replen()` has been fixed. See
https://github.com/grunwaldlab/poppr/issues/136 for details.
* All functions that perform filtering will now run serially due to a bug on
Fedora machines with at least two threads. Details can be found at
https://github.com/grunwaldlab/poppr/issues/138.


poppr 2.4.0
===========
Expand Down
5 changes: 3 additions & 2 deletions R/filter_stats.R
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,8 @@
#' select a method here. Available methods are "sturges", "fd", or "scott"
#' (default) as documented in \code{\link[graphics]{hist}}. If you don't want
#' to plot the histogram, set \code{hist = NULL}.
#' @param threads the number of threads to use. Passed on to \code{\link{mlg.filter}}.
#' @param threads (unused) Previously the number of threads to be used. As of
#' poppr version 2.4.1, this is by default set to 1.
#' @param ... extra parameters passed on to the distance function.
#'
#' @return a list of results from mlg.filter from the three
Expand Down Expand Up @@ -96,7 +97,7 @@ filter_stats <- function(x, distance = bitwise.dist,
threshold = 1e6 + .Machine$double.eps^0.5,
stats = "All", missing = "ignore", plot = FALSE,
cols = NULL, nclone = NULL, hist = "Scott",
threads = 0L, ...){
threads = 1L, ...){
if (!inherits(distance, "dist")){
DIST <- match.fun(distance)
if (inherits(x, "genind")){
Expand Down
13 changes: 8 additions & 5 deletions R/internal_methods.R
Original file line number Diff line number Diff line change
Expand Up @@ -220,9 +220,13 @@ mll.levels.internal <- function(x, set = TRUE, value){
#==============================================================================#
mlg.filter.internal <- function(gid, threshold = 0.0, missing = "asis",
memory = FALSE, algorithm = "farthest_neighbor",
distance = "diss.dist", threads = 0,
distance = "diss.dist", threads = 1L,
stats = "MLGs", the_call = match.call(), ...){

if (threads != 1L){
warning(paste("As of poppr version 2.4.1, mlg.filter can no longer run in",
"parallel. This function will run serially."), call. = FALSE)
}
# This will return a vector indicating the multilocus genotypes after applying
# a minimum required distance threshold between multilocus genotypes.
if (is.character(distance) || is.function(distance)) {
Expand Down Expand Up @@ -296,6 +300,9 @@ mlg.filter.internal <- function(gid, threshold = 0.0, missing = "asis",
stop("Distance matrix must be a square matrix of numeric or integer values.",
call. = FALSE)
}
if (any(dis < 0 - .Machine$double.eps^0.5)){
stop("Distance matrix must not contain negative distances.", call. = FALSE)
}
if (nrow(dis) != ncol(dis)){
stop("The distance matrix must be a square matrix", call. = FALSE)
}
Expand All @@ -313,10 +320,6 @@ mlg.filter.internal <- function(gid, threshold = 0.0, missing = "asis",
# Threshold must be something that can cast to numeric
if (!is.numeric(threshold) && !is.integer(threshold)){
stop("Threshold must be a numeric or integer value", .call = FALSE)
}
# Threads must be something that can cast to integer
if (!is.numeric(threads) && !is.integer(threads) && threads >= 0){
stop("Threads must be a non-negative numeric or integer value", .call = FALSE)
}
# Stats must be logical
STATARGS <- c("MLGS", "THRESHOLDS", "DISTANCES", "SIZES", "ALL")
Expand Down
29 changes: 13 additions & 16 deletions R/methods.r
Original file line number Diff line number Diff line change
Expand Up @@ -1237,12 +1237,9 @@ setMethod(
#' \code{\link{bitwise.dist}} for snpclone objects. A matrix or table
#' containing distances between individuals (such as the output of
#' \code{\link{rogers.dist}}) is also accepted for this parameter.
#' @param threads The maximum number of parallel threads to be used within this
#' function. A value of 0 (default) will attempt to use as many threads as
#' there are available cores/CPUs. In most cases this is ideal. A value of 1
#' will force the function to run serially, which may increase stability on
#' some systems. Other values may be specified, but should be used with
#' caution.
#' @param threads (unused) Previously, this was the maximum number of parallel
#' threads to be used within this function. Default is 1 indicating that this
#' function will run serially. Any other number will result in a warning.
#' @param stats a character vector specifying which statistics should be
#' returned (details below). Choices are "MLG", "THRESHOLDS", "DISTANCES",
#' "SIZES", or "ALL". If choosing "ALL" or more than one, a named list will be
Expand Down Expand Up @@ -1394,7 +1391,7 @@ setMethod(
#==============================================================================#
mlg.filter <- function(pop, threshold=0.0, missing="asis", memory=FALSE,
algorithm="farthest_neighbor",
distance="diss.dist", threads=0, stats="MLGs", ...){
distance="diss.dist", threads=1L, stats="MLGs", ...){
standardGeneric("mlg.filter")
}

Expand All @@ -1406,7 +1403,7 @@ setMethod(
signature(pop = "genind"),
definition = function(pop, threshold=0.0, missing="asis", memory=FALSE,
algorithm="farthest_neighbor", distance="diss.dist",
threads=0, stats="MLGs", ...){
threads=1L, stats="MLGs", ...){
the_call <- match.call()
mlg.filter.internal(pop, threshold, missing, memory, algorithm, distance,
threads, stats, the_call, ... )
Expand All @@ -1418,7 +1415,7 @@ setMethod(
signature(pop = "genlight"),
definition = function(pop, threshold=0.0, missing="asis", memory=FALSE,
algorithm="farthest_neighbor", distance="bitwise.dist",
threads=0, stats="MLGs", ...){
threads=1, stats="MLGs", ...){
the_call <- match.call()
mlg.filter.internal(pop, threshold, missing, memory, algorithm, distance,
threads, stats, the_call, ...)
Expand All @@ -1430,7 +1427,7 @@ setMethod(
signature(pop = "genclone"),
definition = function(pop, threshold=0.0, missing="asis", memory=FALSE,
algorithm="farthest_neighbor", distance="diss.dist",
threads=0, stats="MLGs", ...){
threads=1L, stats="MLGs", ...){
the_call <- match.call()
mlg.filter.internal(pop, threshold, missing, memory, algorithm, distance,
threads, stats, the_call, ...) }
Expand All @@ -1441,7 +1438,7 @@ setMethod(
signature(pop = "snpclone"),
definition = function(pop, threshold=0.0, missing="asis", memory=FALSE,
algorithm="farthest_neighbor", distance="bitwise.dist",
threads=0, stats="MLGs", ...){
threads=1L, stats="MLGs", ...){
the_call <- match.call()
mlg.filter.internal(pop, threshold, missing, memory, algorithm, distance,
threads, stats, the_call, ...)
Expand All @@ -1460,7 +1457,7 @@ setMethod(
#==============================================================================#
"mlg.filter<-" <- function(pop, missing = "asis", memory = FALSE,
algorithm = "farthest_neighbor", distance = "diss.dist",
threads = 0, ..., value){
threads = 1L, ..., value){
standardGeneric("mlg.filter<-")
}

Expand All @@ -1473,7 +1470,7 @@ setMethod(
signature(pop = "genind"),
definition = function(pop, missing = "asis", memory = FALSE,
algorithm = "farthest_neighbor", distance = "diss.dist",
threads = 0, ..., value){
threads = 1L, ..., value){
if (!is.genclone(pop)){
the_warning <- paste("mlg.filter<- only has an effect on genclone",
"objects.\n", "If you want to utilize this",
Expand All @@ -1491,7 +1488,7 @@ setMethod(
signature(pop = "genlight"),
definition = function(pop, missing = "asis", memory = FALSE,
algorithm = "farthest_neighbor", distance = "bitwise.dist",
threads = 0, ..., value){
threads = 1L, ..., value){
if (!is.snpclone(pop)){
the_warning <- paste("mlg.filter<- only has an effect on snpclone",
"objects.\n", "If you want to utilize this",
Expand All @@ -1510,7 +1507,7 @@ setMethod(
signature(pop = "genclone"),
definition = function(pop, missing = "asis", memory = FALSE,
algorithm = "farthest_neighbor", distance = "diss.dist",
threads = 0, ..., value){
threads = 1L, ..., value){
pop <- callNextMethod()
the_call <- match.call()
callnames <- names(the_call)
Expand Down Expand Up @@ -1587,7 +1584,7 @@ setMethod(
signature(pop = "snpclone"),
definition = function(pop, missing = "mean", memory = FALSE,
algorithm = "farthest_neighbor", distance = "bitwise.dist",
threads = 0, ..., value){
threads = 1L, ..., value){
pop <- callNextMethod()
the_call <- match.call()
callnames <- names(the_call)
Expand Down
2 changes: 2 additions & 0 deletions R/msn_handlers.R
Original file line number Diff line number Diff line change
Expand Up @@ -66,13 +66,15 @@ filter_at_threshold <- function(gid, threshold, indist, clustering.algorithm,
distance = bruvo.dist,
algorithm = clustering.algorithm,
stats="ALL",
threads = 1L,
replen = bruvo_args$replen,
add = bruvo_args$add,
loss = bruvo_args$loss)
} else {
filter.stats <- mlg.filter(gid,
threshold,
distance = indist,
threads = 1L,
algorithm = clustering.algorithm,
stats="ALL")
}
Expand Down
5 changes: 3 additions & 2 deletions man/filter_stats.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

13 changes: 5 additions & 8 deletions man/mlg.filter.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

33 changes: 26 additions & 7 deletions tests/testthat/test-filter.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@ xd <- dist(grid_example)
xdm <- as.matrix(xd)

set.seed(999)
gc <- as.snpclone(glSim(100, 0, n.snp.struc = 1e3, ploidy = 2, parallel = FALSE), parallel = FALSE)
gc <- as.snpclone(glSim(100, 0, n.snp.struc = 1e3, ploidy = 2, parallel = FALSE),
parallel = FALSE, n.cores = 1L)


test_that("multilocus genotype filtering algorithms work", {
Expand Down Expand Up @@ -169,9 +170,32 @@ assign("x20160810_mon20", monpop[1:20], envir = .GlobalEnv)
assign("x20160810_let", matrix(letters[1:9], 3, 3), envir = .GlobalEnv)
assign("x20160810_neifun", function(x) nei.dist(genind2genpop(x, quiet = TRUE)), envir = .GlobalEnv)


test_that("internal filtering will throw an error for negative distances", {
skip_on_cran()
xdn <- xd
xdn[1] <- -3
expect_error(mlg.filter(x, distance = xdn, threshold = 4.51), "Distance matrix must not contain negative distances")
xdn[1] <- -0.3
expect_warning(.Call("neighbor_clustering", as.matrix(xdn), mll(x), 4.51, "f", 1L), "The data resulted in a negative or invalid distance or cluster id")
})

test_that("a warning is thrown if the user specifies more than one thread.", {
skip_on_cran()
expect_warning(mlg.filter(x, distance = xd, threshold = 4.51, threads = 2L))
})

test_that("Infinite distances will produce an error", {
skip_on_cran()
xdn <- xd
xdn[1] <- Inf
expect_error(mlg.filter(x, distance = xdn, threshold = 4.51), "Data set contains missing or invalid distances")
})


test_that("mlg.filter errors when distance matrix is not square", {
skip_on_cran()
mat <- matrix(rnorm(nInd(monpop)*10), nInd(monpop), 10)
mat <- matrix(runif(nInd(monpop)*10), nInd(monpop), 10)
msg <- "must be a square matrix"
expect_error(mlg.filter(monpop, distance = mat) <- 0, msg)
})
Expand All @@ -197,11 +221,6 @@ test_that("mlg.filter throws an error if the threshold is not a number", {
expect_error(mlg.filter(x) <- "A", "Threshold must be")
})

test_that("mlg.filter throws an error if the threads is not a number", {
skip_on_cran()
expect_error(mlg.filter(x, threads = "A") <- 1L, "Threads must be")
})

test_that("mlg.filter throws an error if the distance is not numeric", {
skip_on_cran()
expect_error(mlg.filter(x, distance = xdm > 4) <- 1, "Distance matrix must be")
Expand Down

0 comments on commit 8875780

Please sign in to comment.