Skip to content

Commit

Permalink
Merge pull request #64 from leopoldguyot/reference_drop_issue
Browse files Browse the repository at this point in the history
Issue #63
  • Loading branch information
cvanderaa authored Sep 3, 2024
2 parents 8373e72 + bddff79 commit 285af81
Show file tree
Hide file tree
Showing 21 changed files with 606 additions and 44 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ Collate: 'msqrob-framework.R' 'allGenerics.R'
Encoding: UTF-8
VignetteBuilder: knitr
Roxygen: list(markdown=TRUE)
RoxygenNote: 7.3.1
RoxygenNote: 7.3.2
biocViews:
Proteomics,
MassSpectrometry,
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ export(getDF)
export(getDfPosterior)
export(getFitMethod)
export(getModel)
export(getReferencePresent)
export(getSigma)
export(getSigmaPosterior)
export(getVar)
Expand Down
16 changes: 13 additions & 3 deletions R/StatModel-methods.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,10 @@
#' the linear model coefficients to be tested equal to zero.
#' The rownames of the matrix should be equal to the names of
#' parameters of the model.
#' @param acceptDifferentReference `boolean(1)` to indicate if the contrasts that involve
#' parameters with modified reference levels are returned. Watch out putting this
#' parameter to TRUE can change the interpretation of the logFC. Default is FALSE.
#'
#' @examples
#' data(pe)
#' # Aggregate peptide intensities in protein expression values
Expand All @@ -33,11 +37,14 @@

setMethod(
"getContrast", "StatModel",
function(object, L) {
function(object, L, acceptDifferentReference = FALSE) {
if (!is(L, "matrix")) L <- as.matrix(L)
coefs <- getCoef(object)
out <- matrix(rep(NA, ncol(L)))
rownames(out) <- colnames(L)
if (!referenceContrast(getReferencePresent(object), L, acceptDifferentReference)) {
return(out)
}
coefs <- getCoef(object)
hlp <- try(t(L) %*% coefs[rownames(L)], silent = TRUE)
if (!is(hlp, "try-error")) out[] <- hlp
return(out)
Expand All @@ -49,10 +56,13 @@ setMethod(

setMethod(
"varContrast", "StatModel",
function(object, L) {
function(object, L, acceptDifferentReference = FALSE) {
if (!is(L, "matrix")) L <- as.matrix(L)
out <- matrix(NA, ncol(L), ncol(L))
rownames(out) <- colnames(out) <- colnames(L)
if (!referenceContrast(getReferencePresent(object), L, acceptDifferentReference)) {
return(out)
}
vcovTmp <- getVcovUnscaled(object) * object@varPosterior
hlp <- try(t(L) %*% vcovTmp[rownames(L), rownames(L)] %*% L, silent = TRUE)
if (!is(hlp, "try-error")) out[] <- hlp
Expand Down
8 changes: 8 additions & 0 deletions R/accessors.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@
#' \item{getSigmaPosterior(object)}{to get the empirical Bayes standard deviation}
#' \item{getVcovUnscaled(object)}{to get the unscaled variance covariance matrix
#' of the model parameters}
#' \item{getReferencePresent(object)}{to get the vector of logicals indicating if the
#' reference level associated with a model parameter is present}
#' }
#'
#' @rdname statModelAccessors
Expand Down Expand Up @@ -95,3 +97,9 @@ setMethod("getVcovUnscaled",
signature = "StatModel",
definition = function(object) object@params$vcovUnscaled
)

#' @rdname statModelAccessors
setMethod("getReferencePresent",
signature = "StatModel",
definition = function(object) object@params$referencePresent
)
4 changes: 3 additions & 1 deletion R/allGenerics.R
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,9 @@ setGeneric("getVarPosterior", function(object) standardGeneric("getVarPosterior"
#' @export
setGeneric("getSigmaPosterior", function(object) standardGeneric("getSigmaPosterior"))
#' @export
setGeneric("getContrast", function(object, L) standardGeneric("getContrast"))
setGeneric("getReferencePresent", function(object) standardGeneric("getReferencePresent"))
#' @export
setGeneric("getContrast", function(object, ...) standardGeneric("getContrast"))
#' @export
setGeneric("getVcovUnscaled", function(object) standardGeneric("getVcovUnscaled"))
#' @export
Expand Down
27 changes: 15 additions & 12 deletions R/hypothesisTest-methods.R
Original file line number Diff line number Diff line change
Expand Up @@ -122,16 +122,16 @@ setMethod(
adjust.method = "BH",
modelColumn = "msqrobModels",
resultsColumnNamePrefix = "",
overwrite = FALSE) {
overwrite = FALSE,
acceptDifferentReference = FALSE) {
if (!(modelColumn %in% colnames(rowData(object)))) stop("There is no column named \'", modelColumn, "\' with stored models of an msqrob fit in the rowData of the SummarizedExperiment object")
if (is.null(colnames(contrast)) & resultsColumnNamePrefix == "") resultsColumnNamePrefix <- "msqrobResults"
if (is.null(colnames(contrast)) & ncol(contrast) > 1) colnames(contrast) <- seq_len(ncol(contrast))
if ((sum(paste0(resultsColumnNamePrefix, colnames(contrast)) %in% colnames(rowData(object))) > 0) & !overwrite) stop("There is/are already column(s) with names starting with\'", resultsColumnNamePrefix, "\' in the rowData of the SummarizedExperiment object, set the argument overwrite=TRUE to replace the column(s) with the new results or use another name for the argument resultsColumnNamePrefix")
for (j in seq_len(ncol(contrast)))
{
contrHlp <- contrast[, j]
names(contrHlp) <- rownames(contrast)
rowData(object)[[paste0(resultsColumnNamePrefix, colnames(contrast)[j])]] <- topFeatures(rowData(object)[, modelColumn], contrast = contrHlp, adjust.method = adjust.method, sort = FALSE, alpha = 1)
contrHlp <- contrast[, j, drop = FALSE]
rowData(object)[[paste0(resultsColumnNamePrefix, colnames(contrast)[j])]] <- topFeatures(rowData(object)[, modelColumn], contrast = contrHlp, adjust.method = adjust.method, sort = FALSE, alpha = 1, acceptDifferentReference = acceptDifferentReference)
}
return(object)
}
Expand All @@ -149,7 +149,8 @@ setMethod(
adjust.method = "BH",
modelColumn = "msqrobHurdle",
resultsColumnNamePrefix = "hurdle_",
overwrite = FALSE) {
overwrite = FALSE,
acceptDifferentReference = FALSE) {
if (sum(paste0(modelColumn, c("Intensity", "Count")) %in% colnames(rowData(object))) != 2) stop("There are no columns for the models of the hurdle components in the rowData of the SummarizedExperiment")
if (is.null(colnames(contrast)) & resultsColumnNamePrefix == "hurdle_") resultsColumnNamePrefix <- "hurdleResults"
if (is.null(colnames(contrast)) & ncol(contrast) > 1) colnames(contrast) <- seq_len(ncol(contrast))
Expand All @@ -158,8 +159,8 @@ setMethod(
{
contrHlp <- contrast[, j]
names(contrHlp) <- rownames(contrast)
intensityComponent <- topFeatures(rowData(object)[, paste0(modelColumn, "Intensity")], contrast = contrHlp, adjust.method = adjust.method, sort = FALSE, alpha = 1)
countComponent <- topFeatures(rowData(object)[, paste0(modelColumn, "Count")], contrast = contrHlp, adjust.method = adjust.method, sort = FALSE, alpha = 1)
intensityComponent <- topFeatures(rowData(object)[, paste0(modelColumn, "Intensity")], contrast = contrHlp, adjust.method = adjust.method, sort = FALSE, alpha = 1, acceptDifferentReference = acceptDifferentReference)
countComponent <- topFeatures(rowData(object)[, paste0(modelColumn, "Count")], contrast = contrHlp, adjust.method = adjust.method, sort = FALSE, alpha = 1, acceptDifferentReference = acceptDifferentReference)

sam <- cbind(
intensityComponent[, seq_len(5)],
Expand Down Expand Up @@ -205,7 +206,8 @@ setMethod(
adjust.method = "BH",
modelColumn = "msqrobModels",
resultsColumnNamePrefix = "",
overwrite = FALSE) {
overwrite = FALSE,
acceptDifferentReference = FALSE) {
if (is.null(object[[i]])) stop("QFeatures object does not contain an assay with the name ", i)
if (!(modelColumn %in% colnames(rowData(object[[i]])))) stop("There is no column named \'", modelColumn, "\' with stored models of an msqrob fit in the rowData of assay ", i, "of the QFeatures object.")
if (is.null(colnames(contrast)) & resultsColumnNamePrefix == "") resultsColumnNamePrefix <- "msqrobResults"
Expand All @@ -215,7 +217,7 @@ setMethod(
{
contrHlp <- contrast[, j]
names(contrHlp) <- rownames(contrast)
rowData(object[[i]])[[paste0(resultsColumnNamePrefix, colnames(contrast)[j])]] <- topFeatures(rowData(object[[i]])[, modelColumn], contrast = contrHlp, adjust.method = adjust.method, sort = FALSE, alpha = 1)
rowData(object[[i]])[[paste0(resultsColumnNamePrefix, colnames(contrast)[j])]] <- topFeatures(rowData(object[[i]])[, modelColumn], contrast = contrHlp, adjust.method = adjust.method, sort = FALSE, alpha = 1, acceptDifferentReference = acceptDifferentReference)
}
return(object)
}
Expand All @@ -232,7 +234,8 @@ setMethod(
adjust.method = "BH",
modelColumn = "msqrobHurdle",
resultsColumnNamePrefix = "hurdle_",
overwrite = FALSE) {
overwrite = FALSE,
acceptDifferentReference = FALSE) {
if (is.null(object[[i]])) stop("QFeatures object does not contain an assay with the name ", i)
if (sum(paste0(modelColumn, c("Intensity", "Count")) %in% colnames(rowData(object[[i]]))) != 2) stop("There are no columns for the models of the hurdle components in the rowData of assay ", i, "of the QFeatures object.")
if (is.null(colnames(contrast)) & resultsColumnNamePrefix == "hurdle_") resultsColumnNamePrefix <- "hurdleResults"
Expand All @@ -242,8 +245,8 @@ setMethod(
{
contrHlp <- contrast[, j]
names(contrHlp) <- rownames(contrast)
intensityComponent <- topFeatures(rowData(object[[i]])[, paste0(modelColumn, "Intensity")], contrast = contrHlp, adjust.method = adjust.method, sort = FALSE, alpha = 1)
countComponent <- topFeatures(rowData(object[[i]])[, paste0(modelColumn, "Count")], contrast = contrHlp, adjust.method = adjust.method, sort = FALSE, alpha = 1)
intensityComponent <- topFeatures(rowData(object[[i]])[, paste0(modelColumn, "Intensity")], contrast = contrHlp, adjust.method = adjust.method, sort = FALSE, alpha = 1, acceptDifferentReference = acceptDifferentReference)
countComponent <- topFeatures(rowData(object[[i]])[, paste0(modelColumn, "Count")], contrast = contrHlp, adjust.method = adjust.method, sort = FALSE, alpha = 1, acceptDifferentReference = acceptDifferentReference)

sam <- cbind(
intensityComponent[, seq_len(5)],
Expand Down
134 changes: 134 additions & 0 deletions R/msqrob-utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -86,9 +86,142 @@ makeContrast <- function(contrasts, parameterNames) {
return(t(.chrlinfct2matrix(contrasts, parameterNames)$K))
}


#' Check for presence of reference levels
#' @description Check if the reference levels of the factors in the model are present for a specific feature.
#'
#' @param y A numeric vector with the feature values for each sample.
#'
#' @param data A `DataFrame` with information on the design. It has
#' the same number of rows as the number of columns (samples) of
#' `y`.
#'
#' @param paramNames A `character` vector with the names of the parameters in the model.
#'
#' @param factorLevels A list with the levels of the factors in the model. The names of the list
#' should correspond to the factor variables present in the model.
#'
#' @return A logical vector indicating for each parameter (that comes from a factor variable) in the model
#' if his reference level has values for the current feature.
#'
#' @rdname checkReference
#'
checkReference <- function(y, data, paramNames, factorLevels) {
if (length(factorLevels) == 0) {
return(logical(0))
}
factors <- factorLevels
subset <- droplevels(data[!is.na(y), names(factors), drop = FALSE])
paramSplit <- list()
for (x in names(factors)) {
paramSplit[[x]] <- paramNames[grep(x, paramNames)]
}
factor_status <- unlist(lapply(names(factors), function(x) {
factorLevels <- factors[[x]]
referenceRef <- factorLevels[1]
noIntercept <- all(paste0(x, factorLevels) %in% paramNames)
refPres <- referenceRef %in% levels(subset[, x])
return(noIntercept || refPres)
}))
names(factor_status) <- names(factors)

referencePresent <- propagateFalseStatus(paramSplit, factor_status)

return(unlist(referencePresent))
}

#' Get the levels of the factors in the model
#'
#' @description Get the levels of the factors in the model associated with the factors
#' names.
#'
#' @param data A `DataFrame` with information on the design. Contains the variables that are used in the model.
#'
#' @param formula A `formula` object that specifies the model.
#'
#' @return A list with the levels of the factors in the model. The names of the list
#' correspond to the factor variables present in the model.
#'
#' @rdname getFormulaFactors
#'
getFormulaFactors <- function(formula, dataFrame) {
vars <- all.vars(formula)
if(length(vars) == 0) {
return(character(0))
}
nam <- vars[sapply(vars, function(x) is.factor(dataFrame[, x]) || is.character(dataFrame[, x]) )]
res <- lapply(nam, function(x) {
levels(as.factor(dataFrame[, x]))
})
names(res) <- nam
return(res)
}

#' Check if the reference levels associated with the contrast are present
#' @description Check if the reference levels associated with the contrast are present
#'
#' @param referencePresent A logical vector indicating for each parameter in the model
#' if his reference level has values for the current feature.
#'
#' @param L A numeric contrast matrix with rownames that equal the model parameters
#' that are involved in the contrasts
#'
#' @param acceptDifferentReference `boolean(1)` to indicate if the contrasts that involve
#' parameters with modified reference levels are returned.
#'
#' @return A boolean
#'
#' @rdname referenceContrast
#'
referenceContrast <- function(referencePresent, L, acceptDifferentReference) {
acceptDifferentReference || is.null(referencePresent) || !any(!referencePresent[rownames(L)])
}

checkFullRank <- function(modelMatrix) {
return(qr(modelMatrix)$rank == ncol(modelMatrix))
}


##### None exported functions from multcomp package is included here to
##### During R and Bioc checks

propagateFalseStatus <- function(vectors, statuses) {
# Initialize a flag to track if any status has changed
has_changed <- TRUE

# Use a while loop that runs until no statuses change
while (has_changed) {
# Collect all elements from vectors marked as FALSE
false_elements <- unique(unlist(vectors[statuses == FALSE]))

# Initialize new statuses as the current statuses
new_statuses <- statuses

# Check each vector and update status if it contains any false element
for (i in seq_along(vectors)) {
if (statuses[i] == TRUE && any(vectors[[i]] %in% false_elements)) {
new_statuses[i] <- FALSE # Change status to FALSE
}
}

# Check if any status has changed
has_changed <- !all(new_statuses == statuses)

# Update statuses with new values
statuses <- new_statuses
}
# Create a named vector of unique elements with their final statuses
all_elements <- unique(unlist(vectors))
element_status <- setNames(rep(TRUE, length(all_elements)), all_elements)

# Set the final status of each element based on the propagated statuses
for (i in seq_along(vectors)) {
element_status[vectors[[i]]] <- statuses[i]
}

return(element_status)
}


.chrlinfct2matrix <- function(ex, var) {
if (!is.character(ex)) {
Expand Down Expand Up @@ -668,3 +801,4 @@ makeContrast <- function(contrasts, parameterNames) {
ex, " to character"
)
}

30 changes: 20 additions & 10 deletions R/msqrob.R
Original file line number Diff line number Diff line change
Expand Up @@ -57,22 +57,29 @@ msqrobLm <- function(y,
robust = TRUE,
maxitRob = 5) {
myDesign <- model.matrix(formula, data)
missingSamples <- colSums(!is.na(y)) == 0
subsettedDesign <- myDesign[!missingSamples, , drop = FALSE]
stopifnot("The provided model must be full rank" = checkFullRank(subsettedDesign))
paramNames <- colnames(myDesign)
factorLevels <- getFormulaFactors(formula, data)
models <- apply(y, 1,
function(y, design) {
## computatability check
obs <- is.finite(y)
type <- "fitError"
referencePresent <- checkReference(y, data, paramNames, factorLevels)
model <- list(
coefficients = NA, vcovUnscaled = NA,
sigma = NA, df.residual = NA, w = NA
referencePresent = referencePresent, coefficients = NA,
vcovUnscaled = NA, sigma = NA,
df.residual = NA, w = NA
)

if (sum(obs) > 0) {
## subset to finite observations, attention with R column switching
X <- design[obs, , drop = FALSE]
X <- X[,colMeans(X == 0) != 1 , drop = FALSE]
qrX <- qr(X)
X <- X[, qrX$pivot[seq_len(qrX$rank)], drop = FALSE]
y <- y[obs]
colnames_orig <- colnames(design)

if (robust) {
## use robust regression from MASS package, "M" estimation is used
Expand Down Expand Up @@ -108,16 +115,17 @@ msqrobLm <- function(y,
}

if (type != "fitError") {
coef <- rep(NA, length(colnames_orig))
names(coef) <- colnames_orig
coef <- rep(NA, length(paramNames))
names(coef) <- paramNames
coef[names(mod$coef)] <- mod$coef
vcovUnscaled <- matrix(NA, nrow =length(colnames_orig), ncol = length(colnames_orig))
rownames(vcovUnscaled) <- colnames(vcovUnscaled) <- colnames_orig
vcovUnscaled <- matrix(NA, nrow =length(paramNames), ncol = length(paramNames))
rownames(vcovUnscaled) <- colnames(vcovUnscaled) <- paramNames
vcovUnscaled[names(mod$coef), names(mod$coef)] <- msqrob2:::.vcovUnscaled(mod)

model <- list(
coefficients = mod$coef,
vcovUnscaled = .vcovUnscaled(mod),
referencePresent = referencePresent,
coefficients = coef,
vcovUnscaled = vcovUnscaled,
sigma = sigma,
df.residual = df.residual,
w = w
Expand Down Expand Up @@ -770,3 +778,5 @@ msqrobGlm <- function(y,

return(models)
}


Loading

0 comments on commit 285af81

Please sign in to comment.