diff --git a/somatic/bafSegmentation.R b/somatic/bafSegmentation.R index 254a7cb..0b56b92 100644 --- a/somatic/bafSegmentation.R +++ b/somatic/bafSegmentation.R @@ -50,8 +50,8 @@ returnBAlleleFreqs <- function(healthySampleName, tumorSampleName, folderBAF, be colnames(healthySample) <- c("chr", "start", "end", "Feature", "freq", "depth") colnames(tumorSample) <- c("chr", "start", "end", "Feature", "freq", "depth") - healthySample = healthySample[which(healthySample[,6] > max(median(healthySample[,6]) / 10, 40) & healthySample[,6] < quantile(healthySample[,6], 0.975)),] - tumorSample = tumorSample[which(tumorSample[,6] > max(median(tumorSample[,6]) / 10, 40)),] + healthySample = healthySample[which(healthySample[,6] > max(median(healthySample[,6]) / 10, 30) & healthySample[,6] < quantile(healthySample[,6], 0.975)),] + tumorSample = tumorSample[which(tumorSample[,6] > max(median(tumorSample[,6]) / 10, 30)),] indicesOfSNVsToRemove <- c() currentChrom = "chrN" @@ -96,8 +96,14 @@ returnBAlleleFreqs <- function(healthySampleName, tumorSampleName, folderBAF, be tumorSample = tumorSample[order(tumorSample[,1],tumorSample[,2] ),] healthySample = healthySample[order(healthySample[,1], healthySample[,2]),] - - heterozygousPositions <- apply(healthySample[,5:6], 1, function(vec) {determineHeterozygousPositions(as.numeric(vec[1]), as.numeric(vec[2]))}) + # Determining positions which are heterozygous + potentiallyHeterozygous = as.numeric(healthySample[which(!(healthySample[,1] %in% c("chrX","chrY","X","Y")) & as.numeric(healthySample[,6]) > 30),5]) + potentiallyHeterozygous <- potentiallyHeterozygous[which(potentiallyHeterozygous > 0.25 & potentiallyHeterozygous < 0.75)] + if (!is.na(potentiallyHeterozygous)) { + heterozygousAlleleShift <- median(potentiallyHeterozygous) + } else { heterozygousAlleleShift = 0.48 } + print(paste("Heterozygous allele shift for particular sample", heterozygousAlleleShift)) + heterozygousPositions <- apply(healthySample[,5:6], 1, function(vec) {determineHeterozygousPositions(as.numeric(vec[1]), as.numeric(vec[2]), heterozygousAlleleShift)}) healthySample = healthySample[heterozygousPositions, ] tumorSample = tumorSample[heterozygousPositions, ] @@ -227,8 +233,8 @@ returnAllowedChromsBaf <- function(pairs, normalCov, tumorCov, inputFolderBAF, b } else { print("BAF did not work well") print(i) - print(sampleNames1) - print(sampleName2) + print(colnames(tumorCov)[sampleName2]) + print(colnames(normalCov)[i]) } } } diff --git a/somatic/firstStep.R b/somatic/firstStep.R index d7aab67..871a89b 100644 --- a/somatic/firstStep.R +++ b/somatic/firstStep.R @@ -925,18 +925,7 @@ for (sam_no in 1:ncol(matrixOfLogFold)) { bAlleleFreqsNormal <- bAlleleFreqsAllSamples[[position]][[ strsplit(colnames(matrixOfLogFold)[sam_no], split="-")[[1]][2] ]] # calculate median correction factor - allowedChromosomesAutosomesOnly = c() - for (allowedArm in allowedChromsBafSample) { - splittedValue <- strsplit(allowedArm, "-") - chrom = splittedValue[[1]][1] - if (!chrom %in% c("chrY", "Y", "chrX", "X")) { - startOfArm = as.numeric(splittedValue[[1]][2]) - endOfArm = as.numeric(splittedValue[[1]][3]) - allowedChromosomesAutosomesOnly = union(allowedChromosomesAutosomesOnly, which(bAlleleFreqsTumor[,1] == chrom & - bAlleleFreqsTumor[,2] >= startOfArm & - bAlleleFreqsTumor[,3] <= endOfArm)) - } - } + allowedChromosomesAutosomesOnly = which(!bAlleleFreqsTumor[,1] %in% c("X","Y","chrX","chrY")) multiplierOfSNVsDueToMapping <- median(as.numeric(bAlleleFreqsNormal[allowedChromosomesAutosomesOnly,5])) print("Multiplier of allele balance of a particular sample") print(multiplierOfSNVsDueToMapping) diff --git a/somatic/helpersBalleleFreq.R b/somatic/helpersBalleleFreq.R index 4f8b7c4..546a12a 100644 --- a/somatic/helpersBalleleFreq.R +++ b/somatic/helpersBalleleFreq.R @@ -3,7 +3,7 @@ likelihoodOfSNV <- function(a, b, p) { value = dbinom(a, size=b, prob=p) - if (is.nan(value) | value < 10**-100) return(10**-100) + if (is.nan(value) | value < 10**-50) return(10**-50) return((value)) } @@ -20,10 +20,9 @@ passPropTest <- function(numOne, numTwo, refOne, refTwo) { } } -determineHeterozygousPositions <- function(freq, depth) { - prob = pbinom(round(freq * depth), prob=0.48, size=depth) - if ((prob > 0.01 & prob < 0.99 & depth < 100) | - (prob > 0.001 & prob < 0.999 & depth >= 100)) { +determineHeterozygousPositions <- function(freq, depth, probAB=0.48) { + prob = pbinom(round(freq * depth), prob=probAB, size=depth) + if ((prob > 0.01 & prob < 0.99)) { return(T) } else { return(F)