Skip to content

Commit

Permalink
Bugfixes in heterzygosity detemination
Browse files Browse the repository at this point in the history
  • Loading branch information
GermanDemidov committed Nov 27, 2018
1 parent aae021e commit 90f0a56
Show file tree
Hide file tree
Showing 3 changed files with 17 additions and 23 deletions.
18 changes: 12 additions & 6 deletions somatic/bafSegmentation.R
Original file line number Diff line number Diff line change
Expand Up @@ -50,8 +50,8 @@ returnBAlleleFreqs <- function(healthySampleName, tumorSampleName, folderBAF, be
colnames(healthySample) <- c("chr", "start", "end", "Feature", "freq", "depth")
colnames(tumorSample) <- c("chr", "start", "end", "Feature", "freq", "depth")

healthySample = healthySample[which(healthySample[,6] > max(median(healthySample[,6]) / 10, 40) & healthySample[,6] < quantile(healthySample[,6], 0.975)),]
tumorSample = tumorSample[which(tumorSample[,6] > max(median(tumorSample[,6]) / 10, 40)),]
healthySample = healthySample[which(healthySample[,6] > max(median(healthySample[,6]) / 10, 30) & healthySample[,6] < quantile(healthySample[,6], 0.975)),]
tumorSample = tumorSample[which(tumorSample[,6] > max(median(tumorSample[,6]) / 10, 30)),]

indicesOfSNVsToRemove <- c()
currentChrom = "chrN"
Expand Down Expand Up @@ -96,8 +96,14 @@ returnBAlleleFreqs <- function(healthySampleName, tumorSampleName, folderBAF, be
tumorSample = tumorSample[order(tumorSample[,1],tumorSample[,2] ),]
healthySample = healthySample[order(healthySample[,1], healthySample[,2]),]


heterozygousPositions <- apply(healthySample[,5:6], 1, function(vec) {determineHeterozygousPositions(as.numeric(vec[1]), as.numeric(vec[2]))})
# Determining positions which are heterozygous
potentiallyHeterozygous = as.numeric(healthySample[which(!(healthySample[,1] %in% c("chrX","chrY","X","Y")) & as.numeric(healthySample[,6]) > 30),5])
potentiallyHeterozygous <- potentiallyHeterozygous[which(potentiallyHeterozygous > 0.25 & potentiallyHeterozygous < 0.75)]
if (!is.na(potentiallyHeterozygous)) {
heterozygousAlleleShift <- median(potentiallyHeterozygous)
} else { heterozygousAlleleShift = 0.48 }
print(paste("Heterozygous allele shift for particular sample", heterozygousAlleleShift))
heterozygousPositions <- apply(healthySample[,5:6], 1, function(vec) {determineHeterozygousPositions(as.numeric(vec[1]), as.numeric(vec[2]), heterozygousAlleleShift)})
healthySample = healthySample[heterozygousPositions, ]
tumorSample = tumorSample[heterozygousPositions, ]

Expand Down Expand Up @@ -227,8 +233,8 @@ returnAllowedChromsBaf <- function(pairs, normalCov, tumorCov, inputFolderBAF, b
} else {
print("BAF did not work well")
print(i)
print(sampleNames1)
print(sampleName2)
print(colnames(tumorCov)[sampleName2])
print(colnames(normalCov)[i])
}
}
}
Expand Down
13 changes: 1 addition & 12 deletions somatic/firstStep.R
Original file line number Diff line number Diff line change
Expand Up @@ -925,18 +925,7 @@ for (sam_no in 1:ncol(matrixOfLogFold)) {
bAlleleFreqsNormal <- bAlleleFreqsAllSamples[[position]][[ strsplit(colnames(matrixOfLogFold)[sam_no], split="-")[[1]][2] ]]

# calculate median correction factor
allowedChromosomesAutosomesOnly = c()
for (allowedArm in allowedChromsBafSample) {
splittedValue <- strsplit(allowedArm, "-")
chrom = splittedValue[[1]][1]
if (!chrom %in% c("chrY", "Y", "chrX", "X")) {
startOfArm = as.numeric(splittedValue[[1]][2])
endOfArm = as.numeric(splittedValue[[1]][3])
allowedChromosomesAutosomesOnly = union(allowedChromosomesAutosomesOnly, which(bAlleleFreqsTumor[,1] == chrom &
bAlleleFreqsTumor[,2] >= startOfArm &
bAlleleFreqsTumor[,3] <= endOfArm))
}
}
allowedChromosomesAutosomesOnly = which(!bAlleleFreqsTumor[,1] %in% c("X","Y","chrX","chrY"))
multiplierOfSNVsDueToMapping <- median(as.numeric(bAlleleFreqsNormal[allowedChromosomesAutosomesOnly,5]))
print("Multiplier of allele balance of a particular sample")
print(multiplierOfSNVsDueToMapping)
Expand Down
9 changes: 4 additions & 5 deletions somatic/helpersBalleleFreq.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ likelihoodOfSNV <- function(a, b, p) {

value = dbinom(a, size=b, prob=p)

if (is.nan(value) | value < 10**-100) return(10**-100)
if (is.nan(value) | value < 10**-50) return(10**-50)
return((value))
}

Expand All @@ -20,10 +20,9 @@ passPropTest <- function(numOne, numTwo, refOne, refTwo) {
}
}

determineHeterozygousPositions <- function(freq, depth) {
prob = pbinom(round(freq * depth), prob=0.48, size=depth)
if ((prob > 0.01 & prob < 0.99 & depth < 100) |
(prob > 0.001 & prob < 0.999 & depth >= 100)) {
determineHeterozygousPositions <- function(freq, depth, probAB=0.48) {
prob = pbinom(round(freq * depth), prob=probAB, size=depth)
if ((prob > 0.01 & prob < 0.99)) {
return(T)
} else {
return(F)
Expand Down

0 comments on commit 90f0a56

Please sign in to comment.