Skip to content

Commit

Permalink
Allowed chromosomes replaced with chromosome arms
Browse files Browse the repository at this point in the history
  • Loading branch information
GermanDemidov committed Nov 26, 2018
1 parent 60a9cad commit 3637b6f
Show file tree
Hide file tree
Showing 4 changed files with 128 additions and 85 deletions.
68 changes: 45 additions & 23 deletions somatic/bafSegmentation.R
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ returnBAlleleFreqs <- function(healthySampleName, tumorSampleName, folderBAF, be
}


determineAllowedChroms <- function(healthySample, tumorSample, healthySampleName, tumorSampleName, folderBAF) {
determineAllowedChroms <- function(healthySample, tumorSample, healthySampleName, tumorSampleName, folderBAF, leftBorders, rightBorders, endsOfChroms) {
vecOfPvalues <- rep(0, nrow(tumorSample))
for (i in 1:nrow(tumorSample)) {
refAleleTum <- (round(as.numeric(tumorSample[i,5]) * as.numeric(tumorSample[i,6])))
Expand All @@ -134,37 +134,58 @@ determineAllowedChroms <- function(healthySample, tumorSample, healthySampleName
thresholdOfNonNormalVariant = 0.01 # pvalue, if lower = BAF is different in normal and tumor
chroms = paste0("chr", 1:22)
chroms = c(chroms, "chrX")
numberOfSNVs = rep(0, length(chroms))
evaluated = rep(0, length(chroms))
numberOfSNVs = rep(0, 2 * length(left_borders))
evaluated = rep(0, 2 * length(left_borders))
namesOfChromArms <- rep(0, 2 * length(left_borders))
plotLabels <- rep(0, 2 * length(left_borders))
counter = 1
for (chrom in chroms) {
rowsFromChrom = which(healthySample[,1] == chrom)
if (length(rowsFromChrom) > 1) {
evaluationOfChorm = (length(which(pvalues[rowsFromChrom] < thresholdOfNonNormalVariant))) / (length(rowsFromChrom))
} else {
evaluationOfChorm = 1.0
for (l in 1:length(left_borders)) {
chrom = names(left_borders)[l]
start = left_borders[[l]]
end = right_borders[[l]]
endOfChrom = endsOfChroms[[l]]
for (k in 1:2) {
if (k == 1) {
rowsFromChrom = which(healthySample[,1] == chrom & healthySample[,2] <= as.numeric(start) )
namesOfChromArms[counter] = paste(chrom, 0, start, sep="-")
plotLabels[counter] = paste(chrom, "left #", length(rowsFromChrom), "SNVs")
} else {
rowsFromChrom = which(healthySample[,1] == chrom & healthySample[,2] >= as.numeric(end) )
namesOfChromArms[counter] = paste(chrom, end, endOfChrom, sep="-")
plotLabels[counter] = paste(chrom, "right #", length(rowsFromChrom), "SNVs")
}

if (length(rowsFromChrom) > 5) {
evaluationOfChorm = (length(which(pvalues[rowsFromChrom] < thresholdOfNonNormalVariant))) / (length(rowsFromChrom))
} else {
evaluationOfChorm = 1.0
}

evaluated[counter] = evaluationOfChorm
numberOfSNVs[counter] = length(rowsFromChrom)

counter = counter + 1
}

evaluated[counter] = evaluationOfChorm
numberOfSNVs[counter] = length(rowsFromChrom)

counter = counter + 1
}
indicesOfAllowedChroms = which(evaluated < max(sort(evaluated)[3], 0.1))
colVec <- rep("red", length(chroms))
indicesOfAllowedButNotBestChroms = which(evaluated > 0.1 & evaluated < sort(evaluated)[3])
indicesOfAllowedChroms = which(evaluated < max(sort(evaluated)[3], 0.075))
colVec <- rep("red", length(evaluated))
indicesOfAllowedButNotBestChroms = which(evaluated > 0.075 & evaluated < sort(evaluated)[5])
colVec[indicesOfAllowedChroms] = "darkgreen"
colVec[indicesOfAllowedButNotBestChroms] = "darkorange"
names(evaluated) = paste(chroms, "\n", numberOfSNVs)
names(evaluated) = namesOfChromArms
allowedChroms = names(evaluated)[indicesOfAllowedChroms]



subDir = paste0(tumorSampleName, "_", healthySampleName)
dir.create(file.path(folderBAF, "result", subDir))
setwd(file.path(folderBAF, "result", subDir))
png(paste0(tumorSampleName, "_", healthySampleName, ".png"), width=1200, height=600)
barplot(evaluated, col=colVec, main=paste(tumorSampleName, healthySampleName), ylim=c(0,1))
png(paste0(tumorSampleName, "_", healthySampleName, ".png"), width=1400, height=600)
op <- par(mar=c(11,4,4,2))
x <- barplot(evaluated, col=colVec, main=paste(tumorSampleName, healthySampleName), ylim=c(0,1), xaxt="n")
text(x-2.5, par("usr")[3] - 0.15, labels = plotLabels, srt = 45, pos = 1, xpd = TRUE)
par(mar=c(5.1, 4.1, 4.1, 2.1), mgp=c(3, 1, 0), las=0)
dev.off()
allowedChroms = chroms[indicesOfAllowedChroms]


trackFilename = paste0(tumorSampleName, "_", healthySampleName, ".igv")
Expand All @@ -187,7 +208,7 @@ determineAllowedChroms <- function(healthySample, tumorSample, healthySampleName
}


returnAllowedChromsBaf <- function(pairs, normalCov, tumorCov, inputFolderBAF, bedFileForFiltering) {
returnAllowedChromsBaf <- function(pairs, normalCov, tumorCov, inputFolderBAF, bedFileForFiltering, leftBorders, rightBorders, endsOfChroms) {
allowedChromsBaf <- list()
bAlleleFreqsAllSamples <- list()
for (i in 1:ncol(normalCov)) {
Expand All @@ -200,7 +221,8 @@ returnAllowedChromsBaf <- function(pairs, normalCov, tumorCov, inputFolderBAF, b
if (!is.null(bAlleleFreqs[[1]])) {
allowedChromsBaf[[paste(colnames(tumorCov)[sampleName2], colnames(normalCov)[i], sep="-")]] = determineAllowedChroms(bAlleleFreqs[[1]], bAlleleFreqs[[2]],
colnames(normalCov)[i], colnames(tumorCov)[sampleName2],
inputFolderBAF)
inputFolderBAF,
leftBorders, rightBorders, endsOfChroms)
bAlleleFreqsAllSamples[[paste(colnames(tumorCov)[sampleName2], colnames(normalCov)[i], sep="-")]] = bAlleleFreqs
} else {
print("BAF did not work well")
Expand Down
96 changes: 48 additions & 48 deletions somatic/cytobands.txt
Original file line number Diff line number Diff line change
@@ -1,48 +1,48 @@
chr1 121500000 125000000 p11.1 acen
chr1 125000000 128900000 q11 acen
chr10 38000000 40200000 p11.1 acen
chr10 40200000 42300000 q11.1 acen
chr11 51600000 53700000 p11.11 acen
chr11 53700000 55700000 q11 acen
chr12 33300000 35800000 p11.1 acen
chr12 35800000 38200000 q11 acen
chr13 16300000 17900000 p11.1 acen
chr13 17900000 19500000 q11 acen
chr14 16100000 17600000 p11.1 acen
chr14 17600000 19100000 q11.1 acen
chr15 15800000 19000000 p11.1 acen
chr15 19000000 20700000 q11.1 acen
chr16 34600000 36600000 p11.1 acen
chr16 36600000 38600000 q11.1 acen
chr17 22200000 24000000 p11.1 acen
chr17 24000000 25800000 q11.1 acen
chr18 15400000 17200000 p11.1 acen
chr18 17200000 19000000 q11.1 acen
chr19 24400000 26500000 p11 acen
chr19 26500000 28600000 q11 acen
chr2 90500000 93300000 p11.1 acen
chr2 93300000 96800000 q11.1 acen
chr20 25600000 27500000 p11.1 acen
chr20 27500000 29400000 q11.1 acen
chr21 10900000 13200000 p11.1 acen
chr21 13200000 14300000 q11.1 acen
chr22 12200000 14700000 p11.1 acen
chr22 14700000 17900000 q11.1 acen
chr3 87900000 91000000 p11.1 acen
chr3 91000000 93900000 q11.1 acen
chr4 48200000 50400000 p11 acen
chr4 50400000 52700000 q11 acen
chr5 46100000 48400000 p11 acen
chr5 48400000 50700000 q11.1 acen
chr6 58700000 61000000 p11.1 acen
chr6 61000000 63300000 q11.1 acen
chr7 58000000 59900000 p11.1 acen
chr7 59900000 61700000 q11.1 acen
chr8 43100000 45600000 p11.1 acen
chr8 45600000 48100000 q11.1 acen
chr9 47300000 49000000 p11.1 acen
chr9 49000000 50700000 q11 acen
chrX 58100000 60600000 p11.1 acen
chrX 60600000 63000000 q11.1 acen
chrY 11600000 12500000 p11.1 acen
chrY 12500000 13400000 q11.1 acen
chr1 121500000 125000000 p11.1 249250621
chr1 125000000 128900000 q11 249250621
chr10 38000000 40200000 p11.1 135534747
chr10 40200000 42300000 q11.1 135534747
chr11 51600000 53700000 p11.11 135006516
chr11 53700000 55700000 q11 135006516
chr12 33300000 35800000 p11.1 133851895
chr12 35800000 38200000 q11 133851895
chr13 16300000 17900000 p11.1 115169878
chr13 17900000 19500000 q11 115169878
chr14 16100000 17600000 p11.1 107349540
chr14 17600000 19100000 q11.1 107349540
chr15 15800000 19000000 p11.1 102531392
chr15 19000000 20700000 q11.1 102531392
chr16 34600000 36600000 p11.1 90354753
chr16 36600000 38600000 q11.1 90354753
chr17 22200000 24000000 p11.1 81195210
chr17 24000000 25800000 q11.1 81195210
chr18 15400000 17200000 p11.1 78077248
chr18 17200000 19000000 q11.1 78077248
chr19 24400000 26500000 p11 59128983
chr19 26500000 28600000 q11 59128983
chr2 90500000 93300000 p11.1 243199373
chr2 93300000 96800000 q11.1 243199373
chr20 25600000 27500000 p11.1 63025520
chr20 27500000 29400000 q11.1 63025520
chr21 10900000 13200000 p11.1 48129895
chr21 13200000 14300000 q11.1 48129895
chr22 12200000 14700000 p11.1 51304566
chr22 14700000 17900000 q11.1 51304566
chr3 87900000 91000000 p11.1 198022430
chr3 91000000 93900000 q11.1 198022430
chr4 48200000 50400000 p11 191154276
chr4 50400000 52700000 q11 191154276
chr5 46100000 48400000 p11 180915260
chr5 48400000 50700000 q11.1 180915260
chr6 58700000 61000000 p11.1 171115067
chr6 61000000 63300000 q11.1 171115067
chr7 58000000 59900000 p11.1 159138663
chr7 59900000 61700000 q11.1 159138663
chr8 43100000 45600000 p11.1 146364022
chr8 45600000 48100000 q11.1 146364022
chr9 47300000 49000000 p11.1 141213431
chr9 49000000 50700000 q11 141213431
chrX 58100000 60600000 p11.1 155270560
chrX 60600000 63000000 q11.1 155270560
chrY 11600000 12500000 p11.1 59373566
chrY 12500000 13400000 q11.1 59373566
21 changes: 8 additions & 13 deletions somatic/firstStep.R
Original file line number Diff line number Diff line change
Expand Up @@ -222,6 +222,11 @@ if (!is.null(opt$tumorSample)) {
stopifnot(!opt$tumorSample %in% pairs[coordOfNormalInPairs,1])
}

lstOfChromBorders <- getCytobands("cytobands.txt")
left_borders <- lstOfChromBorders[[1]]
right_borders <- lstOfChromBorders[[2]]
ends_of_chroms <- lstOfChromBorders[[3]]

if (frameworkDataTypes == "covdepthBAF") {
setwd(opt$folderWithScript)
source("bafSegmentation.R",local=TRUE)
Expand All @@ -234,7 +239,7 @@ if (frameworkDataTypes == "covdepthBAF") {
} else {
pairsForBAF = pairs
}
listOfValues <- returnAllowedChromsBaf(pairsForBAF, normal, tumor, opt$bafFolder, bedFile)
listOfValues <- returnAllowedChromsBaf(pairsForBAF, normal, tumor, opt$bafFolder, bedFile, left_borders, right_borders, ends_of_chroms)
allowedChromsBaf <- listOfValues[[1]]
bAlleleFreqsAllSamples <- listOfValues[[2]]
}
Expand Down Expand Up @@ -347,7 +352,7 @@ sdsOfGermlineSamples <- apply(coverage.normalised[autosomes,], 2, determineSDsOf

#locationsShiftedLogFoldChanges <- sweep(matrixOfLogFold, 1, locations)

listOfVarianceAndMultiplicator <- esimtateVarianceFromSampleNoise(sdsOfGermlineSamples, 30000)
listOfVarianceAndMultiplicator <- esimtateVarianceFromSampleNoise(sdsOfGermlineSamples, 100000)
esimtatedVarianceFromSampleNoise <- listOfVarianceAndMultiplicator[[2]]
multiplicator <- listOfVarianceAndMultiplicator[[1]]

Expand All @@ -359,17 +364,7 @@ cn_states <- 0:20



cytobands <- read.table("cytobands.txt",stringsAsFactors = F, header = F, sep="\t", row.names=NULL)
left_borders <- vector(mode="list", length=nrow(cytobands)/2)
right_borders <- vector(mode="list", length=nrow(cytobands)/2)
odd_numbers <- seq(from=1, to=nrow(cytobands), by=2)
even_numbers <- seq(from=2, to=nrow(cytobands), by=2)
names(left_borders) = as.vector(t(cytobands[,1]))[odd_numbers]
names(right_borders) = as.vector(t(cytobands[,1]))[even_numbers]
for (i in 1:length(odd_numbers)) {
left_borders[[i]] = cytobands[odd_numbers[i], 2]
right_borders[[i]] = cytobands[even_numbers[i], 3]
}




Expand Down
28 changes: 27 additions & 1 deletion somatic/generalHelpers.R
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,23 @@ return_norm_likelik <- function(x) {
return(vect_of_norm_likeliks[x])
}

getCytobands <- function(fileName) {
cytobands <- read.table(fileName,stringsAsFactors = F, header = F, sep="\t", row.names=NULL)
left_borders <- vector(mode="list", length=nrow(cytobands)/2)
right_borders <- vector(mode="list", length=nrow(cytobands)/2)
ends_of_chroms <- vector(mode="list", length=nrow(cytobands)/2)
odd_numbers <- seq(from=1, to=nrow(cytobands), by=2)
even_numbers <- seq(from=2, to=nrow(cytobands), by=2)
names(left_borders) = as.vector(t(cytobands[,1]))[odd_numbers]
names(right_borders) = as.vector(t(cytobands[,1]))[even_numbers]
names(ends_of_chroms) = as.vector(t(cytobands[,1]))[even_numbers]
for (i in 1:length(odd_numbers)) {
left_borders[[i]] = cytobands[odd_numbers[i], 2]
right_borders[[i]] = cytobands[even_numbers[i], 3]
ends_of_chroms[[i]] = cytobands[even_numbers[i], 5]
}
return(list(left_borders, right_borders, ends_of_chroms))
}


EstimateModeSimple <- function(x) {
Expand Down Expand Up @@ -112,7 +129,16 @@ gc_and_sample_size_normalise <- function(info, coverages, averageCoverage=T, all
tumorName = colnames(coverages)[j]
position <- which(startsWith(names(allowedChroms), prefix=tumorName))
if (length(position) == 1) {
allowedChromosomesAutosomesOnly = which(!info[,1] %in% c("chrX", "chrY") & info[,1] %in% allowedChroms[[position]])
allowedChromosomesAutosomesOnly = c()
for (allowedArm in allowedChroms[[position]]) {
splittedValue <- strsplit(allowedArm, "-")
chrom = splittedValue[[1]][1]
startOfArm = as.numeric(splittedValue[[1]][2])
endOfArm = as.numeric(splittedValue[[1]][3])
allowedChromosomesAutosomesOnly = union(allowedChromosomesAutosomesOnly, which(info[,1] == chrom &
info[,2] >= startOfArm &
info[,3] <= endOfArm))
}
} else {
allowedChromosomesAutosomesOnly = which(!info[,1] %in% c("chrX", "chrY"))
}
Expand Down

0 comments on commit 3637b6f

Please sign in to comment.