Skip to content

Commit

Permalink
select Montitoring variants: add support for somatic dragen vcf format
Browse files Browse the repository at this point in the history
  • Loading branch information
Ott-Alexander committed Oct 18, 2023
1 parent 51d9fee commit ece25ec
Showing 1 changed file with 47 additions and 21 deletions.
68 changes: 47 additions & 21 deletions select_monitoring_variants.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,15 +113,29 @@ def evaluate_variants(self, reference_fasta):
else:
fh_vcf_in = open(self.input_vcf_file)

caller = "unknown";

with fh_vcf_in as vcf_file:
for vcf_line in vcf_file:
vcf_line = vcf_line.rstrip('\n')

# VCF Header lines are simply printed to output vcf file
if vcf_line[0] == '#':
fh_vcf.write(vcf_line + '\n')

#check for caller:

if vcf_line.startswith("##source="):
if "strelka" in vcf_line:
caller = "strelka"
if "dragen" in vcf_line.lower():
caller = "dragen"

continue


if caller == "unknown":
raise ValueError("Unknown caller for the VCF file, couldn't find caller line of supported caller (Strelka2, Dragen). Expected ##source=strelka or ##source=Dragen_somatic_calling")

# Get VCF fields of a variant entry
vcf_column = vcf_line.split("\t")

Expand Down Expand Up @@ -163,27 +177,39 @@ def evaluate_variants(self, reference_fasta):
# Get depth of coverage for tumor and normal sample
normal_dp = int(normal_table['DP'])
tumor_dp = int(tumor_table['DP'])


if caller == "strelka":
# Compute REF_COUNT, ALT_COUNT and AF for SNVs
if is_indel == 0:
ref_format = vcf_column[3] + "U"
alt_format = vcf_column[4] + "U"
ref_t1, ref_t2 = tumor_table[ref_format].split(",")
alt_t1, alt_t2 = tumor_table[alt_format].split(",")
if (int(ref_t1) + int(alt_t1)) > 0:
af = int(alt_t1) / (int(ref_t1) + int(alt_t1))
if is_indel == 0:
ref_format = vcf_column[3] + "U"
alt_format = vcf_column[4] + "U"
ref_t1, ref_t2 = tumor_table[ref_format].split(",")
alt_t1, alt_t2 = tumor_table[alt_format].split(",")
if (int(ref_t1) + int(alt_t1)) > 0:
af = int(alt_t1) / (int(ref_t1) + int(alt_t1))
else:
af = 0

# Compute REF_COUNT, ALT_COUNT and AF for indels
else:
af = 0

# Compute REF_COUNT, ALT_COUNT and AF for indels
ref_t1, ref_t2 = tumor_table['TAR'].split(",")
alt_t1, alt_t2 = tumor_table['TIR'].split(",")
tumor_dp = int(ref_t1) + int(alt_t1)
if tumor_dp > 0:
af = int(alt_t1) / tumor_dp
else:
af = 0
elif caller == "dragen":
af = tumor_table['AF']
#depth of reference allele and alternate allele
ref_t1, alt_t1 = tumor_table['AD'].split(",")

af = float(af)
ref_t1 = int(ref_t1)
alt_t1 = int(alt_t1)

else:
ref_t1, ref_t2 = tumor_table['TAR'].split(",")
alt_t1, alt_t2 = tumor_table['TIR'].split(",")
tumor_dp = int(ref_t1) + int(alt_t1)
if tumor_dp > 0:
af = int(alt_t1) / tumor_dp
else:
af = 0
raise ValueError("Unknown caller when trying to determine ref_count, alt_count and af.")

# Generate locus ID from chr and pos (note: deletions in GSvar are not pos - 1
locus_vcf = vcf_column[0] + "_" + vcf_column[1]
Expand Down Expand Up @@ -268,7 +294,7 @@ def evaluate_variants(self, reference_fasta):
score -= 1
if base_bias >= 0.8:
score -= 1

# add score to VCF info column
if vcf_column[7].strip() == "" or vcf_column[7].strip() == ".":
vcf_column[7] = "MonitoringScore=" + str(round(score, 3))
Expand All @@ -282,7 +308,7 @@ def evaluate_variants(self, reference_fasta):
+ str(alt_t1) + "\t" + str(round(af, 4)) + "\t" + str(vcf_column[6]) + "\t" \
+ str(impact) + "\t" + str(gene) + "\t" + sequence_context + "\t" + str(round(score, 3))

if "PASS" in vcf_column[6]:
if "PASS" in vcf_column[6] or "." in vcf_column[6]:
self.good_mutation_counter += 1
self.scoring_TSV[candidate] = score
self.scoring_VCF[candidate] = vcf_line
Expand Down

0 comments on commit ece25ec

Please sign in to comment.