diff --git a/workflows/theiacov/wf_theiacov_fasta.wdl b/workflows/theiacov/wf_theiacov_fasta.wdl index 8ef716a17..cc30b9527 100644 --- a/workflows/theiacov/wf_theiacov_fasta.wdl +++ b/workflows/theiacov/wf_theiacov_fasta.wdl @@ -33,7 +33,8 @@ workflow theiacov_fasta { # qc check parameters File? qc_check_table # vadr parameters - Int? max_length + Int? vadr_max_length + Int? vadr_skip_length String? vadr_opts Int? vadr_memory } @@ -55,7 +56,8 @@ workflow theiacov_fasta { genome_length_input = genome_length, nextclade_dataset_tag_input = nextclade_dataset_tag, nextclade_dataset_name_input = nextclade_dataset_name, - vadr_max_length = max_length, + vadr_max_length = vadr_max_length, + vadr_skip_length = vadr_skip_length, vadr_options = vadr_opts, vadr_mem = vadr_memory } @@ -96,6 +98,7 @@ workflow theiacov_fasta { assembly_length_unambiguous = consensus_qc.number_ATCG, max_length = organism_parameters.vadr_maxlength, vadr_opts = organism_parameters.vadr_opts, + skip_length = organism_parameters.vadr_skiplength, memory = organism_parameters.vadr_memory } } diff --git a/workflows/theiacov/wf_theiacov_illumina_pe.wdl b/workflows/theiacov/wf_theiacov_illumina_pe.wdl index db28251c1..c1f75b486 100644 --- a/workflows/theiacov/wf_theiacov_illumina_pe.wdl +++ b/workflows/theiacov/wf_theiacov_illumina_pe.wdl @@ -50,6 +50,7 @@ workflow theiacov_illumina_pe { String? nextclade_dataset_name # vadr parameters Int? vadr_max_length + Int? vadr_skip_length String? vadr_options Int? vadr_memory # read screen parameters @@ -77,6 +78,7 @@ workflow theiacov_illumina_pe { nextclade_dataset_tag_input = nextclade_dataset_tag, nextclade_dataset_name_input = nextclade_dataset_name, vadr_max_length = vadr_max_length, + vadr_skip_length = vadr_skip_length, vadr_options = vadr_options, primer_bed_file = primer_bed, pangolin_docker_image = pangolin_docker_image, @@ -186,6 +188,7 @@ workflow theiacov_illumina_pe { nextclade_dataset_tag_input = nextclade_dataset_tag, nextclade_dataset_name_input = nextclade_dataset_name, vadr_max_length = vadr_max_length, + vadr_skip_length = vadr_skip_length, vadr_options = vadr_options, vadr_mem = vadr_memory, primer_bed_file = primer_bed, @@ -206,6 +209,7 @@ workflow theiacov_illumina_pe { nextclade_dataset_tag_input = nextclade_dataset_tag, nextclade_dataset_name_input = nextclade_dataset_name, vadr_max_length = vadr_max_length, + vadr_skip_length = vadr_skip_length, vadr_options = vadr_options, primer_bed_file = primer_bed, gene_locations_bed_file = reference_gene_locations_bed, @@ -299,6 +303,7 @@ workflow theiacov_illumina_pe { assembly_length_unambiguous = consensus_qc.number_ATCG, vadr_opts = organism_parameters.vadr_opts, max_length = organism_parameters.vadr_maxlength, + skip_length = organism_parameters.vadr_skiplength, memory = organism_parameters.vadr_memory } } diff --git a/workflows/theiacov/wf_theiacov_illumina_se.wdl b/workflows/theiacov/wf_theiacov_illumina_se.wdl index a493babdc..3a6af2ec3 100644 --- a/workflows/theiacov/wf_theiacov_illumina_se.wdl +++ b/workflows/theiacov/wf_theiacov_illumina_se.wdl @@ -52,6 +52,7 @@ workflow theiacov_illumina_se { Boolean skip_mash = false # vadr parameters Int? vadr_max_length + Int? vadr_skip_length String? vadr_options Int? vadr_memory # pangolin parameters @@ -69,6 +70,7 @@ workflow theiacov_illumina_se { nextclade_dataset_tag_input = nextclade_dataset_tag, nextclade_dataset_name_input = nextclade_dataset_name, vadr_max_length = vadr_max_length, + vadr_skip_length = vadr_skip_length, vadr_options = vadr_options, vadr_mem = vadr_memory, primer_bed_file = primer_bed, @@ -173,6 +175,7 @@ workflow theiacov_illumina_se { assembly_length_unambiguous = consensus_qc.number_ATCG, vadr_opts = organism_parameters.vadr_opts, max_length = organism_parameters.vadr_maxlength, + skip_length = organism_parameters.vadr_skiplength, memory = organism_parameters.vadr_memory } } diff --git a/workflows/theiacov/wf_theiacov_ont.wdl b/workflows/theiacov/wf_theiacov_ont.wdl index 44381afe5..fdb7d1d81 100644 --- a/workflows/theiacov/wf_theiacov_ont.wdl +++ b/workflows/theiacov/wf_theiacov_ont.wdl @@ -53,6 +53,7 @@ workflow theiacov_ont { Boolean skip_mash = false # vadr parameters Int? vadr_max_length + Int? vadr_skip_length String? vadr_options Int? vadr_memory # pangolin parameters @@ -69,6 +70,7 @@ workflow theiacov_ont { nextclade_dataset_tag_input = nextclade_dataset_tag, nextclade_dataset_name_input = nextclade_dataset_name, vadr_max_length = vadr_max_length, + vadr_skip_length = vadr_skip_length, vadr_options = vadr_options, vadr_mem = vadr_memory, primer_bed_file = primer_bed, @@ -165,6 +167,7 @@ workflow theiacov_ont { nextclade_dataset_tag_input = nextclade_dataset_tag, nextclade_dataset_name_input = nextclade_dataset_name, vadr_max_length = vadr_max_length, + vadr_skip_length = vadr_skip_length, vadr_options = vadr_options, primer_bed_file = primer_bed, gene_locations_bed_file = reference_gene_locations_bed, @@ -183,6 +186,7 @@ workflow theiacov_ont { nextclade_dataset_tag_input = nextclade_dataset_tag, nextclade_dataset_name_input = nextclade_dataset_name, vadr_max_length = vadr_max_length, + vadr_skip_length = vadr_skip_length, vadr_options = vadr_options, primer_bed_file = primer_bed, gene_locations_bed_file = reference_gene_locations_bed, @@ -291,6 +295,7 @@ workflow theiacov_ont { assembly_length_unambiguous = consensus_qc.number_ATCG, vadr_opts = organism_parameters.vadr_opts, max_length = organism_parameters.vadr_maxlength, + skip_length = organism_parameters.vadr_skiplength, memory = organism_parameters.vadr_memory } } diff --git a/workflows/utilities/wf_organism_parameters.wdl b/workflows/utilities/wf_organism_parameters.wdl index b7faa83c8..f5af2752d 100644 --- a/workflows/utilities/wf_organism_parameters.wdl +++ b/workflows/utilities/wf_organism_parameters.wdl @@ -27,6 +27,7 @@ workflow organism_parameters { # vadr parameters Int? vadr_max_length + Int? vadr_skip_length String? vadr_options Int? vadr_mem @@ -45,6 +46,7 @@ workflow organism_parameters { String sc2_pangolin_docker = "us-docker.pkg.dev/general-theiagen/staphb/pangolin:4.3.1-pdata-1.26" Int sc2_genome_len = 29903 Int sc2_vadr_max_length = 30000 + Int sc2_vadr_skip_length = 10000 String sc2_vadr_options = "--noseqnamemax --glsearch -s -r --nomisc --mkey sarscov2 --lowsim5seq 6 --lowsim3seq 6 --alt_fail lowscore,insertnn,deletinn --out_allfasta" Int sc2_vadr_memory = 8 } @@ -59,6 +61,7 @@ workflow organism_parameters { String mpox_reference_gff_file = "gs://theiagen-public-files/terra/mpxv-files/Mpox-MT903345.1.reference.gff3" String mpox_vadr_options = "--glsearch -s -r --nomisc --mkey mpxv --r_lowsimok --r_lowsimxd 100 --r_lowsimxl 2000 --alt_pass discontn,dupregin --out_allfasta --minimap2 --s_overhang 150" Int mpox_vadr_max_length = 210000 + Int mpox_vadr_skip_length = 65480 Int mpox_vadr_memory = 8 Int mpox_genome_len = 197200 } @@ -70,6 +73,7 @@ workflow organism_parameters { Int wnv_genome_len = 11000 String wnv_vadr_options = "--mkey flavi --mdir /opt/vadr/vadr-models-flavi/ --nomisc --noprotid --out_allfasta" Int wnv_vadr_max_length = 11000 + Int wnv_vadr_skip_length = 3000 Int wnv_vadr_memory = 8 String wnv_nextclade_ds_tag = "NA" String wnv_nextclade_ds_name = "NA" @@ -78,9 +82,10 @@ workflow organism_parameters { String flu_org_name = "flu" Int flu_genome_len = 13500 - # vadr options are dummy options for flu right now + # vadr options for flu String flu_vadr_options = "--atgonly --xnocomp --nomisc --alt_fail extrant5,extrant3 --mkey flu" Int flu_vadr_max_length = 13500 + Int flu_vadr_skip_length = 500 Int flu_vadr_memory = 8 # setting nextclade parameters @@ -137,6 +142,7 @@ workflow organism_parameters { Int rsv_a_genome_len = 16000 String rsv_a_vadr_options = "-r --mkey rsv --xnocomp" Int rsv_a_vadr_max_length = 15500 + Int rsv_a_vadr_skip_length = 5000 Int rsv_a_vadr_memory = 32 } if (organism == "rsv_b" || organism == "rsv-b" || organism == "RSV-B" || organism == "RSV_B") { @@ -147,6 +153,7 @@ workflow organism_parameters { Int rsv_b_genome_len = 16000 String rsv_b_vadr_options = "-r --mkey rsv --xnocomp" Int rsv_b_vadr_max_length = 15500 + Int rsv_b_vadr_skip_length = 5000 Int rsv_b_vadr_memory = 32 } if (organism == "HIV" && hiv_primer_version == "v1") { @@ -184,6 +191,7 @@ workflow organism_parameters { String vadr_opts = select_first([vadr_options, sc2_vadr_options, mpox_vadr_options, wnv_vadr_options, flu_vadr_options, rsv_a_vadr_options, rsv_b_vadr_options, "NA"]) Int vadr_maxlength = select_first([vadr_max_length, sc2_vadr_max_length, mpox_vadr_max_length, wnv_vadr_max_length, flu_vadr_max_length, rsv_a_vadr_max_length, rsv_b_vadr_max_length, 0]) Int vadr_memory = select_first([vadr_mem, sc2_vadr_memory, mpox_vadr_memory, wnv_vadr_memory, flu_vadr_memory, rsv_a_vadr_memory, rsv_b_vadr_memory, 0]) + Int vadr_skiplength = select_first([vadr_skip_length, sc2_vadr_skip_length, mpox_vadr_skip_length, wnv_vadr_skip_length, flu_vadr_skip_length, rsv_a_vadr_skip_length, rsv_b_vadr_skip_length, 0]) # kraken options String kraken_target_organism = select_first([kraken_target_organism_input, mpox_kraken_target_organism, wnv_kraken_target_organism, hiv_v1_target_organism, hiv_v2_target_organism, ""]) }