Skip to content

Commit

Permalink
refactor in progress; testing
Browse files Browse the repository at this point in the history
  • Loading branch information
zachary-foster committed Aug 4, 2023
1 parent 3ed9c7e commit 4bc90b0
Show file tree
Hide file tree
Showing 6 changed files with 57 additions and 34 deletions.
24 changes: 17 additions & 7 deletions subworkflows/local/align_reads_to_ref.nf
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,11 @@ workflow ALIGN_READS_TO_REF {

ch_versions = Channel.empty()

ch_reads = ch_input.map { [it[0], it[1]] }
ch_bwa_index = ch_input.map { [it[0], it[5]] }
samp_ref_combo = ch_input
.map { [[id: "${it[2].id}_${it[0].id}", ref: it[2], sample: it[0]]] + it } // make composite ID for read/ref combos

ch_reads = samp_ref_combo.map { [it[0], it[2]] }
ch_bwa_index = samp_ref_combo.map { [it[0], it[6]] }
BWA_MEM ( ch_reads, ch_bwa_index, false )
ch_versions = ch_versions.mix(BWA_MEM.out.versions.toSortedList().map{it[0]})

Expand All @@ -25,8 +28,8 @@ workflow ALIGN_READS_TO_REF {
PICARD_SORTSAM_1 ( PICARD_ADDORREPLACEREADGROUPS.out.bam, 'coordinate' )
ch_versions = ch_versions.mix(PICARD_SORTSAM_1.out.versions.toSortedList().map{it[0]})

ch_reference = ch_input.map { [it[0], it[3]] } // channel: [ val(meta), file(reference) ]
ch_ref_index = ch_input.map { [it[0], it[4]] } // channel: [ val(meta), file(ref_index) ]
ch_reference = samp_ref_combo.map { [it[0], it[4]] } // channel: [ val(ref_samp_meta), file(reference) ]
ch_ref_index = samp_ref_combo.map { [it[0], it[5]] } // channel: [ val(ref_samp_meta), file(ref_index) ]
picard_input = PICARD_SORTSAM_1.out.bam // join each sample/reference combo with its reference and reference index
.join(ch_reference)
.join(ch_ref_index)
Expand All @@ -43,9 +46,16 @@ workflow ALIGN_READS_TO_REF {
SAMTOOLS_INDEX ( PICARD_SORTSAM_2.out.bam )
ch_versions = ch_versions.mix(SAMTOOLS_INDEX.out.versions.toSortedList().map{it[0]})

// Revert combined metas back to separate ones for sample and reference
out_bam = PICARD_SORTSAM_2.out.bam // channel: [ val(ref_samp_meta), [ bam ] ]
.map { [it[0].sample, it[0].ref, it[1]] }
out_bai = SAMTOOLS_INDEX.out.bai // channel: [ val(ref_samp_meta), [ bai ] ]
.map { [it[0].sample, it[0].ref, it[1]] }


emit:
bam = PICARD_SORTSAM_2.out.bam // channel: [ val(meta), [ bam ] ]
bai = SAMTOOLS_INDEX.out.bai // channel: [ val(meta), [ bai ] ]
versions = ch_versions // channel: [ versions.yml ]
bam = out_bam // channel: [ val(meta), val(ref_meta), [ bam ] ]
bai = out_bai // channel: [ val(meta), val(ref_meta), [ bai ] ]
versions = ch_versions // channel: [ versions.yml ]
}

2 changes: 1 addition & 1 deletion subworkflows/local/core_genome_phylogeny.nf
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ include { COREGENOMEPHYLOGENYREPORT } from '../../modules/local/core_gene_phylog
workflow CORE_GENOME_PHYLOGENY {

take:
ch_input // [ val(meta), file(gff), val(ref_meta), file(reference) ]
ch_input // [ val(meta), file(gff), val(group_meta) ]
ch_samplesheet // channel: path

main:
Expand Down
13 changes: 8 additions & 5 deletions subworkflows/local/genome_assembly.nf
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,16 @@ include { BAKTA_BAKTA } from '../../modules/nf-core/bakta/bakta/main'
workflow GENOME_ASSEMBLY {

take:
ch_input // channel: [ val(meta), [fastq_1, fastq_2 ], val(ref_meta), file(reference) ]
ch_input // channel: [ val(meta), [fastq_1, fastq_2], val(ref_meta), file(reference), val(group_meta), val(kingdom) ]

main:

ch_versions = Channel.empty()
ch_reads = ch_input.map { it[0..1] }
ch_ref = ch_input.map { [it[0], it[2], it[3]] }
ch_input_filtered = ch_input
.filter { it[5] == "Bacteria" }
ch_reads = ch_input_filtered
.map { it[0..1] }
.unique()

FASTP ( ch_reads, [], false, false )
ch_versions = ch_versions.mix(FASTP.out.versions.first())
Expand All @@ -30,8 +33,8 @@ workflow GENOME_ASSEMBLY {
)
ch_versions = ch_versions.mix(FILTER_ASSEMBLY.out.versions.first())

ch_ref_grouped = FILTER_ASSEMBLY.out.filtered
.join(ch_ref)
ch_ref_grouped = ch_input_filtered
.combine(FILTER_ASSEMBLY.out.filtered)
.groupTuple(by: 2)
.map { [it[2], it[3].sort()[0], it[1]] }
QUAST (
Expand Down
11 changes: 6 additions & 5 deletions subworkflows/local/variant_calling_analysis.nf
Original file line number Diff line number Diff line change
Expand Up @@ -14,23 +14,24 @@ workflow VARIANT_CALLING_ANALYSIS {
ch_reference = input.map { [it[0], it[2], it[3]] }
ch_versions = Channel.empty()

MAKE_REFERENCE_INDEX ( ch_reference )
MAKE_REFERENCE_INDEX ( ch_reference.unique() )
ch_versions = ch_versions.mix(MAKE_REFERENCE_INDEX.out.versions)

ALIGN_READS_TO_REF (
ch_reads
.join(ch_reference)
.join(MAKE_REFERENCE_INDEX.out.samtools_fai)
.join(MAKE_REFERENCE_INDEX.out.bwa_index)
.unique()
)
ch_versions = ch_versions.mix(ALIGN_READS_TO_REF.out.versions)

CALL_VARIANTS (
ALIGN_READS_TO_REF.out.bam
.join(ALIGN_READS_TO_REF.out.bai)
.join(input.map { [it[0], it[2], it[3], it[4]] })
.join(MAKE_REFERENCE_INDEX.out.samtools_fai)
.join(MAKE_REFERENCE_INDEX.out.picard_dict)
.join(ALIGN_READS_TO_REF.out.bai, by: 0..1) // [meta, ref_meta, bam, bai]
.combine(input.map { [it[0], it[2], it[3], it[4]] }, by: 0..1) // [meta, ref_meta, bam, bai, ref, group_meta]
.combine(MAKE_REFERENCE_INDEX.out.samtools_fai, by: 1)
.combine(MAKE_REFERENCE_INDEX.out.picard_dict, by: 1)
)
ch_versions = ch_versions.mix(CALL_VARIANTS.out.versions)

Expand Down
17 changes: 11 additions & 6 deletions test/data/metadata_medium.csv
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
sample,fastq_1,fastq_2,reference,reference_id,lab_id,report_group,lab_name,date_isolated,date_received,year,host,cv_key,nusery,breeder,breeder_long,br_key,notes
22-299,test/data/reads/22-299_R1.fastq.gz,test/data/reads/22-299_R2.fastq.gz,,,202200055,xan_test;subgroup,"Karen Rane,Univ. MD",3/2/22,3/29/22,2022,Pelargonium x hortorum,CV-1,MD,Dummen,Dummen,Br-1,Karen Rane - originally contacted Melodie | Dummen is original source
22-310,test/data/reads/22-310_R1.fastq.gz,test/data/reads/22-310_R2.fastq.gz,,,MD-30A,xan_test;subgroup,"Margery Daughtrey, Cornell",3/4/22,4/5/22,2022,P. x hortorum,CV-8,NY,Dummen,Dummen,Br-1,NA
22-331,test/data/reads/22-331_R1.fastq.gz,test/data/reads/22-331_R2.fastq.gz,test/data/refs/reference-22-331.fna,22_331_assembly,22-00152,xan_test,"John Bonkowski, Purdue",3/21/22,4/21/22,2022,Pelargonium x,CV-20,IN,Syngenta,Syngenta Calliope,Br-2,Samples from Syngenta (Purdue?) housed in same house as Dummen
pram1,test/data/reads/lane6-s013-indexRPI27-ATTCCT-7612-C7_S13_L006_R1_001.fastq.gz,test/data/reads/lane6-s013-indexRPI27-ATTCCT-7612-C7_S13_L006_R2_001.fastq.gz,test/data/refs/PR-102_v3.1.fasta.gz,PR-102_v3.1,NA,pram_test,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
pram2,test/data/reads/lane6-s008-indexRPI16-CCGTCC-7612-D1_S8_L006_R1_001.fastq.gz,test/data/reads/lane6-s008-indexRPI16-CCGTCC-7612-D1_S8_L006_R2_001.fastq.gz,,,NA,pram_test,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
sample,fastq_1,fastq_2,reference,reference_id,report_group,lab_id,lab_name,date_isolated,date_received,year,host,cv_key,nusery,breeder,breeder_long,br_key,notes
22-299,test/data/reads/22-299_R1.fastq.gz,test/data/reads/22-299_R2.fastq.gz,,,xan_test;subgroup,202200055,"Karen Rane,Univ. MD",3/2/22,3/29/22,2022,Pelargonium x hortorum,CV-1,MD,Dummen,Dummen,Br-1,Karen Rane - originally contacted Melodie | Dummen is original source
22-300,test/data/reads/22-300_R1.fastq.gz,test/data/reads/22-300_R2.fastq.gz,,,xan_test;subgroup,202200056,"Karen Rane,Univ. MD",3/2/22,3/30/22,2022,Pelargonium x hortorum,CV-2,MD,Dummen,Dummen,Br-1,NA
22-301,test/data/reads/22-301_R1.fastq.gz,test/data/reads/22-301_R2.fastq.gz,,,xan_test;subgroup,202200057,"Karen Rane,Univ. MD",3/2/22,3/31/22,2022,Pelargonium x hortorum,CV-3,MD,Dummen,Dummen,Br-1,NA
22-324,test/data/reads/22-324_R1.fastq.gz,test/data/reads/22-324_R2.fastq.gz,,,xan_test;subgroup,MD 5-1-7,"Margery Daughtrey, Cornell",1987,4/19/22,1987,P. x hortorum,NA,NA,NA,NA,NA,NA
22-329A,test/data/reads/22-329A_R1.fastq.gz,test/data/reads/22-329A_R2.fastq.gz,test/data/refs/reference-22-331.fna,22_331_assembly,xan_test,MD-52B,"Margery Daughtrey, Cornell",3/25/22,4/30/22,2022,P. x hortorum,CV-28,PA,Dummen,Dummen,Br-1,from same plant as 22-329B
22-329B,test/data/reads/22-329B_R1.fastq.gz,test/data/reads/22-329B_R2.fastq.gz,test/data/refs/reference-22-331.fna,22_331_assembly,xan_test,MD-52A,"Margery Daughtrey, Cornell",3/25/22,4/29/22,2022,P. x hortorum,CV-28,PA,Dummen,Dummen,Br-1,NA
22-330,test/data/reads/22-330_R1.fastq.gz,test/data/reads/22-330_R2.fastq.gz,test/data/refs/reference-22-331.fna,22_331_assembly,xan_test,MD-53B,"Margery Daughtrey, Cornell",3/25/22,5/1/22,2022,P. x hortorum,CV-29,PA,Syngenta,Syngenta,Br-2,NA
SRR17286018,test/data/reads/SRR17286018_R1.fastq.gz,test/data/reads/SRR17286018_R2.fastq.gz,test/data/refs/reference-22-331.fna,22_331_assembly,xan_test,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
pram1,test/data/reads/lane6-s013-indexRPI27-ATTCCT-7612-C7_S13_L006_R1_001.fastq.gz,test/data/reads/lane6-s013-indexRPI27-ATTCCT-7612-C7_S13_L006_R2_001.fastq.gz,test/data/refs/PR-102_v3.1.fasta.gz,PR-102_v3.1,pram_test,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
pram2,test/data/reads/lane6-s008-indexRPI16-CCGTCC-7612-D1_S8_L006_R1_001.fastq.gz,test/data/reads/lane6-s008-indexRPI16-CCGTCC-7612-D1_S8_L006_R2_001.fastq.gz,test/data/refs/PR-102_v3.1.fasta.gz,PR-102_v3.1,pram_test,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
24 changes: 14 additions & 10 deletions workflows/plantpathsurveil.nf
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ include { CORE_GENOME_PHYLOGENY } from '../subworkflows/local/core_genome_phy
include { VARIANT_CALLING_ANALYSIS } from '../subworkflows/local/variant_calling_analysis'
include { DOWNLOAD_REFERENCES } from '../subworkflows/local/download_references'
include { ASSIGN_REFERENCES } from '../subworkflows/local/assign_references'
include { GENOME_ASSEMBLY } from '../subworkflows/local/genome_assembly'


/*
Expand Down Expand Up @@ -78,7 +79,7 @@ workflow PLANTPATHSURVEIL {
INPUT_CHECK (
ch_input
)
ch_reads = INPUT_CHECK.out.sample_data
ch_reads = INPUT_CHECK.out.sample_data // [val(meta), [file(fastq)], val(ref_meta), file(reference), val(group_meta)]
.map { it[0..1] }
.distinct()
ch_versions = ch_versions.mix(INPUT_CHECK.out.versions)
Expand Down Expand Up @@ -117,17 +118,20 @@ workflow PLANTPATHSURVEIL {
)

// Assemble and annotate bacterial genomes
//GENOME_ASSEMBLY (
// ch_reads
// .join(ch_reference)
//)
//ch_versions = ch_versions.mix(GENOME_ASSEMBLY.out.versions)
GENOME_ASSEMBLY (
ASSIGN_REFERENCES.out.sample_data
.combine(COARSE_SAMPLE_TAXONOMY.out.kingdom, by: 0)
)
ch_versions = ch_versions.mix(GENOME_ASSEMBLY.out.versions)

// Create core gene phylogeny for bacterial samples
//CORE_GENOME_PHYLOGENY (
// GENOME_ASSEMBLY.out.gff.join(ch_reference),
// ch_samplesheet
//)
gff_and_group = ASSIGN_REFERENCES.out.sample_data // [val(meta), [file(fastq)], val(ref_meta), file(reference), val(group_meta)]
.combine(GENOME_ASSEMBLY.out.gff, by: 0) // [val(meta), [file(fastq)], val(ref_meta), file(reference), val(group_meta), file(gff)]
.map { [it[0], it[5], it[4]] } // [ val(meta), file(gff), val(group_meta) ]
CORE_GENOME_PHYLOGENY (
gff_and_group,
ch_input
)

// Read2tree phylogeny for eukaryotes
//READ2TREE_ANALYSIS (
Expand Down

0 comments on commit 4bc90b0

Please sign in to comment.