diff --git a/assets/main_report/01-identification.Rmd b/assets/main_report/01-identification.Rmd index d334fcd1..00e2cfa9 100644 --- a/assets/main_report/01-identification.Rmd +++ b/assets/main_report/01-identification.Rmd @@ -12,7 +12,6 @@ params = list( params = lapply(params, function(x) file.path(work_dir, x)) ``` - ```{r id_setup, include=FALSE} knitr::opts_chunk$set(echo = FALSE, fig.width = 10, warning = FALSE) ``` @@ -33,6 +32,9 @@ core_tree <- ape::read.tree(params$core_phylo) snp_trees <- ape::read.tree(params$snp_phylos) ``` +## Sendsketch + +A table with sendsketch results ## Initial ANI tree diff --git a/assets/main_report/02-diversity.Rmd b/assets/main_report/02-diversity.Rmd index bdc60b8c..25a9e4e7 100644 --- a/assets/main_report/02-diversity.Rmd +++ b/assets/main_report/02-diversity.Rmd @@ -1,5 +1,8 @@ # Diversity +How do the samples compare to each other? + + ```{r include=FALSE, eval=FALSE} work_dir = "/media/fosterz/external_primary/files/projects/work/current/nf-core-plantpathsurveil/work/60/71f547293aa5d22f469ea3ee215581" params = list( diff --git a/assets/main_report/05-map.Rmd b/assets/main_report/05-map.Rmd new file mode 100644 index 00000000..a99ee781 --- /dev/null +++ b/assets/main_report/05-map.Rmd @@ -0,0 +1,7 @@ +# Spatial distribution + +Color by: +* sample metadata +* time isolated/submitted +* gene content +* relatedness \ No newline at end of file diff --git a/assets/main_report/06-quality_control.Rmd b/assets/main_report/06-quality_control.Rmd new file mode 100644 index 00000000..c211c3cf --- /dev/null +++ b/assets/main_report/06-quality_control.Rmd @@ -0,0 +1,35 @@ +# Quality control + +* A quick indicator of status of each step. 
+ +## Input data quality + +* multiqc link + +## Downloaded references + +* quast +* table with rows for each sample with info on references chosen + - sample id + - reference id + - ANI between sample and reference +* table with one row per reference (taxon id, GSA id, classification, link to ncbi) +* sourmash output (tree?) + +## Assembly and annotation + +* depth of coverage +* quast link +* BUSCO gene content? +* bakta output? + +## Variant calling? + +* vcfr for plots +* iqtree model selection, number of informative sites, indels + +## Core genome phylogeny + +* core gene info (how many genes, length, paralogs) +* outlier samples causing few genes to be chosen +* iqtree model selection, number of informative sites, indels \ No newline at end of file diff --git a/assets/main_report/07-references.Rmd b/assets/main_report/07-references.Rmd index b216bb75..d54597e4 100644 --- a/assets/main_report/07-references.Rmd +++ b/assets/main_report/07-references.Rmd @@ -1,3 +1,14 @@ +## Software used + + +```{r include=FALSE} +# automatically create a bib database for R packages +knitr::write_bib(c( + .packages(), 'bookdown', 'knitr', 'rmarkdown' +), 'packages.bib') +``` + + `r if (knitr::is_html_output()) ' # References {-} '` diff --git a/assets/main_report/README.md b/assets/main_report/README.md deleted file mode 100644 index 217c365e..00000000 --- a/assets/main_report/README.md +++ /dev/null @@ -1,13 +0,0 @@ -Welcome! - -This is a minimal example of a book based on R Markdown and **bookdown** (https://github.com/rstudio/bookdown). - -This template provides a skeleton file structure that you can edit to create your book. - -The contents inside the .Rmd files provide some pointers to help you get started, but feel free to also delete the content in each file and start fresh. 
- -Additional resources: - -The **bookdown** book: https://bookdown.org/yihui/bookdown/ - -The **bookdown** package reference site: https://pkgs.rstudio.com/bookdown diff --git a/assets/main_report/_main.Rmd b/assets/main_report/_main.Rmd new file mode 100644 index 00000000..9d4453cd --- /dev/null +++ b/assets/main_report/_main.Rmd @@ -0,0 +1,198 @@ +--- +title: "Pathogensurveillance Report" +date: "`r Sys.Date()`" +site: bookdown::bookdown_site +documentclass: book +bibliography: [book.bib, packages.bib] +# url: your book url like https://bookdown.org/yihui/bookdown +# cover-image: path to the social sharing image like images/cover.jpg +description: | + A report generated by the Pathogensurveillance genome analysis pipeline. +link-citations: yes +github-repo: rstudio/bookdown-demo +params: + samp_data: "/media/fosterz/external_primary/files/projects/work/current/pathogensurveillance/test/data/metadata_medium.csv" + ref_data: "/media/fosterz/external_primary/files/projects/work/current/pathogensurveillance/work/60/71f547293aa5d22f469ea3ee215581/merged_assembly_stats.tsv" + snp_phylos: "/media/fosterz/external_primary/files/projects/work/current/pathogensurveillance/work/60/71f547293aa5d22f469ea3ee215581/xan_test_22_331_assembly.treefile" + ani_matrix: "/media/fosterz/external_primary/files/projects/work/current/pathogensurveillance/work/60/71f547293aa5d22f469ea3ee215581/comp.csv" + core_phylo: "/media/fosterz/external_primary/files/projects/work/current/pathogensurveillance/work/60/71f547293aa5d22f469ea3ee215581/xan_test.treefile" +--- + +# Run info + +This is the first page a user sees. +What should go here? 
+ +## Input settings + +## Software used + + +```{r include=FALSE} +# automatically create a bib database for R packages +knitr::write_bib(c( + .packages(), 'bookdown', 'knitr', 'rmarkdown' +), 'packages.bib') +``` + + + +# Identification + +```{r include=FALSE, eval=FALSE} +work_dir = "/media/fosterz/external_primary/files/projects/work/current/nf-core-plantpathsurveil/work/60/71f547293aa5d22f469ea3ee215581" +params = list( + samp_data = "metadata_medium.csv", + ref_data = "merged_assembly_stats.tsv", + snp_phylos = list("xan_test_22_331_assembly.treefile"), + ani_matrix = "comp.csv", + core_phylo = "xan_test.treefile" +) +params = lapply(params, function(x) file.path(work_dir, x)) +``` + + +```{r id_setup, include=FALSE} +knitr::opts_chunk$set(echo = FALSE, fig.width = 10, warning = FALSE) +``` + +```{r id_libraries} +library(phylocanvas) +library(ape) +library(magrittr) +``` + +```{r id_parse_inputs} +ref_meta <- read.csv(params$ref_data, sep = '\t') +ref_meta$modified_id <- gsub(ref_meta$LastMajorReleaseAccession, pattern = ".", replacement = "_", fixed = TRUE) +samp_meta <- read.csv(params$samp_data, sep = ',') +samp_meta$modified_id <- paste0(gsub(samp_meta$sample, pattern = "-", replacement = "_", fixed = TRUE), "_T1") +ani_matrix <- read.csv(params$ani_matrix, sep = ',', check.names = FALSE) +core_tree <- ape::read.tree(params$core_phylo) +snp_trees <- ape::read.tree(params$snp_phylos) +``` + + +## Initial ANI tree + + +## Core genome phylogeny + + +```{r id_core_phylo, fig.height = 7, eval = ! 
is.null(core_tree)} +# Identify which tips are samples and references +sample_ids <- core_tree$tip.label[core_tree$tip.label %in% samp_meta$modified_id] + +# Root tree +colnames(ani_matrix) <- gsub(colnames(ani_matrix), pattern = "[.-]", replacement = "_") +rownames(ani_matrix) <- colnames(ani_matrix) +group_ani <- ani_matrix[rownames(ani_matrix) %in% core_tree$tip.label, colnames(ani_matrix) %in% core_tree$tip.label] +core_tree <- root(core_tree, names(which.min(colMeans(group_ani[sample_ids, ])))) + +# Set tip labels to taxon names for reference sequences +# TODO: need a more reliable way to get IDs +name_key <- c( + ref_meta$Organism, + samp_meta$sample +) +names(name_key) <- c( + ref_meta$modified_id, + samp_meta$modified_id +) +core_tree$tip.label <- name_key[core_tree$tip.label] + +# Plot tree +phycanv <- phylocanvas(core_tree, treetype = "rectangular", alignlabels = T, showscalebar = T, width = "100%") +for (x in name_key[sample_ids]) { + phycanv <- style_node(phycanv, x, labelcolor = "green", labeltextsize = 30) +} + +phycanv +``` + +```{asis id_no_core_phylo, echo = is.null(core_tree)} +There is no tree to draw, probably because there were too few samples. +More info will be added later. 
+``` + + + + + + + + +# Diversity + +```{r include=FALSE, eval=FALSE} +work_dir = "/media/fosterz/external_primary/files/projects/work/current/nf-core-plantpathsurveil/work/60/71f547293aa5d22f469ea3ee215581" +params = list( + samp_data = "metadata_medium.csv", + ref_data = "merged_assembly_stats.tsv", + snp_phylos = list("xan_test_22_331_assembly.treefile"), + ani_matrix = "comp.csv", + core_phylo = "xan_test.treefile" +) +params = lapply(params, function(x) file.path(work_dir, x)) +``` + + +```{r div_setup, include=FALSE} +knitr::opts_chunk$set(echo = FALSE, fig.width = 10, warning = FALSE) +``` + +```{r div_libraries} +library(phylocanvas) +library(ape) +``` + +```{r div_parse_inputs} +ref_meta <- read.csv(params$ref_data, sep = '\t') +ref_meta$modified_id <- gsub(ref_meta$LastMajorReleaseAccession, pattern = ".", replacement = "_", fixed = TRUE) +samp_meta <- read.csv(params$samp_data, sep = ',') +samp_meta$modified_id <- paste0(gsub(samp_meta$sample, pattern = "-", replacement = "_", fixed = TRUE), "_T1") +ani_matrix <- read.csv(params$ani_matrix, sep = ',', check.names = FALSE) +core_tree <- ape::read.tree(params$core_phylo) +snp_trees <- ape::read.tree(params$snp_phylos) +``` + +## SNP phylogeny + +```{r div_snp_phylo, fig.height = 7, eval = ! is.null(snp_trees)} +# Root tree +snp_trees <- root(snp_trees, "REF") + +# Plot tree +phycanv <- phylocanvas(snp_trees, treetype = "rectangular", alignlabels = T, showscalebar = T, width = "100%") +phycanv +``` + +```{asis div_no_snp_phylo, echo = is.null(snp_trees)} +There is no tree to draw, probably because there were too few samples. +More info will be added later. 
+``` + + +## Minimum spanning network + + + + +# Gene content + +## Antibiotic resistance genes + +## Effectors + +## Plasmids + +## User-defined genes + + + +`r if (knitr::is_html_output()) ' +# References {-} +'` + + + diff --git a/assets/main_report/classification.txt b/assets/main_report/classification.txt deleted file mode 100644 index 34b865c9..00000000 --- a/assets/main_report/classification.txt +++ /dev/null @@ -1 +0,0 @@ -sk:Bacteria;p:Proteobacteria;c:Alphaproteobacteria;o:Hyphomicrobiales;f:Rhizobiaceae;g:Sinorhizobium;s:Sinorhizobium sp. FG01 diff --git a/assets/main_report/index.Rmd b/assets/main_report/index.Rmd index 8d548368..9fd233b5 100644 --- a/assets/main_report/index.Rmd +++ b/assets/main_report/index.Rmd @@ -11,11 +11,11 @@ description: | link-citations: yes github-repo: rstudio/bookdown-demo params: - samp_data: "/media/fosterz/external_primary/files/projects/work/current/nf-core-plantpathsurveil/test/data/metadata_medium.csv" - ref_data: "/media/fosterz/external_primary/files/projects/work/current/nf-core-plantpathsurveil/work/60/71f547293aa5d22f469ea3ee215581/merged_assembly_stats.tsv" - snp_phylos: "/media/fosterz/external_primary/files/projects/work/current/nf-core-plantpathsurveil/work/60/71f547293aa5d22f469ea3ee215581/xan_test_22_331_assembly.treefile" - ani_matrix: "/media/fosterz/external_primary/files/projects/work/current/nf-core-plantpathsurveil/work/60/71f547293aa5d22f469ea3ee215581/comp.csv" - core_phylo: "/media/fosterz/external_primary/files/projects/work/current/nf-core-plantpathsurveil/work/60/71f547293aa5d22f469ea3ee215581/xan_test.treefile" + samp_data: "/media/fosterz/external_primary/files/projects/work/current/pathogensurveillance/test/data/metadata_medium.csv" + ref_data: "/media/fosterz/external_primary/files/projects/work/current/pathogensurveillance/work/60/71f547293aa5d22f469ea3ee215581/merged_assembly_stats.tsv" + snp_phylos: 
"/media/fosterz/external_primary/files/projects/work/current/pathogensurveillance/work/60/71f547293aa5d22f469ea3ee215581/xan_test_22_331_assembly.treefile" + ani_matrix: "/media/fosterz/external_primary/files/projects/work/current/pathogensurveillance/work/60/71f547293aa5d22f469ea3ee215581/comp.csv" + core_phylo: "/media/fosterz/external_primary/files/projects/work/current/pathogensurveillance/work/60/71f547293aa5d22f469ea3ee215581/xan_test.treefile" --- # Run info @@ -23,14 +23,8 @@ params: This is the first page a user sees. What should go here? -## Input settings - -## Software used +* status of pipeline execution +## Input settings -```{r include=FALSE} -# automatically create a bib database for R packages -knitr::write_bib(c( - .packages(), 'bookdown', 'knitr', 'rmarkdown' -), 'packages.bib') -``` +## Things that require user attention diff --git a/assets/main_report/kingdom.txt b/assets/main_report/kingdom.txt deleted file mode 100644 index bd9ff45f..00000000 --- a/assets/main_report/kingdom.txt +++ /dev/null @@ -1 +0,0 @@ -Bacteria diff --git a/assets/main_report/packages.bib b/assets/main_report/packages.bib index 1b37af92..e0b0199c 100644 --- a/assets/main_report/packages.bib +++ b/assets/main_report/packages.bib @@ -1,3 +1,11 @@ +@Manual{R-ape, + title = {ape: Analyses of Phylogenetics and Evolution}, + author = {Emmanuel Paradis and Simon Blomberg and Ben Bolker and Joseph Brown and Santiago Claramunt and Julien Claude and Hoa Sien Cuong and Richard Desper and Gilles Didier and Benoit Durand and Julien Dutheil and RJ Ewing and Olivier Gascuel and Thomas Guillerme and Christoph Heibl and Anthony Ives and Bradley Jones and Franz Krah and Daniel Lawson and Vincent Lefort and Pierre Legendre and Jim Lemon and Guillaume Louvel and Eric Marcon and Rosemary McCloskey and Johan Nylander and Rainer Opgen-Rhein and Andrei-Alin Popescu and Manuela Royer-Carenzi and Klaus Schliep and Korbinian Strimmer and Damien {de Vienne}}, + year = {2023}, + note = {R package 
version 5.7-1}, + url = {https://CRAN.R-project.org/package=ape}, +} + @Manual{R-base, title = {R: A Language and Environment for Statistical Computing}, author = {{R Core Team}}, @@ -23,6 +31,23 @@ @Manual{R-knitr url = {https://yihui.org/knitr/}, } +@Manual{R-magrittr, + title = {magrittr: A Forward-Pipe Operator for R}, + author = {Stefan Milton Bache and Hadley Wickham}, + year = {2022}, + note = {R package version 2.0.3}, + url = {https://CRAN.R-project.org/package=magrittr}, +} + +@Manual{R-phylocanvas, + title = {phylocanvas: Interactive Phylogenetic Trees Using the Phylocanvas +JavaScript Library}, + author = {zachary charlop-powers}, + year = {2017}, + note = {R package version 0.1.3}, + url = {https://CRAN.R-project.org/package=phylocanvas}, +} + @Manual{R-rmarkdown, title = {rmarkdown: Dynamic Documents for R}, author = {JJ Allaire and Yihui Xie and Christophe Dervieux and Jonathan McPherson and Javier Luraschi and Kevin Ushey and Aron Atkins and Hadley Wickham and Joe Cheng and Winston Chang and Richard Iannone}, @@ -31,6 +56,16 @@ @Manual{R-rmarkdown url = {https://CRAN.R-project.org/package=rmarkdown}, } +@Article{ape2019, + title = {ape 5.0: an environment for modern phylogenetics and evolutionary analyses in {R}}, + author = {Emmanuel Paradis and Klaus Schliep}, + journal = {Bioinformatics}, + year = {2019}, + volume = {35}, + pages = {526-528}, + doi = {10.1093/bioinformatics/bty633}, +} + @Book{bookdown2016, title = {bookdown: Authoring Books and Technical Documents with {R} Markdown}, author = {Yihui Xie}, diff --git a/workflows/pathogensurveillance.nf b/workflows/pathogensurveillance.nf index e7bcf7a6..71573770 100644 --- a/workflows/pathogensurveillance.nf +++ b/workflows/pathogensurveillance.nf @@ -85,6 +85,8 @@ workflow PATHOGENSURVEILLANCE { .distinct() ch_versions = ch_versions.mix(INPUT_CHECK.out.versions) + ch_reads.map { [it[0], null, it[1]]}.view() + // Run FastQC FASTQC ( ch_reads @@ -96,7 +98,7 @@ workflow PATHOGENSURVEILLANCE { 
ch_reads ) ch_versions = ch_versions.mix(COARSE_SAMPLE_TAXONOMY.out.versions) - + // Search for and download reference assemblies for all samples DOWNLOAD_REFERENCES ( COARSE_SAMPLE_TAXONOMY.out.species, @@ -104,6 +106,13 @@ workflow PATHOGENSURVEILLANCE { COARSE_SAMPLE_TAXONOMY.out.families ) + // Create main summary report + MAIN_REPORT ( + INPUT_CHECK.out.sample_data.map {[ it[4], it[2], null ]}.groupTuple().map {it + [null, null]}, + ch_input, + DOWNLOAD_REFERENCES.out.stats + ) + // Assign closest reference for samples without a user-assigned reference ASSIGN_REFERENCES ( INPUT_CHECK.out.sample_data,