Skip to content

Commit

Permalink
Merge pull request #68 from umccr/oncoanalyser-sash-inputs
Browse files Browse the repository at this point in the history
Support oncoanalyser and sash inputs
  • Loading branch information
pdiakumis authored Apr 16, 2024
2 parents 7884814 + f704447 commit 14c1f5e
Show file tree
Hide file tree
Showing 31 changed files with 1,393 additions and 1,139 deletions.
14 changes: 9 additions & 5 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,18 +1,21 @@
# Generated by roxygen2: do not edit by hand

export(abbreviate_effect)
export(af_summary)
export(bcftools_stats_plot)
export(cancer_rmd)
export(count_pieces)
export(date_log)
export(dragen_hrd)
export(get_oncokb_genes)
export(hrd_results_tabs)
export(is_vcf)
export(is_vcf_bpi)
export(mkdir)
export(pkg_exists)
export(plot_bnd_sr_pr_tot_hist)
export(plot_bnd_sr_pr_tot_lines)
export(process_cnv_tsv)
export(process_sv)
export(purple_cnv_germ_process)
export(purple_cnv_germ_read)
export(purple_cnv_som_gene_process)
export(purple_cnv_som_gene_read)
export(purple_cnv_som_process)
Expand All @@ -21,11 +24,12 @@ export(purple_kataegis)
export(purple_purity_read)
export(purple_qc_read)
export(purple_snv_vcf_read)
export(purple_version_read)
export(read_oncokb)
export(session_info_kable)
export(tsv_is_empty)
export(umccrise_read_sv_tsv)
export(vcf_is_empty)
export(virusbreakend_summary_read)
export(virusbreakend_vcf_read)
export(write_jsongz)
export(write_tsvgz)
export(write_tsvjsongz)
Expand Down
24 changes: 24 additions & 0 deletions R/oncokb.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
#' @export
read_oncokb <- function(x) {
readr::read_tsv(x) |>
dplyr::filter(
`OncoKB Annotated` == "Yes"
) |>
dplyr::pull("Hugo Symbol")
}

#' @export
get_oncokb_genes <- function(x, oncokb_genes) {
delimiters <- " ,&-"
delimiter_re <- paste0("[", delimiters, "]")

oncokb_genes |>
# Create regexes for each match, utilising delimiters for boundaries. Handles most cases where a gene symbol contains the '-' delimiter
purrr::map(function(n) paste0("(?<=^|", delimiter_re, ")", n, "(?=", delimiter_re, "|$)")) |>
# Loop with nm iterations through regex and gene symbols
purrr::map(function(n) stringr::str_detect(x, n) |> tibble::as_tibble_row(.name_repair="unique_quiet")) |>
# Combine as tibble to access dplyr::summarise and compile list of detected OncoKB gene symbols for each effect
dplyr::bind_rows() |>
dplyr::summarise(dplyr::across(dplyr::everything(), function(v) { paste0(sort(oncokb_genes[v]), collapse=", ") })) |>
unlist()
}
141 changes: 141 additions & 0 deletions R/oncoviral.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
#' Read VIRUSBreakend Summary File
#'
#' Reads the `virusbreakend.vcf.summary.tsv` file.
#'
#' @param x Path to `virusbreakend.vcf.summary.tsv` file.
#'
#' @return List with two elements:
#' * `tab`: Tibble containing data.
#' * `descr`: Description of tibble columns.
#'
#' @examples
#' x <- system.file("extdata/virusbreakend/virusbreakend.vcf.summary.tsv", package = "gpgr")
#' (vb <- virusbreakend_summary_read(x))
#' @testexamples
#' expect_equal(colnames(vb)[ncol(vb)], "QCStatus")
#'
#' @export
virusbreakend_summary_read <- function(x) {

nm <- c(
"taxid_genus" = "c",
"name_genus" = "c",
"reads_genus_tree" = "i",
"taxid_species" = "c",
"name_species" = "c",
"reads_species_tree" = "i",
"taxid_assigned" = "c",
"name_assigned" = "c",
"reads_assigned_tree" = "i",
"reads_assigned_direct" = "i",
"reference" = "c",
"reference_taxid" = "c",
"reference_kmer_count" = "i",
"alternate_kmer_count" = "i",
"rname" = "c",
"startpos" = "i",
"endpos" = "i",
"numreads" = "i",
"covbases" = "d",
"coverage" = "d",
"meandepth" = "d",
"meanbaseq" = "d",
"meanmapq" = "d",
"integrations" = "i",
"QCStatus" = "c"
)

ctypes <- paste(nm, collapse = "")
virusbreakend_summary <- readr::read_tsv(x, col_types = ctypes)

if (nrow(virusbreakend_summary) > 0) {
assertthat::assert_that(ncol(virusbreakend_summary) == length(nm))
assertthat::assert_that(all(colnames(virusbreakend_summary) == names(nm)))

virusbreakend_summary <- virusbreakend_summary |>
dplyr::select(
Virus="name_assigned",
Length="endpos",
Reads="numreads",
Coverage="coverage",
`Mean depth`="meandepth",
Intergrations="integrations",
QC="QCStatus",
)
}

descr <- dplyr::tribble(
~Column, ~Description,
"Virus", "Assigned NCBI taxonomy name of viral reference",
"Length", "Length of viral contig",
"Reads", "Number of reads mapped to adjusted viral reference",
"Coverage", "Percentage of viral positions with at least one read mapped",
"Mean depth", "Mean alignment depth",
"Intergrations", "Number of detected integration breakpoints",
"QC", "QC status of viral intergrations",
)

list(
tab = virusbreakend_summary,
descr = descr
)
}

#' Read VIRUSBreakend VCF File
#'
#' Reads the `virusbreakend.vcf` file and selects data to present.
#'
#' @param x Path to `virusbreakend.vcf` file.
#'
#' @return List with two elements:
#' * `tab`: Tibble containing selected data.
#' * `descr`: Description of tibble columns.
#'
#' @examples
#' x <- system.file("extdata/virusbreakend/virusbreakend.vcf", package = "gpgr")
#' (vb <- virusbreakend_vcf_read(x))
#' @testexamples
#' expect_equal(colnames(vb)[ncol(vb)], "QC")
#'
#' @export
virusbreakend_vcf_read <- function(x) {

d <- bedr::read.vcf(x, split.info = TRUE, verbose = FALSE)

if (nrow(d$vcf) > 0) {
virusbreakend_integrations <- tibble::as_tibble(d$vcf) |>
dplyr::select(
Contig="CHROM",
Position="POS",
"Fragment support"="BVF",
"Fragment support (unmapped)"="BUM",
"Softclip read support"="BSC",
Reference="REF",
Alt="ALT",
`Breakend ID`="ID",
`Mate ID`="MATEID",
QC="FILTER",
)
} else {
virusbreakend_integrations <- tibble::tibble()
}

descr <- dplyr::tribble(
~Column, ~Description,
"Contig", "Name of contig",
"Position", "Position of breakend in contig",
"Breakend ID", "ID of integration breakend",
"Mate ID", "ID of integration breakend mate",
"Reference", "Reference allele",
"Alt", "Alternative allele",
"QC", "VCF filter values",
"Fragment support", "Total number of fragments supporting breakend",
"Fragment support (unmapped)", "Number of fragments supporting breakend that have one read unmapped",
"Softclip read support", "Number of softclipped reads supporting breakend"
)

list(
tab = virusbreakend_integrations,
description = descr
)
}
Loading

0 comments on commit 14c1f5e

Please sign in to comment.