-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #68 from umccr/oncoanalyser-sash-inputs
Support oncoanalyser and sash inputs
- Loading branch information
Showing
31 changed files
with
1,393 additions
and
1,139 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
#' @export | ||
read_oncokb <- function(x) { | ||
readr::read_tsv(x) |> | ||
dplyr::filter( | ||
`OncoKB Annotated` == "Yes" | ||
) |> | ||
dplyr::pull("Hugo Symbol") | ||
} | ||
|
||
#' @export | ||
get_oncokb_genes <- function(x, oncokb_genes) { | ||
delimiters <- " ,&-" | ||
delimiter_re <- paste0("[", delimiters, "]") | ||
|
||
oncokb_genes |> | ||
# Create regexes for each match, utilising delimiters for boundaries. Handles most cases where a gene symbol contains the '-' delimiter | ||
purrr::map(function(n) paste0("(?<=^|", delimiter_re, ")", n, "(?=", delimiter_re, "|$)")) |> | ||
# Loop with nm iterations through regex and gene symbols | ||
purrr::map(function(n) stringr::str_detect(x, n) |> tibble::as_tibble_row(.name_repair="unique_quiet")) |> | ||
# Combine as tibble to access dplyr::summarise and compile list of detected OncoKB gene symbols for each effect | ||
dplyr::bind_rows() |> | ||
dplyr::summarise(dplyr::across(dplyr::everything(), function(v) { paste0(sort(oncokb_genes[v]), collapse=", ") })) |> | ||
unlist() | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,141 @@ | ||
#' Read VIRUSBreakend Summary File | ||
#' | ||
#' Reads the `virusbreakend.vcf.summary.tsv` file. | ||
#' | ||
#' @param x Path to `virusbreakend.vcf.summary.tsv` file. | ||
#' | ||
#' @return List with two elements: | ||
#' * `tab`: Tibble containing data. | ||
#' * `descr`: Description of tibble columns. | ||
#' | ||
#' @examples | ||
#' x <- system.file("extdata/virusbreakend/virusbreakend.vcf.summary.tsv", package = "gpgr") | ||
#' (vb <- virusbreakend_summary_read(x)) | ||
#' @testexamples | ||
#' expect_equal(colnames(vb)[ncol(vb)], "QCStatus") | ||
#' | ||
#' @export | ||
virusbreakend_summary_read <- function(x) { | ||
|
||
nm <- c( | ||
"taxid_genus" = "c", | ||
"name_genus" = "c", | ||
"reads_genus_tree" = "i", | ||
"taxid_species" = "c", | ||
"name_species" = "c", | ||
"reads_species_tree" = "i", | ||
"taxid_assigned" = "c", | ||
"name_assigned" = "c", | ||
"reads_assigned_tree" = "i", | ||
"reads_assigned_direct" = "i", | ||
"reference" = "c", | ||
"reference_taxid" = "c", | ||
"reference_kmer_count" = "i", | ||
"alternate_kmer_count" = "i", | ||
"rname" = "c", | ||
"startpos" = "i", | ||
"endpos" = "i", | ||
"numreads" = "i", | ||
"covbases" = "d", | ||
"coverage" = "d", | ||
"meandepth" = "d", | ||
"meanbaseq" = "d", | ||
"meanmapq" = "d", | ||
"integrations" = "i", | ||
"QCStatus" = "c" | ||
) | ||
|
||
ctypes <- paste(nm, collapse = "") | ||
virusbreakend_summary <- readr::read_tsv(x, col_types = ctypes) | ||
|
||
if (nrow(virusbreakend_summary) > 0) { | ||
assertthat::assert_that(ncol(virusbreakend_summary) == length(nm)) | ||
assertthat::assert_that(all(colnames(virusbreakend_summary) == names(nm))) | ||
|
||
virusbreakend_summary <- virusbreakend_summary |> | ||
dplyr::select( | ||
Virus="name_assigned", | ||
Length="endpos", | ||
Reads="numreads", | ||
Coverage="coverage", | ||
`Mean depth`="meandepth", | ||
Intergrations="integrations", | ||
QC="QCStatus", | ||
) | ||
} | ||
|
||
descr <- dplyr::tribble( | ||
~Column, ~Description, | ||
"Virus", "Assigned NCBI taxonomy name of viral reference", | ||
"Length", "Length of viral contig", | ||
"Reads", "Number of reads mapped to adjusted viral reference", | ||
"Coverage", "Percentage of viral positions with at least one read mapped", | ||
"Mean depth", "Mean alignment depth", | ||
"Intergrations", "Number of detected integration breakpoints", | ||
"QC", "QC status of viral intergrations", | ||
) | ||
|
||
list( | ||
tab = virusbreakend_summary, | ||
descr = descr | ||
) | ||
} | ||
|
||
#' Read VIRUSBreakend VCF File | ||
#' | ||
#' Reads the `virusbreakend.vcf` file and selects data to present. | ||
#' | ||
#' @param x Path to `virusbreakend.vcf` file. | ||
#' | ||
#' @return List with two elements: | ||
#' * `tab`: Tibble containing selected data. | ||
#' * `descr`: Description of tibble columns. | ||
#' | ||
#' @examples | ||
#' x <- system.file("extdata/virusbreakend/virusbreakend.vcf", package = "gpgr") | ||
#' (vb <- virusbreakend_vcf_read(x)) | ||
#' @testexamples | ||
#' expect_equal(colnames(vb)[ncol(vb)], "QC") | ||
#' | ||
#' @export | ||
virusbreakend_vcf_read <- function(x) { | ||
|
||
d <- bedr::read.vcf(x, split.info = TRUE, verbose = FALSE) | ||
|
||
if (nrow(d$vcf) > 0) { | ||
virusbreakend_integrations <- tibble::as_tibble(d$vcf) |> | ||
dplyr::select( | ||
Contig="CHROM", | ||
Position="POS", | ||
"Fragment support"="BVF", | ||
"Fragment support (unmapped)"="BUM", | ||
"Softclip read support"="BSC", | ||
Reference="REF", | ||
Alt="ALT", | ||
`Breakend ID`="ID", | ||
`Mate ID`="MATEID", | ||
QC="FILTER", | ||
) | ||
} else { | ||
virusbreakend_integrations <- tibble::tibble() | ||
} | ||
|
||
descr <- dplyr::tribble( | ||
~Column, ~Description, | ||
"Contig", "Name of contig", | ||
"Position", "Position of breakend in contig", | ||
"Breakend ID", "ID of integration breakend", | ||
"Mate ID", "ID of integration breakend mate", | ||
"Reference", "Reference allele", | ||
"Alt", "Alternative allele", | ||
"QC", "VCF filter values", | ||
"Fragment support", "Total number of fragments supporting breakend", | ||
"Fragment support (unmapped)", "Number of fragments supporting breakend that have one read unmapped", | ||
"Softclip read support", "Number of softclipped reads supporting breakend" | ||
) | ||
|
||
list( | ||
tab = virusbreakend_integrations, | ||
description = descr | ||
) | ||
} |
Oops, something went wrong.