Skip to content

Commit

Permalink
get_biomarkers, NCI update
Browse files Browse the repository at this point in the history
  • Loading branch information
sigven committed Sep 1, 2023
1 parent c7074da commit 9b0cd4a
Show file tree
Hide file tree
Showing 14 changed files with 302 additions and 125 deletions.
18 changes: 11 additions & 7 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,20 +1,24 @@
Package: pharmOncoX
Type: Package
Title: Annotated targeted and non-targeted anticancer drugs
Title: Molecularly targeted cancer drugs and biomarkers
Version: 1.4.3
Date: 2023-08-07
Date: 2023-09-01
Authors@R:
c(person(given = "Sigve",
family = "Nakken",
role = c("aut", "cre"),
email = "[email protected]",
comment = c(ORCID = "0000-0001-8468-2050")))
Maintainer: Sigve Nakken <[email protected]>
Description: This data package collects anticancer drug information from multiple resources,
including Open Targets Platform/ChEMBL, NCI Thesaurus, and PubChem. The main dataset
contains a list of all anticancer drugs per indication/tumor type (where this is provided).
which can be queried with a range of parameters, including clinical development phase, approval
status, drug targets, mechanism-of-action/drug category etc.
Description: This data package collects anticancer drug information from
multiple resources, including Open Targets Platform/ChEMBL, NCI Thesaurus,
and PubChem. The main dataset contains a list of all molecularly targeted
anticancer drugs per indication/tumor type (where this is provided),
which can be queried with a range of parameters, including
clinical development phase, approval status, drug targets,
mechanism-of-action/drug category etc. The package
also allows for the retrieval of curated biomarkers from multiple
freely available resources (CIViC, CGI, Mitelman database).
License: MIT + file LICENSE
URL: https://github.com/sigven/pharmOncoX
BugReports: https://github.com/sigven/pharmOncoX/issues
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# Generated by roxygen2: do not edit by hand

export(get_biomarkers)
export(get_drugs)
importFrom(rlang,":=")
importFrom(rlang,.data)
7 changes: 7 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,10 @@
# Version 1.4.4 (September 1st 2023)

* Updated NCI Thesaurus - 23.08d
* Updated CIViC data
* Updated Mitelman database (20230803)
* `get_biomarkers()` now exported as a main function

# Version 1.4.3 (August 7th 2023)

* NCI Thesaurus 23.07e
Expand Down
106 changes: 0 additions & 106 deletions R/helpers.R
Original file line number Diff line number Diff line change
Expand Up @@ -413,110 +413,4 @@ get_drug_records <- function(cache_dir = NA,
}


#' Function that retrieves pharmOncoX biomarker data (CIViC, CGI, MitelmanDB)
#' from Google Drive
#'
#' The dataset is read from `cache_dir` when a cached copy exists and
#' `force_download` is `FALSE`; otherwise it is downloaded from Google Drive
#' into `cache_dir`, and the md5 checksum of the downloaded file is compared
#' with the checksum reported by Google Drive before the file is read.
#'
#' @param cache_dir Local directory for data download (must exist)
#' @param force_download Logical indicating if the dataset should be
#' re-downloaded even if it already exists in the local cache. Values that
#' do not coerce to `TRUE` (including `NA`) are treated as `FALSE`.
#'
#' @return The object stored in the cached/downloaded RDS file, with an
#' additional element `fpath` holding the path to the local cache file.
#' The object is expected to carry the elements `data` and `metadata`;
#' a warning is logged if either is missing.
#'
#' @keywords internal
#'
#'
get_biomarkers <- function(cache_dir = NA,
                           force_download = FALSE) {

  lgr::lgr$appenders$console$set_layout(
    lgr::LayoutFormat$new(timestamp_fmt = "%Y-%m-%d %T"))

  # A missing, NULL or multi-valued cache_dir cannot denote a directory;
  # checking the length first keeps is.na() from yielding a non-scalar
  if (length(cache_dir) != 1 || is.na(cache_dir)) {
    msg <- paste0("Argument cache_dir = '",
                  paste(cache_dir, collapse = ", "),
                  "' is not defined")
    lgr::lgr$fatal(msg)
    stop(msg, call. = FALSE)
  }

  if (!is.character(cache_dir) || !dir.exists(cache_dir)) {
    msg <- paste0("Argument cache_dir = '",
                  cache_dir, "' does not exist")
    lgr::lgr$fatal(msg)
    stop(msg, call. = FALSE)
  }

  # Coerce once so NA or non-logical input cannot break the `if` below
  force_download <- isTRUE(as.logical(force_download))

  # TRUE only when the loaded object carries both expected elements
  has_payload <- function(x) {
    !is.null(x[["data"]]) && !is.null(x[["metadata"]])
  }

  biomarker_datasets <- list()
  file_maps <- c("biomarkers")

  for (elem in file_maps) {

    # Look up the reference record (version, Google Drive id) once
    ref <- db_id_ref[db_id_ref$name == elem, ]
    if (nrow(ref) != 1) {
      msg <- paste0("Expected exactly one reference record for '", elem,
                    "', found ", nrow(ref))
      lgr::lgr$fatal(msg)
      stop(msg, call. = FALSE)
    }

    fname_local <- file.path(
      cache_dir,
      paste0(elem, "_v", ref$pVersion, ".rds")
    )
    fname_gd <- googledrive::as_id(ref$gid)

    if (file.exists(fname_local) && !force_download) {

      dataset <- readRDS(fname_local)
      dataset[["fpath"]] <- fname_local
      biomarker_datasets[[elem]] <- dataset

      if (has_payload(dataset)) {
        lgr::lgr$info(paste0(
          "Reading from cache_dir = '",
          cache_dir, "', argument force_download = F"))
        lgr::lgr$info(paste0("Object '", elem, "' sucessfully loaded"))
      } else {
        lgr::lgr$warn(paste0(
          "Object '", elem, "' in '", fname_local,
          "' lacks element 'data' and/or 'metadata'"))
      }

    } else {

      googledrive::drive_deauth()

      lgr::lgr$info("Downloading remote dataset from Google Drive to cache_dir")
      dl <- googledrive::with_drive_quiet(
        googledrive::drive_download(
          fname_gd,
          path = fname_local,
          overwrite = TRUE)
      )

      md5checksum_remote <- dl$drive_resource[[1]]$md5Checksum
      md5checksum_local <- unname(tools::md5sum(fname_local))

      # isTRUE() turns a NULL/NA checksum into a regular mismatch instead of
      # an opaque "argument is of length zero" failure inside `if`
      if (!isTRUE(md5checksum_remote == md5checksum_local)) {
        # Drop the unverified file so that a later call with
        # force_download = FALSE cannot silently load it from the cache
        unlink(fname_local)
        msg <- paste0("md5 checksum of local file (", md5checksum_local,
                      ") is inconsistent with remote file (",
                      md5checksum_remote, ")")
        lgr::lgr$error(msg)
        stop(msg, call. = FALSE)
      }

      dataset <- readRDS(fname_local)
      dataset[["fpath"]] <- fname_local
      biomarker_datasets[[elem]] <- dataset

      if (has_payload(dataset)) {
        lgr::lgr$info(paste0(
          "Downloaded dataset to cache_dir = '", cache_dir, "'"))
        lgr::lgr$info(paste0("Object '", elem, "' sucessfully loaded"))
        lgr::lgr$info(paste0("md5 checksum is valid: ", md5checksum_remote))
      } else {
        lgr::lgr$warn(paste0(
          "Object '", elem, "' in '", fname_local,
          "' lacks element 'data' and/or 'metadata'"))
      }

    }
  }

  return(biomarker_datasets[["biomarkers"]])

}


136 changes: 135 additions & 1 deletion R/pharm_oncox.R
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,9 @@ get_drugs <- function(
lgr::LayoutFormat$new(timestamp_fmt = "%Y-%m-%d %T"))

valid_output_resolutions <-
c("drug","drug2target","drug2target2indication")
c("drug",
"drug2target",
"drug2target2indication")
valid_drug_action_types <-
c("INHIBITOR",
"AGONIST",
Expand Down Expand Up @@ -846,6 +848,138 @@ get_drugs <- function(
}



#' Get curated cancer biomarker datasets
#'
#' @description
#' Downloads preprocessed datasets to a local cache directory and returns a
#' curated set of genomic biomarkers from multiple sources
#' (CIViC, CGI, MitelmanDB)
#'
#' The dataset comes as a `list` object, with three elements:
#'
#' * `metadata` - a data frame with metadata regarding drug resources used
#' * `data` - a list with four elements ('civic','cgi','mitelmandb','custom_fusions')
#' * `fpath` - path to cache file
#'
#' @details
#' The dataset is read from `cache_dir` when a cached copy exists and
#' `force_download` is `FALSE`; otherwise it is downloaded from Google Drive
#' into `cache_dir`, and the md5 checksum of the downloaded file is compared
#' with the checksum reported by Google Drive before the file is read. A
#' download that fails this check is removed from the cache and an error is
#' raised.
#'
#' @param cache_dir Local directory for data download (must exist)
#' @param force_download Logical indicating if the dataset should be
#' re-downloaded even if it already exists in the local cache. Values that
#' do not coerce to `TRUE` (including `NA`) are treated as `FALSE`.
#'
#' @return
#'
#' Each source-specific entry (e.g. 'civic') in the `data` list contains
#' a list of three data frames:
#'
#' \itemize{
#' \item \emph{variant} - list of all biomarker variants, extensively populated
#' according to variant aliases (identifier - column \strong{variant_id})
#' \item \emph{clinical} - cross-references between variants recorded in
#' the `variant` data frame and clinical evidence items (identifier -
#' column \strong{evidence_id}) and underlying literature evidence
#' (identifier - column \strong{source_id})
#' \item \emph{literature} - lists literature for all source_id's listed in
#' the `clinical` data frame
#' }
#'
#' @export
#'
get_biomarkers <- function(cache_dir = NA,
                           force_download = FALSE) {

  lgr::lgr$appenders$console$set_layout(
    lgr::LayoutFormat$new(timestamp_fmt = "%Y-%m-%d %T"))

  # A missing, NULL or multi-valued cache_dir cannot denote a directory;
  # checking the length first keeps is.na() from yielding a non-scalar
  if (length(cache_dir) != 1 || is.na(cache_dir)) {
    msg <- paste0("Argument cache_dir = '",
                  paste(cache_dir, collapse = ", "),
                  "' is not defined")
    lgr::lgr$fatal(msg)
    stop(msg, call. = FALSE)
  }

  if (!is.character(cache_dir) || !dir.exists(cache_dir)) {
    msg <- paste0("Argument cache_dir = '",
                  cache_dir, "' does not exist")
    lgr::lgr$fatal(msg)
    stop(msg, call. = FALSE)
  }

  # Coerce once so NA or non-logical input cannot break the `if` below
  force_download <- isTRUE(as.logical(force_download))

  # TRUE only when the loaded object carries both expected elements
  has_payload <- function(x) {
    !is.null(x[["data"]]) && !is.null(x[["metadata"]])
  }

  biomarker_datasets <- list()
  file_maps <- c("biomarkers")

  for (elem in file_maps) {

    # Look up the reference record (version, Google Drive id) once
    ref <- db_id_ref[db_id_ref$name == elem, ]
    if (nrow(ref) != 1) {
      msg <- paste0("Expected exactly one reference record for '", elem,
                    "', found ", nrow(ref))
      lgr::lgr$fatal(msg)
      stop(msg, call. = FALSE)
    }

    fname_local <- file.path(
      cache_dir,
      paste0(elem, "_v", ref$pVersion, ".rds")
    )
    fname_gd <- googledrive::as_id(ref$gid)

    if (file.exists(fname_local) && !force_download) {

      dataset <- readRDS(fname_local)
      dataset[["fpath"]] <- fname_local
      biomarker_datasets[[elem]] <- dataset

      if (has_payload(dataset)) {
        lgr::lgr$info(paste0(
          "Reading from cache_dir = '",
          cache_dir, "', argument force_download = F"))
        lgr::lgr$info(paste0("Object '", elem, "' sucessfully loaded"))
      } else {
        lgr::lgr$warn(paste0(
          "Object '", elem, "' in '", fname_local,
          "' lacks element 'data' and/or 'metadata'"))
      }

    } else {

      googledrive::drive_deauth()

      lgr::lgr$info("Downloading remote dataset from Google Drive to cache_dir")
      dl <- googledrive::with_drive_quiet(
        googledrive::drive_download(
          fname_gd,
          path = fname_local,
          overwrite = TRUE)
      )

      md5checksum_remote <- dl$drive_resource[[1]]$md5Checksum
      md5checksum_local <- unname(tools::md5sum(fname_local))

      # isTRUE() turns a NULL/NA checksum into a regular mismatch instead of
      # an opaque "argument is of length zero" failure inside `if`
      if (!isTRUE(md5checksum_remote == md5checksum_local)) {
        # Drop the unverified file so that a later call with
        # force_download = FALSE cannot silently load it from the cache
        unlink(fname_local)
        msg <- paste0("md5 checksum of local file (", md5checksum_local,
                      ") is inconsistent with remote file (",
                      md5checksum_remote, ")")
        lgr::lgr$error(msg)
        stop(msg, call. = FALSE)
      }

      dataset <- readRDS(fname_local)
      dataset[["fpath"]] <- fname_local
      biomarker_datasets[[elem]] <- dataset

      if (has_payload(dataset)) {
        lgr::lgr$info(paste0(
          "Downloaded dataset to cache_dir = '", cache_dir, "'"))
        lgr::lgr$info(paste0("Object '", elem, "' sucessfully loaded"))
        lgr::lgr$info(paste0("md5 checksum is valid: ", md5checksum_remote))
      } else {
        lgr::lgr$warn(paste0(
          "Object '", elem, "' in '", fname_local,
          "' lacks element 'data' and/or 'metadata'"))
      }

    }
  }

  return(biomarker_datasets[["biomarkers"]])

}



#' Tidy eval helpers
#'
#' <https://cran.r-project.org/web/packages/dplyr/vignettes/programming.html>
Expand Down
Binary file modified R/sysdata.rda
Binary file not shown.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

# pharmOncoX <a href="https://sigven.github.io/pharmOncoX/"><img src="man/figures/logo.png" align="right" height="130" width="113"/></a>

**pharmOncoX** is an R package that provides access to targeted and non-targeted cancer drugs, including comprehensive annotations per target, drug mechanism-of-action, approval dates, clinical trial phases for various indications etc. Drugs are further classified according to the [Anatomical Therapeutic Chemical (ATC) Classification System](https://www.whocc.no/atc_ddd_index/), enabling a filtering of cancer drugs according to their main types of action.
**pharmOncoX** is an R package that provides access to targeted and non-targeted cancer drugs, and genomic cancer biomarkers. Cancer drugs include comprehensive annotations per target, drug mechanism-of-action, approval dates, clinical trial phases for various indications etc. Drugs are further classified according to the [Anatomical Therapeutic Chemical (ATC) Classification System](https://www.whocc.no/atc_ddd_index/), enabling a filtering of cancer drugs according to their main types of action.


## Getting started
Expand Down
4 changes: 2 additions & 2 deletions data-raw/biomarker_utilities.R
Original file line number Diff line number Diff line change
Expand Up @@ -2578,10 +2578,10 @@ load_mitelman_db <- function(cache_dir = NA) {
dplyr::distinct() |>
dplyr::rename(citation_id = Pubmed)

mbca_data <- read.table(
mbca_data <- readr::read_tsv(
file = file.path(
cache_dir, "mitelmandb", "MBCA.TXT.DATA"),
sep = "\t", stringsAsFactors = F, header = T) |>
show_col_types = F) |>
dplyr::filter(stringr::str_detect(GeneShort,"::")) |>
dplyr::rename(variant = GeneShort) |>
dplyr::rename(karyotype = KaryShort) |>
Expand Down
3 changes: 3 additions & 0 deletions data-raw/custom_drug_target_regex_nci.tsv
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@ KTX-1001 NSD2
Anti-LGR5 Monoclonal Antibody LGR5
Apamistamab PTPRC
Garsorasib KRAS
Anti-BTLA BTLA
Cifurtilimab CD40
MRTX1133 KRAS
Izalontamab HER1
Izalontamab ERBB3
ALDH1/3 Inhibitor ABD-3001 ALDH1
Expand Down
2 changes: 1 addition & 1 deletion data-raw/drug_utilities.R
Original file line number Diff line number Diff line change
Expand Up @@ -1754,7 +1754,7 @@ map_curated_targets <- function(gene_info = NULL,
# ) |>
dplyr::filter(
stringr::str_detect(
nci_concept_definition, "antineoplastic"
nci_concept_definition, "antineoplastic|tumor|cancer"
)
) |>
dplyr::select(nci_cd_name,
Expand Down
Binary file modified data-raw/metadata_pharm_oncox.xlsx
Binary file not shown.
Loading

0 comments on commit 9b0cd4a

Please sign in to comment.