From 20b04da07e1d5511b18c857bea26b7241a0fd82a Mon Sep 17 00:00:00 2001 From: Sigve Nakken Date: Sat, 2 Nov 2024 12:18:58 +0100 Subject: [PATCH] NCI 24.09e, OTP 2024.09, skip DGIDB chembl map --- R/sysdata.rda | Bin 734 -> 733 bytes data-raw/custom_drug_target_regex_nci.tsv | 10 +- data-raw/data-raw.R | 19 +- data-raw/drug_name_black_list.txt | 1 + data-raw/drug_utilities.R | 1501 +++------------------ data-raw/metadata_pharm_oncox.xlsx | Bin 12810 -> 12832 bytes pkgdown/index.md | 12 +- 7 files changed, 178 insertions(+), 1365 deletions(-) diff --git a/R/sysdata.rda b/R/sysdata.rda index 937ab35910664f34b9d9683f836ff894189ba65f..9a07f3aee52b7d0417736c5a53a49bbb159b9a9c 100644 GIT binary patch literal 733 zcmV<30wVoFT4*^jL0KkKSrf^XVE_Tb|G@uy%5h)=f8am=|Iokx|L{Nn01?0fTpO-P z>KcelN#L3%l)%bJ<8k0>7jExgh+7djVXwi@XkkAbc0i#BP zLaFLJr>W^Yhz<)Y?w>_O$nJx*TA#3J_I)tBZUT8{(uV;IUpc zU)JT?6k?ED0{#`da|!5Bqch1CQ^RebtjbYX+zEBCON-}$1)hA)7%s%Zd}huBeDdrT zCNc#jj(Fjb2dxsgt~Iy-9^5qM2D_GvK{6ec6d+9+zeXWhovvr%2w5rFK1jV^O zs@`i{KbYgmGu^VFn}V53wpQxJSM(;M8fYzBl+8zh-EWLe^(8gOVm3qaZFGKZ60 zNz@1qqKOAflFNjuJ*vdm{yv&(-|r?}+u;O_vO-fh#1L0TtM3gjQH7Nv& zi~Yq~j(laIO$N_d{x56P#+0>!2FA~Js#TYt*1*sa6|;m&IfUmz`42dJcrV)WXZ6Ap zZ@$|AgJ_i^60zihL_xGHIcXd~#-^<<2pq3c%-bzmJPS0%QgI^~ys?qA@ePE~&Zfw} zk(<#BBMsT>I*yi=3a+=)GpC#jWTG>KevvW=8cY-A)zCx8(?71A`6r)eW?q-{`n~p@ P$Nv{{ML1B96Umif^oUdj literal 734 zcmV<40wMiET4*^jL0KkKSpfC+J^%r?|G@uy%5h)=f8am=|M0*6|L{Nn01?0fUDKJ{ zty#oGBg$wE14fz+00Te(000dGL)03Hq`{CUp{b#%kYY3&QRxjBnrJiwK*-PyDd{~; z4Fw(weyC^w&;SNQKpJVGh-3gsBoaLl;X@$M(@h3|XlbBm27olvK+rfbVxZ~sm_%+yIF-n4E+xs8q-kPzf$J76fidO0<=8u#LExiy#C%ax_xM z8FfJ;Rhn!O0esOJcA<5so}!tnB2*WdT8`-)jz}GHpa|H>(nZ9_hkJ?yl}`YD7-_eZ zsMiYAxMR1)04B(9DO#-$WxsabOHyl|r#8D;_>7ju8hTKr5sPoI9-{WMF>apYBQJ% zL9kt0hE zTokT<2Bbp(PN_2KpE0O9QXZZHrpI@+C!eSYBp$T$z)WLsNz6Yo-!R?`*bd{`W5N5( z>Z^qb3rp8zGAc=?f&v79#SopI35{#S33$>6s2n1d-ft@Lh{mmppMB|}tG6(ue1(f~ z5i{Z%vq1E$jjst{fMJ@W^M|L6D-2e-gd&H0;auqQd#;^KzP&ukaN3qV*nDWFEI+u2 Q%pdr>k}1N3fCsO!@JokIHvj+t diff --git a/data-raw/custom_drug_target_regex_nci.tsv b/data-raw/custom_drug_target_regex_nci.tsv index 6839a79..c7ee599 100755 --- a/data-raw/custom_drug_target_regex_nci.tsv +++ b/data-raw/custom_drug_target_regex_nci.tsv @@ -74,7 +74,7 @@ Tolinapant|Xevinapant|Smac Mimetic BIRC2 Xevinapant|(Smac|SMAC) Mimetic BIRC3 Tolinapant|Idronoxil|Xevinapant|Smac Mimetic XIAP hSTC810 BTN1A1 -Divarasib|Opnurasib KRAS +Divarasib|Opnurasib|INCB161734|QTX3046|RMC-9805|TSN1611|QTX3034|RMC-6236 KRAS Ras Inhibitor LUNA18 KRAS Ras Inhibitor LUNA18 NRAS Ras Inhibitor LUNA18 HRAS @@ -90,11 +90,11 @@ Tinengotinib FLT1 Tinengotinib KDR Tinengotinib FLT3 Tinengotinib FLT4 -pan-RAF Inhibitor|pan-RAF Kinase Inhibitor BRAF -pan-RAF Inhibitor|pan-RAF Kinase Inhibitor ARAF -pan-RAF Inhibitor|pan-RAF Kinase Inhibitor CRAF +pan-RAF Inhibitor|pan-RAF Kinase Inhibitor|Brimarafenib|BDTX-4933|DCC-3084 BRAF +pan-RAF Inhibitor|pan-RAF Kinase Inhibitor|BDTX-4933|DCC-3084 ARAF +pan-RAF Inhibitor|pan-RAF Kinase Inhibitor|BDTX-4933|DCC-3084 CRAF Ebvaciclib|Tagtociclib CDK2 -Ebvaciclib CDK4 +Ebvaciclib|Atirmociclib CDK4 Ebvaciclib CDK6 Utatrectinib NTRK1 Utatrectinib NTRK2 diff --git a/data-raw/data-raw.R b/data-raw/data-raw.R index 5c1eee2..666e5d7 100755 --- a/data-raw/data-raw.R +++ b/data-raw/data-raw.R @@ -21,7 +21,7 @@ opentargets_version <- metadata$compounds[metadata$compounds$source_abbreviation == "opentargets", "source_version"] package_datestamp <- stringr::str_replace_all(Sys.Date(),"-","") -chembl_pubchem_datestamp <- '20240708' +chembl_pubchem_datestamp <- '20241024' ## set logging layout lgr::lgr$appenders$console$set_layout( @@ -81,7 +81,7 @@ drug_sets[['nci']] <- get_nci_drugs( #### -- Open Targets Platform - drugs ---#### ## Get all targeted anticancer/other drugs from Open Targets Platform drug_sets[['otp']] <- - get_opentargets_cancer_drugs( + get_otp_cancer_drugs( path_data_raw = path_data_raw, ot_version = opentargets_version) @@ -101,7 +101,7 @@ drug_sets[['nci_otp_curated']] <- map_curated_targets( gene_info = gene_info, path_data_raw = path_data_raw, drug_df = drug_sets[['nci_otp']] -) +)$curated ####-- Cancer drugs classified into categories (ATC) ---#### drug_sets[['nci_otp_curated_classified']] <- assign_drug_category( @@ -194,7 +194,8 @@ raw_biomarkers[['depmap']] <- raw_biomarkers[['custom_fusions']]$variant <- raw_biomarkers[['custom_fusions']]$variant |> dplyr::anti_join( - raw_biomarkers[["mitelmandb"]][['variant']], by = "variant_alias") + raw_biomarkers[["mitelmandb"]][['variant']], + by = "variant_alias") biomarkers <- list() biomarkers[['data']] <- raw_biomarkers @@ -202,11 +203,11 @@ biomarkers[['metadata']] <- metadata$biomarkers #rm(biomarkers_all) ## upload to Google Drive -version_bump <- paste0( - substr(as.character(packageVersion("pharmOncoX")),1,4), - as.character(as.integer(substr(as.character(packageVersion("pharmOncoX")),5,5)) + 1)) +#version_bump <- paste0( +# substr(as.character(packageVersion("pharmOncoX")),1,4), +# as.character(as.integer(substr(as.character(packageVersion("pharmOncoX")),5,5)) + 1)) - +version_bump <- "1.8.0" db <- list() db[['biomarkers']] <- biomarkers @@ -221,7 +222,7 @@ db[['drug_map_basic']][['records']] <- drug_index_map[['id2basic']] db[['drug_map_alias']] <- list() db[['drug_map_alias']][['records']] <- drug_index_map[['id2alias']] -googledrive::drive_auth_configure(api_key = Sys.getenv("GD_KEY")) +#googledrive::drive_auth_configure(api_key = Sys.getenv("GD_KEY")) gd_records <- list() db_id_ref <- data.frame() diff --git a/data-raw/drug_name_black_list.txt b/data-raw/drug_name_black_list.txt index d197ad5..fcc5b29 100644 --- a/data-raw/drug_name_black_list.txt +++ b/data-raw/drug_name_black_list.txt @@ -9,6 +9,7 @@ Sodium Caseinate AXL-1717 AZD-7451 TAS-115 +RAC-3-N-BUTYLPHTHALIDE Lenperone Hydrochloride ABT-126 AZD-3759 diff --git a/data-raw/drug_utilities.R b/data-raw/drug_utilities.R index 58748c9..e178729 100644 --- a/data-raw/drug_utilities.R +++ b/data-raw/drug_utilities.R @@ -29,201 +29,6 @@ readUrl <- function(q_url) { return(out) } -### PUBCHEM/CHEMBL - WEB SERVICE FUNCTIONS FOR COMPOUND INFORMATION - -## Retrieval of compound properties from PubChem -#' -#' Function that retrieves PubChem compound properties using PubChem public user gateway (PUG) -#' -#' @param pubchem_cid PubChem compound identifier -#' @return a data frame with the following columns of chemical compound properties: -#' \itemize{ -#' \item pubchem_isomeric_SMILES -#' \item pubchem_TPSA -#' \item pubchem_IUPAC_name -#' \item pubchem_complexity -#' \item pubchem_hbond_donor_count -#' \item pubchem_hbond_acceptor_count -#' \item pubchem_molecular_weight -#' \item pubchem_cid -#' \item pubchem_name -#' \item molecule_chembl_id -#' } -#' @examples -#' ## Retrieve compound properties for Azacitidine (Pubchem compound ID = 9444) -#' \dontrun{ -#' pubchem_compound_props(pubchem_cid = 9444) -#' } -#' -pubchem_compound_properties <- function(PUBCHEM_PUG_URL = 'https://pubchem.ncbi.nlm.nih.gov/rest/pug/', pubchem_cid = 9444){ - compound_description_list <- jsonlite::fromJSON(paste0(PUBCHEM_PUG_URL,'compound/cid/',pubchem_cid,'/description/JSON')) - compound_description <- dplyr::filter(compound_description_list$InformationList$Information, !is.na(Title)) - if(nrow(compound_description) > 1){ - cat("More than a single description line\n",pubchem_cid) - cat('\n') - return(NULL) - } - compound_properties <- read.csv(paste0(PUBCHEM_PUG_URL,'compound/cid/',pubchem_cid,'/property/IsomericSMILES,TPSA,IUPACName,Complexity,HBondDonorCount,HBondAcceptorCount,MolecularWeight/CSV'),stringsAsFactors = F) - - compound_properties$pubchem_name <- compound_description$Title - chembl_id <- NA - synonym_url <- paste0(PUBCHEM_PUG_URL,'compound/cid/',pubchem_cid,'/synonyms/JSON') - pubchem_compound_synonyms <- NULL - if(!is.null(readUrl(synonym_url))){ - compound_synonyms <- jsonlite::fromJSON(synonym_url) - all_synonyms <- compound_synonyms$InformationList$Information$Synonym[[1]] - pubchem_compound_synonyms <- paste(all_synonyms,collapse="@@@@") - j <- 1 - while(j <= length(all_synonyms)){ - if(stringr::str_detect(all_synonyms[j],"^CHEMBL[0-9]{1,}")){ - chembl_id <- all_synonyms[j] - break - } - j <- j + 1 - } - } - - compound_properties$molecule_chembl_id <- chembl_id - compound_properties$pubchem_synonyms <- pubchem_compound_synonyms - compound_properties <- dplyr::rename(compound_properties, pubchem_isomeric_SMILES = IsomericSMILES, pubchem_TPSA = TPSA, pubchem_IUPAC_name = IUPACName, pubchem_complexity = Complexity, pubchem_hbond_donor_count = HBondDonorCount, pubchem_hbond_acceptor_count = HBondAcceptorCount, pubchem_molecular_weight = MolecularWeight, pubchem_id = CID) - - return(compound_properties) -} - -## Retrieval of molecular compound properties from ChEMBL -#' -#' Function that accepts a ChEMBL molecule identifer and uses the ChEMBL web service API to return a range of compound properties -#' -#' @param molecule_chembl_id ChEMBL molecule identifier -#' @return a data frame with the following columns of chemical compound properties: -#' \itemize{ -#' \item molecule_chembl_id -#' \item parent_chembl_id -#' \item chembl_acd_logp -#' \item chembl_acd_logd -#' \item chembl_acd_most_apka -#' \item chembl_acd_most_bpka -#' \item chembl_alogp -#' \item chembl_aromatic_rings -#' \item chembl_full_molformula -#' \item chembl_full_mwt -#' \item chembl_hba -#' \item chembl_hba_lipinski -#' \item chembl_hbd -#' \item chembl_hbd_lipinski -#' \item chembl_heavy_atoms -#' \item chembl_molecular_species -#' \item chembl_mw_freebase -#' \item chembl_mw_monoisotopic -#' \item chembl_num_alerts -#' \item chembl_num_lipinski_ro5_violations -#' \item chembl_num_ro5_violations -#' \item chembl_psa -#' \item chembl_qed_weighted -#' \item chembl_ro3_pass -#' \item chembl_rtb -#' \item chembl_canonical_smiles -#' \item chembl_standard_inchi -#' \item chembl_standard_inchi_key -#' \item chembl_pref_name -#' \item lincs_id -#' \item drugbank_id -#' \item pharmgkb_id -#' } -#' @examples -#' ## Retrieve compound properties for IRINOTECAN (ChEMBL molecular compound ID = 'CHEMBL481') -#' \dontrun{ -#' chembl_compound_props(molecule_chembl_id = 'CHEMBL481') -#' } -#' -chembl_compound_properties <- function(chembl_ws_base_url = 'https://www.ebi.ac.uk/chembl/api/data', molecule_chembl_id = NA){ - all_molecule_properties <- data.frame("molecule_chembl_id" = molecule_chembl_id, "parent_chembl_id" = NA, "chembl_acd_logp" = NA, "chembl_acd_logd" = NA, "chembl_acd_most_apka" = NA, - "chembl_acd_most_bpka" = NA, "chembl_alogp" = NA, "chembl_aromatic_rings" = NA, "chembl_full_molformula" = NA, - "chembl_full_mwt" = NA, "chembl_hba" = NA, "chembl_hba_lipinski" = NA, "chembl_hbd" = NA, "chembl_hbd_lipinski" = NA, - "chembl_heavy_atoms" = NA, "chembl_molecular_species" = NA, "chembl_mw_freebase" = NA, "chembl_mw_monoisotopic" = NA, - "chembl_num_alerts" = NA, "chembl_num_lipinski_ro5_violations" = NA, "chembl_num_ro5_violations" = NA, "chembl_psa" = NA, - "chembl_qed_weighted" = NA, "chembl_ro3_pass" = NA, "chembl_rtb" = NA, "chembl_canonical_smiles" = NA, - "chembl_standard_inchi" = NA, "chembl_standard_inchi_key" = NA, "chembl_pref_name" = NA, "lincs_id" = NA, "drugbank_id" = NA, - "pharmgkb_id" = NA, stringsAsFactors = F) - - molecule_url <- paste0(chembl_ws_base_url,'/molecule?molecule_chembl_id=',molecule_chembl_id) - molecule_hierarchy <- NULL - molecule_properties <- NULL - molecule_structures <- NULL - props_all <- NULL - lincs_id <- NA - drugbank_id <- NA - pharmgkb_id <- NA - if(!is.null(readUrl(molecule_url))){ - raw_xml_string <- rawToChar(httr::GET(molecule_url)$content) - doc <- XML::xmlParse(raw_xml_string) - if(stringr::str_detect(raw_xml_string, '')){ - molecule_hierarchy <- XML::xmlToDataFrame(nodes = XML::getNodeSet(doc, "//response/molecules/molecule/molecule_hierarchy"), collectNames = T, stringsAsFactors = F) - } - if(stringr::str_detect(raw_xml_string, '')){ - molecule_properties <- XML::xmlToDataFrame(nodes = XML::getNodeSet(doc, "//response/molecules/molecule/molecule_properties"), collectNames = T, stringsAsFactors = F) - } - if(!is.null(molecule_properties)){ - colnames(molecule_properties) <- paste0('chembl_',colnames(molecule_properties)) - } - if(stringr::str_detect(raw_xml_string, '')){ - molecule_structures <- XML::xmlToDataFrame(nodes = XML::getNodeSet(doc, "//response/molecules/molecule/molecule_structures"), collectNames = T, stringsAsFactors = F) - } - if(!is.null(molecule_structures)){ - colnames(molecule_structures) <- paste0('chembl_',colnames(molecule_structures)) - } - if(stringr::str_detect(raw_xml_string, '')){ - props_all <- XML::xmlToDataFrame(nodes = XML::getNodeSet(doc, "//response/molecules/molecule"), collectNames = T, stringsAsFactors = F) - props_all <- dplyr::select(props_all, pref_name) |> dplyr::rename(chembl_pref_name = pref_name) - } - - if(!is.null(molecule_structures)){ - unichem_url <- paste0('https://www.ebi.ac.uk/unichem/rest/verbose_inchikey/',molecule_structures$chembl_standard_inchi_key) - if(!is.null(readUrl(unichem_url))){ - unichem_xrefs <- httr::content(httr::GET(unichem_url)) - i <- 1 - while(i <= length(unichem_xrefs)){ - list_item <- unichem_xrefs[[i]] - if(list_item$name == 'lincs'){ - lincs_id <- list_item$src_compound_id[[1]] - } - if(list_item$name == 'drugbank'){ - drugbank_id <- list_item$src_compound_id[[1]] - } - if(list_item$name == 'pharmgkb'){ - pharmgkb_id <- list_item$src_compound_id[[1]] - } - i <- i + 1 - } - } - } - - if(!is.null(molecule_properties)){ - for(c in colnames(molecule_properties)){ - all_molecule_properties[,c] <- molecule_properties[,c] - } - } - if(!is.null(molecule_structures)){ - for(c in colnames(molecule_structures)){ - all_molecule_properties[,c] <- molecule_structures[,c] - } - } - if(!is.null(props_all)){ - for(c in colnames(props_all)){ - all_molecule_properties[,c] <- props_all[,c] - } - } - - all_molecule_properties$lincs_id <- lincs_id - all_molecule_properties$drugbank_id <- drugbank_id - all_molecule_properties$pharmgkb_id <- pharmgkb_id - if(!is.null(molecule_hierarchy)){ - all_molecule_properties$parent_chembl_id <- molecule_hierarchy$parent_chembl_id - } - } - return(all_molecule_properties) -} - ### NCI DRUG DISPLAY LABELS @@ -244,8 +49,8 @@ process_nci_labels <- function(path_data_raw, overwrite = F) { ### CHEMBL-PUBCHEM COMPOUND CROSS-REFERENCE -get_chembl_pubchem_compound_xref <- function(datestamp = '20220906', - chembl_release = "v31", +get_chembl_pubchem_xref <- function(datestamp = '20241024', + chembl_release = "v34", path_data_raw = NULL, update = F){ chembl_pubchem_xref_fname <- file.path( @@ -268,9 +73,9 @@ get_chembl_pubchem_compound_xref <- function(datestamp = '20220906', ### TARGETED ANTICANCER COMPOUNDS FROM OPEN TARGETS -get_opentargets_cancer_drugs <- +get_otp_cancer_drugs <- function(path_data_raw = NULL, - ot_version = "2023.12"){ + ot_version = "2024.09"){ cancer_terms <- list() cancer_terms[['all']] <- phenOncoX::get_terms( @@ -699,37 +504,15 @@ get_atc_drug_classification <- function( } -get_fda_ndc_mapping <- function( - path_data_raw = NULL){ - - fda_ndc_fname <- file.path( - path_data_raw,"national_drug_code_fda","product.txt") - - drug2epc <- readr::read_tsv(fda_ndc_fname, show_col_types = F) |> - janitor::clean_names() |> - ## ignore drug regimens/combos - dplyr::filter(!stringr::str_detect(substancename,"; ")) |> - ## separate entries of pharm classes - tidyr::separate_rows(pharm_classes, sep=", ") |> - dplyr::select(substancename, pharm_classes) |> - dplyr::filter(!is.na(pharm_classes)) |> - dplyr::filter(stringr::str_detect(pharm_classes," \\[EPC\\]")) |> - dplyr::rename(drug = substancename, - fda_epc_category = pharm_classes) |> - dplyr::distinct() - - return(drug2epc) -} - -#### NCI THESAURUS CANCER DRUGS/TREATMENTS +## NCI THESAURUS CANCER DRUGS/TREATMENTS get_nci_drugs <- function(nci_db_release = nci_db_release, overwrite = F, path_data_raw = NULL, path_data_processed = NULL){ nci_antineo_thesaurus <- NULL nci_drugs <- NULL - + nci_ftp_base <- paste0( "https://evs.nci.nih.gov/ftp1/NCI_Thesaurus/archive/", nci_db_release, @@ -780,75 +563,15 @@ get_nci_drugs <- function(nci_db_release = nci_db_release, file.path( path_data_processed, "nci_thesaurus", - "nci_treatment_thesaurus_antineo.rds")) | overwrite == T){ + "nci_treatment_thesaurus_antineo_v2.rds")) | overwrite == T){ sorafenib_definition <- 'A synthetic compound targeting growth signaling and angiogenesis. Sorafenib blocks the enzyme RAF kinase, a critical component of the RAF/MEK/ERK signaling pathway that controls cell division and proliferation; in addition, sorafenib inhibits the VEGFR-2/PDGFR-beta signaling cascade, thereby blocking tumor angiogenesis.' - + nci_display_labels <- process_nci_labels( path_data_raw = path_data_raw, overwrite = overwrite) - drug2chembl <- - read.table( - file = file.path( - path_data_raw,"dgidb","dgidb.202202.tsv"), - header=T, quote="", comment.char="", sep="\t", - stringsAsFactors = F) |> - dplyr::filter(nchar(drug_name) > 0) |> - dplyr::select(-drug_claim_source) |> - dplyr::rename(molecule_chembl_id = concept_id) |> - dplyr::mutate(molecule_chembl_id = - stringr::str_replace(molecule_chembl_id, "chembl:","")) |> - dplyr::filter(!startsWith(molecule_chembl_id,"wiki")) |> - dplyr::distinct() |> - dplyr::mutate(drug_claim_name = tolower(drug_claim_name)) |> - dplyr::filter(!startsWith(drug_claim_name,"chembl")) |> - dplyr::filter(!stringr::str_detect(drug_claim_name,"^[0-9]{1,}$")) |> - dplyr::mutate(drug_name = tolower(drug_name)) |> - - ### remove and correct wrong drug to molecule_chembl_id associations - dplyr::mutate(molecule_chembl_id = dplyr::if_else( - drug_name == "calcifediol", - as.character("CHEMBL1040"), - as.character(molecule_chembl_id) - )) |> - dplyr::mutate(molecule_chembl_id = dplyr::if_else( - drug_name == "leucovorin", - as.character("CHEMBL1040"), - as.character(molecule_chembl_id) - )) |> - dplyr::mutate(molecule_chembl_id = dplyr::if_else( - drug_name == "adl-5747", - as.character("CHEMBL561339"), - as.character(molecule_chembl_id) - )) |> - dplyr::filter(molecule_chembl_id != "CHEMBL1200796") |> #cyclophosphamide - dplyr::filter(molecule_chembl_id != "CHEMBL3989496") |> #tezacitabine - dplyr::filter(molecule_chembl_id != "CHEMBL1588") |> #thiamine - dplyr::filter(molecule_chembl_id != "CHEMBL1200751") |> #mercaptopurine - dplyr::filter(molecule_chembl_id != "CHEMBL541887") |> #irinotecan - dplyr::filter(molecule_chembl_id != "CHEMBL513000") |> #emetine hydrochloride - dplyr::filter(molecule_chembl_id != "CHEMBL3989727") |> #vitamine e - dplyr::filter(molecule_chembl_id != "CHEMBL1889436") |> #ouabain - dplyr::filter(molecule_chembl_id != "CHEMBL1201113") |> #cetirizine hydrochloride - dplyr::filter(molecule_chembl_id != "CHEMBL1200792") |> #fluphenazine hydrochloride - dplyr::filter(molecule_chembl_id != "CHEMBL935") |> #pentamidine isethionate - dplyr::filter(molecule_chembl_id != "CHEMBL482811") |> #u-50488 methane sulfonate - dplyr::distinct() - - drug2chembl_all <- - dplyr::select(drug2chembl, molecule_chembl_id, drug_claim_name) |> - dplyr::rename(drug_name = drug_claim_name) |> - dplyr::bind_rows(dplyr::select(drug2chembl, drug_name, molecule_chembl_id)) |> - dplyr::filter(!stringr::str_detect(drug_name,"(^[0-9]{1,}$)|^chembl[0-9]{1,}")) |> - dplyr::distinct() |> - dplyr::bind_rows(data.frame('drug_name' = 'gemtuzumab', - 'molecule_chembl_id' = 'CHEMBL2108342', - stringsAsFactors = F)) |> - dplyr::arrange(drug_name) - - ## Agents/compounds marked as antineplastic according to NCI nci_antineo_agents <- read.table(file = file.path(path_data_raw,"nci_thesaurus","Antineoplastic_Agent.txt"), @@ -859,37 +582,39 @@ get_nci_drugs <- function(nci_db_release = nci_db_release, dplyr::select(nci_t) |> dplyr::mutate(antineoplastic_agent = TRUE) |> dplyr::distinct() - - - ## parse all entries in nci thesaurus where the semantic concept type is treatment-related - nci_antineo_thesaurus_raw <- as.data.frame( + + + ## parse all entries in nci thesaurus where the + ## semantic concept type is treatment-related + nci_antineo_thesaurus_raw2 <- as.data.frame( read.table(file = file.path(path_data_raw, "nci_thesaurus", "Thesaurus.txt"), - header = F, stringsAsFactors = F, sep="\t", - comment.char="", quote = "") |> - dplyr::rename(nci_t = V1, nci_concept_name = V2, - nci_t_parent = V3, concept_synonym = V4, - nci_concept_definition = V5, - nci_cd_name = V6, nci_concept_status = V7, - nci_concept_semantic_type = V8) |> - - dplyr::filter( - stringr::str_detect( - nci_concept_semantic_type,"Chemical|Substance|Therapeutic|Drug|Immunologic")) |> - dplyr::left_join( - nci_antineo_agents, by = c("nci_t"), - multiple = "all", relationship = "many-to-many") |> - dplyr::select(-c(nci_concept_name, nci_cd_name)) |> - dplyr::mutate(nci_concept_synonym_all = concept_synonym) |> - tidyr::separate_rows(concept_synonym,sep="\\|") |> - dplyr::mutate(nci_concept_synonym = tolower(concept_synonym)) |> - dplyr::select(-concept_synonym) |> - dplyr::filter( - nci_t != 'C147908' & nci_t != 'C71622') |> ## Hormone Therapy Agent - dplyr::mutate( - nci_concept_semantic_type = - dplyr::if_else( - nci_t == 'C61948','Pharmacologic Substance', - nci_concept_semantic_type)) |> ## redundant Sorafenib entries + header = F, stringsAsFactors = F, sep="\t", + comment.char="", quote = "") |> + dplyr::rename(nci_t = V1, nci_concept_name = V2, + nci_t_parent = V3, concept_synonym = V4, + nci_concept_definition = V5, + nci_cd_name = V6, nci_concept_status = V7, + nci_concept_semantic_type = V8) |> + + dplyr::filter( + stringr::str_detect( + nci_concept_semantic_type, + "Chemical|Substance|Therapeutic|Drug|Immunologic")) |> + dplyr::left_join( + nci_antineo_agents, by = c("nci_t"), + multiple = "all", relationship = "many-to-many") |> + dplyr::select(-c(nci_concept_name, nci_cd_name)) |> + dplyr::mutate(nci_concept_synonym_all = concept_synonym) |> + tidyr::separate_rows(concept_synonym,sep="\\|") |> + dplyr::mutate(nci_concept_synonym = tolower(concept_synonym)) |> + dplyr::select(-concept_synonym) |> + dplyr::filter( + nci_t != 'C147908' & nci_t != 'C71622') |> ## Hormone Therapy Agent + dplyr::mutate( + nci_concept_semantic_type = + dplyr::if_else( + nci_t == 'C61948','Pharmacologic Substance', + nci_concept_semantic_type)) |> ## redundant Sorafenib entries dplyr::mutate( nci_concept_definition = dplyr::if_else( @@ -897,433 +622,63 @@ get_nci_drugs <- function(nci_db_release = nci_db_release, nci_concept_definition)) |> ## redundant Sorafenib entries dplyr::distinct() |> dplyr::left_join( - nci_display_labels, by = c("nci_t"), - multiple = "all", relationship = "many-to-many") |> - dplyr::filter(!(nci_t == "C1806" & nci_concept_synonym == "gemtuzumab")) |> - dplyr::filter(!(nci_t == "C405" & nci_concept_synonym == "ctx")) |> - - dplyr::left_join( - drug2chembl_all, - by = c("nci_concept_synonym" = "drug_name"), - multiple = "all", relationship = "many-to-many") |> - dplyr::mutate(nci_db_version = nci_db_release) |> - dplyr::filter(!is.na(nci_cd_name)) |> - dplyr::filter(!stringr::str_detect( + nci_display_labels, by = c("nci_t"), + multiple = "all", relationship = "many-to-many") |> + dplyr::filter( + !(nci_t == "C1806" & nci_concept_synonym == "gemtuzumab")) |> + dplyr::filter( + !(nci_t == "C405" & nci_concept_synonym == "ctx")) |> + dplyr::mutate(nci_db_version = nci_db_release) |> + dplyr::filter(!is.na(nci_cd_name)) |> + dplyr::filter(!stringr::str_detect( tolower(nci_concept_definition), "coronavirus")) |> - dplyr::filter(!stringr::str_detect( - nci_concept_synonym_all, - "SARS-CoV-2|COVID-19|CoV-19|Coronary|Corona|Covid-19|covid-19")) |> - #)) - dplyr::mutate(antineoplastic_agent = dplyr::if_else( - is.na(antineoplastic_agent), - as.logical(TRUE), - as.logical(antineoplastic_agent) - )) |> - dplyr::filter(!stringr::str_detect(nci_cd_name," (Gel|Oil|Cream|Seed|Block|Field|Supplement|Factor)$")) |> - dplyr::filter(!stringr::str_detect(nci_cd_name,"(Vaccination|Lotion|Therapeutic Heat|Procedure|Rehabilitation|Prevention|Rinse)$")) |> - dplyr::filter(!stringr::str_detect(nci_cd_name,"(Epitope|Exract|Influenza|Ginseng|Ointment|Management|Injection|Tool)$")) |> - dplyr::filter(!stringr::str_detect(nci_cd_name,"^(Vitamin A Compound|Inactivated Poliovirus|Antineoplastic Immune Cell|Topical)")) |> - dplyr::filter(!stringr::str_detect(nci_cd_name,"^(Sheng-Yu|Ginseng|Dry Cleaning|Boost|Tobacco|Microwave)")) |> - dplyr::filter(!stringr::str_detect(nci_cd_name,"^(Blood|Interruption of|Gum Arabic|Vaginal Cylinder|Laser Ablation|Wheatgrass)")) |> - dplyr::filter(!stringr::str_detect(nci_cd_name,"^(Chemical Challenge|Prevention of|Magic Mouthwash|Wood Dust|Soot|Cocaine)")) |> - dplyr::filter(!stringr::str_detect(nci_cd_name,"^(Antibody|Antigen|Antioxidant|Vaccination|Acetate|Antiserum|Asbestos|Aspirate|Autoantigen|Cytokine)$")) |> - dplyr::filter(!stringr::str_detect(nci_cd_name," Spray| Extract| Antidiabetic| Implant|(Green Tea|Living Healthy|Pollutant|Probe|Protective Agent|Supportive Care|Caffe)")) |> - dplyr::filter( - !stringr::str_detect( - tolower(nci_concept_definition), - "chinese |antidiabet|diabetes|antidepress|analgesic|pulmonary edema|nutritional|human carcinogen|anesthetic|nonsedating|sedative|antihyper|antiinflamma|antiarrythm|antiangin|antihist|muscle|neurotransmitter")) - ) - - #nci_antineo_thesaurus - nci2chembl <- as.data.frame(nci_antineo_thesaurus_raw |> - dplyr::select(molecule_chembl_id, nci_cd_name) |> - dplyr::filter(!is.na(molecule_chembl_id)) |> - dplyr::distinct() + dplyr::filter(!stringr::str_detect( + nci_concept_synonym_all, + "SARS-CoV-2|COVID-19|CoV-19|Coronary|Corona|Covid-19|covid-19")) |> + #)) + dplyr::mutate(antineoplastic_agent = dplyr::if_else( + is.na(antineoplastic_agent), + as.logical(TRUE), + as.logical(antineoplastic_agent) + )) |> + dplyr::filter(!stringr::str_detect(nci_cd_name," (Gel|Oil|Cream|Seed|Block|Field|Supplement|Factor)$")) |> + dplyr::filter(!stringr::str_detect(nci_cd_name,"(Vaccination|Lotion|Therapeutic Heat|Procedure|Rehabilitation|Prevention|Rinse)$")) |> + dplyr::filter(!stringr::str_detect(nci_cd_name,"(Epitope|Exract|Influenza|Ginseng|Ointment|Management|Injection|Tool)$")) |> + dplyr::filter(!stringr::str_detect(nci_cd_name,"^(Vitamin A Compound|Inactivated Poliovirus|Antineoplastic Immune Cell|Topical)")) |> + dplyr::filter(!stringr::str_detect(nci_cd_name,"^(Sheng-Yu|Ginseng|Dry Cleaning|Boost|Tobacco|Microwave)")) |> + dplyr::filter(!stringr::str_detect(nci_cd_name,"^(Blood|Interruption of|Gum Arabic|Vaginal Cylinder|Laser Ablation|Wheatgrass)")) |> + dplyr::filter(!stringr::str_detect(nci_cd_name,"^(Chemical Challenge|Prevention of|Magic Mouthwash|Wood Dust|Soot|Cocaine)")) |> + dplyr::filter(!stringr::str_detect(nci_cd_name,"^(Antibody|Antigen|Antioxidant|Vaccination|Acetate|Antiserum|Asbestos|Aspirate|Autoantigen|Cytokine)$")) |> + dplyr::filter(!stringr::str_detect(nci_cd_name," Spray| Extract| Antidiabetic| Implant|(Green Tea|Living Healthy|Pollutant|Probe|Protective Agent|Supportive Care|Caffe)")) + #dplyr::filter( + # !stringr::str_detect( + # tolower(nci_concept_definition), + # "chinese |antidiabet|diabetes|antidepress|analgesic|pulmonary edema|nutritional|human carcinogen|anesthetic|nonsedating|sedative|antihyper|antiinflamma|antiarrythm|antiangin|antihist|muscle|neurotransmitter")) ) - - j <- 1 - nci2chembl_dict <- list() - while(j <= nrow(nci2chembl)){ - nci2chembl_dict[[nci2chembl[j,"nci_cd_name"]]] <- - nci2chembl[j,"molecule_chembl_id"] - j <- j + 1 - } - - i <- 1 - while(i <= nrow(nci_antineo_thesaurus_raw)){ - nci_cd_name <- - nci_antineo_thesaurus_raw[i,"nci_cd_name"] - if(nci_cd_name %in% names(nci2chembl_dict)){ - nci_antineo_thesaurus_raw[i,"molecule_chembl_id"] <- - nci2chembl_dict[[nci_cd_name]] - } - i <- i + 1 - } - - nci_antineo_thesaurus <- nci_antineo_thesaurus_raw |> - dplyr::filter(!(molecule_chembl_id == "CHEMBL1569487" & nci_t == "C405")) |> - dplyr::mutate(molecule_chembl_id = dplyr::if_else( - nci_cd_name == "Sorafenib Tosylate", "CHEMBL1200485", as.character(molecule_chembl_id))) |> - dplyr::mutate(molecule_chembl_id = dplyr::if_else( - nci_cd_name == "Leucovorin", "CHEMBL1679", as.character(molecule_chembl_id))) |> - dplyr::mutate(molecule_chembl_id = dplyr::if_else( - nci_cd_name == "Masoprocol", "CHEMBL313972", as.character(molecule_chembl_id))) |> - dplyr::mutate(molecule_chembl_id = dplyr::if_else( - nci_cd_name == "7-Hydroxystaurosporine", "CHEMBL1236539", as.character(molecule_chembl_id))) |> - dplyr::mutate(molecule_chembl_id = dplyr::if_else( - nci_cd_name == "Dromostanolone", "CHEMBL1201048", as.character(molecule_chembl_id))) |> - dplyr::mutate(molecule_chembl_id = dplyr::if_else( - nci_cd_name == "Patritumab", "CHEMBL2109406", as.character(molecule_chembl_id))) |> - dplyr::mutate(molecule_chembl_id = dplyr::if_else( - nci_cd_name == "Rivoceranib", "CHEMBL3186534", as.character(molecule_chembl_id))) |> - dplyr::mutate(molecule_chembl_id = dplyr::if_else( - nci_cd_name == "Rivoceranib Mesylate", "CHEMBL3545414", as.character(molecule_chembl_id))) |> - dplyr::mutate(molecule_chembl_id = dplyr::if_else( - nci_cd_name == "Tricribine Phosphate", "CHEMBL462018", as.character(molecule_chembl_id))) |> - dplyr::mutate(molecule_chembl_id = dplyr::if_else( - nci_cd_name == "Umbrasilib" | - nci_cd_name == "Umbrasilib Tosylate", "CHEMBL3948730", - as.character(molecule_chembl_id))) |> - dplyr::filter( - !(stringr::str_detect( - nci_cd_name, - "^Antineoplast(on|ic) Agent|^Support| Gel | Ointment|Caffeine|^Acetate$"))) - #CHEMBL3545055 - + + + nci_antineo_thesaurus <- nci_antineo_thesaurus_raw + ## add apatinib as an alias (is missing in NCI thesaurus) apatinib_alias_entry <- nci_antineo_thesaurus |> dplyr::filter(nci_concept_synonym == "rivoceranib") |> dplyr::mutate(nci_concept_synonym = "apatinib") |> dplyr::mutate(nci_concept_synonym_all = paste(nci_concept_synonym_all,"Apatinib",sep="|")) - + nci_antineo_thesaurus <- nci_antineo_thesaurus |> dplyr::bind_rows(apatinib_alias_entry) |> - dplyr::arrange(nci_cd_name) - - nci_with_chembl <- nci_antineo_thesaurus |> - dplyr::filter(!is.na(molecule_chembl_id)) |> - dplyr::select(nci_t, molecule_chembl_id) |> - dplyr::distinct() - - nci_2 <- nci_antineo_thesaurus |> - dplyr::filter(is.na(molecule_chembl_id)) |> - dplyr::select(-molecule_chembl_id) |> - dplyr::left_join( - nci_with_chembl, by = c("nci_t"), - multiple = "all", - relationship = "many-to-many") - - nci_antineo_thesaurus <- nci_antineo_thesaurus |> - dplyr::filter(!is.na(molecule_chembl_id)) |> - dplyr::bind_rows(nci_2) |> + dplyr::arrange(nci_cd_name) |> dplyr::rename(nci_drug_name = nci_concept_synonym) |> dplyr::filter(!(nci_cd_name == "Sorafenib Tosylate" & nci_drug_name == "sorafenib")) |> dplyr::select(-c(nci_t_parent,cui)) |> - dplyr::distinct() |> - ## Removing duplicate/erroneous NCI/CHEMBL cross-ref identifiers - dplyr::filter( - !((nci_t == "C137804" | nci_t == "C137803") & - molecule_chembl_id == "CHEMBL1201567")) |> #FILGRASTIM - dplyr::filter( - !(nci_t == "C88325" & molecule_chembl_id == "CHEMBL2109653")) |> #BERMEKIMAB - dplyr::filter( - !(nci_t == "C2602" & molecule_chembl_id == "CHEMBL513")) |> #CARMUSTINE - dplyr::filter( - !(nci_t == "C80867" & molecule_chembl_id == "CHEMBL491473")) |> #CEDIRANIB - dplyr::filter( - !(nci_t == "C225" & molecule_chembl_id == "CHEMBL1201577")) |> #CETUXIMAB - dplyr::filter( - !(nci_t == "C2213" & molecule_chembl_id == "CHEMBL178")) |> #DAUNORUBICIN - dplyr::filter( - !(nci_t == "C128039" & molecule_chembl_id == "CHEMBL3137331")) |> #DEFACTINIB - dplyr::filter( - !(nci_t == "C62435" & molecule_chembl_id == "CHEMBL522892")) |> #DOVITINIB - dplyr::filter( - !(nci_t == "C2693" & molecule_chembl_id == "CHEMBL553")) |> #ERLOTINIB - dplyr::filter( - !(nci_t == "C478" & molecule_chembl_id == "CHEMBL135")) |> #ESTRADIOL - dplyr::filter( - !(nci_t == "C1687" & molecule_chembl_id == "CHEMBL941")) |> #IMATINIB - dplyr::filter( - !(nci_t == "C29165" & molecule_chembl_id == "CHEMBL191")) |> #LOSARTAN - dplyr::filter( - !(nci_t == "C1561" & molecule_chembl_id == "CHEMBL2109447")) |> #LYM-1 - dplyr::filter( - !(nci_t == "C1155" & molecule_chembl_id == "CHEMBL717")) |> #MEDROXYPROGESTERONE ACETATE - dplyr::filter( - !(nci_t == "C9678" & molecule_chembl_id == "CHEMBL1456")) |> #MYCOPHENOLATE MOFETIL - dplyr::filter( - !(nci_t == "C20513" & molecule_chembl_id == "CHEMBL1201573")) |> #OPRELVEKIN - dplyr::filter( - !(nci_t == "C95230" & molecule_chembl_id == "CHEMBL1201421")) |> #PEGAPTANIB SODIUM - dplyr::filter( - !(nci_t == "C176878" & molecule_chembl_id == "CHEMBL3545154")) |> #POZIOTINIB - dplyr::filter( - !(nci_t == "C2297" & molecule_chembl_id == "CHEMBL103")) |> #PROGESTERONE - dplyr::filter( - !(nci_t == "C148170" & molecule_chembl_id == "CHEMBL225071")) |> #RALTITREXED - dplyr::filter( - !(nci_t == "C66506" & molecule_chembl_id == "CHEMBL1790041")) |> #RANITIDINE - dplyr::filter( - !(nci_t == "C82693" & molecule_chembl_id == "CHEMBL1738757")) |> #REBASTINIB - dplyr::filter( - !(nci_t == "C1492" & molecule_chembl_id == "CHEMBL1201670")) |> #SARGRAMOSTIM - dplyr::filter( - !(nci_t == "C88337" & molecule_chembl_id == "CHEMBL2105737")) |> #SONIDEGIB - dplyr::filter( - !(nci_t == "C80631" & molecule_chembl_id == "CHEMBL565612")) |> #SOTRASTAURIN - dplyr::filter( - !(nci_t == "C104057" & molecule_chembl_id == "CHEMBL2105694")) |> #TELAPRISTONE ACETATE - dplyr::filter( - !(nci_t == "C29523" & molecule_chembl_id == "CHEMBL1201334")) |> #TRIPTORELIN - dplyr::filter( - !(nci_t == "C80049" & molecule_chembl_id == "CHEMBL3545218")) |> #VORUCICLIB - dplyr::filter( - !(nci_t == "C133021" & molecule_chembl_id == "CHEMBL3188386")) #WNT-974 - - - - i <- 1 - nci_compounds_no_chembl <- as.data.frame( - nci_antineo_thesaurus |> - dplyr::select(nci_cd_name, molecule_chembl_id) |> - dplyr::distinct() |> - dplyr::filter(!stringr::str_detect(tolower(nci_cd_name), - "( vaccine)|^[0-9]")) |> - dplyr::filter(is.na(molecule_chembl_id)) |> - dplyr::select(nci_cd_name) |> - dplyr::mutate(num_spaces = stringr::str_count(nci_cd_name," ")) |> - dplyr::filter( - num_spaces <= 1 & !stringr::str_detect( - tolower(nci_cd_name),"regimen|&|/|;|,")) |> - dplyr::filter( - stringr::str_detect( - nci_cd_name, - "(mab|cin|ide|ib|im|bine|tin|om|lin|stat|one|ate|ole|ane|ine|xel|rol)$")) |> - dplyr::distinct() - ) - - nci_compounds_chembl_match <- data.frame() - - ## Retrieve aliases for drugs with PubChem x-refs - pubchem_synonym_files <- - sort(list.files(path = file.path(here::here(), "data-raw","pubchem"), - pattern = "CID-Synonym-filtered_", - full.names = T)) - - lgr::lgr$info("Mapping ChEMBL identifiers for NCI compounds") - i <- 1 - for(f in pubchem_synonym_files){ - lgr::lgr$info(paste0("Mapping iteration..", i)) - synonym_data <- as.data.frame(readr::read_tsv( - f, col_names = c('pubchem_cid','alias'), - col_types = "dc", - progress = F - )) - - chembl2pubchem <- synonym_data |> - dplyr::filter(stringr::str_detect( - alias, "^CHEMBL")) |> - dplyr::rename(molecule_chembl_id = alias) |> - dplyr::mutate(pubchem_cid = as.integer(pubchem_cid)) - - chembl2alias <- synonym_data |> - dplyr::filter(!stringr::str_detect( - alias, "^CHEMBL")) |> - dplyr::mutate(pubchem_cid = as.integer(pubchem_cid)) |> - dplyr::mutate(alias = tolower(alias)) - - hits <- nci_compounds_no_chembl |> - dplyr::mutate(nci_cd_name_lc = - tolower(nci_cd_name)) |> - dplyr::inner_join( - chembl2alias, - by = c("nci_cd_name_lc" = "alias"), - multiple = "all", relationship = "many-to-many") - - - rm(chembl2alias) - - if(nrow(hits) > 0){ - hits <- hits |> - dplyr::inner_join( - chembl2pubchem, - by = "pubchem_cid", - multiple = "all", - relationship = "many-to-many") |> - dplyr::select(nci_cd_name, - molecule_chembl_id) - - lgr::lgr$info(paste0("Found ", nrow(hits), " ChEMBL identifiers")) - - nci_compounds_chembl_match <- nci_compounds_chembl_match |> - dplyr::bind_rows(hits) - } - rm(chembl2pubchem) - - i <- i + 1 - } - - nci_compounds_chembl_match_unique <- nci_compounds_chembl_match |> - dplyr::group_by(nci_cd_name) |> - dplyr::summarise( - n_identifiers = dplyr::n(), - molecule_chembl_id = paste(unique(molecule_chembl_id), collapse="&"), - .groups = "drop") |> - dplyr::mutate(molecule_chembl_id = dplyr::if_else( - nci_cd_name == "Goserelin Acetate", - "CHEMBL1200501", - as.character(molecule_chembl_id))) |> - dplyr::mutate(molecule_chembl_id = dplyr::if_else( - nci_cd_name == "Abacavir Sulfate", - "CHEMBL1200666", - as.character(molecule_chembl_id))) |> - dplyr::mutate(molecule_chembl_id = dplyr::if_else( - nci_cd_name == "Roniciclib", - "CHEMBL4442620", - as.character(molecule_chembl_id))) |> - dplyr::filter(!stringr::str_detect(molecule_chembl_id,"&")) |> - dplyr::select(-n_identifiers) - - - # i <- 1 - # while(i <= nrow(nci_compounds_no_chembl)){ - # name <- nci_compounds_no_chembl[i, "nci_cd_name"] - # chembl_hit <- get_chembl_compound_by_name(name) - # if(!is.null(chembl_hit)){ - # chembl_hit <- chembl_hit |> - # dplyr::rename(nci_cd_name = name) - # nci_compounds_chembl_match <- nci_compounds_chembl_match |> - # dplyr::bind_rows(chembl_hit) - # - # } - # if(i %% 10 == 0){ - # lgr::lgr$info("Done with querying ChEMBL for ", i, " compound names") - # } - # i <- i + 1 - # } - - nci_antineo_thesaurus_chembl <- nci_antineo_thesaurus |> - dplyr::anti_join(nci_compounds_no_chembl, - by = "nci_cd_name") - - nci_antineo_thesaurus_no_chembl <- nci_antineo_thesaurus |> - dplyr::inner_join(nci_compounds_no_chembl, - by = "nci_cd_name", - multiple = "all", relationship = "many-to-many") |> - dplyr::select(-c(num_spaces, molecule_chembl_id)) |> - dplyr::left_join( - nci_compounds_chembl_match_unique, - by = "nci_cd_name", - multiple = "all", - relationship = "many-to-many") - - nci_antineo_thesaurus <- - nci_antineo_thesaurus_chembl |> - dplyr::bind_rows(nci_antineo_thesaurus_no_chembl) |> - dplyr::arrange(nci_cd_name) |> - dplyr::distinct() |> - dplyr::mutate(molecule_chembl_id = dplyr::if_else( - nci_cd_name == "Vorolanib", - "CHEMBL3545427", - as.character(molecule_chembl_id))) |> - dplyr::mutate(molecule_chembl_id = dplyr::if_else( - nci_cd_name == "Gemtuzumab Ozogamicin", - "CHEMBL1201506", - as.character(molecule_chembl_id))) |> - dplyr::mutate(molecule_chembl_id = dplyr::if_else( - nci_cd_name == "Ublituximab", - "CHEMBL2108354", - as.character(molecule_chembl_id))) |> - dplyr::mutate(molecule_chembl_id = dplyr::if_else( - nci_cd_name == "Etoposide", - "CHEMBL44657", - as.character(molecule_chembl_id))) |> - dplyr::mutate(molecule_chembl_id = dplyr::if_else( - nci_cd_name == "Lorvotuzumab Mertansine", - "CHEMBL1743037", - as.character(molecule_chembl_id))) |> - dplyr::mutate(molecule_chembl_id = dplyr::if_else( - nci_cd_name == "Epratuzumab", - "CHEMBL2108404", - as.character(molecule_chembl_id))) |> - dplyr::mutate(molecule_chembl_id = dplyr::if_else( - nci_cd_name == "Ocaratuzumab", - "CHEMBL2109665", - as.character(molecule_chembl_id))) |> - dplyr::mutate(molecule_chembl_id = dplyr::if_else( - nci_cd_name == "Medroxyprogesterone Acetate", - "CHEMBL717", - as.character(molecule_chembl_id))) |> - dplyr::mutate(molecule_chembl_id = dplyr::if_else( - nci_cd_name == "Brigatinib", - "CHEMBL3545311", - as.character(molecule_chembl_id))) |> - dplyr::mutate(molecule_chembl_id = dplyr::if_else( - nci_cd_name == "Brentuximab", - "CHEMBL1742994", - as.character(molecule_chembl_id))) |> - dplyr::mutate(molecule_chembl_id = dplyr::if_else( - nci_cd_name == "Afatinib", - "CHEMBL1173655", - as.character(molecule_chembl_id))) |> - dplyr::mutate(molecule_chembl_id = dplyr::if_else( - nci_cd_name == "Spebrutinib", - "CHEMBL3301625", - as.character(molecule_chembl_id))) |> - dplyr::mutate(molecule_chembl_id = dplyr::if_else( - nci_cd_name == "Dacomitinib", - "CHEMBL2110732", - as.character(molecule_chembl_id))) |> - dplyr::mutate(molecule_chembl_id = dplyr::if_else( - nci_cd_name == "Lucitanib", - "CHEMBL2220486", - as.character(molecule_chembl_id))) |> - dplyr::mutate(molecule_chembl_id = dplyr::if_else( - nci_cd_name == "Tofacitinib", - "CHEMBL221959", - as.character(molecule_chembl_id))) |> - dplyr::mutate(molecule_chembl_id = dplyr::if_else( - nci_cd_name == "Mifamurtide", - "CHEMBL2111100", - as.character(molecule_chembl_id))) |> - dplyr::mutate(molecule_chembl_id = dplyr::if_else( - nci_cd_name == "Umbralisib", - "CHEMBL3948730", - as.character(molecule_chembl_id))) |> - dplyr::mutate(molecule_chembl_id = dplyr::if_else( - nci_cd_name == "Onvansertib", - "CHEMBL1738758", - as.character(molecule_chembl_id))) |> - dplyr::mutate(molecule_chembl_id = dplyr::if_else( - nci_cd_name == "Sulindac", - "CHEMBL15770", - as.character(molecule_chembl_id))) |> - dplyr::mutate(molecule_chembl_id = dplyr::if_else( - nci_cd_name == "Calcipotriene", - "CHEMBL1200666", - as.character(molecule_chembl_id))) |> dplyr::distinct() - - + nci_drugs <- list() - ## NCI anticancer drugs (targeted) - including compound identifier (CHEMBL) - nci_drugs[['with_chembl_id']] <- nci_antineo_thesaurus |> - dplyr::select(nci_t, - nci_concept_definition, - nci_cd_name, - molecule_chembl_id, - nci_drug_name, - nci_concept_synonym_all) |> - dplyr::filter(!is.na(molecule_chembl_id)) |> - #dplyr::mutate(nci_drug_name_lc = tolower(nci_drug_name)) |> - dplyr::distinct() - + ## NCI anticancer drugs (non-targeted) - lacking compound identifier (CHEMBL) nci_drugs[['no_chembl_id']] <- nci_antineo_thesaurus |> - dplyr::filter(is.na(molecule_chembl_id)) |> dplyr::select(nci_t, nci_concept_definition, nci_cd_name, @@ -1348,16 +703,16 @@ get_nci_drugs <- function(nci_db_release = nci_db_release, )) |> dplyr::select(-num_words) |> dplyr::distinct() - + saveRDS(nci_drugs, file = file.path( path_data_processed, "nci_thesaurus", - "nci_treatment_thesaurus_antineo.rds")) - + "nci_treatment_thesaurus_antineo_v2.rds")) + }else{ nci_drugs <- readRDS( file = file.path( path_data_processed, "nci_thesaurus", - "nci_treatment_thesaurus_antineo.rds")) + "nci_treatment_thesaurus_antineo_v2.rds")) } return(nci_drugs) } @@ -1367,177 +722,82 @@ merge_nci_opentargets <- function( drug_sets = NULL, path_data_raw = NULL){ - ot_nci_matched <- list() - - ## X-ref Open Targets and NCI by molecule id - ot_nci_matched[['targeted_by_id']] <- drug_sets[['otp']] |> - dplyr::left_join( - drug_sets[['nci']][['with_chembl_id']], - by = c("molecule_chembl_id"), - multiple = "all", relationship = "many-to-many") |> - dplyr::filter(!is.na(nci_drug_name)) |> - dplyr::select(-c(nci_drug_name)) - - ## X-ref Open Targets and NCI (with molecule ID) by drug name - ot_nci_matched[['targeted_by_name']] <- drug_sets[['otp']] |> + + ## X-ref Open Targets and NCI by drug name + otp_drugs_all <- drug_sets[['otp']] |> dplyr::mutate(drug_name_lc = tolower(drug_name)) |> dplyr::left_join( - dplyr::select( - drug_sets[['nci']][['with_chembl_id']], - -molecule_chembl_id), + drug_sets[['nci']][['no_chembl_id']], by = c("drug_name_lc" = "nci_drug_name"), multiple = "all", relationship = "many-to-many") |> - dplyr::anti_join( - ot_nci_matched[['targeted_by_id']], by = "nci_cd_name") |> - dplyr::filter(!is.na(drug_name_lc)) |> - dplyr::select(-c(drug_name_lc)) - - ot_nci_matched_all <- do.call(rbind, ot_nci_matched) |> + dplyr::select(-c(drug_name_lc)) |> + dplyr::distinct() |> + dplyr::mutate(opentargets = TRUE) + + ## list all drug aliases in targeted drugs from OTP + all_drug_aliases <- otp_drugs_all |> + tidyr::separate_rows(drug_synonyms, sep = "\\|") |> + dplyr::select(drug_synonyms) |> + dplyr::mutate(nci_drug_name = tolower(drug_synonyms)) |> dplyr::distinct() - - ot_targeted_remain <- drug_sets[['otp']] |> - dplyr::anti_join( - ot_nci_matched_all, - by = c("target_symbol","molecule_chembl_id", - "disease_efo_label","drug_name", - "drug_clinical_source", - "drug_clinical_id")) |> - dplyr::mutate(drug_name_lc = tolower(drug_name)) |> - dplyr::left_join( - drug_sets[['nci']][['no_chembl_id']], - by = c("drug_name_lc" = "nci_drug_name"), - multiple = "all", relationship = "many-to-many") |> - dplyr::select(-drug_name_lc) - - ot_drugs_all <- ot_nci_matched_all |> - dplyr::bind_rows(ot_targeted_remain) |> + + ## Identify drugs in NCI that are not in Open Targets + found_drugs <- drug_sets[['nci']][['no_chembl_id']] |> + dplyr::semi_join(all_drug_aliases, + by = "nci_drug_name") |> + dplyr::select(nci_t) |> + dplyr::distinct() + + nci_missing <- drug_sets[['nci']][['no_chembl_id']] |> + dplyr::anti_join(found_drugs, + by = "nci_t") |> + dplyr::select(-nci_drug_name) |> + dplyr::mutate(opentargets = F) |> + dplyr::distinct() + + ## ignore some drugs + custom_name_ignore <- readr::read_tsv( + file = "data-raw/drug_names_ignore.tsv", + col_names = F, show_col_types = F) + colnames(custom_name_ignore) <- c("nci_cd_name") + + all_cancer_drugs <- otp_drugs_all |> + dplyr::bind_rows(nci_missing) |> + dplyr::distinct() |> dplyr::mutate( nci_cd_name = - dplyr::if_else(is.na(nci_cd_name) & - !stringr::str_detect(drug_name,"[0-9]"), - Hmisc::capitalize(tolower(drug_name)), - nci_cd_name)) |> + dplyr::if_else( + is.na(nci_cd_name) & + !stringr::str_detect(drug_name,"[0-9]"), + Hmisc::capitalize(tolower(drug_name)), + nci_cd_name)) |> dplyr::mutate( nci_cd_name = dplyr::if_else( is.na(nci_cd_name) & stringr::str_detect(drug_name,"[0-9]"), drug_name, nci_cd_name)) |> - dplyr::mutate(opentargets = TRUE) - - nci_missing_1 <- drug_sets[['nci']][['no_chembl_id']] |> - dplyr::anti_join(ot_drugs_all, - by = "nci_cd_name") |> - dplyr::select(-nci_drug_name) - - nci_missing_2 <- drug_sets[['nci']][['with_chembl_id']] |> - dplyr::anti_join(ot_drugs_all, - by = "molecule_chembl_id") |> - dplyr::select(-nci_drug_name) - - nci_missing <- dplyr::bind_rows( - nci_missing_1, - nci_missing_2) |> - dplyr::distinct() |> - dplyr::mutate(opentargets = FALSE) - - rm(nci_missing_1) - rm(nci_missing_2) - - - ## do not include NCI records with a similar drug name - nci_missing$drug_name_lc <- tolower(nci_missing$nci_cd_name) - - ot_drugs_all$drug_name_lc1 <- tolower(ot_drugs_all$drug_name) - ot_drugs_all$drug_name_lc2 <- tolower(ot_drugs_all$nci_cd_name) - - - nci_missing_final <- nci_missing |> - dplyr::select(drug_name_lc) |> - dplyr::anti_join( - ot_drugs_all, - by = c("drug_name_lc" = "drug_name_lc1")) |> + dplyr::mutate(drug_name = dplyr::if_else( + is.na(drug_name), + nci_cd_name, + drug_name)) |> dplyr::anti_join( - ot_drugs_all, - by = c("drug_name_lc" = "drug_name_lc2")) |> - dplyr::inner_join( - nci_missing, by = "drug_name_lc", - multiple = "all", - relationship = "many-to-many") |> - dplyr::select(-drug_name_lc) + custom_name_ignore, + by = "nci_cd_name") - all_cancer_drugs <- ot_drugs_all |> - dplyr::select(-c(drug_name_lc1, drug_name_lc2)) |> - dplyr::bind_rows(nci_missing_final) |> - dplyr::distinct() - - - ## Figure out cases where a single drug maps to multiple - ## molecule chembl identifiers rownames(all_cancer_drugs) <- NULL - - custom_chembl_map <- readr::read_tsv( - file = "data-raw/custom_chembl_map.tsv", - col_names = F, show_col_types = F) - - colnames(custom_chembl_map) <- - c("nci_cd_name", "molecule_chembl_id") - - custom_name_ignore <- readr::read_tsv( - file = "data-raw/drug_names_ignore.tsv", - col_names = F, show_col_types = F) - colnames(custom_name_ignore) <- c("nci_cd_name") - - name2chembl_id <- all_cancer_drugs |> - dplyr::filter(!is.na(molecule_chembl_id)) |> - dplyr::group_by(nci_cd_name) |> - dplyr::summarise(m = paste( - sort(unique(molecule_chembl_id)), collapse=";")) - - ## Drugs that map to a single identifier - name2chembl_unique <- name2chembl_id |> - dplyr::filter(!stringr::str_detect(m,";")) |> - dplyr::inner_join( - all_cancer_drugs, - by = "nci_cd_name", - multiple = "all", - relationship = "many-to-many") |> - dplyr::select(-m) - - ## Drugs that map to multiple identifiers - name2chembl_ambiguous_curated <- name2chembl_id |> - dplyr::filter(stringr::str_detect(m, ";")) |> - dplyr::inner_join( - custom_chembl_map, - by = "nci_cd_name", - multiple = "all", - relationship = "many-to-many") |> - dplyr::inner_join( - all_cancer_drugs, - by = c("nci_cd_name","molecule_chembl_id"), - multiple = "all", - relationship = "many-to-many") |> - dplyr::select(-m) - - all_drugs <- dplyr::bind_rows( - dplyr::filter( - all_cancer_drugs, is.na(molecule_chembl_id)), - name2chembl_unique, - name2chembl_ambiguous_curated - ) - - salt_patterns <- - readr::read_tsv( + salt_patterns <- + readr::read_tsv( file.path(path_data_raw, "salts.tsv"), show_col_types = F, col_names = F) - + salt_patterns_regex <- paste0( "( (", paste(salt_patterns$X1, collapse="|"), "))$") - salt_forms <- all_drugs |> + salt_forms <- all_cancer_drugs |> dplyr::filter( stringr::str_detect( tolower(nci_cd_name), @@ -1551,14 +811,14 @@ merge_nci_opentargets <- function( dplyr::distinct() |> dplyr::mutate(is_salt = T) |> dplyr::inner_join( - dplyr::select(all_drugs, nci_cd_name), + dplyr::select(all_cancer_drugs, nci_cd_name), by = c("tradename" = "nci_cd_name"), multiple = "all", relationship = "many-to-many") |> dplyr::distinct() |> dplyr::select(-tradename) - - - all_drugs_final <- all_drugs |> + + + all_cancer_drugs <- all_cancer_drugs |> dplyr::left_join( salt_forms, by = "nci_cd_name", @@ -1569,18 +829,16 @@ merge_nci_opentargets <- function( as.logical(FALSE), as.logical(is_salt) )) |> - dplyr::anti_join( - custom_name_ignore, - by = "nci_cd_name") - - + dplyr::distinct() + + ## antibody drug conjugates - adc_candidates <- all_drugs_final |> + adc_candidates <- all_cancer_drugs |> dplyr::filter( (!is.na(nci_cd_name) & - stringr::str_detect(tolower(nci_cd_name), "mab ")) | - stringr::str_detect( - nci_concept_definition, "ADC|antibody(-| )drug conjugate")) |> + stringr::str_detect(tolower(nci_cd_name), "mab ")) | + stringr::str_detect( + nci_concept_definition, "ADC|antibody(-| )drug conjugate")) |> dplyr::mutate(is_adc = TRUE) |> dplyr::mutate(is_adc = dplyr::if_else( stringr::str_detect(nci_concept_definition, "ADCC") & @@ -1595,7 +853,7 @@ merge_nci_opentargets <- function( )) |> dplyr::mutate(is_adc = dplyr::if_else( is_adc == F & - !is.na(nci_cd_name) & + !is.na(nci_cd_name) & stringr::str_detect( tolower(nci_cd_name), "mab ") & stringr::str_detect( @@ -1606,8 +864,8 @@ merge_nci_opentargets <- function( )) |> dplyr::select(nci_cd_name, is_adc) |> dplyr::distinct() - - all_drugs_final <- all_drugs_final |> + + all_cancer_drugs <- all_cancer_drugs |> dplyr::left_join( adc_candidates, by = "nci_cd_name", @@ -1644,10 +902,12 @@ merge_nci_opentargets <- function( as.character(drug_cancer_relevance) )) |> dplyr::distinct() + + return(all_cancer_drugs) + +} - return(all_drugs_final) -} map_curated_targets <- function(gene_info = NULL, path_data_raw = NULL, @@ -1777,16 +1037,6 @@ map_curated_targets <- function(gene_info = NULL, nci_concept_definition, "(A|a)ntibody(-| )drug conjugate \\(ADC\\)" )) |> - - ## filter for the presence of gene symbols in name or concept definition - # dplyr::filter( - # stringr::str_detect( - # nci_cd_name, trialOncoX::tox_int_data$regex_patterns$variant[51,]$regex) | - # stringr::str_detect( - # nci_concept_definition, - # trialOncoX::tox_int_data$regex_patterns$variant[51,]$regex - # ) - # ) |> dplyr::filter(!stringr::str_detect( tolower(nci_cd_name),"^(allogeneic|regimen |copper |fluorine f |indium |iodine |carbon c|autologous |recombinant |lutetium |yttrium |y 90)|vaccine$" )) |> @@ -1837,449 +1087,10 @@ map_curated_targets <- function(gene_info = NULL, as.character(drug_action_type) )) - return(ot_nci_drugs_curated) + return(list('curated' = ot_nci_drugs_curated, + 'nonmapped' = inhibitors_no_target_nonmapped)) } -# assign_drug_category <- function(drug_df = NULL, -# path_data_raw = NULL){ -# -# -# atc_classification <- -# get_atc_drug_classification(path_data_raw = path_data_raw) -# -# drug_df <- drug_df |> -# dplyr::distinct() |> -# dplyr::mutate(antimetabolite = dplyr::if_else( -# !is.na(nci_concept_definition) & -# stringr::str_detect( -# tolower(nci_concept_definition), -# "antimetabol|anti-metabol|nucleoside analog"),TRUE,FALSE) -# ) |> -# dplyr::mutate(iap_inhibitor = dplyr::if_else( -# !is.na(target_symbol) & -# stringr::str_detect( -# target_symbol, -# "^(BIRC|XIAP)"),TRUE,FALSE) -# ) |> -# dplyr::mutate(topoisomerase_inhibitor = dplyr::if_else( -# (!is.na(nci_concept_definition) & -# stringr::str_detect( -# nci_concept_definition, -# "(T|t)opoisomerase II-mediated|(T|t)opoisomerase( I|II )? \\(.*\\) inhibitor|inhibit(ion|or) of (T|t)opoisomerase|(stabilizes|interrupts|binds to|interacts with|inhibits( the activity of)?)( the)?( DNA)? (t|T)opoisomerase|(T|t)opoisomerase( (I|II))? inhibitor")) | -# (!is.na(target_genename) & -# stringr::str_detect(target_genename,"topoisomerase")),TRUE,FALSE) -# ) |> -# dplyr::mutate(hedgehog_antagonist = dplyr::if_else( -# (!is.na(nci_concept_definition) & -# stringr::str_detect( -# nci_concept_definition, -# "Hedgehog") & stringr::str_detect( -# nci_cd_name,"Smoothened Antagonist|(ate|ib)$")) | -# (!is.na(nci_cd_name) & -# stringr::str_detect( -# nci_cd_name,"Hedgehog Inhibitor|SMO Protein Inhibitor")), -# TRUE,FALSE) -# ) |> -# dplyr::mutate(hdac_inhibitor = dplyr::if_else( -# (!is.na(target_symbol) & -# stringr::str_detect( -# target_symbol, -# "^HDAC")) | -# (!is.na(nci_concept_definition) & -# stringr::str_detect(nci_concept_definition,"inhibitor of histone deacetylase")) | -# (!is.na(nci_cd_name) & -# stringr::str_detect(nci_cd_name,"HDAC Inhibitor")), -# TRUE,FALSE) -# ) |> -# dplyr::mutate(alkylating_agent = dplyr::if_else( -# (is.na(drug_moa) | -# (!is.na(drug_moa) & stringr::str_detect(drug_moa,"DNA"))) & -# !stringr::str_detect(nci_cd_name, -# "antiangiogenic") & -# !is.na(nci_concept_definition) & -# (stringr::str_detect( -# tolower(nci_concept_definition), -# "(alkylates dna|alkylation of dna|alkylating (agent|metabolite)|alkylating-like|alkylates and cross-links dna|alkylating( and antimetabolite)? activit(y|ies))") | -# (!is.na(nci_cd_name) & -# stringr::str_detect(tolower(nci_cd_name),"(mustine|platin)$") & -# !stringr::str_detect(tolower(nci_cd_name),"/"))), -# TRUE,FALSE) -# ) |> -# dplyr::mutate(parp_inhibitor = dplyr::if_else( -# !is.na(target_symbol) & -# stringr::str_detect( -# target_symbol, -# "^PARP[0-9]{1}"),TRUE,FALSE) -# ) |> -# dplyr::mutate(bet_inhibitor = dplyr::if_else( -# !is.na(target_symbol) & -# stringr::str_detect( -# target_symbol, -# "^BRD(T|[1-9]{1})") | -# (!is.na(nci_cd_name) & -# stringr::str_detect( -# nci_cd_name,"BET( Bromodomain)? Inhibitor")),TRUE,FALSE) -# ) |> -# dplyr::mutate(tubulin_inhibitor = dplyr::if_else( -# (!is.na(drug_action_type) & -# drug_action_type != "STABILISER" & -# !is.na(target_genename) & -# stringr::str_detect( -# tolower(target_genename), -# "tubulin")) | -# (!is.na(nci_concept_definition) & stringr::str_detect( -# tolower(nci_concept_definition), -# "binds to tubulin|disrupts microtubule|microtubule disrupt")), -# TRUE,FALSE) -# ) |> -# dplyr::mutate(ar_antagonist = dplyr::if_else( -# (!is.na(target_genename) & -# stringr::str_detect( -# tolower(target_genename), -# "androgen receptor")), -# TRUE,FALSE) -# ) |> -# dplyr::mutate(kinase_inhibitor = dplyr::if_else( -# (!is.na(target_symbol) & stringr::str_detect(target_symbol,"EGFR|PTPN11|ABL1|FGFR|PDGFR|CSF1R")) | -# (((!is.na(drug_action_type) & -# stringr::str_detect(tolower(drug_action_type),"blocker|inhibitor|antagonist")) | -# stringr::str_detect(tolower(nci_cd_name),"ib$")) & -# (!is.na(target_genename) & -# stringr::str_detect(tolower(target_genename),"kinase|eph receptor"))) | -# (!is.na(nci_concept_definition) & -# stringr::str_detect(nci_concept_definition,"kinase inhibit(or|ion)")), -# TRUE,FALSE) -# ) |> -# dplyr::mutate(angiogenesis_inhibitor = dplyr::if_else( -# stringr::str_detect(tolower(drug_action_type),"blocker|inhibitor|antagonist") & -# (!is.na(nci_cd_name) & -# stringr::str_detect(tolower(nci_cd_name), -# "antiangiogenic|angiogenesis inhibitor")) | -# (!is.na(nci_concept_definition) & -# stringr::str_detect( -# tolower(nci_concept_definition), -# "antiangiogenic activities|angiogenesis inhibitor|(inhibiting|blocking)( tumor)? angiogenesis|anti(-)?angiogenic|(inhibits|((inhibition|reduction) of))( .*) angiogenesis")), -# TRUE,FALSE) -# ) |> -# dplyr::mutate(monoclonal_antibody = dplyr::if_else( -# (!is.na(drug_type) & drug_type == "Antibody") | -# (stringr::str_detect(tolower(nci_cd_name), -# "^anti-|mab |mab$|monoclonal antibody") & -# (!is.na(nci_concept_definition) & -# stringr::str_detect(nci_concept_definition,"monoclonal antibody"))), -# TRUE,FALSE) -# ) |> -# dplyr::mutate(proteasome_inhibitor = dplyr::if_else( -# (stringr::str_detect(tolower(nci_cd_name), -# "^proteasome") & -# !stringr::str_detect(tolower(nci_cd_name),"vaccine")) | -# (!is.na(nci_concept_definition) & -# stringr::str_detect( -# tolower(nci_concept_definition),"proteasome inhibitor|inhibits the proteasome|inhibition of proteasome")), -# TRUE,FALSE) -# ) |> -# dplyr::mutate(hormone_therapy = dplyr::if_else( -# stringr::str_detect(tolower(nci_cd_name), -# "aromatase inhib|estrogen receptor (inhibitor|degrader|modulator)") | -# (!is.na(nci_concept_definition) & -# stringr::str_detect( -# tolower(nci_concept_definition),"inhibitor of estrogen|estrogen receptor (modulator|inhibitor|degrader)|antiestrogen|aromatase inhibit(or|ion)") & -# !stringr::str_detect(nci_concept_definition,"antiestrogen resistance")) | -# (!is.na(target_symbol) & stringr::str_detect(target_symbol,"ESR[0-9]|GNRHR")), -# TRUE,FALSE) -# ) |> -# dplyr::mutate(anthracycline = dplyr::if_else( -# (!is.na(nci_concept_definition) & -# stringr::str_detect( -# tolower(nci_concept_definition), -# "anthracycline|anthracenedione")), -# TRUE, FALSE) -# ) |> -# dplyr::mutate(immune_checkpoint_inhibitor = dplyr::if_else( -# (!is.na(nci_concept_definition) & -# !stringr::str_detect( -# tolower(nci_cd_name), "oncolytic|pentoxifylline|vaccine") & -# iap_inhibitor == FALSE & -# stringr::str_detect( -# tolower(nci_concept_definition), -# "immune checkpoint inhib")) | -# stringr::str_detect(nci_cd_name,"(Anti-(PD|CTLA)-)") | -# (stringr::str_detect(nci_cd_name, -# "Tremelimumab|Milatuzumab")) | -# (!is.na(target_symbol) & -# !stringr::str_detect( -# tolower(nci_cd_name), "oncolytic|pentoxifylline") & -# (target_symbol == "CD274" | -# target_symbol == "CTLA4" | -# target_symbol == "PDCD1" | -# target_symbol == "TIGIT")), -# TRUE,FALSE) -# ) |> -# dplyr::mutate(immune_checkpoint_inhibitor = dplyr::if_else( -# !is.na(nci_cd_name) & -# immune_checkpoint_inhibitor == T & -# stringr::str_detect(nci_cd_name,"NLM-001|CEA-MUC-1|Oncolytic|Vaccine"), -# as.logical(FALSE), -# as.logical(immune_checkpoint_inhibitor) -# )) |> -# dplyr::mutate(platinum_compound = dplyr::if_else( -# !is.na(drug_name) & -# stringr::str_detect(tolower(drug_name),"platin$"), -# as.logical(TRUE), -# as.logical(FALSE) -# )) -# -# ## Make sure each drug is assigned an unambiguous value for each category -# nciCDN2Category <- list() -# for(c in c('immune_checkpoint_inhibitor', -# 'topoisomerase_inhibitor', -# 'tubulin_inhibitor', -# 'kinase_inhibitor', -# 'iap_inhibitor', -# 'hdac_inhibitor', -# 'parp_inhibitor', -# 'bet_inhibitor', -# 'ar_antagonist', -# 'monoclonal_antibody', -# 'antimetabolite', -# 'angiogenesis_inhibitor', -# 'alkylating_agent', -# 'anthracycline', -# 'platinum_compound', -# 'proteasome_inhibitor', -# 'hormone_therapy', -# 'hedgehog_antagonist')){ -# -# cat <- drug_df[,c] -# name <- drug_df$nci_cd_name -# -# nciCDN2Category[[c]] <- as.data.frame( -# data.frame( -# 'nci_cd_name' = name, -# stringsAsFactors = F -# ) |> -# dplyr::mutate(!!c := cat) |> -# dplyr::distinct() |> -# dplyr::group_by(nci_cd_name) |> -# dplyr::summarise(!!c := paste(!!dplyr::sym(c), collapse="/")) |> -# dplyr::mutate(!!c := dplyr::if_else( -# stringr::str_detect(!!dplyr::sym(c),"/"), -# TRUE, -# as.logical(!!dplyr::sym(c)))) -# ) -# -# drug_df[,c] <- NULL -# drug_df <- drug_df |> -# dplyr::left_join( -# nciCDN2Category[[c]], -# by = "nci_cd_name", -# multiple = "all" -# ) -# -# } -# -# -# drugs_with_codes <- drug_df |> -# dplyr::mutate(drug_entry = tolower(nci_cd_name)) |> -# dplyr::left_join( -# dplyr::select( -# atc_classification, -# atc_drug_entry, -# atc_level3, -# atc_code_level3), -# by = c("drug_entry" = "atc_drug_entry"), -# multiple = "all", relationship = "many-to-many") |> -# dplyr::mutate(atc_code_level3 = dplyr::case_when( -# is.na(atc_code_level3) & -# target_symbol == "ABL1" | target_symbol == "BCR" ~ "L01EA", -# is.na(atc_code_level3) & -# stringr::str_detect(target_symbol, "^(PARP[0-9]{1})$") ~ "L01XK", -# -# is.na(atc_code_level3) & -# stringr::str_detect(target_symbol, "^(HDAC[0-9]{1,})$") ~ "L01XH", -# is.na(atc_code_level3) & -# hedgehog_antagonist == T ~ "L01XJ", -# is.na(atc_code_level3) & -# topoisomerase_inhibitor == T ~ "L01CE", -# is.na(atc_code_level3) & -# target_symbol == "BRAF" ~ "L01EC", -# is.na(atc_code_level3) & -# target_symbol == "ALK" ~ "L01ED", -# is.na(atc_code_level3) & -# stringr::str_detect( -# target_symbol,"TUBA|TUBB") & -# (drug_action_type == "INHIBITOR" | -# drug_action_type == "DISRUPTING_AGENT") ~ "L01XX", -# is.na(atc_code_level3) & -# (stringr::str_detect(drug_entry," inhibitor") & -# stringr::str_detect(drug_entry, "kinase ")) | -# stringr::str_detect( -# target_symbol, -# "^(KIT|SRC|SYK|YES1|AKT[0-9]|AURK(A|B|C)|MAPK|PDGFR|AXL|BLK|ATM|CSF1R|ATR|FRK|FYN|HCK|IRAK4|LCK|LYN|MET|NTRK[0-9]|PLK[0-9]|RAF1|ROS1|CHEK(1|2)|ERBB4)$") ~ "L01EX", -# is.na(atc_code_level3) & -# stringr::str_detect(target_symbol, "^MAP2K[0-9]") ~ "L01EE", -# is.na(atc_code_level3) & -# stringr::str_detect(target_symbol, "^CDK[0-9]{1,}") ~ "L01EF", -# is.na(atc_code_level3) & -# stringr::str_detect(target_symbol, "^JAK[0-9]") ~ "L01EJ", -# is.na(atc_code_level3) & -# stringr::str_detect(target_symbol, "^FGFR[0-9]") ~ "L01EN", -# is.na(atc_code_level3) & -# stringr::str_detect(target_symbol, "^GNRH") ~ "L02AE", -# is.na(atc_code_level3) & -# !stringr::str_detect(drug_entry,"/") & -# stringr::str_detect(drug_entry, "xel$") ~ "L01CD", -# is.na(atc_code_level3) & -# !stringr::str_detect(drug_entry,"/") & -# stringr::str_detect(drug_entry, "platin$") ~ "L01XA", -# is.na(atc_code_level3) & -# !is.na(nci_concept_definition) & -# stringr::str_detect(tolower(nci_concept_definition), "anti-estrogen") ~ "L02BA", -# is.na(atc_code_level3) & -# !is.na(nci_concept_definition) & -# stringr::str_detect(tolower(nci_concept_definition), "aromatase inhibitor") ~ "L02BG", -# is.na(atc_code_level3) & -# !is.na(nci_concept_definition) & -# stringr::str_detect(tolower(nci_concept_definition), "nitrogen mustard") ~ "L01AA", -# is.na(atc_code_level3) & -# stringr::str_detect(target_symbol, "^PIK3") ~ "L01EM", -# is.na(atc_code_level3) & -# stringr::str_detect(target_symbol, "^BTK$") ~ "L01EL", -# is.na(atc_code_level3) & -# stringr::str_detect(target_symbol, "^(KDR|FLT1|FLT3|FLT4)$") ~ "L01EK", -# is.na(atc_code_level3) & -# stringr::str_detect(target_symbol, "^(MS4A1)$") ~ "L01FA", -# is.na(atc_code_level3) & -# stringr::str_detect(target_symbol, "^(CD38)$") ~ "L01FC", -# is.na(atc_code_level3) & -# stringr::str_detect(target_symbol, "^(CD22)$") ~ "L01FB", -# is.na(atc_code_level3) & -# (target_symbol == "PDCD1" | -# target_symbol == "CD274") ~ "L01FF", -# is.na(atc_code_level3) & -# target_symbol == "EGFR" & -# stringr::str_detect(drug_entry, "mab$") ~ "L01FE", -# is.na(atc_code_level3) & -# target_symbol == "EGFR" & -# !stringr::str_detect(drug_entry, "mab$") ~ "L01EB", -# is.na(atc_code_level3) & -# target_symbol == "ERBB2" & -# stringr::str_detect(drug_entry, "mab$") ~ "L01FD", -# is.na(atc_code_level3) & -# target_symbol == "ERBB2" & -# !stringr::str_detect(drug_entry, "mab$") ~ "L01EH", -# is.na(atc_code_level3) & -# monoclonal_antibody == T | -# is_adc == T ~ "L01FX", -# is.na(atc_code_level3) & -# bet_inhibitor == T | -# iap_inhibitor == T ~ "L01XX", -# is.na(atc_code_level3) & -# stringr::str_detect( -# tolower(nci_concept_definition), "purine( nucleoside)? analog") ~ "L01BB", -# is.na(atc_code_level3) & -# stringr::str_detect( -# tolower(nci_concept_definition), "pyrimidine( nucleoside)? analog") ~ "L01BC", -# is.na(atc_code_level3) & -# !stringr::str_detect(drug_entry, "/") & -# stringr::str_detect( -# tolower(nci_concept_definition), "vinca alkaloid") ~ "L01CA", -# -# is.na(atc_code_level3) & -# ((!is.na(nci_concept_definition) & -# stringr::str_detect( -# tolower(nci_concept_definition), -# "antineoplastic activit|anti-tumor activit" -# )) | -# (!is.na(drug_max_ct_phase) & -# stringr::str_detect( -# drug_entry,"(in|ib|ide|ine|ax|il|an|ate| alfa)$") & -# drug_max_ct_phase >= 2 & -# (!is.na(drug_n_indications) & -# drug_n_indications > 2) & -# (!is.na(drug_frac_cancer_indications) & -# drug_frac_cancer_indications > 0.4))) ~ "L01XX", -# -# -# TRUE ~ as.character(atc_code_level3) -# )) -# -# drugs_classified <- list() -# drugs_classified[['part1']] <- as.data.frame( -# drugs_with_codes |> -# dplyr::filter(!is.na(atc_code_level3) & -# !is.na(atc_level3)) |> -# dplyr::select(-atc_level3) |> -# dplyr::distinct() |> -# dplyr::left_join( -# dplyr::select( -# atc_classification, -# -atc_drug_entry), -# by = c("atc_code_level3"), -# multiple = "all", -# relationship = "many-to-many" -# ) |> -# dplyr::distinct() -# ) -# -# drugs_classified[['part2']] <- drugs_with_codes |> -# dplyr::filter(!is.na(atc_code_level3) & -# is.na(atc_level3)) |> -# dplyr::select(-atc_level3) |> -# dplyr::left_join( -# dplyr::select( -# atc_classification, -atc_drug_entry), -# by = "atc_code_level3", -# multiple = "all", -# relationship = "many-to-many" -# ) |> -# dplyr::distinct() -# -# drugs_classified_all <- -# drugs_classified[['part1']] |> -# dplyr::bind_rows( -# drugs_classified[['part2']] -# ) -# -# drugs_unclassified <- drugs_with_codes |> -# dplyr::filter(is.na(atc_code_level3)) -# -# -# for(c in c('immune_checkpoint_inhibitor', -# 'topoisomerase_inhibitor', -# 'tubulin_inhibitor', -# 'kinase_inhibitor', -# 'iap_inhibitor', -# 'hdac_inhibitor', -# 'parp_inhibitor', -# 'bet_inhibitor', -# 'ar_antagonist', -# 'monoclonal_antibody', -# 'antimetabolite', -# 'angiogenesis_inhibitor', -# 'alkylating_agent', -# 'anthracycline', -# 'platinum_compound', -# 'proteasome_inhibitor', -# 'hormone_therapy', -# 'hedgehog_antagonist')){ -# -# drugs_classified_all[ ,c] <- NULL -# drugs_unclassified[, c] <- NULL -# } -# -# drug_df <- dplyr::bind_rows( -# drugs_classified_all, -# drugs_unclassified) -# -# drug_df <- remove_duplicate_chembl_ids( -# drug_df = drug_df -# ) -# -# return(drug_df) -# -# } - assign_drug_category <- function(drug_df = NULL, path_data_raw = NULL){ @@ -3282,7 +2093,7 @@ expand_drug_aliases <- function(drug_index_map = NULL, chembl_pubchem_datestamp = chembl_pubchem_datestamp){ chembl_pubchem_xref <- - get_chembl_pubchem_compound_xref( + get_chembl_pubchem_xref( datestamp = chembl_pubchem_datestamp, path_data_raw = path_data_raw) diff --git a/data-raw/metadata_pharm_oncox.xlsx b/data-raw/metadata_pharm_oncox.xlsx index 35bbafb32e65d10388685c882a6e88c94b5acfd6..10453920783a289a73a39304c0abf68915d17ad0 100644 GIT binary patch delta 4496 zcmZ9QWmMGP*2jmgp+P!k7(yCGx}-~xmPSHp2N7W?f0Q%`3`nPR!;m6Ki3&)kbay#` z(%1ie*1hYlXTRBN@AWqG1oJ%(PG6?x%qU%*)@?rn&dqxe(DEVnGZMT?u<)C2oSwOQN=9?9pPqpg=2(?mH)eKoU#s3Hv2qqb8PzmB^kS zfgdl#m3Zr2IuGRHv)e;#uql{2~s&SyJm^uTtb=z&)ZHUFB%*Z0|6~gv&7(?j=-T zWj34_F-|*2ia+~8TH*Pds@U^uhlO6?lhaRFeCLJ=uSd`HhnoA#W-?CWT5%xnfa6*b z%`x^S8AbZ>K4H#CTljtO_VJgRrOEjqxe896`HOG2d#!c}rg=igwB}0i`myHT3Ei5E zdDY*HS?U>xrbl_Y-vI`(foS>=X(hJr;X5yDExpC`4-^t+ZAQM2yf~+s&8HSYVFoXV zq^Mo&!`L+l)XCP^aPV(S-IDjK0DZDp#OBe;tbC$?EL5Y&@B0bUmDY-ZUdqY_JV#aE zsE?$Er=ZsYHu9(#H9``@9zDjMJ-j(yvmC1>%BzXkr3*COo+niVI~J2WD45sgo_+Ew zI@$kx?b~#h&){xT)zd^J@6j|~qg8L{c$Xfdb?7NUNaeBXoYf~k=pA|hz-u0ng1eEP zmRB(+I;3GQlYXiykuKHC)Q8aSDL!@HJj4{>>fHM>kqO%=qPx^z(fQ!D`r{s)?eR&a zrOL)b^6X=IG`xQ}JEXD6qf(b^U=iU2ov0gAed zJ@iCSi;+0}S=)*niv3Hz@j}=6X1lmDdBNr~b^P^CQLb8!O?Vxh&RA!ritX}`0$;tp z)QwO4c30i!6{s7wMxSq0Zr536?-O`8brp%b)Q>qV=u|>s5ftppB+;y9A5JFDBtU2# zFed>KO^=ix3Ks;DLy5o`0nvZTDeH;Fy9!D08yn|4oYib>O^tjPe_X8RLp@p*?ec~h zO-?~x!L2*n*S}xqZBWh3jwU{5)Yu)D+#)FMyA2S|*?r@mC zj5jIF4dzSQ-MMfnENH>*g$$rK+wmqFp$h?WsRdeOQoT3NQLxEH-apF6?~b1Tyh$&D zYV+yG7^1Nq&=_BUwZqZxdD6KC=8poNJvDk&DE-kg=$kh&6y-n0r0l5HjcJlrXE}&e^?_ zgI?I5WNoXhu|;6phVLL@A}i|pXn)4C6K0vkp};xpi^)U=uFqbKbqb~gm3>oWl5d2f zQo$~KzI&hclSQm-WBJ`U)ThU`K2m;RIWJyvR?;=sF}0D=p#yZO?cBs{+no(sX4ul8 z5_qNvVj#Y;AldX32nZuI*REZpIXLS-_)wAQaxVHDS^5v_GU+ikw6!XU>PwKz?W2{VsB1?EPrHP*|)V2!b81TPksncEnqxL0%^iC|UKlk=zwN}1|G~3lsn{bXj)qWCbC z75hU922c2kvUiCuX`FJGRzt9nKB2xrpL8M-Bs#|3Vt!{a5?($+<-=o7xz58Mt3y!) zM6zvJODbRJm z3+B-gbx~%mgDf{Sp20}ENJjKB4?pgja(T-at_&^Hm36(cJ15Ul7-X^kzm5Fxf5-@Di-3vO?)j>962h?+l)~B6zY(w|Qnyt_Y)WoSuMYy6<*!Vp%!eM2 z;8xPcVjD}OH4ewMG_@_?C_bPU_F7mHJchqj&TKYW9@mWAyoHwZ+(rzesp;Pn;HhVq zr#?nivq1sGoH)fBw%R+7W7c!3%%p>vAr~19T9`&e!@}n48N-MAj_IW;<*<#3t1~&R z`f*K@OFPrAZEGioSMr{|H)6w2-}Px#nS^@KfI%rEQARs1$Gx& z^Rw&hax3@!lysiyvnQe1uf8B+&FXrx<`M<M@1d^CC?mNqU}lw_k7>(b*UZ7Nh{DYWo&cZ|?dwAoq|QC7v~L_OnyDnK8LtPLTi zz5UqaI-f_pPc3dZ__Do%LY1&PV?9PuLhQh%eU!E>?zifh{1=lb0HSpNNIMY{x1;v0 zw>#HKD2R;dQ}A~O+4Of?F=UCox)`>w2zl9083M(aY1A%60^5cJ#Q{~7a-A=g(lAgF zNdZqnQYQKgw&nx4R1uEsoNkw7h{w7fvW9aOg)D;ZNLJTSwcO>g4k{nY z%b7Y+G=5I=r&^j7vXWnrOHR%soAsl56C*6ihi^;aop;kV2y~*3p#oU%QLG#+z@I}Z zJLAQ}z&C}AhX;4nF*(o=gp0Q%=1RxebyD3Kn?<<(|#%f$u$i+XyZ+JcB82G_VN<3VN%2Z zosSw~=`Suppu0O9ke)UUE)57DMEEy`fP|`FVHW?wvzrtIq6dLc5-=veR?i%W-j4lkWnd~YbPSNttHb}IHcQ>ris7K9m@K>k{ z_ut5C(+Z6@D`jV}78>}D1>K#A$V@+JfCl1LYm`qn!?gxh!4yJfIbt?4dgDA}8<-U% zlkeJOF9Exx!ahelfGl9XBK^h9kU?URv03WXbF6PBN0`7r`FynIn$qkGU+*ZuTqZ?} zXz^ePo~^Hz-HSmR^PTf-xF2v#wVyNw40yX@GEsh(NIOh5&X4nw+IjU z!QA|eqJb~qeWU9Wf8<((CHwp#TM|2dW-LRz|4l(smGsq{3v$i zQcQAV1bbwilYW%*DuA8jG_(SBe};mEluJ_@tbo{`lP}0U&3bmP`mD{4^zQAbhAe2e z+!_^Dsgd@Q7DEHnU|G?T*0`%omXHjgU#xGJ@Ar?#M#5=lhay`-Bk{aQvlJPIJn$<* zV={cFTq<=+a}+~d9L}ipszbxnxY^cZ81d{rX^QQ6uBHzSB2g4^DF2#BiD6CuwE5{# zNq{vD@z1I46o_4w^s2R8Y(6ak+fi!<=HSBng9TdfYo_U303(7*fPBus=9@q|N;+6K zYX#Q?QB?VQ5_zQNHH~^1(Px_g?Q@QCiet%>7M+OSZ)77Ovy=5K!sS~mSxVKAW@e&gYMYA(o@Pf&DgZKz_37wF!|eo z@0Ae;lp;t%f_V_V%=~6G_$hEr;sq5&1L<=yGem0G0XZQrr&g-leVO6+h=`u4D2GdO2_^*NtuNur zs_X64#v8g?7H+NQz0oOG_AZZO?lYuUrd#%&B#&5-0aq{Zs+=2$3p4()`pmM@oD{;) zYYmFIs&}il`APKp&spOI9U6Xr0~;^k0zsvNX5kmb0!FZ3jpCaYJbyft2L5nT<$Poc z=P9hRzxsV|h9dij*M7|xD2iZoiP8Hwt1%0{}+kmgc zos^JQmO${c5VLTZyrozbFC}7mXy#+9b5NH^vR-gKeuQG==k|ME&u*EwQ=-sG=QAz3 zB`tw>nST?98 zA!#g2)QOND_D@Qb8wC@}T9}dfzXRF-G3763{$n20s32-aRJAZKmK17Mn2YJ({m@_h zNRxm-H2+ck+qa+?MYx&&AD;e4AqD$;6)nnBL30)d>dK_JTi5rROh OsAUlteu3aW$$tUkCR=v^ delta 4448 zcmZ9Qby(Ejw#H`&VQ3JPlpIoF1|&s7h7OUElpKkn1f}6Sgpx`OT?W!!g9=D@=TOox zh_rw-*WW$op7T6+|F!mh*MDn0`}3~#!7j?Kf(!zLkA}$z5`sW7Bp?tq2n6zR7V&oT zK-svtp@e;0oJ$PN+%jPFLGNp>(HA-{H$R{?^22CKM};RQE#|pMG8Y+O)!LY%8DIe} z;D~K_nG&(sS1AwRGj~&?pVsGj?R*)1gcbN?PbA%N&rV=QJ4)`y-8oX}nF_33t+Q{` z4)9f_T-fsamnn@`^$7=)V6;(wfoPs4Y*M*{C2z>C%dDa#*C{plPXAH9EBz3GODIwO z7^5t8Pgm3eB%_T`h~WxayoY9x3x0AiK}JCDlA0S6wnmKZQ_HZzBgQ}r=s1m0BM zo=W0~adi3Ol(v-SOu4Z)QVvYap|cF>=yw7Z7yLg;(cju24|pvpM28*4^&9AvZ_+P4 zX3p#~b>8ubegJLSZ@sJeoN>qaMW2LuY}Q&;ajMh25)$Z8wWyymbdHW~@7EE+Q?K@u zv2ar;&OE5vy}kq6Q-$W(mB01)0080uq2qb?4o4(M(m`csxpOjaA66G)A?I_Ixk`I9 zr%Zzs9=K?J+t%%wqYnFRBYITDaRweNfC=#Y(mpDnCeQi=iCAhsOx{)@hpLP8@-b7d z?&D{U>u?U|)@)eGCHt1TGj(;Zjjity*u+_S-)E#cKVsX^%?=NyzDJA#2F+GSi%!>2DCeT@d%zC zmf6nzg=`)|!R2CFRli$9xkkJlgbW(@^s_3t4{Wb)x#9fpdSwgt(Zz06I&- zMA4Y4vqkn@%&msWG_De){=VRG9-imeBH=LsF4jgZUw(hC|GmGjd-l+86Dv=rCb6FD zFD~Iu8lBXsNJxjZD?L4Bz_HxkQH&$a5)A=GfMOdrN_OYl1*F=90!jjjUxmC6Ap(K! zVRU(zF-Z+Q%C-D@6m9UiPb^PAV#Th);v+gxqo`+tQvm-DOj1 z?HJzPRI7(ii7oU8h-GVGE$5@m{g({0B0ciWg!FMfX;HRD+1_WCKxg+z?Kkya+l1(Z z-9ztZzbucNuBrkVCiDZB8b&}LOT5o=9m`nS3dYC*kpw?zSwk2GQxNqz7`N#>j*H^! zGAit*bWZQ*Pbe9G5T<>^MDaofDL}1!X1-VTQ$aABkMhLQvI-TbDl|IO%%FaB7@%Cp zUk2JwT{HPTAF^p0?|Nr2w;-X5bvdZoA;HOw;llca&s%K!s$VZYH>seb&;(pHawjiI0ddPiI{jekPiuV#-#xfwPq9*YvGMVh zfP52K1ev9{Nq%Nz0M# z^HASuj)9Ph5BUm`D`{H!E`f)BXYGg;Y%~F8T0TSyIjXGk3nMF7G*DjC^?5zGyUD(5 zoj{UUxHtZN0>0r?so{=~*Z2DsvjlluBrYf8QCiOg3vyRWEi4}(TaCi3Fve4pwTE9_ z5ii1%sE*R(LXB=tX5=Z!cKn!{DJkLBea5tTxX-J*a= zzqP}WiGlhCxr8$65+w~&8e=yOM(RoV@XGJj&EL@b!7n)S#HoI1$G_uFMKN21%vM1# zywh$%`$a8BueESEgoF}mMFE`WF@~w%W};W4tQYb3L^UXs=1M{UVZ}-Pcq_&Bw5Tw0 zt{nrO^l06V6d%R*^iKg6G%fn^Z9oX=Pi2Y^#dr!Q|6sAH?P+FuGr7RR%CV0Ievpz> zY8hJoD@BANgzr}3-*)ky?c?`kT#!^jYgr>H)Cya><~U_mpSO}oH8yui(SJhx(qwo| z14SzGiE<)dz&8`BKNCNW7W>YVp86WewRpQ-&kayj zu^X^R2O3bTI%r?srAv(mLR~PN>s`ED@Y*;{{uo})$eh|;vt;&*I12^hFy>A0+UJ+- zt`WPUjJB-wR?eJnz}mUR-g^lvhXdm zPcZQ=Em9L5{-PT+?;lZF-8p5MV@1jSPMgU?$%c~!8|pJ)6Wt1&h*O`!j%gA^LFp3x z&Ttq$ooHGZbe5+?i&g5%;qiL(jj?L$Mqu~wfWN{mk~AyLxgT_VUMq@LqM8QikNy z#WD4W^((IYwkv}0!D<*|G70ffLR_6TrjMNq@Lc};9qnpew^0RdXevn1a6II|c=D#1 z^WjjMQC$9#Ew(#dEon+hO1=z3RT-&qhduVvKwfP z4MfpP@e`D%q&Y>iOc`CatF>+lV6BWp}7BrP#(=0#vq3&A?(e#aEOo^q&4t+fPONs3WfimS^jMMs^d`K|gxdch`$8*Py=}$gl_PCgciu;?|FqGW)z)6?~+-!i) zD@diF+aEc3J<31F08aUF{1c+x>tIKK_KM6AB{)lK$ruX}E*VZV`Gl1BB{*~srYjBr2*K?Fj|kzaLGR3%aSd{7knq zyQwSD`kEfBRTwhw7evO5*U5b-yrEU9T^Vh z$H1=p&xp4vjSc~w`!T1}4;f^Yy|jB9M%yDB+woqNb;hz@v3nZSJz$2$-gfYy)EV!f z<$l=6toJ4RUk7=8O$gG}BqX8--2jpPy>o&FaQ>q||3Yz;5(HxW$75oQd6)o;f3O(% zFDy1&Q6SXIU|30^TD^%O`(!DIDXBmO`_qZbT{aHR#SaqSMxS#Y_ouCh+4@(V9hj_k z))#G-$d+n1RA?(@je4YHQK5dvEj3k`Xud$wqkC%75c zg5KvwrTjGesH07-S19X<1_JvBj~GhT*n@=skGb?={xH*5)Br1bizJ29l3T27Zq6Oc@)Kd5~?f3S`_ur01? zs|Mdu)fB3sVlOKdxlN*1522pn#5A*t>6A{gJ%;R{Oz_QUWKV-~s^&S&zy&6N|C zVfc=b(A;@V$D^+sqWym%&0X@)B?U z=tG;9oqOhh)2LC-jpZBqH|1>UbhHs}1iwP7HVbLqMmMN(`a~Vvw_VoK#)|RoSlI-d zhd$SRBbMH>q40!!$TYSVG9hNHt6efr(EF-lcy-k_Koz49!`H~NEu7)es`d1_ta=2H z*Yi+lc>pzfhxgB``_E$YBvB71dAE|RJ4|1-H+C>82~o*qdx{@ypL$`;aCWkh;!clO z)IY6R`wvCUB}fBXD zJ)iTY#=4;uH-D zQrjb@r&OnI6a3cwlG!in92*e~aI(4+T5Y*!)Qh6MU={J)khr#SWiXPi{`+CaMM9k| zH8p;2 zX3=)|3q^{%9M=+y(jgk*uhLCdxE}J69C_-oRq4B|4rf1N`39OXTwvw(+8zGll~Ylb zQWe~}d$vv5ltq8YaZF$^mxA=X9Q7(_E3X`A0#&ODtPv0s{tT|vtcKqe1p~TP8LpHS zh=J@(IBm{;spGD(n5mjs*YWPX3Qn07TmQjJzd>!xnqu<&of_ICjVJVB-Gm*2!Upl5 zZ~<#W@%DEOGn11-=-FC6AiV;UD$*-7+N#T(hKm)s`#(-P4#bzgf}>l>pm5VnAe6|pe1Ebw|tp_}Zkl6u2Q zD628ThI;+zcAxZRHk%NUC*V`_eUnI zot5l^m$1Q{I0U(9MnKKbb*fwF@$CcpWnj66dwzW>1R*Ax^)A9*A$c)+C_WI3$rPS~ z{AW9X2^7%*>tp&wWC(O=FeWe-3^(jPco1U@lVJVN%=K@){3Xo4kKIj7%uQwtF`NdY z%1A^0f6pKgD`pqQ%ksaL=r5OK{wnnU75_#V7#N(7<^OZ*zbVWZ54bW|8uJCt{jXwU T7U2wFGR!fY=SGb1KY9NH=(uHM diff --git a/pkgdown/index.md b/pkgdown/index.md index 8814416..1c61e92 100644 --- a/pkgdown/index.md +++ b/pkgdown/index.md @@ -10,14 +10,14 @@ Drug-target associations from the Open Targets Platform have furthermore been in _pharmOncoX_ provides anti-cancer drug classification through existing entries in the [Anatomical Therapeutic Chemical (ATC) Classification System](https://www.whocc.no/atc_ddd_index/), and these have been extended significantly with manual curation, also by establishing novel drug categories that are presently missing in the ATC classificiation tree (examples include _AURK inhibitors_, _MET inhibitors_, _BET inhibitors_, _AKT inhibitors_, _PLK inhibitors_, _IAP inhibitors_, _RAS inhibitors_, _BCL2 inhibitors_ etc.) enabling a filtering of drugs according to their main mechanisms of action. -Currently (as of mid September 2024), `pharmOncoX` is built upon the following +Currently (as of early November 2024), `pharmOncoX` is built upon the following releases of external databases: - - Open Targets Platform (2024.06) - - ChEMBL (v34) - - NCI Thesaurus (24.07e) - - MitelmanDB (20240715) - - CIViC (20240918) + - Open Targets Platform (2024.09) + - ChEMBL (v35) + - NCI Thesaurus (24.09e) + - MitelmanDB (20241015) + - CIViC (20241102) ### Getting started