From 20b04da07e1d5511b18c857bea26b7241a0fd82a Mon Sep 17 00:00:00 2001
From: Sigve Nakken <sigven@ifi.uio.no>
Date: Sat, 2 Nov 2024 12:18:58 +0100
Subject: [PATCH] NCI 24.09e, OTP 2024.09, skip DGIDB chembl map

---
 R/sysdata.rda                             |  Bin 734 -> 733 bytes
 data-raw/custom_drug_target_regex_nci.tsv |   10 +-
 data-raw/data-raw.R                       |   19 +-
 data-raw/drug_name_black_list.txt         |    1 +
 data-raw/drug_utilities.R                 | 1501 +++------------------
 data-raw/metadata_pharm_oncox.xlsx        |  Bin 12810 -> 12832 bytes
 pkgdown/index.md                          |   12 +-
 7 files changed, 178 insertions(+), 1365 deletions(-)
diff --git a/R/sysdata.rda b/R/sysdata.rda
index 937ab35910664f34b9d9683f836ff894189ba65f..9a07f3aee52b7d0417736c5a53a49bbb159b9a9c 100644
GIT binary patch
literal 733
zcmV<30wVoFT4*^jL0KkKSrf^XVE_Tb|G@uy%5h)=f8am=|Iokx|L{Nn01?0fTpO-P
z>KcelN#L3%l)%b<shKirGe8q+Xwc9!Gz>J<8k0>7jExgh+7djVXwi@XkkAbc0i#BP
zLaFLJr>W^Yh<cg<pa5b30000007(@z%6Uv=O$L~N13&-`00E((21g1~P<$AeM6;4`
z3#=F3%NOOd3PJDNKJ>z<)Y?w>_O$nJx<HH*%2Z8^Vm$hA7)XG4z2&(0A5l+3T7u@p
zeQ{pujjAIVkcz?t!a`&)I|HzQ4<;2xKvfwalv`r$>*TA#3J_I)tBZUT8{(uV;IUpc
zU)JT?6k?ED0{#`da|!5Bqch1CQ^RebtjbYX+zEBCON-}$1)hA)7%s%Zd}huBeDdrT
zCNc#jj(Fjb2dxsgt<T65IzddCR0G4Lqk|@apSFo)7}6_~1VR9YXb(=<t-?t_1aeiI
zgBS8tJtmn+Sb1s0>~Iy-9^5qM2D_G<FX>vK{6ec6d+9+zeXWhovvr%2w5rFK1jV^O
zs@`i{KbYgmGu^VF<nK3)TMj1tV!bw1YVdascsemQtLg!oGlif)QHfKA0RE)ZLkZaI
zEQ*-~cWbwDwbTn3Cg>n}V53wpQxJSM(;M8fYzBl+8zh-EWLe^(8gOVm3qaZFGKZ60
zNz@1qqKOAflFNjuJ*vdm{yv&(-|r?}+u;O_vO-fh#1L0<agRvXjG>TtM3gjQH7Nv&
zi~Yq~j(laIO$N_d{x56P#+0>!2FA~Js#TYt*1*sa6|;m&IfUmz`42dJcrV)WXZ6Ap
zZ@$|AgJ_i^60zihL_xGHIcXd~#-^<<2pq3c%-bzmJPS0%QgI^~ys?qA@ePE~&Zfw}
zk(<#BBMsT>I*yi=3a+=)GpC#jWTG>KevvW=8cY-A)zCx8(?71A`6r)eW?q-{`n~p@
P$Nv{{ML1B96Umif^oUdj

literal 734
zcmV<40wMiET4*^jL0KkKSpfC+J^%r?|G@uy%5h)=f8am=|M0*6|L{Nn01?0fUDKJ{
zty#oGBg$wE14fz+00Te(000dGL)03Hq`{CUp{b#%kYY3&QRxjBnrJiwK*-PyDd{~;
z4Fw(weyC^w&;SNQKpJVGh-3gsBoaLl;X@$M(@h3|XlbBm27olvK+rfbVxZ~sm_%<R
z;0w$j&e3D~I*_Cn`(XW$Q#7p&!_?M0|HecC1`D!~#%QS19nm=lL9g0hGk+4$hU2q8
zGtgRd_pQu|EQC@JCK3}Nfy@qJ03GZr41mZPAfPSba2?daZ-Vo|nkm+(3e+yTL!ZhC
zOHFLmU$X#DiSbSes1dP>+yIF-n4E+xs8q-kPzf$J76fidO0<=8u#LExiy#C%ax_xM
z8FfJ;Rhn!O0esOJcA<5so}!tnB2*WdT8`-)jz}GHpa|H>(nZ9_hkJ?yl}`YD7-_eZ
zsMiYAxMR1)04B(9DO#-$WxsabOHyl|r#8D;_>7ju8hTKr5sPoI9-{<!Xi%A6nz_u9
zF&ntK-Q5M`KBk*AqNP~^$DXkv+bQpCA({2v0-2T%0s@Pi3kL+-O=VoF)@??#z2c)r
ziEu8n^~XpmVlYTJu2CyQL|LNduuR`@##w%?{Y+~zVb96X)t|3jbn)>WMF>apYBQJ%
z<t|1TwR~N4J8sEiZsQdXH)IETK1*ThJ9qRku+NWv*`5MWON})%ZaOI5oc>L9kt0hE
zTokT<2Bbp(PN_2KpE0O9QXZZHrpI@+C!eSYBp$T$z)WLsNz6Yo-!R?`*bd{`W5N5(
z>Z^qb3rp8zGAc=?f&v79#SopI35{#S33$>6s2n1d-ft@Lh{mmppMB|}tG6(ue1(f~
z5i{Z%vq1E$jjst{fMJ@W^M|L6D-2e-gd&H0;auqQd#;^KzP&ukaN3qV*nDWFEI+u2
Q%pdr>k}1N3fCsO!@JokIHvj+t

diff --git a/data-raw/custom_drug_target_regex_nci.tsv b/data-raw/custom_drug_target_regex_nci.tsv
index 6839a79..c7ee599 100755
--- a/data-raw/custom_drug_target_regex_nci.tsv
+++ b/data-raw/custom_drug_target_regex_nci.tsv
@@ -74,7 +74,7 @@ Tolinapant|Xevinapant|Smac Mimetic	BIRC2
 Xevinapant|(Smac|SMAC) Mimetic	BIRC3
 Tolinapant|Idronoxil|Xevinapant|Smac Mimetic	XIAP
 hSTC810	BTN1A1
-Divarasib|Opnurasib	KRAS
+Divarasib|Opnurasib|INCB161734|QTX3046|RMC-9805|TSN1611|QTX3034|RMC-6236	KRAS
 Ras Inhibitor LUNA18	KRAS
 Ras Inhibitor LUNA18	NRAS
 Ras Inhibitor LUNA18	HRAS
@@ -90,11 +90,11 @@ Tinengotinib	FLT1
 Tinengotinib	KDR
 Tinengotinib	FLT3
 Tinengotinib	FLT4
-pan-RAF Inhibitor|pan-RAF Kinase Inhibitor	BRAF
-pan-RAF Inhibitor|pan-RAF Kinase Inhibitor	ARAF
-pan-RAF Inhibitor|pan-RAF Kinase Inhibitor	CRAF
+pan-RAF Inhibitor|pan-RAF Kinase Inhibitor|Brimarafenib|BDTX-4933|DCC-3084	BRAF
+pan-RAF Inhibitor|pan-RAF Kinase Inhibitor|BDTX-4933|DCC-3084	ARAF
+pan-RAF Inhibitor|pan-RAF Kinase Inhibitor|BDTX-4933|DCC-3084	CRAF
 Ebvaciclib|Tagtociclib	CDK2
-Ebvaciclib	CDK4
+Ebvaciclib|Atirmociclib	CDK4
 Ebvaciclib	CDK6
 Utatrectinib	NTRK1
 Utatrectinib	NTRK2
diff --git a/data-raw/data-raw.R b/data-raw/data-raw.R
index 5c1eee2..666e5d7 100755
--- a/data-raw/data-raw.R
+++ b/data-raw/data-raw.R
@@ -21,7 +21,7 @@ opentargets_version <-
   metadata$compounds[metadata$compounds$source_abbreviation == "opentargets", 
                      "source_version"]
 package_datestamp <- stringr::str_replace_all(Sys.Date(),"-","")
-chembl_pubchem_datestamp <- '20240708' 
+chembl_pubchem_datestamp <- '20241024' 
 
 ## set logging layout
 lgr::lgr$appenders$console$set_layout(
@@ -81,7 +81,7 @@ drug_sets[['nci']] <- get_nci_drugs(
 #### -- Open Targets Platform - drugs ---####
 ## Get all targeted anticancer/other drugs from Open Targets Platform
 drug_sets[['otp']] <-
-  get_opentargets_cancer_drugs(
+  get_otp_cancer_drugs(
     path_data_raw = path_data_raw,
     ot_version = opentargets_version)
 
@@ -101,7 +101,7 @@ drug_sets[['nci_otp_curated']] <- map_curated_targets(
   gene_info = gene_info,
   path_data_raw = path_data_raw,
   drug_df = drug_sets[['nci_otp']]
-)
+)$curated
 
 ####-- Cancer drugs classified into categories (ATC) ---####
 drug_sets[['nci_otp_curated_classified']] <- assign_drug_category(
@@ -194,7 +194,8 @@ raw_biomarkers[['depmap']] <-
 raw_biomarkers[['custom_fusions']]$variant <- 
   raw_biomarkers[['custom_fusions']]$variant |>
   dplyr::anti_join(
-    raw_biomarkers[["mitelmandb"]][['variant']], by = "variant_alias")
+    raw_biomarkers[["mitelmandb"]][['variant']], 
+    by = "variant_alias")
 
 biomarkers <- list()
 biomarkers[['data']] <- raw_biomarkers
@@ -202,11 +203,11 @@ biomarkers[['metadata']] <- metadata$biomarkers
 #rm(biomarkers_all)
 
 ## upload to Google Drive
-version_bump <- paste0(
-  substr(as.character(packageVersion("pharmOncoX")),1,4),
-  as.character(as.integer(substr(as.character(packageVersion("pharmOncoX")),5,5)) + 1))
+#version_bump <- paste0(
+#  substr(as.character(packageVersion("pharmOncoX")),1,4),
+#  as.character(as.integer(substr(as.character(packageVersion("pharmOncoX")),5,5)) + 1))
   
-
+version_bump <- "1.8.0"
 
 db <- list()
 db[['biomarkers']] <- biomarkers
@@ -221,7 +222,7 @@ db[['drug_map_basic']][['records']] <- drug_index_map[['id2basic']]
 db[['drug_map_alias']] <- list()
 db[['drug_map_alias']][['records']] <- drug_index_map[['id2alias']]
 
-googledrive::drive_auth_configure(api_key = Sys.getenv("GD_KEY"))
+#googledrive::drive_auth_configure(api_key = Sys.getenv("GD_KEY"))
 
 gd_records <- list()
 db_id_ref <- data.frame()
diff --git a/data-raw/drug_name_black_list.txt b/data-raw/drug_name_black_list.txt
index d197ad5..fcc5b29 100644
--- a/data-raw/drug_name_black_list.txt
+++ b/data-raw/drug_name_black_list.txt
@@ -9,6 +9,7 @@ Sodium Caseinate
 AXL-1717
 AZD-7451
 TAS-115
+RAC-3-N-BUTYLPHTHALIDE
 Lenperone Hydrochloride
 ABT-126
 AZD-3759
diff --git a/data-raw/drug_utilities.R b/data-raw/drug_utilities.R
index 58748c9..e178729 100644
--- a/data-raw/drug_utilities.R
+++ b/data-raw/drug_utilities.R
@@ -29,201 +29,6 @@ readUrl <- function(q_url) {
   return(out)
 }
 
-### PUBCHEM/CHEMBL - WEB SERVICE FUNCTIONS FOR COMPOUND INFORMATION
-
-## Retrieval of compound properties from PubChem
-#'
-#' Function that retrieves PubChem compound properties using PubChem public user gateway (PUG)
-#'
-#' @param pubchem_cid PubChem compound identifier
-#' @return a data frame with the following columns of chemical compound properties:
-#' \itemize{
-#'   \item pubchem_isomeric_SMILES
-#'   \item pubchem_TPSA
-#'   \item pubchem_IUPAC_name
-#'   \item pubchem_complexity
-#'   \item pubchem_hbond_donor_count
-#'   \item pubchem_hbond_acceptor_count
-#'   \item pubchem_molecular_weight
-#'   \item pubchem_cid
-#'   \item pubchem_name
-#'   \item molecule_chembl_id
-#' }
-#' @examples
-#' ## Retrieve compound properties for Azacitidine (Pubchem compound ID = 9444)
-#' \dontrun{
-#' pubchem_compound_props(pubchem_cid = 9444)
-#' }
-#'
-pubchem_compound_properties <- function(PUBCHEM_PUG_URL = 'https://pubchem.ncbi.nlm.nih.gov/rest/pug/', pubchem_cid = 9444){
-  compound_description_list <- jsonlite::fromJSON(paste0(PUBCHEM_PUG_URL,'compound/cid/',pubchem_cid,'/description/JSON'))
-  compound_description <- dplyr::filter(compound_description_list$InformationList$Information, !is.na(Title))
-  if(nrow(compound_description) > 1){
-    cat("More than a single description line\n",pubchem_cid)
-    cat('\n')
-    return(NULL)
-  }
-  compound_properties <- read.csv(paste0(PUBCHEM_PUG_URL,'compound/cid/',pubchem_cid,'/property/IsomericSMILES,TPSA,IUPACName,Complexity,HBondDonorCount,HBondAcceptorCount,MolecularWeight/CSV'),stringsAsFactors = F)
-
-  compound_properties$pubchem_name <- compound_description$Title
-  chembl_id <- NA
-  synonym_url <- paste0(PUBCHEM_PUG_URL,'compound/cid/',pubchem_cid,'/synonyms/JSON')
-  pubchem_compound_synonyms <- NULL
-  if(!is.null(readUrl(synonym_url))){
-    compound_synonyms <- jsonlite::fromJSON(synonym_url)
-    all_synonyms <- compound_synonyms$InformationList$Information$Synonym[[1]]
-    pubchem_compound_synonyms <- paste(all_synonyms,collapse="@@@@")
-    j <- 1
-    while(j <= length(all_synonyms)){
-      if(stringr::str_detect(all_synonyms[j],"^CHEMBL[0-9]{1,}")){
-        chembl_id <- all_synonyms[j]
-        break
-      }
-      j <- j + 1
-    }
-  }
-
-  compound_properties$molecule_chembl_id <- chembl_id
-  compound_properties$pubchem_synonyms <- pubchem_compound_synonyms
-  compound_properties <- dplyr::rename(compound_properties, pubchem_isomeric_SMILES = IsomericSMILES, pubchem_TPSA = TPSA, pubchem_IUPAC_name = IUPACName, pubchem_complexity = Complexity, pubchem_hbond_donor_count = HBondDonorCount, pubchem_hbond_acceptor_count = HBondAcceptorCount, pubchem_molecular_weight = MolecularWeight, pubchem_id = CID)
-
-  return(compound_properties)
-}
-
-## Retrieval of molecular compound properties from ChEMBL
-#'
-#' Function that accepts a ChEMBL molecule identifer and uses the ChEMBL web service API to return a range of compound properties
-#'
-#' @param molecule_chembl_id ChEMBL molecule identifier
-#' @return a data frame with the following columns of chemical compound properties:
-#' \itemize{
-#'   \item molecule_chembl_id
-#'   \item parent_chembl_id
-#'   \item chembl_acd_logp
-#'   \item chembl_acd_logd
-#'   \item chembl_acd_most_apka
-#'   \item chembl_acd_most_bpka
-#'   \item chembl_alogp
-#'   \item chembl_aromatic_rings
-#'   \item chembl_full_molformula
-#'   \item chembl_full_mwt
-#'   \item chembl_hba
-#'   \item chembl_hba_lipinski
-#'   \item chembl_hbd
-#'   \item chembl_hbd_lipinski
-#'   \item chembl_heavy_atoms
-#'   \item chembl_molecular_species
-#'   \item chembl_mw_freebase
-#'   \item chembl_mw_monoisotopic
-#'   \item chembl_num_alerts
-#'   \item chembl_num_lipinski_ro5_violations
-#'   \item chembl_num_ro5_violations
-#'   \item chembl_psa
-#'   \item chembl_qed_weighted
-#'   \item chembl_ro3_pass
-#'   \item chembl_rtb
-#'   \item chembl_canonical_smiles
-#'   \item chembl_standard_inchi
-#'   \item chembl_standard_inchi_key
-#'   \item chembl_pref_name
-#'   \item lincs_id
-#'   \item drugbank_id
-#'   \item pharmgkb_id
-#' }
-#' @examples
-#' ## Retrieve compound properties for IRINOTECAN (ChEMBL molecular compound ID = 'CHEMBL481')
-#' \dontrun{
-#' chembl_compound_props(molecule_chembl_id = 'CHEMBL481')
-#' }
-#'
-chembl_compound_properties <- function(chembl_ws_base_url = 'https://www.ebi.ac.uk/chembl/api/data', molecule_chembl_id = NA){
-  all_molecule_properties <- data.frame("molecule_chembl_id" = molecule_chembl_id, "parent_chembl_id" = NA, "chembl_acd_logp" = NA, "chembl_acd_logd" = NA, "chembl_acd_most_apka" = NA,
-                                        "chembl_acd_most_bpka" = NA, "chembl_alogp" = NA, "chembl_aromatic_rings" = NA, "chembl_full_molformula" = NA,
-                                        "chembl_full_mwt" = NA, "chembl_hba" = NA, "chembl_hba_lipinski" = NA, "chembl_hbd" = NA, "chembl_hbd_lipinski" = NA,
-                                        "chembl_heavy_atoms" = NA, "chembl_molecular_species" = NA, "chembl_mw_freebase" = NA, "chembl_mw_monoisotopic" = NA,
-                                        "chembl_num_alerts" = NA, "chembl_num_lipinski_ro5_violations" = NA, "chembl_num_ro5_violations" = NA, "chembl_psa" = NA,
-                                        "chembl_qed_weighted" = NA, "chembl_ro3_pass" = NA, "chembl_rtb" = NA, "chembl_canonical_smiles" = NA,
-                                        "chembl_standard_inchi" = NA, "chembl_standard_inchi_key" = NA, "chembl_pref_name" = NA, "lincs_id" = NA, "drugbank_id" = NA,
-                                        "pharmgkb_id" = NA, stringsAsFactors = F)
-
-  molecule_url <- paste0(chembl_ws_base_url,'/molecule?molecule_chembl_id=',molecule_chembl_id)
-  molecule_hierarchy <- NULL
-  molecule_properties <- NULL
-  molecule_structures <- NULL
-  props_all <- NULL
-  lincs_id <- NA
-  drugbank_id <- NA
-  pharmgkb_id <- NA
-  if(!is.null(readUrl(molecule_url))){
-    raw_xml_string <- rawToChar(httr::GET(molecule_url)$content)
-    doc <- XML::xmlParse(raw_xml_string)
-    if(stringr::str_detect(raw_xml_string, '<molecule_hierarchy>')){
-      molecule_hierarchy <- XML::xmlToDataFrame(nodes = XML::getNodeSet(doc, "//response/molecules/molecule/molecule_hierarchy"), collectNames = T, stringsAsFactors = F)
-    }
-    if(stringr::str_detect(raw_xml_string, '<molecule_properties>')){
-      molecule_properties <- XML::xmlToDataFrame(nodes = XML::getNodeSet(doc, "//response/molecules/molecule/molecule_properties"), collectNames = T, stringsAsFactors = F)
-    }
-    if(!is.null(molecule_properties)){
-      colnames(molecule_properties) <- paste0('chembl_',colnames(molecule_properties))
-    }
-    if(stringr::str_detect(raw_xml_string, '<molecule_structures>')){
-      molecule_structures <- XML::xmlToDataFrame(nodes = XML::getNodeSet(doc, "//response/molecules/molecule/molecule_structures"), collectNames = T, stringsAsFactors = F)
-    }
-    if(!is.null(molecule_structures)){
-      colnames(molecule_structures) <- paste0('chembl_',colnames(molecule_structures))
-    }
-    if(stringr::str_detect(raw_xml_string, '<molecule>')){
-      props_all <- XML::xmlToDataFrame(nodes = XML::getNodeSet(doc, "//response/molecules/molecule"), collectNames = T, stringsAsFactors = F)
-      props_all <- dplyr::select(props_all, pref_name) |> dplyr::rename(chembl_pref_name = pref_name)
-    }
-
-    if(!is.null(molecule_structures)){
-      unichem_url <- paste0('https://www.ebi.ac.uk/unichem/rest/verbose_inchikey/',molecule_structures$chembl_standard_inchi_key)
-      if(!is.null(readUrl(unichem_url))){
-        unichem_xrefs <- httr::content(httr::GET(unichem_url))
-        i <- 1
-        while(i <= length(unichem_xrefs)){
-          list_item <- unichem_xrefs[[i]]
-          if(list_item$name == 'lincs'){
-            lincs_id <- list_item$src_compound_id[[1]]
-          }
-          if(list_item$name == 'drugbank'){
-            drugbank_id <- list_item$src_compound_id[[1]]
-          }
-          if(list_item$name == 'pharmgkb'){
-            pharmgkb_id <- list_item$src_compound_id[[1]]
-          }
-          i <- i + 1
-        }
-      }
-    }
-
-    if(!is.null(molecule_properties)){
-      for(c in colnames(molecule_properties)){
-        all_molecule_properties[,c] <- molecule_properties[,c]
-      }
-    }
-    if(!is.null(molecule_structures)){
-      for(c in colnames(molecule_structures)){
-        all_molecule_properties[,c] <- molecule_structures[,c]
-      }
-    }
-    if(!is.null(props_all)){
-      for(c in colnames(props_all)){
-        all_molecule_properties[,c] <- props_all[,c]
-      }
-    }
-
-    all_molecule_properties$lincs_id <- lincs_id
-    all_molecule_properties$drugbank_id <- drugbank_id
-    all_molecule_properties$pharmgkb_id <- pharmgkb_id
-    if(!is.null(molecule_hierarchy)){
-      all_molecule_properties$parent_chembl_id <- molecule_hierarchy$parent_chembl_id
-    }
-  }
-  return(all_molecule_properties)
-}
-
 
 ### NCI DRUG DISPLAY LABELS
 
@@ -244,8 +49,8 @@ process_nci_labels <- function(path_data_raw, overwrite = F) {
 
 ### CHEMBL-PUBCHEM COMPOUND CROSS-REFERENCE
 
-get_chembl_pubchem_compound_xref <- function(datestamp = '20220906',
-                                             chembl_release = "v31",
+get_chembl_pubchem_xref <- function(datestamp = '20241024',
+                                             chembl_release = "v34",
                                              path_data_raw = NULL,
                                              update = F){
   chembl_pubchem_xref_fname <- file.path(
@@ -268,9 +73,9 @@ get_chembl_pubchem_compound_xref <- function(datestamp = '20220906',
 
 
 ### TARGETED ANTICANCER COMPOUNDS FROM OPEN TARGETS
-get_opentargets_cancer_drugs <-
+get_otp_cancer_drugs <-
   function(path_data_raw = NULL,
-           ot_version = "2023.12"){
+           ot_version = "2024.09"){
 
     cancer_terms <- list()
     cancer_terms[['all']] <- phenOncoX::get_terms(
@@ -699,37 +504,15 @@ get_atc_drug_classification <- function(
 }
 
 
-get_fda_ndc_mapping <- function(
-  path_data_raw = NULL){
-
-  fda_ndc_fname <- file.path(
-    path_data_raw,"national_drug_code_fda","product.txt")
-
-  drug2epc <- readr::read_tsv(fda_ndc_fname, show_col_types = F) |>
-    janitor::clean_names() |>
-    ## ignore drug regimens/combos
-    dplyr::filter(!stringr::str_detect(substancename,"; ")) |>
-    ## separate entries of pharm classes
-    tidyr::separate_rows(pharm_classes, sep=", ") |>
-    dplyr::select(substancename, pharm_classes) |>
-    dplyr::filter(!is.na(pharm_classes)) |>
-    dplyr::filter(stringr::str_detect(pharm_classes," \\[EPC\\]")) |>
-    dplyr::rename(drug = substancename,
-                  fda_epc_category = pharm_classes) |>
-    dplyr::distinct()
-
-  return(drug2epc)
-}
-
 
-#### NCI THESAURUS CANCER DRUGS/TREATMENTS
+## NCI THESAURUS CANCER DRUGS/TREATMENTS
 get_nci_drugs <- function(nci_db_release = nci_db_release,
                           overwrite = F,
                           path_data_raw = NULL,
                           path_data_processed = NULL){
   nci_antineo_thesaurus <- NULL
   nci_drugs <- NULL
-
+  
   nci_ftp_base <- paste0(
     "https://evs.nci.nih.gov/ftp1/NCI_Thesaurus/archive/",
     nci_db_release,
@@ -780,75 +563,15 @@ get_nci_drugs <- function(nci_db_release = nci_db_release,
     file.path(
       path_data_processed,
       "nci_thesaurus",
-      "nci_treatment_thesaurus_antineo.rds")) | overwrite == T){
+      "nci_treatment_thesaurus_antineo_v2.rds")) | overwrite == T){
     
     sorafenib_definition <-
       'A synthetic compound targeting growth signaling and angiogenesis. Sorafenib blocks the enzyme RAF kinase, a critical component of the RAF/MEK/ERK signaling pathway that controls cell division and proliferation; in addition, sorafenib inhibits the VEGFR-2/PDGFR-beta signaling cascade, thereby blocking tumor angiogenesis.'
-
+    
     nci_display_labels <- process_nci_labels(
       path_data_raw = path_data_raw,
       overwrite = overwrite)
     
-    drug2chembl <-
-      read.table(
-        file = file.path(
-          path_data_raw,"dgidb","dgidb.202202.tsv"),
-        header=T, quote="", comment.char="", sep="\t",
-        stringsAsFactors = F) |>
-      dplyr::filter(nchar(drug_name) > 0) |>
-      dplyr::select(-drug_claim_source) |>
-      dplyr::rename(molecule_chembl_id = concept_id) |>
-      dplyr::mutate(molecule_chembl_id =
-                      stringr::str_replace(molecule_chembl_id, "chembl:","")) |>
-      dplyr::filter(!startsWith(molecule_chembl_id,"wiki")) |>
-      dplyr::distinct() |>
-      dplyr::mutate(drug_claim_name = tolower(drug_claim_name)) |>
-      dplyr::filter(!startsWith(drug_claim_name,"chembl")) |>
-      dplyr::filter(!stringr::str_detect(drug_claim_name,"^[0-9]{1,}$")) |>
-      dplyr::mutate(drug_name = tolower(drug_name)) |>
-
-      ### remove and correct wrong drug to molecule_chembl_id associations
-      dplyr::mutate(molecule_chembl_id = dplyr::if_else(
-        drug_name == "calcifediol",
-        as.character("CHEMBL1040"),
-        as.character(molecule_chembl_id)
-      )) |>
-      dplyr::mutate(molecule_chembl_id = dplyr::if_else(
-        drug_name == "leucovorin",
-        as.character("CHEMBL1040"),
-        as.character(molecule_chembl_id)
-      )) |>
-      dplyr::mutate(molecule_chembl_id = dplyr::if_else(
-        drug_name == "adl-5747",
-        as.character("CHEMBL561339"),
-        as.character(molecule_chembl_id)
-      )) |>
-      dplyr::filter(molecule_chembl_id != "CHEMBL1200796") |> #cyclophosphamide
-      dplyr::filter(molecule_chembl_id != "CHEMBL3989496") |> #tezacitabine
-      dplyr::filter(molecule_chembl_id != "CHEMBL1588") |> #thiamine
-      dplyr::filter(molecule_chembl_id != "CHEMBL1200751") |> #mercaptopurine
-      dplyr::filter(molecule_chembl_id != "CHEMBL541887") |> #irinotecan
-      dplyr::filter(molecule_chembl_id != "CHEMBL513000") |> #emetine hydrochloride
-      dplyr::filter(molecule_chembl_id != "CHEMBL3989727") |> #vitamine e
-      dplyr::filter(molecule_chembl_id != "CHEMBL1889436") |> #ouabain
-      dplyr::filter(molecule_chembl_id != "CHEMBL1201113") |> #cetirizine hydrochloride
-      dplyr::filter(molecule_chembl_id != "CHEMBL1200792") |> #fluphenazine hydrochloride
-      dplyr::filter(molecule_chembl_id != "CHEMBL935") |> #pentamidine isethionate
-      dplyr::filter(molecule_chembl_id != "CHEMBL482811") |> #u-50488 methane sulfonate
-      dplyr::distinct()
-
-    drug2chembl_all <-
-      dplyr::select(drug2chembl, molecule_chembl_id, drug_claim_name) |>
-      dplyr::rename(drug_name = drug_claim_name) |>
-      dplyr::bind_rows(dplyr::select(drug2chembl, drug_name, molecule_chembl_id)) |>
-      dplyr::filter(!stringr::str_detect(drug_name,"(^[0-9]{1,}$)|^chembl[0-9]{1,}")) |>
-      dplyr::distinct() |>
-      dplyr::bind_rows(data.frame('drug_name' = 'gemtuzumab',
-                                  'molecule_chembl_id' = 'CHEMBL2108342',
-                                  stringsAsFactors = F)) |>
-      dplyr::arrange(drug_name)
-
-
     ## Agents/compounds marked as antineplastic according to NCI
     nci_antineo_agents <-
       read.table(file = file.path(path_data_raw,"nci_thesaurus","Antineoplastic_Agent.txt"),
@@ -859,37 +582,39 @@ get_nci_drugs <- function(nci_db_release = nci_db_release,
       dplyr::select(nci_t) |>
       dplyr::mutate(antineoplastic_agent = TRUE) |>
       dplyr::distinct()
-
-
-    ## parse all entries in nci thesaurus where the semantic concept type is treatment-related
-    nci_antineo_thesaurus_raw <- as.data.frame(
+    
+    
+    ## parse all entries in nci thesaurus where the 
+    ## semantic concept type is treatment-related
+    nci_antineo_thesaurus_raw2 <- as.data.frame(
       read.table(file = file.path(path_data_raw, "nci_thesaurus", "Thesaurus.txt"),
-                                        header = F, stringsAsFactors = F, sep="\t",
-                                        comment.char="", quote = "") |>
-      dplyr::rename(nci_t = V1, nci_concept_name = V2,
-                    nci_t_parent = V3, concept_synonym = V4,
-                    nci_concept_definition = V5,
-                    nci_cd_name = V6, nci_concept_status = V7,
-                    nci_concept_semantic_type = V8) |>
-
-      dplyr::filter(
-        stringr::str_detect(
-          nci_concept_semantic_type,"Chemical|Substance|Therapeutic|Drug|Immunologic")) |>
-      dplyr::left_join(
-        nci_antineo_agents, by = c("nci_t"),
-        multiple = "all", relationship = "many-to-many") |>
-      dplyr::select(-c(nci_concept_name, nci_cd_name)) |>
-      dplyr::mutate(nci_concept_synonym_all = concept_synonym) |>
-      tidyr::separate_rows(concept_synonym,sep="\\|") |>
-      dplyr::mutate(nci_concept_synonym = tolower(concept_synonym)) |>
-      dplyr::select(-concept_synonym) |>
-      dplyr::filter(
-        nci_t != 'C147908' & nci_t != 'C71622') |> ## Hormone Therapy Agent
-      dplyr::mutate(
-        nci_concept_semantic_type =
-          dplyr::if_else(
-            nci_t == 'C61948','Pharmacologic Substance',
-            nci_concept_semantic_type)) |> ## redundant Sorafenib entries
+                 header = F, stringsAsFactors = F, sep="\t",
+                 comment.char="", quote = "") |>
+        dplyr::rename(nci_t = V1, nci_concept_name = V2,
+                      nci_t_parent = V3, concept_synonym = V4,
+                      nci_concept_definition = V5,
+                      nci_cd_name = V6, nci_concept_status = V7,
+                      nci_concept_semantic_type = V8) |>
+        
+        dplyr::filter(
+          stringr::str_detect(
+            nci_concept_semantic_type,
+            "Chemical|Substance|Therapeutic|Drug|Immunologic")) |>
+        dplyr::left_join(
+          nci_antineo_agents, by = c("nci_t"),
+          multiple = "all", relationship = "many-to-many") |>
+        dplyr::select(-c(nci_concept_name, nci_cd_name)) |>
+        dplyr::mutate(nci_concept_synonym_all = concept_synonym) |>
+        tidyr::separate_rows(concept_synonym,sep="\\|") |>
+        dplyr::mutate(nci_concept_synonym = tolower(concept_synonym)) |>
+        dplyr::select(-concept_synonym) |>
+        dplyr::filter(
+          nci_t != 'C147908' & nci_t != 'C71622') |> ## Hormone Therapy Agent
+        dplyr::mutate(
+          nci_concept_semantic_type =
+            dplyr::if_else(
+              nci_t == 'C61948','Pharmacologic Substance',
+              nci_concept_semantic_type)) |> ## redundant Sorafenib entries
         dplyr::mutate(
           nci_concept_definition =
             dplyr::if_else(
@@ -897,433 +622,63 @@ get_nci_drugs <- function(nci_db_release = nci_db_release,
               nci_concept_definition)) |> ## redundant Sorafenib entries
         dplyr::distinct() |>
         dplyr::left_join(
-        nci_display_labels, by = c("nci_t"), 
-        multiple = "all", relationship = "many-to-many") |>
-      dplyr::filter(!(nci_t == "C1806" & nci_concept_synonym == "gemtuzumab")) |>
-      dplyr::filter(!(nci_t == "C405" & nci_concept_synonym == "ctx")) |>
-
-      dplyr::left_join(
-        drug2chembl_all, 
-        by = c("nci_concept_synonym" = "drug_name"),
-        multiple = "all", relationship = "many-to-many") |>
-      dplyr::mutate(nci_db_version = nci_db_release) |>
-      dplyr::filter(!is.na(nci_cd_name)) |>
-      dplyr::filter(!stringr::str_detect(
+          nci_display_labels, by = c("nci_t"), 
+          multiple = "all", relationship = "many-to-many") |>
+        dplyr::filter(
+          !(nci_t == "C1806" & nci_concept_synonym == "gemtuzumab")) |>
+        dplyr::filter(
+          !(nci_t == "C405" & nci_concept_synonym == "ctx")) |>
+        dplyr::mutate(nci_db_version = nci_db_release) |>
+        dplyr::filter(!is.na(nci_cd_name)) |>
+        dplyr::filter(!stringr::str_detect(
           tolower(nci_concept_definition), "coronavirus")) |>
-      dplyr::filter(!stringr::str_detect(
-        nci_concept_synonym_all, 
-        "SARS-CoV-2|COVID-19|CoV-19|Coronary|Corona|Covid-19|covid-19")) |>
-      #))
-      dplyr::mutate(antineoplastic_agent = dplyr::if_else(
-        is.na(antineoplastic_agent),
-        as.logical(TRUE),
-        as.logical(antineoplastic_agent)
-      )) |>
-      dplyr::filter(!stringr::str_detect(nci_cd_name," (Gel|Oil|Cream|Seed|Block|Field|Supplement|Factor)$")) |>
-      dplyr::filter(!stringr::str_detect(nci_cd_name,"(Vaccination|Lotion|Therapeutic Heat|Procedure|Rehabilitation|Prevention|Rinse)$")) |>
-      dplyr::filter(!stringr::str_detect(nci_cd_name,"(Epitope|Exract|Influenza|Ginseng|Ointment|Management|Injection|Tool)$")) |>
-      dplyr::filter(!stringr::str_detect(nci_cd_name,"^(Vitamin A Compound|Inactivated Poliovirus|Antineoplastic Immune Cell|Topical)")) |>
-      dplyr::filter(!stringr::str_detect(nci_cd_name,"^(Sheng-Yu|Ginseng|Dry Cleaning|Boost|Tobacco|Microwave)")) |>
-      dplyr::filter(!stringr::str_detect(nci_cd_name,"^(Blood|Interruption of|Gum Arabic|Vaginal Cylinder|Laser Ablation|Wheatgrass)")) |>
-      dplyr::filter(!stringr::str_detect(nci_cd_name,"^(Chemical Challenge|Prevention of|Magic Mouthwash|Wood Dust|Soot|Cocaine)")) |>
-      dplyr::filter(!stringr::str_detect(nci_cd_name,"^(Antibody|Antigen|Antioxidant|Vaccination|Acetate|Antiserum|Asbestos|Aspirate|Autoantigen|Cytokine)$")) |>
-      dplyr::filter(!stringr::str_detect(nci_cd_name," Spray| Extract| Antidiabetic| Implant|(Green Tea|Living Healthy|Pollutant|Probe|Protective Agent|Supportive Care|Caffe)")) |>
-      dplyr::filter(
-        !stringr::str_detect(
-          tolower(nci_concept_definition),
-          "chinese |antidiabet|diabetes|antidepress|analgesic|pulmonary edema|nutritional|human carcinogen|anesthetic|nonsedating|sedative|antihyper|antiinflamma|antiarrythm|antiangin|antihist|muscle|neurotransmitter"))
-    )
-
-    #nci_antineo_thesaurus
-    nci2chembl <- as.data.frame(nci_antineo_thesaurus_raw |>
-      dplyr::select(molecule_chembl_id, nci_cd_name) |>
-      dplyr::filter(!is.na(molecule_chembl_id)) |>
-      dplyr::distinct()
+        dplyr::filter(!stringr::str_detect(
+          nci_concept_synonym_all, 
+          "SARS-CoV-2|COVID-19|CoV-19|Coronary|Corona|Covid-19|covid-19")) |>
+        #))
+        dplyr::mutate(antineoplastic_agent = dplyr::if_else(
+          is.na(antineoplastic_agent),
+          as.logical(TRUE),
+          as.logical(antineoplastic_agent)
+        )) |>
+        dplyr::filter(!stringr::str_detect(nci_cd_name," (Gel|Oil|Cream|Seed|Block|Field|Supplement|Factor)$")) |>
+        dplyr::filter(!stringr::str_detect(nci_cd_name,"(Vaccination|Lotion|Therapeutic Heat|Procedure|Rehabilitation|Prevention|Rinse)$")) |>
+        dplyr::filter(!stringr::str_detect(nci_cd_name,"(Epitope|Exract|Influenza|Ginseng|Ointment|Management|Injection|Tool)$")) |>
+        dplyr::filter(!stringr::str_detect(nci_cd_name,"^(Vitamin A Compound|Inactivated Poliovirus|Antineoplastic Immune Cell|Topical)")) |>
+        dplyr::filter(!stringr::str_detect(nci_cd_name,"^(Sheng-Yu|Ginseng|Dry Cleaning|Boost|Tobacco|Microwave)")) |>
+        dplyr::filter(!stringr::str_detect(nci_cd_name,"^(Blood|Interruption of|Gum Arabic|Vaginal Cylinder|Laser Ablation|Wheatgrass)")) |>
+        dplyr::filter(!stringr::str_detect(nci_cd_name,"^(Chemical Challenge|Prevention of|Magic Mouthwash|Wood Dust|Soot|Cocaine)")) |>
+        dplyr::filter(!stringr::str_detect(nci_cd_name,"^(Antibody|Antigen|Antioxidant|Vaccination|Acetate|Antiserum|Asbestos|Aspirate|Autoantigen|Cytokine)$")) |>
+        dplyr::filter(!stringr::str_detect(nci_cd_name," Spray| Extract| Antidiabetic| Implant|(Green Tea|Living Healthy|Pollutant|Probe|Protective Agent|Supportive Care|Caffe)"))
+        #dplyr::filter(
+        #  !stringr::str_detect(
+        #    tolower(nci_concept_definition),
+        #    "chinese |antidiabet|diabetes|antidepress|analgesic|pulmonary edema|nutritional|human carcinogen|anesthetic|nonsedating|sedative|antihyper|antiinflamma|antiarrythm|antiangin|antihist|muscle|neurotransmitter"))
     )
-
-    j <- 1
-    nci2chembl_dict <- list()
-    while(j <= nrow(nci2chembl)){
-      nci2chembl_dict[[nci2chembl[j,"nci_cd_name"]]] <-
-        nci2chembl[j,"molecule_chembl_id"]
-      j <- j + 1
-    }
-
-    i <- 1
-    while(i <= nrow(nci_antineo_thesaurus_raw)){
-      nci_cd_name <-
-        nci_antineo_thesaurus_raw[i,"nci_cd_name"]
-      if(nci_cd_name %in% names(nci2chembl_dict)){
-        nci_antineo_thesaurus_raw[i,"molecule_chembl_id"] <-
-          nci2chembl_dict[[nci_cd_name]]
-      }
-      i <- i + 1
-    }
-
-    nci_antineo_thesaurus <- nci_antineo_thesaurus_raw |>
-      dplyr::filter(!(molecule_chembl_id == "CHEMBL1569487" & nci_t == "C405")) |>
-      dplyr::mutate(molecule_chembl_id = dplyr::if_else(
-        nci_cd_name == "Sorafenib Tosylate", "CHEMBL1200485", as.character(molecule_chembl_id))) |>
-      dplyr::mutate(molecule_chembl_id = dplyr::if_else(
-        nci_cd_name == "Leucovorin", "CHEMBL1679", as.character(molecule_chembl_id))) |>
-      dplyr::mutate(molecule_chembl_id = dplyr::if_else(
-        nci_cd_name == "Masoprocol", "CHEMBL313972", as.character(molecule_chembl_id))) |>
-      dplyr::mutate(molecule_chembl_id = dplyr::if_else(
-        nci_cd_name == "7-Hydroxystaurosporine", "CHEMBL1236539", as.character(molecule_chembl_id))) |>
-      dplyr::mutate(molecule_chembl_id = dplyr::if_else(
-        nci_cd_name == "Dromostanolone", "CHEMBL1201048", as.character(molecule_chembl_id))) |>
-      dplyr::mutate(molecule_chembl_id = dplyr::if_else(
-        nci_cd_name == "Patritumab", "CHEMBL2109406", as.character(molecule_chembl_id))) |>
-      dplyr::mutate(molecule_chembl_id = dplyr::if_else(
-        nci_cd_name == "Rivoceranib", "CHEMBL3186534", as.character(molecule_chembl_id))) |>
-      dplyr::mutate(molecule_chembl_id = dplyr::if_else(
-        nci_cd_name == "Rivoceranib Mesylate", "CHEMBL3545414", as.character(molecule_chembl_id))) |>
-      dplyr::mutate(molecule_chembl_id = dplyr::if_else(
-        nci_cd_name == "Tricribine Phosphate", "CHEMBL462018", as.character(molecule_chembl_id))) |>
-      dplyr::mutate(molecule_chembl_id = dplyr::if_else(
-        nci_cd_name == "Umbrasilib" |
-          nci_cd_name == "Umbrasilib Tosylate", "CHEMBL3948730",
-        as.character(molecule_chembl_id))) |>
-      dplyr::filter(
-        !(stringr::str_detect(
-          nci_cd_name,
-          "^Antineoplast(on|ic) Agent|^Support| Gel | Ointment|Caffeine|^Acetate$")))
-      #CHEMBL3545055
-
+    
+    
+    nci_antineo_thesaurus <- nci_antineo_thesaurus_raw
+    
     ## add apatinib as an alias (is missing in NCI thesaurus)
     apatinib_alias_entry <- nci_antineo_thesaurus |>
       dplyr::filter(nci_concept_synonym == "rivoceranib") |>
       dplyr::mutate(nci_concept_synonym = "apatinib") |>
       dplyr::mutate(nci_concept_synonym_all =
                       paste(nci_concept_synonym_all,"Apatinib",sep="|"))
-
+    
     nci_antineo_thesaurus <- nci_antineo_thesaurus |>
       dplyr::bind_rows(apatinib_alias_entry) |>
-      dplyr::arrange(nci_cd_name)
-
-    nci_with_chembl <- nci_antineo_thesaurus |>
-      dplyr::filter(!is.na(molecule_chembl_id)) |>
-      dplyr::select(nci_t, molecule_chembl_id) |>
-      dplyr::distinct()
-
-    nci_2 <- nci_antineo_thesaurus |>
-      dplyr::filter(is.na(molecule_chembl_id)) |>
-      dplyr::select(-molecule_chembl_id) |>
-      dplyr::left_join(
-        nci_with_chembl, by = c("nci_t"),
-        multiple = "all", 
-        relationship = "many-to-many")
-
-    nci_antineo_thesaurus <- nci_antineo_thesaurus |>
-      dplyr::filter(!is.na(molecule_chembl_id)) |>
-      dplyr::bind_rows(nci_2) |>
+      dplyr::arrange(nci_cd_name) |>
       dplyr::rename(nci_drug_name = nci_concept_synonym) |>
       dplyr::filter(!(nci_cd_name == "Sorafenib Tosylate" &
                         nci_drug_name == "sorafenib")) |>
       dplyr::select(-c(nci_t_parent,cui)) |>
-      dplyr::distinct() |>
-      ## Removing duplicate/erroneous NCI/CHEMBL cross-ref identifiers
-      dplyr::filter(
-        !((nci_t == "C137804" | nci_t == "C137803") &
-                        molecule_chembl_id == "CHEMBL1201567")) |> #FILGRASTIM
-      dplyr::filter(
-        !(nci_t == "C88325" & molecule_chembl_id == "CHEMBL2109653")) |> #BERMEKIMAB
-      dplyr::filter(
-        !(nci_t == "C2602" & molecule_chembl_id == "CHEMBL513")) |> #CARMUSTINE
-      dplyr::filter(
-        !(nci_t == "C80867" & molecule_chembl_id == "CHEMBL491473")) |> #CEDIRANIB
-      dplyr::filter(
-        !(nci_t == "C225" & molecule_chembl_id == "CHEMBL1201577")) |> #CETUXIMAB
-      dplyr::filter(
-        !(nci_t == "C2213" & molecule_chembl_id == "CHEMBL178")) |> #DAUNORUBICIN
-      dplyr::filter(
-        !(nci_t == "C128039" & molecule_chembl_id == "CHEMBL3137331")) |> #DEFACTINIB
-      dplyr::filter(
-        !(nci_t == "C62435" & molecule_chembl_id == "CHEMBL522892")) |> #DOVITINIB
-      dplyr::filter(
-        !(nci_t == "C2693" & molecule_chembl_id == "CHEMBL553")) |> #ERLOTINIB
-      dplyr::filter(
-        !(nci_t == "C478" & molecule_chembl_id == "CHEMBL135")) |> #ESTRADIOL
-      dplyr::filter(
-        !(nci_t == "C1687" & molecule_chembl_id == "CHEMBL941")) |> #IMATINIB
-      dplyr::filter(
-        !(nci_t == "C29165" & molecule_chembl_id == "CHEMBL191")) |> #LOSARTAN
-      dplyr::filter(
-        !(nci_t == "C1561" & molecule_chembl_id == "CHEMBL2109447")) |> #LYM-1
-      dplyr::filter(
-        !(nci_t == "C1155" & molecule_chembl_id == "CHEMBL717")) |> #MEDROXYPROGESTERONE ACETATE
-      dplyr::filter(
-        !(nci_t == "C9678" & molecule_chembl_id == "CHEMBL1456")) |> #MYCOPHENOLATE MOFETIL
-      dplyr::filter(
-        !(nci_t == "C20513" & molecule_chembl_id == "CHEMBL1201573")) |> #OPRELVEKIN
-      dplyr::filter(
-        !(nci_t == "C95230" & molecule_chembl_id == "CHEMBL1201421")) |> #PEGAPTANIB SODIUM
-      dplyr::filter(
-        !(nci_t == "C176878" & molecule_chembl_id == "CHEMBL3545154")) |> #POZIOTINIB
-      dplyr::filter(
-        !(nci_t == "C2297" & molecule_chembl_id == "CHEMBL103")) |> #PROGESTERONE
-      dplyr::filter(
-        !(nci_t == "C148170" & molecule_chembl_id == "CHEMBL225071")) |> #RALTITREXED
-      dplyr::filter(
-        !(nci_t == "C66506" & molecule_chembl_id == "CHEMBL1790041")) |> #RANITIDINE
-      dplyr::filter(
-        !(nci_t == "C82693" & molecule_chembl_id == "CHEMBL1738757")) |> #REBASTINIB
-      dplyr::filter(
-        !(nci_t == "C1492" & molecule_chembl_id == "CHEMBL1201670")) |> #SARGRAMOSTIM
-      dplyr::filter(
-        !(nci_t == "C88337" & molecule_chembl_id == "CHEMBL2105737")) |> #SONIDEGIB
-      dplyr::filter(
-        !(nci_t == "C80631" & molecule_chembl_id == "CHEMBL565612")) |> #SOTRASTAURIN
-      dplyr::filter(
-        !(nci_t == "C104057" & molecule_chembl_id == "CHEMBL2105694")) |> #TELAPRISTONE ACETATE
-      dplyr::filter(
-        !(nci_t == "C29523" & molecule_chembl_id == "CHEMBL1201334")) |> #TRIPTORELIN
-      dplyr::filter(
-        !(nci_t == "C80049" & molecule_chembl_id == "CHEMBL3545218")) |> #VORUCICLIB
-      dplyr::filter(
-        !(nci_t == "C133021" & molecule_chembl_id == "CHEMBL3188386")) #WNT-974
-
-
-
-    i <- 1
-    nci_compounds_no_chembl <- as.data.frame(
-      nci_antineo_thesaurus |>
-      dplyr::select(nci_cd_name, molecule_chembl_id) |>
-      dplyr::distinct() |>
-      dplyr::filter(!stringr::str_detect(tolower(nci_cd_name),
-                                         "( vaccine)|^[0-9]")) |>
-      dplyr::filter(is.na(molecule_chembl_id)) |>
-      dplyr::select(nci_cd_name) |>
-      dplyr::mutate(num_spaces = stringr::str_count(nci_cd_name," ")) |>
-      dplyr::filter(
-        num_spaces <= 1 & !stringr::str_detect(
-          tolower(nci_cd_name),"regimen|&|/|;|,")) |>
-      dplyr::filter(
-        stringr::str_detect(
-          nci_cd_name,
-          "(mab|cin|ide|ib|im|bine|tin|om|lin|stat|one|ate|ole|ane|ine|xel|rol)$")) |>
-      dplyr::distinct()
-    )
-
-    nci_compounds_chembl_match <- data.frame()
-
-    ## Retrieve aliases for drugs with PubChem x-refs
-    pubchem_synonym_files <-
-      sort(list.files(path = file.path(here::here(), "data-raw","pubchem"),
-                      pattern = "CID-Synonym-filtered_",
-                      full.names = T))
-
-    lgr::lgr$info("Mapping ChEMBL identifiers for NCI compounds")
-    i <- 1
-    for(f in pubchem_synonym_files){
-      lgr::lgr$info(paste0("Mapping iteration..", i))
-      synonym_data <- as.data.frame(readr::read_tsv(
-        f, col_names = c('pubchem_cid','alias'),
-        col_types = "dc",
-        progress = F
-      ))
-
-      chembl2pubchem <- synonym_data |>
-        dplyr::filter(stringr::str_detect(
-          alias, "^CHEMBL")) |>
-        dplyr::rename(molecule_chembl_id = alias) |>
-        dplyr::mutate(pubchem_cid = as.integer(pubchem_cid))
-
-      chembl2alias <- synonym_data |>
-        dplyr::filter(!stringr::str_detect(
-          alias, "^CHEMBL")) |>
-        dplyr::mutate(pubchem_cid = as.integer(pubchem_cid)) |>
-        dplyr::mutate(alias = tolower(alias))
-
-      hits <- nci_compounds_no_chembl |>
-        dplyr::mutate(nci_cd_name_lc =
-                        tolower(nci_cd_name)) |>
-        dplyr::inner_join(
-          chembl2alias, 
-          by = c("nci_cd_name_lc" = "alias"),
-          multiple = "all", relationship = "many-to-many")
-
-
-      rm(chembl2alias)
-
-      if(nrow(hits) > 0){
-        hits <- hits |>
-          dplyr::inner_join(
-            chembl2pubchem, 
-            by = "pubchem_cid", 
-            multiple = "all",
-            relationship = "many-to-many") |>
-          dplyr::select(nci_cd_name,
-                        molecule_chembl_id)
-
-        lgr::lgr$info(paste0("Found ", nrow(hits), " ChEMBL identifiers"))
-
-        nci_compounds_chembl_match <- nci_compounds_chembl_match |>
-          dplyr::bind_rows(hits)
-      }
-      rm(chembl2pubchem)
-
-      i <- i + 1
-    }
-
-    nci_compounds_chembl_match_unique <- nci_compounds_chembl_match |>
-      dplyr::group_by(nci_cd_name) |>
-      dplyr::summarise(
-        n_identifiers = dplyr::n(),
-        molecule_chembl_id = paste(unique(molecule_chembl_id), collapse="&"),
-        .groups = "drop") |>
-      dplyr::mutate(molecule_chembl_id = dplyr::if_else(
-        nci_cd_name == "Goserelin Acetate",
-        "CHEMBL1200501",
-        as.character(molecule_chembl_id))) |>
-      dplyr::mutate(molecule_chembl_id = dplyr::if_else(
-        nci_cd_name == "Abacavir Sulfate",
-        "CHEMBL1200666",
-        as.character(molecule_chembl_id))) |>
-      dplyr::mutate(molecule_chembl_id = dplyr::if_else(
-        nci_cd_name == "Roniciclib",
-        "CHEMBL4442620",
-        as.character(molecule_chembl_id))) |>
-      dplyr::filter(!stringr::str_detect(molecule_chembl_id,"&")) |>
-      dplyr::select(-n_identifiers)
-
-
-    # i <- 1
-    # while(i <= nrow(nci_compounds_no_chembl)){
-    #   name <- nci_compounds_no_chembl[i, "nci_cd_name"]
-    #   chembl_hit <- get_chembl_compound_by_name(name)
-    #   if(!is.null(chembl_hit)){
-    #     chembl_hit <- chembl_hit |>
-    #       dplyr::rename(nci_cd_name = name)
-    #     nci_compounds_chembl_match <- nci_compounds_chembl_match |>
-    #       dplyr::bind_rows(chembl_hit)
-    #
-    #   }
-    #   if(i %% 10 == 0){
-    #     lgr::lgr$info("Done with querying ChEMBL for ", i, " compound names")
-    #   }
-    #   i <- i + 1
-    # }
-
-    nci_antineo_thesaurus_chembl <- nci_antineo_thesaurus |>
-      dplyr::anti_join(nci_compounds_no_chembl,
-                       by = "nci_cd_name")
-
-    nci_antineo_thesaurus_no_chembl <- nci_antineo_thesaurus |>
-      dplyr::inner_join(nci_compounds_no_chembl,
-                        by = "nci_cd_name", 
-                        multiple = "all", relationship = "many-to-many") |>
-      dplyr::select(-c(num_spaces, molecule_chembl_id)) |>
-      dplyr::left_join(
-        nci_compounds_chembl_match_unique,
-        by = "nci_cd_name", 
-        multiple = "all", 
-        relationship = "many-to-many")
-    
-    nci_antineo_thesaurus <-
-      nci_antineo_thesaurus_chembl |>
-      dplyr::bind_rows(nci_antineo_thesaurus_no_chembl) |>
-      dplyr::arrange(nci_cd_name) |>
-      dplyr::distinct() |>
-      dplyr::mutate(molecule_chembl_id = dplyr::if_else(
-        nci_cd_name == "Vorolanib",
-        "CHEMBL3545427",
-        as.character(molecule_chembl_id))) |>
-      dplyr::mutate(molecule_chembl_id = dplyr::if_else(
-        nci_cd_name == "Gemtuzumab Ozogamicin",
-        "CHEMBL1201506",
-        as.character(molecule_chembl_id))) |>
-      dplyr::mutate(molecule_chembl_id = dplyr::if_else(
-        nci_cd_name == "Ublituximab",
-        "CHEMBL2108354",
-        as.character(molecule_chembl_id))) |>
-      dplyr::mutate(molecule_chembl_id = dplyr::if_else(
-        nci_cd_name == "Etoposide",
-        "CHEMBL44657",
-        as.character(molecule_chembl_id))) |>
-      dplyr::mutate(molecule_chembl_id = dplyr::if_else(
-        nci_cd_name == "Lorvotuzumab Mertansine",
-        "CHEMBL1743037",
-        as.character(molecule_chembl_id))) |>
-      dplyr::mutate(molecule_chembl_id = dplyr::if_else(
-        nci_cd_name == "Epratuzumab",
-        "CHEMBL2108404",
-        as.character(molecule_chembl_id))) |>
-      dplyr::mutate(molecule_chembl_id = dplyr::if_else(
-        nci_cd_name == "Ocaratuzumab",
-        "CHEMBL2109665",
-        as.character(molecule_chembl_id))) |>
-      dplyr::mutate(molecule_chembl_id = dplyr::if_else(
-        nci_cd_name == "Medroxyprogesterone Acetate",
-        "CHEMBL717",
-        as.character(molecule_chembl_id))) |>
-      dplyr::mutate(molecule_chembl_id = dplyr::if_else(
-        nci_cd_name == "Brigatinib",
-        "CHEMBL3545311",
-        as.character(molecule_chembl_id))) |>
-      dplyr::mutate(molecule_chembl_id = dplyr::if_else(
-        nci_cd_name == "Brentuximab",
-        "CHEMBL1742994",
-        as.character(molecule_chembl_id))) |>
-      dplyr::mutate(molecule_chembl_id = dplyr::if_else(
-        nci_cd_name == "Afatinib",
-        "CHEMBL1173655",
-        as.character(molecule_chembl_id))) |>
-      dplyr::mutate(molecule_chembl_id = dplyr::if_else(
-        nci_cd_name == "Spebrutinib",
-        "CHEMBL3301625",
-        as.character(molecule_chembl_id))) |>
-      dplyr::mutate(molecule_chembl_id = dplyr::if_else(
-        nci_cd_name == "Dacomitinib",
-        "CHEMBL2110732",
-        as.character(molecule_chembl_id))) |>
-      dplyr::mutate(molecule_chembl_id = dplyr::if_else(
-        nci_cd_name == "Lucitanib",
-        "CHEMBL2220486",
-        as.character(molecule_chembl_id))) |>
-      dplyr::mutate(molecule_chembl_id = dplyr::if_else(
-        nci_cd_name == "Tofacitinib",
-        "CHEMBL221959",
-        as.character(molecule_chembl_id))) |>
-      dplyr::mutate(molecule_chembl_id = dplyr::if_else(
-        nci_cd_name == "Mifamurtide",
-        "CHEMBL2111100",
-        as.character(molecule_chembl_id))) |>
-      dplyr::mutate(molecule_chembl_id = dplyr::if_else(
-        nci_cd_name == "Umbralisib",
-        "CHEMBL3948730",
-        as.character(molecule_chembl_id))) |>
-      dplyr::mutate(molecule_chembl_id = dplyr::if_else(
-        nci_cd_name == "Onvansertib",
-        "CHEMBL1738758",
-        as.character(molecule_chembl_id))) |>
-      dplyr::mutate(molecule_chembl_id = dplyr::if_else(
-        nci_cd_name == "Sulindac",
-        "CHEMBL15770",
-        as.character(molecule_chembl_id))) |>
-      dplyr::mutate(molecule_chembl_id = dplyr::if_else(
-        nci_cd_name == "Calcipotriene",
-        "CHEMBL1200666",
-        as.character(molecule_chembl_id))) |>
       dplyr::distinct()
-
-
+     
     nci_drugs <- list()
-    ## NCI anticancer drugs (targeted) - including compound identifier (CHEMBL)
-    nci_drugs[['with_chembl_id']] <- nci_antineo_thesaurus |>
-      dplyr::select(nci_t,
-                    nci_concept_definition,
-                    nci_cd_name,
-                    molecule_chembl_id,
-                    nci_drug_name,
-                    nci_concept_synonym_all) |>
-      dplyr::filter(!is.na(molecule_chembl_id)) |>
-      #dplyr::mutate(nci_drug_name_lc = tolower(nci_drug_name)) |>
-      dplyr::distinct()
-
+    
     ## NCI anticancer drugs (non-targeted) - lacking compound identifier (CHEMBL)
     nci_drugs[['no_chembl_id']] <- nci_antineo_thesaurus |>
-      dplyr::filter(is.na(molecule_chembl_id)) |>
       dplyr::select(nci_t,
                     nci_concept_definition,
                     nci_cd_name,
@@ -1348,16 +703,16 @@ get_nci_drugs <- function(nci_db_release = nci_db_release,
         )) |>
       dplyr::select(-num_words) |>
       dplyr::distinct()
-
+    
     saveRDS(nci_drugs, file = file.path(
       path_data_processed,  "nci_thesaurus",
-      "nci_treatment_thesaurus_antineo.rds"))
-
+      "nci_treatment_thesaurus_antineo_v2.rds"))
+    
   }else{
     nci_drugs <- readRDS(
       file = file.path(
         path_data_processed, "nci_thesaurus",
-        "nci_treatment_thesaurus_antineo.rds"))
+        "nci_treatment_thesaurus_antineo_v2.rds"))
   }
   return(nci_drugs)
 }
@@ -1367,177 +722,82 @@ merge_nci_opentargets <- function(
     drug_sets = NULL,
     path_data_raw = NULL){
   
-  ot_nci_matched <- list()
-
-  ## X-ref Open Targets and NCI by molecule id
-  ot_nci_matched[['targeted_by_id']] <- drug_sets[['otp']] |>
-    dplyr::left_join(
-      drug_sets[['nci']][['with_chembl_id']], 
-      by = c("molecule_chembl_id"), 
-      multiple = "all", relationship = "many-to-many") |>
-    dplyr::filter(!is.na(nci_drug_name)) |>
-    dplyr::select(-c(nci_drug_name))
-
-  ## X-ref Open Targets and NCI (with molecule ID) by drug name
-  ot_nci_matched[['targeted_by_name']] <- drug_sets[['otp']] |>
+  
+  ## X-ref Open Targets and NCI by drug name
+  otp_drugs_all <- drug_sets[['otp']] |>
     dplyr::mutate(drug_name_lc = tolower(drug_name)) |>
     dplyr::left_join(
-      dplyr::select(
-        drug_sets[['nci']][['with_chembl_id']],
-                    -molecule_chembl_id),
+        drug_sets[['nci']][['no_chembl_id']],
       by = c("drug_name_lc" = "nci_drug_name"),
       multiple = "all", relationship = "many-to-many") |>
-    dplyr::anti_join(
-      ot_nci_matched[['targeted_by_id']], by = "nci_cd_name") |>
-    dplyr::filter(!is.na(drug_name_lc)) |>
-    dplyr::select(-c(drug_name_lc))
-
-  ot_nci_matched_all <- do.call(rbind, ot_nci_matched) |>
+    dplyr::select(-c(drug_name_lc)) |>
+    dplyr::distinct() |>
+    dplyr::mutate(opentargets = TRUE)
+  
+  ## list all drug aliases in targeted drugs from OTP
+  all_drug_aliases <- otp_drugs_all |>
+    tidyr::separate_rows(drug_synonyms, sep = "\\|") |>
+    dplyr::select(drug_synonyms) |>
+    dplyr::mutate(nci_drug_name = tolower(drug_synonyms)) |>
     dplyr::distinct()
-
-  ot_targeted_remain <- drug_sets[['otp']] |>
-    dplyr::anti_join(
-      ot_nci_matched_all, 
-      by = c("target_symbol","molecule_chembl_id",
-             "disease_efo_label","drug_name",
-             "drug_clinical_source",
-             "drug_clinical_id")) |>
-    dplyr::mutate(drug_name_lc = tolower(drug_name)) |>
-    dplyr::left_join(
-      drug_sets[['nci']][['no_chembl_id']],
-      by = c("drug_name_lc" = "nci_drug_name"),
-      multiple = "all", relationship = "many-to-many") |>
-    dplyr::select(-drug_name_lc)
-
-  ot_drugs_all <- ot_nci_matched_all |>
-    dplyr::bind_rows(ot_targeted_remain) |>
+  
+  ## Identify drugs in NCI that are not in Open Targets
+  found_drugs <- drug_sets[['nci']][['no_chembl_id']] |>
+    dplyr::semi_join(all_drug_aliases,
+                     by = "nci_drug_name") |>
+    dplyr::select(nci_t) |>
+    dplyr::distinct()
+  
+  nci_missing <- drug_sets[['nci']][['no_chembl_id']] |>
+    dplyr::anti_join(found_drugs,
+                     by = "nci_t") |>
+    dplyr::select(-nci_drug_name) |>
+    dplyr::mutate(opentargets = F) |>
+    dplyr::distinct()
+  
+  ## ignore some drugs
+  custom_name_ignore <- readr::read_tsv(
+    file = "data-raw/drug_names_ignore.tsv",
+    col_names = F, show_col_types = F)
+  colnames(custom_name_ignore) <- c("nci_cd_name")
+  
+  all_cancer_drugs <- otp_drugs_all |>
+    dplyr::bind_rows(nci_missing) |>
+    dplyr::distinct() |>
     dplyr::mutate(
       nci_cd_name =
-        dplyr::if_else(is.na(nci_cd_name) &
-                         !stringr::str_detect(drug_name,"[0-9]"),
-                       Hmisc::capitalize(tolower(drug_name)),
-                       nci_cd_name)) |>
+        dplyr::if_else(
+          is.na(nci_cd_name) &
+            !stringr::str_detect(drug_name,"[0-9]"),
+          Hmisc::capitalize(tolower(drug_name)),
+          nci_cd_name)) |>
     dplyr::mutate(
       nci_cd_name = dplyr::if_else(
         is.na(nci_cd_name) &
           stringr::str_detect(drug_name,"[0-9]"),
         drug_name,
         nci_cd_name)) |>
-    dplyr::mutate(opentargets = TRUE)
-
-  nci_missing_1 <- drug_sets[['nci']][['no_chembl_id']] |>
-    dplyr::anti_join(ot_drugs_all,
-                     by = "nci_cd_name") |>
-    dplyr::select(-nci_drug_name)
-
-  nci_missing_2 <- drug_sets[['nci']][['with_chembl_id']] |>
-    dplyr::anti_join(ot_drugs_all,
-                     by = "molecule_chembl_id") |>
-    dplyr::select(-nci_drug_name)
-
-  nci_missing <- dplyr::bind_rows(
-    nci_missing_1,
-    nci_missing_2) |>
-    dplyr::distinct() |>
-    dplyr::mutate(opentargets = FALSE)
-  
-  rm(nci_missing_1)
-  rm(nci_missing_2)
-  
-  
-  ## do not include NCI records with a similar drug name
-  nci_missing$drug_name_lc <- tolower(nci_missing$nci_cd_name)
-  
-  ot_drugs_all$drug_name_lc1 <- tolower(ot_drugs_all$drug_name)
-  ot_drugs_all$drug_name_lc2 <- tolower(ot_drugs_all$nci_cd_name)
-  
-  
-  nci_missing_final <- nci_missing |>
-    dplyr::select(drug_name_lc) |>
-    dplyr::anti_join(
-      ot_drugs_all, 
-      by = c("drug_name_lc" = "drug_name_lc1")) |>
+    dplyr::mutate(drug_name = dplyr::if_else(
+      is.na(drug_name),
+      nci_cd_name,
+      drug_name)) |>
     dplyr::anti_join(
-      ot_drugs_all, 
-      by = c("drug_name_lc" = "drug_name_lc2")) |>
-    dplyr::inner_join(
-      nci_missing, by = "drug_name_lc",
-      multiple = "all", 
-      relationship = "many-to-many") |>
-    dplyr::select(-drug_name_lc)
+      custom_name_ignore,
+      by = "nci_cd_name")
   
-  all_cancer_drugs <- ot_drugs_all |>
-    dplyr::select(-c(drug_name_lc1, drug_name_lc2)) |>
-    dplyr::bind_rows(nci_missing_final) |>
-    dplyr::distinct()
-
-
-  ## Figure out cases where a single drug maps to multiple
-  ## molecule chembl identifiers
   rownames(all_cancer_drugs) <- NULL
-
-  custom_chembl_map <- readr::read_tsv(
-    file = "data-raw/custom_chembl_map.tsv",
-    col_names = F, show_col_types = F)
-
-  colnames(custom_chembl_map) <-
-    c("nci_cd_name", "molecule_chembl_id")
-
-  custom_name_ignore <- readr::read_tsv(
-      file = "data-raw/drug_names_ignore.tsv",
-    col_names = F, show_col_types = F)
-  colnames(custom_name_ignore) <- c("nci_cd_name")
-
-  name2chembl_id <- all_cancer_drugs |>
-    dplyr::filter(!is.na(molecule_chembl_id)) |>
-    dplyr::group_by(nci_cd_name) |>
-    dplyr::summarise(m = paste(
-      sort(unique(molecule_chembl_id)), collapse=";"))
-
-  ## Drugs that map to a single identifier
-  name2chembl_unique <- name2chembl_id |>
-    dplyr::filter(!stringr::str_detect(m,";")) |>
-    dplyr::inner_join(
-      all_cancer_drugs, 
-      by = "nci_cd_name",
-      multiple = "all", 
-      relationship = "many-to-many") |>
-    dplyr::select(-m)
-
-  ## Drugs that map to multiple identifiers
-  name2chembl_ambiguous_curated <- name2chembl_id |>
-    dplyr::filter(stringr::str_detect(m, ";")) |>
-    dplyr::inner_join(
-      custom_chembl_map, 
-      by = "nci_cd_name",
-      multiple = "all", 
-      relationship = "many-to-many") |>
-    dplyr::inner_join(
-      all_cancer_drugs, 
-      by = c("nci_cd_name","molecule_chembl_id"),
-      multiple = "all", 
-      relationship = "many-to-many") |>
-    dplyr::select(-m)
-
-  all_drugs <- dplyr::bind_rows(
-    dplyr::filter(
-      all_cancer_drugs, is.na(molecule_chembl_id)),
-    name2chembl_unique,
-    name2chembl_ambiguous_curated
-  )
-
   
-  salt_patterns <- 
-      readr::read_tsv(
+  salt_patterns <-
+    readr::read_tsv(
       file.path(path_data_raw, "salts.tsv"),
       show_col_types = F, col_names = F)
-  
+
   salt_patterns_regex <- paste0(
     "( (",
     paste(salt_patterns$X1, collapse="|"),
     "))$")
   
-  salt_forms <- all_drugs |>
+  salt_forms <- all_cancer_drugs |>
     dplyr::filter(
       stringr::str_detect(
         tolower(nci_cd_name), 
@@ -1551,14 +811,14 @@ merge_nci_opentargets <- function(
     dplyr::distinct() |>
     dplyr::mutate(is_salt = T) |>
     dplyr::inner_join(
-      dplyr::select(all_drugs, nci_cd_name),
+      dplyr::select(all_cancer_drugs, nci_cd_name),
       by = c("tradename" = "nci_cd_name"),
       multiple = "all", relationship = "many-to-many") |>
     dplyr::distinct() |>
     dplyr::select(-tradename)
-
-
-  all_drugs_final <- all_drugs |>
+  
+  
+  all_cancer_drugs <- all_cancer_drugs |>
     dplyr::left_join(
       salt_forms, 
       by = "nci_cd_name",
@@ -1569,18 +829,16 @@ merge_nci_opentargets <- function(
       as.logical(FALSE),
       as.logical(is_salt)
     )) |>
-    dplyr::anti_join(
-      custom_name_ignore,
-      by = "nci_cd_name")
-    
-
+    dplyr::distinct()
+  
+  
   ## antibody drug conjugates
-  adc_candidates <- all_drugs_final |>
+  adc_candidates <- all_cancer_drugs |>
     dplyr::filter(
       (!is.na(nci_cd_name) &
-        stringr::str_detect(tolower(nci_cd_name), "mab ")) |
-      stringr::str_detect(
-        nci_concept_definition, "ADC|antibody(-| )drug conjugate")) |>
+         stringr::str_detect(tolower(nci_cd_name), "mab ")) |
+        stringr::str_detect(
+          nci_concept_definition, "ADC|antibody(-| )drug conjugate")) |>
     dplyr::mutate(is_adc = TRUE) |>
     dplyr::mutate(is_adc = dplyr::if_else(
       stringr::str_detect(nci_concept_definition, "ADCC") &
@@ -1595,7 +853,7 @@ merge_nci_opentargets <- function(
     )) |>
     dplyr::mutate(is_adc = dplyr::if_else(
       is_adc == F &
-      !is.na(nci_cd_name) &
+        !is.na(nci_cd_name) &
         stringr::str_detect(
           tolower(nci_cd_name), "mab ") &
         stringr::str_detect(
@@ -1606,8 +864,8 @@ merge_nci_opentargets <- function(
     )) |>
     dplyr::select(nci_cd_name, is_adc) |>
     dplyr::distinct()
-
-  all_drugs_final <- all_drugs_final |>
+  
+  all_cancer_drugs <- all_cancer_drugs |>
     dplyr::left_join(
       adc_candidates, 
       by = "nci_cd_name",
@@ -1644,10 +902,12 @@ merge_nci_opentargets <- function(
       as.character(drug_cancer_relevance)
     )) |>
     dplyr::distinct()
+  
+  return(all_cancer_drugs)
+  
+}
 
-  return(all_drugs_final)
 
-}
 
 map_curated_targets <- function(gene_info = NULL,
                                    path_data_raw = NULL,
@@ -1777,16 +1037,6 @@ map_curated_targets <- function(gene_info = NULL,
       nci_concept_definition, 
       "(A|a)ntibody(-| )drug conjugate \\(ADC\\)"
     )) |>
-    
-    ## filter for the presence of gene symbols in name or concept definition
-    # dplyr::filter(
-    #   stringr::str_detect(
-    #     nci_cd_name, trialOncoX::tox_int_data$regex_patterns$variant[51,]$regex) |
-    #     stringr::str_detect(
-    #       nci_concept_definition,
-    #     trialOncoX::tox_int_data$regex_patterns$variant[51,]$regex
-    #     )
-    # ) |>
     dplyr::filter(!stringr::str_detect(
       tolower(nci_cd_name),"^(allogeneic|regimen |copper |fluorine f |indium |iodine |carbon c|autologous |recombinant |lutetium |yttrium |y 90)|vaccine$"
     )) |>
@@ -1837,449 +1087,10 @@ map_curated_targets <- function(gene_info = NULL,
       as.character(drug_action_type)
     ))
 
-  return(ot_nci_drugs_curated)
+  return(list('curated' = ot_nci_drugs_curated,
+              'nonmapped' = inhibitors_no_target_nonmapped))
 }
 
-# assign_drug_category <- function(drug_df = NULL,
-#                                  path_data_raw = NULL){
-#   
-#   
-#   atc_classification <- 
-#     get_atc_drug_classification(path_data_raw = path_data_raw)
-# 
-#   drug_df <- drug_df |>
-#     dplyr::distinct() |>
-#     dplyr::mutate(antimetabolite = dplyr::if_else(
-#       !is.na(nci_concept_definition) &
-#         stringr::str_detect(
-#           tolower(nci_concept_definition),
-#           "antimetabol|anti-metabol|nucleoside analog"),TRUE,FALSE)
-#     ) |>
-#     dplyr::mutate(iap_inhibitor = dplyr::if_else(
-#       !is.na(target_symbol) &
-#         stringr::str_detect(
-#           target_symbol,
-#           "^(BIRC|XIAP)"),TRUE,FALSE)
-#     ) |>
-#     dplyr::mutate(topoisomerase_inhibitor = dplyr::if_else(
-#       (!is.na(nci_concept_definition) &
-#          stringr::str_detect(
-#            nci_concept_definition,
-#            "(T|t)opoisomerase II-mediated|(T|t)opoisomerase( I|II )? \\(.*\\) inhibitor|inhibit(ion|or) of (T|t)opoisomerase|(stabilizes|interrupts|binds to|interacts with|inhibits( the activity of)?)( the)?( DNA)? (t|T)opoisomerase|(T|t)opoisomerase( (I|II))? inhibitor")) |
-#         (!is.na(target_genename) &
-#            stringr::str_detect(target_genename,"topoisomerase")),TRUE,FALSE)
-#     ) |>
-#     dplyr::mutate(hedgehog_antagonist = dplyr::if_else(
-#       (!is.na(nci_concept_definition) &
-#          stringr::str_detect(
-#            nci_concept_definition,
-#            "Hedgehog") & stringr::str_detect(
-#              nci_cd_name,"Smoothened Antagonist|(ate|ib)$")) |
-#         (!is.na(nci_cd_name) &
-#            stringr::str_detect(
-#              nci_cd_name,"Hedgehog Inhibitor|SMO Protein Inhibitor")),
-#       TRUE,FALSE)
-#     ) |>
-#     dplyr::mutate(hdac_inhibitor = dplyr::if_else(
-#       (!is.na(target_symbol) &
-#          stringr::str_detect(
-#            target_symbol,
-#            "^HDAC")) |
-#         (!is.na(nci_concept_definition) &
-#            stringr::str_detect(nci_concept_definition,"inhibitor of histone deacetylase")) |
-#         (!is.na(nci_cd_name) &
-#            stringr::str_detect(nci_cd_name,"HDAC Inhibitor")),
-#       TRUE,FALSE)
-#     ) |>
-#     dplyr::mutate(alkylating_agent = dplyr::if_else(
-#       (is.na(drug_moa) | 
-#          (!is.na(drug_moa) & stringr::str_detect(drug_moa,"DNA"))) &
-#         !stringr::str_detect(nci_cd_name,
-#                              "antiangiogenic") &
-#         !is.na(nci_concept_definition) &
-#         (stringr::str_detect(
-#           tolower(nci_concept_definition),
-#           "(alkylates dna|alkylation of dna|alkylating (agent|metabolite)|alkylating-like|alkylates and cross-links dna|alkylating( and antimetabolite)? activit(y|ies))") |
-#            (!is.na(nci_cd_name) & 
-#               stringr::str_detect(tolower(nci_cd_name),"(mustine|platin)$") &
-#               !stringr::str_detect(tolower(nci_cd_name),"/"))),
-#       TRUE,FALSE)
-#     ) |>
-#     dplyr::mutate(parp_inhibitor = dplyr::if_else(
-#       !is.na(target_symbol) &
-#         stringr::str_detect(
-#           target_symbol,
-#           "^PARP[0-9]{1}"),TRUE,FALSE)
-#     ) |>
-#     dplyr::mutate(bet_inhibitor = dplyr::if_else(
-#       !is.na(target_symbol) &
-#         stringr::str_detect(
-#           target_symbol,
-#           "^BRD(T|[1-9]{1})") |
-#         (!is.na(nci_cd_name) &
-#            stringr::str_detect(
-#              nci_cd_name,"BET( Bromodomain)? Inhibitor")),TRUE,FALSE)
-#     ) |>
-#     dplyr::mutate(tubulin_inhibitor = dplyr::if_else(
-#       (!is.na(drug_action_type) &
-#          drug_action_type != "STABILISER" &
-#          !is.na(target_genename) &
-#          stringr::str_detect(
-#            tolower(target_genename),
-#            "tubulin")) |
-#         (!is.na(nci_concept_definition) & stringr::str_detect(
-#           tolower(nci_concept_definition),
-#           "binds to tubulin|disrupts microtubule|microtubule disrupt")),
-#       TRUE,FALSE)
-#     ) |>
-#     dplyr::mutate(ar_antagonist = dplyr::if_else(
-#       (!is.na(target_genename) &
-#          stringr::str_detect(
-#            tolower(target_genename),
-#            "androgen receptor")),
-#       TRUE,FALSE)
-#     ) |>
-#     dplyr::mutate(kinase_inhibitor = dplyr::if_else(
-#       (!is.na(target_symbol) & stringr::str_detect(target_symbol,"EGFR|PTPN11|ABL1|FGFR|PDGFR|CSF1R")) |
-#         (((!is.na(drug_action_type) &
-#              stringr::str_detect(tolower(drug_action_type),"blocker|inhibitor|antagonist")) |
-#             stringr::str_detect(tolower(nci_cd_name),"ib$")) &
-#            (!is.na(target_genename) &
-#               stringr::str_detect(tolower(target_genename),"kinase|eph receptor"))) |
-#         (!is.na(nci_concept_definition) &
-#            stringr::str_detect(nci_concept_definition,"kinase inhibit(or|ion)")),
-#       TRUE,FALSE)
-#     ) |>
-#     dplyr::mutate(angiogenesis_inhibitor = dplyr::if_else(
-#       stringr::str_detect(tolower(drug_action_type),"blocker|inhibitor|antagonist") &
-#         (!is.na(nci_cd_name) &
-#            stringr::str_detect(tolower(nci_cd_name),
-#                                "antiangiogenic|angiogenesis inhibitor")) |
-#         (!is.na(nci_concept_definition) &
-#            stringr::str_detect(
-#              tolower(nci_concept_definition),
-#              "antiangiogenic activities|angiogenesis inhibitor|(inhibiting|blocking)( tumor)? angiogenesis|anti(-)?angiogenic|(inhibits|((inhibition|reduction) of))( .*) angiogenesis")),
-#       TRUE,FALSE)
-#     ) |>
-#     dplyr::mutate(monoclonal_antibody = dplyr::if_else(
-#       (!is.na(drug_type) & drug_type == "Antibody") |
-#         (stringr::str_detect(tolower(nci_cd_name),
-#                              "^anti-|mab |mab$|monoclonal antibody") &
-#            (!is.na(nci_concept_definition) &
-#               stringr::str_detect(nci_concept_definition,"monoclonal antibody"))),
-#       TRUE,FALSE)
-#     ) |>
-#     dplyr::mutate(proteasome_inhibitor = dplyr::if_else(
-#       (stringr::str_detect(tolower(nci_cd_name),
-#                            "^proteasome") &
-#          !stringr::str_detect(tolower(nci_cd_name),"vaccine")) |
-#         (!is.na(nci_concept_definition) &
-#            stringr::str_detect(
-#              tolower(nci_concept_definition),"proteasome inhibitor|inhibits the proteasome|inhibition of proteasome")),
-#       TRUE,FALSE)
-#     ) |>
-#     dplyr::mutate(hormone_therapy = dplyr::if_else(
-#       stringr::str_detect(tolower(nci_cd_name),
-#                           "aromatase inhib|estrogen receptor (inhibitor|degrader|modulator)") |
-#         (!is.na(nci_concept_definition) &
-#            stringr::str_detect(
-#              tolower(nci_concept_definition),"inhibitor of estrogen|estrogen receptor (modulator|inhibitor|degrader)|antiestrogen|aromatase inhibit(or|ion)") &
-#            !stringr::str_detect(nci_concept_definition,"antiestrogen resistance")) |
-#         (!is.na(target_symbol) & stringr::str_detect(target_symbol,"ESR[0-9]|GNRHR")),
-#       TRUE,FALSE)
-#     ) |>
-#     dplyr::mutate(anthracycline = dplyr::if_else(
-#       (!is.na(nci_concept_definition) &
-#          stringr::str_detect(
-#            tolower(nci_concept_definition),
-#            "anthracycline|anthracenedione")),
-#       TRUE, FALSE)
-#     ) |>
-#     dplyr::mutate(immune_checkpoint_inhibitor = dplyr::if_else(
-#       (!is.na(nci_concept_definition) &
-#          !stringr::str_detect(
-#            tolower(nci_cd_name), "oncolytic|pentoxifylline|vaccine") &
-#          iap_inhibitor == FALSE &
-#          stringr::str_detect(
-#            tolower(nci_concept_definition),
-#            "immune checkpoint inhib")) |
-#         stringr::str_detect(nci_cd_name,"(Anti-(PD|CTLA)-)") |
-#         (stringr::str_detect(nci_cd_name,
-#                              "Tremelimumab|Milatuzumab")) |
-#         (!is.na(target_symbol) &
-#            !stringr::str_detect(
-#              tolower(nci_cd_name), "oncolytic|pentoxifylline") &
-#            (target_symbol == "CD274" |
-#               target_symbol == "CTLA4" |
-#               target_symbol == "PDCD1" |
-#               target_symbol == "TIGIT")),
-#       TRUE,FALSE)
-#     ) |>
-#     dplyr::mutate(immune_checkpoint_inhibitor = dplyr::if_else(
-#       !is.na(nci_cd_name) &
-#         immune_checkpoint_inhibitor == T &
-#         stringr::str_detect(nci_cd_name,"NLM-001|CEA-MUC-1|Oncolytic|Vaccine"),
-#       as.logical(FALSE),
-#       as.logical(immune_checkpoint_inhibitor)
-#     )) |>
-#     dplyr::mutate(platinum_compound = dplyr::if_else(
-#       !is.na(drug_name) &
-#         stringr::str_detect(tolower(drug_name),"platin$"),
-#       as.logical(TRUE),
-#       as.logical(FALSE)
-#     ))
-# 
-#   ## Make sure each drug is assigned an unambiguous value for each category
-#   nciCDN2Category <- list()
-#   for(c in c('immune_checkpoint_inhibitor',
-#              'topoisomerase_inhibitor',
-#              'tubulin_inhibitor',
-#              'kinase_inhibitor',
-#              'iap_inhibitor',
-#              'hdac_inhibitor',
-#              'parp_inhibitor',
-#              'bet_inhibitor',
-#              'ar_antagonist',
-#              'monoclonal_antibody',
-#              'antimetabolite',
-#              'angiogenesis_inhibitor',
-#              'alkylating_agent',
-#              'anthracycline',
-#              'platinum_compound',
-#              'proteasome_inhibitor',
-#              'hormone_therapy',
-#              'hedgehog_antagonist')){
-# 
-#     cat <- drug_df[,c]
-#     name <- drug_df$nci_cd_name
-# 
-#     nciCDN2Category[[c]] <- as.data.frame(
-#       data.frame(
-#         'nci_cd_name' = name,
-#         stringsAsFactors = F
-#       ) |>
-#         dplyr::mutate(!!c := cat) |>
-#         dplyr::distinct() |>
-#         dplyr::group_by(nci_cd_name) |>
-#         dplyr::summarise(!!c := paste(!!dplyr::sym(c), collapse="/")) |>
-#         dplyr::mutate(!!c := dplyr::if_else(
-#           stringr::str_detect(!!dplyr::sym(c),"/"),
-#           TRUE,
-#           as.logical(!!dplyr::sym(c))))
-#     )
-# 
-#     drug_df[,c] <- NULL
-#     drug_df <- drug_df |>
-#       dplyr::left_join(
-#         nciCDN2Category[[c]], 
-#         by = "nci_cd_name",
-#         multiple = "all"
-#       )
-# 
-#   }
-#   
-#   
-#   drugs_with_codes <- drug_df |>
-#     dplyr::mutate(drug_entry = tolower(nci_cd_name)) |>
-#     dplyr::left_join(
-#       dplyr::select(
-#         atc_classification, 
-#         atc_drug_entry, 
-#         atc_level3, 
-#         atc_code_level3),
-#       by = c("drug_entry" = "atc_drug_entry"),
-#       multiple = "all", relationship = "many-to-many") |>
-#     dplyr::mutate(atc_code_level3 = dplyr::case_when(
-#       is.na(atc_code_level3) &
-#         target_symbol == "ABL1" | target_symbol == "BCR" ~ "L01EA",
-#       is.na(atc_code_level3) &
-#         stringr::str_detect(target_symbol, "^(PARP[0-9]{1})$") ~ "L01XK",
-#       
-#       is.na(atc_code_level3) &
-#         stringr::str_detect(target_symbol, "^(HDAC[0-9]{1,})$") ~ "L01XH",
-#       is.na(atc_code_level3) &
-#         hedgehog_antagonist == T ~ "L01XJ",
-#       is.na(atc_code_level3) &
-#         topoisomerase_inhibitor == T ~ "L01CE",
-#       is.na(atc_code_level3) & 
-#         target_symbol == "BRAF" ~ "L01EC",
-#       is.na(atc_code_level3) & 
-#         target_symbol == "ALK" ~ "L01ED",
-#       is.na(atc_code_level3) &
-#         stringr::str_detect(
-#           target_symbol,"TUBA|TUBB") &
-#         (drug_action_type == "INHIBITOR" |
-#            drug_action_type == "DISRUPTING_AGENT") ~ "L01XX",
-#       is.na(atc_code_level3) & 
-#         (stringr::str_detect(drug_entry," inhibitor") &
-#            stringr::str_detect(drug_entry, "kinase ")) |
-#         stringr::str_detect(
-#           target_symbol, 
-#           "^(KIT|SRC|SYK|YES1|AKT[0-9]|AURK(A|B|C)|MAPK|PDGFR|AXL|BLK|ATM|CSF1R|ATR|FRK|FYN|HCK|IRAK4|LCK|LYN|MET|NTRK[0-9]|PLK[0-9]|RAF1|ROS1|CHEK(1|2)|ERBB4)$") ~ "L01EX",
-#       is.na(atc_code_level3) & 
-#         stringr::str_detect(target_symbol, "^MAP2K[0-9]") ~ "L01EE",
-#       is.na(atc_code_level3) & 
-#         stringr::str_detect(target_symbol, "^CDK[0-9]{1,}") ~ "L01EF",
-#       is.na(atc_code_level3) & 
-#         stringr::str_detect(target_symbol, "^JAK[0-9]") ~ "L01EJ",
-#       is.na(atc_code_level3) & 
-#         stringr::str_detect(target_symbol, "^FGFR[0-9]") ~ "L01EN",
-#       is.na(atc_code_level3) & 
-#         stringr::str_detect(target_symbol, "^GNRH") ~ "L02AE",
-#       is.na(atc_code_level3) & 
-#         !stringr::str_detect(drug_entry,"/") &
-#         stringr::str_detect(drug_entry, "xel$") ~ "L01CD",
-#       is.na(atc_code_level3) & 
-#         !stringr::str_detect(drug_entry,"/") &
-#         stringr::str_detect(drug_entry, "platin$") ~ "L01XA",
-#       is.na(atc_code_level3) & 
-#         !is.na(nci_concept_definition) & 
-#         stringr::str_detect(tolower(nci_concept_definition), "anti-estrogen") ~ "L02BA",
-#       is.na(atc_code_level3) & 
-#         !is.na(nci_concept_definition) & 
-#         stringr::str_detect(tolower(nci_concept_definition), "aromatase inhibitor") ~ "L02BG",
-#       is.na(atc_code_level3) & 
-#         !is.na(nci_concept_definition) & 
-#         stringr::str_detect(tolower(nci_concept_definition), "nitrogen mustard") ~ "L01AA",
-#       is.na(atc_code_level3) & 
-#         stringr::str_detect(target_symbol, "^PIK3") ~ "L01EM",
-#       is.na(atc_code_level3) & 
-#         stringr::str_detect(target_symbol, "^BTK$") ~ "L01EL",
-#       is.na(atc_code_level3) & 
-#         stringr::str_detect(target_symbol, "^(KDR|FLT1|FLT3|FLT4)$") ~ "L01EK",
-#       is.na(atc_code_level3) & 
-#         stringr::str_detect(target_symbol, "^(MS4A1)$") ~ "L01FA",
-#       is.na(atc_code_level3) & 
-#         stringr::str_detect(target_symbol, "^(CD38)$") ~ "L01FC",
-#       is.na(atc_code_level3) & 
-#         stringr::str_detect(target_symbol, "^(CD22)$") ~ "L01FB",
-#       is.na(atc_code_level3) & 
-#               (target_symbol == "PDCD1" |
-#                  target_symbol == "CD274") ~ "L01FF",
-#       is.na(atc_code_level3) & 
-#               target_symbol == "EGFR" &
-#               stringr::str_detect(drug_entry, "mab$") ~ "L01FE",
-#       is.na(atc_code_level3) & 
-#               target_symbol == "EGFR" &
-#               !stringr::str_detect(drug_entry, "mab$") ~ "L01EB",
-#       is.na(atc_code_level3) & 
-#               target_symbol == "ERBB2" &
-#               stringr::str_detect(drug_entry, "mab$") ~ "L01FD",
-#       is.na(atc_code_level3) & 
-#               target_symbol == "ERBB2" &
-#         !stringr::str_detect(drug_entry, "mab$") ~ "L01EH",
-#       is.na(atc_code_level3) &
-#         monoclonal_antibody == T |
-#         is_adc == T ~ "L01FX",
-#       is.na(atc_code_level3) &
-#         bet_inhibitor == T |
-#         iap_inhibitor == T ~ "L01XX",
-#       is.na(atc_code_level3) &
-#         stringr::str_detect(
-#           tolower(nci_concept_definition), "purine( nucleoside)? analog") ~ "L01BB",
-#       is.na(atc_code_level3) &
-#         stringr::str_detect(
-#           tolower(nci_concept_definition), "pyrimidine( nucleoside)? analog") ~ "L01BC",
-#       is.na(atc_code_level3) &
-#         !stringr::str_detect(drug_entry, "/") &
-#         stringr::str_detect(
-#           tolower(nci_concept_definition), "vinca alkaloid") ~ "L01CA",
-#       
-#       is.na(atc_code_level3) &
-#         ((!is.na(nci_concept_definition) &
-#            stringr::str_detect(
-#              tolower(nci_concept_definition),
-#              "antineoplastic activit|anti-tumor activit"
-#            )) |
-#         (!is.na(drug_max_ct_phase) &
-#         stringr::str_detect(
-#           drug_entry,"(in|ib|ide|ine|ax|il|an|ate| alfa)$") &
-#         drug_max_ct_phase >= 2 &
-#         (!is.na(drug_n_indications) &
-#            drug_n_indications > 2) &
-#         (!is.na(drug_frac_cancer_indications) &
-#            drug_frac_cancer_indications > 0.4))) ~ "L01XX",
-#       
-#       
-#       TRUE ~ as.character(atc_code_level3)
-#     )) 
-#   
-#   drugs_classified <- list()
-#   drugs_classified[['part1']] <- as.data.frame(
-#     drugs_with_codes |>
-#     dplyr::filter(!is.na(atc_code_level3) &
-#                     !is.na(atc_level3)) |>
-#     dplyr::select(-atc_level3) |>
-#     dplyr::distinct() |>
-#     dplyr::left_join(
-#       dplyr::select(
-#         atc_classification,
-#         -atc_drug_entry),
-#       by = c("atc_code_level3"),
-#       multiple = "all",
-#       relationship = "many-to-many"
-#     ) |>
-#     dplyr::distinct()
-#   )
-#   
-#   drugs_classified[['part2']] <- drugs_with_codes |>
-#     dplyr::filter(!is.na(atc_code_level3) &
-#                     is.na(atc_level3)) |>
-#     dplyr::select(-atc_level3) |>
-#     dplyr::left_join(
-#       dplyr::select(
-#         atc_classification, -atc_drug_entry),
-#       by = "atc_code_level3",
-#       multiple = "all",
-#       relationship = "many-to-many"
-#     ) |>
-#     dplyr::distinct()
-#   
-#   drugs_classified_all <-
-#     drugs_classified[['part1']] |>
-#     dplyr::bind_rows(
-#       drugs_classified[['part2']]
-#     )
-#   
-#   drugs_unclassified <- drugs_with_codes |>
-#     dplyr::filter(is.na(atc_code_level3))
-#   
-#   
-#   for(c in c('immune_checkpoint_inhibitor',
-#              'topoisomerase_inhibitor',
-#              'tubulin_inhibitor',
-#              'kinase_inhibitor',
-#              'iap_inhibitor',
-#              'hdac_inhibitor',
-#              'parp_inhibitor',
-#              'bet_inhibitor',
-#              'ar_antagonist',
-#              'monoclonal_antibody',
-#              'antimetabolite',
-#              'angiogenesis_inhibitor',
-#              'alkylating_agent',
-#              'anthracycline',
-#              'platinum_compound',
-#              'proteasome_inhibitor',
-#              'hormone_therapy',
-#              'hedgehog_antagonist')){
-#     
-#     drugs_classified_all[ ,c] <- NULL
-#     drugs_unclassified[, c] <- NULL
-#   }
-#   
-#   drug_df <- dplyr::bind_rows(
-#     drugs_classified_all,
-#     drugs_unclassified)
-#   
-#   drug_df <- remove_duplicate_chembl_ids(
-#     drug_df = drug_df
-#   )
-#   
-#   return(drug_df)
-# 
-# }
-
 assign_drug_category <- function(drug_df = NULL,
                                   path_data_raw = NULL){
 
@@ -3282,7 +2093,7 @@ expand_drug_aliases <- function(drug_index_map = NULL,
                                 chembl_pubchem_datestamp = chembl_pubchem_datestamp){
 
   chembl_pubchem_xref <-
-    get_chembl_pubchem_compound_xref(
+    get_chembl_pubchem_xref(
       datestamp = chembl_pubchem_datestamp,
       path_data_raw = path_data_raw)
   
diff --git a/data-raw/metadata_pharm_oncox.xlsx b/data-raw/metadata_pharm_oncox.xlsx
index 35bbafb32e65d10388685c882a6e88c94b5acfd6..10453920783a289a73a39304c0abf68915d17ad0 100644
GIT binary patch
delta 4496
zcmZ9QWmMGP*2jmgp+P!k7(yCGx}-~xmPSHp2N7W?f0Q%`3`nPR!;m6Ki3&)kbay#`
z(%1ie*1hYlXTRBN@AW<B%{hC0_TJGBc@DM2M8FUAQ;awc2&6#(0+EA2Af%g+55m*c
z4uNnLM7q0G8qd9$lcWf=d3vWf(x#-}Y^s#pUH{B2$Xmh*)qud)8NDY#RmORLPH_Y{
z(u-$qexo@@%DfmZD+`x}fAnicFgXq+ZyKx%b{tzp5b^Q}insa$&Bt8TA4n-_oeQ4=
ztyjGbSBICw3f0~Qa@B<aUZ98D^Hp&>qG1oJ%(PG6?x%qU%*)@?r<IeAW!n{wiP;ys
z>n&dqxe(DEVnGZMT?u<)C2oSwOQN=9?9pPqpg=2(?mH)eKoU#s3Hv2qqb8PzmB^kS
z<Dgw>fgdl#m3Zr2IuGRHv)e;#u<vGo2rhaA_V}!^K#7)1t2iAfj;wI_`m22RpEip9
zVf$n8Lc;q^8|S3&Dvf@{R3>ql{2~s&SyJm^uTtb=z&)ZHUFB%*Z0|6~gv&7(?j=-T
zWj34_F-|*2ia+~8TH*Pds@U^uhlO6?lhaRFeCLJ=uSd`HhnoA#W-?CWT5%xnfa6*b
z%`x^S8AbZ>K4H#CTljtO_VJgRrOEjqxe896`HOG2d#!c}rg=igwB}0i`myHT3Ei5E
zdDY*HS?U>xrbl_Y-vI`(foS>=X(hJr;X5yDExpC`4-^t+ZAQM2yf~+s&8HSYVFoXV
zq^Mo&!`L+l)XCP^aPV(S-IDjK0DZDp#OBe;tbC$?EL5Y&@B0bUmDY-ZUdqY_JV#aE
zsE?$Er=ZsYHu9(#H9``@9zDjMJ-j(yvmC1>%BzXkr3*COo+niVI~J2WD45sgo_+Ew
zI@$kx?b~#h&){xT)zd^J@6j|~qg8L{c$Xfdb?7NUNaeBXoYf~k=pA|hz-u0ng1eEP
zmRB(+I;3GQlYXiykuKHC)Q8aSDL!@HJj4{>>fHM>kqO%=qPx^z(fQ!D`r{s)?eR&a
zrOL)b^6X<KV&P+EKXtICJxSiaD&EE`(<a3T>=IG`xQ}JEXD6qf(b^U=iU2ov0gAed
zJ@iCSi;+0}S=)*niv3Hz@j}=6X1lmDdBNr~b^P^CQLb8!O?Vxh&RA!ritX}`0$;tp
z)QwO4c30i!6{s7wMxSq0Zr536?-O`8brp%b)Q>qV=u|>s5ftppB+;y9A5JFDBtU2#
zFed>KO^=ix3Ks;DLy5o`0nvZTDeH;Fy9!D08yn|4oYib>O^tjPe_X8RLp@p*?ec~h
zO-?~x!L2*n*S}xqZBWh3jwU{5)Yu<Bcl`Rg*=8}?(tpJ`JIb?Uj!%0UnI3I#lH+rk
z3-pFpxV}aue}&O;F6|tBn)<vC+&W+JmX}<1`+mSuGCMle9=DVLip@&stYK$bmcB&X
z-1@YdI5m^Al{K`F<Eso3Gr!fBn!(J<d<Zjl;q<Fs{RXVjr>)D+#)FMyA2S|*?r@mC
zj5jIF4dzSQ-MMfnENH>*g$$rK+wmqFp$h?WsRdeOQoT3NQLxEH-apF6?~b1Tyh$&D
zYV+yG7^1Nq&=_BUwZqZxdD6KC=8poNJvDk&DE-kg=<S<9&bghs5l}cl5qthLsoDjT
z#o}`*LC&G2x6S0L7HO-F%fxz;R@c`Qm(GSqiSF3DIuHp350o}U=X!rcAXr%z<`Hw3
z7kq0qXWM43Nj*ohTh2XQ;Z?5nvgAs#nq4)u&#>$kh&6y-n0r0l5HjcJlrXE}&e^?_
zgI?I5WNoXhu|;6phVLL@A}i|pXn)4C6K0vkp};xpi^)U=uFqbKbqb~gm3>oWl5d2f
zQo$~KzI&hclSQm-WBJ`U)ThU`K2m;RIWJyvR?;=sF}0D=p#yZO?cBs{+no(sX4ul8
z5_qNvVj#Y;AldX32nZuI*REZpIXLS-_)wAQ<C_h$Vlyp~pfE+S6Ov9TMAUWIc76;x
z34Y0590!`pgfgfLPd{kGZGJ}0rBlCTvT7W<2Trf9iE@iS?|X~=XroxjXAaJj!02sr
zJ~b5(Y_5%<>axVHDS^5v_GU+ikw6!XU>PwKz?W2{VsB1?EPrHP*|)V2!b8<Zc@<k=
z7yn@R%)u-pjfFf;S>1TPksncEnqxL0%^iC|UKlk=zwN}1|G~3lsn{bXj)&#7qWCbC
z75hU922c2kvUiCuX`FJGRzt9nKB2xrpL8M-Bs#|3Vt!{a5?($+<-=o7xz58Mt3y!)
zM6zvJODbRJm<EYAbYkz55gB*ll+};tlafNUF3`FT-64Ka(usADN)LwDQ2Tqe-AbR>
z3+B-gbx~%mgDf{Sp20}ENJjKB4?pgja(T-at_&^Hm36(cJ15Ul7<UdcZ_00BDWjQi
zhNl{(aEFSD_lFg2dtIr&`D%Vu280{|i?^+roDK@5;+~9U9jy6cXJ{fcluqiNm=lEs
zeteWWGOe?eYE{ylAlRK?zil6zmwiHyoQJ#Qsx}noUAO5PBN#H5M_6k~Tob=$H%C-<
zG`pwz@Eej`GAGArUj-7S#1U)tT$ngWas48^r*_y4`Hs*EJAgR?yxbl^*y;*|g*0|e
z9j{1-F<M8u_XNA&t8sRtrvJJLQHYldD+C9<$AMdd2Yv5}Gn0dG1~~COm8Q8Ry_M_f
z>-X^kzm5Fxf5-@Di-3vO?)j>962h?+l)~B6zY(w|Qnyt_Y)WoSuMYy6<*!Vp%!eM2
z;8xPcVjD}OH4ewMG_@_?C_bPU_F7mHJchqj&TKYW9@mWAyoHwZ+(rzesp;Pn;HhVq
zr#?nivq1sGoH)fBw%R+7W7c!3%%p>vAr~19T9`&e!@}n48N-MAj_IW;<*<#3t1~&R
z`f*K@OFPrAZEGioSMr{|H)6w2-}Px#nS^@KfI%rEQARs1$G<mUw9p5=w&au@Cw>x&
z^Rw&hax3@!lysiyvnQe1uf8B+&FXrx<`M<<M*B@nGqYqaAKXX3Rm_=TDHwo@lZ7x0
zv>M@1d^CC?mNqU}lw_k7>(b*UZ7Nh{DYWo&cZ|?dwAoq|QC7v~L_OnyDnK8LtPLTi
zz5UqaI-f_pPc3dZ__Do%LY1&PV?9PuLhQh%eU!E>?zifh{1=lb0HSpNNIMY{x1;v0
zw>#HKD2R;dQ}A~O+4Of?F=UCox)`>w2zl9083M(aY1A%60^5cJ#Q{~7a-A=g(lAgF
zNdZqnQYQKgw&nx4R1uEsoNkw7h{w7fvW9aOg)D;ZNLJTSwcO>g<eSK-2(uO>4k{nY
z%b7Y+G=5I=r&^j7vXWnrOHR%soAsl56C*6ihi^;aop;kV2y~*3p#oU%QLG#+z@I}Z
zJLAQ}z&C}AhX;4nF*(o=gp0Q%<G0Xj+Oa94CXNnaR%%Pr{QGE?fj1|(+mrZyO_HtZ
z`IxNa3Ml=3k+)pEqGdBcFTPs)c6)zsI`~IAe++qS{74k1zWV0d+EycLRLHp4_onLs
zDW}JCeYi1Mbb(TPldrm)^0zYefh0ga5!%h~581l*6wUT}D3WtKy3+S@O2~gyGIost
zgGTZqqxUnCy^Dy}{G)~s>=1RxebyD3Kn?<<(|#%f$u$i+XyZ+JcB82G_VN<3VN%2Z
zosSw~=`Suppu0O9ke)UUE)57DMEEy`fP|`FVHW?wvzrtIq6dLc5-=v<FFXJK&6<h3
zAf!*{Q?QC=yl|5QU!#HX*{?w=ZbcFZ-nplj*R4N+2BBnWTzrFBq#WkQpalHf;cNr_
zEzZYy(#+R96KR_nE?u-CD8n+fnG0ypq^T{yO01Gnxf{hlm;b!3zqlxwR5xp%QZHh}
zq0l}1DJ%ZN*V*i~pAn#HAk>eR?i%W-j4lkWnd~YbPSNttHb}IHcQ>ris7K9m@K>k{
z_ut5C(+Z6@D`jV}78>}D1>K#A$V@+JfCl1LYm`qn!?gxh!4yJfIbt?4dgDA}8<-U%
zlkeJOF9Exx!ahelfGl9XBK^h9kU?URv03WXbF6PBN0`7r`FynIn$qkGU+*ZuTqZ?}
zXz^ePo~^Hz-HSmR^PTf-xF2v#wVyNw40yX@GEsh<jDUnhg%v)-iw~l_jW(H-*7E+v
zjb^XUFi(o*cSqYslecUpI--noYJSjXs#)X;J-hg+B_C?V#O|6>(NIOh5&X4nw+IjU
z<ZhfH_(|<kQm($EC$H)&wwHL<ILnMJHvz}CO~NXiY_#Fkx&Y#hkU|r$3^S8yk-$^)
z%{<b(I4I!S29D;t*w0~WuXh!~p02anM;0e`OzN0f+6rB^bHR$n!?sl69<1YG^fEf4
zPM2OQ@^;j+yAM`;>!QA|eqJb~qeWU9Wf8<((CHwp#TM|2dW-LRz|4l(smGsq{3v$i
zQcQAV1bbwilYW%*DuA8jG_(SBe};mEluJ_@tbo{`lP}0U&3bmP`mD{4^zQAbhAe2e
z+!_^Dsgd@Q7DEHnU|G?T*0`%omXHjgU#xGJ@Ar?#M#5=lhay`-Bk{aQvlJPIJn$<*
zV={cFTq<=+a}+~d9L}ipszbxnxY^cZ81d{rX^QQ6uBHzSB2g4^DF2#BiD6CuwE5{#
zNq{vD@z1I46o_4w^s2R8Y(6ak+fi!<=HSBng9TdfYo_U303(7*fPBus=9@q|N;+6K
zYX#Q?QB?VQ5_zQNHH~^1(<HtU)?%D)g7=9oz0d+tQioS5!o{80yn3{+pc61&?lfn?
zirHMy$WFl45mxSR5v#o?<!W4<5^@Hd@2GNVPF|JJF{rP&@YwCYE%_qEE$C8o`CxS7
zd#g{p)1}9kX2|)phtI@m>Px_g?Q@QCiet%>7M+OSZ)77Ovy=5K!sS~mSx<XfC`I~P
z)QMK})f19?)gXfm8ezd4X?!hAcuRBICIq-4_?_PLABThzd!iDc53#E`*NLcr0(L6O
z<vFNVIxINO5ew3Q<snNE)#-iz%&r%&B$9HKeRF_Nq?XDT;wTD52Z2OEA<dx?^CcpV
z4<3f~GAAbJ(X@1{=Tqa4k=T==elst74?>VKAW@e&gYMYA(o@Pf&DgZKz_37wF!|eo
z@0Ae;lp;t%f_V_V%=~6G_$hEr;<m1VJ!HFE5F{F9WpimKzkZOWEQ$3?^nmNGu{E?U
zK-NJzQ9v@45EBIH>sq5&1L<=yGem0G0XZQrr&g-leVO6+h=`u4D2GdO2_^*NtuNur
zs_X64#v8g?7H+NQz0oOG_AZZO?lYuUrd#%&B#&5-0aq{Zs+=2$3p4()`pmM@oD{;)
zYYmFIs&}il`APKp&spOI9U6Xr0~;^k0zsvNX5kmb0!FZ3jpCaYJbyft2L5nT<$Poc
z=P9hRzxsV|h9di<PHyXs+*a}TRlSr&)`rAJEq!{5_2AXUgmR5h*N#&}q9eayNJ?V}
z4NF&^s&`<@ariIuo|tN;5tEP;W=)3vOd9tpcb>j*M7|xD2iZoiP8Hwt1%0{}+kmgc
zos^JQmO${c5VLTZyrozbFC}7mXy#+9b5NH^vR-gKeuQG==k|ME&u*EwQ=-sG=QAz3
zB`t<c%b}7V$<^Zfn%qOh5A9p7A#ZHts;<gAj25aT^Y|&VJV^(qq%8*Q$QR*r2M;-s
zP%mx%^g6!gqd44`!1YG^1WR*`NT(uFke<aQOD3iH&x(jm0Y6lK9c7WL;;Z3=R7b47
zJG@}SSdFX{NWcA)D~VBFRY%KajU#m(N5hc#4n!S)m}lugsBpm<qW`WjP>w>nST?98
zA!#g2)QOND_D@Qb8wC@}T9}dfzXRF-G3763{$n20s32-aRJAZKmK17Mn2YJ({m@_h
zNRxm-H2+ck+qa+?MYx&&AD;e4AqD$;6)nnBL<ma<RUpE_^zT>30)d>dK_JTi5rROh
OsAUlteu3aW$$tUkCR=v^

delta 4448
zcmZ9Qby(Ejw#H`&VQ3JPlpIoF1|&s7h7OUElpKkn1f}6Sgpx`OT?W!!g9=D@=TOox
zh_rw-*WW$op7T6+|F!mh*MDn0`}3~#!7j?Kf(!zLkA}$z5`sW7Bp?tq2n6zR7V&oT
zK-svtp@e;0oJ$PN+%jPFLGNp>(HA-{H$R{?^22CKM};RQE#|pMG8Y+O)!LY%8DIe}
z;D~K_nG&(sS1AwRGj~&?pVsGj?R*)1gcbN?PbA%N&rV=QJ4)`y-8oX}nF_33t+Q{`
z4)9f_T-fsamnn@`^$7=)V6;(wfoPs4Y*M*{C2z>C%dDa#*C{pl<BZi0fk6$67q02d
z{$XKXtUoFHhan!4D3#$pEqa<9Qo0S?LF}h9eJB?i`AJ(UkWGk#y_)rQ<;<YsF#2?B
zJqV<%qtEj+!$=TY)UwS+nVA*B7bXwgI{-Q{%`ZI+xXGZk<{4t>PXAH9EBz3GODIwO
z7^5t8Pgm3eB%_T`h~WxayoY9x3x0Ai<Us#f+yNTAcnLLiUtIs#7hPL$if??$Hu-AT
z*jCJtJOBC2YA&onYebr2E%{pX<C*JZG;>K}JCDlA0S6wnmKZQ_HZzBgQ}r=s1m0BM
zo=W0~adi3Ol(v-SOu4Z)QVvYap|cF>=yw7Z7yLg;(cju24|pvpM28*4^&9AvZ_+P4
zX3p#~b>8ubegJLSZ@sJeoN>qaMW2LuY}Q&;ajMh25)$Z8wWyymbdHW~@7EE+Q?K@u
zv2ar;&OE5vy}kq6Q-$W(mB01)0080uq2qb?4o4(M(m`csxpOjaA66G)A?I_Ixk`I9
zr%Zzs9=K?J+t%%wqYnFRBYITDaRweNfC=#Y(mpDnCeQi=iCAhsOx{)@hpLP8@-b7d
z?&D{U>u?U|)@)eGCHt1TGj(;Zjjity*u+_S-)E#cKVsX^%?=NyzDJA#2F+G<i14pC
zb7w&gt+pwhM=i!S0)ED;A6loKH&v_yc61`-J@bQDzfN;;ztpG>Si%!>2DCeT@d%zC
zmf6<jJ?_N>nzg=`)|!R2CFRli$9xkkJlgbW(@^s_3t4{Wb)x#9fpdSwgt(Zz06I&-
zMA4Y4vqkn@%&msWG_De){=VRG9-imeBH=LsF4jgZUw(hC|GmGjd-l+86Dv=rCb6FD
zFD~Iu8lBXsNJxjZD?L4Bz_HxkQH&$a5)A=GfMOdrN_OYl1*F=90!jjjUxmC6Ap(K!
zVRU(zF-<UHAoHnNY_SAQQ2F>Z+Q%C-D@6m9UiPb^PAV#Th);v+gxqo`+tQvm-DOj1
z?HJzPRI7(ii7oU8h-GVGE$5@m{g({0B0ciWg!FMfX;HRD+1_WCKxg+z?Kkya+l1(Z
z-9ztZzbucNuBrkVCiDZB8b&}LOT5o=9m`nS3dYC*kpw?zSwk2GQxNqz7`N#>j*H^!
zGAit*bWZQ*Pbe9G5T<>^MDaofDL}1!X1-VTQ$aABkMhLQvI-TbDl|IO%%FaB7@%Cp
zUk2JwT{HPTAF^p0?|Nr2w;-X5bvdZoA;HOw;llca&s%K!s$VZYH>seb&;(pHaw<!`
zAZ`wDtNVZUy?7r|S@2{(MT^%R-71eyK>jiI0ddPiI{jekPiuV#-#xfwPq9*YvGMVh
zf<P6QMx5pkP-!GGzB&4W9|mlkZ^J_=`U+~I-g|wsccH?%Zq>P52K1ev9{Nq%Nz0M#
z^HASuj)9Ph5BUm`D`{H!E`f)BXYGg;Y%~F8T0TSyIjXGk3nMF7G*DjC^?5zGyUD(5
zoj{UUxHtZN0>0r?so{=~*Z2DsvjlluBrYf8QCiOg3vyRWEi4}(TaCi3Fve4pwTE9_
z5ii1%sE*R(LXB=tX5=Z!<wL^i(ktfgGnubaYmH<Olt>cKn!{DJkLBea5tTxX-J*a=
zzqP}WiGlhCxr8$65+w~&8e=yOM(RoV@XGJj&EL@b!7n)S#HoI1$G_uFMKN21%vM1#
zywh$%`$a8BueESEgoF}mMFE`WF@~w%W};W4tQYb3L^UXs=1M{UVZ}-Pcq_&Bw5Tw0
zt{nrO^l06V6d%R*^iKg6G%fn^Z9oX=Pi2Y^#dr!Q|6sAH?P+FuGr7RR%CV0Ievpz>
zY8hJoD@BANgzr}3-*)ky?c?`kT#!^jYgr>H)Cya><~U_mpSO}oH8yui(SJhx(qwo|
z14SzGiE<)dz&8<dwDwpox*_F?(h8V_ViK2Je`7`XK}H40#UYq@ijA%aK%eU&^Oabb
z5s3%XGVu`dP3R_~^G!r6ay&97l8cOdKP19q==(eE{$AwH29QMFUav5oN}PAH`<QAX
zLrkuQA*!wCW2^anX@k^QzVOX~G*-d+R`07g-8>`BKNCNW7W>YVp86WewRpQ-&kayj
zu^X^R2O3bTI%r?srAv(mLR~PN>s`ED@Y*;{{uo})$eh|;vt;&*I12^hFy>A0+UJ+-
zt`WPUjJB-wR?eJnz<t(xPP0^%TXDW+wa<vcV*a3493wlt=tubx)>}mUR-g^lvhXdm
zPcZQ=Em9L5{-PT+?;lZF-8p5MV@1jSPMgU?$%c~!8|pJ)6Wt1&h*O`!j%gA^LFp3x
z&Ttq$ooHGZbe5+?i&g5%;qiL(jj?L$Mqu~wfWN{mk~<K`n242c-P8`~0Fl=2Xznd=
z?9{ugNhRCN1C+;l6Ldma2z3w7=RmMdrV?Dl(m#V&F`u>AyLxgT_VUMq@LqM8QikNy
z#WD4W^((IYwkv}0!D<*|G70ffLR_6TrjMNq@Lc};9qnpew^0RdXevn1a6II|c=D#1
z^WjjM<zx9VcSTe@v8xz6k3*a{k|kkA>QC$9#Ew(#dEon+hO1=z3RT-&qhduVvKwfP
z4MfpP@e`D%q&Y>iOc`Ca<LlxU@b;qL<qEYXicPnqs<>tF>+<c+k}2!KZMEA44#;9-
z?LoG{^u*~_j>lV6BWp}7BrP#(=0#vq3&A?(e#aEOo^q&4t+fPONs3WfimS^jM<U@g
zGm*5q@yP{;n^W$XPt|(@D}!!0uQSGn&CkP=Q}SxIJ2sZ8G3^|V;314KrwxTcC$04t
zmn*Z3yQkBbOwLCF?3(eMZ-1(Ex3{u8h@h!h8bW*QWYc2t(Nyv6I=^?r!sTQe((mQ{
zuEUUUNfIoOVHCKOrOoCFq}B8g@Fa=}&ZO~9J^Wk%r?Q(p2h{nDYWJ~@yR81K*=IIk
z7oR(*P?h(KqluRhk>Ms^d`K|gxdch`$8*Py=}$gl_PCgciu;?|FqGW)z)6?~+-!i)
zD@diF+aEc3J<31F08aUF{1c+x>tIKK_KM6AB{)lK$ruX}E*VZV`Gl1BB{*~<d}x+A
zmox0E(`!f@lbHJGrcIi`mZc40&a=a3$YrfJ>srYjBr2*K?Fj|kzaLGR3%aSd{7knq
zyQ<w*>wSD`kEfBRTwhw7evO5<aNz-EOUk27Y0kM}&msPvrc5<@>*U5b-yrEU9T^Vh
z$H1=p&xp4vjSc~w`!T1}4;f^Yy|jB9M%yDB+woqNb;hz@v3nZSJz$2$-gfYy)EV!f
z<$l=6toJ4RUk7=8O$gG}BqX8--2jpPy>o&FaQ>q||3Yz;5(HxW$75oQd6)o;f3O(%
zFDy1&Q6SXIU|30^TD^%O`(!DIDXBmO`_qZbT{aHR#SaqSMxS#Y_ouCh+4@(V9hj_k
z))#G-$d+n1RA?(@je4YHQK5dvE<nk&a$FO}gnRlhrWuxoE<w{5CvBTKCC`54=)DD#
z=KHza$yjfwDUo?t#03=IUNgzEdTwc?Jlc%gw3@(cK<ft!yYF>j3k`Xud$wqkC%75c
zg5KvwrTjGesH07-S19X<1_JvBj~GhT*n@=<HjlPrMysR<+clHz2XIv5_oH5(21q3t
z9wfAVlZUJ8tyG7|lgRCeYe@#(F6<sHD(MTX2H6$k1DY;|$plMz*y46@*EAHoM!ECV
z62X%E_;Y@1w`LDls~zu<5pYRTvFUeYeeG#1CLPKQh)EV(++cEjiZpO)lHhI#IMzZ{
zZ*Jm;v;xO-ymQo_1?hj)U?GNAINo17<*RkGNy4MV-UMTxS@s^(9tGTBVR<y3rOLvk
zp0Q<dG)XWu33w|mF>skGb?={xH*5)Br1bizJ29l3T27Zq6Oc@)Kd5~?f3S`_ur01?
zs|<bu%n;-uspj_<_^Z5dij~m!KYr(Udu=N(%3!0}hVGBneG`Zl+UjWf6UOr@d#}Og
z#-MF>Mdu)fB3sVlOKdxlN*1522pn#5A*t>6A{gJ%;R{Oz_QUWKV-~s^&S&zy&6N|C
zVfc=b(A;@V$D^+sqWym<SW<M*hAt&lFB}5Y{VSG`q_qjy&?gUOaJZQg>%&0X@)B?U
z=tG;9oqOhh)2LC-jpZBqH|1>UbhHs}1iwP7HVbLqMmMN(`a~Vvw_VoK#)|RoSlI-d
zhd$SRBbMH>q40!!$TYSVG9hNHt6efr(EF-lcy-k_Koz49!`H~NEu7)es`d1_ta=2H
z*Yi+lc>pzfhxgB``_E$YBvB71dAE|RJ4|1-H+C>82~o*qdx{@ypL$`;aCWkh;!clO
z)IY6R`wvCUB}f<EoQf1nZ{8?PthEr&u7~_U!d*uuc{6G11jmZde|Rx3f6Y3wf>BXD
zJ)iTY#=<g;lxO7*j3sw@HxcwxL}H(Sn7SL2MuVm)j=5HW=ZCzQ&ztA@l1$~T6T0oG
zuk$P8Zbq3F%4y5Kda=_(JHTFc;T3r)KF210iJW7f=qk52T$}#M;5xR{=yuyi&8ya#
z<=v()3E$Lk5F$o&=Shgq1}zV?@(F1m37z#K={nhtN=w~w!US0H1GvKFUD>4;u<FkB
znKTZKg7}7n@kbztPOzjFyy{jfCJM$gASu^4gjIc_AAC7Pzn%02&2MBFBO<aPPK+tu
zL;=JE3f^vxJHNjkb0Z=ym79((is0!9|I1bdKkNRFH{TP^y%Q(G5_05V??DfpV>H-D
zQrjb@r&OnI6a3cwlG!in92*e~aI(4+T5Y*!)Qh6MU={J)khr#SWiXPi{`+CaMM9k|
z<q}bMU(GqRLc-fUB{oBcuZkE?XMG3OE(N24Q_&z><AWo3j`2`Nd$ye!mX*{>H8p;2
zX3=)|3q^{%9M=+y(jgk*uhLCdxE}J69C_-oRq4B|4rf1N`39OXTwvw(+8zGll~Ylb
zQWe~}d$vv5ltq8YaZF$^mxA=X9Q7(_E3X`A0#&ODtPv0s{tT|vtcKqe1p~TP8LpHS
zh=J@(IBm{;spGD(n5mjs*YWPX3Qn07TmQjJzd>!xnqu<&of_ICjVJVB-Gm*2!Upl5
zZ~<#W@%DEOGn11-=-FC6AiV;UD$*-7+N#T(hK<nstm;gyoAfTFrI5}yE{b|LrNTXy
z^F|X)y~l~m1ZGDF+*#j98UrD@nWj-@A1tX6FCTeGaQu2~;RPCmC2}a5gywLxE<7VQ
zO_wK;L9QmY-a+Ah(tG!>m)s`#(-P4#bzgf}>l>pm5VnAe6|pe1Ebw|tp_}Zkl6u2Q
zD628ThI;<yk8K604^NHe-NH&#-R@gZkoUz}<2YMg@km>+zcAxZRHk%NUC*V`_eUnI
zot5l^m$1Q{I0U(9MnKKbb*fwF@$CcpWnj66dwzW>1R*Ax^)A9*A$c)+C_WI3$rPS~
z{AW9X2^7%*>tp&wWC(O=FeWe-3^(jPco1U@lVJVN%=K@){3Xo4kKIj7%uQwtF`NdY
z%1A^0f6pKgD`pqQ%ksaL=r5OK{wnnU75_#V7#N(7<^OZ*zbVWZ54bW|8uJCt{jXwU
T7U2wFGR!fY=SGb1KY9NH=(uHM

diff --git a/pkgdown/index.md b/pkgdown/index.md
index 8814416..1c61e92 100644
--- a/pkgdown/index.md
+++ b/pkgdown/index.md
@@ -10,14 +10,14 @@ Drug-target associations from the Open Targets Platform have furthermore been in
 
 _pharmOncoX_ provides anti-cancer drug classification through existing entries in the [Anatomical Therapeutic Chemical (ATC) Classification System](https://www.whocc.no/atc_ddd_index/), and these have been extended significantly with manual curation, also by establishing novel drug categories that are presently missing in the ATC classificiation tree (examples include _AURK inhibitors_, _MET inhibitors_, _BET inhibitors_, _AKT inhibitors_, _PLK inhibitors_, _IAP inhibitors_, _RAS inhibitors_, _BCL2 inhibitors_ etc.) enabling a filtering of drugs according to their main mechanisms of action.
 
-Currently (as of mid September 2024), `pharmOncoX` is built upon the following 
+Currently (as of early November 2024), `pharmOncoX` is built upon the following 
 releases of external databases:
 
- - Open Targets Platform (2024.06)
- - ChEMBL (v34)
- - NCI Thesaurus (24.07e)
- - MitelmanDB (20240715)
- - CIViC (20240918)
+ - Open Targets Platform (2024.09)
+ - ChEMBL (v35)
+ - NCI Thesaurus (24.09e)
+ - MitelmanDB (20241015)
+ - CIViC (20241102)
 
 ### Getting started