From f6dc1f3b35777384a2115a527a8526523de7befe Mon Sep 17 00:00:00 2001 From: "J. Allen Baron" Date: Fri, 15 Mar 2024 16:21:49 -0400 Subject: [PATCH] Update plot_citedby() to fix color error/retractions A publication citing DO has been retracted and 6 colors are no longer sufficient. This updates color_set to avoid errors [BREAKING CHANGE - partial] and adds a 'retracted' argument to specify how those publications should be handled. --- NEWS.md | 5 ++++ R/plot.R | 64 ++++++++++++++++++++++++++++++++++++++------- man/plot_citedby.Rd | 31 ++++++++++++++++++---- 3 files changed, 85 insertions(+), 15 deletions(-) diff --git a/NEWS.md b/NEWS.md index 7e2a67fb..120664d4 100644 --- a/NEWS.md +++ b/NEWS.md @@ -12,6 +12,11 @@ * `read_omim()` now additionally parses official API-key requiring phenotypicSeries.txt downloads and may be able to handle additional API-key requiring downloads. +* `plot_citedby()`: + * _[BREAKING CHANGE]_ `color_set` argument now requires names and one color + for each of the 7 possible publication types when specifying colors manually. + * `retracted` argument added to specify how retracted articles should be + managed. ### New * `download_omim()` downloads official API-key requiring files directly from diff --git a/R/plot.R b/R/plot.R index c0336313..c4a3450e 100644 --- a/R/plot.R +++ b/R/plot.R @@ -107,20 +107,43 @@ plot_branch_counts <- function(DO_repo, out_dir = "graphics/website", #' citing the DO, as a string. #' @param out_dir The directory where the plot `"DO_cited_by_count.png"` #' should be saved, as a string. If `NULL` the plot is not saved to disk. -#' @param color_set A set of 6 colors or the prefix of the color set to use from -#' [DO_colors]. Available sets include: "sat", "accent1", "accent2", -#' and "orange". The default and light versions of the specified color set -#' will be used. 
+#' @param color_set A named set of 7 colors, one for each of +#' the possible publication types (see Colors section) or the +#' prefix of the color set to use from [DO_colors], as a character vector. +#' @param retracted How to handle retracted publications, as a string. +#' One of: +#' * "warn" (default) to drop them with a warning. +#' * "include" to display them in the plot in their own category. +#' * "other" to include them in the "Other" category. #' @inheritParams plot_branch_counts #' #' @section Data Preparation: #' To prepare data, execute `scripts/citedby_full_procedure.R`. #' +#' @section Colors: +#' If specifying a color set manually, one color should be included for each of +#' the following publication types: "Article", "Book", "Clinical Trial", +#' "Conference", "Review", "Other", "Retracted". "Other" serves as a catch all +#' category (generally a small subset of otherwise uncategorized publications). +#' +#' Sets available in [DO_colors] include: "sat" (saturated), "accent1", +#' "accent2", and "orange". The default and light versions of the specified +#' color set will be used to generate a gradient. 
+#' #' @export plot_citedby <- function(data_file = "data/citedby/DO_citedby.csv", out_dir = "graphics/website", - color_set = c("#C45055", "#934FBB", "#95B1BB", "#83C85F", "#B9964B", "#4C3E45"), + color_set = c( + "Article" = "#4C3E45", "Clinical Trial" = "#B9964B", + "Book" = "#83C85F", "Conference" = "#95B1BB", + "Review" = "#934FBB", "Other" = "#C45055", + "Retracted" = "#000000" + ), + retracted = "warn", w = 6, h = 3.15) { + retracted <- match.arg(retracted, c("warn", "include", "other")) + color_nm <- c("Retracted", "Other", "Review", "Conference", "Book", + "Clinical Trial", "Article") df <- readr::read_csv(data_file) %>% dplyr::mutate( @@ -128,13 +151,34 @@ plot_citedby <- function(data_file = "data/citedby/DO_citedby.csv", pub_type = clean_pub_type(.data$pub_type) ) - # set color ramp - if (length(color_set) > 1) { - cb_colors <- color_set - } else { + retracted_n <- sum(df$pub_type == "Retracted") + if (retracted_n > 0) { + if (retracted == "warn") { + df <- dplyr::filter(df, .data$pub_type != "Retracted") + rlang::warn(paste0(retracted_n, " retracted publication(s) dropped.")) + } + if (retracted == "other") { + df <- dplyr::mutate( + df, + pub_type = dplyr::recode(.data$pub_type, Retracted = "Other") + ) + } + } + + # prepare colors + color_n <- dplyr::n_distinct(df$pub_type) + if (length(color_set) == 1) { cb_colors <- grDevices::colorRampPalette( DO_colors[paste0(color_set, c("_light", ""))] - )(dplyr::n_distinct(df$pub_type)) + )(color_n) + } else { + if (length(color_set) != 7 || !all(names(color_set) %in% color_nm)) { + rlang::error("`color_set` must specify a DO_colors color set or 7 named colors") + } + # order colors to match publication type order + cb_colors <- color_set[color_nm] + # use only colors corresponding to publication types in the data + cb_colors <- cb_colors[names(cb_colors) %in% df$pub_type] } g <- ggplot2::ggplot(data = df) + diff --git a/man/plot_citedby.Rd b/man/plot_citedby.Rd index 8b43046e..b43ba1ca 100644 --- 
a/man/plot_citedby.Rd +++ b/man/plot_citedby.Rd @@ -7,7 +7,9 @@ plot_citedby( data_file = "data/citedby/DO_citedby.csv", out_dir = "graphics/website", - color_set = c("#C45055", "#934FBB", "#95B1BB", "#83C85F", "#B9964B", "#4C3E45"), + color_set = c(Article = "#4C3E45", `Clinical Trial` = "#B9964B", Book = "#83C85F", + Conference = "#95B1BB", Review = "#934FBB", Other = "#C45055", Retracted = "#000000"), + retracted = "warn", w = 6, h = 3.15 ) @@ -19,10 +21,17 @@ citing the DO, as a string.} \item{out_dir}{The directory where the plot \code{"DO_cited_by_count.png"} should be saved, as a string. If \code{NULL} the plot is not saved to disk.} -\item{color_set}{A set of 6 colors or the prefix of the color set to use from -\link{DO_colors}. Available sets include: "sat", "accent1", "accent2", -and "orange". The default and light versions of the specified color set -will be used.} +\item{color_set}{A named set of 7 colors, one for each of +the possible publication types (see Colors section) or the +prefix of the color set to use from \link{DO_colors}, as a character vector.} + +\item{retracted}{How to handle retracted publications, as a string. +One of: +\itemize{ +\item "warn" (default) to drop them with a warning. +\item "include" to display them in the plot in their own category. +\item "other" to include them in the "Other" category. +}} \item{w}{The width of the plot in inches, as numeric.} @@ -37,3 +46,15 @@ year. To prepare data, execute \code{scripts/citedby_full_procedure.R}. } +\section{Colors}{ + +If specifying a color set manually, one color should be included for each of +the following publication types: "Article", "Book", "Clinical Trial", +"Conference", "Review", "Other", "Retracted". "Other" serves as a catch all +category (generally a small subset of otherwise uncategorized publications). + +Sets available in \link{DO_colors} include: "sat" (saturated), "accent1", +"accent2", and "orange". 
The default and light versions of the specified +color set will be used to generate a gradient. +} +