From 3e1dc0636d6609ac9529f0f6d766de30dce67927 Mon Sep 17 00:00:00 2001 From: "J. Allen Baron" Date: Wed, 14 Feb 2024 19:10:24 -0500 Subject: [PATCH] Unset robot_query() character col type assumption Also add col_types from readr::read_tsv() for management. To reduce verbosity, show_col_types has been set to FALSE and cannot be changed. Tests (robot_wrappers): PASS --- R/extract.R | 7 +++--- R/robot_wrappers.R | 6 ++++-- man/extract_ordo_mappings.Rd | 35 +++++++++++++++++++++++++++++- man/robot_query.Rd | 41 +++++++++++++++++++++++++++++++++++- 4 files changed, 82 insertions(+), 7 deletions(-) diff --git a/R/extract.R b/R/extract.R index 7ccae50..6b58d6b 100644 --- a/R/extract.R +++ b/R/extract.R @@ -465,7 +465,7 @@ extract_as_tidygraph <- function(x, query = NULL, collapse_method = "first", #' #' @param output The path where output will be written, as a string, or `NULL` #' (default) to load data directly. -#' @inheritParams tidy_sparql +#' @inheritParams robot_query #' #' @returns #' If `output` is specified, the path to the output file with the data. @@ -482,7 +482,7 @@ extract_as_tidygraph <- function(x, query = NULL, collapse_method = "first", #' #' @export extract_ordo_mappings <- function(ordo_path, as_skos = TRUE, output = NULL, - tidy_what = "everything") { + tidy_what = "everything", col_types = NULL) { if (isTRUE(as_skos)) { q_nm <- "mapping-ordo-skos.rq" } else { @@ -494,7 +494,8 @@ extract_ordo_mappings <- function(ordo_path, as_skos = TRUE, output = NULL, input = ordo_path, query = q_file, output = output, - tidy_what = tidy_what + tidy_what = tidy_what, + col_types = col_types ) out diff --git a/R/robot_wrappers.R b/R/robot_wrappers.R index d39ac0e..c4037fd 100644 --- a/R/robot_wrappers.R +++ b/R/robot_wrappers.R @@ -51,6 +51,7 @@ convert_to_ofn <- function(path, out_path = NULL, gzip = FALSE, #' [ROBOT query](http://robot.obolibrary.org/query) formatted as described in #' [DO.utils::robot()]. 
#' @inheritParams tidy_sparql +#' @inheritParams readr::read_tsv #' #' @returns #' If `output` is specified, the path to the output file with the query result. @@ -60,7 +61,7 @@ convert_to_ofn <- function(path, out_path = NULL, gzip = FALSE, #' #' @export robot_query <- function(input, query, output = NULL, ..., - tidy_what = "nothing") { + tidy_what = "nothing", col_types = NULL) { # load query, also ensure query in a file (required by ROBOT) query_is_file <- file.exists(query) if (query_is_file) { @@ -121,7 +122,8 @@ robot_query <- function(input, query, output = NULL, ..., } else { out <- readr::read_tsv( output, - col_types = readr::cols(.default = readr::col_character()) + col_types = col_types, + show_col_types = FALSE ) out <- tidy_sparql(out, tidy_what) } diff --git a/man/extract_ordo_mappings.Rd b/man/extract_ordo_mappings.Rd index 36f58bf..eec7b83 100644 --- a/man/extract_ordo_mappings.Rd +++ b/man/extract_ordo_mappings.Rd @@ -8,7 +8,8 @@ extract_ordo_mappings( ordo_path, as_skos = TRUE, output = NULL, - tidy_what = "everything" + tidy_what = "everything", + col_types = NULL ) } \arguments{ @@ -44,6 +45,38 @@ character vector. One or more of the following: \item \code{"as_tibble"} to make the output a \link[tibble:tibble]{tibble}. \item \code{"nothing"} to prevent all tidying. }} + +\item{col_types}{One of \code{NULL}, a \code{\link[readr:cols]{cols()}} specification, or +a string. See \code{vignette("readr")} for more details. + +If \code{NULL}, all column types will be inferred from \code{guess_max} rows of the +input, interspersed throughout the file. This is convenient (and fast), +but not robust. If the guessed types are wrong, you'll need to increase +\code{guess_max} or supply the correct types yourself. + +Column specifications created by \code{\link[=list]{list()}} or \code{\link[readr:cols]{cols()}} must contain +one column specification for each column. 
If you only want to read a +subset of the columns, use \code{\link[readr:cols_only]{cols_only()}}. + +Alternatively, you can use a compact string representation where each +character represents one column: +\itemize{ +\item c = character +\item i = integer +\item n = number +\item d = double +\item l = logical +\item f = factor +\item D = date +\item T = date time +\item t = time +\item ? = guess +\item _ or - = skip +} + +By default, reading a file without a column specification will print a +message showing what \code{readr} guessed they were. To remove this message, +set \code{show_col_types = FALSE} or set \code{options(readr.show_col_types = FALSE)}.} } \value{ If \code{output} is specified, the path to the output file with the data. diff --git a/man/robot_query.Rd b/man/robot_query.Rd index 44d90b9..a84f3be 100644 --- a/man/robot_query.Rd +++ b/man/robot_query.Rd @@ -4,7 +4,14 @@ \alias{robot_query} \title{Execute a SPARQL Query with ROBOT} \usage{ -robot_query(input, query, output = NULL, ..., tidy_what = "nothing") +robot_query( + input, + query, + output = NULL, + ..., + tidy_what = "nothing", + col_types = NULL +) } \arguments{ \item{input}{The path to an RDF/OWL file recognized by ROBOT, as a string.} @@ -33,6 +40,38 @@ character vector. One or more of the following: \item \code{"as_tibble"} to make the output a \link[tibble:tibble]{tibble}. \item \code{"nothing"} to prevent all tidying. }} + +\item{col_types}{One of \code{NULL}, a \code{\link[readr:cols]{cols()}} specification, or +a string. See \code{vignette("readr")} for more details. + +If \code{NULL}, all column types will be inferred from \code{guess_max} rows of the +input, interspersed throughout the file. This is convenient (and fast), +but not robust. If the guessed types are wrong, you'll need to increase +\code{guess_max} or supply the correct types yourself. 
+ +Column specifications created by \code{\link[=list]{list()}} or \code{\link[readr:cols]{cols()}} must contain +one column specification for each column. If you only want to read a +subset of the columns, use \code{\link[readr:cols_only]{cols_only()}}. + +Alternatively, you can use a compact string representation where each +character represents one column: +\itemize{ +\item c = character +\item i = integer +\item n = number +\item d = double +\item l = logical +\item f = factor +\item D = date +\item T = date time +\item t = time +\item ? = guess +\item _ or - = skip +} + +By default, reading a file without a column specification will print a +message showing what \code{readr} guessed they were. To remove this message, +set \code{show_col_types = FALSE} or set \code{options(readr.show_col_types = FALSE)}.} } \value{ If \code{output} is specified, the path to the output file with the query result.