Skip to content

Commit

Permalink
Merge pull request #203 from justinkadi/main
Browse files Browse the repository at this point in the history
Added functions to add Arctic Report Card annotations into data packages
  • Loading branch information
jeanetteclark authored Jul 9, 2024
2 parents 5213e9d + 87b624b commit 62c4df8
Show file tree
Hide file tree
Showing 16 changed files with 357 additions and 24 deletions.
10 changes: 6 additions & 4 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,11 @@ Imports:
dplyr,
digest,
EML (>= 2.0),
filelock,
httr,
jsonlite (>= 1.8.4),
magrittr,
methods,
filelock,
stringr,
stringi,
tools,
Expand All @@ -41,7 +41,8 @@ Imports:
XML,
lifecycle,
rdflib,
pins
pins,
rlang
Suggests:
emld,
humaniformat,
Expand All @@ -53,9 +54,10 @@ Suggests:
raster,
rmarkdown,
sf,
testthat,
testthat (>= 3.0.0),
xslt,
yaml
RoxygenNote: 7.1.2
RoxygenNote: 7.2.3
Roxygen: list(markdown = TRUE)
VignetteBuilder: knitr
Config/testthat/edition: 3
5 changes: 5 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@ export(eml_adcad_annotation)
export(eml_add_distribution)
export(eml_add_entity_system)
export(eml_add_publisher)
export(eml_arcrc_add_annotation)
export(eml_arcrc_essay_annotation)
export(eml_arcrc_key_variable_annotation)
export(eml_associated_party)
export(eml_categorize_dataset)
export(eml_contact)
Expand Down Expand Up @@ -81,10 +84,12 @@ importFrom(methods,as)
importFrom(methods,is)
importFrom(methods,new)
importFrom(methods,slot)
importFrom(rlang,.data)
importFrom(stats,na.omit)
importFrom(utils,URLencode)
importFrom(utils,head)
importFrom(utils,read.csv)
importFrom(utils,read.delim)
importFrom(utils,setTxtProgressBar)
importFrom(utils,tail)
importFrom(utils,txtProgressBar)
53 changes: 49 additions & 4 deletions R/eml.R
Original file line number Diff line number Diff line change
Expand Up @@ -345,7 +345,7 @@ reorder_pids <- function(pid_list, doc){
#' doc$dataset$project <- proj
#'
#' EML::eml_validate(doc)
#'
#' @importFrom utils tail
eml_nsf_to_project <- function(awards, eml_version = "2.2"){

stopifnot(is.character(awards))
Expand Down Expand Up @@ -443,12 +443,14 @@ eml_nsf_to_project <- function(awards, eml_version = "2.2"){
extract_name <- function(x){
lapply(x, function(x) {
data.frame(
firstName = trimws(stringr::str_extract(x, "[A-Za-z]{2,}\\s[A-Z]?")),
lastName = trimws(gsub("^([A-Za-z]{2,})\\s[A-Z]?", "", x)),
stringsAsFactors = F)})
firstName = unlist(lapply(x, function(x){head(strsplit(x, split = " ")[[1]], 1)})),
lastName = unlist(lapply(x, function(x) {paste(tail(strsplit(x, split = " ")[[1]], -1), collapse = " ")}))
)
})
}



#' Get raster info from a file on disk
#'
#' This function populates a spatialRaster element with the
Expand Down Expand Up @@ -730,3 +732,46 @@ eml_add_distribution <- function(doc, identifier){

return(doc)
}

#' Add an Arctic Report Card annotation to a dataset
#'
#' Creates an annotation from the Arctic Report Card ontology
#' [here](https://bioportal.bioontology.org/ontologies/ARCRC/?p=summary)
#' and inserts the annotation into the EML document `doc` while retaining any existing
#' annotations such as the sensitivity annotations or dataset categorization. For a
#' list of available essay topics or key variables, see link above.
#'
#'
#'
#' @param doc (emld) An EML document
#' @param property (character) One of two properties: "isAbout" for key variables or "influenced" for essay topics
#' @param label (character) One or more labels in title case from the ADCAD ontology.
#'
#' @return doc (emld) An EML document with annotation added
#' @export
#' @examples
#' library(EML)
#' # read in any EML document
#' doc <- read_eml(system.file("extdata/strix-pacific-northwest.xml", package="dataone"))
#' # add the dataset categories
#' doc <- eml_arcrc_add_annotation(doc, "isAbout", c("sea ice thickness", "sea surface temperature"))
#'
eml_arcrc_add_annotation <- function(doc, property, label){

stopifnot("emld" %in% class(doc))
existing_anns <- doc$dataset$annotation

if (is.null(doc$dataset$id)){
doc$dataset$id <- gsub(":", "-", doc$packageId)
}

if (property == "isAbout") {
new_ann <- purrr::map(label, eml_arcrc_key_variable_annotation)
} else if (property == "influenced") {
new_ann <- purrr::map(label, eml_arcrc_essay_annotation)
}

doc$dataset$annotation <- c(list(existing_anns), new_ann)

return(doc)
}
5 changes: 3 additions & 2 deletions R/inventory.R
Original file line number Diff line number Diff line change
Expand Up @@ -270,6 +270,7 @@ inv_load_identifiers <- function(inventory, paths) {
#' @return (data.frame) An inventory.
#'
#' @noRd
#' @importFrom rlang .data
inv_add_extra_columns <- function(inventory) {
stopifnot(is(inventory, "data.frame"), "file" %in% names(inventory))

Expand Down Expand Up @@ -348,8 +349,8 @@ inv_add_extra_columns <- function(inventory) {

# Calculate statistics related to packages
cat("Adding 'package_nfiles', 'package_size_mb', and 'package_has_archives' columns.\n")
inventory <- dplyr::group_by(inventory, package)
inventory <- dplyr::mutate(inventory, package_nfiles = length(package))
inventory <- dplyr::group_by(inventory, .data$package)
inventory <- dplyr::mutate(inventory, package_nfiles = length(.data$package))

as.data.frame(inventory)
}
Expand Down
7 changes: 4 additions & 3 deletions R/marking.R
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
#' @return (data.frame) An inventory.
#'
#' @noRd
#' @importFrom rlang .data
theme_packages <- function(inventory, nfiles_cutoff=100) {
stopifnot(is.data.frame(inventory),
"package_nfiles" %in% names(inventory))
Expand All @@ -59,9 +60,9 @@ theme_packages <- function(inventory, nfiles_cutoff=100) {
inventory$theme[grep("v_\\d\\.", inventory$file)] <- "has-versions"

# There should be no un-themed packages once we're done
theme_stats <- dplyr::group_by(inventory, theme)
theme_stats <- dplyr::filter(theme_stats, is_metadata == TRUE)
theme_stats <- dplyr::summarize(theme_stats, npkgs = length(filename))
theme_stats <- dplyr::group_by(inventory, .data$theme)
theme_stats <- dplyr::filter(theme_stats, .data$is_metadata == TRUE)
theme_stats <- dplyr::summarize(theme_stats, npkgs = length(.data$filename))

cat("Theme summary (by package):\n")

Expand Down
13 changes: 8 additions & 5 deletions R/mosaic.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
#' @export
#'
#' @examples mosaic_annotate_attribute("PS122/2_14-270")
#' @importFrom rlang .data
mosaic_annotate_attribute <- function(eventLabel) {
# get the owl file from github
mosaic <- read_ontology("mosaic")
Expand All @@ -31,7 +32,7 @@ mosaic_annotate_attribute <- function(eventLabel) {
stopifnot(eventLabel %in% events$label)

event_device_iri <- events %>%
dplyr::filter(label == eventLabel)
dplyr::filter(.data$label == eventLabel)

#construct the annotation
event_annotation <- list(
Expand Down Expand Up @@ -70,6 +71,7 @@ mosaic_annotate_attribute <- function(eventLabel) {
#'
#' #multiple campaigns
#' mosaic_annotate_dataset(c("PS122/2", "PS122/1"))
#' @importFrom rlang .data
mosaic_annotate_dataset <- function(campaign) {
check_ps <-
purrr::map(campaign, ~ stringr::str_detect(.x, "PS", negate = T))
Expand All @@ -95,7 +97,7 @@ mosaic_annotate_dataset <- function(campaign) {

stopifnot(campaign %in% df_campaign$label)

campaign_iri <- dplyr::filter(df_campaign, label %in% campaign)
campaign_iri <- dplyr::filter(df_campaign, .data$label %in% campaign)

construct_campaign <- function(label, uri) {
# Campaign
Expand Down Expand Up @@ -153,14 +155,15 @@ query_class <-
#' mosaic_portal_filter("Basis")
#'
#' mosaic_portal_filter("Campaign")
#' @importFrom rlang .data
mosaic_portal_filter <- function(class) {

#find the class IRI
mosaic <- read_ontology("mosaic")

concepts <- get_ontology_concepts(mosaic)

df_uri <- dplyr::filter(concepts, label == class)
df_uri <- dplyr::filter(concepts, .data$label == class)

#build the SPARQL query
query <-
Expand All @@ -178,7 +181,7 @@ mosaic_portal_filter <- function(class) {
)

df <- suppressMessages(rdflib::rdf_query(mosaic, query)) %>%
dplyr::arrange(label)
dplyr::arrange(.data$label)

#for method/devices, filter the list based on existing annotations
if (df_uri$Concept[1] == "https://purl.dataone.org/odo/MOSAIC_00000036") {
Expand All @@ -204,7 +207,7 @@ mosaic_portal_filter <- function(class) {
relevant <- unique(unlist(result$sem_annotation))

df <- df %>%
dplyr::filter(iri %in% relevant)
dplyr::filter(.data$iri %in% relevant)

}

Expand Down
90 changes: 87 additions & 3 deletions R/ontology.R
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,10 @@ read_ontology <- function(ontology_name) {
ann_url <- "https://data.bioontology.org/ontologies/ADCAD/download?apikey=8b5b7825-538d-40e0-9e9e-5ab9274a9aeb&download_format=rdf"
ont <- rdflib::rdf_parse(ann_url,
format = "rdfxml")

} else if (ontology_name == "ARCRC") {
ann_url <- "https://data.bioontology.org/ontologies/ARCRC/download?apikey=8b5b7825-538d-40e0-9e9e-5ab9274a9aeb&download_format=rdf"
ont <- rdflib::rdf_parse(ann_url,
format = "rdfxml")
}

}
Expand Down Expand Up @@ -69,6 +72,7 @@ get_ontology_concepts <- function(ontology){
#' @export
#'
#' @examples eml_ecso_annotation("latitude coordinate")
#' @importFrom rlang .data
eml_ecso_annotation <- function(valueLabel){

ecso <- read_ontology("ecso")
Expand All @@ -87,7 +91,7 @@ eml_ecso_annotation <- function(valueLabel){

stopifnot(valueLabel %in% df$label)

annotations <- dplyr::filter(df, label == valueLabel)
annotations <- dplyr::filter(df, .data$label == valueLabel)

list(
propertyURI = list(label = "contains measurements of type",
Expand All @@ -108,6 +112,7 @@ eml_ecso_annotation <- function(valueLabel){
#' @export
#'
#' @examples eml_ecso_annotation("latitude coordinate")
#' @importFrom rlang .data
eml_adcad_annotation <- function(valueLabel){

adcad <- read_ontology("ADCAD")
Expand All @@ -126,7 +131,7 @@ eml_adcad_annotation <- function(valueLabel){

stopifnot(valueLabel %in% df$label)

annotations <- dplyr::filter(df, label == valueLabel)
annotations <- dplyr::filter(df, .data$label == valueLabel)

list(
propertyURI = list(label = "theme",
Expand All @@ -136,3 +141,82 @@ eml_adcad_annotation <- function(valueLabel){
)
}

#' Given a key variable from the Arctic Report Card (ARCRC) ontology, produce the corresponding annotation
#'
#' Reduces the amount of copy pasting needed
#'
#' @param valueLabel (character) One of the key variables found in
#' [ARCRC](https://bioportal.bioontology.org/ontologies/ARCRC/?p=classes&conceptid=http%3A%2F%2Fpurl.dataone.org%2Fodo%2FARCRC_00000040)
#'
#' @return list - a formatted EML annotation
#' @export
#'
#' @examples eml_arcrc_key_variable_annotation("age of sea ice")
#' @importFrom rlang .data
eml_arcrc_key_variable_annotation <- function(valueLabel) {

arcrc <- read_ontology("ARCRC")

query <-
"PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
SELECT ?iri ?label
WHERE {
?iri rdf:type <http://www.w3.org/2002/07/owl#Class> .
?iri rdfs:label ?label .
}"

df <- suppressMessages(rdflib::rdf_query(arcrc, query))

stopifnot(valueLabel %in% df$label)

annotations <- dplyr::filter(df, .data$label == valueLabel)

list(
propertyURI = list(label = "isAbout",
propertyURI = "http://purl.obolibrary.org/obo/IAO_0000136"),
valueURI = list(label = annotations$label,
valueURI = annotations$iri)
)
}

#' Given an essay topic from the Arctic Report Card (ARCRC) ontology, produce the corresponding annotation
#'
#' Reduces the amount of copy pasting needed
#'
#' @param valueLabel (character) One of the essay topics found in
#' [ARCRC](https://bioportal.bioontology.org/ontologies/ARCRC/?p=classes&conceptid=http%3A%2F%2Fpurl.dataone.org%2Fodo%2FARCRC_00000510)
#'
#' @return list - a formatted EML annotation
#' @export
#'
#' @examples eml_arcrc_essay_annotation("Sea Ice Indicator")
#' @importFrom rlang .data
eml_arcrc_essay_annotation <- function(valueLabel) {

arcrc <- read_ontology("ARCRC")

query <-
"PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
SELECT ?iri ?label
WHERE {
?iri rdf:type <http://www.w3.org/2002/07/owl#Class> .
?iri rdfs:label ?label .
}"

df <- suppressMessages(rdflib::rdf_query(arcrc, query))

stopifnot(valueLabel %in% df$label)

annotations <- dplyr::filter(df, .data$label == valueLabel)

list(
propertyURI = list(label = "influenced",
propertyURI = "http://www.w3.org/ns/prov#influenced"),
valueURI = list(label = annotations$label,
valueURI = annotations$iri)
)
}
2 changes: 1 addition & 1 deletion R/util.R
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ log_message <- function(message=NULL) {

#' Check if an object exists on a Member Node
#'
#' This is a simple check for the HTTP status of a /meta/{PID} call on the
#' This is a simple check for the HTTP status of a /meta/\{PID\} call on the
#' provided Member Mode.
#'
#' @param node (MNode) The Member Node to query.
Expand Down
Loading

0 comments on commit 62c4df8

Please sign in to comment.