Commit

Task 3 (#3)
* Package updates with implementation of Tasks 1 and 2

* Update DESCRIPTION

Authors added

* Update sample data path and added authors

* Remove R from man

* gitignore updated

* DESCRIPTION add httr dependency

* Add functions
classEndpoint
correspondenceList
prefixList
retrieveClassificationTable
retrieveCorrespondenceTable
structureData

* rm library

* Update functions

* Documentation functions

* Update function descriptions and first version of vignettes

* rm vignettes

* Update documentation for classification table and vignettes

* Update vignettes

* html vignettes

* Update documentation

* update vignette

* update vignette

* update vignettes

* Add CSVout parameter to both retrieve functions and update documentation and vignette

* add import httr

* remove tables

* -add ShowQUERY argument
-change function names (dataStructure and Classification table)
-add columns and change order in correspondence tables
-remove prefix duplicates
-final docs
-others

* Final version (there might be errors to check)

* vignette corrected

* quiet messages from SPARQL query

* correct typos

* Update final changes

* Updated vignette

* add ALL as default to classificationEndpoint

* change doc for classificationEndpoint

* remove table

* Functions for task 4

* Upload drafted documentation

* task 4 draft

* try classificationQC function

---------

Co-authored-by: Martina Patone <[email protected]>
Co-authored-by: Mészáros Mátyás <[email protected]>
3 people authored Dec 7, 2023
1 parent bbf5b62 commit 6ad9bd1
Showing 17 changed files with 1,285 additions and 5 deletions.
2 changes: 2 additions & 0 deletions .Rbuildignore
@@ -0,0 +1,2 @@
^.*\.Rproj$
^\.Rproj\.user$
7 changes: 7 additions & 0 deletions .gitignore
@@ -0,0 +1,7 @@
.Rproj.user
.Rhistory
.RData
.Ruserdata
.gitignore
.Rbuildignore
*.Rproj
6 changes: 4 additions & 2 deletions DESCRIPTION
@@ -8,13 +8,15 @@ Authors@R: c(person("Vasilis", "Chasiotis", role = "aut", comment="Department of
person("Martin", "Karlberg", role = "aut"),
person("Mátyás", "Mészáros", email = "[email protected]", role = "cre"),
person("Martina", "Patone", role = "aut"),
person("Erkand", "Muraku", role = "aut"))
person("Erkand", "Muraku", role = "aut"),
person("Clement", "Thomas", role = "aut"),
person("Loic", "Bienvenue", role = "aut"))
Description:
A candidate correspondence table between two classifications can be created when there are correspondence tables leading from the first classification to the second one via intermediate 'pivot' classifications.
The correspondence table between two statistical classifications can be updated when one of the classifications gets updated to a new version.
License: EUPL
Encoding: UTF-8
Imports: data.table, httr
Imports: data.table, httr, tidyverse, writexl
Suggests:
knitr,
rmarkdown,
9 changes: 8 additions & 1 deletion NAMESPACE
@@ -1,13 +1,20 @@
# Generated by roxygen2: do not edit by hand

export(classEndpoint)
export(classificationEndpoint)
export(classificationQC)
export(correctionClassification)
export(correspondenceList)
export(dataStructure)
export(lengthsFile)
export(newCorrespondenceTable)
export(prefixList)
export(retrieveClassificationTable)
export(retrieveCorrespondenceTable)
export(structureData)
export(updateCorrespondenceTable)
import(httr)
import(tidyverse)
import(writexl)
importFrom(data.table,fread)
importFrom(data.table,fwrite)
importFrom(httr,POST)
97 changes: 97 additions & 0 deletions R/classificationEndpoint.R
@@ -0,0 +1,97 @@
#' @title Retrieve a list of classification tables from the CELLAR repository, the FAO repository, or both.
#' @description The purpose of this function is to provide a comprehensive summary
#' of the data structure for each classification in the CELLAR and FAO endpoints.
#' The summary includes information such as the prefix name, URI, key, concept scheme, and title associated with each classification.
#' @param endpoint SPARQL endpoints provide a standardized way to access data sets,
#' making it easier to retrieve specific information or perform complex queries on linked data. This is an optional
#' parameter, which by default is set to \code{"ALL"}.
#' The valid values are \code{"CELLAR"}, \code{"FAO"} and \code{"ALL"} for both endpoints.
#' @import httr
#' @export
#' @return
#' \code{classificationEndpoint()} returns a list with one table per endpoint, containing the information needed to retrieve the classification tables:
#' \itemize{
#' \item Prefix: the SPARQL instruction for a declaration of a namespace prefix
#' \item ConceptScheme: taxonomy of the SKOS object to be retrieved
#' \item URI: the URL from which the SPARQL query was retrieved
#' \item Title: the title of the table retrieved
#' }
#' @examples
#' {
#' endpoint = "ALL"
#' list_data = classificationEndpoint(endpoint)
#' }

classificationEndpoint = function(endpoint = "ALL") {

### Datasets in CELLAR
endpoint_cellar = "http://publications.europa.eu/webapi/rdf/sparql"

SPARQL.query_cellar = paste0("
SELECT DISTINCT ?s ?Title
WHERE { ?s a skos:ConceptScheme ;
skos:prefLabel ?Title ;
?p <http://publications.europa.eu/resource/authority/corporate-body/ESTAT>
FILTER (LANG(?Title) = 'en')}
ORDER BY ?Title
")

response = POST(url = endpoint_cellar, accept("text/csv"), body = list(query = SPARQL.query_cellar), encode = "form")
data_cellar = read.csv(text=content(response, "text"), sep= ",")

## add prefix name
str_dt = t(sapply(data_cellar[,1], function(x) unlist(strsplit(as.character(x), "/+"))))
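# Note: splitting on the regex "/+" collapses the "//" after the URI scheme,
# which is why the URI is rebuilt below with two literal "/" separators.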
uri = paste0(str_dt[,1],"/", "/", str_dt[,2],"/",str_dt[,3],"/",str_dt[,4] )
prefix = str_dt[,4]
prefix = gsub("\\.","",prefix)
#key = str_dt[,4]
conceptscheme = str_dt[,5]
title = data_cellar[,2]
data_cellar = cbind(prefix, conceptscheme, uri, title)
rownames(data_cellar) = 1:nrow(data_cellar)
colnames(data_cellar) = c("Prefix", "ConceptScheme", "URI", "Title")

### Datasets in FAO
endpoint_fao = "https://stats.fao.org/caliper/sparql/AllVocs"
SPARQL.query_fao = paste0("
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
SELECT DISTINCT ?classification ?label
WHERE {
?classification a skos:ConceptScheme .
?classification skos:prefLabel ?label .
FILTER(regex(?label, 'classification', 'i'))
}
ORDER BY ?label
")

response = httr::POST(url = endpoint_fao, accept("text/csv"), body = list(query = SPARQL.query_fao), encode = "form")
data_fao = read.csv(text=content(response, "text"), sep= ",")

## add prefix name
str_dt = t(sapply(data_fao[,1], function(x) unlist(strsplit(as.character(x), "/+"))))
prefix = paste0(str_dt[,4], str_dt[,5])
prefix = gsub("\\.","",prefix)
uri = paste0(str_dt[,1], "/", "/", str_dt[,2], "/", str_dt[,3], "/", str_dt[,4], "/", str_dt[,5])
#class = prefix
ConceptScheme = str_dt[,6]
data_fao = cbind(prefix, ConceptScheme, uri, data_fao[,2])
rownames(data_fao) = 1:nrow(data_fao)
colnames(data_fao) = c("Prefix", "ConceptScheme", "URI", "Title")

if (endpoint == "ALL") {
data = list("CELLAR" = data_cellar, "FAO" = data_fao)
}

if (endpoint == "CELLAR") {
data = list("CELLAR" = data_cellar)
}

if (endpoint == "FAO") {
data = list("FAO" = data_fao)
}

return(data)

}
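A minimal usage sketch for the new function (an illustrative addition, not part of the committed files), assuming httr is installed and the CELLAR and FAO SPARQL endpoints are reachable:

# Retrieve the available classification tables from both endpoints
endpoints <- classificationEndpoint(endpoint = "ALL")

# Inspect the CELLAR entries (columns: Prefix, ConceptScheme, URI, Title)
head(endpoints$CELLAR)

# Query a single endpoint only
fao_only <- classificationEndpoint(endpoint = "FAO")
head(fao_only$FAO)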


