Skip to content

Commit

Permalink
metadata refactoring
Browse files Browse the repository at this point in the history
  • Loading branch information
andrewallenbruce committed Dec 4, 2023
1 parent 0261f25 commit c3727a4
Show file tree
Hide file tree
Showing 5 changed files with 172 additions and 71 deletions.
2 changes: 2 additions & 0 deletions R/globals.R
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,8 @@ utils::globalVariables(c(
"landingPage", # <metadata.json>
"modified", # <metadata.json>
"references", # <metadata.json>
"accrualPeriodicity", # <metadata.json>
"temporal", # <metadata.json>
"distribution_title", # <metadata.json>
"distribution_modified", # <metadata.json>
"distribution_accessURL", # <metadata.json>
Expand Down
209 changes: 140 additions & 69 deletions R/metadata.R
Original file line number Diff line number Diff line change
@@ -1,12 +1,11 @@
#' @param title name of the api
#' @return A [tibble()] containing the updated ids.
#' @return A list of metadata describing each API's dataset
#' @examplesIf interactive()
#' metadata.store("Facility Affiliation Data")
#' metadata.store("National Downloadable File")
#' @autoglobal
#' @noRd
metadata.store <- function(title) {

#------------------------------------------------
url.store <- paste0('https://data.cms.gov/',
'provider-data/api/1/metastore/',
Expand Down Expand Up @@ -50,23 +49,23 @@ metadata.store <- function(title) {
simplifyVector = TRUE)

schema <- dplyr::tibble(
title = response$title,
description = response$description,
uuid = response$identifier,
identifier = response$keyword$identifier,
distribution = response$distribution$identifier,
landing_page = response$landingPage,
publisher = response$publisher$data$name,
contact = response$contactPoint$hasEmail,
date_issued = response$issued,
date_modified = response$modified,
title = response$title,
description = response$description,
uuid = response$identifier,
identifier = response$keyword$identifier,
distribution = response$distribution$identifier,
landing_page = response$landingPage,
publisher = response$publisher$data$name,
contact = response$contactPoint$hasEmail,
date_issued = response$issued,
date_modified = response$modified,
datetime_modified = response$`%modified`,
date_released = response$released)
date_released = response$released)

#------------------------------------------------
url.query <- glue::glue('https://data.cms.gov/',
'provider-data/api/1/datastore/query/',
'{schema$distribution}',
'{schema$distribution[1]}',
'?limit=1&offset=0&count=true&results=true',
'&schema=true&keys=true&format=json&rowIds=true')

Expand All @@ -81,56 +80,23 @@ metadata.store <- function(title) {
names = list(names(response$results)))

#------------------------------------------------
return(list(store = store, schema = schema, query = query))
}

#' Total row count of a data.cms.gov dataset
#'
#' Requests the `data-viewer/stats` endpoint for the given dataset and
#' extracts `data$total_rows` from the parsed JSON body.
#'
#' @param uuid distribution id of the api
#' @return The `total_rows` value of the response, coerced with
#'   [as.integer()].
#' @examplesIf interactive()
#' metadata.rows('2457ea29-fc82-48b0-86ec-3b0755de7515') # providers()
#' metadata.rows('a85fa452-dee9-4c8f-8156-665238b8492f') # hospitals()
#' @autoglobal
#' @noRd
metadata.rows <- function(uuid) {

  # `{uuid}` is interpolated by glue from this function's argument.
  endpoint <- glue::glue(
    'https://data.cms.gov/',
    'data-api/v1/dataset/',
    '{uuid}/data-viewer/stats'
  )

  req  <- httr2::request(endpoint)
  resp <- httr2::req_perform(req)
  body <- httr2::resp_body_json(
    resp,
    check_type     = FALSE,
    simplifyVector = TRUE
  )

  as.integer(body$data$total_rows)
}

#' @param uuid distribution id of the api
#' @return A list containing the total columns and rows in the dataset,
#' as well as the column names.
#' @examplesIf interactive()
#' metadata.viewer('2457ea29-fc82-48b0-86ec-3b0755de7515') # providers()
#' metadata.viewer('a85fa452-dee9-4c8f-8156-665238b8492f') # hospitals()
#' @autoglobal
#' @noRd
metadata.viewer <- function(uuid) {

url <- glue::glue('https://data.cms.gov/',
'data-api/v1/dataset/',
'{uuid}/data-viewer?offset=0&size=1')

response <- httr2::request(url) |>
httr2::req_perform() |>
httr2::resp_body_json(check_type = FALSE,
simplifyVector = TRUE)

rows <- response$meta$total_rows
cols <- response$meta$headers

list(
dimensions = paste0(length(cols), ' columns x ', format(rows, big.mark = ","), ' rows'),
fields = cols
results <- list(
title = store$title,
description = schema$description[[1]],
publisher = store$name,
uuid = store$identifier,
distribution = schema$distribution[[1]],
date_issued = store$issued,
date_modified = store$modified,
date_released = store$released,
period = store$period,
timelength_days = store$timelength_days,
dimensions = paste0(query$columns, ' columns x ', format(query$rows, big.mark = ","), ' rows'),
fields = query$names[[1]],
landing_page = store$landingPage,
data_dictionary = "https://data.cms.gov/provider-data/sites/default/files/data_dictionaries/physician/DOC_Data_Dictionary.pdf"
)
return(results)
}

#' @param title name of the api
Expand All @@ -155,7 +121,9 @@ metadata.json <- function(title, first = TRUE) {
distribution,
landingPage,
modified,
references) |>
references,
accrualPeriodicity,
temporal) |>
dplyr::filter(title == {{ title }}) |>
tidyr::unnest(references)

Expand All @@ -176,20 +144,123 @@ metadata.json <- function(title, first = TRUE) {

results <- dplyr::left_join(resp, dst, by = dplyr::join_by(title)) |>
dplyr::select(-title) |>
dplyr::select(title = distribution_title,
dplyr::select(title = distribution_title,
description,
dictionary = describedBy,
methodology = references,
dictionary = describedBy,
methodology = references,
landing_page = landingPage,
distribution,
modified = distribution_modified) |>
dplyr::mutate(modified = lubridate::ymd(modified)) |>
modified = distribution_modified,
accrualPeriodicity) |>
dplyr::mutate(modified = lubridate::ymd(modified)) |>
provider::make_interval(start = modified) |>
tidyr::separate_wider_delim(title, delim = " : ", names = c("title", NA))

results$interval <- NULL

if (first) results <- dplyr::slice_head(results)

url <- glue::glue('https://data.cms.gov/',
'data-api/v1/dataset/',
'{results$distribution}',
'/data-viewer?offset=0&size=1')

response <- httr2::request(url) |>
httr2::req_perform() |>
httr2::resp_body_json(check_type = FALSE,
simplifyVector = TRUE)

rows <- response$meta$total_rows
cols <- response$meta$headers

# Translate an ISO 8601 repeating-interval code (the `accrualPeriodicity`
# value of the metadata) into a human-readable update-schedule label.
#
# x: character vector of codes such as "R/P1Y".
# Returns a character vector the same length as `x`; codes that are not
# listed get `case_match()`'s default (`NA`, since no `.default` is given).
iso_8601 <- function(x) {
  dplyr::case_match(
    x,
    "R/P10Y"              ~ "Decennial",
    "R/P4Y"               ~ "Quadrennial",
    "R/P1Y"               ~ "Annual",
    # "R/P0.5M" was previously listed here as well as under "Semimonthly".
    # `case_match()` uses the first matching formula, so "Semimonthly" could
    # never be returned. A half-month period is now mapped only to
    # "Semimonthly"; "Bimonthly" keeps the two-month period.
    "R/P2M"               ~ "Bimonthly",
    "R/P3.5D"             ~ "Semiweekly",
    "R/P1D"               ~ "Daily",
    c("R/P2W", "R/P0.5W") ~ "Biweekly",
    "R/P6M"               ~ "Semiannual",
    "R/P2Y"               ~ "Biennial",
    "R/P3Y"               ~ "Triennial",
    "R/P0.33W"            ~ "Three Times a Week",
    "R/P0.33M"            ~ "Three Times a Month",
    "R/PT1S"              ~ "Continuously Updated",
    "R/P1M"               ~ "Monthly",
    "R/P3M"               ~ "Quarterly",
    "R/P0.5M"             ~ "Semimonthly",
    "R/P4M"               ~ "Three Times a Year",
    "R/P1W"               ~ "Weekly",
    "R/PT1H"              ~ "Hourly"
  )
}

results <- list(
title = results$title,
description = results$description,
publisher = " ",
distribution = results$distribution,
update_schedule = iso_8601(results$accrualPeriodicity),
date_modified = results$modified,
period = results$period,
timelength_days = results$timelength_days,
dimensions = paste0(length(cols), ' columns x ', format(rows, big.mark = ","), ' rows'),
fields = cols,
landing_page = results$landing_page,
data_dictionary = results$dictionary,
methodology = results$methodology)

return(results)
}

#' Total row count of a data.cms.gov dataset
#'
#' Requests the `data-viewer/stats` endpoint for the given dataset and
#' extracts `data$total_rows` from the parsed JSON body.
#'
#' @param uuid distribution id of the api
#' @return The `total_rows` value of the response, coerced with
#'   [as.integer()].
#' @examplesIf interactive()
#' metadata.rows('2457ea29-fc82-48b0-86ec-3b0755de7515') # providers()
#' metadata.rows('a85fa452-dee9-4c8f-8156-665238b8492f') # hospitals()
#' @autoglobal
#' @noRd
metadata.rows <- function(uuid) {

  # `{uuid}` is interpolated by glue from this function's argument.
  endpoint <- glue::glue(
    'https://data.cms.gov/',
    'data-api/v1/dataset/',
    '{uuid}/data-viewer/stats'
  )

  req  <- httr2::request(endpoint)
  resp <- httr2::req_perform(req)
  body <- httr2::resp_body_json(
    resp,
    check_type     = FALSE,
    simplifyVector = TRUE
  )

  as.integer(body$data$total_rows)
}

#' Dimensions and column names of a data.cms.gov dataset
#'
#' Requests a single record (`offset=0&size=1`) from the `data-viewer`
#' endpoint and reads the `meta` element of the parsed JSON body.
#'
#' @param uuid distribution id of the api
#' @return A list with two elements: `dimensions`, a string of the form
#'   `"<n> columns x <m> rows"` built from `meta$headers` and
#'   `meta$total_rows`, and `fields`, the `meta$headers` value itself.
#' @examplesIf interactive()
#' metadata.viewer('2457ea29-fc82-48b0-86ec-3b0755de7515') # providers()
#' metadata.viewer('a85fa452-dee9-4c8f-8156-665238b8492f') # hospitals()
#' @autoglobal
#' @noRd
metadata.viewer <- function(uuid) {

  # `{uuid}` is interpolated by glue from this function's argument.
  endpoint <- glue::glue(
    'https://data.cms.gov/',
    'data-api/v1/dataset/',
    '{uuid}/data-viewer?offset=0&size=1'
  )

  req  <- httr2::request(endpoint)
  resp <- httr2::req_perform(req)
  body <- httr2::resp_body_json(
    resp,
    check_type     = FALSE,
    simplifyVector = TRUE
  )

  n_rows  <- body$meta$total_rows
  headers <- body$meta$headers

  # Column count comes from the number of headers; the row count is
  # formatted with a thousands separator.
  size_label <- paste0(
    length(headers), ' columns x ',
    format(n_rows, big.mark = ","), ' rows'
  )

  list(dimensions = size_label, fields = headers)
}
3 changes: 2 additions & 1 deletion R/prescribers.R
Original file line number Diff line number Diff line change
Expand Up @@ -491,7 +491,8 @@ prescribers_ <- function(year = rx_years(),
fct_src <- function(x) {
factor(x,
levels = c("S", "T"),
labels = c("Medicare Specialty Code", "Taxonomy Code Classification"))
labels = c("Medicare Specialty Code",
"Taxonomy Code Classification"))
}

#' @param df data frame
Expand Down
11 changes: 11 additions & 0 deletions man/betos.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

18 changes: 17 additions & 1 deletion vignettes/articles/linking-providers.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@ options(scipen = 999)

```{r message=FALSE, warning=FALSE}
library(provider)
library(tibble)
library(vctrs)
library(dplyr)
library(purrr)
Expand All @@ -36,6 +35,23 @@ library(gt)

## Individual Provider

```{r}
# `chain()` below comes from chainr.
library(chainr)
# Gather the results of several lookups into one `chain()` object, one named
# element per data source. Some lookups take `pac`, others `npi`.
# NOTE(review): presumably both identifiers refer to the same individual
# provider -- confirm.
mark <- chain(
providers = providers(pac = 7810891009),
reassignment = reassignments(pac = 7810891009),
clinicians = clinicians(pac = 7810891009),
nppes = nppes(npi = 1043245657),
referrals = order_refer(npi = 1043245657),
affiliations = affiliations(pac = 7810891009),
# Calls `hospitals()` once per `facility_ccn` value in the affiliations result;
# `map_dfr()` row-binds the per-facility results. This repeats the
# `affiliations()` call made for the previous element.
hospitals = affiliations(pac = 7810891009) |> pull(facility_ccn) |> map_dfr(~hospitals(facility_ccn = .x)),
utilization = utilization_(npi = 1043245657, type = "Provider"))
# Print the assembled object.
mark
```


```{r}
vctrs::vec_rbind(
display_long(providers(pac = 7810891009)) |> tibble::add_column(source = "`providers()`"),
Expand Down

0 comments on commit c3727a4

Please sign in to comment.