Skip to content

Commit

Permalink
fix: bug in prescribers
Browse files Browse the repository at this point in the history
  • Loading branch information
andrewallenbruce committed Jul 21, 2024
1 parent a34b8f2 commit 8ecaa9c
Show file tree
Hide file tree
Showing 5 changed files with 220 additions and 127 deletions.
246 changes: 156 additions & 90 deletions R/prescribers.R
Original file line number Diff line number Diff line change
Expand Up @@ -93,47 +93,74 @@
#' @name prescribers
NULL

#' @param year < *integer* > // **required** Year data was reported, in `YYYY`
#' format. Run [rx_years()] to return a vector of the years currently available.
#' @param type < *character* > // **required** dataset to query, `"Provider"`,
#' `"Drug"`, `"Geography"`
#' @param npi < *integer* > 10-digit national provider identifier
#' @param first,last,organization < *character* > Individual/Organizational
#' prescriber's name
#' @param credential < *character* > Individual prescriber's credentials
#' @param gender < *character* > Individual prescriber's gender; `"F"` (Female),
#' `"M"` (Male)
#' @param entype < *character* > Prescriber entity type; `"I"` (Individual),
#' `"O"` (Organization)
#' @param city < *character* > City where prescriber is located
#' @param state < *character* > State where prescriber is located
#' @param fips < *character* > Prescriber's state's FIPS code
#' @param zip < *character* > Prescriber’s zip code
#' @param ruca < *character* > Prescriber’s RUCA code
#' @param country < *character* > Country where prescriber is located
#' @param specialty < *character* > Prescriber specialty code reported on the
#' largest number of claims submitted
#' @param brand_name < *character* > Brand name (trademarked name) of the drug
#' @param year `<int>` // **required** Year data was reported, in `YYYY` format.
#' Run [rx_years()] to return a vector of the years currently available.
#'
#' @param type `<chr>` // **required** dataset to query, `"Provider"`, `"Drug"`,
#' `"Geography"`
#'
#' @param npi `<int>` 10-digit national provider identifier
#'
#' @param first,last,organization `<chr>` Individual/Organizational prescriber's
#' name
#'
#' @param credential `<chr>` Individual prescriber's credentials
#'
#' @param gender `<chr>` Individual prescriber's gender; `"F"` (Female), `"M"`
#' (Male)
#'
#' @param entype `<chr>` Prescriber entity type; `"I"` (Individual), `"O"`
#' (Organization)
#'
#' @param city `<chr>` City where prescriber is located
#'
#' @param state `<chr>` State where prescriber is located
#'
#' @param fips `<chr>` Prescriber's state's FIPS code
#'
#' @param zip `<chr>` Prescriber’s zip code
#'
#' @param ruca `<chr>` Prescriber’s RUCA code
#'
#' @param country `<chr>` Country where prescriber is located
#'
#' @param specialty `<chr>` Prescriber specialty code reported on the largest
#' number of claims submitted
#'
#' @param brand_name `<chr>` Brand name (trademarked name) of the drug
#' filled, derived by linking the National Drug Codes (NDCs) from PDEs to a
#' drug information database.
#' @param generic_name < *character* > USAN generic name of the drug filled (short
#'
#' @param generic_name `<chr>` USAN generic name of the drug filled (short
#' version); A term referring to the chemical ingredient of a drug rather than
#' the trademarked brand name under which the drug is sold, derived by linking
#' the National Drug Codes (NDCs) from PDEs to a drug information database.
#' @param level < *character* > Geographic level by which the data will be
#' aggregated:
#' + `"State"`: Data is aggregated for each state
#' + `"National"`: Data is aggregated across all states for a given HCPCS Code
#' @param opioid < *boolean* > _type = 'Geography'_, `TRUE` returns Opioid drugs
#' @param opioidLA < *boolean* > _type = 'Geography'_, `TRUE` returns Long-acting Opioids
#' @param antibiotic < *boolean* > _type = 'Geography'_, `TRUE` returns antibiotics
#' @param antipsychotic < *boolean* > _type = 'Geography'_, `TRUE` returns antipsychotics
#' @param tidy < *boolean* > // __default:__ `TRUE` Tidy output
#' @param nest < *boolean* > // __default:__ `TRUE` Nest output
#' @param na.rm < *boolean* > // __default:__ `TRUE` Remove empty rows and columns
#' @param ... For future use.
#'
#' @param level `<chr>` Geographic level by which the data will be aggregated:
#'
#' + `"State"`: Data is aggregated for each state
#' + `"National"`: Data is aggregated across all states for a given HCPCS Code
#'
#' @param opioid `<lgl>` _type = 'Geography'_, `TRUE` returns Opioid drugs
#'
#' @param opioidLA `<lgl>` _type = 'Geography'_, `TRUE` returns Long-acting Opioids
#'
#' @param antibiotic `<lgl>` _type = 'Geography'_, `TRUE` returns antibiotics
#'
#' @param antipsychotic `<lgl>` _type = 'Geography'_, `TRUE` returns antipsychotics
#'
#' @param tidy `<lgl>` // __default:__ `TRUE` Tidy output
#'
#' @param nest `<lgl>` // __default:__ `TRUE` Nest output
#'
#' @param na.rm `<lgl>` // __default:__ `TRUE` Remove empty rows and columns
#'
#' @param ... Empty dots.
#'
#' @rdname prescribers
#'
#' @autoglobal
#'
#' @export
prescribers <- function(year,
type,
Expand Down Expand Up @@ -165,18 +192,18 @@ prescribers <- function(year,

rlang::check_required(year)
year <- as.character(year)
year <- rlang::arg_match(year, as.character(rx_years()))
year <- rlang::arg_match0(year, as.character(rx_years()))

npi <- npi %nn% validate_npi(npi)
zip <- zip %nn% as.character(zip)
fips <- fips %nn% as.character(fips)
ruca <- ruca %nn% as.character(ruca)

rlang::check_required(type)
type <- rlang::arg_match(type, c('Provider', 'Drug', 'Geography'))
type <- rlang::arg_match0(type, c('Provider', 'Drug', 'Geography'))

if (type == 'Provider') {
param_npi <- 'PRSCRBR_NPI'
param_npi <- 'Prscrbr_NPI'
param_state <- 'Prscrbr_State_Abrvtn'
param_fips <- 'Prscrbr_State_FIPS'
brand_name <- NULL
Expand Down Expand Up @@ -213,7 +240,7 @@ prescribers <- function(year,
zip <- NULL
ruca <- NULL
country <- NULL
level <- level %nn% rlang::arg_match(level, c('National', 'State'))
level <- level %nn% rlang::arg_match0(level, c('National', 'State'))
if (!is.null(state) && (state %in% state.abb)) state <- abb2full(state)
opioid <- opioid %nn% tf_2_yn(opioid)
opioidLA <- opioidLA %nn% tf_2_yn(opioidLA)
Expand Down Expand Up @@ -244,17 +271,25 @@ prescribers <- function(year,
'Antbtc_Drug_Flag', antibiotic,
'Antpsyct_Drug_Flag', antipsychotic)

yr <- switch(type,
'Provider' = api_years('rxp'),
'Drug' = api_years('rxd'),
'Geography' = api_years('rxg'))
yr <- switch(
type,
'Provider' = api_years('rxp'),
'Drug' = api_years('rxd'),
'Geography' = api_years('rxg'))

id <- dplyr::filter(yr, year == {{ year }}) |> dplyr::pull(distro)
id <- dplyr::filter(
yr,
year == {{ year }}) |>
dplyr::pull(distro)

url <- paste0("https://data.cms.gov/data-api/v1/dataset/",
id, "/data.json?", encode_param(args))
url <- paste0(
"https://data.cms.gov/data-api/v1/dataset/",
id,
"/data.json?",
encode_param(args))

response <- httr2::request(url) |> httr2::req_perform()
response <- httr2::request(url) |>
httr2::req_perform()

if (vctrs::vec_is_empty(response$body)) {

Expand Down Expand Up @@ -284,21 +319,23 @@ prescribers <- function(year,
return(invisible(NULL))
}

results <- httr2::resp_body_json(response, simplifyVector = TRUE)
results <- httr2::resp_body_json(
response,
simplifyVector = TRUE)

if (!tidy) results <- df2chr(results)

if (tidy) {

results$year <- year

results <- switch(type,
'Provider' = tidyup_provider.rx(results, nest = nest),
'Drug' = tidyup_drug.rx(results, nest = nest),
'Geography' = tidyup_geography.rx(results))
results <- switch(
type,
'Provider' = tidyup_provider.rx(results, nest = nest),
'Drug' = tidyup_drug.rx(results, nest = nest),
'Geography' = tidyup_geography.rx(results))

if (na.rm) results <- narm(results)

}
return(results)
}
Expand All @@ -323,10 +360,12 @@ tidyup_geography.rx <- function(results) {
dplyr::mutate(state = fct_stname(state),
level = fct_level(level))

results <- dplyr::mutate(results,
dplyr::across(
dplyr::contains('suppress_'),
suppress_flag))
results <- dplyr::mutate(
results,
dplyr::across(
dplyr::contains('suppress_'),
suppress_flag)
)

return(results)
}
Expand All @@ -344,25 +383,32 @@ tidyup_drug.rx <- function(results, nest = TRUE) {
dbl = c('tot_fills',
'tot_cost')) |>
dplyr::mutate(level = 'Provider',
source = fct_src(source), # nolint
# source = fct_src(source), # nolint
state = fct_stabb(state),
level = fct_level(level))

results <- dplyr::mutate(results,
dplyr::across(
dplyr::contains('suppress_'),
suppress_flag))
results <- dplyr::mutate(
results,
dplyr::across(
dplyr::contains('suppress_'),
suppress_flag)
)

if (nest) {
results <- results |>
tidyr::nest(gte_65 = dplyr::any_of(c(
'tot_claims_ge65',
'tot_fills_ge65',
'tot_cost_ge65',
'tot_supply_ge65',
'tot_benes_ge65',
'suppress_ge65',
'suppress_bene_ge65')))
tidyr::nest(
gte_65 = dplyr::any_of(
c(
'tot_claims_ge65',
'tot_fills_ge65',
'tot_cost_ge65',
'tot_supply_ge65',
'tot_benes_ge65',
'suppress_ge65',
'suppress_bene_ge65'
)
)
)
}
return(results)
}
Expand Down Expand Up @@ -399,21 +445,25 @@ tidyup_provider.rx <- function(results, nest = TRUE) {
cred = 'credential',
zip = 'zip') |>
combine(address, c('prscrbr_st1', 'prscrbr_st2')) |>
dplyr::mutate(source = fct_src(source), # nolint
entity_type = fct_ent(entity_type),
dplyr::mutate(entity_type = fct_ent(entity_type),
# source = fct_src(source), # nolint
gender = fct_gen(gender),
state = fct_stabb(state)) |>
dplyr::mutate(bene_race_nonwht = tot_benes - bene_race_wht,
.after = bene_race_wht)

results <- dplyr::mutate(results,
dplyr::across(
dplyr::contains('suppress_'),
suppress_flag))
results <- dplyr::mutate(
results,
dplyr::across(
dplyr::contains('suppress_'),
suppress_flag)
)

if (nest) {
results <- results |>
tidyr::nest(detailed = dplyr::any_of(c(
tidyr::nest(
detailed = dplyr::any_of(
c(
'tot_claims_brand',
'tot_cost_brand',
'tot_claims_generic',
Expand Down Expand Up @@ -445,8 +495,13 @@ tidyup_provider.rx <- function(results, nest = TRUE) {
'suppress_mapd',
'suppress_lis',
'suppress_nlis',
'suppress_pdp'))) |>
tidyr::nest(demographics = dplyr::any_of(c(
'suppress_pdp'
)
)
) |>
tidyr::nest(
demographics = dplyr::any_of(
c(
'bene_age_avg',
'bene_age_lt65',
'bene_age_65_74',
Expand All @@ -460,16 +515,24 @@ tidyup_provider.rx <- function(results, nest = TRUE) {
'bene_race_nat',
'bene_race_oth',
'bene_dual',
'bene_ndual'))) |>
tidyr::nest(gte_65 = dplyr::any_of(c(
'bene_ndual'
)
)
) |>
tidyr::nest(
gte_65 = dplyr::any_of(
c(
'tot_claims_ge65',
'tot_fills_ge65',
'tot_cost_ge65',
'tot_supply_ge65',
'tot_benes_ge65',
'tot_claims_antipsych_ge65',
'tot_cost_antipsych_ge65',
'tot_benes_antipsych_ge65')))
'tot_benes_antipsych_ge65'
)
)
)
}
return(results)
}
Expand All @@ -483,20 +546,23 @@ tidyup_provider.rx <- function(results, nest = TRUE) {
#' @export
prescribers_ <- function(year = rx_years(),
...) {
furrr::future_map_dfr(year, prescribers, ...,
.options = furrr::furrr_options(seed = NULL))

furrr::future_map_dfr(
year,
prescribers,
...,
.options = furrr::furrr_options(seed = NULL))
}

#' Convert specialty source to unordered labelled factor
#' @param x vector
#' @autoglobal
#' @noRd
fct_src <- function(x) {
factor(x,
levels = c("S", "T"),
labels = c("Medicare Specialty Code",
"Taxonomy Code Classification"))
factor(
x,
levels = c("S", "T"),
labels = c("Medicare Specialty Code",
"Taxonomy Code Classification"))
}

#' @param df data frame
Expand All @@ -507,15 +573,15 @@ cols_rx <- function(df, type) {

if (type == 'Provider') {
cols <- c('year',
'npi' = 'PRSCRBR_NPI',
'npi' = 'Prscrbr_NPI',
'entity_type' = 'Prscrbr_Ent_Cd',
'first' = 'Prscrbr_First_Name',
'middle' = 'Prscrbr_MI',
'last' = 'Prscrbr_Last_Org_Name',
'gender' = 'Prscrbr_Gndr',
'credential' = 'Prscrbr_Crdntls',
'specialty' = 'Prscrbr_Type',
'source' = 'Prscrbr_Type_src',
'source' = 'Prscrbr_Type_Src',
'Prscrbr_St1',
'Prscrbr_St2',
'city' = 'Prscrbr_City',
Expand Down
Loading

0 comments on commit 8ecaa9c

Please sign in to comment.