Skip to content

Commit

Permalink
Merge pull request #176 from UCD-SERG/test-stratify-data
Browse files Browse the repository at this point in the history
Test stratify data
  • Loading branch information
chrisorwa authored Jul 17, 2024
2 parents ca99c04 + fdda4c2 commit 4e04563
Show file tree
Hide file tree
Showing 13 changed files with 359 additions and 75 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: serocalculator
Type: Package
Title: Estimating Infection Rates from Serological Data
Version: 1.2.0
Version: 1.2.0.9000
Authors@R: c(
person(given = "Peter", family = "Teunis", email = "[email protected]", role = c("aut", "cph"), comment = "Author of the method and original code."),
person(given = "Kristina", family = "Lai", email = "[email protected]", role = c("aut", "cre")),
Expand Down
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
# serocalculator (development version)

# serocalculator 1.2.0
* Added `test-summary.pop_data` test

Expand Down
1 change: 1 addition & 0 deletions R/curve_param_names.R
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
curve_param_names = c("y1", "alpha", "r", "antigen_iso")
5 changes: 3 additions & 2 deletions R/est.incidence.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
#' This function models seroincidence using maximum likelihood estimation; that is, it finds the value of the seroincidence parameter which maximizes the likelihood (i.e., joint probability) of the data.
#' @inheritParams log_likelihood
#' @inheritParams stats::nlm
#' @param pop_data [data.frame()] with cross-sectional serology data per antibody and age, and additional columns
#' @param pop_data a [data.frame] with cross-sectional serology data per antibody and age, and additional columns
#' @param lambda_start starting guess for incidence rate, in years/event.
#' @param antigen_isos Character vector with one or more antibody names. Values must match `pop_data`
#' @param build_graph whether to graph the log-likelihood function across a range of incidence rates (lambda values)
Expand All @@ -29,7 +29,8 @@
#' pop_data = xs_data %>% filter(Country == "Pakistan"),
#' curve_params = curves,
#' noise_params = noise %>% filter(Country == "Pakistan"),
#' antigen_isos = c("HlyE_IgG", "HlyE_IgA")
#' antigen_isos = c("HlyE_IgG", "HlyE_IgA"),
#' iterlim = 5 # limit iterations for the purpose of this example
#' )
#'
#' summary(est1)
Expand Down
12 changes: 9 additions & 3 deletions R/est.incidence.by.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,20 @@
#' Function to estimate seroincidences based on cross-section serology data and longitudinal
#' response model.
#'
#' @param pop_data [data.frame()] with cross-sectional serology data per antibody and age, and additional columns to identify possible `strata`.
#' @param strata [character()] vector of stratum-defining variables. Values must be variable names in `pop_data`.
#' @param pop_data a [data.frame] with cross-sectional serology data per antibody and age, and additional columns corresponding to each element of the `strata` input
#' @param strata a [character] vector of stratum-defining variables. Values must be variable names in `pop_data`.
#' @param curve_strata_varnames A subset of `strata`. Values must be variable names in `curve_params`. Default = "".
#' @param noise_strata_varnames A subset of `strata`. Values must be variable names in `noise_params`. Default = "".
#' @param num_cores Number of processor cores to use for calculations when computing by strata. If set to more than 1 and package \pkg{parallel} is available, then the computations are executed in parallel. Default = 1L.

#' @details
#'
#' If `strata` is left empty, a warning will be produced, recommending that you use `est.incidence()` for unstratified analyses, and then the data will be passed to `est.incidence()`. If for some reason you want to use `est.incidence.by()` with no strata instead of calling `est.incidence()`, you may use `NA`, `NULL`, or "" as the `strata` argument to avoid that warning.
#' If `strata` is left empty, a warning will be produced,
#' recommending that you use [est.incidence()] for unstratified analyses,
#' and then the data will be passed to [est.incidence()].
#' If for some reason you want to use [est.incidence.by()]
#' with no strata instead of calling [est.incidence()],
#' you may use `NA`, `NULL`, or `""` as the `strata` argument to avoid that warning.
#'
#'
#' @inheritParams est.incidence
Expand Down Expand Up @@ -45,6 +50,7 @@
#' noise_params = noise %>% filter(Country == "Pakistan"),
#' antigen_isos = c("HlyE_IgG", "HlyE_IgA"),
#' #num_cores = 8 # Allow for parallel processing to decrease run time
#' iterlim = 5 # limit iterations for the purpose of this example
#' )
#'
#' summary(est2)
Expand Down
1 change: 1 addition & 0 deletions R/noise_param_names.R
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
noise_param_names = c("nu", "eps", "y.low", "y.high", "antigen_iso")
153 changes: 89 additions & 64 deletions R/stratify_data.R
Original file line number Diff line number Diff line change
@@ -1,37 +1,73 @@
stratify_data <- function(
data,
antigen_isos,
curve_params,
noise_params,
strata_varnames = "",
curve_strata_varnames = NULL,
noise_strata_varnames = NULL) {
if (is.null(strata_varnames) || all(strata_varnames == "")) {
#' @title Split data by stratum
#' @description Split biomarker data, decay curve parameters, and noise parameters
#' to prepare for stratified incidence estimation.
#' @param strata_varnames [character()] vector of names of variables in `data` to stratify by
#' @inheritParams est.incidence.by
#'
#' @returns a `"biomarker_data_and_params.list"` object (a [list] with extra attributes `"strata"`, `"antigen_isos"`, etc)
#' @keywords internal
#' @examples
#' \dontrun{
#' library(dplyr)
#'
#' xs_data <- load_pop_data("https://osf.io/download//n6cp3/")
#'
#' curve <- load_curve_params("https://osf.io/download/rtw5k/") %>%
#' filter(antigen_iso %in% c("HlyE_IgA", "HlyE_IgG")) %>%
#' slice(1:100, .by = antigen_iso) # Reduce dataset for the purposes of this example
#'
#' noise <- load_noise_params("https://osf.io/download//hqy4v/")
#'
#' stratified_data =
#' stratify_data(
#' data = xs_data,
#' curve_params = curve,
#' noise_params = noise,
#' strata_varnames = "catchment",
#' curve_strata_varnames = NULL,
#' noise_strata_varnames = NULL
#' )
#' }
stratify_data <- function(data,
curve_params,
noise_params,
strata_varnames = "",
curve_strata_varnames = NULL,
noise_strata_varnames = NULL,
antigen_isos = data %>% attr("antigen_isos")) {
no_strata = is.null(strata_varnames) || all(strata_varnames == "")

curve_params =
curve_params %>%
filter(.data[["antigen_iso"]] %in% antigen_isos)

noise_params =
noise_params %>%
filter(.data[["antigen_iso"]] %in% antigen_isos)

if (no_strata) {
all_data <-
list(
pop_data = data %>% select(
attributes(data)$value_var,
attributes(data)$age_var,
"antigen_iso"
),
curve_params = curve_params %>% select("y1", "alpha", "r", "antigen_iso"),
noise_params = noise_params %>% select("nu", "eps", "y.low", "y.high", "antigen_iso")
pop_data = data %>% select(all_of(
c(
data %>% get_value_var(),
data %>% get_age_var(),
"antigen_iso"
)
)),
curve_params =
curve_params %>%
select(all_of(curve_param_names)),
noise_params =
noise_params %>%
select(all_of(noise_param_names))
) %>%
structure(
class = union(
"biomarker_data_and_params",
"list"
)
)
structure(class = union("biomarker_data_and_params", "list"))

stratumDataList <-
list( # est.incidence.by() expects a list.
`all data` = all_data
) %>%
structure(
antigen_isos = antigen_isos,
strata = tibble(Stratum = NA)
)
list(# est.incidence.by() expects a list.
`all data` = all_data) %>%
structure(antigen_isos = antigen_isos, strata = tibble(Stratum = NA))


return(stratumDataList)
Expand All @@ -44,14 +80,14 @@ stratify_data <- function(
strata_vars_curve_params <-
warn.missing.strata(
data = curve_params,
strata = strata %>% select(curve_strata_varnames),
strata = strata %>% select(all_of(curve_strata_varnames)),
dataname = "curve_params"
)

strata_vars_noise_params <-
warn.missing.strata(
data = noise_params,
strata = strata %>% select(noise_strata_varnames),
strata = strata %>% select(all_of(noise_strata_varnames)),
dataname = "noise_params"
)

Expand All @@ -71,62 +107,51 @@ stratify_data <- function(
list(
pop_data =
data %>%
semi_join(
cur_stratum_vals,
by = strata_varnames
) %>%
select(
attributes(data)$value_var,
attributes(data)$age_var,
semi_join(cur_stratum_vals, by = strata_varnames) %>%
select(all_of(
c(
data %>% get_value_var(),
data %>% get_age_var(),
"antigen_iso"
)
))
)

if (length(strata_vars_curve_params) == 0) {
data_and_params_cur_stratum$curve_params <-
curve_params %>% select("y1", "alpha", "r", "antigen_iso")
curve_params %>% select(all_of(curve_param_names))
} else {
data_and_params_cur_stratum$curve_params <-
curve_params %>%
semi_join(
cur_stratum_vals,
by = strata_vars_curve_params
) %>%
select("y1", "alpha", "r", "antigen_iso")
semi_join(cur_stratum_vals, by = strata_vars_curve_params) %>%
select(all_of(curve_param_names))
}

if (length(strata_vars_noise_params) == 0) {
data_and_params_cur_stratum$noise_params <-
noise_params %>%
select("nu", "eps", "y.low", "y.high", "antigen_iso")
select(all_of(noise_param_names))
} else {
data_and_params_cur_stratum$noise_params <-
noise_params %>%
semi_join(
cur_stratum_vals,
by = strata_vars_noise_params
) %>%
select("nu", "eps", "y.low", "y.high", "antigen_iso")
semi_join(cur_stratum_vals, by = strata_vars_noise_params) %>%
select(all_of(noise_param_names))
}

stratumDataList[[cur_stratum]] <-
data_and_params_cur_stratum %>%
structure(
class = union(
"biomarker_data_and_params",
class(data_and_params_cur_stratum)
)
)
structure(class = union(
"biomarker_data_and_params",
class(data_and_params_cur_stratum)
))
}



return(
structure(
stratumDataList,
antigen_isos = antigen_isos,
strata = strata,
class = c("biomarker_data_and_params.list", "list")
)
)
return(structure(
stratumDataList,
antigen_isos = antigen_isos,
strata = strata,
class = c("biomarker_data_and_params.list", "list")
))
}
1 change: 1 addition & 0 deletions inst/WORDLIST
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ Wiens
Zota
al
behaviour
biomarker
campylobacteriosis
de
der
Expand Down
5 changes: 3 additions & 2 deletions man/est.incidence.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 9 additions & 3 deletions man/est.incidence.by.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 4e04563

Please sign in to comment.