diff --git a/DESCRIPTION b/DESCRIPTION
index a1b4a88e..e7072383 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: serocalculator
 Type: Package
 Title: Estimating Infection Rates from Serological Data
-Version: 1.2.0
+Version: 1.2.0.9000
 Authors@R: c(
   person(given = "Peter", family = "Teunis", email = "p.teunis@emory.edu", role = c("aut", "cph"), comment = "Author of the method and original code."),
   person(given = "Kristina", family = "Lai", email = "kwlai@ucdavis.edu", role = c("aut", "cre")), 
diff --git a/NEWS.md b/NEWS.md
index 61d31397..41549599 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,3 +1,5 @@
+# serocalculator (development version)
+
 # serocalculator 1.2.0
 * Added `test-summary.pop_data` test
 
diff --git a/R/curve_param_names.R b/R/curve_param_names.R
new file mode 100644
index 00000000..dcb1034f
--- /dev/null
+++ b/R/curve_param_names.R
@@ -0,0 +1 @@
+curve_param_names = c("y1", "alpha", "r", "antigen_iso")
diff --git a/R/est.incidence.R b/R/est.incidence.R
index 3ebeec1f..edcb834a 100644
--- a/R/est.incidence.R
+++ b/R/est.incidence.R
@@ -3,7 +3,7 @@
 #' This function models seroincidence using maximum likelihood estimation; that is, it finds the value of the seroincidence parameter which maximizes the likelihood (i.e., joint probability) of the data.
 #' @inheritParams log_likelihood
 #' @inheritParams stats::nlm
-#' @param pop_data [data.frame()] with cross-sectional serology data per antibody and age, and additional columns
+#' @param pop_data a [data.frame] with cross-sectional serology data per antibody and age, and additional columns
 #' @param lambda_start starting guess for incidence rate, in years/event.
 #' @param antigen_isos Character vector with one or more antibody names. Values must match `pop_data`
 #' @param build_graph whether to graph the log-likelihood function across a range of incidence rates (lambda values)
@@ -29,7 +29,8 @@
 #'   pop_data = xs_data %>% filter(Country == "Pakistan"),
 #'   curve_params = curves,
 #'   noise_params = noise %>% filter(Country == "Pakistan"),
-#'   antigen_isos = c("HlyE_IgG", "HlyE_IgA")
+#'   antigen_isos = c("HlyE_IgG", "HlyE_IgA"),
+#'   iterlim = 5 # limit iterations for the purpose of this example
 #' )
 #'
 #' summary(est1)
diff --git a/R/est.incidence.by.R b/R/est.incidence.by.R
index 216021d4..6395f5b3 100644
--- a/R/est.incidence.by.R
+++ b/R/est.incidence.by.R
@@ -4,15 +4,20 @@
 #' Function to estimate seroincidences based on cross-section serology data and longitudinal
 #' response model.
 #'
-#' @param pop_data [data.frame()] with cross-sectional serology data per antibody and age, and additional columns to identify possible `strata`.
-#' @param strata [character()] vector of stratum-defining variables. Values must be variable names in `pop_data`.
+#' @param pop_data a [data.frame] with cross-sectional serology data per antibody and age, and additional columns corresponding to each element of the `strata` input
+#' @param strata a [character] vector of stratum-defining variables. Values must be variable names in `pop_data`.
 #' @param curve_strata_varnames A subset of `strata`. Values must be variable names in `curve_params`. Default = "".
 #' @param noise_strata_varnames A subset of `strata`. Values must be variable names in `noise_params`. Default = "".
 #' @param num_cores Number of processor cores to use for calculations when computing by strata. If set to more than 1 and package \pkg{parallel} is available, then the computations are executed in parallel. Default = 1L.
 
 #' @details
 #'
-#' If `strata` is left empty, a warning will be produced, recommending that you use `est.incidence()` for unstratified analyses, and then the data will be passed to `est.incidence()`. If for some reason you want to use `est.incidence.by()` with no strata instead of calling `est.incidence()`, you may use `NA`, `NULL`, or "" as the `strata` argument to avoid that warning.
+#' If `strata` is left empty, a warning will be produced,
+#' recommending that you use [est.incidence()] for unstratified analyses,
+#' and then the data will be passed to [est.incidence()].
+#' If for some reason you want to use [est.incidence.by()]
+#' with no strata instead of calling [est.incidence()],
+#' you may use `NA`, `NULL`, or `""` as the `strata` argument to avoid that warning.
 #'
 #'
 #' @inheritParams est.incidence
@@ -45,6 +50,7 @@
 #'   noise_params = noise %>% filter(Country == "Pakistan"),
 #'   antigen_isos = c("HlyE_IgG", "HlyE_IgA"),
 #'   #num_cores = 8 # Allow for parallel processing to decrease run time
+#'   iterlim = 5 # limit iterations for the purpose of this example
 #' )
 #'
 #' summary(est2)
diff --git a/R/noise_param_names.R b/R/noise_param_names.R
new file mode 100644
index 00000000..4bc5503d
--- /dev/null
+++ b/R/noise_param_names.R
@@ -0,0 +1 @@
+noise_param_names = c("nu", "eps", "y.low", "y.high", "antigen_iso")
diff --git a/R/stratify_data.R b/R/stratify_data.R
index 6865bbc8..3681d794 100644
--- a/R/stratify_data.R
+++ b/R/stratify_data.R
@@ -1,37 +1,73 @@
-stratify_data <- function(
-    data,
-    antigen_isos,
-    curve_params,
-    noise_params,
-    strata_varnames = "",
-    curve_strata_varnames = NULL,
-    noise_strata_varnames = NULL) {
-  if (is.null(strata_varnames) || all(strata_varnames == "")) {
+#' @title Split data by stratum
+#' @description Split biomarker data, decay curve parameters, and noise parameters
+#' to prepare for stratified incidence estimation.
+#' @param strata_varnames [character()] vector of names of variables in `data` to stratify by
+#' @inheritParams est.incidence.by
+#'
+#' @returns a `"biomarker_data_and_params.list"` object (a [list] with extra attributes `"strata"`, `"antigen_isos"`, etc)
+#' @keywords internal
+#' @examples
+#' \dontrun{
+#' library(dplyr)
+#'
+#' xs_data <- load_pop_data("https://osf.io/download//n6cp3/")
+#'
+#' curve <- load_curve_params("https://osf.io/download/rtw5k/") %>%
+#'   filter(antigen_iso %in% c("HlyE_IgA", "HlyE_IgG")) %>%
+#'   slice(1:100, .by = antigen_iso) # Reduce dataset for the purposes of this example
+#'
+#' noise <- load_noise_params("https://osf.io/download//hqy4v/")
+#'
+#' stratified_data =
+#'   stratify_data(
+#'    data = xs_data,
+#'    curve_params = curve,
+#'    noise_params = noise,
+#'    strata_varnames = "catchment",
+#'    curve_strata_varnames = NULL,
+#'    noise_strata_varnames = NULL
+#'    )
+#' }
+stratify_data <- function(data,
+                          curve_params,
+                          noise_params,
+                          strata_varnames = "",
+                          curve_strata_varnames = NULL,
+                          noise_strata_varnames = NULL,
+                          antigen_isos = data %>% attr("antigen_isos")) {
+  no_strata = is.null(strata_varnames) || all(strata_varnames == "")
+
+  curve_params =
+    curve_params %>%
+    filter(.data[["antigen_iso"]] %in% antigen_isos)
+
+  noise_params =
+    noise_params %>%
+    filter(.data[["antigen_iso"]] %in% antigen_isos)
+
+  if (no_strata) {
     all_data <-
       list(
-        pop_data = data %>% select(
-          attributes(data)$value_var,
-          attributes(data)$age_var,
-          "antigen_iso"
-        ),
-        curve_params = curve_params %>% select("y1", "alpha", "r", "antigen_iso"),
-        noise_params = noise_params %>% select("nu", "eps", "y.low", "y.high", "antigen_iso")
+        pop_data = data %>% select(all_of(
+          c(
+            data %>% get_value_var(),
+            data %>% get_age_var(),
+            "antigen_iso"
+          )
+        )),
+        curve_params =
+          curve_params %>%
+          select(all_of(curve_param_names)),
+        noise_params =
+          noise_params %>%
+          select(all_of(noise_param_names))
       ) %>%
-      structure(
-        class = union(
-          "biomarker_data_and_params",
-          "list"
-        )
-      )
+      structure(class = union("biomarker_data_and_params", "list"))
 
     stratumDataList <-
-      list( # est.incidence.by() expects a list.
-        `all data` = all_data
-      ) %>%
-      structure(
-        antigen_isos = antigen_isos,
-        strata = tibble(Stratum = NA)
-      )
+      list(# est.incidence.by() expects a list.
+        `all data` = all_data) %>%
+      structure(antigen_isos = antigen_isos, strata = tibble(Stratum = NA))
 
 
     return(stratumDataList)
@@ -44,14 +80,14 @@ stratify_data <- function(
   strata_vars_curve_params <-
     warn.missing.strata(
       data = curve_params,
-      strata = strata %>% select(curve_strata_varnames),
+      strata = strata %>% select(all_of(curve_strata_varnames)),
       dataname = "curve_params"
     )
 
   strata_vars_noise_params <-
     warn.missing.strata(
       data = noise_params,
-      strata = strata %>% select(noise_strata_varnames),
+      strata = strata %>% select(all_of(noise_strata_varnames)),
       dataname = "noise_params"
     )
 
@@ -71,62 +107,51 @@ stratify_data <- function(
       list(
         pop_data =
           data %>%
-            semi_join(
-              cur_stratum_vals,
-              by = strata_varnames
-            ) %>%
-            select(
-              attributes(data)$value_var,
-              attributes(data)$age_var,
+          semi_join(cur_stratum_vals, by = strata_varnames) %>%
+          select(all_of(
+            c(
+              data %>% get_value_var(),
+              data %>% get_age_var(),
               "antigen_iso"
             )
+          ))
       )
 
     if (length(strata_vars_curve_params) == 0) {
       data_and_params_cur_stratum$curve_params <-
-        curve_params %>% select("y1", "alpha", "r", "antigen_iso")
+        curve_params %>% select(all_of(curve_param_names))
     } else {
       data_and_params_cur_stratum$curve_params <-
         curve_params %>%
-        semi_join(
-          cur_stratum_vals,
-          by = strata_vars_curve_params
-        ) %>%
-        select("y1", "alpha", "r", "antigen_iso")
+        semi_join(cur_stratum_vals, by = strata_vars_curve_params) %>%
+        select(all_of(curve_param_names))
     }
 
     if (length(strata_vars_noise_params) == 0) {
       data_and_params_cur_stratum$noise_params <-
         noise_params %>%
-        select("nu", "eps", "y.low", "y.high", "antigen_iso")
+        select(all_of(noise_param_names))
     } else {
       data_and_params_cur_stratum$noise_params <-
         noise_params %>%
-        semi_join(
-          cur_stratum_vals,
-          by = strata_vars_noise_params
-        ) %>%
-        select("nu", "eps", "y.low", "y.high", "antigen_iso")
+        semi_join(cur_stratum_vals, by = strata_vars_noise_params) %>%
+        select(all_of(noise_param_names))
     }
 
     stratumDataList[[cur_stratum]] <-
       data_and_params_cur_stratum %>%
-      structure(
-        class = union(
-          "biomarker_data_and_params",
-          class(data_and_params_cur_stratum)
-        )
-      )
+      structure(class = union(
+        "biomarker_data_and_params",
+        class(data_and_params_cur_stratum)
+      ))
   }
 
 
 
-  return(
-    structure(
-      stratumDataList,
-      antigen_isos = antigen_isos,
-      strata = strata,
-      class = c("biomarker_data_and_params.list", "list")
-    )
-  )
+  return(structure(
+    stratumDataList,
+    antigen_isos = antigen_isos,
+    strata = strata,
+    class = c("biomarker_data_and_params.list", "list")
+  ))
 }
diff --git a/inst/WORDLIST b/inst/WORDLIST
index 11befee7..aee74866 100644
--- a/inst/WORDLIST
+++ b/inst/WORDLIST
@@ -86,6 +86,7 @@ Wiens
 Zota
 al
 behaviour
+biomarker
 campylobacteriosis
 de
 der
diff --git a/man/est.incidence.Rd b/man/est.incidence.Rd
index a8859c19..58b19797 100644
--- a/man/est.incidence.Rd
+++ b/man/est.incidence.Rd
@@ -19,7 +19,7 @@ est.incidence(
 )
 }
 \arguments{
-\item{pop_data}{\code{\link[=data.frame]{data.frame()}} with cross-sectional serology data per antibody and age, and additional columns}
+\item{pop_data}{a \link{data.frame} with cross-sectional serology data per antibody and age, and additional columns}
 
 \item{curve_params}{a \code{\link[=data.frame]{data.frame()}} containing MCMC samples of parameters from the Bayesian posterior distribution of a longitudinal decay curve model. The parameter columns must be named:
 \itemize{
@@ -104,7 +104,8 @@ est1 <- est.incidence(
   pop_data = xs_data \%>\% filter(Country == "Pakistan"),
   curve_params = curves,
   noise_params = noise \%>\% filter(Country == "Pakistan"),
-  antigen_isos = c("HlyE_IgG", "HlyE_IgA")
+  antigen_isos = c("HlyE_IgG", "HlyE_IgA"),
+  iterlim = 5 # limit iterations for the purpose of this example
 )
 
 summary(est1)
diff --git a/man/est.incidence.by.Rd b/man/est.incidence.by.Rd
index 89f93d47..123a4b16 100644
--- a/man/est.incidence.by.Rd
+++ b/man/est.incidence.by.Rd
@@ -21,7 +21,7 @@ est.incidence.by(
 )
 }
 \arguments{
-\item{pop_data}{\code{\link[=data.frame]{data.frame()}} with cross-sectional serology data per antibody and age, and additional columns to identify possible \code{strata}.}
+\item{pop_data}{a \link{data.frame} with cross-sectional serology data per antibody and age, and additional columns corresponding to each element of the \code{strata} input}
 
 \item{curve_params}{a \code{\link[=data.frame]{data.frame()}} containing MCMC samples of parameters from the Bayesian posterior distribution of a longitudinal decay curve model. The parameter columns must be named:
 \itemize{
@@ -43,7 +43,7 @@ est.incidence.by(
 \item \code{y.high}: upper limit of detection for the current antigen isotype
 }}
 
-\item{strata}{\code{\link[=character]{character()}} vector of stratum-defining variables. Values must be variable names in \code{pop_data}.}
+\item{strata}{a \link{character} vector of stratum-defining variables. Values must be variable names in \code{pop_data}.}
 
 \item{curve_strata_varnames}{A subset of \code{strata}. Values must be variable names in \code{curve_params}. Default = "".}
 
@@ -103,7 +103,12 @@ Function to estimate seroincidences based on cross-section serology data and lon
 response model.
 }
 \details{
-If \code{strata} is left empty, a warning will be produced, recommending that you use \code{est.incidence()} for unstratified analyses, and then the data will be passed to \code{est.incidence()}. If for some reason you want to use \code{est.incidence.by()} with no strata instead of calling \code{est.incidence()}, you may use \code{NA}, \code{NULL}, or "" as the \code{strata} argument to avoid that warning.
+If \code{strata} is left empty, a warning will be produced,
+recommending that you use \code{\link[=est.incidence]{est.incidence()}} for unstratified analyses,
+and then the data will be passed to \code{\link[=est.incidence]{est.incidence()}}.
+If for some reason you want to use \code{\link[=est.incidence.by]{est.incidence.by()}}
+with no strata instead of calling \code{\link[=est.incidence]{est.incidence()}},
+you may use \code{NA}, \code{NULL}, or \code{""} as the \code{strata} argument to avoid that warning.
 }
 \examples{
 
@@ -124,6 +129,7 @@ est2 <- est.incidence.by(
   noise_params = noise \%>\% filter(Country == "Pakistan"),
   antigen_isos = c("HlyE_IgG", "HlyE_IgA"),
   #num_cores = 8 # Allow for parallel processing to decrease run time
+  iterlim = 5 # limit iterations for the purpose of this example
 )
 
 summary(est2)
diff --git a/man/stratify_data.Rd b/man/stratify_data.Rd
new file mode 100644
index 00000000..7dacbcbb
--- /dev/null
+++ b/man/stratify_data.Rd
@@ -0,0 +1,76 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/stratify_data.R
+\name{stratify_data}
+\alias{stratify_data}
+\title{Split data by stratum}
+\usage{
+stratify_data(
+  data,
+  curve_params,
+  noise_params,
+  strata_varnames = "",
+  curve_strata_varnames = NULL,
+  noise_strata_varnames = NULL,
+  antigen_isos = data \%>\% attr("antigen_isos")
+)
+}
+\arguments{
+\item{curve_params}{a \code{\link[=data.frame]{data.frame()}} containing MCMC samples of parameters from the Bayesian posterior distribution of a longitudinal decay curve model. The parameter columns must be named:
+\itemize{
+\item \code{antigen_iso}: a \code{\link[=character]{character()}} vector indicating antigen-isotype combinations
+\item \code{iter}: an \code{\link[=integer]{integer()}} vector indicating MCMC sampling iterations
+\item \code{y0}: baseline antibody level at $t=0$ ($y(t=0)$)
+\item \code{y1}: antibody peak level (ELISA units)
+\item \code{t1}: duration of infection
+\item \code{alpha}: antibody decay rate (1/days for the current longitudinal parameter sets)
+\item \code{r}: shape factor of antibody decay
+}}
+
+\item{noise_params}{a \code{\link[=data.frame]{data.frame()}} (or \code{\link[tibble:tibble]{tibble::tibble()}}) containing the following variables, specifying noise parameters for each antigen isotype:
+\itemize{
+\item \code{antigen_iso}: antigen isotype whose noise parameters are being specified on each row
+\item \code{nu}: biological noise
+\item \code{eps}: measurement noise
+\item \code{y.low}: lower limit of detection for the current antigen isotype
+\item \code{y.high}: upper limit of detection for the current antigen isotype
+}}
+
+\item{strata_varnames}{\code{\link[=character]{character()}} vector of names of variables in \code{data} to stratify by}
+
+\item{curve_strata_varnames}{A subset of \code{strata}. Values must be variable names in \code{curve_params}. Default = "".}
+
+\item{noise_strata_varnames}{A subset of \code{strata}. Values must be variable names in \code{noise_params}. Default = "".}
+
+\item{antigen_isos}{Character vector with one or more antibody names. Values must match \code{pop_data}}
+}
+\value{
+a \code{"biomarker_data_and_params.list"} object (a \link{list} with extra attributes \code{"strata"}, \code{"antigen_isos"}, etc)
+}
+\description{
+Split biomarker data, decay curve parameters, and noise parameters
+to prepare for stratified incidence estimation.
+}
+\examples{
+\dontrun{
+library(dplyr)
+
+xs_data <- load_pop_data("https://osf.io/download//n6cp3/")
+
+curve <- load_curve_params("https://osf.io/download/rtw5k/") \%>\%
+  filter(antigen_iso \%in\% c("HlyE_IgA", "HlyE_IgG")) \%>\%
+  slice(1:100, .by = antigen_iso) # Reduce dataset for the purposes of this example
+
+noise <- load_noise_params("https://osf.io/download//hqy4v/")
+
+stratified_data =
+  stratify_data(
+   data = xs_data,
+   curve_params = curve,
+   noise_params = noise,
+   strata_varnames = "catchment",
+   curve_strata_varnames = NULL,
+   noise_strata_varnames = NULL
+   )
+}
+}
+\keyword{internal}
diff --git a/tests/testthat/_snaps/stratify_data.md b/tests/testthat/_snaps/stratify_data.md
new file mode 100644
index 00000000..82e6d68b
--- /dev/null
+++ b/tests/testthat/_snaps/stratify_data.md
@@ -0,0 +1,103 @@
+# stratify_data() produces consistent results
+
+    Code
+      stratified_data
+    Output
+      $`Stratum 1`
+      $pop_data
+      # A tibble: 588 x 3
+         value   age antigen_iso
+         <dbl> <dbl> <fct>      
+       1 5.69   18   HlyE_IgA   
+       2 4.21   18   HlyE_IgG   
+       3 1.23    7.3 HlyE_IgA   
+       4 3.00    7.3 HlyE_IgG   
+       5 1.08    2.6 HlyE_IgA   
+       6 0.217   2.6 HlyE_IgG   
+       7 1.43    3.9 HlyE_IgA   
+       8 0.956   3.9 HlyE_IgG   
+       9 3.06   13   HlyE_IgA   
+      10 9.57   13   HlyE_IgG   
+      # i 578 more rows
+      
+      $curve_params
+      # A tibble: 200 x 4
+            y1     alpha     r antigen_iso
+         <dbl>     <dbl> <dbl> <fct>      
+       1  63.5 0.000581   1.75 HlyE_IgA   
+       2 288.  0.000459   2.66 HlyE_IgA   
+       3 432.  0.000277   1.61 HlyE_IgA   
+       4  30.6 0.00127    1.87 HlyE_IgA   
+       5 160.  0.00140    1.40 HlyE_IgA   
+       6 525.  0.000294   2.26 HlyE_IgA   
+       7  30.8 0.00459    1.55 HlyE_IgA   
+       8  41.3 0.00234    1.48 HlyE_IgA   
+       9 248.  0.0000467  8.50 HlyE_IgA   
+      10 319.  0.000448   1.75 HlyE_IgA   
+      # i 190 more rows
+      
+      $noise_params
+      # A tibble: 2 x 5
+           nu   eps y.low  y.high antigen_iso
+        <dbl> <dbl> <dbl>   <dbl> <chr>      
+      1  2.60 0.279 0.508 5000000 HlyE_IgA   
+      2  2.36 0.146 1.59  5000000 HlyE_IgG   
+      
+      attr(,"class")
+      [1] "biomarker_data_and_params" "list"                     
+      
+      $`Stratum 2`
+      $pop_data
+      # A tibble: 400 x 3
+          value   age antigen_iso
+          <dbl> <dbl> <fct>      
+       1  0.568  13.2 HlyE_IgA   
+       2  2.15   13.2 HlyE_IgG   
+       3  0.779  11   HlyE_IgA   
+       4  1.89   11   HlyE_IgG   
+       5  1.90   12   HlyE_IgA   
+       6  8.09   12   HlyE_IgG   
+       7  1.41   16   HlyE_IgA   
+       8  2.37   16   HlyE_IgG   
+       9  7.12    7.6 HlyE_IgA   
+      10 11.6     7.6 HlyE_IgG   
+      # i 390 more rows
+      
+      $curve_params
+      # A tibble: 200 x 4
+            y1     alpha     r antigen_iso
+         <dbl>     <dbl> <dbl> <fct>      
+       1  63.5 0.000581   1.75 HlyE_IgA   
+       2 288.  0.000459   2.66 HlyE_IgA   
+       3 432.  0.000277   1.61 HlyE_IgA   
+       4  30.6 0.00127    1.87 HlyE_IgA   
+       5 160.  0.00140    1.40 HlyE_IgA   
+       6 525.  0.000294   2.26 HlyE_IgA   
+       7  30.8 0.00459    1.55 HlyE_IgA   
+       8  41.3 0.00234    1.48 HlyE_IgA   
+       9 248.  0.0000467  8.50 HlyE_IgA   
+      10 319.  0.000448   1.75 HlyE_IgA   
+      # i 190 more rows
+      
+      $noise_params
+      # A tibble: 2 x 5
+           nu   eps y.low  y.high antigen_iso
+        <dbl> <dbl> <dbl>   <dbl> <chr>      
+      1  2.60 0.279 0.508 5000000 HlyE_IgA   
+      2  2.36 0.146 1.59  5000000 HlyE_IgG   
+      
+      attr(,"class")
+      [1] "biomarker_data_and_params" "list"                     
+      
+      attr(,"antigen_isos")
+      [1] HlyE_IgA HlyE_IgG
+      Levels: HlyE_IgA HlyE_IgG
+      attr(,"strata")
+      # A tibble: 2 x 3
+        Stratum   catchment     n
+        <chr>     <chr>     <int>
+      1 Stratum 1 aku         294
+      2 Stratum 2 kgh         200
+      attr(,"class")
+      [1] "biomarker_data_and_params.list" "list"                          
+
diff --git a/tests/testthat/test-stratify_data.R b/tests/testthat/test-stratify_data.R
new file mode 100644
index 00000000..546a623b
--- /dev/null
+++ b/tests/testthat/test-stratify_data.R
@@ -0,0 +1,61 @@
+test_that("stratify_data() produces consistent results", {
+
+  library(dplyr)
+  library(readr)
+
+  xs_data <-
+    read_rds("https://osf.io/download//n6cp3/")  %>%
+    as_pop_data() %>%
+    filter(Country == "Pakistan")
+
+  curve <-
+    load_curve_params("https://osf.io/download/rtw5k/") %>%
+    filter(antigen_iso %in% c("HlyE_IgA", "HlyE_IgG")) %>%
+    slice(1:100, .by = antigen_iso) # Reduce dataset for the purposes of this example
+
+  noise <-
+    load_noise_params("https://osf.io/download//hqy4v/") %>%
+    filter(Country == "Pakistan")
+
+  stratified_data =
+    stratify_data(
+      data = xs_data,
+      curve_params = curve,
+      noise_params = noise,
+      strata_varnames = "catchment",
+      curve_strata_varnames = NULL,
+      noise_strata_varnames = NULL
+    )
+
+  expect_snapshot(stratified_data)
+})
+
+test_that("stratify_data() warns about missing data", {
+
+  library(dplyr)
+  library(readr)
+
+  xs_data <-
+    read_rds("https://osf.io/download//n6cp3/")  %>%
+    as_pop_data() %>%
+    filter(Country == "Nepal")
+
+  curve <-
+    load_curve_params("https://osf.io/download/rtw5k/") %>%
+    filter(antigen_iso %in% c("HlyE_IgA", "HlyE_IgG")) %>%
+    slice(1:100, .by = antigen_iso) # Reduce dataset for the purposes of this example
+
+  noise <-
+    load_noise_params("https://osf.io/download//hqy4v/") %>%
+    filter(Country == "Nepal")
+
+  stratify_data(
+    data = xs_data,
+    curve_params = curve,
+    noise_params = noise,
+    strata_varnames = "catchment",
+    curve_strata_varnames = NULL,
+    noise_strata_varnames = NULL
+  ) |>
+    expect_warning(regexp = "The number of observations in `data` varies")
+})