Merge pull request #289 from UCD-SERG/clean_simulation_vignette

Clean simulation vignette
UCD-SERG · Oct 2, 2024 · 7501ca6 · 7501ca6
2 parents 6bcf346 + da5ef07
commit 7501ca6
Show file tree

Hide file tree

Showing 7 changed files with 106 additions and 79 deletions.
diff --git a/.Rbuildignore b/.Rbuildignore
@@ -26,6 +26,7 @@ allpopsamples_hlye.csv$
 ^_quarto\.yml$
 ^\.lintr$
 ^vignettes$
-^man/df_to_array\.Rd$
 ^vignettes/\.quarto$
 ^vignettes/methodology\.qmd$
+^\.quarto$
+^man/df_to_array\.Rd$
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Type: Package
 Package: serocalculator
 Title: Estimating Infection Rates from Serological Data
-Version: 1.2.0.9012
+Version: 1.2.0.9013
 Authors@R: c(
     person("Peter", "Teunis", , "[email protected]", role = c("aut", "cph"),
            comment = "Author of the method and original code."),
@@ -69,5 +69,4 @@ Remotes:
     moodymudskipper/devtag
 Roxygen: list(markdown = TRUE, roclets = c("collate", "rd", "namespace", "devtag::dev_roclet"))
 RoxygenNote: 7.3.2
-VignetteBuilder: 
-    quarto
+
diff --git a/NEWS.md b/NEWS.md
@@ -2,6 +2,8 @@
 
 ## New features
 
+* Updated `simulate_xsectionalData.Rmd()` (linting, removing deprecated functions).
+
 * Added default value for `antigen_isos` argument in `log_likelihood()` (#286)
 
 * Updated enteric fever example article with upgraded code and visualizations (#290)

diff --git a/R/sees_baseline_data.R b/R/sees_baseline_data.R
diff --git a/data-raw/sees_baseline_data.R b/data-raw/sees_baseline_data.R
@@ -0,0 +1,14 @@
+# #' @docType data
+# #'
+# #' @name sees_crossSectional_baseline_allCountries
+# #'
+# #' @title
+# #' Cross-sectional Typhoid data
+# #'
+# #' @description
+# #' A [tibble::tibble()]
+# #'
+# #' @usage
+# #' sees_crossSectional_baseline_allCountries
+# #'
+# NULL
diff --git a/inst/WORDLIST b/inst/WORDLIST
@@ -5,7 +5,6 @@ BMC
 Bonačić
 Bordetella
 CMD
-CODE’
 Campylobacter
 Ceper
 Chiang
@@ -85,6 +84,9 @@ Valk
 Vectorized
 Vellore
 Versteegh
+VignetteEncoding
+VignetteEngine
+VignetteIndexEntry
 Volterra
 Wetering
 Wiens
@@ -95,6 +97,7 @@ behaviour
 bioassays
 biomarker
 boldsymbol
+bookdown
 callout
 campylobacteriosis
 cdot
@@ -122,6 +125,7 @@ isotypes
 jinf
 jitter
 kDa
+knitr
 le
 leq
 llik
@@ -151,6 +155,7 @@ qquad
 recombinant
 renewcommand
 rescale
+rmarkdown
 savePath
 sectionally
 sera
@@ -173,9 +178,11 @@ subfigures
 th
 tibble
 titers
+toc
 tsutsugamushi
 undercount
 unstratified
 varepsilon
 vec
 vee
+xsectionalData
diff --git a/vignettes/articles/simulate_xsectionalData.Rmd b/vignettes/articles/simulate_xsectionalData.Rmd
@@ -11,12 +11,14 @@ vignette: >
   %\VignetteIndexEntry{simulate_xsectionalData}
   %\VignetteEngine{knitr::rmarkdown}
   %\VignetteEncoding{UTF-8}
----
-
-
+  ---
+  
+  
 This vignette shows how to simulate a cross-sectional sample of 
-seroresponses for incident infections as a Poisson process with frequency `lambda`. 
-Responses are generated for the antibodies given in the `antigen_isos` argument.
+seroresponses for incident infections as a Poisson process with 
+frequency `lambda`. 
+Responses are generated for the antibodies given in the `antigen_isos` 
+argument.
 
 Age range of the simulated cross-sectional record is `lifespan`.
 
@@ -30,19 +32,22 @@ However, when `age.fx` is set to NA then the age at infection is used.
 
 The boolean `renew.params` determines whether each infection uses a
 new set of longitudinal parameters, sampled at random from the
-posterior predictive output of  the longitudinal model. If set to `FALSE`
+posterior predictive output of  the longitudinal model. 
+If set to `FALSE`,
 a parameter set is chosen at birth and kept, but:
-1. the baseline antibody levels (`y0`) are updated with the simulated level
-(just) prior to infection, and
-2. when `is.na(age.fx)` then the selected parameter sample is updated for the
-age when infection occurs.
 
+1. the baseline antibody levels (`y0`) are updated with the simulated 
+level (just) prior to infection, and
+
+2. when `is.na(age.fx)` then the selected parameter sample is updated 
+for the age when infection occurs.
+
 There is also a variable `n.mc`: when `n.mc==0` then a random MC
 sample is chosen out of the posterior set (1:4000). When `n.mc` is
 given a value in 1:4000 then the chosen number is fixed and reused
 in any subsequent infection. This is for diagnostic purposes.
-
-
+  
+  
 ```{r, include = FALSE}
 knitr::opts_chunk$set(
   collapse = TRUE,
@@ -56,12 +61,14 @@ knitr::opts_chunk$set(
 
 ## load model parameters
 
+Here we load in longitudinal parameters;
+these are modeled from all SEES cases across all ages and countries:
+
 ```{r setup}
 library(serocalculator)
 library(tidyverse)
 library(ggbeeswarm) # for plotting
 library(dplyr)
-# load in longitudinal parameters, these are modeled from all SEES cases across all ages and countries
 dmcmc <-
   "https://osf.io/download/rtw5k" %>%
   load_curve_params() %>%
@@ -70,7 +77,9 @@ dmcmc <-
 
 ## visualize antibody decay model
 
-We can graph individual MCMC samples from the posterior distribution of model parameters using a `autoplot.curve_params()` method for the `autoplot()` function:
+We can graph individual MCMC samples from the posterior distribution 
+of model parameters using a `autoplot.curve_params()` method for the 
+`autoplot()` function:
 
 ```{r}
 dmcmc %>% autoplot(n_curves = 50)
@@ -155,7 +164,12 @@ ggplot(csdata, aes(
   x = as.factor(antigen_iso),
   y = value
 )) +
-  geom_beeswarm(size = .2, alpha = .3, aes(color = antigen_iso), show.legend = F) +
+  geom_beeswarm(
+    size = .2,
+    alpha = .3,
+    aes(color = antigen_iso),
+    show.legend = FALSE
+  ) +
   geom_boxplot(outlier.colour = NA, fill = NA) +
   scale_y_log10() +
   theme_linedraw() +
@@ -167,46 +181,52 @@ ggplot(csdata, aes(
 We can calculate the log-likelihood of the data as a function of the incidence rate directly:
 
 ```{r}
-ll_A <- log_likelihood(
-  pop_data = csdata,
-  curve_params = dmcmc,
-  noise_params = cond,
-  antigen_isos = "HlyE_IgA",
-  lambda = 0.1
-) %>% print()
+ll_a <-
+  log_likelihood(
+    pop_data = csdata,
+    curve_params = dmcmc,
+    noise_params = cond,
+    antigen_isos = "HlyE_IgA",
+    lambda = 0.1
+  ) %>%
+  print()
 
-ll_G <- log_likelihood(
-  pop_data = csdata,
-  curve_params = dmcmc,
-  noise_params = cond,
-  antigen_isos = "HlyE_IgG",
-  lambda = 0.1
-) %>% print()
+ll_g <-
+  log_likelihood(
+    pop_data = csdata,
+    curve_params = dmcmc,
+    noise_params = cond,
+    antigen_isos = "HlyE_IgG",
+    lambda = 0.1
+  ) %>%
+  print()
 
-ll_AG <- log_likelihood(
-  pop_data = csdata,
-  curve_params = dmcmc,
-  noise_params = cond,
-  antigen_isos = c("HlyE_IgG", "HlyE_IgA"),
-  lambda = 0.1
-) %>% 
+ll_ag <-
+  log_likelihood(
+    pop_data = csdata,
+    curve_params = dmcmc,
+    noise_params = cond,
+    antigen_isos = c("HlyE_IgG", "HlyE_IgA"),
+    lambda = 0.1
+  ) %>%
   print()
 
-print(ll_A + ll_G)
+print(ll_a + ll_g)
 ```
 
 ## graph log-likelihood
 
 We can also graph the log-likelihoods, even without finding the MLEs, using `graph_loglik()`:
 
 ```{r}
-lik_HlyE_IgA <- graph_loglik(
-  pop_data = csdata,
-  curve_params = dmcmc,
-  noise_params = cond,
-  antigen_isos = "HlyE_IgA",
-  log_x = TRUE
-)
+lik_HlyE_IgA <-
+  graph_loglik(
+    pop_data = csdata,
+    curve_params = dmcmc,
+    noise_params = cond,
+    antigen_isos = "HlyE_IgA",
+    log_x = TRUE
+  )
 
 lik_HlyE_IgG <- graph_loglik(
   previous_plot = lik_HlyE_IgA,
@@ -239,9 +259,10 @@ est1 <- est.incidence(
   curve_params = dmcmc,
   noise_params = cond,
   lambda_start = .1,
-  build_graph = T,
-  verbose = T, # print updates as the function runs
-  print_graph = F, # display the log-likelihood curve while `est.incidence()` is running
+  build_graph = TRUE,
+  verbose = TRUE, # print updates as the function runs
+  print_graph = FALSE, # display the log-likelihood curve while
+  #`est.incidence()` is running
   antigen_isos = antibodies
 )
 ```
@@ -269,7 +290,6 @@ autoplot(est1, log_x = TRUE)
 ```{r "init-parallel"}
 library(parallel)
 n_cores <- max(1, parallel::detectCores() - 1)
-# n_cores = 1
 print(n_cores)
 ```
 
@@ -282,13 +302,12 @@ nclus <- 10
 nrep <- 100
 
 # incidence rate in e
-lmbdaVec <- c(.05, .1, .15, .2)
+lambdas <- c(.05, .1, .15, .2)
 
-sim.df <-
+sim_df <-
   sim.cs.multi(
-    # verbose = TRUE,
     n_cores = n_cores,
-    lambdas = lmbdaVec,
+    lambdas = lambdas,
     nclus = nclus,
     n.smpl = nrep,
     age.rng = lifespan,
@@ -300,16 +319,17 @@ sim.df <-
     format = "long"
   )
 
-print(sim.df)
+print(sim_df)
 ```
 
 We can plot the distributions of the simulated responses:
 
 ```{r}
-ggplot(sim.df, aes(
-  x = as.factor(cluster),
-  y = value
-)) +
+sim_df %>% ggplot() +
+  aes(
+    x = as.factor(cluster),
+    y = value
+  ) +
   geom_beeswarm(size = .2, alpha = .3, aes(color = antigen_iso)) +
   geom_boxplot(outlier.colour = NA, fill = NA) +
   scale_y_log10() +
@@ -321,9 +341,8 @@ ggplot(sim.df, aes(
 
 ```{r, "est-by-stratum"}
 ests <-
-  # sim.df %>%
   est.incidence.by(
-    pop_data = sim.df,
+    pop_data = sim_df,
     curve_params = dmcmc,
     noise_params = cond,
     num_cores = n_cores,
@@ -333,7 +352,6 @@ ests <-
     verbose = TRUE,
     build_graph = TRUE, # slows down the function substantially
     antigen_isos = c("HlyE_IgG", "HlyE_IgA")
-    # antigen_isos = "HlyE_IgA"
   )
 ```