Skip to content

Commit

Permalink
Add option to remove certain markers before calculating LRs
Browse files Browse the repository at this point in the history
  • Loading branch information
zenalapp committed Sep 7, 2023
1 parent f6a38e9 commit e9c12b6
Show file tree
Hide file tree
Showing 24 changed files with 246 additions and 99 deletions.
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -11,5 +11,6 @@ export(match_static_thresh)
export(prep_bloodmeal_profiles)
export(prep_human_profiles)
export(rm_dups)
export(rm_markers)
export(rm_twins)
import(euroformix)
23 changes: 19 additions & 4 deletions R/bistro.R
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
#' @param rm_twins A boolean indicating whether or not to remove likely twins
#' (identical STR profiles) from the human database prior to identifying
#' matches. Default: TRUE
#' @param rm_markers A vector indicating what markers should be removed prior to calculating log10LRs. NULL to include all markers. By default, for the bistro function AMEL is removed as it is not standard to include it in LR calculations.
#' @param model_degrad A boolean indicating whether or not to model peak
#' degradation. Used for `modelDegrad` argument in
#' [euroformix::contLikSearch()]. Default: TRUE
Expand Down Expand Up @@ -92,6 +93,7 @@ bistro <-
bloodmeal_ids = NULL,
human_ids = NULL,
rm_twins = TRUE,
rm_markers = c('AMEL'),
model_degrad = TRUE,
model_bw_stutt = FALSE,
model_fw_stutt = FALSE,
Expand All @@ -110,33 +112,46 @@ bistro <-
bloodmeal_ids,
human_ids,
rm_twins,
rm_markers,
model_degrad,
model_bw_stutt,
model_fw_stutt,
difftol,
threads,
seed,
time_limit
time_limit,
return_lrs
)

if (calc_allele_freqs) {
pop_allele_freqs <- calc_allele_freqs(human_profiles)
pop_allele_freqs <- calc_allele_freqs(human_profiles, rm_markers)
}else if(!is.null(rm_markers)){
pop_allele_freqs <- pop_allele_freqs |>
dplyr::select(-dplyr::matches(paste0('^', toupper(rm_markers), '$')))
}

message("Formatting bloodmeal profiles")
bloodmeal_profiles <- prep_bloodmeal_profiles(
bloodmeal_profiles,
bloodmeal_ids,
peak_thresh
peak_thresh,
rm_markers
)

message("Formatting human profiles")
human_profiles <- prep_human_profiles(
human_profiles,
human_ids,
rm_twins
rm_twins,
rm_markers
)

bm_markers <- unique(bloodmeal_profiles$Marker[!is.na(bloodmeal_profiles$Marker)])
hu_markers <- unique(human_profiles$Marker[!is.na(human_profiles$Marker)])
message('Markers being used: ',
paste(intersect(bm_markers, hu_markers),
collapse = ', '))

message("Calculating log10LRs")

log10_lrs <-
Expand Down
8 changes: 7 additions & 1 deletion R/calc_allele_freqs.R
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,15 @@
#'
#' @export
#' @keywords internal
calc_allele_freqs <- function(human_profiles) {
calc_allele_freqs <- function(human_profiles, rm_markers = NULL) {
# check if expected columns are present
check_colnames(human_profiles, c("SampleName", "Marker", "Allele"))
check_ids(rm_markers, "rm_markers")

if(!is.null(rm_markers)){
human_profiles <- human_profiles |>
dplyr::filter(!Marker %in% rm_markers)
}

# remove duplicates if necessary
human_profiles <- rm_dups(human_profiles)
Expand Down
47 changes: 32 additions & 15 deletions R/checks.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,31 +9,47 @@ check_bistro_inputs <-
human_profiles,
kit,
peak_thresh,
pop_allele_freqs = NULL,
calc_allele_freqs = FALSE,
bloodmeal_ids = NULL,
human_ids = NULL,
rm_twins = TRUE,
model_degrad = TRUE,
model_bw_stutt = FALSE,
model_fw_stutt = FALSE,
difftol = 1,
threads = 4,
seed = 1,
time_limit = 3) {
pop_allele_freqs,
calc_allele_freqs,
bloodmeal_ids,
human_ids,
rm_twins,
rm_markers,
model_degrad,
model_bw_stutt,
model_fw_stutt,
difftol,
threads,
seed,
time_limit,
return_lrs) {
check_colnames(
bloodmeal_profiles,
c("SampleName", "Marker", "Allele", "Height")
)
check_colnames(human_profiles, c("SampleName", "Marker", "Allele"))
check_ids(bloodmeal_ids, "bloodmeal_ids")
check_ids(human_ids, "human_ids")
check_ids(rm_markers)

kit_df <- check_kit(kit)

bm_prof_markers <- toupper(unique(bloodmeal_profiles$Marker))
hu_prof_markers <- toupper(unique(human_profiles$Marker))
kit_markers <- toupper(unique(kit_df$Marker))
bm_prof_markers <- bloodmeal_profiles$Marker |>
unique() |>
toupper()
hu_prof_markers <- human_profiles$Marker |>
unique() |>
toupper()
kit_markers <- kit_df$Marker |>
unique() |>
toupper()

if(!is.null(rm_markers)){
rm_markers <- toupper(rm_markers)
bm_prof_markers <- bm_prof_markers[!bm_prof_markers %in% rm_markers]
hu_prof_markers <- hu_prof_markers[!hu_prof_markers %in% rm_markers]
kit_markers <- kit_markers[!kit_markers %in% rm_markers]
}

check_setdiff_markers(
bm_prof_markers,
Expand All @@ -56,6 +72,7 @@ check_bistro_inputs <-
check_is_bool(model_degrad, "model_degrad")
check_is_bool(model_bw_stutt, "model_bw_stutt")
check_is_bool(model_fw_stutt, "model_fw_stutt")
check_is_bool(return_lrs, "return_lrs")
check_is_numeric(difftol, "difftol", pos = TRUE)
check_is_numeric(threads, "threads", pos = TRUE)
check_is_numeric(seed, "seed")
Expand Down
10 changes: 7 additions & 3 deletions R/match_exact.R
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,8 @@ match_exact <- function(bloodmeal_profiles,
bloodmeal_ids = NULL,
human_ids = NULL,
peak_thresh = NULL,
rm_twins = TRUE) {
rm_twins = TRUE,
rm_markers = NULL) {
if (is.null(peak_thresh)) {
check_colnames(bloodmeal_profiles, c("SampleName", "Marker", "Allele"))
} else {
Expand All @@ -34,17 +35,20 @@ match_exact <- function(bloodmeal_profiles,
check_ids(bloodmeal_ids, "bloodmeal_ids")
check_ids(human_ids, "human_ids")
check_is_bool(rm_twins, "rm_twins")
check_ids(rm_markers, "rm_markers")

bloodmeal_profiles <- prep_bloodmeal_profiles(
bloodmeal_profiles,
bloodmeal_ids,
peak_thresh
peak_thresh,
rm_markers = NULL
)

human_profiles <- prep_human_profiles(
human_profiles,
human_ids,
rm_twins
rm_twins,
rm_markers = NULL
)

# human profiles
Expand Down
13 changes: 8 additions & 5 deletions R/match_similarity.R
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
#' match_similarity(bloodmeal_profiles, human_profiles)
match_similarity <- function(bloodmeal_profiles, human_profiles,
bloodmeal_ids = NULL, human_ids = NULL,
peak_thresh = NULL, rm_twins = TRUE,
peak_thresh = NULL, rm_twins = TRUE, rm_markers = NULL,
return_similarities = FALSE) {
if (is.null(peak_thresh)) {
check_colnames(bloodmeal_profiles, c("SampleName", "Marker", "Allele"))
Expand All @@ -44,12 +44,14 @@ match_similarity <- function(bloodmeal_profiles, human_profiles,
check_ids(bloodmeal_ids, "bloodmeal_ids")
check_ids(human_ids, "human_ids")
check_is_bool(rm_twins, "rm_twins")
check_ids(rm_markers, "rm_markers")
check_is_bool(return_similarities, "return_similarities")

bloodmeal_profiles <- prep_bloodmeal_profiles(
bloodmeal_profiles,
bloodmeal_ids,
peak_thresh
peak_thresh,
rm_markers = rm_markers
)

all_bm_ids <- unique(bloodmeal_profiles$SampleName)
Expand All @@ -60,12 +62,13 @@ match_similarity <- function(bloodmeal_profiles, human_profiles,
human_profiles <- prep_human_profiles(
human_profiles,
human_ids,
rm_twins
rm_twins,
rm_markers = rm_markers
) |>
tidyr::drop_na()

message("Calculating human-human similarities")
hu_similarities <- get_human_similarities(human_profiles)
hu_similarities <- get_human_similarity(human_profiles)

max_similarity <- hu_similarities |>
dplyr::filter(similarity != 1) |> # remove twins
Expand Down Expand Up @@ -136,7 +139,7 @@ match_similarity <- function(bloodmeal_profiles, human_profiles,
#' - hu1: human ID 1
#' - hu2: human ID 2
#' - similarity: similarity value (# exact locus matches / # loci)
get_human_similarities <- function(human_profiles) {
get_human_similarity <- function(human_profiles) {
human_profiles <- human_profiles |>
tidyr::drop_na()
str_hu1_by_marker <- human_profiles |>
Expand Down
23 changes: 21 additions & 2 deletions R/preprocess_data.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@
#' @keywords internal
prep_bloodmeal_profiles <- function(bloodmeal_profiles,
bloodmeal_ids = NULL,
peak_thresh = NULL) {
peak_thresh = NULL,
rm_markers = c('AMEL')) {
if (is.null(bloodmeal_ids)) {
bloodmeal_ids <- unique(bloodmeal_profiles$SampleName)
} else {
Expand All @@ -19,6 +20,7 @@ prep_bloodmeal_profiles <- function(bloodmeal_profiles,

bloodmeal_profiles <- bloodmeal_profiles |>
dplyr::filter(SampleName %in% bloodmeal_ids) |>
rm_markers(rm_markers) |>
rm_dups()

if (!is.null(peak_thresh)) {
Expand All @@ -41,7 +43,8 @@ prep_bloodmeal_profiles <- function(bloodmeal_profiles,
#' @keywords internal
prep_human_profiles <- function(human_profiles,
human_ids = NULL,
rm_twins = TRUE) {
rm_twins = TRUE,
rm_markers = c('AMEL')) {
if (rm_twins) {
human_profiles <- rm_twins(human_profiles)
}
Expand All @@ -54,11 +57,27 @@ prep_human_profiles <- function(human_profiles,

human_profiles <- human_profiles |>
dplyr::filter(SampleName %in% human_ids) |>
rm_markers(rm_markers) |>
rm_dups()

return(human_profiles)
}

#' Remove markers
#'
#' @param df Dataframe from which to remove markers
#'
#' @return Dataframe without markers
#' @export
#' @keywords internal
rm_markers <- function(profiles, markers){
check_colnames(profiles, 'Marker')
profiles <- profiles |>
dplyr::mutate(Marker = toupper(Marker)) |>
dplyr::filter(!Marker %in% toupper(markers))
return(profiles)
}

#' Remove duplicate rows with warning
#'
#' @param df Dataframe from which to remove duplicate rows
Expand Down
3 changes: 3 additions & 0 deletions man/bistro.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 3 additions & 1 deletion man/calc_allele_freqs.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

31 changes: 19 additions & 12 deletions man/check_bistro_inputs.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 3 additions & 3 deletions man/get_human_similarities.Rd → man/get_human_similarity.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit e9c12b6

Please sign in to comment.