Skip to content

Commit

Permalink
replace magrittr pipe with the base R pipe in the companion scripts
Browse files Browse the repository at this point in the history
  • Loading branch information
haganjam committed Jul 20, 2023
1 parent 6a24a31 commit f58f013
Show file tree
Hide file tree
Showing 9 changed files with 267 additions and 269 deletions.
72 changes: 36 additions & 36 deletions companion_scripts/01_data_cleaning/01_clean_equation_database.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,64 +9,64 @@ library(dplyr)
source("R/special_names.R")

# load the equation data
equ.dat <- readxl::read_xlsx(path = "C:/Users/james/OneDrive/PhD_Gothenburg/Chapter_4_FreshInvTraitR/data/allometry_database_ver4/equation_database.xlsx")
head(equ.dat)
equ_dat <- readxl::read_xlsx(path = "C:/Users/james/OneDrive/PhD_Gothenburg/Chapter_4_FreshInvTraitR/data/allometry_database_ver4/equation_database.xlsx")
head(equ_dat)

# clean the names for typos etc.
x <- bdc_clean_names(sci_names = equ.dat$db_taxon, save_outputs = FALSE)
x <- bdc::bdc_clean_names(sci_names = equ_dat$db_taxon, save_outputs = FALSE)

# check if any names were changed
if (!any(x$scientificName != x$names_clean)) {
message("No names were changed")
}

# replace the names in equ_dat with these cleaned names
equ.dat$db_taxon <- x$names_clean
equ_dat$db_taxon <- x$names_clean

# fix the special names
spec.names <- special_taxon_names()
spec_names <- special_taxon_names()

# replace incorrectly spelled special names
for (i in 1:length(spec.names)) {
for (i in 1:length(spec_names)) {
x <-
sapply(equ.dat$db_taxon, function(y) {
ain(x = spec.names[i], table = y, method = "lv", maxDist = 2)
sapply(equ_dat$db_taxon, function(y) {
ain(x = spec_names[i], table = y, method = "lv", maxDist = 2)
})

equ.dat[x, "db_taxon"] <- spec.names[i]
equ_dat[x, "db_taxon"] <- spec_names[i]
}

# convert relevant columns to numeric variables

# maximum and minimum body size
equ.dat[["body_size_min"]] <- round(as.numeric(equ.dat[["body_size_min"]]), 4)
equ.dat[["body_size_max"]] <- round(as.numeric(equ.dat[["body_size_max"]]), 4)
equ_dat[["body_size_min"]] <- round(as.numeric(equ_dat[["body_size_min"]]), 4)
equ_dat[["body_size_max"]] <- round(as.numeric(equ_dat[["body_size_max"]]), 4)

# number of data points
equ.dat[["n"]] <- round(as.numeric(equ.dat[["n"]]), 0)
equ_dat[["n"]] <- round(as.numeric(equ_dat[["n"]]), 0)

# r2 of the log-linear equation
equ.dat[["r2"]] <- round(as.numeric(equ.dat[["r2"]]), 2)
equ_dat[["r2"]] <- round(as.numeric(equ_dat[["r2"]]), 2)

# maximum and minimum dry biomass
equ.dat[["dry_biomass_min"]] <- round(as.numeric(equ.dat[["dry_biomass_min"]]), 4)
equ.dat[["dry_biomass_max"]] <- round(as.numeric(equ.dat[["dry_biomass_max"]]), 4)
equ_dat[["dry_biomass_min"]] <- round(as.numeric(equ_dat[["dry_biomass_min"]]), 4)
equ_dat[["dry_biomass_max"]] <- round(as.numeric(equ_dat[["dry_biomass_max"]]), 4)

# residual mean squared error
equ.dat[["RMS"]] <- round(as.numeric(equ.dat[["RMS"]]), 4)
equ_dat[["RMS"]] <- round(as.numeric(equ_dat[["RMS"]]), 4)

# back-transformation correction factor
equ.dat[["lm_correction"]] <- round(as.numeric(equ.dat[["lm_correction"]]), 4)
equ_dat[["lm_correction"]] <- round(as.numeric(equ_dat[["lm_correction"]]), 4)

# preservation correction factor
equ.dat[["correction_percentage"]] <- round(as.numeric(equ.dat[["correction_percentage"]]), 4)
equ_dat[["correction_percentage"]] <- round(as.numeric(equ_dat[["correction_percentage"]]), 4)

# convert the log-base to a numeric factor
equ.dat[["log_base"]] <- round(as.numeric(equ.dat[["log_base"]]), 5)
equ_dat[["log_base"]] <- round(as.numeric(equ_dat[["log_base"]]), 5)

# convert the equation parameters
equ.dat[["a"]] <- round(as.numeric(equ.dat[["a"]]), 5)
equ.dat[["b"]] <- round(as.numeric(equ.dat[["b"]]), 5)
equ_dat[["a"]] <- round(as.numeric(equ_dat[["a"]]), 5)
equ_dat[["b"]] <- round(as.numeric(equ_dat[["b"]]), 5)

# calculate the correction factors

Expand All @@ -78,24 +78,24 @@ BC_correction <- function(r2, a, ymin, ymax) {
}

# 1. calculate the BC-corrections
equ.dat <-
equ.dat %>%
mutate(lm_correction = ifelse(lm_correction_type == "BC_correction",
BC_correction(r2 = r2,
a = log_base,
ymin = dry_biomass_min,
ymax = dry_biomass_max),
lm_correction))
equ_dat <-
equ_dat |>
dplyr::mutate(lm_correction = ifelse(lm_correction_type == "BC_correction",
BC_correction(r2 = r2,
a = log_base,
ymin = dry_biomass_min,
ymax = dry_biomass_max),
lm_correction))

# 2. calculate the RMS_corrections
equ.dat <-
equ.dat %>%
mutate(lm_correction = ifelse(lm_correction_type == "RMS_correction",
log_base^(RMS/2),
lm_correction))
equ_dat <-
equ_dat |>
dplyr::mutate(lm_correction = ifelse(lm_correction_type == "RMS_correction",
log_base^(RMS/2),
lm_correction))

# replace the character NAs with true NAs as interpreted by R
equ.dat[equ.dat == "NA"] <- NA
equ_dat[equ_dat == "NA"] <- NA

# write this into a .rds file
saveRDS(equ.dat, file = paste("database", "/", "equation_database.rds", sep = ""))
saveRDS(equ_dat, file = paste("database", "/", "equation_database.rds", sep = ""))
20 changes: 10 additions & 10 deletions companion_scripts/01_data_cleaning/02_clean_taxon_database.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,32 +8,32 @@ library(stringdist)
source("R/special_names.R")

# load the taxon data
t.dat <- readxl::read_xlsx(path = "C:/Users/james/OneDrive/PhD_Gothenburg/Chapter_4_FreshInvTraitR/data/allometry_database_ver4/taxon_database.xlsx")
head(t.dat)
t_dat <- readxl::read_xlsx(path = "C:/Users/james/OneDrive/PhD_Gothenburg/Chapter_4_FreshInvTraitR/data/allometry_database_ver4/taxon_database.xlsx")
head(t_dat)

# clean the names for typos etc.
x <- bdc_clean_names(sci_names = t.dat$db_taxon, save_outputs = FALSE)
x <- bdc::bdc_clean_names(sci_names = t_dat$db_taxon, save_outputs = FALSE)

# check if any names were changed
if (!any(x$scientificName != x$names_clean)) {
message("No names were changed")
}

# replace the names in t_dat with these cleaned names
t.dat$db_taxon <- x$names_clean
t_dat$db_taxon <- x$names_clean

# fix the special names
spec.names <- special_taxon_names()
spec_names <- special_taxon_names()

# replace incorrectly spelled special names
for (i in 1:length(spec.names)) {
for (i in 1:length(spec_names)) {
x <-
sapply(t.dat$db_taxon, function(y) {
ain(x = spec.names[i], table = y, method = "lv", maxDist = 2)
sapply(t_dat$db_taxon, function(y) {
ain(x = spec_names[i], table = y, method = "lv", maxDist = 2)
})

t.dat[x, "db_taxon"] <- spec.names[i]
t_dat[x, "db_taxon"] <- spec_names[i]
}

# write this into a .rds file
saveRDS(t.dat, file = paste("database", "/", "taxon_database.rds", sep = ""))
saveRDS(t_dat, file = paste("database", "/", "taxon_database.rds", sep = ""))
Loading

0 comments on commit f58f013

Please sign in to comment.