Skip to content

Commit

Permalink
rename, remove alias
Browse files Browse the repository at this point in the history
  • Loading branch information
strengejacke committed Sep 12, 2023
1 parent 1e21e05 commit 8302c97
Show file tree
Hide file tree
Showing 4 changed files with 46 additions and 55 deletions.
1 change: 0 additions & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -285,7 +285,6 @@ export(row_means)
export(row_to_colnames)
export(rowid_as_column)
export(rownames_as_column)
export(seek_variables)
export(skewness)
export(slide)
export(smoothness)
Expand Down
25 changes: 10 additions & 15 deletions R/seek_variables.R → R/data_seek.R
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
#' @title Find variables by its name, variable or value labels
#' @name seek_variables
#' @title Find variables by their names, variable or value labels
#' @name data_seek
#'
#' @description This function seeks variables in a data frame, based on patterns
#' that either match the variable name (column name), variable labels, value labels
#' or factor levels. Matching variable and value labels only works for "labelled"
#' data, i.e. when the variables either have a `label` attribute or `labels`
#' attribute.
#'
#' `seek_variables()` is particular useful for larger data frames with labelled
#' `data_seek()` is particularly useful for larger data frames with labelled
#' data - finding the correct variable name can be a challenge. This function
#' helps to find the required variables, when only certain patterns of variable
#' names or labels are known.
Expand All @@ -34,25 +34,25 @@
#'
#' @examples
#' # seek variables with "Length" in variable name or labels
#' seek_variables(iris, "Length")
#' data_seek(iris, "Length")
#'
#' # seek variables with "dependency" in names or labels
#' # column "e42dep" has a label-attribute "elder's dependency"
#' data(efc)
#' seek_variables(efc, "dependency")
#' data_seek(efc, "dependency")
#'
#' # "female" only appears as value label attribute - default search is in
#' # variable names and labels only, so no match
#' seek_variables(efc, "female")
#' data_seek(efc, "female")
#' # when we seek in all sources, we find the variable "e16sex"
#' seek_variables(efc, "female", seek = "all")
#' data_seek(efc, "female", seek = "all")
#'
#' # typo, no match
#' seek_variables(iris, "Lenght")
#' data_seek(iris, "Lenght")
#' # typo, fuzzy match
#' seek_variables(iris, "Lenght", fuzzy = TRUE)
#' data_seek(iris, "Lenght", fuzzy = TRUE)
#' @export
seek_variables <- function(data, pattern, seek = c("names", "labels"), fuzzy = FALSE) {
data_seek <- function(data, pattern, seek = c("names", "labels"), fuzzy = FALSE) {
# check valid args
if (!is.data.frame(data)) {
insight::format_error("`data` must be a data frame.")
Expand Down Expand Up @@ -155,11 +155,6 @@ seek_variables <- function(data, pattern, seek = c("names", "labels"), fuzzy = F
out
}

# alias
#' @rdname seek_variables
#' @export
data_seek <- seek_variables


# methods ---------------------------------------------------------------------

Expand Down
27 changes: 12 additions & 15 deletions man/seek_variables.Rd → man/data_seek.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

48 changes: 24 additions & 24 deletions tests/testthat/test-data_seek.R
Original file line number Diff line number Diff line change
@@ -1,81 +1,81 @@
test_that("seek_variables - simple use case", {
test_that("data_seek - simple use case", {
data(iris)
out <- seek_variables(iris, "Length")
out <- data_seek(iris, "Length")
expect_identical(out$index, c(1L, 3L))
expect_identical(out$labels, c("Sepal.Length", "Petal.Length"))
})

test_that("seek_variables - seek label attribute", {
test_that("data_seek - seek label attribute", {
data(efc)
out <- seek_variables(efc, "dependency")
out <- data_seek(efc, "dependency")
expect_identical(out$index, which(colnames(efc) == out$column))
expect_identical(out$labels, "elder's dependency")
})

test_that("seek_variables - seek label attribute", {
test_that("data_seek - seek label attribute", {
data(efc)
out <- seek_variables(efc, "female")
out <- data_seek(efc, "female")
expect_identical(nrow(out), 0L)
out <- seek_variables(efc, "female", seek = "all")
out <- data_seek(efc, "female", seek = "all")
expect_identical(out$index, which(colnames(efc) == out$column))
expect_identical(out$labels, "elder's gender")
})

test_that("seek_variables - fuzzy match", {
test_that("data_seek - fuzzy match", {
data(iris)
out <- seek_variables(iris, "Lenght")
out <- data_seek(iris, "Lenght")
expect_identical(nrow(out), 0L)
out <- seek_variables(iris, "Lenght", fuzzy = TRUE)
out <- data_seek(iris, "Lenght", fuzzy = TRUE)
expect_identical(out$index, which(colnames(iris) %in% out$column))
expect_identical(out$labels, c("Sepal.Length", "Petal.Length"))
})

test_that("seek_variables - fuzzy match, value labels", {
test_that("data_seek - fuzzy match, value labels", {
data(efc)
out <- seek_variables(efc, "femlae", seek = "all", fuzzy = TRUE)
out <- data_seek(efc, "femlae", seek = "all", fuzzy = TRUE)
expect_identical(nrow(out), 1L)
expect_identical(out$index, which(colnames(efc) %in% out$column))
expect_identical(out$labels, "elder's gender")
})

test_that("seek_variables - multiple pattern", {
test_that("data_seek - multiple pattern", {
data(efc)
out <- seek_variables(efc, c("e16", "e42"))
out <- data_seek(efc, c("e16", "e42"))
expect_identical(nrow(out), 2L)
expect_identical(out$index, which(colnames(efc) %in% out$column))
expect_identical(out$labels, c("elder's gender", "elder's dependency"))
# only one match, typo
out <- seek_variables(efc, c("femlae", "dependency"))
out <- data_seek(efc, c("femlae", "dependency"))
expect_identical(nrow(out), 1L)
expect_identical(out$index, which(colnames(efc) %in% out$column))
expect_identical(out$labels, "elder's dependency")
# only one match, not searching in value labels
out <- seek_variables(efc, c("female", "dependency"))
out <- data_seek(efc, c("female", "dependency"))
expect_identical(nrow(out), 1L)
expect_identical(out$index, which(colnames(efc) %in% out$column))
expect_identical(out$labels, "elder's dependency")
# two matches
out <- seek_variables(efc, c("female", "dependency"), seek = "all")
out <- data_seek(efc, c("female", "dependency"), seek = "all")
expect_identical(nrow(out), 2L)
expect_identical(out$index, which(colnames(efc) %in% out$column))
expect_identical(out$labels, c("elder's gender", "elder's dependency"))
# only one match, typo
out <- seek_variables(efc, c("femlae", "dependency"), seek = "all")
out <- data_seek(efc, c("femlae", "dependency"), seek = "all")
expect_identical(nrow(out), 1L)
expect_identical(out$index, which(colnames(efc) %in% out$column))
expect_identical(out$labels, "elder's dependency")
# two matches, despite typo
out <- seek_variables(efc, c("femlae", "dependency"), seek = "all", fuzzy = TRUE)
out <- data_seek(efc, c("femlae", "dependency"), seek = "all", fuzzy = TRUE)
expect_identical(nrow(out), 2L)
expect_identical(out$index, which(colnames(efc) %in% out$column))
expect_identical(out$labels, c("elder's gender", "elder's dependency"))
})

test_that("seek_variables - valid input", {
expect_error(seek_variables(rnorm(10), "Length"), regex = "`data` must be a data frame.")
expect_error(seek_variables(iris, "Length", seek = "somewhere"), regex = "`seek` must be")
test_that("data_seek - valid input", {
expect_error(data_seek(rnorm(10), "Length"), regex = "`data` must be a data frame.")
expect_error(data_seek(iris, "Length", seek = "somewhere"), regex = "`seek` must be")
})

test_that("seek_variables - print", {
expect_snapshot(seek_variables(iris, "Length"))
test_that("data_seek - print", {
expect_snapshot(data_seek(iris, "Length"))
})

0 comments on commit 8302c97

Please sign in to comment.