Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Basic bulk submission support #39

Merged
merged 7 commits into from
Jan 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: hipercow
Title: High Performance Computing
Version: 0.2.0
Version: 0.2.1
Authors@R: c(person("Rich", "FitzJohn", role = c("aut", "cre"),
email = "[email protected]"),
person("Wes", "Hinsley", role = "aut"),
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ export(hipercow_hello)
export(hipercow_init)
export(hipercow_provision)
export(task_cancel)
export(task_create_bulk_expr)
export(task_create_explicit)
export(task_create_expr)
export(task_create_script)
Expand Down
105 changes: 105 additions & 0 deletions R/task-create.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
##' Create a bulk set of tasks. This is an experimental interface and
##' does not have an analogue within didehpc. Variables in `data`
##' take precedence over variables in the environment in which `expr`
##' was created. There is no "pronoun" support yet (see rlang docs).
##' Use `!!` to pull a variable from the environment if you need to,
##' but be careful not to inject something really large (e.g., any
##' vector really) or you'll end up with a revolting expression and
##' poor backtraces. We will likely change some of these semantics
##' later, be careful.
##'
##' @title Create bulk tasks via with expressions
##'
##' @param expr An expression, as for [task_create_expr]
##'
##' @param data Data that you wish to inject _rowwise_ into the expression
##'
##' @inheritParams task_create_explicit
##'
##' @return A vector of ids, with the same length as `data` has rows.
##'
##' @export
task_create_bulk_expr <- function(expr, data, environment = "default",
submit = NULL, root = NULL) {
root <- hipercow_root(root)
if (!inherits(data, "data.frame")) {
cli::cli_abort("Expected 'data' to be a data.frame (or tbl, etc)")
}

## This will allow `!!x` to reference a value in the enclosing
## environment and we'll splice it into the expression. This will
## work pretty well for simple things and _terribly_ for large
## objects, which would be better pulled in by name if possible.
##
## We could do this using "eval_tidy" and use "pronouns" but that
## will require a little more setup; probably worth considering
## though. For now this is fine, but we can improve this by:
##
## * Not doing the injection until later
## * Setting up the bits for eval_tidy and exporting them
## * Analysing the expression before injection and making sure
## that anything injected is small
expr <- check_expression(rlang::inject(rlang::enquo(expr)))

## Warn about lack of overlap here? That is, if there's nothing
## within locals that could be referenced from the data.frame that's
## likely an error.
extra <- setdiff(all.vars(expr$value), names(data))
variables <- task_variables(
extra, expr$envir, environment, root, rlang::current_env())
path <- relative_workdir(root$path$root)
id <- vcapply(seq_len(nrow(data)), function(i) {
variables_i <- variables
variables_i$locals <- c(variables$locals, as.list(data[i, ]))
task_create(root, "expression", path, environment,
expr = expr$value, variables = variables_i)
})
task_submit_maybe(id, submit, root, rlang::current_env())
id
}


## We'll move this out everywhere soon:
task_create <- function(root, type, path, environment, ...) {
id <- ids::random_id()
dest <- file.path(root$path$tasks, id)
fs::dir_create(dest)
data <- list(type = type, id = id, path = path, environment = environment,
...)
saveRDS(data, file.path(dest, EXPR))
file.create(file.path(dest, STATUS_CREATED))
id
}


check_expression <- function(quo) {
if (rlang::quo_is_symbol(quo)) {
sym <- rlang::as_name(rlang::quo_get_expr(quo))
envir <- rlang::quo_get_env(quo)
if (!rlang::env_has(envir, sym, inherit = TRUE)) {
cli::cli_abort("Could not find expression '{sym}'")
}
expr <- rlang::env_get(envir, sym, inherit = TRUE)
if (!rlang::is_call(expr)) {
cli::cli_abort(c(
"Expected 'expr' to be a function call",
i = paste("You passed a symbol '{sym}', but that turned out to be",
"an object of type {typeof(expr)} and not a call")))
}
} else {
if (!rlang::quo_is_call(quo)) {
cli::cli_abort("Expected 'expr' to be a function call")
}
envir <- rlang::quo_get_env(quo)
expr <- rlang::quo_get_expr(quo)
}

if (rlang::is_call(expr, "quote")) {
given <- rlang::expr_deparse(expr)
alt <- rlang::expr_deparse(expr[[2]])
cli::cli_abort(
c("You have an extra layer of quote() around 'expr'",
i = "You passed '{given}' but probably meant to pass '{alt}'"))
}
list(value = expr, envir = envir)
}
53 changes: 5 additions & 48 deletions R/task.R
Original file line number Diff line number Diff line change
Expand Up @@ -78,56 +78,13 @@ task_create_explicit <- function(expr, export = NULL, envir = .GlobalEnv,
task_create_expr <- function(expr, environment = "default", submit = NULL,
root = NULL) {
root <- hipercow_root(root)

quo <- rlang::enquo(expr)
if (rlang::quo_is_symbol(quo)) {
sym <- rlang::as_name(rlang::quo_get_expr(quo))
envir <- rlang::caller_env() # or is it rlang::quo_get_expr(quo) perhaps?
if (!rlang::env_has(envir, sym, inherit = TRUE)) {
cli::cli_abort("Could not find expression '{sym}'")
}
expr <- rlang::env_get(envir, sym, inherit = TRUE)
if (!rlang::is_call(expr)) {
cli::cli_abort(c(
"Expected 'expr' to be a function call",
i = paste("You passed a symbol '{sym}', but that turned out to be",
"an object of type {typeof(expr)} and not a call")))
}
} else {
if (!rlang::quo_is_call(quo)) {
cli::cli_abort("Expected 'expr' to be a function call")
}
envir <- rlang::quo_get_env(quo)
expr <- rlang::quo_get_expr(quo)
}

if (rlang::is_call(expr, "quote")) {
given <- rlang::expr_deparse(expr)
alt <- rlang::expr_deparse(expr[[2]])
cli::cli_abort(
c("You have an extra layer of quote() around 'expr'",
i = "You passed '{given}' but probably meant to pass '{alt}'"))
}

variables <- task_variables(all.vars(expr), envir, environment, root,
rlang::current_env())
expr <- check_expression(rlang::enquo(expr))
variables <- task_variables(
all.vars(expr$value), expr$envir, environment, root, rlang::current_env())
path <- relative_workdir(root$path$root)

id <- ids::random_id()
dest <- file.path(root$path$tasks, id)
dir.create(dest, FALSE, TRUE)

data <- list(type = "expression",
id = id,
expr = expr,
variables = variables,
path = path,
environment = environment)
saveRDS(data, file.path(dest, EXPR))
file.create(file.path(dest, STATUS_CREATED))

id <- task_create(root, "expression", path, environment,
expr = expr$value, variables = variables)
task_submit_maybe(id, submit, root, rlang::current_env())

id
}

Expand Down
2 changes: 1 addition & 1 deletion drivers/windows/DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: hipercow.windows
Title: DIDE HPC Support for Windows
Version: 0.2.0
Version: 0.2.1
Authors@R: c(person("Rich", "FitzJohn", role = c("aut", "cre"),
email = "[email protected]"),
person("Wes", "Hinsley", role = "aut"),
Expand Down
46 changes: 46 additions & 0 deletions man/task_create_bulk_expr.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

54 changes: 54 additions & 0 deletions tests/testthat/test-task-bulk.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
test_that("bulk creation of a task", {
path <- withr::local_tempdir()
init_quietly(path)

d <- data.frame(a = 1:3, b = c("x", "y", "z"))
x <- 1
id <- withr::with_dir(path, task_create_bulk_expr(list(x, a, b), d))

expect_length(id, 3)
expect_type(id, "character")
expect_match(id, "^[[:xdigit:]]{32}$", all = TRUE)
expect_equal(task_status(id, root = path), rep("created", 3))

d <- lapply(file.path(path, "hipercow", "tasks", id, "expr"), readRDS)
expect_equal(d[[1]]$variables$locals, list(x = 1, a = 1, b = "x"))
expect_equal(d[[2]]$variables$locals, list(x = 1, a = 2, b = "y"))
expect_equal(d[[3]]$variables$locals, list(x = 1, a = 3, b = "z"))

expect_equal(d[[1]]$expr, quote(list(x, a, b)))
expect_equal(d[[1]]$environment, "default")
expect_equal(d[[1]]$path, ".")
expect_equal(d[[1]]$type, "expression")

v <- c("type", "path", "environment", "expr")
expect_equal(d[[2]][v], d[[1]][v])
expect_equal(d[[3]][v], d[[1]][v])
})


test_that("use splicing to disambiguate expressions", {
path <- withr::local_tempdir()
init_quietly(path)

d <- data.frame(a = 1:3, b = c("x", "y", "z"))
a <- 1
id <- withr::with_dir(path, task_create_bulk_expr(list(!!a, a, b), d))

d <- lapply(file.path(path, "hipercow", "tasks", id, "expr"), readRDS)
expect_equal(d[[1]]$variables$locals, list(a = 1, b = "x"))
expect_equal(d[[2]]$variables$locals, list(a = 2, b = "y"))
expect_equal(d[[3]]$variables$locals, list(a = 3, b = "z"))
expect_equal(d[[1]]$expr, quote(list(1, a, b)))
expect_equal(d[[2]]$expr, quote(list(1, a, b)))
expect_equal(d[[3]]$expr, quote(list(1, a, b)))
})


test_that("require that data is a data.frame", {
path <- withr::local_tempdir()
init_quietly(path)
expect_error(
withr::with_dir(path, task_create_bulk_expr(list(x, a, b), NULL)),
"Expected 'data' to be a data.frame")
})