From af15822df08cd081dbb87cbeff82a7a1b4296caa Mon Sep 17 00:00:00 2001 From: Rich FitzJohn Date: Fri, 22 Dec 2023 10:34:56 +0000 Subject: [PATCH 1/6] Basic bulk submission support --- NAMESPACE | 1 + R/task-create.R | 79 +++++++++++++++++++++++++++++++++ man/task_create_bulk_expr.Rd | 46 +++++++++++++++++++ tests/testthat/test-task-bulk.R | 45 +++++++++++++++++++ 4 files changed, 171 insertions(+) create mode 100644 R/task-create.R create mode 100644 man/task_create_bulk_expr.Rd create mode 100644 tests/testthat/test-task-bulk.R diff --git a/NAMESPACE b/NAMESPACE index 69aecbe1..67a29923 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -12,6 +12,7 @@ export(hipercow_environment_show) export(hipercow_init) export(hipercow_provision) export(task_cancel) +export(task_create_bulk_expr) export(task_create_explicit) export(task_create_expr) export(task_create_script) diff --git a/R/task-create.R b/R/task-create.R new file mode 100644 index 00000000..9951b510 --- /dev/null +++ b/R/task-create.R @@ -0,0 +1,79 @@ +##' Create a bulk set of tasks. This is an experimental interface and +##' does not have an analogue within didehpc. Variables in `data` +##' take precidence over variables in the environment in which `expr` +##' was created. There is no "pronoun" support yet (see rlang docs). +##' Use `!!` to pull a variable from the environment if you need to, +##' but be careful not to inject something really large (e.g., any +##' vector really) or you'll end up with a revolting expression and +##' poor backtraces. We will likely change some of these semantics +##' later, be careful. +##' +##' @title Create bulk tasks via with expressions +##' +##' @param expr An expression, as for [task_create_expr] +##' +##' @param data Data that you wish to inject _rowwise_ into the expression +##' +##' @inheritParams task_create_explicit +##' +##' @return A vector of ids, with the same length as `data` has rows. +##' +##' @export +task_create_bulk_expr <- function(expr, data, environment = "default", + submit = NULL, root = NULL) { + root <- hipercow_root(root) + ## assert_is(data, "data.frame") + + ## This will allow `!!x` to reference a value in the enclosing + ## environment and we'll splice it into the expression. This will + ## work pretty well for simple things and _terribly_ for large + ## objects, which would be better pulled in by name if possible. + ## + ## We could do this using "eval_tidy" and use "pronouns" but that + ## will require a little more setup; probably worth considering + ## though. For now this is fine, but we can improve this by: + ## + ## * Not doing the injection until later + ## * Setting up the bits for eval_tidy and exporting them + ## * Analysing the expression before injection and making sure + ## that anything injected is small + quo <- rlang::inject(rlang::enquo(expr)) + + ## TODO: might copy over the same bits as for expr(); easy enough to + ## refactor out. + if (!rlang::quo_is_call(quo)) { + cli::cli_abort("Expected 'expr' to be a function call") + } + envir <- rlang::quo_get_env(quo) + expr <- rlang::quo_get_expr(quo) + + ## warn about lack of overlap here? + extra <- setdiff(all.vars(expr), names(data)) + variables <- task_variables(extra, envir, environment, root, + rlang::current_env()) + + path <- relative_workdir(root$path$root) + + id <- vcapply(seq_len(nrow(data)), function(i) { + variables_i <- variables + variables_i$locals <- c(variables$locals, as.list(data[i, ])) + task_create(root, "expression", path, environment, + expr = expr, variables = variables_i) + }) + + task_submit_maybe(id, submit, root, rlang::current_env()) + id +} + + +## We'll move this out everywhere soon: +task_create <- function(root, type, path, environment, ...) { + id <- ids::random_id() + dest <- file.path(root$path$tasks, id) + fs::dir_create(dest) + data <- list(type = type, id = id, path = path, environment = environment, + ...) + saveRDS(data, file.path(dest, EXPR)) + file.create(file.path(dest, STATUS_CREATED)) + id +} diff --git a/man/task_create_bulk_expr.Rd b/man/task_create_bulk_expr.Rd new file mode 100644 index 00000000..2a33d7c2 --- /dev/null +++ b/man/task_create_bulk_expr.Rd @@ -0,0 +1,46 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/task-create.R +\name{task_create_bulk_expr} +\alias{task_create_bulk_expr} +\title{Create bulk tasks via with expressions} +\usage{ +task_create_bulk_expr( + expr, + data, + environment = "default", + submit = NULL, + root = NULL +) +} +\arguments{ +\item{expr}{An expression, as for \link{task_create_expr}} + +\item{data}{Data that you wish to inject \emph{rowwise} into the expression} + +\item{environment}{Name of the hipercow environment to evaluate the +task within.} + +\item{submit}{Control over task submission. This will expand over +time once we support specifying resources. The most simple +interface is to use \code{TRUE} here to automatically submit a task, +using your default configuration, or \code{FALSE} to prevent +submission. The default \code{NULL} will submit a task if a driver +is configured.} + +\item{root}{A hipercow root, or path to it. If \code{NULL} we search up +your directory tree.} +} +\value{ +A vector of ids, with the same length as \code{data} has rows. +} +\description{ +Create a bulk set of tasks. This is an experimental interface and +does not have an analogue within didehpc. Variables in \code{data} +take precidence over variables in the environment in which \code{expr} +was created. There is no "pronoun" support yet (see rlang docs). +Use \verb{!!} to pull a variable from the environment if you need to, +but be careful not to inject something really large (e.g., any +vector really) or you'll end up with a revolting expression and +poor backtraces. We will likely change some of these semantics +later, be careful. +} diff --git a/tests/testthat/test-task-bulk.R b/tests/testthat/test-task-bulk.R new file mode 100644 index 00000000..d41c4101 --- /dev/null +++ b/tests/testthat/test-task-bulk.R @@ -0,0 +1,45 @@ +test_that("bulk creation of a task", { + path <- withr::local_tempdir() + init_quietly(path) + + d <- data.frame(a = 1:3, b = c("x", "y", "z")) + x <- 1 + id <- withr::with_dir(path, task_create_bulk_expr(list(x, a, b), d)) + + expect_length(id, 3) + expect_type(id, "character") + expect_match(id, "^[[:xdigit:]]{32}$", all = TRUE) + expect_equal(task_status(id, root = path), rep("created", 3)) + + d <- lapply(file.path(path, "hipercow", "tasks", id, "expr"), readRDS) + expect_equal(d[[1]]$variables$locals, list(x = 1, a = 1, b = "x")) + expect_equal(d[[2]]$variables$locals, list(x = 1, a = 2, b = "y")) + expect_equal(d[[3]]$variables$locals, list(x = 1, a = 3, b = "z")) + + expect_equal(d[[1]]$expr, quote(list(x, a, b))) + expect_equal(d[[1]]$environment, "default") + expect_equal(d[[1]]$path, ".") + expect_equal(d[[1]]$type, "expression") + + v <- c("type", "path", "environment", "expr") + expect_equal(d[[2]][v], d[[1]][v]) + expect_equal(d[[3]][v], d[[1]][v]) +}) + + +test_that("use splicing to disambiguate expressions", { + path <- withr::local_tempdir() + init_quietly(path) + + d <- data.frame(a = 1:3, b = c("x", "y", "z")) + a <- 1 + id <- withr::with_dir(path, task_create_bulk_expr(list(!!a, a, b), d)) + + d <- lapply(file.path(path, "hipercow", "tasks", id, "expr"), readRDS) + expect_equal(d[[1]]$variables$locals, list(a = 1, b = "x")) + expect_equal(d[[2]]$variables$locals, list(a = 2, b = "y")) + expect_equal(d[[3]]$variables$locals, list(a = 3, b = "z")) + expect_equal(d[[1]]$expr, quote(list(1, a, b))) + expect_equal(d[[2]]$expr, quote(list(1, a, b))) + expect_equal(d[[3]]$expr, quote(list(1, a, b))) +}) From bfa70336dd465958fcccf7149ab589eba62b1ef9 Mon Sep 17 00:00:00 2001 From: Rich FitzJohn Date: Fri, 22 Dec 2023 10:48:45 +0000 Subject: [PATCH 2/6] Tidy up expression checking code --- R/task-create.R | 33 ++++++++++++++++++++++++++++++ R/task.R | 53 +++++-------------------------------------------- 2 files changed, 38 insertions(+), 48 deletions(-) diff --git a/R/task-create.R b/R/task-create.R index 9951b510..13c26a93 100644 --- a/R/task-create.R +++ b/R/task-create.R @@ -77,3 +77,36 @@ task_create <- function(root, type, path, environment, ...) { file.create(file.path(dest, STATUS_CREATED)) id } + + +check_expression <- function(quo) { + if (rlang::quo_is_symbol(quo)) { + sym <- rlang::as_name(rlang::quo_get_expr(quo)) + envir <- rlang::quo_get_env(quo) + if (!rlang::env_has(envir, sym, inherit = TRUE)) { + cli::cli_abort("Could not find expression '{sym}'") + } + expr <- rlang::env_get(envir, sym, inherit = TRUE) + if (!rlang::is_call(expr)) { + cli::cli_abort(c( + "Expected 'expr' to be a function call", + i = paste("You passed a symbol '{sym}', but that turned out to be", + "an object of type {typeof(expr)} and not a call"))) + } + } else { + if (!rlang::quo_is_call(quo)) { + cli::cli_abort("Expected 'expr' to be a function call") + } + envir <- rlang::quo_get_env(quo) + expr <- rlang::quo_get_expr(quo) + } + + if (rlang::is_call(expr, "quote")) { + given <- rlang::expr_deparse(expr) + alt <- rlang::expr_deparse(expr[[2]]) + cli::cli_abort( + c("You have an extra layer of quote() around 'expr'", + i = "You passed '{given}' but probably meant to pass '{alt}'")) + } + list(value = expr, envir = envir) +} diff --git a/R/task.R b/R/task.R index 3049ec89..f08247ea 100644 --- a/R/task.R +++ b/R/task.R @@ -78,56 +78,13 @@ task_create_explicit <- function(expr, export = NULL, envir = .GlobalEnv, task_create_expr <- function(expr, environment = "default", submit = NULL, root = NULL) { root <- hipercow_root(root) - - quo <- rlang::enquo(expr) - if (rlang::quo_is_symbol(quo)) { - sym <- rlang::as_name(rlang::quo_get_expr(quo)) - envir <- rlang::caller_env() # or is it rlang::quo_get_expr(quo) perhaps? - if (!rlang::env_has(envir, sym, inherit = TRUE)) { - cli::cli_abort("Could not find expression '{sym}'") - } - expr <- rlang::env_get(envir, sym, inherit = TRUE) - if (!rlang::is_call(expr)) { - cli::cli_abort(c( - "Expected 'expr' to be a function call", - i = paste("You passed a symbol '{sym}', but that turned out to be", - "an object of type {typeof(expr)} and not a call"))) - } - } else { - if (!rlang::quo_is_call(quo)) { - cli::cli_abort("Expected 'expr' to be a function call") - } - envir <- rlang::quo_get_env(quo) - expr <- rlang::quo_get_expr(quo) - } - - if (rlang::is_call(expr, "quote")) { - given <- rlang::expr_deparse(expr) - alt <- rlang::expr_deparse(expr[[2]]) - cli::cli_abort( - c("You have an extra layer of quote() around 'expr'", - i = "You passed '{given}' but probably meant to pass '{alt}'")) - } - - variables <- task_variables(all.vars(expr), envir, environment, root, - rlang::current_env()) + expr <- check_expression(rlang::enquo(expr)) + variables <- task_variables( + all.vars(expr$value), expr$envir, environment, root, rlang::current_env()) path <- relative_workdir(root$path$root) - - id <- ids::random_id() - dest <- file.path(root$path$tasks, id) - dir.create(dest, FALSE, TRUE) - - data <- list(type = "expression", - id = id, - expr = expr, - variables = variables, - path = path, - environment = environment) - saveRDS(data, file.path(dest, EXPR)) - file.create(file.path(dest, STATUS_CREATED)) - + id <- task_create(root, "expression", path, environment, + expr = expr$value, variables = variables) task_submit_maybe(id, submit, root, rlang::current_env()) - id } From 936578f8d66411451d12dabcc104bceebdbed751 Mon Sep 17 00:00:00 2001 From: Rich FitzJohn Date: Fri, 22 Dec 2023 10:49:00 +0000 Subject: [PATCH 3/6] Validate that we have a data.frame --- R/task-create.R | 4 +++- tests/testthat/test-task-bulk.R | 9 +++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/R/task-create.R b/R/task-create.R index 13c26a93..6cefd7f0 100644 --- a/R/task-create.R +++ b/R/task-create.R @@ -22,7 +22,9 @@ task_create_bulk_expr <- function(expr, data, environment = "default", submit = NULL, root = NULL) { root <- hipercow_root(root) - ## assert_is(data, "data.frame") + if (!inherits(data, "data.frame")) { + cli::cli_abort("Expected 'data' to be a data.frame (or tbl, etc)") + } ## This will allow `!!x` to reference a value in the enclosing ## environment and we'll splice it into the expression. This will diff --git a/tests/testthat/test-task-bulk.R b/tests/testthat/test-task-bulk.R index d41c4101..5544ca71 100644 --- a/tests/testthat/test-task-bulk.R +++ b/tests/testthat/test-task-bulk.R @@ -43,3 +43,12 @@ test_that("use splicing to disambiguate expressions", { expect_equal(d[[2]]$expr, quote(list(1, a, b))) expect_equal(d[[3]]$expr, quote(list(1, a, b))) }) + + +test_that("require that data is a data.frame", { + path <- withr::local_tempdir() + init_quietly(path) + expect_error( + withr::with_dir(path, task_create_bulk_expr(list(x, a, b), NULL)), + "Expected 'data' to be a data.frame") +}) From e8733b74b5bc4fb69a79f7e30a50cb8960e56acd Mon Sep 17 00:00:00 2001 From: Rich FitzJohn Date: Fri, 22 Dec 2023 10:50:04 +0000 Subject: [PATCH 4/6] Refactor bulk creation --- R/task-create.R | 25 ++++++++----------------- 1 file changed, 8 insertions(+), 17 deletions(-) diff --git a/R/task-create.R b/R/task-create.R index 6cefd7f0..52458cab 100644 --- a/R/task-create.R +++ b/R/task-create.R @@ -39,30 +39,21 @@ task_create_bulk_expr <- function(expr, data, environment = "default", ## * Setting up the bits for eval_tidy and exporting them ## * Analysing the expression before injection and making sure ## that anything injected is small - quo <- rlang::inject(rlang::enquo(expr)) - - ## TODO: might copy over the same bits as for expr(); easy enough to - ## refactor out. - if (!rlang::quo_is_call(quo)) { - cli::cli_abort("Expected 'expr' to be a function call") - } - envir <- rlang::quo_get_env(quo) - expr <- rlang::quo_get_expr(quo) - - ## warn about lack of overlap here? - extra <- setdiff(all.vars(expr), names(data)) - variables <- task_variables(extra, envir, environment, root, - rlang::current_env()) + expr <- check_expression(rlang::inject(rlang::enquo(expr))) + ## Warn about lack of overlap here? That is, if there's nothing + ## within locals that could be referenced from the data.frame that's + ## likely an error. + extra <- setdiff(all.vars(expr$value), names(data)) + variables <- task_variables( + extra, expr$envir, environment, root, rlang::current_env()) path <- relative_workdir(root$path$root) - id <- vcapply(seq_len(nrow(data)), function(i) { variables_i <- variables variables_i$locals <- c(variables$locals, as.list(data[i, ])) task_create(root, "expression", path, environment, - expr = expr, variables = variables_i) + expr = expr$value, variables = variables_i) }) - task_submit_maybe(id, submit, root, rlang::current_env()) id } From 37c0fbf4c6767364f047bfcdb8f4e2df528dae75 Mon Sep 17 00:00:00 2001 From: Wes Hinsley Date: Wed, 3 Jan 2024 15:47:42 +0000 Subject: [PATCH 5/6] Bump version --- DESCRIPTION | 2 +- drivers/windows/DESCRIPTION | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 2f3ff625..ab6b003b 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: hipercow Title: High Performance Computing -Version: 0.2.0 +Version: 0.2.1 Authors@R: c(person("Rich", "FitzJohn", role = c("aut", "cre"), email = "rich.fitzjohn@gmail.com"), person("Wes", "Hinsley", role = "aut"), diff --git a/drivers/windows/DESCRIPTION b/drivers/windows/DESCRIPTION index 668cc5b8..4924ad4a 100644 --- a/drivers/windows/DESCRIPTION +++ b/drivers/windows/DESCRIPTION @@ -1,6 +1,6 @@ Package: hipercow.windows Title: DIDE HPC Support for Windows -Version: 0.2.0 +Version: 0.2.1 Authors@R: c(person("Rich", "FitzJohn", role = c("aut", "cre"), email = "rich.fitzjohn@gmail.com"), person("Wes", "Hinsley", role = "aut"), From 3877df10d1fd2c5fcccb1dad49561a23f3533ea2 Mon Sep 17 00:00:00 2001 From: Wes Hinsley Date: Wed, 3 Jan 2024 15:48:33 +0000 Subject: [PATCH 6/6] Typo --- R/task-create.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/task-create.R b/R/task-create.R index 52458cab..fe91fc59 100644 --- a/R/task-create.R +++ b/R/task-create.R @@ -1,6 +1,6 @@ ##' Create a bulk set of tasks. This is an experimental interface and ##' does not have an analogue within didehpc. Variables in `data` -##' take precidence over variables in the environment in which `expr` +##' take precedence over variables in the environment in which `expr` ##' was created. There is no "pronoun" support yet (see rlang docs). ##' Use `!!` to pull a variable from the environment if you need to, ##' but be careful not to inject something really large (e.g., any