Skip to content

Commit

Permalink
Merge pull request #289 from korpling/feature/main-memory-switch
Browse files Browse the repository at this point in the history
New command line argument `--in-memory`
  • Loading branch information
thomaskrause authored Aug 2, 2024
2 parents 7cd96b9 + 5567e7e commit a42cfa1
Show file tree
Hide file tree
Showing 7 changed files with 46 additions and 93 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### Added

- New command line argument `--in-memory` that has the same meaning as setting
`ANNATTO_IN_MEMORY` to true but is easier to discover.
- `map` manipulator can now add annotated spans and copy values from existing
annotations. The copied values can be manipulated used regular expressions and
replacement values.
Expand Down
21 changes: 16 additions & 5 deletions src/bin/annatto.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,14 @@ enum Cli {
#[clap(value_parser)]
workflow_file: std::path::PathBuf,
/// Adding this argument resolves environmental variables in the provided workflow file.
#[structopt(long)]
#[clap(long)]
env: bool,
/// If this argument is given, store temporary annotation graphs in main
/// memory instead of on disk. This is faster, but if the corpus is too
/// large to fit into main memory, the pipeline will fail. Can also set
/// by setting the environment variable `ANNATTO_IN_MEMORY` to `true`.
#[clap(long, env = "ANNATTO_IN_MEMORY", default_value = "false")]
in_memory: bool,
},
/// Only check if a workflow file can be loaded. Invalid workflow files will lead to a non-zero exit code.
Validate {
Expand Down Expand Up @@ -93,7 +99,11 @@ pub fn main() -> anyhow::Result<()> {

let args = Parser::parse();
match args {
Cli::Run { workflow_file, env } => convert(workflow_file, env)?,
Cli::Run {
workflow_file,
env,
in_memory,
} => convert(workflow_file, env, in_memory)?,
Cli::Validate { workflow_file } => {
Workflow::try_from((workflow_file, false))?;
}
Expand All @@ -107,10 +117,11 @@ pub fn main() -> anyhow::Result<()> {
}

/// Execute the conversion in the background and show the status to the user
fn convert(workflow_file: PathBuf, read_env: bool) -> Result<(), AnnattoError> {
fn convert(workflow_file: PathBuf, read_env: bool, in_memory: bool) -> Result<(), AnnattoError> {
let (tx, rx) = mpsc::channel();
let result =
thread::spawn(move || execute_from_file(&workflow_file, read_env, Some(tx.clone())));
let result = thread::spawn(move || {
execute_from_file(&workflow_file, read_env, in_memory, Some(tx.clone()))
});

let mut all_bars: HashMap<StepID, ProgressBar> = HashMap::new();

Expand Down
7 changes: 6 additions & 1 deletion src/importer/relannis/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -329,7 +329,12 @@ fn node_by_text_entry_serializer() {

#[test]
fn parse_relannis_workflow() {
let r = workflow::execute_from_file(Path::new("./tests/workflows/relannis.toml"), true, None);
let r = workflow::execute_from_file(
Path::new("./tests/workflows/relannis.toml"),
true,
false,
None,
);
// This should fail, because the input directory does not exist
assert_eq!(true, r.is_err());
assert_eq!(
Expand Down
1 change: 0 additions & 1 deletion src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ pub mod importer;
pub mod manipulator;
pub mod models;
pub mod progress;
pub mod runtime;
#[cfg(test)]
pub(crate) mod test_util;
pub(crate) mod util;
Expand Down
4 changes: 2 additions & 2 deletions src/manipulator/visualize.rs
Original file line number Diff line number Diff line change
Expand Up @@ -361,7 +361,7 @@ mod tests {
&workflow_file,
)
.unwrap();
execute_from_file(&workflow_file, true, None).unwrap();
execute_from_file(&workflow_file, true, false, None).unwrap();
let result_dot = std::fs::read_to_string(workflow_dir.path().join("test.dot")).unwrap();
assert_snapshot!(result_dot);
}
Expand All @@ -375,7 +375,7 @@ mod tests {
&workflow_file,
)
.unwrap();
execute_from_file(&workflow_file, true, None).unwrap();
execute_from_file(&workflow_file, true, false, None).unwrap();
let result_dot = std::fs::read_to_string(workflow_dir.path().join("test.dot")).unwrap();
assert_snapshot!(result_dot);
}
Expand Down
78 changes: 0 additions & 78 deletions src/runtime.rs

This file was deleted.

26 changes: 20 additions & 6 deletions src/workflow.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@ use regex::Regex;
use serde_derive::Deserialize;

use crate::{
error::AnnattoError, error::Result, progress::ProgressReporter, runtime, ExporterStep,
ImporterStep, ManipulatorStep, StepID,
error::AnnattoError, error::Result, progress::ProgressReporter, ExporterStep, ImporterStep,
ManipulatorStep, StepID,
};
use log::error;
use normpath::PathExt;
Expand Down Expand Up @@ -113,10 +113,12 @@ impl TryFrom<(PathBuf, bool)> for Workflow {
///
/// * `workflow_file` - The TOML workflow file.
/// * `read_env` - Set whether to resolve environment variables in the workflow file.
/// * `in_memory` - If true, use a main memory implementation to store the temporary graphs.
/// * `tx` - If supported by the caller, this is a sender object that allows to send [status updates](enum.StatusMessage.html) (like information messages, warnings and module progress) to the calling entity.
pub fn execute_from_file(
workflow_file: &Path,
read_env: bool,
in_memory: bool,
tx: Option<Sender<StatusMessage>>,
) -> Result<()> {
let wf = Workflow::try_from((workflow_file.to_path_buf(), read_env))?;
Expand All @@ -125,7 +127,7 @@ pub fn execute_from_file(
} else {
Path::new("")
};
wf.execute(tx, parent_dir)?;
wf.execute(tx, parent_dir, in_memory)?;
Ok(())
}

Expand All @@ -136,6 +138,7 @@ impl Workflow {
&self,
tx: Option<StatusSender>,
default_workflow_directory: &Path,
in_memory: bool,
) -> Result<()> {
// Create a vector of all conversion steps and report these as current status
let apply_update_step_id = StepID {
Expand Down Expand Up @@ -172,9 +175,17 @@ impl Workflow {
// Create a new empty annotation graph and apply updates
let apply_update_reporter =
ProgressReporter::new_unknown_total_work(tx.clone(), apply_update_step_id.clone())?;
apply_update_reporter
.info("Creating annotation graph by applying the updates from the import steps")?;
let mut g = runtime::initialize_graph(&tx)?;
if in_memory {
apply_update_reporter.info(
"Creating in-memory annotation graph by applying the updates from the import steps",
)?;
} else {
apply_update_reporter.info(
"Creating on-disk annotation graph by applying the updates from the import steps",
)?;
}
let mut g = AnnotationGraph::with_default_graphstorages(!in_memory)
.map_err(|e| AnnattoError::CreateGraph(e.to_string()))?;

// collect all updates in a single update to only have a single atomic
// call to `apply_update`
Expand Down Expand Up @@ -344,6 +355,7 @@ mod tests {
execute_from_file(
Path::new("./tests/data/import/empty/empty.toml"),
false,
false,
None,
)
.unwrap();
Expand Down Expand Up @@ -380,6 +392,7 @@ mod tests {
execute_from_file(
Path::new("./tests/workflows/multiple_importer.toml"),
false,
false,
None,
)
.unwrap();
Expand All @@ -394,6 +407,7 @@ mod tests {
execute_from_file(
Path::new("./tests/workflows/nonexisting_dir.toml"),
true,
false,
None,
)
.unwrap();
Expand Down

0 comments on commit a42cfa1

Please sign in to comment.