diff --git a/CHANGELOG.md b/CHANGELOG.md index d8ec5ff9..deb2571e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 annotations. The copied values can be manipulated used regular expressions and replacement values. +### Fixed + +- Using the same type of manipulator in a workflow now shows the correct + progress. + ## [0.14.0] - 2024-07-24 ### Added diff --git a/src/exporter/exmaralda.rs b/src/exporter/exmaralda.rs index e5e98e32..53404ea0 100644 --- a/src/exporter/exmaralda.rs +++ b/src/exporter/exmaralda.rs @@ -699,7 +699,7 @@ mod tests { exporter::exmaralda::ExportExmaralda, importer::exmaralda::ImportEXMARaLDA, test_util::{export_to_string, export_to_string_in_directory}, - ImporterStep, ReadFrom, Step, StepID, + ImporterStep, ReadFrom, }; #[test] @@ -709,11 +709,7 @@ mod tests { module: crate::ReadFrom::EXMARaLDA(import), path: PathBuf::from("./tests/data/import/exmaralda/clean/import/"), }; - let u = step.module.reader().import_corpus( - Path::new("./tests/data/import/exmaralda/clean/import/"), - step.get_step_id(), - None, - ); + let u = step.execute(None); assert!(u.is_ok()); let mut update = u.unwrap(); let g = AnnotationGraph::with_default_graphstorages(false); @@ -747,11 +743,12 @@ mod tests { .unwrap() .join(Path::new("./tests/data/import/exmaralda/clean/import/")); - let u = import.reader().import_corpus( - &source_path, - StepID::from_importer_module(&import, Some(source_path.clone())), - None, - ); + let step = ImporterStep { + module: import, + path: source_path, + }; + let u = step.execute(None); + assert!(u.is_ok()); let mut update = u.unwrap(); let g = AnnotationGraph::with_default_graphstorages(false); diff --git a/src/exporter/xlsx.rs b/src/exporter/xlsx.rs index ae39a8ad..b9a9e143 100644 --- a/src/exporter/xlsx.rs +++ b/src/exporter/xlsx.rs @@ -371,7 +371,8 @@ mod tests { use tempfile::TempDir; use crate::{ - importer::xlsx::ImportSpreadsheet, test_util::compare_graphs, ReadFrom, StepID, WriteAs, + importer::xlsx::ImportSpreadsheet, test_util::compare_graphs, ExporterStep, ImporterStep, + ReadFrom, WriteAs, }; use super::*; @@ -388,31 +389,38 @@ mod tests { // Import an example document let path = Path::new("./tests/data/import/xlsx/clean/xlsx/"); - let importer = crate::ReadFrom::Xlsx(importer); - let mut updates = importer - .reader() - .import_corpus(path, StepID::from_importer_module(&importer, None), None) - .unwrap(); + let orig_import_step = ImporterStep { + module: crate::ReadFrom::Xlsx(importer), + path: path.to_path_buf(), + }; + let mut updates = orig_import_step.execute(None).unwrap(); let mut original_graph = AnnotationGraph::with_default_graphstorages(false).unwrap(); original_graph.apply_update(&mut updates, |_| {}).unwrap(); // Export to Excel file, read it again and then compare the annotation graphs - let output_dir = TempDir::new().unwrap(); + let tmp_outputdir = TempDir::new().unwrap(); + let output_dir = tmp_outputdir.path().join("xlsx"); + std::fs::create_dir(&output_dir).unwrap(); let exporter = crate::WriteAs::Xlsx(exporter); - exporter - .writer() - .export_corpus( - &original_graph, - output_dir.path(), - StepID::from_exporter_module(&exporter, None), - None, - ) - .unwrap(); + let export_step = ExporterStep { + module: exporter, + path: output_dir.clone(), + }; + export_step.execute(&original_graph, None).unwrap(); + + let importer: ImportSpreadsheet = toml::from_str( + r#" + column_map = {"dipl" = ["sentence"], "norm" = ["pos", "lemma", "seg"]} + "#, + ) + .unwrap(); + let second_import_step = ImporterStep { + module: crate::ReadFrom::Xlsx(importer), + path: output_dir.clone(), + }; + let mut updates = second_import_step.execute(None).unwrap(); let mut written_graph = AnnotationGraph::with_default_graphstorages(false).unwrap(); - let mut updates = importer - .reader() - .import_corpus(path, StepID::from_importer_module(&importer, None), None) - .unwrap(); + written_graph.apply_update(&mut updates, |_| {}).unwrap(); compare_graphs(&original_graph, &written_graph); @@ -431,30 +439,38 @@ mod tests { // Import an example document let path = Path::new("./tests/data/import/xlsx/sample_sentence/"); let importer = crate::ReadFrom::Xlsx(importer); - let mut updates = importer - .reader() - .import_corpus(path, StepID::from_importer_module(&importer, None), None) - .unwrap(); + let orig_import_step = ImporterStep { + module: importer, + path: path.to_path_buf(), + }; + let mut updates = orig_import_step.execute(None).unwrap(); let mut original_graph = AnnotationGraph::with_default_graphstorages(false).unwrap(); original_graph.apply_update(&mut updates, |_| {}).unwrap(); // Export to Excel file and read it again - let output_dir = TempDir::new().unwrap(); + let tmp_outputdir = TempDir::new().unwrap(); + let output_dir = tmp_outputdir.path().join("sample_sentence"); + std::fs::create_dir(&output_dir).unwrap(); let exporter = crate::WriteAs::Xlsx(exporter); - exporter - .writer() - .export_corpus( - &original_graph, - output_dir.path(), - StepID::from_exporter_module(&exporter, None), - None, - ) - .unwrap(); + let export_step = ExporterStep { + module: exporter, + path: output_dir.clone(), + }; + export_step.execute(&original_graph, None).unwrap(); + + let importer: ImportSpreadsheet = toml::from_str( + r#" + column_map = {"tok" = ["lb"]} + "#, + ) + .unwrap(); + let second_import_step = ImporterStep { + module: crate::ReadFrom::Xlsx(importer), + path: output_dir.clone(), + }; + let mut updates = second_import_step.execute(None).unwrap(); + let mut written_graph = AnnotationGraph::with_default_graphstorages(false).unwrap(); - let mut updates = importer - .reader() - .import_corpus(path, StepID::from_importer_module(&importer, None), None) - .unwrap(); written_graph.apply_update(&mut updates, |_| {}).unwrap(); // Compare the graphs and make sure the token exist @@ -477,7 +493,7 @@ mod tests { fn with_namespace() { let importer: ImportSpreadsheet = toml::from_str( r#" - column_map = {"tok" = ["mynamespace::lb"]} + column_map = {"default_ns::text" = ["mynamespace::lb"]} "#, ) .unwrap(); @@ -485,37 +501,46 @@ mod tests { let mut exporter = ExportXlsx::default(); exporter.include_namespace = true; exporter.annotation_order = vec![AnnoKey { - ns: ANNIS_NS.into(), - name: "tok".into(), + ns: "default_ns".into(), + name: "text".into(), }]; let exporter = WriteAs::Xlsx(exporter); // Import an example document let path = Path::new("./tests/data/import/xlsx/sample_sentence_with_namespace/"); + let first_import_step = ImporterStep { + module: importer, + path: path.to_path_buf(), + }; - let mut updates = importer - .reader() - .import_corpus(path, StepID::from_importer_module(&importer, None), None) - .unwrap(); + let mut updates = first_import_step.execute(None).unwrap(); let mut original_graph = AnnotationGraph::with_default_graphstorages(false).unwrap(); original_graph.apply_update(&mut updates, |_| {}).unwrap(); // Export to Excel file and read it again - let output_dir = TempDir::new().unwrap(); - exporter - .writer() - .export_corpus( - &original_graph, - output_dir.path(), - StepID::from_exporter_module(&exporter, None), - None, - ) - .unwrap(); + let tmp_outputdir = TempDir::new().unwrap(); + let output_dir = tmp_outputdir.path().join("sample_sentence_with_namespace"); + std::fs::create_dir(&output_dir).unwrap(); + let export_step = ExporterStep { + module: exporter, + path: output_dir.clone(), + }; + export_step.execute(&original_graph, None).unwrap(); + + let importer: ImportSpreadsheet = toml::from_str( + r#" + column_map = {"default_ns::text" = ["mynamespace::lb"]} + "#, + ) + .unwrap(); + let second_import_step = ImporterStep { + module: crate::ReadFrom::Xlsx(importer), + path: output_dir.clone(), + }; + let mut updates = second_import_step.execute(None).unwrap(); + let mut written_graph = AnnotationGraph::with_default_graphstorages(false).unwrap(); - let mut updates = importer - .reader() - .import_corpus(path, StepID::from_importer_module(&importer, None), None) - .unwrap(); + written_graph.apply_update(&mut updates, |_| {}).unwrap(); // Compare the graphs and make sure the token exist diff --git a/src/importer/conllu/tests.rs b/src/importer/conllu/tests.rs index 80b6e80b..46fbb24c 100644 --- a/src/importer/conllu/tests.rs +++ b/src/importer/conllu/tests.rs @@ -4,7 +4,8 @@ use graphannis::{graph::AnnoKey, update::GraphUpdate}; use insta::assert_snapshot; use crate::{ - importer::conllu::default_comment_key, test_util::import_as_graphml_string, ReadFrom, StepID, + importer::conllu::default_comment_key, test_util::import_as_graphml_string, ImporterStep, + ReadFrom, StepID, }; use super::ImportCoNLLU; @@ -13,14 +14,16 @@ use super::ImportCoNLLU; fn test_conll_fail_invalid() { let import = ReadFrom::CoNLLU(ImportCoNLLU::default()); let import_path = Path::new("tests/data/import/conll/invalid"); - let step_id = StepID::from_importer_module(&import, Some(import_path.to_path_buf())); - let job = import - .reader() - .import_corpus(import_path, step_id.clone(), None); + let import_step = ImporterStep { + module: import, + path: import_path.to_path_buf(), + }; + let job = import_step.execute(None); assert!(job.is_err()); assert_snapshot!(job.err().unwrap().to_string()); let mut u = GraphUpdate::default(); let import = ImportCoNLLU::default(); + let step_id = StepID::from_importer_step(&import_step); assert!(import .import_document( &step_id, @@ -36,11 +39,12 @@ fn test_conll_fail_invalid() { fn test_conll_fail_invalid_heads() { let import = ReadFrom::CoNLLU(ImportCoNLLU::default()); let import_path = Path::new("tests/data/import/conll/invalid-heads/"); - let step_id = StepID::from_importer_module(&import, Some(import_path.to_path_buf())); + let import_step = ImporterStep { + module: import, + path: import_path.to_path_buf(), + }; let (sender, _receiver) = mpsc::channel(); - let job = import - .reader() - .import_corpus(import_path, step_id, Some(sender)); + let job = import_step.execute(Some(sender)); assert!(job.is_err()); assert_snapshot!(job.err().unwrap().to_string()); } @@ -49,9 +53,12 @@ fn test_conll_fail_invalid_heads() { fn test_conll_fail_cyclic() -> Result<(), Box> { let import = ReadFrom::CoNLLU(ImportCoNLLU::default()); let import_path = Path::new("tests/data/import/conll/cyclic-deps/"); - let step_id = StepID::from_importer_module(&import, Some(import_path.to_path_buf())); + let import_step = ImporterStep { + module: import, + path: import_path.to_path_buf(), + }; - let job = import.reader().import_corpus(import_path, step_id, None); + let job = import_step.execute(None); assert!(job.is_ok()); Ok(()) } diff --git a/src/importer/exmaralda/tests.rs b/src/importer/exmaralda/tests.rs index 2d1b78e0..56bde523 100644 --- a/src/importer/exmaralda/tests.rs +++ b/src/importer/exmaralda/tests.rs @@ -10,7 +10,7 @@ use itertools::Itertools; use crate::{ progress::ProgressReporter, test_util::import_as_graphml_string_2, workflow::StatusMessage, - StepID, + ImporterStep, StepID, }; use super::ImportEXMARaLDA; @@ -19,17 +19,19 @@ use super::ImportEXMARaLDA; fn timeline_fail() { let import = crate::ReadFrom::EXMARaLDA(ImportEXMARaLDA::default()); let import_path = "./tests/data/import/exmaralda/fail-corrupt_timeline/import/"; - let step_id = StepID::from_importer_module(&import, Some(PathBuf::from(import_path))); + let import_step = ImporterStep { + module: import, + path: PathBuf::from(import_path), + }; let (sender, _receiver) = mpsc::channel(); - let r = import - .reader() - .import_corpus(Path::new(import_path), step_id.clone(), Some(sender)); + let r = import_step.execute(Some(sender)); assert!(r.is_err()); assert_snapshot!(r.err().unwrap().to_string()); let document_path = "./tests/data/import/exmaralda/fail-corrupt_timeline/import/test_doc.exb"; let mut u = GraphUpdate::default(); let import = ImportEXMARaLDA::default(); + let step_id = StepID::from_importer_step(&import_step); assert!(import .import_document( &step_id, @@ -46,11 +48,15 @@ fn timeline_fail() { fn category_fail() { let import = crate::ReadFrom::EXMARaLDA(ImportEXMARaLDA::default()); let import_path = "./tests/data/import/exmaralda/fail-no_category/"; - let step_id = StepID::from_importer_module(&import, Some(PathBuf::from(import_path))); + let import_step = ImporterStep { + module: import, + path: PathBuf::from(import_path), + }; + + let step_id = StepID::from_importer_step(&import_step); + let (sender, _receiver) = mpsc::channel(); - let r = import - .reader() - .import_corpus(Path::new(import_path), step_id.clone(), Some(sender)); + let r = import_step.execute(Some(sender)); assert!(r.is_err()); assert_snapshot!(r.err().unwrap().to_string()); let document_path = Path::new(import_path).join("test_doc.exb"); @@ -71,12 +77,15 @@ fn category_fail() { fn speaker_fail() { let import = crate::ReadFrom::EXMARaLDA(ImportEXMARaLDA::default()); let import_path = "./tests/data/import/exmaralda/fail-no_speaker/"; - let step_id = StepID::from_importer_module(&import, Some(PathBuf::from(import_path))); + let import_step = ImporterStep { + module: import, + path: PathBuf::from(import_path), + }; + + let step_id = StepID::from_importer_step(&import_step); let (sender, _receiver) = mpsc::channel(); - let r = import - .reader() - .import_corpus(Path::new(import_path), step_id.clone(), Some(sender)); + let r = import_step.execute(Some(sender)); assert!(r.is_err()); assert_snapshot!(r.err().unwrap().to_string()); let document_path = Path::new(import_path).join("test_doc.exb"); @@ -97,12 +106,15 @@ fn speaker_fail() { fn undefined_speaker_fail() { let import = crate::ReadFrom::EXMARaLDA(ImportEXMARaLDA::default()); let import_path = "./tests/data/import/exmaralda/fail-undefined_speaker/"; - let step_id = StepID::from_importer_module(&import, Some(PathBuf::from(import_path))); + let import_step = ImporterStep { + module: import, + path: PathBuf::from(import_path), + }; + + let step_id = StepID::from_importer_step(&import_step); let (sender, _receiver) = mpsc::channel(); - let r = import - .reader() - .import_corpus(Path::new(import_path), step_id.clone(), Some(sender)); + let r = import_step.execute(Some(sender)); assert!(r.is_err()); assert_snapshot!(r.err().unwrap().to_string()); let document_path = Path::new(import_path).join("test_doc.exb"); @@ -123,11 +135,15 @@ fn undefined_speaker_fail() { fn unknown_tli_fail() { let import = crate::ReadFrom::EXMARaLDA(ImportEXMARaLDA::default()); let import_path = "./tests/data/import/exmaralda/fail-unknown_tli/"; - let step_id = StepID::from_importer_module(&import, Some(PathBuf::from(import_path))); + let import_step = ImporterStep { + module: import, + path: PathBuf::from(import_path), + }; + + let step_id = StepID::from_importer_step(&import_step); + let (sender, _receiver) = mpsc::channel(); - let r = import - .reader() - .import_corpus(Path::new(import_path), step_id.clone(), Some(sender)); + let r = import_step.execute(Some(sender)); assert!(r.is_err()); assert_snapshot!(r.err().unwrap().to_string()); let document_path = Path::new(import_path).join("test_doc.exb"); @@ -148,11 +164,15 @@ fn unknown_tli_fail() { fn bad_timevalue_fail() { let import = crate::ReadFrom::EXMARaLDA(ImportEXMARaLDA::default()); let import_path = "./tests/data/import/exmaralda/fail-bad_timevalue/"; - let step_id = StepID::from_importer_module(&import, Some(PathBuf::from(import_path))); + let import_step = ImporterStep { + module: import, + path: PathBuf::from(import_path), + }; + + let step_id = StepID::from_importer_step(&import_step); + let (sender, _receiver) = mpsc::channel(); - let r = import - .reader() - .import_corpus(Path::new(import_path), step_id.clone(), Some(sender)); + let r = import_step.execute(Some(sender)); assert!(r.is_err()); assert_snapshot!(r.err().unwrap().to_string()); let document_path = Path::new(import_path).join("test_doc.exb"); @@ -173,11 +193,13 @@ fn bad_timevalue_fail() { fn underspec_event_fail() { let import = crate::ReadFrom::EXMARaLDA(ImportEXMARaLDA::default()); let import_path = "./tests/data/import/exmaralda/fail-no_start_no_end/"; - let step_id = StepID::from_importer_module(&import, Some(PathBuf::from(import_path))); + let import_step = ImporterStep { + module: import, + path: PathBuf::from(import_path), + }; + let (sender, _receiver) = mpsc::channel(); - let r = import - .reader() - .import_corpus(Path::new(import_path), step_id, Some(sender)); + let r = import_step.execute(Some(sender)); assert!(r.is_err()); assert_snapshot!(r.err().unwrap().to_string()); } @@ -186,11 +208,15 @@ fn underspec_event_fail() { fn invalid_fail() { let import = crate::ReadFrom::EXMARaLDA(ImportEXMARaLDA::default()); let import_path = "./tests/data/import/exmaralda/fail-invalid/import/"; - let step_id = StepID::from_importer_module(&import, Some(PathBuf::from(import_path))); + let import_step = ImporterStep { + module: import, + path: PathBuf::from(import_path), + }; + + let step_id = StepID::from_importer_step(&import_step); + let (sender, _receiver) = mpsc::channel(); - let r = import - .reader() - .import_corpus(Path::new(import_path), step_id.clone(), Some(sender)); + let r = import_step.execute(Some(sender)); assert!(r.is_err()); assert_snapshot!(r.err().unwrap().to_string()); let document_path = "./tests/data/import/exmaralda/fail-invalid/import/test_doc_invalid.exb"; diff --git a/src/importer/file_nodes.rs b/src/importer/file_nodes.rs index 2beaee6e..372e282c 100644 --- a/src/importer/file_nodes.rs +++ b/src/importer/file_nodes.rs @@ -89,7 +89,7 @@ mod tests { use graphannis_core::graph::ANNIS_NS; use itertools::Itertools; - use crate::{ImporterStep, Step}; + use crate::ImporterStep; use super::CreateFileNodes; @@ -147,10 +147,7 @@ mod tests { module: crate::ReadFrom::Path(import), path: PathBuf::from("tests/data/import/xlsx/clean/xlsx/"), }; - let mut test_u = - step.module - .reader() - .import_corpus(&step.path, step.get_step_id(), None)?; + let mut test_u = step.execute(None)?; // add dummy node and dummy ordering edge to pass model checks when applying the update to the graph test_u.add_event(UpdateEvent::AddNode { node_name: "dummy_node".to_string(), diff --git a/src/importer/meta.rs b/src/importer/meta.rs index c539e5a3..b4f3b64e 100644 --- a/src/importer/meta.rs +++ b/src/importer/meta.rs @@ -110,7 +110,7 @@ mod tests { use graphannis_core::graph::ANNIS_NS; use tempfile::tempdir; - use crate::{ReadFrom, StepID}; + use crate::{ImporterStep, ReadFrom}; use super::AnnotateCorpus; @@ -157,12 +157,11 @@ mod tests { .write(corpus_metadata.join("\n").as_bytes()) .map_err(|_| assert!(false)) .unwrap(); - let step_id = StepID::from_importer_module(&add_metadata, None); - let r = add_metadata.reader().import_corpus( - tmp_dir.path().join("metadata").as_path(), - step_id, - None, - ); + let import_step = ImporterStep { + module: add_metadata, + path: tmp_dir.path().join("metadata").to_path_buf(), + }; + let r = import_step.execute(None); assert_eq!( true, r.is_ok(), diff --git a/src/importer/xlsx.rs b/src/importer/xlsx.rs index ed791348..6b6d2414 100644 --- a/src/importer/xlsx.rs +++ b/src/importer/xlsx.rs @@ -474,7 +474,7 @@ mod tests { use graphannis_core::{annostorage::ValueSearch, types::AnnoKey}; use tempfile::tempdir; - use crate::{workflow::Workflow, ReadFrom}; + use crate::{workflow::Workflow, ImporterStep, ReadFrom}; use super::*; @@ -508,8 +508,12 @@ mod tests { }; let importer = ReadFrom::Xlsx(importer); let path = Path::new("./tests/data/import/xlsx/clean/xlsx/"); - let step_id = StepID::from_importer_module(&importer, Some(path.to_path_buf())); - let import = importer.reader().import_corpus(path, step_id, None); + let import_step = ImporterStep { + module: importer, + path: path.to_path_buf(), + }; + + let import = import_step.execute(None); let mut u = import?; let mut g = AnnotationGraph::with_default_graphstorages(on_disk)?; g.apply_update(&mut u, |_| {})?; @@ -612,9 +616,12 @@ mod tests { }; let importer = ReadFrom::Xlsx(importer); let path = Path::new("./tests/data/import/xlsx/dirty/xlsx/"); - let step_id = StepID::from_importer_module(&importer, Some(path.to_path_buf())); + let import_step = ImporterStep { + module: importer, + path: path.to_path_buf(), + }; let (sender, receiver) = mpsc::channel(); - let import = importer.reader().import_corpus(path, step_id, Some(sender)); + let import = import_step.execute(Some(sender)); assert!(import.is_err()); assert_ne!(receiver.into_iter().count(), 0); } @@ -642,9 +649,12 @@ mod tests { }; let importer = ReadFrom::Xlsx(importer); let path = Path::new("./tests/data/import/xlsx/warnings/xlsx/"); - let step_id = StepID::from_importer_module(&importer, Some(path.to_path_buf())); + let import_step = ImporterStep { + module: importer, + path: path.to_path_buf(), + }; let (sender, receiver) = mpsc::channel(); - let import = importer.reader().import_corpus(path, step_id, Some(sender)); + let import = import_step.execute(Some(sender)); assert!(import.is_ok()); assert_ne!(receiver.into_iter().count(), 0); } @@ -716,9 +726,12 @@ mod tests { }; let importer = ReadFrom::Xlsx(importer); let path = Path::new("./tests/data/import/xlsx/clean/xlsx/"); - let step_id = StepID::from_importer_module(&importer, Some(path.to_path_buf())); + let import_step = ImporterStep { + module: importer, + path: path.to_path_buf(), + }; let (sender, receiver) = mpsc::channel(); - let import = importer.reader().import_corpus(path, step_id, Some(sender)); + let import = import_step.execute(Some(sender)); assert!(import.is_ok()); assert_ne!(receiver.into_iter().count(), 0); } @@ -803,8 +816,12 @@ mod tests { }; let importer = ReadFrom::Xlsx(importer); let path = Path::new("./tests/data/import/xlsx/clean/xlsx/"); - let step_id = StepID::from_importer_module(&importer, Some(path.to_path_buf())); - let import = importer.reader().import_corpus(path, step_id, None); + let import_step = ImporterStep { + module: importer, + path: path.to_path_buf(), + }; + + let import = import_step.execute(None); let mut g = AnnotationGraph::with_default_graphstorages(on_disk)?; g.apply_update(&mut import?, |_| {})?; let node_annos = g.get_node_annos(); diff --git a/src/lib.rs b/src/lib.rs index 1864b3c7..abb40b54 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -13,7 +13,10 @@ pub(crate) mod test_util; pub(crate) mod util; pub mod workflow; -use std::{fmt::Display, path::PathBuf}; +use std::{ + fmt::Display, + path::{Path, PathBuf}, +}; use documented::{Documented, DocumentedFields}; use error::Result; @@ -21,6 +24,7 @@ use exporter::{ exmaralda::ExportExmaralda, graphml::GraphMLExporter, sequence::ExportSequence, textgrid::ExportTextGrid, xlsx::ExportXlsx, Exporter, }; +use graphannis::AnnotationGraph; use importer::{ conllu::ImportCoNLLU, exmaralda::ImportEXMARaLDA, file_nodes::CreateFileNodes, graphml::GraphMLImporter, meta::AnnotateCorpus, none::CreateEmptyCorpus, opus::ImportOpusLinks, @@ -35,6 +39,7 @@ use serde_derive::Deserialize; use struct_field_names_as_array::FieldNamesAsSlice; use strum::{AsRefStr, EnumDiscriminants, EnumIter}; use tabled::Tabled; +use workflow::StatusSender; #[derive(Tabled)] pub struct ModuleConfiguration { @@ -370,24 +375,27 @@ pub struct StepID { } impl StepID { - pub fn from_importer_module(m: &ReadFrom, path: Option) -> StepID { + pub fn from_importer_step(step: &ImporterStep) -> StepID { StepID { - module_name: format!("import_{}", m.as_ref().to_lowercase()), - path, + module_name: format!("import_{}", step.module.as_ref().to_lowercase()), + path: Some(step.path.clone()), } } - pub fn from_graph_op_module(m: &GraphOp) -> StepID { + pub fn from_graphop_step(step: &ManipulatorStep, position_in_workflow: usize) -> StepID { StepID { - module_name: m.as_ref().to_lowercase(), + module_name: format!( + "{position_in_workflow}_{}", + step.module.as_ref().to_lowercase() + ), path: None, } } - pub fn from_exporter_module(m: &WriteAs, path: Option) -> StepID { + pub fn from_exporter_step(step: &ExporterStep) -> StepID { StepID { - module_name: format!("export_{}", m.as_ref().to_lowercase()), - path, + module_name: format!("export_{}", step.module.as_ref().to_lowercase()), + path: Some(step.path.clone()), } } } @@ -403,9 +411,7 @@ impl Display for StepID { } /// Represents a single step in a conversion pipeline. -pub trait Step { - fn get_step_id(&self) -> StepID; -} +pub trait Step {} #[derive(Deserialize)] pub struct ImporterStep { @@ -414,12 +420,20 @@ pub struct ImporterStep { path: PathBuf, } -impl Step for ImporterStep { - fn get_step_id(&self) -> StepID { - StepID::from_importer_module(&self.module, Some(self.path.clone())) +impl ImporterStep { + #[cfg(test)] + fn execute( + &self, + tx: Option, + ) -> std::result::Result> { + self.module + .reader() + .import_corpus(&self.path, StepID::from_importer_step(&self), tx) } } +impl Step for ImporterStep {} + #[derive(Deserialize)] pub struct ExporterStep { #[serde(flatten)] @@ -427,12 +441,21 @@ pub struct ExporterStep { path: PathBuf, } -impl Step for ExporterStep { - fn get_step_id(&self) -> StepID { - StepID::from_exporter_module(&self.module, Some(self.path.clone())) +impl ExporterStep { + #[cfg(test)] + fn execute( + &self, + graph: &AnnotationGraph, + tx: Option, + ) -> std::result::Result<(), Box> { + self.module + .writer() + .export_corpus(graph, &self.path, StepID::from_exporter_step(&self), tx) } } +impl Step for ExporterStep {} + #[derive(Deserialize)] pub struct ManipulatorStep { #[serde(flatten)] @@ -440,12 +463,25 @@ pub struct ManipulatorStep { workflow_directory: Option, } -impl Step for ManipulatorStep { - fn get_step_id(&self) -> StepID { - StepID::from_graph_op_module(&self.module) +impl ManipulatorStep { + fn execute( + &self, + graph: &mut AnnotationGraph, + workflow_directory: &Path, + position_in_workflow: usize, + tx: Option, + ) -> std::result::Result<(), Box> { + self.module.processor().manipulate_corpus( + graph, + workflow_directory, + StepID::from_graphop_step(self, position_in_workflow), + tx, + ) } } +impl Step for ManipulatorStep {} + #[cfg(test)] mod tests { use std::fs; diff --git a/src/manipulator/collapse.rs b/src/manipulator/collapse.rs index 9d6bed1d..5e4c4718 100644 --- a/src/manipulator/collapse.rs +++ b/src/manipulator/collapse.rs @@ -530,7 +530,7 @@ mod tests { let (msg_sender, msg_receiver) = mpsc::channel(); let application = - collapse.manipulate_corpus(&mut g, Path::new("./"), step_id, Some(msg_sender)); + collapse.manipulate_corpus(&mut g, Path::new("./"), step_id.clone(), Some(msg_sender)); assert!(application.is_ok(), "not Ok: {:?}", application.err()); assert!(msg_receiver.into_iter().count() > 0); let eg = target_graph(on_disk, disjoint); @@ -547,13 +547,11 @@ mod tests { assert!(check_r.is_ok()); let check = check_r.unwrap(); let dummy_path = Path::new("./"); + let (sender_e, _receiver_e) = mpsc::channel(); - if let Err(e) = check.manipulate_corpus( - &mut expected_g, - dummy_path, - StepID::from_graph_op_module(&crate::GraphOp::Collapse(collapse)), - Some(sender_e), - ) { + if let Err(e) = + check.manipulate_corpus(&mut expected_g, dummy_path, step_id.clone(), Some(sender_e)) + { return Err(e); } diff --git a/src/test_util.rs b/src/test_util.rs index 61dfdad2..907a7002 100644 --- a/src/test_util.rs +++ b/src/test_util.rs @@ -14,6 +14,7 @@ use graphannis_core::{ types::{Edge, NodeID}, }; use itertools::Itertools; +use pretty_assertions::assert_eq; use std::{cmp::Ordering, fs, io::BufWriter, path::Path}; use tempfile::TempDir; diff --git a/src/workflow.rs b/src/workflow.rs index 49ada961..4e6e5e0a 100644 --- a/src/workflow.rs +++ b/src/workflow.rs @@ -11,7 +11,7 @@ use serde_derive::Deserialize; use crate::{ error::AnnattoError, error::Result, progress::ProgressReporter, runtime, ExporterStep, - ImporterStep, ManipulatorStep, Step, StepID, + ImporterStep, ManipulatorStep, StepID, }; use log::error; use normpath::PathExt; @@ -142,19 +142,21 @@ impl Workflow { module_name: "create_annotation_graph".to_string(), path: None, }; + if let Some(tx) = &tx { let mut steps: Vec = Vec::default(); - steps.extend(self.import.iter().map(|importer| importer.get_step_id())); + steps.extend(self.import.iter().map(StepID::from_importer_step)); steps.push(apply_update_step_id.clone()); + + let mut graph_op_position = 1; if let Some(ref manipulators) = self.graph_op { - steps.extend( - manipulators - .iter() - .map(|manipulator| manipulator.get_step_id()), - ); + for m in manipulators { + steps.push(StepID::from_graphop_step(m, graph_op_position)); + graph_op_position += 1; + } } if let Some(ref exporters) = self.export { - steps.extend(exporters.iter().map(|exporter| exporter.get_step_id())); + steps.extend(exporters.iter().map(StepID::from_exporter_step)); } tx.send(StatusMessage::StepsCreated(steps))?; } @@ -206,26 +208,26 @@ impl Workflow { // Execute all manipulators in sequence if let Some(ref manipulators) = self.graph_op { + let mut graph_op_position = 1; for desc in manipulators.iter() { + let step_id = StepID::from_graphop_step(desc, graph_op_position); let workflow_directory = &desc.workflow_directory; - desc.module - .processor() - .manipulate_corpus( - &mut g, - workflow_directory - .as_ref() - .map_or(default_workflow_directory, PathBuf::as_path), - desc.get_step_id(), - tx.clone(), - ) - .map_err(|reason| AnnattoError::Manipulator { - reason: reason.to_string(), - manipulator: desc.get_step_id().module_name, - })?; + desc.execute( + &mut g, + workflow_directory + .as_ref() + .map_or(default_workflow_directory, PathBuf::as_path), + graph_op_position, + tx.clone(), + ) + .map_err(|reason| AnnattoError::Manipulator { + reason: reason.to_string(), + manipulator: step_id.to_string(), + })?; + graph_op_position += 1; + if let Some(ref tx) = tx { - tx.send(crate::workflow::StatusMessage::StepDone { - id: desc.get_step_id(), - })?; + tx.send(crate::workflow::StatusMessage::StepDone { id: step_id })?; } } } @@ -261,6 +263,10 @@ impl Workflow { default_workflow_directory: &Path, tx: Option, ) -> Result { + let step_id = StepID::from_importer_step(step); + + // Do not use the import path directly, but resolve it against the + // workflow directory if the path is relative. let import_path = if step.path.is_relative() { default_workflow_directory.join(&step.path) } else { @@ -275,20 +281,14 @@ impl Workflow { let updates = step .module .reader() - .import_corpus( - resolved_import_path.as_path(), - step.get_step_id(), - tx.clone(), - ) + .import_corpus(resolved_import_path.as_path(), step_id.clone(), tx.clone()) .map_err(|reason| AnnattoError::Import { reason: reason.to_string(), - importer: step.get_step_id().module_name.to_string(), + importer: step_id.module_name.to_string(), path: step.path.to_path_buf(), })?; if let Some(ref tx) = tx { - tx.send(crate::workflow::StatusMessage::StepDone { - id: step.get_step_id(), - })?; + tx.send(crate::workflow::StatusMessage::StepDone { id: step_id })?; } Ok(updates) } @@ -300,6 +300,10 @@ impl Workflow { default_workflow_directory: &Path, tx: Option, ) -> Result<()> { + let step_id = StepID::from_exporter_step(step); + + // Do not use the output path directly, but resolve it against the + // workflow directory if the path is relative. let mut resolved_output_path = if step.path.is_relative() { default_workflow_directory.join(&step.path) } else { @@ -314,18 +318,16 @@ impl Workflow { .export_corpus( g, resolved_output_path.as_path(), - step.get_step_id(), + step_id.clone(), tx.clone(), ) .map_err(|reason| AnnattoError::Export { reason: reason.to_string(), - exporter: step.get_step_id().module_name.to_string(), + exporter: step_id.module_name.to_string(), path: step.path.clone(), })?; if let Some(ref tx) = tx { - tx.send(crate::workflow::StatusMessage::StepDone { - id: step.get_step_id(), - })?; + tx.send(crate::workflow::StatusMessage::StepDone { id: step_id })?; } Ok(()) } diff --git a/tests/data/import/xlsx/sample_sentence_with_namespace/doc1.xlsx b/tests/data/import/xlsx/sample_sentence_with_namespace/doc1.xlsx index a67d8a1a..82adc6c2 100644 Binary files a/tests/data/import/xlsx/sample_sentence_with_namespace/doc1.xlsx and b/tests/data/import/xlsx/sample_sentence_with_namespace/doc1.xlsx differ