Skip to content

Commit

Permalink
Merge branch 'main' into feature/map-update-value
Browse files Browse the repository at this point in the history
  • Loading branch information
thomaskrause committed Aug 2, 2024
2 parents b710e0d + b8d6914 commit 9f07c22
Show file tree
Hide file tree
Showing 13 changed files with 314 additions and 204 deletions.
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
annotations. The copied values can be manipulated using regular expressions and
replacement values.

### Fixed

- Using the same type of manipulator more than once in a workflow now shows the
correct progress.

## [0.14.0] - 2024-07-24

### Added
Expand Down
19 changes: 8 additions & 11 deletions src/exporter/exmaralda.rs
Original file line number Diff line number Diff line change
Expand Up @@ -699,7 +699,7 @@ mod tests {
exporter::exmaralda::ExportExmaralda,
importer::exmaralda::ImportEXMARaLDA,
test_util::{export_to_string, export_to_string_in_directory},
ImporterStep, ReadFrom, Step, StepID,
ImporterStep, ReadFrom,
};

#[test]
Expand All @@ -709,11 +709,7 @@ mod tests {
module: crate::ReadFrom::EXMARaLDA(import),
path: PathBuf::from("./tests/data/import/exmaralda/clean/import/"),
};
let u = step.module.reader().import_corpus(
Path::new("./tests/data/import/exmaralda/clean/import/"),
step.get_step_id(),
None,
);
let u = step.execute(None);
assert!(u.is_ok());
let mut update = u.unwrap();
let g = AnnotationGraph::with_default_graphstorages(false);
Expand Down Expand Up @@ -747,11 +743,12 @@ mod tests {
.unwrap()
.join(Path::new("./tests/data/import/exmaralda/clean/import/"));

let u = import.reader().import_corpus(
&source_path,
StepID::from_importer_module(&import, Some(source_path.clone())),
None,
);
let step = ImporterStep {
module: import,
path: source_path,
};
let u = step.execute(None);

assert!(u.is_ok());
let mut update = u.unwrap();
let g = AnnotationGraph::with_default_graphstorages(false);
Expand Down
143 changes: 84 additions & 59 deletions src/exporter/xlsx.rs
Original file line number Diff line number Diff line change
Expand Up @@ -371,7 +371,8 @@ mod tests {
use tempfile::TempDir;

use crate::{
importer::xlsx::ImportSpreadsheet, test_util::compare_graphs, ReadFrom, StepID, WriteAs,
importer::xlsx::ImportSpreadsheet, test_util::compare_graphs, ExporterStep, ImporterStep,
ReadFrom, WriteAs,
};

use super::*;
Expand All @@ -388,31 +389,38 @@ mod tests {

// Import an example document
let path = Path::new("./tests/data/import/xlsx/clean/xlsx/");
let importer = crate::ReadFrom::Xlsx(importer);
let mut updates = importer
.reader()
.import_corpus(path, StepID::from_importer_module(&importer, None), None)
.unwrap();
let orig_import_step = ImporterStep {
module: crate::ReadFrom::Xlsx(importer),
path: path.to_path_buf(),
};
let mut updates = orig_import_step.execute(None).unwrap();
let mut original_graph = AnnotationGraph::with_default_graphstorages(false).unwrap();
original_graph.apply_update(&mut updates, |_| {}).unwrap();

// Export to Excel file, read it again and then compare the annotation graphs
let output_dir = TempDir::new().unwrap();
let tmp_outputdir = TempDir::new().unwrap();
let output_dir = tmp_outputdir.path().join("xlsx");
std::fs::create_dir(&output_dir).unwrap();
let exporter = crate::WriteAs::Xlsx(exporter);
exporter
.writer()
.export_corpus(
&original_graph,
output_dir.path(),
StepID::from_exporter_module(&exporter, None),
None,
)
.unwrap();
let export_step = ExporterStep {
module: exporter,
path: output_dir.clone(),
};
export_step.execute(&original_graph, None).unwrap();

let importer: ImportSpreadsheet = toml::from_str(
r#"
column_map = {"dipl" = ["sentence"], "norm" = ["pos", "lemma", "seg"]}
"#,
)
.unwrap();
let second_import_step = ImporterStep {
module: crate::ReadFrom::Xlsx(importer),
path: output_dir.clone(),
};
let mut updates = second_import_step.execute(None).unwrap();
let mut written_graph = AnnotationGraph::with_default_graphstorages(false).unwrap();
let mut updates = importer
.reader()
.import_corpus(path, StepID::from_importer_module(&importer, None), None)
.unwrap();

written_graph.apply_update(&mut updates, |_| {}).unwrap();

compare_graphs(&original_graph, &written_graph);
Expand All @@ -431,30 +439,38 @@ mod tests {
// Import an example document
let path = Path::new("./tests/data/import/xlsx/sample_sentence/");
let importer = crate::ReadFrom::Xlsx(importer);
let mut updates = importer
.reader()
.import_corpus(path, StepID::from_importer_module(&importer, None), None)
.unwrap();
let orig_import_step = ImporterStep {
module: importer,
path: path.to_path_buf(),
};
let mut updates = orig_import_step.execute(None).unwrap();
let mut original_graph = AnnotationGraph::with_default_graphstorages(false).unwrap();
original_graph.apply_update(&mut updates, |_| {}).unwrap();

// Export to Excel file and read it again
let output_dir = TempDir::new().unwrap();
let tmp_outputdir = TempDir::new().unwrap();
let output_dir = tmp_outputdir.path().join("sample_sentence");
std::fs::create_dir(&output_dir).unwrap();
let exporter = crate::WriteAs::Xlsx(exporter);
exporter
.writer()
.export_corpus(
&original_graph,
output_dir.path(),
StepID::from_exporter_module(&exporter, None),
None,
)
.unwrap();
let export_step = ExporterStep {
module: exporter,
path: output_dir.clone(),
};
export_step.execute(&original_graph, None).unwrap();

let importer: ImportSpreadsheet = toml::from_str(
r#"
column_map = {"tok" = ["lb"]}
"#,
)
.unwrap();
let second_import_step = ImporterStep {
module: crate::ReadFrom::Xlsx(importer),
path: output_dir.clone(),
};
let mut updates = second_import_step.execute(None).unwrap();

let mut written_graph = AnnotationGraph::with_default_graphstorages(false).unwrap();
let mut updates = importer
.reader()
.import_corpus(path, StepID::from_importer_module(&importer, None), None)
.unwrap();
written_graph.apply_update(&mut updates, |_| {}).unwrap();

// Compare the graphs and make sure the tokens exist
Expand All @@ -477,45 +493,54 @@ mod tests {
fn with_namespace() {
let importer: ImportSpreadsheet = toml::from_str(
r#"
column_map = {"tok" = ["mynamespace::lb"]}
column_map = {"default_ns::text" = ["mynamespace::lb"]}
"#,
)
.unwrap();
let importer = ReadFrom::Xlsx(importer);
let mut exporter = ExportXlsx::default();
exporter.include_namespace = true;
exporter.annotation_order = vec![AnnoKey {
ns: ANNIS_NS.into(),
name: "tok".into(),
ns: "default_ns".into(),
name: "text".into(),
}];
let exporter = WriteAs::Xlsx(exporter);

// Import an example document
let path = Path::new("./tests/data/import/xlsx/sample_sentence_with_namespace/");
let first_import_step = ImporterStep {
module: importer,
path: path.to_path_buf(),
};

let mut updates = importer
.reader()
.import_corpus(path, StepID::from_importer_module(&importer, None), None)
.unwrap();
let mut updates = first_import_step.execute(None).unwrap();
let mut original_graph = AnnotationGraph::with_default_graphstorages(false).unwrap();
original_graph.apply_update(&mut updates, |_| {}).unwrap();

// Export to Excel file and read it again
let output_dir = TempDir::new().unwrap();
exporter
.writer()
.export_corpus(
&original_graph,
output_dir.path(),
StepID::from_exporter_module(&exporter, None),
None,
)
.unwrap();
let tmp_outputdir = TempDir::new().unwrap();
let output_dir = tmp_outputdir.path().join("sample_sentence_with_namespace");
std::fs::create_dir(&output_dir).unwrap();
let export_step = ExporterStep {
module: exporter,
path: output_dir.clone(),
};
export_step.execute(&original_graph, None).unwrap();

let importer: ImportSpreadsheet = toml::from_str(
r#"
column_map = {"default_ns::text" = ["mynamespace::lb"]}
"#,
)
.unwrap();
let second_import_step = ImporterStep {
module: crate::ReadFrom::Xlsx(importer),
path: output_dir.clone(),
};
let mut updates = second_import_step.execute(None).unwrap();

let mut written_graph = AnnotationGraph::with_default_graphstorages(false).unwrap();
let mut updates = importer
.reader()
.import_corpus(path, StepID::from_importer_module(&importer, None), None)
.unwrap();

written_graph.apply_update(&mut updates, |_| {}).unwrap();

// Compare the graphs and make sure the tokens exist
Expand Down
29 changes: 18 additions & 11 deletions src/importer/conllu/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@ use graphannis::{graph::AnnoKey, update::GraphUpdate};
use insta::assert_snapshot;

use crate::{
importer::conllu::default_comment_key, test_util::import_as_graphml_string, ReadFrom, StepID,
importer::conllu::default_comment_key, test_util::import_as_graphml_string, ImporterStep,
ReadFrom, StepID,
};

use super::ImportCoNLLU;
Expand All @@ -13,14 +14,16 @@ use super::ImportCoNLLU;
fn test_conll_fail_invalid() {
let import = ReadFrom::CoNLLU(ImportCoNLLU::default());
let import_path = Path::new("tests/data/import/conll/invalid");
let step_id = StepID::from_importer_module(&import, Some(import_path.to_path_buf()));
let job = import
.reader()
.import_corpus(import_path, step_id.clone(), None);
let import_step = ImporterStep {
module: import,
path: import_path.to_path_buf(),
};
let job = import_step.execute(None);
assert!(job.is_err());
assert_snapshot!(job.err().unwrap().to_string());
let mut u = GraphUpdate::default();
let import = ImportCoNLLU::default();
let step_id = StepID::from_importer_step(&import_step);
assert!(import
.import_document(
&step_id,
Expand All @@ -36,11 +39,12 @@ fn test_conll_fail_invalid() {
fn test_conll_fail_invalid_heads() {
let import = ReadFrom::CoNLLU(ImportCoNLLU::default());
let import_path = Path::new("tests/data/import/conll/invalid-heads/");
let step_id = StepID::from_importer_module(&import, Some(import_path.to_path_buf()));
let import_step = ImporterStep {
module: import,
path: import_path.to_path_buf(),
};
let (sender, _receiver) = mpsc::channel();
let job = import
.reader()
.import_corpus(import_path, step_id, Some(sender));
let job = import_step.execute(Some(sender));
assert!(job.is_err());
assert_snapshot!(job.err().unwrap().to_string());
}
Expand All @@ -49,9 +53,12 @@ fn test_conll_fail_invalid_heads() {
fn test_conll_fail_cyclic() -> Result<(), Box<dyn std::error::Error>> {
let import = ReadFrom::CoNLLU(ImportCoNLLU::default());
let import_path = Path::new("tests/data/import/conll/cyclic-deps/");
let step_id = StepID::from_importer_module(&import, Some(import_path.to_path_buf()));
let import_step = ImporterStep {
module: import,
path: import_path.to_path_buf(),
};

let job = import.reader().import_corpus(import_path, step_id, None);
let job = import_step.execute(None);
assert!(job.is_ok());
Ok(())
}
Expand Down
Loading

0 comments on commit 9f07c22

Please sign in to comment.