Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix progress and StepID when there are multiple graph operations from… #288

Merged
merged 5 commits into from
Aug 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

- `map` manipulator can now add annotated spans.

### Fixed

- Using the same type of manipulator more than once in a workflow now shows the
correct progress.

## [0.14.0] - 2024-07-24

### Added
Expand Down
19 changes: 8 additions & 11 deletions src/exporter/exmaralda.rs
Original file line number Diff line number Diff line change
Expand Up @@ -699,7 +699,7 @@ mod tests {
exporter::exmaralda::ExportExmaralda,
importer::exmaralda::ImportEXMARaLDA,
test_util::{export_to_string, export_to_string_in_directory},
ImporterStep, ReadFrom, Step, StepID,
ImporterStep, ReadFrom,
};

#[test]
Expand All @@ -709,11 +709,7 @@ mod tests {
module: crate::ReadFrom::EXMARaLDA(import),
path: PathBuf::from("./tests/data/import/exmaralda/clean/import/"),
};
let u = step.module.reader().import_corpus(
Path::new("./tests/data/import/exmaralda/clean/import/"),
step.get_step_id(),
None,
);
let u = step.execute(None);
assert!(u.is_ok());
let mut update = u.unwrap();
let g = AnnotationGraph::with_default_graphstorages(false);
Expand Down Expand Up @@ -747,11 +743,12 @@ mod tests {
.unwrap()
.join(Path::new("./tests/data/import/exmaralda/clean/import/"));

let u = import.reader().import_corpus(
&source_path,
StepID::from_importer_module(&import, Some(source_path.clone())),
None,
);
let step = ImporterStep {
module: import,
path: source_path,
};
let u = step.execute(None);

assert!(u.is_ok());
let mut update = u.unwrap();
let g = AnnotationGraph::with_default_graphstorages(false);
Expand Down
149 changes: 87 additions & 62 deletions src/exporter/xlsx.rs
Original file line number Diff line number Diff line change
Expand Up @@ -210,7 +210,6 @@ impl ExportXlsx {
"".into(),
)) {
// Output all tokens in the first column
let mut row_index = 1;
let mut token_roots_for_document = Vec::default();
for t in token_roots {
if gs_part_of.is_connected(*t, doc_node_id, 1, std::ops::Bound::Unbounded)? {
Expand All @@ -225,6 +224,8 @@ impl ExportXlsx {
// Start with the first token
let mut token = token_roots_for_document.into_iter().next();

// Reserve the first row for the header (rows start at index 1)
let mut row_index = 2;
while let Some(current_token) = token {
if let Some(val) = g
.get_node_annos()
Expand All @@ -236,8 +237,7 @@ impl ExportXlsx {
worksheet.get_cell_mut((1, row_index)).set_value_string(val);
}

// Reserve the first row for the header
token_to_row.insert(current_token, row_index + 1);
token_to_row.insert(current_token, row_index);

token = if let Some(ordering_gs) = ordering_gs {
if let Some(next_token) = ordering_gs.get_outgoing_edges(current_token).next() {
Expand Down Expand Up @@ -371,7 +371,8 @@ mod tests {
use tempfile::TempDir;

use crate::{
importer::xlsx::ImportSpreadsheet, test_util::compare_graphs, ReadFrom, StepID, WriteAs,
importer::xlsx::ImportSpreadsheet, test_util::compare_graphs, ExporterStep, ImporterStep,
ReadFrom, WriteAs,
};

use super::*;
Expand All @@ -388,31 +389,38 @@ mod tests {

// Import an example document
let path = Path::new("./tests/data/import/xlsx/clean/xlsx/");
let importer = crate::ReadFrom::Xlsx(importer);
let mut updates = importer
.reader()
.import_corpus(path, StepID::from_importer_module(&importer, None), None)
.unwrap();
let orig_import_step = ImporterStep {
module: crate::ReadFrom::Xlsx(importer),
path: path.to_path_buf(),
};
let mut updates = orig_import_step.execute(None).unwrap();
let mut original_graph = AnnotationGraph::with_default_graphstorages(false).unwrap();
original_graph.apply_update(&mut updates, |_| {}).unwrap();

// Export to Excel file, read it again and then compare the annotation graphs
let output_dir = TempDir::new().unwrap();
let tmp_outputdir = TempDir::new().unwrap();
let output_dir = tmp_outputdir.path().join("xlsx");
std::fs::create_dir(&output_dir).unwrap();
let exporter = crate::WriteAs::Xlsx(exporter);
exporter
.writer()
.export_corpus(
&original_graph,
output_dir.path(),
StepID::from_exporter_module(&exporter, None),
None,
)
.unwrap();
let export_step = ExporterStep {
module: exporter,
path: output_dir.clone(),
};
export_step.execute(&original_graph, None).unwrap();

let importer: ImportSpreadsheet = toml::from_str(
r#"
column_map = {"dipl" = ["sentence"], "norm" = ["pos", "lemma", "seg"]}
"#,
)
.unwrap();
let second_import_step = ImporterStep {
module: crate::ReadFrom::Xlsx(importer),
path: output_dir.clone(),
};
let mut updates = second_import_step.execute(None).unwrap();
let mut written_graph = AnnotationGraph::with_default_graphstorages(false).unwrap();
let mut updates = importer
.reader()
.import_corpus(path, StepID::from_importer_module(&importer, None), None)
.unwrap();

written_graph.apply_update(&mut updates, |_| {}).unwrap();

compare_graphs(&original_graph, &written_graph);
Expand All @@ -431,30 +439,38 @@ mod tests {
// Import an example document
let path = Path::new("./tests/data/import/xlsx/sample_sentence/");
let importer = crate::ReadFrom::Xlsx(importer);
let mut updates = importer
.reader()
.import_corpus(path, StepID::from_importer_module(&importer, None), None)
.unwrap();
let orig_import_step = ImporterStep {
module: importer,
path: path.to_path_buf(),
};
let mut updates = orig_import_step.execute(None).unwrap();
let mut original_graph = AnnotationGraph::with_default_graphstorages(false).unwrap();
original_graph.apply_update(&mut updates, |_| {}).unwrap();

// Export to Excel file and read it again
let output_dir = TempDir::new().unwrap();
let tmp_outputdir = TempDir::new().unwrap();
let output_dir = tmp_outputdir.path().join("sample_sentence");
std::fs::create_dir(&output_dir).unwrap();
let exporter = crate::WriteAs::Xlsx(exporter);
exporter
.writer()
.export_corpus(
&original_graph,
output_dir.path(),
StepID::from_exporter_module(&exporter, None),
None,
)
.unwrap();
let export_step = ExporterStep {
module: exporter,
path: output_dir.clone(),
};
export_step.execute(&original_graph, None).unwrap();

let importer: ImportSpreadsheet = toml::from_str(
r#"
column_map = {"tok" = ["lb"]}
"#,
)
.unwrap();
let second_import_step = ImporterStep {
module: crate::ReadFrom::Xlsx(importer),
path: output_dir.clone(),
};
let mut updates = second_import_step.execute(None).unwrap();

let mut written_graph = AnnotationGraph::with_default_graphstorages(false).unwrap();
let mut updates = importer
.reader()
.import_corpus(path, StepID::from_importer_module(&importer, None), None)
.unwrap();
written_graph.apply_update(&mut updates, |_| {}).unwrap();

// Compare the graphs and make sure the tokens exist
Expand All @@ -477,45 +493,54 @@ mod tests {
fn with_namespace() {
let importer: ImportSpreadsheet = toml::from_str(
r#"
column_map = {"tok" = ["mynamespace::lb"]}
column_map = {"default_ns::text" = ["mynamespace::lb"]}
"#,
)
.unwrap();
let importer = ReadFrom::Xlsx(importer);
let mut exporter = ExportXlsx::default();
exporter.include_namespace = true;
exporter.annotation_order = vec![AnnoKey {
ns: ANNIS_NS.into(),
name: "tok".into(),
ns: "default_ns".into(),
name: "text".into(),
}];
let exporter = WriteAs::Xlsx(exporter);

// Import an example document
let path = Path::new("./tests/data/import/xlsx/sample_sentence_with_namespace/");
let first_import_step = ImporterStep {
module: importer,
path: path.to_path_buf(),
};

let mut updates = importer
.reader()
.import_corpus(path, StepID::from_importer_module(&importer, None), None)
.unwrap();
let mut updates = first_import_step.execute(None).unwrap();
let mut original_graph = AnnotationGraph::with_default_graphstorages(false).unwrap();
original_graph.apply_update(&mut updates, |_| {}).unwrap();

// Export to Excel file and read it again
let output_dir = TempDir::new().unwrap();
exporter
.writer()
.export_corpus(
&original_graph,
output_dir.path(),
StepID::from_exporter_module(&exporter, None),
None,
)
.unwrap();
let tmp_outputdir = TempDir::new().unwrap();
let output_dir = tmp_outputdir.path().join("sample_sentence_with_namespace");
std::fs::create_dir(&output_dir).unwrap();
let export_step = ExporterStep {
module: exporter,
path: output_dir.clone(),
};
export_step.execute(&original_graph, None).unwrap();

let importer: ImportSpreadsheet = toml::from_str(
r#"
column_map = {"default_ns::text" = ["mynamespace::lb"]}
"#,
)
.unwrap();
let second_import_step = ImporterStep {
module: crate::ReadFrom::Xlsx(importer),
path: output_dir.clone(),
};
let mut updates = second_import_step.execute(None).unwrap();

let mut written_graph = AnnotationGraph::with_default_graphstorages(false).unwrap();
let mut updates = importer
.reader()
.import_corpus(path, StepID::from_importer_module(&importer, None), None)
.unwrap();

written_graph.apply_update(&mut updates, |_| {}).unwrap();

// Compare the graphs and make sure the tokens exist
Expand Down
29 changes: 18 additions & 11 deletions src/importer/conllu/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@ use graphannis::{graph::AnnoKey, update::GraphUpdate};
use insta::assert_snapshot;

use crate::{
importer::conllu::default_comment_key, test_util::import_as_graphml_string, ReadFrom, StepID,
importer::conllu::default_comment_key, test_util::import_as_graphml_string, ImporterStep,
ReadFrom, StepID,
};

use super::ImportCoNLLU;
Expand All @@ -13,14 +14,16 @@ use super::ImportCoNLLU;
fn test_conll_fail_invalid() {
let import = ReadFrom::CoNLLU(ImportCoNLLU::default());
let import_path = Path::new("tests/data/import/conll/invalid");
let step_id = StepID::from_importer_module(&import, Some(import_path.to_path_buf()));
let job = import
.reader()
.import_corpus(import_path, step_id.clone(), None);
let import_step = ImporterStep {
module: import,
path: import_path.to_path_buf(),
};
let job = import_step.execute(None);
assert!(job.is_err());
assert_snapshot!(job.err().unwrap().to_string());
let mut u = GraphUpdate::default();
let import = ImportCoNLLU::default();
let step_id = StepID::from_importer_step(&import_step);
assert!(import
.import_document(
&step_id,
Expand All @@ -36,11 +39,12 @@ fn test_conll_fail_invalid() {
fn test_conll_fail_invalid_heads() {
let import = ReadFrom::CoNLLU(ImportCoNLLU::default());
let import_path = Path::new("tests/data/import/conll/invalid-heads/");
let step_id = StepID::from_importer_module(&import, Some(import_path.to_path_buf()));
let import_step = ImporterStep {
module: import,
path: import_path.to_path_buf(),
};
let (sender, _receiver) = mpsc::channel();
let job = import
.reader()
.import_corpus(import_path, step_id, Some(sender));
let job = import_step.execute(Some(sender));
assert!(job.is_err());
assert_snapshot!(job.err().unwrap().to_string());
}
Expand All @@ -49,9 +53,12 @@ fn test_conll_fail_invalid_heads() {
fn test_conll_fail_cyclic() -> Result<(), Box<dyn std::error::Error>> {
let import = ReadFrom::CoNLLU(ImportCoNLLU::default());
let import_path = Path::new("tests/data/import/conll/cyclic-deps/");
let step_id = StepID::from_importer_module(&import, Some(import_path.to_path_buf()));
let import_step = ImporterStep {
module: import,
path: import_path.to_path_buf(),
};

let job = import.reader().import_corpus(import_path, step_id, None);
let job = import_step.execute(None);
assert!(job.is_ok());
Ok(())
}
Expand Down
Loading