Skip to content

Commit

Permalink
Add test for not decoding tag attribute
Browse files Browse the repository at this point in the history
  • Loading branch information
thomaskrause committed Nov 13, 2023
1 parent 4efc484 commit 8428c09
Show file tree
Hide file tree
Showing 3 changed files with 162 additions and 1 deletion.
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
---
source: src/importer/treetagger/tests.rs
expression: actual
---
<?xml version="1.0" encoding="UTF-8"?>
<graphml>
<key id="k0" for="graph" attr.name="configuration" attr.type="string"/>
<key id="k1" for="node" attr.name="annis::doc" attr.type="string"/>
<key id="k2" for="node" attr.name="annis::layer" attr.type="string"/>
<key id="k3" for="node" attr.name="annis::node_type" attr.type="string"/>
<key id="k4" for="node" attr.name="annis::tok" attr.type="string"/>
<key id="k5" for="node" attr.name="annis::tok-whitespace-before" attr.type="string"/>
<key id="k6" for="node" attr.name="url" attr.type="string"/>
<graph edgedefault="directed" parse.order="nodesfirst" parse.nodeids="free" parse.edgeids="canonical">
<data key="k0"><![CDATA[
[context]
default = 5
sizes = [0, 1, 2, 5, 10]

[view]
page_size = 10

[[visualizers]]
vis_type = "kwic"
display_name = "kwic"
visibility = "permanent"

[[visualizers]]
vis_type = "grid"
display_name = "grid"
]]></data>
<node id="unescaped_attribute">
<data key="k3">corpus</data>
</node>
<node id="unescaped_attribute/zossen">
<data key="k1">zossen</data>
<data key="k3">corpus</data>
<data key="k6">http://example.com/?a=test&amp;b=notest</data>
</node>
<node id="unescaped_attribute/zossen#text">
<data key="k3">datasource</data>
</node>
<node id="unescaped_attribute/zossen#t1">
<data key="k2">default_layer</data>
<data key="k3">node</data>
<data key="k4">Die</data>
</node>
<node id="unescaped_attribute/zossen#t2">
<data key="k2">default_layer</data>
<data key="k3">node</data>
<data key="k4">Jugendlichen</data>
<data key="k5"> </data>
</node>
<node id="unescaped_attribute/zossen#t3">
<data key="k2">default_layer</data>
<data key="k3">node</data>
<data key="k4">in</data>
<data key="k5"> </data>
</node>
<node id="unescaped_attribute/zossen#t4">
<data key="k2">default_layer</data>
<data key="k3">node</data>
<data key="k4">Zossen</data>
<data key="k5"> </data>
</node>
<node id="unescaped_attribute/zossen#t5">
<data key="k2">default_layer</data>
<data key="k3">node</data>
<data key="k4">wollen</data>
<data key="k5"> </data>
</node>
<node id="unescaped_attribute/zossen#t6">
<data key="k2">default_layer</data>
<data key="k3">node</data>
<data key="k4">ein</data>
<data key="k5"> </data>
</node>
<node id="unescaped_attribute/zossen#t7">
<data key="k2">default_layer</data>
<data key="k3">node</data>
<data key="k4">Musikcafé</data>
<data key="k5"> </data>
</node>
<node id="unescaped_attribute/zossen#t8">
<data key="k2">default_layer</data>
<data key="k3">node</data>
<data key="k4">.</data>
<data key="k5"> </data>
</node>
<edge id="e0" source="unescaped_attribute/zossen#t7" target="unescaped_attribute/zossen#t8" label="Ordering/annis/">
</edge>
<edge id="e1" source="unescaped_attribute/zossen#t6" target="unescaped_attribute/zossen#t7" label="Ordering/annis/">
</edge>
<edge id="e2" source="unescaped_attribute/zossen#t5" target="unescaped_attribute/zossen#t6" label="Ordering/annis/">
</edge>
<edge id="e3" source="unescaped_attribute/zossen#t4" target="unescaped_attribute/zossen#t5" label="Ordering/annis/">
</edge>
<edge id="e4" source="unescaped_attribute/zossen#t3" target="unescaped_attribute/zossen#t4" label="Ordering/annis/">
</edge>
<edge id="e5" source="unescaped_attribute/zossen#t2" target="unescaped_attribute/zossen#t3" label="Ordering/annis/">
</edge>
<edge id="e6" source="unescaped_attribute/zossen#t1" target="unescaped_attribute/zossen#t2" label="Ordering/annis/">
</edge>
<edge id="e7" source="unescaped_attribute/zossen#t8" target="unescaped_attribute/zossen#text" label="PartOf/annis/">
</edge>
<edge id="e8" source="unescaped_attribute/zossen#t5" target="unescaped_attribute/zossen#text" label="PartOf/annis/">
</edge>
<edge id="e9" source="unescaped_attribute/zossen#t2" target="unescaped_attribute/zossen#text" label="PartOf/annis/">
</edge>
<edge id="e10" source="unescaped_attribute/zossen" target="unescaped_attribute" label="PartOf/annis/">
</edge>
<edge id="e11" source="unescaped_attribute/zossen#t6" target="unescaped_attribute/zossen#text" label="PartOf/annis/">
</edge>
<edge id="e12" source="unescaped_attribute/zossen#t3" target="unescaped_attribute/zossen#text" label="PartOf/annis/">
</edge>
<edge id="e13" source="unescaped_attribute/zossen#text" target="unescaped_attribute/zossen" label="PartOf/annis/">
</edge>
<edge id="e14" source="unescaped_attribute/zossen#t7" target="unescaped_attribute/zossen#text" label="PartOf/annis/">
</edge>
<edge id="e15" source="unescaped_attribute/zossen#t4" target="unescaped_attribute/zossen#text" label="PartOf/annis/">
</edge>
<edge id="e16" source="unescaped_attribute/zossen#t1" target="unescaped_attribute/zossen#text" label="PartOf/annis/">
</edge>
</graph>
</graphml>
28 changes: 27 additions & 1 deletion src/importer/treetagger/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,10 @@ use std::path::Path;

use insta::assert_snapshot;

use crate::{importer::treetagger::TreeTaggerImporter, util::import_as_graphml_string};
use crate::{
importer::treetagger::{AttributeDecoding, TreeTaggerImporter},
util::import_as_graphml_string,
};

const TT_DEFAULT_VIS_CONFIG: &str = r#"
[context]
Expand Down Expand Up @@ -48,6 +51,29 @@ fn encoding_latin() {
assert_snapshot!(actual);
}

/// Imports the `unescaped_attribute` fixture twice, once per attribute
/// decoding mode, and checks that only the non-decoding mode succeeds.
///
/// The fixture is expected to carry a raw `&` inside a tag attribute value,
/// which is not a valid entity reference.
#[test]
fn disable_attribute_encoding() {
    let corpus_dir = Path::new("tests/data/import/treetagger/unescaped_attribute/");

    // Both runs differ only in the decoding mode, so build the importer and
    // trigger the import from a single place.
    let import_with = |decoding: AttributeDecoding| {
        let mut importer = TreeTaggerImporter::default();
        importer.attribute_decoding = decoding;
        import_as_graphml_string(importer, corpus_dir, Some(TT_DEFAULT_VIS_CONFIG))
    };

    // With entity decoding switched on, the import must report an error.
    let should_fail = import_with(AttributeDecoding::Entitites);
    assert!(should_fail.is_err());

    // With decoding switched off, the import must succeed; its GraphML output
    // is compared against the stored snapshot.
    let actual = import_with(AttributeDecoding::None).unwrap();

    assert_snapshot!(actual);
}

#[test]
fn single_sentence() {
let actual = import_as_graphml_string(
Expand Down
10 changes: 10 additions & 0 deletions tests/data/import/treetagger/unescaped_attribute/zossen.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
<document url="http://example.com/?a=test&b=notest">
Die
Jugendlichen
in
Zossen
wollen
ein
Musikcafé
.
</document>

0 comments on commit 8428c09

Please sign in to comment.