Skip to content

Commit

Permalink
Introducing --output parameter
Browse files Browse the repository at this point in the history
Change-Id: Ibeb1bb625f9ef30ccb6207d8c4d20d4c0d0c9056
  • Loading branch information
Akron committed Jun 6, 2024
1 parent 568b22f commit 132bdeb
Show file tree
Hide file tree
Showing 6 changed files with 80 additions and 4 deletions.
3 changes: 3 additions & 0 deletions Changes
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
2.6.0 2024-06-06
- Add -o parameter.

2.5.0 2023-01-24
- Upgrade minimal Perl version to 5.36 to improve
unicode handling.
Expand Down
2 changes: 1 addition & 1 deletion lib/KorAP/XML/TEI/Tokenizer/KorAP.pm
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use strict;
use warnings;
use File::Share ':all';

our $VERSION = '2.5.0';
our $VERSION = '2.6.0';
my $MIN_JAVA_VERSION = 17;

use constant {
Expand Down
2 changes: 1 addition & 1 deletion lib/KorAP/XML/TEI/Zipper.pm
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ sub new {
$root_dir =~ s/^\.?\/+//;
};

bless [$out // '-', undef, $root_dir // ''], $class;
bless [$out || '-', undef, $root_dir // ''], $class;
};


Expand Down
35 changes: 35 additions & 0 deletions lib/Test/KorAP/XML/TEI.pm
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,41 @@ sub stderr_unlike {
);
};

# Run an 'is' check of the stdout stored in the test object
# against $value. $desc is optional; a default description
# is passed to _desc() as fallback text.
sub stdout_is {
  my $self = shift;
  my ($value, $desc) = @_;

  return $self->_test(
    'is', $self->{stdout}, $value,
    _desc($desc, 'exact match for stdout')
  );
};


# Run a 'like' check of the stdout stored in the test object
# against $value (presumably a qr// pattern - confirm in _test).
# $desc is optional; a default description is passed to _desc()
# as fallback text.
sub stdout_like {
  my $self = shift;
  my ($value, $desc) = @_;

  return $self->_test(
    'like', $self->{stdout}, $value,
    _desc($desc, 'similar to stdout')
  );
};


# Run an 'unlike' check of the stdout stored in the test object
# against $value (presumably a qr// pattern - confirm in _test).
# $desc is optional; a default description is passed to _desc()
# as fallback text.
sub stdout_unlike {
  my $self = shift;
  my ($value, $desc) = @_;

  return $self->_test(
    'unlike', $self->{stdout}, $value,
    _desc($desc, 'not similar to stdout')
  );
};


# Check if a zip exists
sub file_exists {
Expand Down
9 changes: 7 additions & 2 deletions script/tei2korapxml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ use KorAP::XML::TEI::Zipper;
use KorAP::XML::TEI::Header;
use KorAP::XML::TEI::Inline;

our $VERSION = '2.5.0';
our $VERSION = '2.6.0';

our $VERSION_MSG = "\ntei2korapxml - v$VERSION\n";

Expand All @@ -44,6 +44,7 @@ my $inline_tokens_exclusive = 0;
GetOptions(
'root|r=s' => \(my $root_dir = '.'),
'input|i=s' => \(my $input_fname = ''),
'output|o=s' => \(my $output_fname = ''),
'tokenizer-call|tc=s' => \(my $tokenizer_call),
'tokenizer-korap|tk' => \(my $tokenizer_korap),
'tokenizer-internal|ti' => \(my $tokenizer_intern),
Expand Down Expand Up @@ -166,7 +167,7 @@ if (index($_tokens_dir, '!') == 0) {
};

# Initialize zipper
my $zipper = KorAP::XML::TEI::Zipper->new($root_dir);
my $zipper = KorAP::XML::TEI::Zipper->new($root_dir, $output_fname);

# text directory (below $root_dir)
my $dir = '';
Expand Down Expand Up @@ -563,6 +564,10 @@ Minimum requirement for L<KorAP::XML::TEI> is Perl 5.36.
The input file to process. If no specific input is defined and a single
dash C<-> is passed as an argument, data is read from C<STDIN>.

=item B<--output|-o>

The output zip file to be created. If no output file is defined,
the archive is written to C<STDOUT>.

=item B<--root|-r>
Expand Down
33 changes: 33 additions & 0 deletions t/script.t
Original file line number Diff line number Diff line change
Expand Up @@ -829,4 +829,37 @@ subtest 'Handling of whitespace at linebreaks' => sub {
;
};

# Verify that the -o parameter makes the script write the archive
# to the given file while nothing is written to stdout.
subtest 'Write to output' => sub {

  # Path handed to the script via -o
  my $temp_out = korap_tempfile('out');

  my $t = test_tei2korapxml(
    file  => catfile($f, 'data', 'stadigmer.p5.xml'),
    tmp   => 'script_out',
    param => '-s -ti -o "' . $temp_out . '"',
  )->stderr_like(qr!tei2korapxml:.*? text_id=NO_000\.00000!)
    ->stdout_is('');

  # Read the written file as raw bytes. The three-argument open
  # with a lexical handle keeps the file name from ever being parsed
  # as part of the mode, and a missing output file is reported
  # right here instead of surfacing later as an unzip failure.
  open(my $zip_fh, '<:raw', $temp_out)
    or die "Unable to open '$temp_out' for reading: $!";
  my $content = do { local $/; <$zip_fh> };
  close($zip_fh);

  # NOTE(review): unzip_xml presumably reads the archive from
  # $t->{stdout}, hence the file content is injected there - confirm
  # against Test::KorAP::XML::TEI.
  $t->{stdout} = $content;

  $t->unzip_xml('NO/000/00000/data.xml')
    ->content_like(qr/har lurt/)
    ->content_like(qr/etter at/)
    ->content_like(qr/en stund/)
    ->content_like(qr/skjønner med/)
    ->content_like(qr/og det/)
    ->content_like(qr/stadig mer/)
    ->content_like(qr/sitt, og/)
    ->content_like(qr/tenkt å bli/)
    ->content_like(qr/er både/)
    ;

  # Remove the output file again
  unlink $temp_out;
};

done_testing;

0 comments on commit 132bdeb

Please sign in to comment.