From 1f95122fb70c936dbbe684dec4be7a01290c513e Mon Sep 17 00:00:00 2001 From: Akron Date: Thu, 25 Jul 2024 17:45:21 +0200 Subject: [PATCH] Support idno Change-Id: I05ae12e6076a911d8f9c69b032afeb6fdb792d85 --- Changes | 2 ++ lib/KorAP/XML/Meta/I5.pm | 44 +++++++++++++++++++++----- t/real/bzk_2.t | 4 ++- t/real/corpus/BZK/D59/00089/header.xml | 5 +++ 4 files changed, 46 insertions(+), 9 deletions(-) diff --git a/Changes b/Changes index 22e1b4a..9c1efe6 100644 --- a/Changes +++ b/Changes @@ -1,5 +1,7 @@ 0.57 2024-07-25 - Support award notes in i5. + - Add support for idno (with @rend) in i5. + - Add support for ISBN in i5. 0.56 2024-06-05 - Add support für corpusexplorer. diff --git a/lib/KorAP/XML/Meta/I5.pm b/lib/KorAP/XML/Meta/I5.pm index 66765de..36598fd 100644 --- a/lib/KorAP/XML/Meta/I5.pm +++ b/lib/KorAP/XML/Meta/I5.pm @@ -454,15 +454,41 @@ sub parse { $self->{A_file_edition_statement} = $temp2 if $temp2; }; - if (my $availability = $temp->at('publicationStmt > availability')) { - $temp2 = _squish $availability->all_text; - $self->{S_availability} = $temp2 if $temp2; - }; + if (my $pubstatement = $temp->at('publicationStmt')) { - if (my $distributor = $temp->at('publicationStmt > distributor')) { - $temp2 = _squish $distributor->all_text; - $self->{A_distributor} = $temp2 if $temp2; - } + if (my $availability = $pubstatement->at('> availability')) { + $temp2 = _squish $availability->all_text; + $self->{S_availability} = $temp2 if $temp2; + }; + + if (my $distributor = $pubstatement->at('> distributor')) { + $temp2 = _squish $distributor->all_text; + $self->{A_distributor} = $temp2 if $temp2; + }; + + $pubstatement->find('> idno')->each( + sub { + my $type = $_->attr('type') or return; + if ($type eq 'ISBN') { + $temp2 = _squish $_->all_text or return; + $self->{S_ISBN} = $temp2; + } + + # An idno with @rend "KEY;TITLE" is stored as A_KEY; URL/URI values become links titled TITLE (or the value itself) + elsif (my $rend = $_->attr('rend')) { + $temp2 = _squish $_->all_text or return; + my ($key, $title) = 
split(';', $rend, 2); + return unless length $key; + if ($type eq 'URL' || $type eq 'URI') { + $self->{'A_' . $key} = $self->korap_data_uri($temp2, title => $title // $temp2); + } + else { + $self->{'A_' . $key} = $temp2; + } + } + } + ); + }; }; if ($temp = $dom->at('profileDesc > langUsage > language[id]')) { @@ -625,6 +651,8 @@ The order may indicate a field to be overwritten. fileDesc editionStmt fileEditionStatement ATTACHMENT fileDesc publicationStmt > availability availability STRING fileDesc publicationStmt > distributor distributor ATTACHMENT + fileDesc publicationStmt > idno[type=ISBN] isbn STRING + fileDesc publicationStmt > idno @rend[0] ATTACHMENT profileDesc > langUsage > language[id]@id language STRING =item B diff --git a/t/real/bzk_2.t b/t/real/bzk_2.t index d250a7c..ea81cf4 100644 --- a/t/real/bzk_2.t +++ b/t/real/bzk_2.t @@ -51,7 +51,6 @@ is($meta->{K_text_class}->[0], 'politik', 'Correct Text Class'); is($meta->{K_text_class}->[1], 'ausland', 'Correct Text Class'); ok(!$meta->{K_text_class}->[2], 'Correct Text Class'); - is($meta->{D_creation_date}, '19590219', 'Creation date'); is($meta->{S_availability}, 'ACA-NC-LC', 'License'); ok(!$meta->{pages}, 'Pages'); @@ -74,6 +73,9 @@ is($meta->{T_doc_sub_title}, 'Organ des Zentralkomitees der Sozialistischen Einh ok(!$meta->{T_doc_author}, 'Correct Doc author'); ok(!$meta->{A_doc_editor}, 'Correct doc editor'); +is($meta->{A_URN},'data:application/x.korap-link;title=urn%3Anbn%3Ade%3A101%3A1-2019012904515398173807,http%3A%2F%2Fnbn-resolving.de%2Furn%3Anbn%3Ade%3A101%3A1-2019012904515398173807', 'URN link'); +is($meta->{S_ISBN},'978-3-7325-6705-8', 'ISBN'); + # Tokenization use_ok('KorAP::XML::Tokenizer'); diff --git a/t/real/corpus/BZK/D59/00089/header.xml b/t/real/corpus/BZK/D59/00089/header.xml index b140a57..b51fb42 100644 --- a/t/real/corpus/BZK/D59/00089/header.xml +++ b/t/real/corpus/BZK/D59/00089/header.xml @@ -10,6 +10,11 @@ + urn:nbn:de:101:1-2019012904515398173807 + 
http://nbn-resolving.de/urn:nbn:de:101:1-2019012904515398173807 + 978-3-7325-6705-8 + http://d-nb.info/1176524615/34 + 1176524615 ACA-NC-LC