From 1ed9d185456b640b78e84411b16c8c87a7f55f03 Mon Sep 17 00:00:00 2001 From: Christian Clausner Date: Tue, 14 Oct 2014 10:00:22 +0100 Subject: [PATCH] Initial commit --- .classpath | 11 + .project | 17 + apidoc/allclasses-frame.html | 30 ++ apidoc/allclasses-noframe.html | 30 ++ apidoc/constant-values.html | 246 +++++++++ apidoc/deprecated-list.html | 116 +++++ apidoc/help-doc.html | 221 ++++++++ apidoc/index-all.html | 423 ++++++++++++++++ apidoc/index.html | 67 +++ .../page/scanner/ContentObjectHandler.html | 217 ++++++++ .../dla/page/scanner/PageScanner.html | 293 +++++++++++ .../class-use/ContentObjectHandler.html | 194 +++++++ .../page/scanner/class-use/PageScanner.html | 116 +++++ .../element/AttributeValueSetScanElement.html | 345 +++++++++++++ .../scanner/element/BoundsScanElement.html | 372 ++++++++++++++ .../element/ContentTypeCountScanElement.html | 342 +++++++++++++ .../LayersRegionRefCountScanElement.html | 317 ++++++++++++ .../scanner/element/MetaDataScanElement.html | 428 ++++++++++++++++ ...ReadingOrderRegionRefCountScanElement.html | 317 ++++++++++++ .../element/RegionCountScanElement.html | 338 +++++++++++++ .../RegionSubTypeCountScanElement.html | 345 +++++++++++++ .../dla/page/scanner/element/ScanElement.html | 251 ++++++++++ .../element/SpecialCharactersScanElement.html | 358 +++++++++++++ .../element/TextContentScanElement.html | 423 ++++++++++++++++ .../AttributeValueSetScanElement.html | 116 +++++ .../element/class-use/BoundsScanElement.html | 116 +++++ .../ContentTypeCountScanElement.html | 116 +++++ .../LayersRegionRefCountScanElement.html | 116 +++++ .../class-use/MetaDataScanElement.html | 116 +++++ ...ReadingOrderRegionRefCountScanElement.html | 116 +++++ .../class-use/RegionCountScanElement.html | 116 +++++ .../RegionSubTypeCountScanElement.html | 116 +++++ .../element/class-use/ScanElement.html | 218 ++++++++ .../SpecialCharactersScanElement.html | 116 +++++ .../class-use/TextContentScanElement.html | 116 +++++ 
.../page/scanner/element/package-frame.html | 32 ++ .../page/scanner/element/package-summary.html | 213 ++++++++ .../page/scanner/element/package-tree.html | 142 ++++++ .../dla/page/scanner/element/package-use.html | 151 ++++++ .../dla/page/scanner/package-frame.html | 23 + .../dla/page/scanner/package-summary.html | 163 ++++++ .../dla/page/scanner/package-tree.html | 133 +++++ .../dla/page/scanner/package-use.html | 151 ++++++ apidoc/overview-frame.html | 21 + apidoc/overview-summary.html | 134 +++++ apidoc/overview-tree.html | 145 ++++++ apidoc/package-list | 2 + apidoc/resources/background.gif | Bin 0 -> 2313 bytes apidoc/resources/tab.gif | Bin 0 -> 291 bytes apidoc/resources/titlebar.gif | Bin 0 -> 10701 bytes apidoc/resources/titlebar_end.gif | Bin 0 -> 849 bytes apidoc/stylesheet.css | 474 ++++++++++++++++++ .../page/scanner/ContentObjectHandler.java | 32 ++ .../dla/page/scanner/PageScanner.java | 335 +++++++++++++ .../element/AttributeValueSetScanElement.java | 91 ++++ .../scanner/element/BoundsScanElement.java | 62 +++ .../element/ContentTypeCountScanElement.java | 65 +++ .../LayersRegionRefCountScanElement.java | 62 +++ .../scanner/element/MetaDataScanElement.java | 79 +++ ...ReadingOrderRegionRefCountScanElement.java | 62 +++ .../element/RegionCountScanElement.java | 53 ++ .../RegionSubTypeCountScanElement.java | 174 +++++++ .../dla/page/scanner/element/ScanElement.java | 45 ++ .../element/SpecialCharactersScanElement.java | 118 +++++ .../element/TextContentScanElement.java | 108 ++++ 65 files changed, 10265 insertions(+) create mode 100644 .classpath create mode 100644 .project create mode 100644 apidoc/allclasses-frame.html create mode 100644 apidoc/allclasses-noframe.html create mode 100644 apidoc/constant-values.html create mode 100644 apidoc/deprecated-list.html create mode 100644 apidoc/help-doc.html create mode 100644 apidoc/index-all.html create mode 100644 apidoc/index.html create mode 100644 
apidoc/org/primaresearch/dla/page/scanner/ContentObjectHandler.html create mode 100644 apidoc/org/primaresearch/dla/page/scanner/PageScanner.html create mode 100644 apidoc/org/primaresearch/dla/page/scanner/class-use/ContentObjectHandler.html create mode 100644 apidoc/org/primaresearch/dla/page/scanner/class-use/PageScanner.html create mode 100644 apidoc/org/primaresearch/dla/page/scanner/element/AttributeValueSetScanElement.html create mode 100644 apidoc/org/primaresearch/dla/page/scanner/element/BoundsScanElement.html create mode 100644 apidoc/org/primaresearch/dla/page/scanner/element/ContentTypeCountScanElement.html create mode 100644 apidoc/org/primaresearch/dla/page/scanner/element/LayersRegionRefCountScanElement.html create mode 100644 apidoc/org/primaresearch/dla/page/scanner/element/MetaDataScanElement.html create mode 100644 apidoc/org/primaresearch/dla/page/scanner/element/ReadingOrderRegionRefCountScanElement.html create mode 100644 apidoc/org/primaresearch/dla/page/scanner/element/RegionCountScanElement.html create mode 100644 apidoc/org/primaresearch/dla/page/scanner/element/RegionSubTypeCountScanElement.html create mode 100644 apidoc/org/primaresearch/dla/page/scanner/element/ScanElement.html create mode 100644 apidoc/org/primaresearch/dla/page/scanner/element/SpecialCharactersScanElement.html create mode 100644 apidoc/org/primaresearch/dla/page/scanner/element/TextContentScanElement.html create mode 100644 apidoc/org/primaresearch/dla/page/scanner/element/class-use/AttributeValueSetScanElement.html create mode 100644 apidoc/org/primaresearch/dla/page/scanner/element/class-use/BoundsScanElement.html create mode 100644 apidoc/org/primaresearch/dla/page/scanner/element/class-use/ContentTypeCountScanElement.html create mode 100644 apidoc/org/primaresearch/dla/page/scanner/element/class-use/LayersRegionRefCountScanElement.html create mode 100644 apidoc/org/primaresearch/dla/page/scanner/element/class-use/MetaDataScanElement.html create mode 100644 
apidoc/org/primaresearch/dla/page/scanner/element/class-use/ReadingOrderRegionRefCountScanElement.html create mode 100644 apidoc/org/primaresearch/dla/page/scanner/element/class-use/RegionCountScanElement.html create mode 100644 apidoc/org/primaresearch/dla/page/scanner/element/class-use/RegionSubTypeCountScanElement.html create mode 100644 apidoc/org/primaresearch/dla/page/scanner/element/class-use/ScanElement.html create mode 100644 apidoc/org/primaresearch/dla/page/scanner/element/class-use/SpecialCharactersScanElement.html create mode 100644 apidoc/org/primaresearch/dla/page/scanner/element/class-use/TextContentScanElement.html create mode 100644 apidoc/org/primaresearch/dla/page/scanner/element/package-frame.html create mode 100644 apidoc/org/primaresearch/dla/page/scanner/element/package-summary.html create mode 100644 apidoc/org/primaresearch/dla/page/scanner/element/package-tree.html create mode 100644 apidoc/org/primaresearch/dla/page/scanner/element/package-use.html create mode 100644 apidoc/org/primaresearch/dla/page/scanner/package-frame.html create mode 100644 apidoc/org/primaresearch/dla/page/scanner/package-summary.html create mode 100644 apidoc/org/primaresearch/dla/page/scanner/package-tree.html create mode 100644 apidoc/org/primaresearch/dla/page/scanner/package-use.html create mode 100644 apidoc/overview-frame.html create mode 100644 apidoc/overview-summary.html create mode 100644 apidoc/overview-tree.html create mode 100644 apidoc/package-list create mode 100644 apidoc/resources/background.gif create mode 100644 apidoc/resources/tab.gif create mode 100644 apidoc/resources/titlebar.gif create mode 100644 apidoc/resources/titlebar_end.gif create mode 100644 apidoc/stylesheet.css create mode 100644 src/org/primaresearch/dla/page/scanner/ContentObjectHandler.java create mode 100644 src/org/primaresearch/dla/page/scanner/PageScanner.java create mode 100644 src/org/primaresearch/dla/page/scanner/element/AttributeValueSetScanElement.java create mode 
100644 src/org/primaresearch/dla/page/scanner/element/BoundsScanElement.java create mode 100644 src/org/primaresearch/dla/page/scanner/element/ContentTypeCountScanElement.java create mode 100644 src/org/primaresearch/dla/page/scanner/element/LayersRegionRefCountScanElement.java create mode 100644 src/org/primaresearch/dla/page/scanner/element/MetaDataScanElement.java create mode 100644 src/org/primaresearch/dla/page/scanner/element/ReadingOrderRegionRefCountScanElement.java create mode 100644 src/org/primaresearch/dla/page/scanner/element/RegionCountScanElement.java create mode 100644 src/org/primaresearch/dla/page/scanner/element/RegionSubTypeCountScanElement.java create mode 100644 src/org/primaresearch/dla/page/scanner/element/ScanElement.java create mode 100644 src/org/primaresearch/dla/page/scanner/element/SpecialCharactersScanElement.java create mode 100644 src/org/primaresearch/dla/page/scanner/element/TextContentScanElement.java diff --git a/.classpath b/.classpath new file mode 100644 index 0000000..b37f781 --- /dev/null +++ b/.classpath @@ -0,0 +1,11 @@ + + + + + + + + + + + diff --git a/.project b/.project new file mode 100644 index 0000000..97f0063 --- /dev/null +++ b/.project @@ -0,0 +1,17 @@ + + + PageScanner + + + + + + org.eclipse.jdt.core.javabuilder + + + + + + org.eclipse.jdt.core.javanature + + diff --git a/apidoc/allclasses-frame.html b/apidoc/allclasses-frame.html new file mode 100644 index 0000000..9d33887 --- /dev/null +++ b/apidoc/allclasses-frame.html @@ -0,0 +1,30 @@ + + + + + +All Classes (PageMetadataScanner API) + + + + +

All Classes

+
+ +
+ + diff --git a/apidoc/allclasses-noframe.html b/apidoc/allclasses-noframe.html new file mode 100644 index 0000000..719e1a1 --- /dev/null +++ b/apidoc/allclasses-noframe.html @@ -0,0 +1,30 @@ + + + + + +All Classes (PageMetadataScanner API) + + + + +

All Classes

+
+ +
+ + diff --git a/apidoc/constant-values.html b/apidoc/constant-values.html new file mode 100644 index 0000000..2108549 --- /dev/null +++ b/apidoc/constant-values.html @@ -0,0 +1,246 @@ + + + + + +Constant Field Values (PageMetadataScanner API) + + + + + + + +
+ + + + + +
+ + +
+

Constant Field Values

+

Contents

+ +
+
+ + +

org.primaresearch.*

+ +
+ +
+ + + + + +
+ + +

Copyright © 2014 PRImA Research Lab. All Rights Reserved.

+ + diff --git a/apidoc/deprecated-list.html b/apidoc/deprecated-list.html new file mode 100644 index 0000000..d23a4bb --- /dev/null +++ b/apidoc/deprecated-list.html @@ -0,0 +1,116 @@ + + + + + +Deprecated List (PageMetadataScanner API) + + + + + + + +
+ + + + + +
+ + +
+

Deprecated API

+

Contents

+
+ +
+ + + + + +
+ + +

Copyright © 2014 PRImA Research Lab. All Rights Reserved.

+ + diff --git a/apidoc/help-doc.html b/apidoc/help-doc.html new file mode 100644 index 0000000..4c00528 --- /dev/null +++ b/apidoc/help-doc.html @@ -0,0 +1,221 @@ + + + + + +API Help (PageMetadataScanner API) + + + + + + + +
+ + + + + +
+ + +
+

How This API Document Is Organized

+
This API (Application Programming Interface) document has pages corresponding to the items in the navigation bar, described as follows.
+
+
+ +This help file applies to API documentation generated using the standard doclet.
+ +
+ + + + + +
+ + +

Copyright © 2014 PRImA Research Lab. All Rights Reserved.

+ + diff --git a/apidoc/index-all.html b/apidoc/index-all.html new file mode 100644 index 0000000..b6dd6b5 --- /dev/null +++ b/apidoc/index-all.html @@ -0,0 +1,423 @@ + + + + + +Index (PageMetadataScanner API) + + + + + + + +
+ + + + + +
+ + +
A B C G H I L M O P R S T  + + +

A

+
+
AttributeValueSetScanElement - Class in org.primaresearch.dla.page.scanner.element
+
+
Scan element that builds a set of values for a specified content type and attribute.
+
+ Example:
+ Content type: TextRegion
+ Attribute: Language
+ Result value: German;English;French
+
+
AttributeValueSetScanElement(ContentType, String) - Constructor for class org.primaresearch.dla.page.scanner.element.AttributeValueSetScanElement
+
 
+
+ + + +

B

+
+
BoundsScanElement - Class in org.primaresearch.dla.page.scanner.element
+
+
Scan element that checks whether a border or print space is defined.
+
+
BoundsScanElement(int) - Constructor for class org.primaresearch.dla.page.scanner.element.BoundsScanElement
+
+
Constructor
+
+
+ + + +

C

+
+
ContentObjectHandler - Interface in org.primaresearch.dla.page.scanner
+
+
Interface for classes that handle layout content objects.
+
+
ContentTypeCountScanElement - Class in org.primaresearch.dla.page.scanner.element
+
+
Scan element that counts the occurrences of content objects of a specified type.
+
+
ContentTypeCountScanElement(ContentType) - Constructor for class org.primaresearch.dla.page.scanner.element.ContentTypeCountScanElement
+
+
Constructor
+
+
+ + + +

G

+
+
getCsvHeader() - Method in class org.primaresearch.dla.page.scanner.element.AttributeValueSetScanElement
+
 
+
getCsvHeader() - Method in class org.primaresearch.dla.page.scanner.element.BoundsScanElement
+
 
+
getCsvHeader() - Method in class org.primaresearch.dla.page.scanner.element.ContentTypeCountScanElement
+
 
+
getCsvHeader() - Method in class org.primaresearch.dla.page.scanner.element.LayersRegionRefCountScanElement
+
 
+
getCsvHeader() - Method in class org.primaresearch.dla.page.scanner.element.MetaDataScanElement
+
 
+
getCsvHeader() - Method in class org.primaresearch.dla.page.scanner.element.ReadingOrderRegionRefCountScanElement
+
 
+
getCsvHeader() - Method in class org.primaresearch.dla.page.scanner.element.RegionCountScanElement
+
 
+
getCsvHeader() - Method in class org.primaresearch.dla.page.scanner.element.RegionSubTypeCountScanElement
+
 
+
getCsvHeader() - Method in interface org.primaresearch.dla.page.scanner.element.ScanElement
+
+
Returns the CSV header(s) of this scan element.
+
+
getCsvHeader() - Method in class org.primaresearch.dla.page.scanner.element.SpecialCharactersScanElement
+
 
+
getCsvHeader() - Method in class org.primaresearch.dla.page.scanner.element.TextContentScanElement
+
 
+
getCsvValue() - Method in class org.primaresearch.dla.page.scanner.element.AttributeValueSetScanElement
+
 
+
getCsvValue() - Method in class org.primaresearch.dla.page.scanner.element.BoundsScanElement
+
 
+
getCsvValue() - Method in class org.primaresearch.dla.page.scanner.element.ContentTypeCountScanElement
+
 
+
getCsvValue() - Method in class org.primaresearch.dla.page.scanner.element.LayersRegionRefCountScanElement
+
 
+
getCsvValue() - Method in class org.primaresearch.dla.page.scanner.element.MetaDataScanElement
+
 
+
getCsvValue() - Method in class org.primaresearch.dla.page.scanner.element.ReadingOrderRegionRefCountScanElement
+
 
+
getCsvValue() - Method in class org.primaresearch.dla.page.scanner.element.RegionCountScanElement
+
 
+
getCsvValue() - Method in class org.primaresearch.dla.page.scanner.element.RegionSubTypeCountScanElement
+
 
+
getCsvValue() - Method in interface org.primaresearch.dla.page.scanner.element.ScanElement
+
+
Returns the CSV value(s) of this scan element.
+
+
getCsvValue() - Method in class org.primaresearch.dla.page.scanner.element.SpecialCharactersScanElement
+
 
+
getCsvValue() - Method in class org.primaresearch.dla.page.scanner.element.TextContentScanElement
+
 
+
+ + + +

H

+
+
handleContentObject(ContentObject) - Method in interface org.primaresearch.dla.page.scanner.ContentObjectHandler
+
+
Handles the given layout content object.
+
+
handleContentObject(ContentObject) - Method in class org.primaresearch.dla.page.scanner.element.AttributeValueSetScanElement
+
 
+
handleContentObject(ContentObject) - Method in class org.primaresearch.dla.page.scanner.element.ContentTypeCountScanElement
+
 
+
handleContentObject(ContentObject) - Method in class org.primaresearch.dla.page.scanner.element.RegionCountScanElement
+
 
+
handleContentObject(ContentObject) - Method in class org.primaresearch.dla.page.scanner.element.RegionSubTypeCountScanElement
+
 
+
handleContentObject(ContentObject) - Method in class org.primaresearch.dla.page.scanner.element.SpecialCharactersScanElement
+
 
+
handleContentObject(ContentObject) - Method in class org.primaresearch.dla.page.scanner.element.TextContentScanElement
+
 
+
+ + + +

I

+
+
init(Page) - Method in class org.primaresearch.dla.page.scanner.element.AttributeValueSetScanElement
+
 
+
init(Page) - Method in class org.primaresearch.dla.page.scanner.element.BoundsScanElement
+
 
+
init(Page) - Method in class org.primaresearch.dla.page.scanner.element.ContentTypeCountScanElement
+
 
+
init(Page) - Method in class org.primaresearch.dla.page.scanner.element.LayersRegionRefCountScanElement
+
 
+
init(Page) - Method in class org.primaresearch.dla.page.scanner.element.MetaDataScanElement
+
 
+
init(Page) - Method in class org.primaresearch.dla.page.scanner.element.ReadingOrderRegionRefCountScanElement
+
 
+
init(Page) - Method in class org.primaresearch.dla.page.scanner.element.RegionCountScanElement
+
 
+
init(Page) - Method in class org.primaresearch.dla.page.scanner.element.RegionSubTypeCountScanElement
+
 
+
init(Page) - Method in interface org.primaresearch.dla.page.scanner.element.ScanElement
+
+
Initialisation (called before any other method).
+
+
init(Page) - Method in class org.primaresearch.dla.page.scanner.element.SpecialCharactersScanElement
+
 
+
init(Page) - Method in class org.primaresearch.dla.page.scanner.element.TextContentScanElement
+
 
+
+ + + +

L

+
+
LayersRegionRefCountScanElement - Class in org.primaresearch.dla.page.scanner.element
+
+
Scan element that counts the number of referenced regions in layers.
+
+
LayersRegionRefCountScanElement() - Constructor for class org.primaresearch.dla.page.scanner.element.LayersRegionRefCountScanElement
+
 
+
+ + + +

M

+
+
main(String[]) - Static method in class org.primaresearch.dla.page.scanner.PageScanner
+
+
Main function
+
+
MetaDataScanElement - Class in org.primaresearch.dla.page.scanner.element
+
+
Scan element for metadata entries.
+
+
MetaDataScanElement(int) - Constructor for class org.primaresearch.dla.page.scanner.element.MetaDataScanElement
+
+
Constructor
+
+
+ + + +

O

+
+
org.primaresearch.dla.page.scanner - package org.primaresearch.dla.page.scanner
+
 
+
org.primaresearch.dla.page.scanner.element - package org.primaresearch.dla.page.scanner.element
+
 
+
+ + + +

P

+
+
PageScanner - Class in org.primaresearch.dla.page.scanner
+
+
Command line tool that scans a single PAGE XML file and outputs its properties in CSV format.
+
+ Properties:
+ + Metadata (ID, creator, creation time, modification time, width, height) + Border and print space (true/false) + Content objects count (per type and sub-type) + Text content statistics (number of characters and white spaces) + Language and script (semicolon separated list) + Reading order and layers (number of region references) +
+
+
PageScanner() - Constructor for class org.primaresearch.dla.page.scanner.PageScanner
+
+
Constructor
+
+
+ + + +

R

+
+
ReadingOrderRegionRefCountScanElement - Class in org.primaresearch.dla.page.scanner.element
+
+
Scan element that counts the number of referenced regions in the reading order.
+
+
ReadingOrderRegionRefCountScanElement() - Constructor for class org.primaresearch.dla.page.scanner.element.ReadingOrderRegionRefCountScanElement
+
 
+
RegionCountScanElement - Class in org.primaresearch.dla.page.scanner.element
+
+
Scan element that counts the overall number of layout regions.
+
+
RegionCountScanElement() - Constructor for class org.primaresearch.dla.page.scanner.element.RegionCountScanElement
+
 
+
RegionSubTypeCountScanElement - Class in org.primaresearch.dla.page.scanner.element
+
+
Scan element that counts the occurrences of regions per sub-type.
+ This element represents multiple CSV entries.
+
+
RegionSubTypeCountScanElement(RegionType, FormatModel) - Constructor for class org.primaresearch.dla.page.scanner.element.RegionSubTypeCountScanElement
+
+
Constructor
+
+
+ + + +

S

+
+
ScanElement - Interface in org.primaresearch.dla.page.scanner.element
+
+
Interface for PAGE scanner elements.
+
+
setMode(String) - Method in class org.primaresearch.dla.page.scanner.PageScanner
+
+
Sets the scan mode
+
+
SpecialCharactersScanElement - Class in org.primaresearch.dla.page.scanner.element
+
+
Creates a list of characters that occur in the text content of the document.
+
+
SpecialCharactersScanElement() - Constructor for class org.primaresearch.dla.page.scanner.element.SpecialCharactersScanElement
+
+
Default constructor
+
+
SpecialCharactersScanElement(boolean) - Constructor for class org.primaresearch.dla.page.scanner.element.SpecialCharactersScanElement
+
+
Constructor
+
+
+ + + +

T

+
+
TextContentScanElement - Class in org.primaresearch.dla.page.scanner.element
+
+
Scan element that calculates text content statistics.
+
+
TextContentScanElement(ContentType, int) - Constructor for class org.primaresearch.dla.page.scanner.element.TextContentScanElement
+
+
Constructor
+
+
TYPE_BORDER - Static variable in class org.primaresearch.dla.page.scanner.element.BoundsScanElement
+
 
+
TYPE_COUNT_ALL - Static variable in class org.primaresearch.dla.page.scanner.element.TextContentScanElement
+
 
+
TYPE_COUNT_CHARACTERS - Static variable in class org.primaresearch.dla.page.scanner.element.TextContentScanElement
+
 
+
TYPE_COUNT_LINE_BREAKS - Static variable in class org.primaresearch.dla.page.scanner.element.TextContentScanElement
+
 
+
TYPE_COUNT_SPACES_AND_TABS - Static variable in class org.primaresearch.dla.page.scanner.element.TextContentScanElement
+
 
+
TYPE_CREATED - Static variable in class org.primaresearch.dla.page.scanner.element.MetaDataScanElement
+
 
+
TYPE_CREATOR - Static variable in class org.primaresearch.dla.page.scanner.element.MetaDataScanElement
+
 
+
TYPE_HEIGHT - Static variable in class org.primaresearch.dla.page.scanner.element.MetaDataScanElement
+
 
+
TYPE_MODIFIED - Static variable in class org.primaresearch.dla.page.scanner.element.MetaDataScanElement
+
 
+
TYPE_PCGTS_ID - Static variable in class org.primaresearch.dla.page.scanner.element.MetaDataScanElement
+
 
+
TYPE_PRINT_SPACE - Static variable in class org.primaresearch.dla.page.scanner.element.BoundsScanElement
+
 
+
TYPE_WIDTH - Static variable in class org.primaresearch.dla.page.scanner.element.MetaDataScanElement
+
 
+
+A B C G H I L M O P R S T 
+ +
+ + + + + +
+ + +

Copyright © 2014 PRImA Research Lab. All Rights Reserved.

+ + diff --git a/apidoc/index.html b/apidoc/index.html new file mode 100644 index 0000000..c9769a6 --- /dev/null +++ b/apidoc/index.html @@ -0,0 +1,67 @@ + + + + + +PageMetadataScanner API + + + + + + + + + +<noscript> +<div>JavaScript is disabled on your browser.</div> +</noscript> +<h2>Frame Alert</h2> +<p>This document is designed to be viewed using the frames feature. If you see this message, you are using a non-frame-capable web client. Link to <a href="overview-summary.html">Non-frame version</a>.</p> + + + diff --git a/apidoc/org/primaresearch/dla/page/scanner/ContentObjectHandler.html b/apidoc/org/primaresearch/dla/page/scanner/ContentObjectHandler.html new file mode 100644 index 0000000..24bb4c4 --- /dev/null +++ b/apidoc/org/primaresearch/dla/page/scanner/ContentObjectHandler.html @@ -0,0 +1,217 @@ + + + + + +ContentObjectHandler (PageMetadataScanner API) + + + + + + + +
+ + + + + +
+ + + +
+
org.primaresearch.dla.page.scanner
+

Interface ContentObjectHandler

+
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Method Summary

      + + + + + + + + + + +
      Methods 
      Modifier and TypeMethod and Description
      voidhandleContentObject(org.primaresearch.dla.page.layout.physical.ContentObject obj) +
      Handles the given layout content object.
      +
      +
    • +
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        handleContentObject

        +
        void handleContentObject(org.primaresearch.dla.page.layout.physical.ContentObject obj)
        +
        Handles the given layout content object.
        +
      • +
      +
    • +
    +
  • +
+
+
+ + +
+ + + + + +
+ + +

Copyright © 2014 PRImA Research Lab. All Rights Reserved.

+ + diff --git a/apidoc/org/primaresearch/dla/page/scanner/PageScanner.html b/apidoc/org/primaresearch/dla/page/scanner/PageScanner.html new file mode 100644 index 0000000..0b06960 --- /dev/null +++ b/apidoc/org/primaresearch/dla/page/scanner/PageScanner.html @@ -0,0 +1,293 @@ + + + + + +PageScanner (PageMetadataScanner API) + + + + + + + +
+ + + + + +
+ + + +
+
org.primaresearch.dla.page.scanner
+

Class PageScanner

+
+
+ +
+
    +
  • +
    +
    +
    public class PageScanner
    +extends java.lang.Object
    +
    Command line tool that scans a single PAGE XML file and outputs its properties in CSV format.
    +
    + Properties:
    +
      +
    • Metadata (ID, creator, creation time, modification time, width, height)
    • +
    • Border and print space (true/false)
    • +
    • Content objects count (per type and sub-type)
    • +
    • Text content statistics (number of characters and white spaces)
    • +
    • Language and script (semicolon separated list)
    • +
    • Reading order and layers (number of region references)
    • +
    +
    Author:
    +
    clc
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Summary

      + + + + + + + + +
      Constructors 
      Constructor and Description
      PageScanner() +
      Constructor
      +
      +
    • +
    + +
      +
    • + + +

      Method Summary

      + + + + + + + + + + + + + + +
      Methods 
      Modifier and TypeMethod and Description
      static voidmain(java.lang.String[] args) +
      Main function
      +
      voidsetMode(java.lang.String mode) +
      Sets the scan mode
      +
      +
        +
      • + + +

        Methods inherited from class java.lang.Object

        +clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
      • +
      +
    • +
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        PageScanner

        +
        public PageScanner()
        +
        Constructor
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        main

        +
        public static void main(java.lang.String[] args)
        +
        Main function
        +
      • +
      + + + +
        +
      • +

        setMode

        +
        public void setMode(java.lang.String mode)
        +
        Sets the scan mode
        +
        Parameters:
        mode - 'default', 'characters', or 'characters-one-column'
        +
      • +
      +
    • +
    +
  • +
+
+
+ + +
+ + + + + +
+ + +

Copyright © 2014 PRImA Research Lab. All Rights Reserved.

+ + diff --git a/apidoc/org/primaresearch/dla/page/scanner/class-use/ContentObjectHandler.html b/apidoc/org/primaresearch/dla/page/scanner/class-use/ContentObjectHandler.html new file mode 100644 index 0000000..f4eaf97 --- /dev/null +++ b/apidoc/org/primaresearch/dla/page/scanner/class-use/ContentObjectHandler.html @@ -0,0 +1,194 @@ + + + + + +Uses of Interface org.primaresearch.dla.page.scanner.ContentObjectHandler (PageMetadataScanner API) + + + + + + + +
+ + + + + +
+ + +
+

Uses of Interface
org.primaresearch.dla.page.scanner.ContentObjectHandler

+
+
+ +
+ +
+ + + + + +
+ + +

Copyright © 2014 PRImA Research Lab. All Rights Reserved.

+ + diff --git a/apidoc/org/primaresearch/dla/page/scanner/class-use/PageScanner.html b/apidoc/org/primaresearch/dla/page/scanner/class-use/PageScanner.html new file mode 100644 index 0000000..abc4069 --- /dev/null +++ b/apidoc/org/primaresearch/dla/page/scanner/class-use/PageScanner.html @@ -0,0 +1,116 @@ + + + + + +Uses of Class org.primaresearch.dla.page.scanner.PageScanner (PageMetadataScanner API) + + + + + + + +
+ + + + + +
+ + +
+

Uses of Class
org.primaresearch.dla.page.scanner.PageScanner

+
+
No usage of org.primaresearch.dla.page.scanner.PageScanner
+ +
+ + + + + +
+ + +

Copyright © 2014 PRImA Research Lab. All Rights Reserved.

+ + diff --git a/apidoc/org/primaresearch/dla/page/scanner/element/AttributeValueSetScanElement.html b/apidoc/org/primaresearch/dla/page/scanner/element/AttributeValueSetScanElement.html new file mode 100644 index 0000000..7321701 --- /dev/null +++ b/apidoc/org/primaresearch/dla/page/scanner/element/AttributeValueSetScanElement.html @@ -0,0 +1,345 @@ + + + + + +AttributeValueSetScanElement (PageMetadataScanner API) + + + + + + + +
+ + + + + +
+ + + +
+
org.primaresearch.dla.page.scanner.element
+

Class AttributeValueSetScanElement

+
+
+ +
+
    +
  • +
    +
    All Implemented Interfaces:
    +
    ContentObjectHandler, ScanElement
    +
    +
    +
    +
    public class AttributeValueSetScanElement
    +extends java.lang.Object
    +implements ScanElement, ContentObjectHandler
    +
    Scan element that builds a set of values for specified content type and attribute.
    +
    + Example:
    + Content type: TextRegion
    + Attribute: Language
    + Result value: German;English;French
    +
    Author:
    +
    clc
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Summary

      + + + + + + + + +
      Constructors 
      Constructor and Description
      AttributeValueSetScanElement(org.primaresearch.dla.page.layout.physical.shared.ContentType contentType, + java.lang.String attributeName) 
      +
    • +
    + +
      +
    • + + +

      Method Summary

      + + + + + + + + + + + + + + + + + + + + + + +
      Methods 
      Modifier and TypeMethod and Description
      java.lang.StringgetCsvHeader() +
      Returns the CSV header(s) of this scan element.
      +
      java.lang.StringgetCsvValue() +
      Returns the CSV value(s) of this scan element.
      +
      voidhandleContentObject(org.primaresearch.dla.page.layout.physical.ContentObject obj) +
      Handles the given layout content object.
      +
      voidinit(org.primaresearch.dla.page.Page page) +
      Initialisation (called before any other method).
      +
      +
        +
      • + + +

        Methods inherited from class java.lang.Object

        +clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
      • +
      +
    • +
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        AttributeValueSetScanElement

        +
        public AttributeValueSetScanElement(org.primaresearch.dla.page.layout.physical.shared.ContentType contentType,
        +                            java.lang.String attributeName)
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        handleContentObject

        +
        public void handleContentObject(org.primaresearch.dla.page.layout.physical.ContentObject obj)
        +
        Description copied from interface: ContentObjectHandler
        +
        Handles the given layout content object.
        +
        +
        Specified by:
        +
        handleContentObject in interface ContentObjectHandler
        +
        +
      • +
      + + + +
        +
      • +

        init

        +
        public void init(org.primaresearch.dla.page.Page page)
        +
        Description copied from interface: ScanElement
        +
        Initialisation (called before any other method).
        +
        +
        Specified by:
        +
        init in interface ScanElement
        +
        +
      • +
      + + + +
        +
      • +

        getCsvHeader

        +
        public java.lang.String getCsvHeader()
        +
        Description copied from interface: ScanElement
        +
        Returns the CSV header(s) of this scan element. If the element represents multiple CSV entries, + the individual headers must be comma separated.
        +
        +
        Specified by:
        +
        getCsvHeader in interface ScanElement
        +
        +
      • +
      + + + +
        +
      • +

        getCsvValue

        +
        public java.lang.String getCsvValue()
        +
        Description copied from interface: ScanElement
        +
        Returns the CSV value(s) of this scan element. If the element represents multiple CSV entries, + the individual values must be comma separated.
        +
        +
        Specified by:
        +
        getCsvValue in interface ScanElement
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + +
+ + + + + +
+ + +

Copyright © 2014 PPImA Research Lab. All Rights Reserved.

+ + diff --git a/apidoc/org/primaresearch/dla/page/scanner/element/BoundsScanElement.html b/apidoc/org/primaresearch/dla/page/scanner/element/BoundsScanElement.html new file mode 100644 index 0000000..5efc4ad --- /dev/null +++ b/apidoc/org/primaresearch/dla/page/scanner/element/BoundsScanElement.html @@ -0,0 +1,372 @@ + + + + + +BoundsScanElement (PageMetadataScanner API) + + + + + + + +
+ + + + + +
+ + + +
+
org.primaresearch.dla.page.scanner.element
+

Class BoundsScanElement

+
+
+ +
+
    +
  • +
    +
    All Implemented Interfaces:
    +
    ScanElement
    +
    +
    +
    +
    public class BoundsScanElement
    +extends java.lang.Object
    +implements ScanElement
    +
    Scan element that checks whether the border or print space is defined.
    +
    Author:
    +
    clc
    +
  • +
+
+
+
    +
  • + + + +
      +
    • + + +

      Constructor Summary

      + + + + + + + + +
      Constructors 
      Constructor and Description
      BoundsScanElement(int type) +
      Constructor
      +
      +
    • +
    + +
      +
    • + + +

      Method Summary

      + + + + + + + + + + + + + + + + + + +
      Methods 
      Modifier and Type | Method and Description
      java.lang.String getCsvHeader() +
      Returns the CSV header(s) of this scan element.
      +
      java.lang.String getCsvValue() +
      Returns the CSV value(s) of this scan element.
      +
      void init(org.primaresearch.dla.page.Page page) +
      Initialisation (called before any other method).
      +
      +
        +
      • + + +

        Methods inherited from class java.lang.Object

        +clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
      • +
      +
    • +
    +
  • +
+
+
+
    +
  • + + + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        BoundsScanElement

        +
        public BoundsScanElement(int type)
        +
        Constructor
        +
        Parameters:
        type - TYPE_BORDER or TYPE_PRINT_SPACE
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        init

        +
        public void init(org.primaresearch.dla.page.Page page)
        +
        Description copied from interface: ScanElement
        +
        Initialisation (called before any other method).
        +
        +
        Specified by:
        +
        init in interface ScanElement
        +
        +
      • +
      + + + +
        +
      • +

        getCsvHeader

        +
        public java.lang.String getCsvHeader()
        +
        Description copied from interface: ScanElement
        +
        Returns the CSV header(s) of this scan element. If the element represents multiple CSV entries, + the individual headers must be comma separated.
        +
        +
        Specified by:
        +
        getCsvHeader in interface ScanElement
        +
        +
      • +
      + + + +
        +
      • +

        getCsvValue

        +
        public java.lang.String getCsvValue()
        +
        Description copied from interface: ScanElement
        +
        Returns the CSV value(s) of this scan element. If the element represents multiple CSV entries, + the individual values must be comma separated.
        +
        +
        Specified by:
        +
        getCsvValue in interface ScanElement
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + +
+ + + + + +
+ + +

Copyright © 2014 PPImA Research Lab. All Rights Reserved.

+ + diff --git a/apidoc/org/primaresearch/dla/page/scanner/element/ContentTypeCountScanElement.html b/apidoc/org/primaresearch/dla/page/scanner/element/ContentTypeCountScanElement.html new file mode 100644 index 0000000..2ee7bb0 --- /dev/null +++ b/apidoc/org/primaresearch/dla/page/scanner/element/ContentTypeCountScanElement.html @@ -0,0 +1,342 @@ + + + + + +ContentTypeCountScanElement (PageMetadataScanner API) + + + + + + + +
+ + + + + +
+ + + +
+
org.primaresearch.dla.page.scanner.element
+

Class ContentTypeCountScanElement

+
+
+ +
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Summary

      + + + + + + + + +
      Constructors 
      Constructor and Description
      ContentTypeCountScanElement(org.primaresearch.dla.page.layout.physical.shared.ContentType type) +
      Constructor
      +
      +
    • +
    + +
      +
    • + + +

      Method Summary

      + + + + + + + + + + + + + + + + + + + + + + +
      Methods 
      Modifier and Type | Method and Description
      java.lang.String getCsvHeader() +
      Returns the CSV header(s) of this scan element.
      +
      java.lang.String getCsvValue() +
      Returns the CSV value(s) of this scan element.
      +
      void handleContentObject(org.primaresearch.dla.page.layout.physical.ContentObject obj) +
      Handles the given layout content object.
      +
      void init(org.primaresearch.dla.page.Page page) +
      Initialisation (called before any other method).
      +
      +
        +
      • + + +

        Methods inherited from class java.lang.Object

        +clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
      • +
      +
    • +
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        ContentTypeCountScanElement

        +
        public ContentTypeCountScanElement(org.primaresearch.dla.page.layout.physical.shared.ContentType type)
        +
        Constructor
        +
        Parameters:
        type - Content object type to count.
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        handleContentObject

        +
        public void handleContentObject(org.primaresearch.dla.page.layout.physical.ContentObject obj)
        +
        Description copied from interface: ContentObjectHandler
        +
        Handles the given layout content object.
        +
        +
        Specified by:
        +
        handleContentObject in interface ContentObjectHandler
        +
        +
      • +
      + + + +
        +
      • +

        init

        +
        public void init(org.primaresearch.dla.page.Page page)
        +
        Description copied from interface: ScanElement
        +
        Initialisation (called before any other method).
        +
        +
        Specified by:
        +
        init in interface ScanElement
        +
        +
      • +
      + + + +
        +
      • +

        getCsvHeader

        +
        public java.lang.String getCsvHeader()
        +
        Description copied from interface: ScanElement
        +
        Returns the CSV header(s) of this scan element. If the element represents multiple CSV entries, + the individual headers must be comma separated.
        +
        +
        Specified by:
        +
        getCsvHeader in interface ScanElement
        +
        +
      • +
      + + + +
        +
      • +

        getCsvValue

        +
        public java.lang.String getCsvValue()
        +
        Description copied from interface: ScanElement
        +
        Returns the CSV value(s) of this scan element. If the element represents multiple CSV entries, + the individual values must be comma separated.
        +
        +
        Specified by:
        +
        getCsvValue in interface ScanElement
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + +
+ + + + + +
+ + +

Copyright © 2014 PPImA Research Lab. All Rights Reserved.

+ + diff --git a/apidoc/org/primaresearch/dla/page/scanner/element/LayersRegionRefCountScanElement.html b/apidoc/org/primaresearch/dla/page/scanner/element/LayersRegionRefCountScanElement.html new file mode 100644 index 0000000..75bb9ac --- /dev/null +++ b/apidoc/org/primaresearch/dla/page/scanner/element/LayersRegionRefCountScanElement.html @@ -0,0 +1,317 @@ + + + + + +LayersRegionRefCountScanElement (PageMetadataScanner API) + + + + + + + +
+ + + + + +
+ + + +
+
org.primaresearch.dla.page.scanner.element
+

Class LayersRegionRefCountScanElement

+
+
+ +
+
    +
  • +
    +
    All Implemented Interfaces:
    +
    ScanElement
    +
    +
    +
    +
    public class LayersRegionRefCountScanElement
    +extends java.lang.Object
    +implements ScanElement
    +
    Scan element that counts the number of referenced regions in layers.
    +
    Author:
    +
    clc
    +
  • +
+
+
+
    +
  • + + + +
      +
    • + + +

      Method Summary

      + + + + + + + + + + + + + + + + + + +
      Methods 
      Modifier and Type | Method and Description
      java.lang.String getCsvHeader() +
      Returns the CSV header(s) of this scan element.
      +
      java.lang.String getCsvValue() +
      Returns the CSV value(s) of this scan element.
      +
      void init(org.primaresearch.dla.page.Page page) +
      Initialisation (called before any other method).
      +
      +
        +
      • + + +

        Methods inherited from class java.lang.Object

        +clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
      • +
      +
    • +
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        LayersRegionRefCountScanElement

        +
        public LayersRegionRefCountScanElement()
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        init

        +
        public void init(org.primaresearch.dla.page.Page page)
        +
        Description copied from interface: ScanElement
        +
        Initialisation (called before any other method).
        +
        +
        Specified by:
        +
        init in interface ScanElement
        +
        +
      • +
      + + + +
        +
      • +

        getCsvHeader

        +
        public java.lang.String getCsvHeader()
        +
        Description copied from interface: ScanElement
        +
        Returns the CSV header(s) of this scan element. If the element represents multiple CSV entries, + the individual headers must be comma separated.
        +
        +
        Specified by:
        +
        getCsvHeader in interface ScanElement
        +
        +
      • +
      + + + +
        +
      • +

        getCsvValue

        +
        public java.lang.String getCsvValue()
        +
        Description copied from interface: ScanElement
        +
        Returns the CSV value(s) of this scan element. If the element represents multiple CSV entries, + the individual values must be comma separated.
        +
        +
        Specified by:
        +
        getCsvValue in interface ScanElement
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + +
+ + + + + +
+ + +

Copyright © 2014 PPImA Research Lab. All Rights Reserved.

+ + diff --git a/apidoc/org/primaresearch/dla/page/scanner/element/MetaDataScanElement.html b/apidoc/org/primaresearch/dla/page/scanner/element/MetaDataScanElement.html new file mode 100644 index 0000000..870ea93 --- /dev/null +++ b/apidoc/org/primaresearch/dla/page/scanner/element/MetaDataScanElement.html @@ -0,0 +1,428 @@ + + + + + +MetaDataScanElement (PageMetadataScanner API) + + + + + + + +
+ + + + + +
+ + + +
+
org.primaresearch.dla.page.scanner.element
+

Class MetaDataScanElement

+
+
+ +
+
    +
  • +
    +
    All Implemented Interfaces:
    +
    ScanElement
    +
    +
    +
    +
    public class MetaDataScanElement
    +extends java.lang.Object
    +implements ScanElement
    +
    Scan element for meta data entries.
    +
    Author:
    +
    clc
    +
  • +
+
+
+
    +
  • + + + +
      +
    • + + +

      Constructor Summary

      + + + + + + + + +
      Constructors 
      Constructor and Description
      MetaDataScanElement(int type) +
      Constructor
      +
      +
    • +
    + +
      +
    • + + +

      Method Summary

      + + + + + + + + + + + + + + + + + + +
      Methods 
      Modifier and Type | Method and Description
      java.lang.String getCsvHeader() +
      Returns the CSV header(s) of this scan element.
      +
      java.lang.String getCsvValue() +
      Returns the CSV value(s) of this scan element.
      +
      void init(org.primaresearch.dla.page.Page page) +
      Initialisation (called before any other method).
      +
      +
        +
      • + + +

        Methods inherited from class java.lang.Object

        +clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
      • +
      +
    • +
    +
  • +
+
+
+
    +
  • + + + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        MetaDataScanElement

        +
        public MetaDataScanElement(int type)
        +
        Constructor
        +
        Parameters:
        type - See TYPE_ class members.
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        init

        +
        public void init(org.primaresearch.dla.page.Page page)
        +
        Description copied from interface: ScanElement
        +
        Initialisation (called before any other method).
        +
        +
        Specified by:
        +
        init in interface ScanElement
        +
        +
      • +
      + + + +
        +
      • +

        getCsvHeader

        +
        public java.lang.String getCsvHeader()
        +
        Description copied from interface: ScanElement
        +
        Returns the CSV header(s) of this scan element. If the element represents multiple CSV entries, + the individual headers must be comma separated.
        +
        +
        Specified by:
        +
        getCsvHeader in interface ScanElement
        +
        +
      • +
      + + + +
        +
      • +

        getCsvValue

        +
        public java.lang.String getCsvValue()
        +
        Description copied from interface: ScanElement
        +
        Returns the CSV value(s) of this scan element. If the element represents multiple CSV entries, + the individual values must be comma separated.
        +
        +
        Specified by:
        +
        getCsvValue in interface ScanElement
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + +
+ + + + + +
+ + +

Copyright © 2014 PPImA Research Lab. All Rights Reserved.

+ + diff --git a/apidoc/org/primaresearch/dla/page/scanner/element/ReadingOrderRegionRefCountScanElement.html b/apidoc/org/primaresearch/dla/page/scanner/element/ReadingOrderRegionRefCountScanElement.html new file mode 100644 index 0000000..b23ff0e --- /dev/null +++ b/apidoc/org/primaresearch/dla/page/scanner/element/ReadingOrderRegionRefCountScanElement.html @@ -0,0 +1,317 @@ + + + + + +ReadingOrderRegionRefCountScanElement (PageMetadataScanner API) + + + + + + + +
+ + + + + +
+ + + +
+
org.primaresearch.dla.page.scanner.element
+

Class ReadingOrderRegionRefCountScanElement

+
+
+ +
+
    +
  • +
    +
    All Implemented Interfaces:
    +
    ScanElement
    +
    +
    +
    +
    public class ReadingOrderRegionRefCountScanElement
    +extends java.lang.Object
    +implements ScanElement
    +
    Scan element that counts the number of referenced regions in the reading order.
    +
    Author:
    +
    clc
    +
  • +
+
+
+
    +
  • + + + +
      +
    • + + +

      Method Summary

      + + + + + + + + + + + + + + + + + + +
      Methods 
      Modifier and Type | Method and Description
      java.lang.String getCsvHeader() +
      Returns the CSV header(s) of this scan element.
      +
      java.lang.String getCsvValue() +
      Returns the CSV value(s) of this scan element.
      +
      void init(org.primaresearch.dla.page.Page page) +
      Initialisation (called before any other method).
      +
      +
        +
      • + + +

        Methods inherited from class java.lang.Object

        +clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
      • +
      +
    • +
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        ReadingOrderRegionRefCountScanElement

        +
        public ReadingOrderRegionRefCountScanElement()
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        init

        +
        public void init(org.primaresearch.dla.page.Page page)
        +
        Description copied from interface: ScanElement
        +
        Initialisation (called before any other method).
        +
        +
        Specified by:
        +
        init in interface ScanElement
        +
        +
      • +
      + + + +
        +
      • +

        getCsvHeader

        +
        public java.lang.String getCsvHeader()
        +
        Description copied from interface: ScanElement
        +
        Returns the CSV header(s) of this scan element. If the element represents multiple CSV entries, + the individual headers must be comma separated.
        +
        +
        Specified by:
        +
        getCsvHeader in interface ScanElement
        +
        +
      • +
      + + + +
        +
      • +

        getCsvValue

        +
        public java.lang.String getCsvValue()
        +
        Description copied from interface: ScanElement
        +
        Returns the CSV value(s) of this scan element. If the element represents multiple CSV entries, + the individual values must be comma separated.
        +
        +
        Specified by:
        +
        getCsvValue in interface ScanElement
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + +
+ + + + + +
+ + +

Copyright © 2014 PPImA Research Lab. All Rights Reserved.

+ + diff --git a/apidoc/org/primaresearch/dla/page/scanner/element/RegionCountScanElement.html b/apidoc/org/primaresearch/dla/page/scanner/element/RegionCountScanElement.html new file mode 100644 index 0000000..9c2e6ba --- /dev/null +++ b/apidoc/org/primaresearch/dla/page/scanner/element/RegionCountScanElement.html @@ -0,0 +1,338 @@ + + + + + +RegionCountScanElement (PageMetadataScanner API) + + + + + + + +
+ + + + + +
+ + + +
+
org.primaresearch.dla.page.scanner.element
+

Class RegionCountScanElement

+
+
+ +
+ +
+
+
    +
  • + + + +
      +
    • + + +

      Method Summary

      + + + + + + + + + + + + + + + + + + + + + + +
      Methods 
      Modifier and Type | Method and Description
      java.lang.String getCsvHeader() +
      Returns the CSV header(s) of this scan element.
      +
      java.lang.String getCsvValue() +
      Returns the CSV value(s) of this scan element.
      +
      void handleContentObject(org.primaresearch.dla.page.layout.physical.ContentObject obj) +
      Handles the given layout content object.
      +
      void init(org.primaresearch.dla.page.Page page) +
      Initialisation (called before any other method).
      +
      +
        +
      • + + +

        Methods inherited from class java.lang.Object

        +clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
      • +
      +
    • +
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        RegionCountScanElement

        +
        public RegionCountScanElement()
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        handleContentObject

        +
        public void handleContentObject(org.primaresearch.dla.page.layout.physical.ContentObject obj)
        +
        Description copied from interface: ContentObjectHandler
        +
        Handles the given layout content object.
        +
        +
        Specified by:
        +
        handleContentObject in interface ContentObjectHandler
        +
        +
      • +
      + + + +
        +
      • +

        init

        +
        public void init(org.primaresearch.dla.page.Page page)
        +
        Description copied from interface: ScanElement
        +
        Initialisation (called before any other method).
        +
        +
        Specified by:
        +
        init in interface ScanElement
        +
        +
      • +
      + + + +
        +
      • +

        getCsvHeader

        +
        public java.lang.String getCsvHeader()
        +
        Description copied from interface: ScanElement
        +
        Returns the CSV header(s) of this scan element. If the element represents multiple CSV entries, + the individual headers must be comma separated.
        +
        +
        Specified by:
        +
        getCsvHeader in interface ScanElement
        +
        +
      • +
      + + + +
        +
      • +

        getCsvValue

        +
        public java.lang.String getCsvValue()
        +
        Description copied from interface: ScanElement
        +
        Returns the CSV value(s) of this scan element. If the element represents multiple CSV entries, + the individual values must be comma separated.
        +
        +
        Specified by:
        +
        getCsvValue in interface ScanElement
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + +
+ + + + + +
+ + +

Copyright © 2014 PPImA Research Lab. All Rights Reserved.

+ + diff --git a/apidoc/org/primaresearch/dla/page/scanner/element/RegionSubTypeCountScanElement.html b/apidoc/org/primaresearch/dla/page/scanner/element/RegionSubTypeCountScanElement.html new file mode 100644 index 0000000..38f9e59 --- /dev/null +++ b/apidoc/org/primaresearch/dla/page/scanner/element/RegionSubTypeCountScanElement.html @@ -0,0 +1,345 @@ + + + + + +RegionSubTypeCountScanElement (PageMetadataScanner API) + + + + + + + +
+ + + + + +
+ + + +
+
org.primaresearch.dla.page.scanner.element
+

Class RegionSubTypeCountScanElement

+
+
+ +
+
    +
  • +
    +
    All Implemented Interfaces:
    +
    ContentObjectHandler, ScanElement
    +
    +
    +
    +
    public class RegionSubTypeCountScanElement
    +extends java.lang.Object
    +implements ScanElement, ContentObjectHandler
    +
    Scan element that counts the occurrences of regions per sub-type.
    + This element represents multiple CSV entries.
    +
    Author:
    +
    clc
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Summary

      + + + + + + + + +
      Constructors 
      Constructor and Description
      RegionSubTypeCountScanElement(org.primaresearch.dla.page.layout.physical.shared.RegionType regionType, + org.primaresearch.io.FormatModel formatModel) +
      Constructor
      +
      +
    • +
    + +
      +
    • + + +

      Method Summary

      + + + + + + + + + + + + + + + + + + + + + + +
      Methods 
      Modifier and Type | Method and Description
      java.lang.String getCsvHeader() +
      Returns the CSV header(s) of this scan element.
      +
      java.lang.String getCsvValue() +
      Returns the CSV value(s) of this scan element.
      +
      void handleContentObject(org.primaresearch.dla.page.layout.physical.ContentObject obj) +
      Handles the given layout content object.
      +
      void init(org.primaresearch.dla.page.Page page) +
      Initialisation (called before any other method).
      +
      +
        +
      • + + +

        Methods inherited from class java.lang.Object

        +clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
      • +
      +
    • +
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        RegionSubTypeCountScanElement

        +
        public RegionSubTypeCountScanElement(org.primaresearch.dla.page.layout.physical.shared.RegionType regionType,
        +                             org.primaresearch.io.FormatModel formatModel)
        +
        Constructor
        +
        Parameters:
        regionType - The type of region to handle.
        formatModel - Model with attribute templates.
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        handleContentObject

        +
        public void handleContentObject(org.primaresearch.dla.page.layout.physical.ContentObject obj)
        +
        Description copied from interface: ContentObjectHandler
        +
        Handles the given layout content object.
        +
        +
        Specified by:
        +
        handleContentObject in interface ContentObjectHandler
        +
        +
      • +
      + + + +
        +
      • +

        init

        +
        public void init(org.primaresearch.dla.page.Page page)
        +
        Description copied from interface: ScanElement
        +
        Initialisation (called before any other method).
        +
        +
        Specified by:
        +
        init in interface ScanElement
        +
        +
      • +
      + + + +
        +
      • +

        getCsvHeader

        +
        public java.lang.String getCsvHeader()
        +
        Description copied from interface: ScanElement
        +
        Returns the CSV header(s) of this scan element. If the element represents multiple CSV entries, + the individual headers must be comma separated.
        +
        +
        Specified by:
        +
        getCsvHeader in interface ScanElement
        +
        +
      • +
      + + + +
        +
      • +

        getCsvValue

        +
        public java.lang.String getCsvValue()
        +
        Description copied from interface: ScanElement
        +
        Returns the CSV value(s) of this scan element. If the element represents multiple CSV entries, + the individual values must be comma separated.
        +
        +
        Specified by:
        +
        getCsvValue in interface ScanElement
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + +
+ + + + + +
+ + +

Copyright © 2014 PPImA Research Lab. All Rights Reserved.

+ + diff --git a/apidoc/org/primaresearch/dla/page/scanner/element/ScanElement.html b/apidoc/org/primaresearch/dla/page/scanner/element/ScanElement.html new file mode 100644 index 0000000..1eec404 --- /dev/null +++ b/apidoc/org/primaresearch/dla/page/scanner/element/ScanElement.html @@ -0,0 +1,251 @@ + + + + + +ScanElement (PageMetadataScanner API) + + + + + + + +
+ + + + + +
+ + + +
+
org.primaresearch.dla.page.scanner.element
+

Interface ScanElement

+
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Method Summary

      + + + + + + + + + + + + + + + + + + +
      Methods 
      Modifier and Type | Method and Description
      java.lang.String getCsvHeader() +
      Returns the CSV header(s) of this scan element.
      +
      java.lang.String getCsvValue() +
      Returns the CSV value(s) of this scan element.
      +
      void init(org.primaresearch.dla.page.Page page) +
      Initialisation (called before any other method).
      +
      +
    • +
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        init

        +
        void init(org.primaresearch.dla.page.Page page)
        +
        Initialisation (called before any other method).
        +
      • +
      + + + +
        +
      • +

        getCsvHeader

        +
        java.lang.String getCsvHeader()
        +
        Returns the CSV header(s) of this scan element. If the element represents multiple CSV entries, + the individual headers must be comma separated.
        +
      • +
      + + + +
        +
      • +

        getCsvValue

        +
        java.lang.String getCsvValue()
        +
        Returns the CSV value(s) of this scan element. If the element represents multiple CSV entries, + the individual values must be comma separated.
        +
      • +
      +
    • +
    +
  • +
+
+
+ + +
+ + + + + +
+ + +

Copyright © 2014 PPImA Research Lab. All Rights Reserved.

+ + diff --git a/apidoc/org/primaresearch/dla/page/scanner/element/SpecialCharactersScanElement.html b/apidoc/org/primaresearch/dla/page/scanner/element/SpecialCharactersScanElement.html new file mode 100644 index 0000000..f8b4354 --- /dev/null +++ b/apidoc/org/primaresearch/dla/page/scanner/element/SpecialCharactersScanElement.html @@ -0,0 +1,358 @@ + + + + + +SpecialCharactersScanElement (PageMetadataScanner API) + + + + + + + +
+ + + + + +
+ + + +
+
org.primaresearch.dla.page.scanner.element
+

Class SpecialCharactersScanElement

+
+
+ +
+
    +
  • +
    +
    All Implemented Interfaces:
    +
    ContentObjectHandler, ScanElement
    +
    +
    +
    +
    public class SpecialCharactersScanElement
    +extends java.lang.Object
    +implements ScanElement, ContentObjectHandler
    +
    Creates a list of characters that occur in the text content of the document. + The characters are output as Unicode code points in decimal format.
    +
    Author:
    +
    clc
    +
  • +
+
+
+
    +
  • + + + +
      +
    • + + +

      Method Summary

      + + + + + + + + + + + + + + + + + + + + + + +
      Methods 
      Modifier and Type | Method and Description
      java.lang.String getCsvHeader() +
      Returns the CSV header(s) of this scan element.
      +
      java.lang.String getCsvValue() +
      Returns the CSV value(s) of this scan element.
      +
      void handleContentObject(org.primaresearch.dla.page.layout.physical.ContentObject obj) +
      Handles the given layout content object.
      +
      void init(org.primaresearch.dla.page.Page page) +
      Initialisation (called before any other method).
      +
      +
        +
      • + + +

        Methods inherited from class java.lang.Object

        +clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
      • +
      +
    • +
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        SpecialCharactersScanElement

        +
        public SpecialCharactersScanElement()
        +
        Default constructor
        +
      • +
      + + + +
        +
      • +

        SpecialCharactersScanElement

        +
        public SpecialCharactersScanElement(boolean oneColumn)
        +
        Constructor
        +
        Parameters:
        oneColumn - Switch to optionally output the character codes in one column
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        init

        +
        public void init(org.primaresearch.dla.page.Page page)
        +
        Description copied from interface: ScanElement
        +
        Initialisation (called before any other method).
        +
        +
        Specified by:
        +
        init in interface ScanElement
        +
        +
      • +
      + + + +
        +
      • +

        getCsvHeader

        +
        public java.lang.String getCsvHeader()
        +
        Description copied from interface: ScanElement
        +
        Returns the CSV header(s) of this scan element. If the element represents multiple CSV entries, + the individual headers must be comma separated.
        +
        +
        Specified by:
        +
        getCsvHeader in interface ScanElement
        +
        +
      • +
      + + + +
        +
      • +

        getCsvValue

        +
        public java.lang.String getCsvValue()
        +
        Description copied from interface: ScanElement
        +
        Returns the CSV value(s) of this scan element. If the element represents multiple CSV entries, + the individual values must be comma separated.
        +
        +
        Specified by:
        +
        getCsvValue in interface ScanElement
        +
        +
      • +
      + + + +
        +
      • +

        handleContentObject

        +
        public void handleContentObject(org.primaresearch.dla.page.layout.physical.ContentObject obj)
        +
        Description copied from interface: ContentObjectHandler
        +
        Handles the given layout content object.
        +
        +
        Specified by:
        +
        handleContentObject in interface ContentObjectHandler
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + +
+ + + + + +
+ + +

Copyright © 2014 PRImA Research Lab. All Rights Reserved.

+ + diff --git a/apidoc/org/primaresearch/dla/page/scanner/element/TextContentScanElement.html b/apidoc/org/primaresearch/dla/page/scanner/element/TextContentScanElement.html new file mode 100644 index 0000000..ee6701d --- /dev/null +++ b/apidoc/org/primaresearch/dla/page/scanner/element/TextContentScanElement.html @@ -0,0 +1,423 @@ + + + + + +TextContentScanElement (PageMetadataScanner API) + + + + + + + +
+ + + + + +
+ + + +
+
org.primaresearch.dla.page.scanner.element
+

Class TextContentScanElement

+
+
+ +
+ +
+
+
    +
  • + + + +
      +
    • + + +

      Constructor Summary

      + + + + + + + + +
      Constructors 
      Constructor and Description
      TextContentScanElement(org.primaresearch.dla.page.layout.physical.shared.ContentType contentType, + int statisticsType) +
      Constructor
      +
      +
    • +
    + +
      +
    • + + +

      Method Summary

      + + + + + + + + + + + + + + + + + + + + + + +
      Methods 
      Modifier and TypeMethod and Description
      java.lang.StringgetCsvHeader() +
      Returns the CSV header(s) of this scan element.
      +
      java.lang.StringgetCsvValue() +
      Returns the CSV value(s) of this scan element.
      +
      voidhandleContentObject(org.primaresearch.dla.page.layout.physical.ContentObject obj) +
      Handles the given layout content object.
      +
      voidinit(org.primaresearch.dla.page.Page page) +
      Initialisation (called before any other method).
      +
      +
        +
      • + + +

        Methods inherited from class java.lang.Object

        +clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
      • +
      +
    • +
    +
  • +
+
+
+
    +
  • + + + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        TextContentScanElement

        +
        public TextContentScanElement(org.primaresearch.dla.page.layout.physical.shared.ContentType contentType,
        +                      int statisticsType)
        +
        Constructor
        +
        Parameters:
        contentType - Content type (e.g. RegionType.TextRegion or LowLevelTextType.Word).
        statisticsType - See TYPE_ class members.
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        handleContentObject

        +
        public void handleContentObject(org.primaresearch.dla.page.layout.physical.ContentObject obj)
        +
        Description copied from interface: ContentObjectHandler
        +
        Handles the given layout content object.
        +
        +
        Specified by:
        +
        handleContentObject in interface ContentObjectHandler
        +
        +
      • +
      + + + +
        +
      • +

        init

        +
        public void init(org.primaresearch.dla.page.Page page)
        +
        Description copied from interface: ScanElement
        +
        Initialisation (called before any other method).
        +
        +
        Specified by:
        +
        init in interface ScanElement
        +
        +
      • +
      + + + +
        +
      • +

        getCsvHeader

        +
        public java.lang.String getCsvHeader()
        +
        Description copied from interface: ScanElement
        +
        Returns the CSV header(s) of this scan element. If the element represents multiple CSV entries, + the individual headers must be comma separated.
        +
        +
        Specified by:
        +
        getCsvHeader in interface ScanElement
        +
        +
      • +
      + + + +
        +
      • +

        getCsvValue

        +
        public java.lang.String getCsvValue()
        +
        Description copied from interface: ScanElement
        +
        Returns the CSV value(s) of this scan element. If the element represents multiple CSV entries, + the individual values must be comma separated.
        +
        +
        Specified by:
        +
        getCsvValue in interface ScanElement
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + +
+ + + + + +
+ + +

Copyright © 2014 PRImA Research Lab. All Rights Reserved.

+ + diff --git a/apidoc/org/primaresearch/dla/page/scanner/element/class-use/AttributeValueSetScanElement.html b/apidoc/org/primaresearch/dla/page/scanner/element/class-use/AttributeValueSetScanElement.html new file mode 100644 index 0000000..4d53058 --- /dev/null +++ b/apidoc/org/primaresearch/dla/page/scanner/element/class-use/AttributeValueSetScanElement.html @@ -0,0 +1,116 @@ + + + + + +Uses of Class org.primaresearch.dla.page.scanner.element.AttributeValueSetScanElement (PageMetadataScanner API) + + + + + + + +
+ + + + + +
+ + +
+

Uses of Class
org.primaresearch.dla.page.scanner.element.AttributeValueSetScanElement

+
+
No usage of org.primaresearch.dla.page.scanner.element.AttributeValueSetScanElement
+ +
+ + + + + +
+ + +

Copyright © 2014 PRImA Research Lab. All Rights Reserved.

+ + diff --git a/apidoc/org/primaresearch/dla/page/scanner/element/class-use/BoundsScanElement.html b/apidoc/org/primaresearch/dla/page/scanner/element/class-use/BoundsScanElement.html new file mode 100644 index 0000000..586956f --- /dev/null +++ b/apidoc/org/primaresearch/dla/page/scanner/element/class-use/BoundsScanElement.html @@ -0,0 +1,116 @@ + + + + + +Uses of Class org.primaresearch.dla.page.scanner.element.BoundsScanElement (PageMetadataScanner API) + + + + + + + +
+ + + + + +
+ + +
+

Uses of Class
org.primaresearch.dla.page.scanner.element.BoundsScanElement

+
+
No usage of org.primaresearch.dla.page.scanner.element.BoundsScanElement
+ +
+ + + + + +
+ + +

Copyright © 2014 PRImA Research Lab. All Rights Reserved.

+ + diff --git a/apidoc/org/primaresearch/dla/page/scanner/element/class-use/ContentTypeCountScanElement.html b/apidoc/org/primaresearch/dla/page/scanner/element/class-use/ContentTypeCountScanElement.html new file mode 100644 index 0000000..986f78c --- /dev/null +++ b/apidoc/org/primaresearch/dla/page/scanner/element/class-use/ContentTypeCountScanElement.html @@ -0,0 +1,116 @@ + + + + + +Uses of Class org.primaresearch.dla.page.scanner.element.ContentTypeCountScanElement (PageMetadataScanner API) + + + + + + + +
+ + + + + +
+ + +
+

Uses of Class
org.primaresearch.dla.page.scanner.element.ContentTypeCountScanElement

+
+
No usage of org.primaresearch.dla.page.scanner.element.ContentTypeCountScanElement
+ +
+ + + + + +
+ + +

Copyright © 2014 PRImA Research Lab. All Rights Reserved.

+ + diff --git a/apidoc/org/primaresearch/dla/page/scanner/element/class-use/LayersRegionRefCountScanElement.html b/apidoc/org/primaresearch/dla/page/scanner/element/class-use/LayersRegionRefCountScanElement.html new file mode 100644 index 0000000..d5d7b64 --- /dev/null +++ b/apidoc/org/primaresearch/dla/page/scanner/element/class-use/LayersRegionRefCountScanElement.html @@ -0,0 +1,116 @@ + + + + + +Uses of Class org.primaresearch.dla.page.scanner.element.LayersRegionRefCountScanElement (PageMetadataScanner API) + + + + + + + +
+ + + + + +
+ + +
+

Uses of Class
org.primaresearch.dla.page.scanner.element.LayersRegionRefCountScanElement

+
+
No usage of org.primaresearch.dla.page.scanner.element.LayersRegionRefCountScanElement
+ +
+ + + + + +
+ + +

Copyright © 2014 PRImA Research Lab. All Rights Reserved.

+ + diff --git a/apidoc/org/primaresearch/dla/page/scanner/element/class-use/MetaDataScanElement.html b/apidoc/org/primaresearch/dla/page/scanner/element/class-use/MetaDataScanElement.html new file mode 100644 index 0000000..ddf0375 --- /dev/null +++ b/apidoc/org/primaresearch/dla/page/scanner/element/class-use/MetaDataScanElement.html @@ -0,0 +1,116 @@ + + + + + +Uses of Class org.primaresearch.dla.page.scanner.element.MetaDataScanElement (PageMetadataScanner API) + + + + + + + +
+ + + + + +
+ + +
+

Uses of Class
org.primaresearch.dla.page.scanner.element.MetaDataScanElement

+
+
No usage of org.primaresearch.dla.page.scanner.element.MetaDataScanElement
+ +
+ + + + + +
+ + +

Copyright © 2014 PRImA Research Lab. All Rights Reserved.

+ + diff --git a/apidoc/org/primaresearch/dla/page/scanner/element/class-use/ReadingOrderRegionRefCountScanElement.html b/apidoc/org/primaresearch/dla/page/scanner/element/class-use/ReadingOrderRegionRefCountScanElement.html new file mode 100644 index 0000000..965dcd5 --- /dev/null +++ b/apidoc/org/primaresearch/dla/page/scanner/element/class-use/ReadingOrderRegionRefCountScanElement.html @@ -0,0 +1,116 @@ + + + + + +Uses of Class org.primaresearch.dla.page.scanner.element.ReadingOrderRegionRefCountScanElement (PageMetadataScanner API) + + + + + + + +
+ + + + + +
+ + +
+

Uses of Class
org.primaresearch.dla.page.scanner.element.ReadingOrderRegionRefCountScanElement

+
+
No usage of org.primaresearch.dla.page.scanner.element.ReadingOrderRegionRefCountScanElement
+ +
+ + + + + +
+ + +

Copyright © 2014 PRImA Research Lab. All Rights Reserved.

+ + diff --git a/apidoc/org/primaresearch/dla/page/scanner/element/class-use/RegionCountScanElement.html b/apidoc/org/primaresearch/dla/page/scanner/element/class-use/RegionCountScanElement.html new file mode 100644 index 0000000..0f24866 --- /dev/null +++ b/apidoc/org/primaresearch/dla/page/scanner/element/class-use/RegionCountScanElement.html @@ -0,0 +1,116 @@ + + + + + +Uses of Class org.primaresearch.dla.page.scanner.element.RegionCountScanElement (PageMetadataScanner API) + + + + + + + +
+ + + + + +
+ + +
+

Uses of Class
org.primaresearch.dla.page.scanner.element.RegionCountScanElement

+
+
No usage of org.primaresearch.dla.page.scanner.element.RegionCountScanElement
+ +
+ + + + + +
+ + +

Copyright © 2014 PRImA Research Lab. All Rights Reserved.

+ + diff --git a/apidoc/org/primaresearch/dla/page/scanner/element/class-use/RegionSubTypeCountScanElement.html b/apidoc/org/primaresearch/dla/page/scanner/element/class-use/RegionSubTypeCountScanElement.html new file mode 100644 index 0000000..3ba7af3 --- /dev/null +++ b/apidoc/org/primaresearch/dla/page/scanner/element/class-use/RegionSubTypeCountScanElement.html @@ -0,0 +1,116 @@ + + + + + +Uses of Class org.primaresearch.dla.page.scanner.element.RegionSubTypeCountScanElement (PageMetadataScanner API) + + + + + + + +
+ + + + + +
+ + +
+

Uses of Class
org.primaresearch.dla.page.scanner.element.RegionSubTypeCountScanElement

+
+
No usage of org.primaresearch.dla.page.scanner.element.RegionSubTypeCountScanElement
+ +
+ + + + + +
+ + +

Copyright © 2014 PRImA Research Lab. All Rights Reserved.

+ + diff --git a/apidoc/org/primaresearch/dla/page/scanner/element/class-use/ScanElement.html b/apidoc/org/primaresearch/dla/page/scanner/element/class-use/ScanElement.html new file mode 100644 index 0000000..6727a59 --- /dev/null +++ b/apidoc/org/primaresearch/dla/page/scanner/element/class-use/ScanElement.html @@ -0,0 +1,218 @@ + + + + + +Uses of Interface org.primaresearch.dla.page.scanner.element.ScanElement (PageMetadataScanner API) + + + + + + + +
+ + + + + +
+ + +
+

Uses of Interface
org.primaresearch.dla.page.scanner.element.ScanElement

+
+
+ +
+ +
+ + + + + +
+ + +

Copyright © 2014 PRImA Research Lab. All Rights Reserved.

+ + diff --git a/apidoc/org/primaresearch/dla/page/scanner/element/class-use/SpecialCharactersScanElement.html b/apidoc/org/primaresearch/dla/page/scanner/element/class-use/SpecialCharactersScanElement.html new file mode 100644 index 0000000..d05097e --- /dev/null +++ b/apidoc/org/primaresearch/dla/page/scanner/element/class-use/SpecialCharactersScanElement.html @@ -0,0 +1,116 @@ + + + + + +Uses of Class org.primaresearch.dla.page.scanner.element.SpecialCharactersScanElement (PageMetadataScanner API) + + + + + + + +
+ + + + + +
+ + +
+

Uses of Class
org.primaresearch.dla.page.scanner.element.SpecialCharactersScanElement

+
+
No usage of org.primaresearch.dla.page.scanner.element.SpecialCharactersScanElement
+ +
+ + + + + +
+ + +

Copyright © 2014 PRImA Research Lab. All Rights Reserved.

+ + diff --git a/apidoc/org/primaresearch/dla/page/scanner/element/class-use/TextContentScanElement.html b/apidoc/org/primaresearch/dla/page/scanner/element/class-use/TextContentScanElement.html new file mode 100644 index 0000000..a90747b --- /dev/null +++ b/apidoc/org/primaresearch/dla/page/scanner/element/class-use/TextContentScanElement.html @@ -0,0 +1,116 @@ + + + + + +Uses of Class org.primaresearch.dla.page.scanner.element.TextContentScanElement (PageMetadataScanner API) + + + + + + + +
+ + + + + +
+ + +
+

Uses of Class
org.primaresearch.dla.page.scanner.element.TextContentScanElement

+
+
No usage of org.primaresearch.dla.page.scanner.element.TextContentScanElement
+ +
+ + + + + +
+ + +

Copyright © 2014 PRImA Research Lab. All Rights Reserved.

+ + diff --git a/apidoc/org/primaresearch/dla/page/scanner/element/package-frame.html b/apidoc/org/primaresearch/dla/page/scanner/element/package-frame.html new file mode 100644 index 0000000..854e430 --- /dev/null +++ b/apidoc/org/primaresearch/dla/page/scanner/element/package-frame.html @@ -0,0 +1,32 @@ + + + + + +org.primaresearch.dla.page.scanner.element (PageMetadataScanner API) + + + + +

org.primaresearch.dla.page.scanner.element

+
+

Interfaces

+ +

Classes

+ +
+ + diff --git a/apidoc/org/primaresearch/dla/page/scanner/element/package-summary.html b/apidoc/org/primaresearch/dla/page/scanner/element/package-summary.html new file mode 100644 index 0000000..b7fca18 --- /dev/null +++ b/apidoc/org/primaresearch/dla/page/scanner/element/package-summary.html @@ -0,0 +1,213 @@ + + + + + +org.primaresearch.dla.page.scanner.element (PageMetadataScanner API) + + + + + + + +
+ + + + + +
+ + +
+

Package org.primaresearch.dla.page.scanner.element

+
+
+ +
+ +
+ + + + + +
+ + +

Copyright © 2014 PRImA Research Lab. All Rights Reserved.

+ + diff --git a/apidoc/org/primaresearch/dla/page/scanner/element/package-tree.html b/apidoc/org/primaresearch/dla/page/scanner/element/package-tree.html new file mode 100644 index 0000000..18947c4 --- /dev/null +++ b/apidoc/org/primaresearch/dla/page/scanner/element/package-tree.html @@ -0,0 +1,142 @@ + + + + + +org.primaresearch.dla.page.scanner.element Class Hierarchy (PageMetadataScanner API) + + + + + + + +
+ + + + + +
+ + +
+

Hierarchy For Package org.primaresearch.dla.page.scanner.element

+Package Hierarchies: + +
+
+

Class Hierarchy

+ +

Interface Hierarchy

+ +
+ +
+ + + + + +
+ + +

Copyright © 2014 PRImA Research Lab. All Rights Reserved.

+ + diff --git a/apidoc/org/primaresearch/dla/page/scanner/element/package-use.html b/apidoc/org/primaresearch/dla/page/scanner/element/package-use.html new file mode 100644 index 0000000..ea5ea23 --- /dev/null +++ b/apidoc/org/primaresearch/dla/page/scanner/element/package-use.html @@ -0,0 +1,151 @@ + + + + + +Uses of Package org.primaresearch.dla.page.scanner.element (PageMetadataScanner API) + + + + + + + +
+ + + + + +
+ + +
+

Uses of Package
org.primaresearch.dla.page.scanner.element

+
+
+ +
+ +
+ + + + + +
+ + +

Copyright © 2014 PRImA Research Lab. All Rights Reserved.

+ + diff --git a/apidoc/org/primaresearch/dla/page/scanner/package-frame.html b/apidoc/org/primaresearch/dla/page/scanner/package-frame.html new file mode 100644 index 0000000..0edeef5 --- /dev/null +++ b/apidoc/org/primaresearch/dla/page/scanner/package-frame.html @@ -0,0 +1,23 @@ + + + + + +org.primaresearch.dla.page.scanner (PageMetadataScanner API) + + + + +

org.primaresearch.dla.page.scanner

+
+

Interfaces

+ +

Classes

+ +
+ + diff --git a/apidoc/org/primaresearch/dla/page/scanner/package-summary.html b/apidoc/org/primaresearch/dla/page/scanner/package-summary.html new file mode 100644 index 0000000..ab5f02b --- /dev/null +++ b/apidoc/org/primaresearch/dla/page/scanner/package-summary.html @@ -0,0 +1,163 @@ + + + + + +org.primaresearch.dla.page.scanner (PageMetadataScanner API) + + + + + + + +
+ + + + + +
+ + +
+

Package org.primaresearch.dla.page.scanner

+
+
+ +
+ +
+ + + + + +
+ + +

Copyright © 2014 PRImA Research Lab. All Rights Reserved.

+ + diff --git a/apidoc/org/primaresearch/dla/page/scanner/package-tree.html b/apidoc/org/primaresearch/dla/page/scanner/package-tree.html new file mode 100644 index 0000000..866d803 --- /dev/null +++ b/apidoc/org/primaresearch/dla/page/scanner/package-tree.html @@ -0,0 +1,133 @@ + + + + + +org.primaresearch.dla.page.scanner Class Hierarchy (PageMetadataScanner API) + + + + + + + +
+ + + + + +
+ + +
+

Hierarchy For Package org.primaresearch.dla.page.scanner

+Package Hierarchies: + +
+
+

Class Hierarchy

+ +

Interface Hierarchy

+ +
+ +
+ + + + + +
+ + +

Copyright © 2014 PRImA Research Lab. All Rights Reserved.

+ + diff --git a/apidoc/org/primaresearch/dla/page/scanner/package-use.html b/apidoc/org/primaresearch/dla/page/scanner/package-use.html new file mode 100644 index 0000000..65bc571 --- /dev/null +++ b/apidoc/org/primaresearch/dla/page/scanner/package-use.html @@ -0,0 +1,151 @@ + + + + + +Uses of Package org.primaresearch.dla.page.scanner (PageMetadataScanner API) + + + + + + + +
+ + + + + +
+ + +
+

Uses of Package
org.primaresearch.dla.page.scanner

+
+
+ +
+ +
+ + + + + +
+ + +

Copyright © 2014 PRImA Research Lab. All Rights Reserved.

+ + diff --git a/apidoc/overview-frame.html b/apidoc/overview-frame.html new file mode 100644 index 0000000..0ae4988 --- /dev/null +++ b/apidoc/overview-frame.html @@ -0,0 +1,21 @@ + + + + + +Overview List (PageMetadataScanner API) + + + + +
All Classes
+
+

Packages

+ +
+

 

+ + diff --git a/apidoc/overview-summary.html b/apidoc/overview-summary.html new file mode 100644 index 0000000..9167680 --- /dev/null +++ b/apidoc/overview-summary.html @@ -0,0 +1,134 @@ + + + + + +Overview (PageMetadataScanner API) + + + + + + + +
+ + + + + +
+ + +
+

PRImA Page Metadata Scanner for Java

+
+
+ + + + + + + + + + + + + + + + +
Packages 
PackageDescription
org.primaresearch.dla.page.scanner 
org.primaresearch.dla.page.scanner.element 
+
+ +
+ + + + + +
+ + +

Copyright © 2014 PRImA Research Lab. All Rights Reserved.

+ + diff --git a/apidoc/overview-tree.html b/apidoc/overview-tree.html new file mode 100644 index 0000000..0872455 --- /dev/null +++ b/apidoc/overview-tree.html @@ -0,0 +1,145 @@ + + + + + +Class Hierarchy (PageMetadataScanner API) + + + + + + + +
+ + + + + +
+ + +
+

Hierarchy For All Packages

+Package Hierarchies: + +
+
+

Class Hierarchy

+ +

Interface Hierarchy

+ +
+ +
+ + + + + +
+ + +

Copyright © 2014 PRImA Research Lab. All Rights Reserved.

+ + diff --git a/apidoc/package-list b/apidoc/package-list new file mode 100644 index 0000000..e6ac67f --- /dev/null +++ b/apidoc/package-list @@ -0,0 +1,2 @@ +org.primaresearch.dla.page.scanner +org.primaresearch.dla.page.scanner.element diff --git a/apidoc/resources/background.gif b/apidoc/resources/background.gif new file mode 100644 index 0000000000000000000000000000000000000000..f471940fde2f39ef8943a6af9569bcf986b1579b GIT binary patch literal 2313 zcmV+k3HJ6!Nk%w1VKM-40OkMy00030|NlK(aXwsfKV5S}VtGJbbVOr%L0@%CZH88Q zl{{NzcR^uxNo<2iYk@pjY)*5FJz8x~bc{)B zfk z+1T6M-s9WdW8dcJ-wO*3@9+W*5AY543-j^$^!EPz_4eHZ2#>)41`h@dc!2OAgN6$a zCS2I?;lqgx6IR4nkpTe;1RN0f=zxMq2O=q`94V5d$&e>Unta)^<;;^G3>e7yp=ZvW z6DIW3xpSvaogXF?_4%`@(V;s}NR^5J!3hrtJV@1QRV&r5S*L!zYE|rss${iFkg&!? zTN5V#)~=bmMorwgZsEpdOE)iExo+FO-8;8Kga{=HbSQCnF=E6W3?o*|ID%uwi5**> zJXy127Y9m+=HQ|PhXWi+xNwoWv}n_%Pq%(e+H~mGqhq5kv4Mo|-n~g|7!F*xZ{xv< zCpXS~dGg^IGK?4@J-T%b(XnUHFul6n<@2&4)zzyO2) z3Q8`i0+UKY*`$}e9mmp;tg*))`|PsK1|hAo%u0K$vDwm4gaSkm0j{`26k#qAKmbuhxZ#cquDR>B zD{s8+&TH-uNg$C#68QG}1HMBHfrP&L@@w$F_!itRzXdCN@V|LDAu%3!IDtq1#1UV7 z#1RxvT=B(DWbCoU5l=ia$Pp`Hgb_?Mp@hmtxZDI2N-)v#$}PXVvdm1d>@v(v`0TUJ zF)Pu89(q`zv=w^nVTIF3@3BYIPA}c`(@ZCAwbNBEt@PDUKe5CTR8aB66IE1!w%Amt zy+jpcn~k>GZpVFg+H6x{_uOksvBlq0OyT$6TyQZ37k(cOxZr|JEx1sGm<(M9gH z-~PMqyn|tT=))UN`|-FFFUA#KToK0fUOaz=7}Z~KeHhVC&%O27cTfHQ^WBU8z4p&T zp#>D|V}XShTD;Hx745Iz{`>K-Z$A|7!*Boo{mY;G21vjH8t{M!OrQc6$iN0V@PQDF zpadsK!3tXNf*8!81~qnXWuHZ)kytd=_y+ADWvw31ouV;CdZ#ya*(l7-A-C-Y^+iit8O zBy3*`Ls$|5Hn4m_^I^|C7{m7EFn|5vTk;|oywIgCc9Bb*=L+Y$)M>9GC<|HGs@6NB zHLY%03!dDf=eDRt2O6lVSFRcsuWZEwU?=z$CZ0W?#VJfdN>HG(l%oKpyiftJc|Y)xkjSJYCrQal-0PC~()T9xwF!Jf zVi1UA#3BBbh(i8r5&v#Pz!cF41KjbCc?4u2@@Q~oKLirt2TM30;y6b+zyX2`Yl9u; z`0$3;v0-YUp&7NdPT#q`cZlbij$jvbRk6R>8g*>}*b9E+WDwmpHAAxYzyT aU_pX{M6b8i>#Dq3onfZy}_nli%!Q$ZV%e&!tN2 zX3B0NWXQ443Eo1rUP86rLU>O>oTp%wt3Z{Tz&P*)Iraq^_@X;RtUFY!JxH|4U!>kw zxXwqo&R3Y=EsXaR!ng@y+y$%L1P3FZ4@N!j3m5MW74HcC->_JFuvlxLXiI=-OQ2|@ 
zpGc#>2-aN)<1RE9^`bB0`65VSK2>5m>CHs^YZCC)NX*NfbeT1%)Cxpu2_(6cCbLvjLY`hf1%*q}QO*%V4SfOu5Nqg~`-+(-76= za<`RA&(qDB^S!nIS^od5|Nk$KPXD8(qSB!f`M*{E?A^&yOW$08V^iNPK!%UNJ-@xmz>`pG2_%4I3QWk4UdtwP!GH$C%mo2K|$Ap=_)Y!#O($1@ohsUtR1k%wI*) z4*X&g==oWh`j{uP=HFm;Ye>0>UbDdtSp^~MaQ!L9I#)Ga?q}{@T#|qec*FkMLDenm zj^sCgk!^O^3o|vG!~2$$$7`C#4Ry zdQ!tui+J1*HyavK+4{`r+zvYHj9IsRt~@uEBOreWS8~2rXAR3!|7aTdr+x4|>@$Az z)b1t$gSB~6USxpfLmy^|_J_eNt*PI=ScO1SVH895N#`ef%IOh&o-2GIjK1s-JzkyZ z@r7O%hChz}kMHCM@Wqi^R-9t&%Fh^#9dVB0%ej@$=OjXA%XZdzCXf}c>SW26_z-Te z5b{}XWg&rELM=N*%aimp)k04t2c+`WAS>ZFIPWKvtyOI))HzpRA!T!b{tv?4NzF1v zNlP%#{&p@lFFEKvcroMAsI)mq?&`!e%l+-y&j9ZqhN}oG&dB=Pw09r+Q%m0cMujS# zs$a7!9VH`CC7k{!bV(J`rm%Jpj6&nLtWhPcy$onn$8G#ZdD9hxO<9k67Ya>K_7W~3 z&KYf14fq<{qHA7u6;>AOcomhdg?ianjr9uINt}*7w?g%z9{Q`(qRo@hDwSpGmxz&h&>%G%T(URL~=c>C{>y$K?+wLFp zy*M1@FTUKYV>8DeDIAIKM+!T5c-k&C4?Y~y^E zQCIc-=9~DiPtfVZB=_c3`qH3h|NXd^BcOQG`funSe)i5!NoA_r{b6PwzSDIXG+!(F z9CqJgo&~#7^VZHWj{u23q+NDCHn}GeWDC*(SW%{f4WMtP3l2jsO7*M)EX)#NLlsNnU4q@#jn0r#rsWsf^ngE0&ambG1f;Rj zfOk#_>1|25Z%?iI{0Yv8)DQfk>m1td?~}m0N%^k^u%EuUCc#ItmlY|epQ3YLWehYw zRU0qpPb#X&WU*UOU8et(s8x~WyYWYsgJCF+;U6@*nICY8)dk}IG+(#_Bz8zURd3HZ6qPE68U1%S{wL0 z;K{PDw2iRFIGG?(UiE9kT9?siuv4O{ z`dX2-eiXU3N)H2nT4V=AO^~J}sw+gr{&~qx%$$wlMv_JCWAMfcjYl}*Cfcf!adOY8 z8oLmJ{%49e+nLiVo#H9}wRk?UCzDz^>9TDxreVHzl~R*)?YU>Uu;J2eQ27O5`&X^8 z`94{)YWJQa#l0Fbz0N6B>j&8J;<%VuG6OYM9&QIdtueWjI3X;*dEtGiF@1AcvN4U> zG5SXIEXxB>)!mtQOztJLyeF78S*kLiU-!>PtQ_s~OMl~&y(hVVe$A5 zwo}E-DJ6${QP75?LsQ}Wl@MXwXMT4d>|?rD!g?jE>J^N*y;X}5FLe%d0_ zZ>eIBK6l@jkfw{p_YiDP;MS{jww{%j#?rk2z1J!HqE;Vd!TrCl_7UPef8;edI}wD6 zT&12Bxj&q}d4%$GHq+$~UYtWv`wI9k`89oKkCEK_E;-+O)(rhThjOM|kXDn{!W1Lo z`_?yQv=lp=-w()R<=0&c5%RWHY_fw@qb}uwFuPAGkl~@Kis}eE%MY@~6ZyWcF+llM zGyK`)(vn1F%%z=W7-Y=1$`w0Mv+-|#d};%JjCmw)Y1hOxwA|{}P%6LS4X`jQCGh`mR@=hGrr|cXa^Ipj;Mh)6mTqd1s_HmP0IxXT!w7YhoIHT>Hm#!;c@|L9OjV zsTlHE{Z;HWeM9^tPm-`|&nnl$%DRtNG1~?npUvgKPwKlaccEe4q!7YU3zykJnu6Sr z()LMXs_)^~u-ds7+wMff)RAJF?2?1H`_wDnt%MssYeB5;q~ojgVm6OHA6B>FG2erv 
z8&`|6<`=!EPKR^8Qlp5MiKwfxy4D`mN> ze$RKh_6*YJd4y0nnUZvwN%iY&^9xk@cM|5g#pZkc#N*(PH?^w&?ilTDMXFcd0`5!E zvgHS`=Lc|~1aO=L@L~eE*aP{90lc7qXY7GOs)3JH14T{(`K1D%tpvUT1-?F^1d4_S zJ#7yXkP3Q37bJlRQfv=mV-J3B8O*m5B%L3uW)S>|Jwy`|s6iK`sv0Z-3NcU(0knrG z5ChFXA@A9PUSdLI+(VU!!J1Mbw!~0VP^jZci2X|Nx0BF!24ObrAr>b=QtlyN4TAhn z!mQncJm~^m4MIafVLt_ewDUtO+e5w*!`(6A&H^F7i9s4t5&uBpNvh$nlTZjqTM5krNRRQ zqP)VR!|9@H>7qN_!+-)&_9s!^;gOvy5s~iEB&qP8{77&2NJMzZcsnJgSt_bYDzYU% zxQ#uuk3D*e7_*d5^?HW(^(WxICGf-mcmM((VStzIz%zFsm0;ZI3h=5OciJ#a%7I(IeGbFv+PP^?^sKBPrRBl<+qK^o%3fi=L9`la>-l4~p|hzAl~W zf=%(|NHgF7r5dJD+Cf08q-c(m;Epsldaz4cqHzTHT>)4xEe(cE0i~tf{Y0xs_1~Kv z+BYQ-TpEOch13;5YC9nHYEXhSv{ew=LV~nQL%UBQEgaDL2m?9u~v zEQmOvM=aB)Z$+eE38rs%AZR_)4>@2raqwH#Fji#xoLc&PS_TU^W8W(M0GqLdO~1yF z{sfHZ_sC#FX58(}d>RSkKZCz8%D7{cC3Z$Zh@52{31&V*W-@s~Z<8~aBeNcNW?e&O zsR(7fHOf}B&fsRqdZ(WK1e~s*o^uD6{YX9QJvqyWAqQXt*E>r$V94YK=X@8+{1cg> z*_i`a%alCJvbD~lCg&Q1Gk=|BzY)sejf9EHJ{s7lu4?ExCWR3jgTiET;exy{sW!Mg zuj*_YOf0@ScN~X0$7V6&KpL172rf|rA8?K<2+GelXw)NUk#@b4aT5MO%1ip4*ym}B-JI__S1R?CK z<4eW~bH;@H@tR55x}&JNSw_NvEPk)6E>XDt7*)4sgWuw+_vNZzmaS(tsi(57zcjA9 z@~XcHtzYq~IX|z*Md9mh>W~`sk3<^s7;EmyH4wcTdAo5NkUA2ofeG69{Gx7#i_*lt zQ7;N@xEo#nNRj&SbDHNnP0w#OE0{DZ$~7ySG%IN~zwd5Vu4&dnH>*OMb>&*VL^tbA zG;7y1t9dsYU$p3pw0x6mwGe6fjBYWsZ8e3q8f~-~cefgHxBangajI$kv(c*W-DZGp zbM$UgnP{_MYPXYX|6$u^deIhE(-xuGX2RVXqS+o~(iSV%;ZW1=Zqkut(r&xak^pT> zsp*I@X|-eOd^gb+sM(%3(E$|c47Y91mTU99Xe;4vFOTl5gmwVB+fvc3n2pwK?~Xd# zwrY{?CUj@~Msr?wXU0WKv2A$hq z`$V^gNq4(<*C=;4e4}$*uIC$5&uUHkM08J~N$>VV*VpdmLCuc!?!J9=-)VH;fo9)| zNN4m#^Kb9|`RF!^ZAT-z=bC8$do8~Tjc^o-aQjyc2(TW*d50E1#NW0pKb^~tf&OUlS+W}>0!m@!~1 z&TdSLhm`0u99c-z=oxYL8IFaGCDoFwFUP!1iJ%xF1UC4hhv*VR2451Pc0+kQGC)39C5 za81oV=$+xzZNYhn=RB-CTZ>Bevj)A3mi9|OS(dcy=N#Zm=Dza|z4Jd<=3IQ2CB>FiwH7{4Ej#+oa>M67 z!56)Km&2xJ|H7B;%~rJDuJ{rbZQiaX*e^$DEt~T$#h9(y#jg6>uX?boq!N}Q;EQth zYo1rjc15dETPw~*Ymu=lreoE9g^wb)ZcRe1yp1(Eo(rmqUYZXOU$BC_| zX{{&qE?E06wXm#v#cpKwE)jaydSaI`TkCCClr_lKMzPkyFT!R%VRn&sZSrchKx&4e~pJQcfViQxxl=T=7}#gYz7Pvoh`T#Jbab%2A2m 
zxh?A<`}A?8_GumBEcL;$x%gQb@PZ(If%ZE~D?ax#Km4a~+GV~!;Bb~qxxh@HHc|H6 zr%$^c9Dw~UQFWJv+81rCXS1vqqLfQ~-BtO63xCArGVA4T-}xPXYGHqB5h^+n5%$24 z(BROpi13J@*qFfR$oRMHel`=(zy zovs-UKHD3VkJ?hVeq!aA+8Fh4+NIlFhcC~UrR{4I#}K*u&z%68+P1*=q0B1r*2MY> z!9gYs*vlTO5v#8S>c#3goFmp>3iVKdU)NkjNV(s7tO4Wq?2M}o5Cj-*7;S=fEshOA zR*4$dm{ROvUamG%xL_tSW6}U$Nl=@91T;nC11o-iIVyVrfkd) zTCp;^tOy|_kuOFV$Nn=$AQJO9;&sZ&eDs^!r*m;Hw!)vpO1vcfj2EV{dJ?7ap0tq6 z$SwUVM*Vt+MS_`;bas-svPV|3POQi8G~?f^KOx4hg1He+Wd*s3Hl1{TfJS-+zv6vc zPoKiwr?7wECbub(IdB)9f_!kmUjBR*KY_z4E8_QA9xSr#G&@i5y^H`jB^I{|akh>W z%Cn3luOVY|8P>u>e^~#{$kmgX&-q>k{#pFbm2({(rtG<%nb0UCQ0%{Cy`F&~7}*we z@Of>ND_)V&XwN_+n~KjVorUQWZ*B6cld7ymQl{;rwlHl34K#}2YWxE+4CX@P&u6AfCda`&ZT1MOY69e-L@gNcAvwx8%1Z7lB4zc=_Cpt~&s ze%?;){1DB(PSK!^za967qF?lIjB~&06}Lf`cgh2qUiI^|$-VCTNE=hp&Ij}^A9&|* zQQrSqo3gn#_=z9j(y6f@T|OkJYv(fjwpz}$*U$|nLH2F zPNMuTS4g8 z*^hOlRh6~Mk}58;d477R>F^~aLO$dOXmhA*6zwIaHK()t2zKjo?j^NOJbh_=+71xg zO{Mgp7x?Z-1MKzoQ<+V2g#|e}|JawOPJZBL{o~PYdtWDX?jl##!Aiq|w>)vGJLipp zBK1xGhcvgSsQ;rn>+`>UmxlID{<~}7{y>SO^cyktN^Fsz!Z|B4?p*RKQG*8}SYBt{ zuFO{vJ?jgL{gUzYsnv(io}c0vlCp#*1vE?}KL^UZ&VF^TK+D;40CxX%j);%dCt;Z{ zAeMXC9JPWvKGwsCxx4w2iv_wNGG8l16AVI93rmc^c1>r(P||YE zpXa+=-&k995hfykL^J5S&vJF^ljR&`FE#ppNMM3%Omc!F)Mn{{&Ip#)JegbEJxud2 zn`wDVB~DMii5|H%m~51YeU1juNG3!+&?*uC#q@)z8q~`4yEL5I8}PtyA1IZ=52P$x zX)KhZt z7czUXBsy-8d`GVQ`90`wIh(Xt7v5j7h0t&ET~2M!Tb~4rN-xtK@8@mB*c(6QTwOS- z%9445_WY|cfm4?$nX$72&{~^mu}an^x^Da%=UU6YI;ur3+9L6I>raW5!=-Nzy(F2Z zwZlg7aM3NN5b{K|FB>s4R}|&Lr32_Ys{wwkECxo|rV@;5aHB25iUs7(6@dDpjN{Y%?C~UGp>*Q}K?)KKk64 zAn;@-dER}QG0L${jQ1cR75eM3-~ZTltTQ8%sm9x4Y`ve@ekMuvpA#Rh51@s6;6^&Q z!&M7^b%cea7FlZkPV9}@!bPBBfB&~XvGlE2T7V?IpM~OBmuK;OSt{~N`rL5c_I^de z9n*=@p|l;d`b_YIn8Aem1t7pp0=2-MCTIcJHlY z6x+mNLgi{JpwP)y(yzAFL2A#>bI&EwZE`PGvd*FQ!rx~6bUN&+Ij3)L;=595L#G;m8*^e?ap1`J5w7-q)*iUT_W9w8 z&xS-`i++HpWzY-a-)CWd0(pLW$A85P{Dy9r-=uPekNpN^yA}pJ7yWTZ>3iw4d6+IK zF%1XXkGcJm{0*vhSG5R1ySW;jctk9O==1-Mk?=Bl<{HE1p_@tx1s^+GoczYxj#B=i=kwQvEPrOt`<4W*pJw zbNjEqpr7B|Llc%m{V*QssV)im;pb00LUob=yFaU4`P_}ywU 
zt*QZl-bUsmh@L&zQaX4uHL&7YD(BOb9hH;;y;O-b-_O$4EFi1vCrMlz`dN|u?}HNO^aFQV{UZg_yy%nf>IXpulip!cR8|vNu7P*; zQye@}Qmj%(TB6`5E=c~w=LITF266XJ6X5xA7!OM1SE=~N*o3EP5Qqx!W<_+EMSLGo zqkC18AQ=0AK9=hgGQtrTovYc5^?Z^RLX?hlO-j&e1MXTTbfm>MS^=}!p>C>icUKdZ zBcNOb(6IJ!kq*e7N8Fx!!kPyn+2B2^2hd00+W^PUA&+S63jFE)bP5Tv+L5l~n(pu? zbeO|+K{{?pEow3?j0+dGVu)a6(0r{1Uj7{3 zxSsZ|BdMk>1-S}-;+`pk{Q5>H=tLRx+YqeenaSRsEX@gtPzz>j1A9g!C9kGtspY(- z%YL>NkVDE2z@}*;Q{=&5)yS;NupAmmibGUE4qte7aY6PcnXJgw>}ad(SW;@HtNurF ziV0_yHz=;Di%Tki6DW^tjkL`t%Ktct(ay zvuAOYoCu!Pm~@P5CIjk$bp`_iv{^l*Au{fB8mJK1>Macv?GL)**8*+JNvySIH5Y7i#1;!%NT!efc z;Z0*AOM&1VpR+6wIQxBM{xf`8T1V@#e<#QL}=YRwMkWG8%1(Fgj{iX)N zup{Txko(DqJWf=#Oi?Z!nra-?C{);TP`w|4>L+EKx1&P3swX<*#_50F!lD_$nQyuK??!UwA-{y)^QmMxoK1xIJ~uML{u;5!Z5tQyEL>;KaUd!_9FP zl2$QOI6V1`QdF|8gkdZsSpUqCjSBu(1H)r*vL#PEy)@Px>5TIk7_9o#Bj zzD&<1_k(ejk%qO6ak=GMmG5b7LTAA^KKq-Ey#z8(2wy2;Ot^oZI(MG@)~iY$RAnJt zu`ioyvR?Vws_tuK9hDqmel+)bP0kyxJV{7t=&3{b(@Hs1fs$9n45aq)IKknZa2H*7 z^P-ZDyOMdMj&-9{(-?dqo5I3Gy=K$!L%q>3^0N~o^2i0^_@^2nQv>S4B&=5_8^a^V zaY!NjyA5QgO&r#^CJcp&=!))MZ*CC&hvLEzWU*!IO=aYo{_yG+53H$XOAIQWnG`uD zLuuwTY6e8N^m5^AHQa}Y5Z#SdbEY;+x{oW?g;ie4CNYomRyQd2mv^L}T!>a5<*wTh>@>Qtwp~nejn`~DcZJI+QC-xU zoxz=5z0k%1;jBrGI%Th~FQElrAPr?E-Fv9|o09dPk=?>f)jFKL8PK|;w(cVDq>YWP zEfL7RGBv|<>f4IccND3wCi*V8`>#a$FPZu&a{V`W`me+Kuf_CJ)%IV%?5ByL^#3Q{ z&uBM5|34IKI>0_Tz{5OngXe#6w*N6;;5PH%9n%56%RaWA{wJ4%515Apdj`a62bp<> zM12OuV+QZ^55ATkViO(UWgg}%9C}kb^r~=BiDyWIXZWM&kb>Q?dd$#W`4KU|2#4qh zz;sZ>ZqS5h#Kdk$&1c9AHmDUdtmHE)CqH0RIAZEE;t(^+RXF+*FlJyk;?6Vn{&MsO zZ0HwY)b4Va!F1#s^N5$-s9(&mPa*Lu4>4SxXm~l|3?PR2jB1J!Q|(4#0i$lFME^-r zA~Q(2O+PHOdcVN((R8zqi>%+yx4PA5u&+jI zZ?)Fm8m-+`n!Bnrx0PvZE7!Q)Z+NTE@K(R!nO40sZF(n~bq_b_9H`UYU#q>pPJ3UC z_UeU>J7qcy%%`ks9)BNcS^GDOn z?oKkjHNoWO1e2?M#vd12e^_AscAnLnc~-CISiYWX`D%{k^H~<37unpMYJYdSv=Om2vbAM@`Qp{{SI=yP zj6WN*eEt0G$9EPX6FU%)-ho>hWTW!yzXBIo73<0umM-=@eG&niY^` zlG(|vuCl_x(X^Fob@=i{8+M5vWf7Bz=#aHGTNA;fZQyfbfueI8Z^639n`(DI%w^-^ 
zl`=@!u)r~Xf920-xd$Ab+S&PJY%K0H8a_J8uN3^_!K1_NV$*e#*Y*6|)XpiW=9H`*`Xx7W%v@7{XDma1?v0a%(K6rI&1!a YpWXKgmku8Vj|K)Vje`mzEKCg608Q#dYybcN literal 0 HcmV?d00001 diff --git a/apidoc/stylesheet.css b/apidoc/stylesheet.css new file mode 100644 index 0000000..0aeaa97 --- /dev/null +++ b/apidoc/stylesheet.css @@ -0,0 +1,474 @@ +/* Javadoc style sheet */ +/* +Overall document style +*/ +body { + background-color:#ffffff; + color:#353833; + font-family:Arial, Helvetica, sans-serif; + font-size:76%; + margin:0; +} +a:link, a:visited { + text-decoration:none; + color:#4c6b87; +} +a:hover, a:focus { + text-decoration:none; + color:#bb7a2a; +} +a:active { + text-decoration:none; + color:#4c6b87; +} +a[name] { + color:#353833; +} +a[name]:hover { + text-decoration:none; + color:#353833; +} +pre { + font-size:1.3em; +} +h1 { + font-size:1.8em; +} +h2 { + font-size:1.5em; +} +h3 { + font-size:1.4em; +} +h4 { + font-size:1.3em; +} +h5 { + font-size:1.2em; +} +h6 { + font-size:1.1em; +} +ul { + list-style-type:disc; +} +code, tt { + font-size:1.2em; +} +dt code { + font-size:1.2em; +} +table tr td dt code { + font-size:1.2em; + vertical-align:top; +} +sup { + font-size:.6em; +} +/* +Document title and Copyright styles +*/ +.clear { + clear:both; + height:0px; + overflow:hidden; +} +.aboutLanguage { + float:right; + padding:0px 21px; + font-size:.8em; + z-index:200; + margin-top:-7px; +} +.legalCopy { + margin-left:.5em; +} +.bar a, .bar a:link, .bar a:visited, .bar a:active { + color:#FFFFFF; + text-decoration:none; +} +.bar a:hover, .bar a:focus { + color:#bb7a2a; +} +.tab { + background-color:#0066FF; + background-image:url(resources/titlebar.gif); + background-position:left top; + background-repeat:no-repeat; + color:#ffffff; + padding:8px; + width:5em; + font-weight:bold; +} +/* +Navigation bar styles +*/ +.bar { + background-image:url(resources/background.gif); + background-repeat:repeat-x; + color:#FFFFFF; + padding:.8em .5em .4em .8em; + height:auto;/*height:1.8em;*/ + 
font-size:1em; + margin:0; +} +.topNav { + background-image:url(resources/background.gif); + background-repeat:repeat-x; + color:#FFFFFF; + float:left; + padding:0; + width:100%; + clear:right; + height:2.8em; + padding-top:10px; + overflow:hidden; +} +.bottomNav { + margin-top:10px; + background-image:url(resources/background.gif); + background-repeat:repeat-x; + color:#FFFFFF; + float:left; + padding:0; + width:100%; + clear:right; + height:2.8em; + padding-top:10px; + overflow:hidden; +} +.subNav { + background-color:#dee3e9; + border-bottom:1px solid #9eadc0; + float:left; + width:100%; + overflow:hidden; +} +.subNav div { + clear:left; + float:left; + padding:0 0 5px 6px; +} +ul.navList, ul.subNavList { + float:left; + margin:0 25px 0 0; + padding:0; +} +ul.navList li{ + list-style:none; + float:left; + padding:3px 6px; +} +ul.subNavList li{ + list-style:none; + float:left; + font-size:90%; +} +.topNav a:link, .topNav a:active, .topNav a:visited, .bottomNav a:link, .bottomNav a:active, .bottomNav a:visited { + color:#FFFFFF; + text-decoration:none; +} +.topNav a:hover, .bottomNav a:hover { + text-decoration:none; + color:#bb7a2a; +} +.navBarCell1Rev { + background-image:url(resources/tab.gif); + background-color:#a88834; + color:#FFFFFF; + margin: auto 5px; + border:1px solid #c9aa44; +} +/* +Page header and footer styles +*/ +.header, .footer { + clear:both; + margin:0 20px; + padding:5px 0 0 0; +} +.indexHeader { + margin:10px; + position:relative; +} +.indexHeader h1 { + font-size:1.3em; +} +.title { + color:#2c4557; + margin:10px 0; +} +.subTitle { + margin:5px 0 0 0; +} +.header ul { + margin:0 0 25px 0; + padding:0; +} +.footer ul { + margin:20px 0 5px 0; +} +.header ul li, .footer ul li { + list-style:none; + font-size:1.2em; +} +/* +Heading styles +*/ +div.details ul.blockList ul.blockList ul.blockList li.blockList h4, div.details ul.blockList ul.blockList ul.blockListLast li.blockList h4 { + background-color:#dee3e9; + border-top:1px solid #9eadc0; + 
border-bottom:1px solid #9eadc0; + margin:0 0 6px -8px; + padding:2px 5px; +} +ul.blockList ul.blockList ul.blockList li.blockList h3 { + background-color:#dee3e9; + border-top:1px solid #9eadc0; + border-bottom:1px solid #9eadc0; + margin:0 0 6px -8px; + padding:2px 5px; +} +ul.blockList ul.blockList li.blockList h3 { + padding:0; + margin:15px 0; +} +ul.blockList li.blockList h2 { + padding:0px 0 20px 0; +} +/* +Page layout container styles +*/ +.contentContainer, .sourceContainer, .classUseContainer, .serializedFormContainer, .constantValuesContainer { + clear:both; + padding:10px 20px; + position:relative; +} +.indexContainer { + margin:10px; + position:relative; + font-size:1.0em; +} +.indexContainer h2 { + font-size:1.1em; + padding:0 0 3px 0; +} +.indexContainer ul { + margin:0; + padding:0; +} +.indexContainer ul li { + list-style:none; +} +.contentContainer .description dl dt, .contentContainer .details dl dt, .serializedFormContainer dl dt { + font-size:1.1em; + font-weight:bold; + margin:10px 0 0 0; + color:#4E4E4E; +} +.contentContainer .description dl dd, .contentContainer .details dl dd, .serializedFormContainer dl dd { + margin:10px 0 10px 20px; +} +.serializedFormContainer dl.nameValue dt { + margin-left:1px; + font-size:1.1em; + display:inline; + font-weight:bold; +} +.serializedFormContainer dl.nameValue dd { + margin:0 0 0 1px; + font-size:1.1em; + display:inline; +} +/* +List styles +*/ +ul.horizontal li { + display:inline; + font-size:0.9em; +} +ul.inheritance { + margin:0; + padding:0; +} +ul.inheritance li { + display:inline; + list-style:none; +} +ul.inheritance li ul.inheritance { + margin-left:15px; + padding-left:15px; + padding-top:1px; +} +ul.blockList, ul.blockListLast { + margin:10px 0 10px 0; + padding:0; +} +ul.blockList li.blockList, ul.blockListLast li.blockList { + list-style:none; + margin-bottom:25px; +} +ul.blockList ul.blockList li.blockList, ul.blockList ul.blockListLast li.blockList { + padding:0px 20px 5px 10px; + 
border:1px solid #9eadc0; + background-color:#f9f9f9; +} +ul.blockList ul.blockList ul.blockList li.blockList, ul.blockList ul.blockList ul.blockListLast li.blockList { + padding:0 0 5px 8px; + background-color:#ffffff; + border:1px solid #9eadc0; + border-top:none; +} +ul.blockList ul.blockList ul.blockList ul.blockList li.blockList { + margin-left:0; + padding-left:0; + padding-bottom:15px; + border:none; + border-bottom:1px solid #9eadc0; +} +ul.blockList ul.blockList ul.blockList ul.blockList li.blockListLast { + list-style:none; + border-bottom:none; + padding-bottom:0; +} +table tr td dl, table tr td dl dt, table tr td dl dd { + margin-top:0; + margin-bottom:1px; +} +/* +Table styles +*/ +.contentContainer table, .classUseContainer table, .constantValuesContainer table { + border-bottom:1px solid #9eadc0; + width:100%; +} +.contentContainer ul li table, .classUseContainer ul li table, .constantValuesContainer ul li table { + width:100%; +} +.contentContainer .description table, .contentContainer .details table { + border-bottom:none; +} +.contentContainer ul li table th.colOne, .contentContainer ul li table th.colFirst, .contentContainer ul li table th.colLast, .classUseContainer ul li table th, .constantValuesContainer ul li table th, .contentContainer ul li table td.colOne, .contentContainer ul li table td.colFirst, .contentContainer ul li table td.colLast, .classUseContainer ul li table td, .constantValuesContainer ul li table td{ + vertical-align:top; + padding-right:20px; +} +.contentContainer ul li table th.colLast, .classUseContainer ul li table th.colLast,.constantValuesContainer ul li table th.colLast, +.contentContainer ul li table td.colLast, .classUseContainer ul li table td.colLast,.constantValuesContainer ul li table td.colLast, +.contentContainer ul li table th.colOne, .classUseContainer ul li table th.colOne, +.contentContainer ul li table td.colOne, .classUseContainer ul li table td.colOne { + padding-right:3px; +} +.overviewSummary caption, 
.packageSummary caption, .contentContainer ul.blockList li.blockList caption, .summary caption, .classUseContainer caption, .constantValuesContainer caption { + position:relative; + text-align:left; + background-repeat:no-repeat; + color:#FFFFFF; + font-weight:bold; + clear:none; + overflow:hidden; + padding:0px; + margin:0px; +} +caption a:link, caption a:hover, caption a:active, caption a:visited { + color:#FFFFFF; +} +.overviewSummary caption span, .packageSummary caption span, .contentContainer ul.blockList li.blockList caption span, .summary caption span, .classUseContainer caption span, .constantValuesContainer caption span { + white-space:nowrap; + padding-top:8px; + padding-left:8px; + display:block; + float:left; + background-image:url(resources/titlebar.gif); + height:18px; +} +.overviewSummary .tabEnd, .packageSummary .tabEnd, .contentContainer ul.blockList li.blockList .tabEnd, .summary .tabEnd, .classUseContainer .tabEnd, .constantValuesContainer .tabEnd { + width:10px; + background-image:url(resources/titlebar_end.gif); + background-repeat:no-repeat; + background-position:top right; + position:relative; + float:left; +} +ul.blockList ul.blockList li.blockList table { + margin:0 0 12px 0px; + width:100%; +} +.tableSubHeadingColor { + background-color: #EEEEFF; +} +.altColor { + background-color:#eeeeef; +} +.rowColor { + background-color:#ffffff; +} +.overviewSummary td, .packageSummary td, .contentContainer ul.blockList li.blockList td, .summary td, .classUseContainer td, .constantValuesContainer td { + text-align:left; + padding:3px 3px 3px 7px; +} +th.colFirst, th.colLast, th.colOne, .constantValuesContainer th { + background:#dee3e9; + border-top:1px solid #9eadc0; + border-bottom:1px solid #9eadc0; + text-align:left; + padding:3px 3px 3px 7px; +} +td.colOne a:link, td.colOne a:active, td.colOne a:visited, td.colOne a:hover, td.colFirst a:link, td.colFirst a:active, td.colFirst a:visited, td.colFirst a:hover, td.colLast a:link, td.colLast a:active, 
td.colLast a:visited, td.colLast a:hover, .constantValuesContainer td a:link, .constantValuesContainer td a:active, .constantValuesContainer td a:visited, .constantValuesContainer td a:hover { + font-weight:bold; +} +td.colFirst, th.colFirst { + border-left:1px solid #9eadc0; + white-space:nowrap; +} +td.colLast, th.colLast { + border-right:1px solid #9eadc0; +} +td.colOne, th.colOne { + border-right:1px solid #9eadc0; + border-left:1px solid #9eadc0; +} +table.overviewSummary { + padding:0px; + margin-left:0px; +} +table.overviewSummary td.colFirst, table.overviewSummary th.colFirst, +table.overviewSummary td.colOne, table.overviewSummary th.colOne { + width:25%; + vertical-align:middle; +} +table.packageSummary td.colFirst, table.overviewSummary th.colFirst { + width:25%; + vertical-align:middle; +} +/* +Content styles +*/ +.description pre { + margin-top:0; +} +.deprecatedContent { + margin:0; + padding:10px 0; +} +.docSummary { + padding:0; +} +/* +Formatting effect styles +*/ +.sourceLineNo { + color:green; + padding:0 30px 0 0; +} +h1.hidden { + visibility:hidden; + overflow:hidden; + font-size:.9em; +} +.block { + display:block; + margin:3px 0 0 0; +} +.strong { + font-weight:bold; +} diff --git a/src/org/primaresearch/dla/page/scanner/ContentObjectHandler.java b/src/org/primaresearch/dla/page/scanner/ContentObjectHandler.java new file mode 100644 index 0000000..d5a999a --- /dev/null +++ b/src/org/primaresearch/dla/page/scanner/ContentObjectHandler.java @@ -0,0 +1,32 @@ +/* + * Copyright 2014 PRImA Research Lab, University of Salford, United Kingdom + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.primaresearch.dla.page.scanner; + +import org.primaresearch.dla.page.layout.physical.ContentObject; + +/** + * Interface for classes that handle layout content objects. + * + * @author Christian Clausner + * + */ +public interface ContentObjectHandler { + + /** + * Handles the given layout content object. + */ + public void handleContentObject(ContentObject obj); +} diff --git a/src/org/primaresearch/dla/page/scanner/PageScanner.java b/src/org/primaresearch/dla/page/scanner/PageScanner.java new file mode 100644 index 0000000..0408eeb --- /dev/null +++ b/src/org/primaresearch/dla/page/scanner/PageScanner.java @@ -0,0 +1,335 @@ +/* + * Copyright 2014 PRImA Research Lab, University of Salford, United Kingdom + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.primaresearch.dla.page.scanner; + +import java.io.File; +import java.util.ArrayList; +import java.util.List; + +import org.primaresearch.dla.page.Page; +import org.primaresearch.dla.page.io.FileInput; +import org.primaresearch.dla.page.io.xml.PageXmlInputOutput; +import org.primaresearch.dla.page.io.xml.XmlPageReader; +import org.primaresearch.dla.page.layout.physical.ContentObject; +import org.primaresearch.dla.page.layout.physical.ContentObjectProcessor; +import org.primaresearch.dla.page.layout.physical.shared.LowLevelTextType; +import org.primaresearch.dla.page.layout.physical.shared.RegionType; +import org.primaresearch.dla.page.scanner.element.AttributeValueSetScanElement; +import org.primaresearch.dla.page.scanner.element.BoundsScanElement; +import org.primaresearch.dla.page.scanner.element.ContentTypeCountScanElement; +import org.primaresearch.dla.page.scanner.element.LayersRegionRefCountScanElement; +import org.primaresearch.dla.page.scanner.element.MetaDataScanElement; +import org.primaresearch.dla.page.scanner.element.ReadingOrderRegionRefCountScanElement; +import org.primaresearch.dla.page.scanner.element.RegionCountScanElement; +import org.primaresearch.dla.page.scanner.element.RegionSubTypeCountScanElement; +import org.primaresearch.dla.page.scanner.element.ScanElement; +import org.primaresearch.dla.page.scanner.element.SpecialCharactersScanElement; +import org.primaresearch.dla.page.scanner.element.TextContentScanElement; +import org.primaresearch.io.FormatModel; +import org.primaresearch.io.UnsupportedFormatVersionException; + +/** + * Command line tool that scans a single PAGE XML file and outputs its properties in CSV format.
+ * <br>
+ * Properties:<br>
+ * <ul>
+ *   <li>Metadata (ID, creator, creation time, modification time, width, height)</li>
+ *   <li>Border and print space (true/false)</li>
+ *   <li>Content objects count (per type and sub-type)</li>
+ *   <li>Text content statistics (number of characters and white spaces)</li>
+ *   <li>Language and script (semicolon separated list)</li>
+ *   <li>Reading order and layers (number of region references)</li>
+ * </ul>
+ * + * @author Christian Clausner + * + */ +public class PageScanner { + + private static final String MODE_DEFAULT = "default"; + private static final String MODE_SPECIAL_CHARS = "characters"; + private static final String MODE_SPECIAL_CHARS_ONE_COLUMN = "characters-one-column"; + + private String pageFilename = ""; + private List scanElements = new ArrayList(); + private FormatModel formatModel = null; + private String mode = "default"; + + /** + * Main function + */ + public static void main(String[] args) { + if (args.length == 0) { + showUsage(); + return; + } + + PageScanner scanner = new PageScanner(); + + //Parse arguments + String filename = null; + boolean printHeaders = false; + for (int i=0; i"); + System.out.println(""); + System.out.println(" Scan mode (optional): -mode "); + System.out.println(" Supported modes:"); + System.out.println(" default - Outputs metadata, content object counts, text statistics, ..."); + System.out.println(" characters - Outputs a list of characters occurring in the text content (Unicode)"); + System.out.println(" characters-one-column - Outputs the characters as multiple rows in one column."); + } + + + /** + * Constructor + */ + public PageScanner() { + this.formatModel = PageXmlInputOutput.getLatestSchemaModel(); + + //addScanElements(); + } + + /** + * Sets the scan mode + * @param mode 'default', 'characters', or 'characters-one-column' + */ + public void setMode(String mode) { + this.mode = mode; + } + + /** + * Adds the scan elements according to the set scan mode + */ + private void addScanElements() { + if (MODE_DEFAULT.equals(mode)) { + //Meta data + scanElements.add(new MetaDataScanElement(MetaDataScanElement.TYPE_PCGTS_ID)); + scanElements.add(new MetaDataScanElement(MetaDataScanElement.TYPE_CREATOR)); + scanElements.add(new MetaDataScanElement(MetaDataScanElement.TYPE_CREATED)); + scanElements.add(new MetaDataScanElement(MetaDataScanElement.TYPE_MODIFIED)); + scanElements.add(new 
MetaDataScanElement(MetaDataScanElement.TYPE_WIDTH)); + scanElements.add(new MetaDataScanElement(MetaDataScanElement.TYPE_HEIGHT)); + + //Border and Print Space + scanElements.add(new BoundsScanElement(BoundsScanElement.TYPE_BORDER)); + scanElements.add(new BoundsScanElement(BoundsScanElement.TYPE_PRINT_SPACE)); + + //Content type count + scanElements.add(new RegionCountScanElement()); + scanElements.add(new ContentTypeCountScanElement(LowLevelTextType.TextLine)); + scanElements.add(new ContentTypeCountScanElement(LowLevelTextType.Word)); + scanElements.add(new ContentTypeCountScanElement(LowLevelTextType.Glyph)); + scanElements.add(new ContentTypeCountScanElement(RegionType.ChartRegion)); + //scanElements.add(new ContentTypeCountScanElement(RegionType.FrameRegion)); + scanElements.add(new ContentTypeCountScanElement(RegionType.GraphicRegion)); + scanElements.add(new ContentTypeCountScanElement(RegionType.ImageRegion)); + scanElements.add(new ContentTypeCountScanElement(RegionType.LineDrawingRegion)); + scanElements.add(new ContentTypeCountScanElement(RegionType.MathsRegion)); + scanElements.add(new ContentTypeCountScanElement(RegionType.AdvertRegion)); + scanElements.add(new ContentTypeCountScanElement(RegionType.ChemRegion)); + scanElements.add(new ContentTypeCountScanElement(RegionType.MusicRegion)); + scanElements.add(new ContentTypeCountScanElement(RegionType.NoiseRegion)); + scanElements.add(new ContentTypeCountScanElement(RegionType.SeparatorRegion)); + scanElements.add(new ContentTypeCountScanElement(RegionType.TableRegion)); + scanElements.add(new ContentTypeCountScanElement(RegionType.TextRegion)); + scanElements.add(new ContentTypeCountScanElement(RegionType.UnknownRegion)); + + //Region sub-type count + scanElements.add(new RegionSubTypeCountScanElement(RegionType.TextRegion, formatModel)); + scanElements.add(new RegionSubTypeCountScanElement(RegionType.GraphicRegion, formatModel)); + scanElements.add(new 
RegionSubTypeCountScanElement(RegionType.ChartRegion, formatModel)); + + //Text statistics + scanElements.add(new TextContentScanElement(RegionType.TextRegion, TextContentScanElement.TYPE_COUNT_CHARACTERS)); + scanElements.add(new TextContentScanElement(RegionType.TextRegion, TextContentScanElement.TYPE_COUNT_SPACES_AND_TABS)); + scanElements.add(new TextContentScanElement(RegionType.TextRegion, TextContentScanElement.TYPE_COUNT_LINE_BREAKS)); + scanElements.add(new TextContentScanElement(RegionType.TextRegion, TextContentScanElement.TYPE_COUNT_ALL)); + + scanElements.add(new TextContentScanElement(LowLevelTextType.TextLine, TextContentScanElement.TYPE_COUNT_CHARACTERS)); + scanElements.add(new TextContentScanElement(LowLevelTextType.TextLine, TextContentScanElement.TYPE_COUNT_SPACES_AND_TABS)); + scanElements.add(new TextContentScanElement(LowLevelTextType.TextLine, TextContentScanElement.TYPE_COUNT_ALL)); + + scanElements.add(new TextContentScanElement(LowLevelTextType.Word, TextContentScanElement.TYPE_COUNT_ALL)); + + scanElements.add(new TextContentScanElement(LowLevelTextType.Glyph, TextContentScanElement.TYPE_COUNT_ALL)); + + //Language and script + scanElements.add(new AttributeValueSetScanElement(RegionType.TextRegion, "primaryLanguage")); + scanElements.add(new AttributeValueSetScanElement(RegionType.TextRegion, "secondaryLanguage")); + scanElements.add(new AttributeValueSetScanElement(RegionType.TextRegion, "primaryScript")); + scanElements.add(new AttributeValueSetScanElement(RegionType.TextRegion, "secondaryScript")); + + //Reading order and layers + scanElements.add(new ReadingOrderRegionRefCountScanElement()); + scanElements.add(new LayersRegionRefCountScanElement()); + } + else if (MODE_SPECIAL_CHARS.equals(mode)) { + scanElements.add(new SpecialCharactersScanElement()); + } + else if (MODE_SPECIAL_CHARS_ONE_COLUMN.equals(mode)) { + scanElements.add(new SpecialCharactersScanElement(true)); + } + else { + throw new IllegalArgumentException("Unknown 
scan mode: "+mode); + } + } + + /** + * Prints the CSV headers to STDOUT + */ + private void printHeaders() { + if (scanElements.isEmpty()) + addScanElements(); + StringBuilder str = new StringBuilder(); + + //File name is hard-coded + str.append("File"); + + for (int i=0; i + *
+ * Example:<br>
+ * Content type: TextRegion<br>
+ * Attribute: Language<br>
+ * Result value: German;English;French + * + * @author Christian Clausner + * + */ +public class AttributeValueSetScanElement implements ScanElement, + ContentObjectHandler { + + ContentType contentType; + String attributeName; + Set values = new HashSet(); + + public AttributeValueSetScanElement(ContentType contentType, String attributeName) { + this.contentType = contentType; + this.attributeName = attributeName; + } + + @Override + public void handleContentObject(ContentObject obj) { + //Check type + if (contentType.equals(obj.getType())) { + VariableMap atts = obj.getAttributes(); + if (atts != null) { + //Get attribute + Variable att = atts.get(attributeName); + if (att != null && att.getValue() != null) { + //Add value to set + String val = att.getValue().toString(); + values.add(val); + } + } + } + } + + @Override + public void init(Page page) { + } + + @Override + public String getCsvHeader() { + //Example: 'TextRegion (primaryLanguage)' + return contentType.getName() + " ("+attributeName+")"; + } + + @Override + public String getCsvValue() { + //Build semicolon separated list of values + StringBuilder str = new StringBuilder(); + for (Iterator it = values.iterator(); it.hasNext(); ) { + if (str.length() > 0) + str.append(';'); + str.append(it.next()); + } + return str.toString(); + } + +} diff --git a/src/org/primaresearch/dla/page/scanner/element/BoundsScanElement.java b/src/org/primaresearch/dla/page/scanner/element/BoundsScanElement.java new file mode 100644 index 0000000..47bf6d0 --- /dev/null +++ b/src/org/primaresearch/dla/page/scanner/element/BoundsScanElement.java @@ -0,0 +1,62 @@ +/* + * Copyright 2014 PRImA Research Lab, University of Salford, United Kingdom + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.primaresearch.dla.page.scanner.element; + +import org.primaresearch.dla.page.Page; + +/** + * Scan element that checks if border or print space are defined. + * + * @author Christian Clausner + * + */ +public class BoundsScanElement implements ScanElement { + + public static final int TYPE_BORDER = 1; + public static final int TYPE_PRINT_SPACE = 2; + + int type; + boolean exists = false; + String[] headers = {null, "Border", "PrintSpace"}; + + /** + * Constructor + * @param type TYPE_BORDER or TYPE_PRINT_SPACE + */ + public BoundsScanElement(int type) { + this.type = type; + } + + @Override + public void init(Page page) { + if (type == TYPE_BORDER) + exists = page.getLayout().getBorder() != null; + else if (type == TYPE_PRINT_SPACE) + exists = page.getLayout().getPrintSpace() != null; + } + + @Override + public String getCsvHeader() { + return headers[type]; + } + + @Override + public String getCsvValue() { + //true or false + return ""+exists; + } + +} diff --git a/src/org/primaresearch/dla/page/scanner/element/ContentTypeCountScanElement.java b/src/org/primaresearch/dla/page/scanner/element/ContentTypeCountScanElement.java new file mode 100644 index 0000000..5971301 --- /dev/null +++ b/src/org/primaresearch/dla/page/scanner/element/ContentTypeCountScanElement.java @@ -0,0 +1,65 @@ +/* + * Copyright 2014 PRImA Research Lab, University of Salford, United Kingdom + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.primaresearch.dla.page.scanner.element; + +import org.primaresearch.dla.page.Page; +import org.primaresearch.dla.page.layout.physical.ContentObject; +import org.primaresearch.dla.page.layout.physical.shared.ContentType; +import org.primaresearch.dla.page.scanner.ContentObjectHandler; + +/** + * Scan element that counts the occurrences of content objects of a specified type. + * + * @author Christian Clausner + * + */ +public class ContentTypeCountScanElement implements ScanElement, + ContentObjectHandler { + + private int count = 0; + private ContentType type; + + /** + * Constructor + * @param type Content object type to count. 
+ */ + public ContentTypeCountScanElement(ContentType type) { + this.type = type; + } + + @Override + public void handleContentObject(ContentObject obj) { + //Check type + if (obj.getType().equals(type)) + count++; + } + + @Override + public void init(Page page) { + } + + @Override + public String getCsvHeader() { + //Example: 'TextRegion Count' + return type.getName()+" Count"; + } + + @Override + public String getCsvValue() { + return ""+count; + } + +} diff --git a/src/org/primaresearch/dla/page/scanner/element/LayersRegionRefCountScanElement.java b/src/org/primaresearch/dla/page/scanner/element/LayersRegionRefCountScanElement.java new file mode 100644 index 0000000..5e49795 --- /dev/null +++ b/src/org/primaresearch/dla/page/scanner/element/LayersRegionRefCountScanElement.java @@ -0,0 +1,62 @@ +/* + * Copyright 2014 PRImA Research Lab, University of Salford, United Kingdom + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.primaresearch.dla.page.scanner.element; + +import org.primaresearch.dla.page.Page; +import org.primaresearch.dla.page.layout.logical.Group; +import org.primaresearch.dla.page.layout.logical.GroupMember; +import org.primaresearch.dla.page.layout.logical.Layers; +import org.primaresearch.dla.page.layout.logical.RegionRef; + +/** + * Scan element that counts the number of referenced regions in layers. 
+ * + * @author Christian Clausner + * + */ +public class LayersRegionRefCountScanElement implements ScanElement { + + private Layers layers; + private int count = 0; + + @Override + public void init(Page page) { + layers = page.getLayout().getLayers(); + if (layers != null) { + for (int i=0; i + * This element represents multiple CSV entries. + * + * @author Christian Clausner + * + */ +public class RegionSubTypeCountScanElement implements ScanElement, ContentObjectHandler { + + private RegionType regionType; + private ValidStringValues validValues = null; + private Map counts = new HashMap(); + + /** + * Constructor + * @param regionType The type of region to handle. + * @param formatModel Model with attribute templates. + */ + public RegionSubTypeCountScanElement(RegionType regionType, FormatModel formatModel) { + this.regionType = regionType; + + //Extract the sub-types from the format model + VariableMap attributeTemplates = formatModel.getTypeAttributeTemplates().get(getSchemaTypeName(regionType)); + if (attributeTemplates != null) { + //GEt the sub-type attribute + Variable var = attributeTemplates.get("type"); + if (var != null) { + //Get the variable constraint (contains the list of valid sub-types) + VariableConstraint constraint = var.getConstraint(); + if (constraint != null && constraint instanceof ValidStringValues) { + validValues = (ValidStringValues)constraint; + } + } + } + } + + @Override + public void handleContentObject(ContentObject obj) { + if (validValues == null) + return; + + //Check region type + if (regionType.equals(obj.getType())) { + VariableMap atts = obj.getAttributes(); + if (atts != null) { + //Get type attribute + Variable var = atts.get("type"); + if (var != null && var.getValue() != null) { + String subtype = var.getValue().toString(); + //Increase count for the type + Integer count = counts.get(subtype); + if (count == null) + count = 0; + count++; + counts.put(subtype, (Integer)count); + } + } + } + } + + @Override + public 
void init(Page page) { + } + + @Override + public String getCsvHeader() { + if (validValues == null) + return ""; + //Comma separated headers for all sub-types + //Example: 'TextRegion (heading) Count,TextRegion (paragraph) Count,...' + StringBuilder str = new StringBuilder(); + for (Iterator it = validValues.getValidValues().iterator(); it.hasNext(); ) { + String subtype = it.next(); + str.append(regionType.getName()); + str.append(" ("); + str.append(subtype); + str.append(") Count"); + if (it.hasNext()) + str.append(','); + } + return str.toString(); + } + + @Override + public String getCsvValue() { + if (validValues == null) + return ""; + //Comma separated counts per sub-type + StringBuilder str = new StringBuilder(); + for (Iterator it = validValues.getValidValues().iterator(); it.hasNext(); ) { + String subtype = it.next(); + Integer count = counts.get(subtype); + if (count == null) + count = 0; + str.append(count.toString()); + if (it.hasNext()) + str.append(','); + } + return str.toString(); + } + + //Copied from DefaultAttributeFactory. Maps a content type to the schema type name used as key for the format model's attribute templates (null if the type is not listed). + private String getSchemaTypeName(ContentType type) { + if (type == RegionType.ChartRegion) + return "ChartRegionType"; + //else if (type == RegionType.FrameRegion) + // return "FrameRegionType"; + else if (type == RegionType.GraphicRegion) + return "GraphicRegionType"; + else if (type == RegionType.ImageRegion) + return "ImageRegionType"; + else if (type == RegionType.LineDrawingRegion) + return "LineDrawingRegionType"; + else if (type == RegionType.MathsRegion) + return "MathsRegionType"; + else if (type == RegionType.AdvertRegion) + return "AdvertRegionType"; + else if (type == RegionType.ChemRegion) + return "ChemRegionType"; + else if (type == RegionType.MusicRegion) + return "MusicRegionType"; + else if (type == RegionType.NoiseRegion) + return "NoiseRegionType"; + else if (type == RegionType.SeparatorRegion) + return "SeparatorRegionType"; + else if (type == RegionType.TableRegion) + return "TableRegionType"; +
else if (type == RegionType.TextRegion) + return "TextRegionType"; + else if (type == RegionType.UnknownRegion) + return "UnknownRegionType"; + else if (type == LowLevelTextType.TextLine) + return "TextLineType"; + else if (type == LowLevelTextType.Word) + return "WordType"; + else if (type == LowLevelTextType.Glyph) + return "GlyphType"; + return null; + } + +} diff --git a/src/org/primaresearch/dla/page/scanner/element/ScanElement.java b/src/org/primaresearch/dla/page/scanner/element/ScanElement.java new file mode 100644 index 0000000..05eaefb --- /dev/null +++ b/src/org/primaresearch/dla/page/scanner/element/ScanElement.java @@ -0,0 +1,45 @@ +/* + * Copyright 2014 PRImA Research Lab, University of Salford, United Kingdom + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.primaresearch.dla.page.scanner.element; + +import org.primaresearch.dla.page.Page; + +/** + * Interface for PAGE scanner elements. One element can represent a single or multiple CSV entries. + * + * @author Christian Clausner + * + */ +public interface ScanElement { + + /** + * Initialisation (called before any other method). + */ + public void init(Page page); + + /** + * Returns the CSV header(s) of this scan element. If the element represents multiple CSV entries, + * the individual headers must be comma separated. + */ + public String getCsvHeader(); + + /** + * Returns the CSV value(s) of this scan element. 
If the element represents multiple CSV entries, + * the individual values must be comma separated. + */ + public String getCsvValue(); + +} diff --git a/src/org/primaresearch/dla/page/scanner/element/SpecialCharactersScanElement.java b/src/org/primaresearch/dla/page/scanner/element/SpecialCharactersScanElement.java new file mode 100644 index 0000000..0fbbbd4 --- /dev/null +++ b/src/org/primaresearch/dla/page/scanner/element/SpecialCharactersScanElement.java @@ -0,0 +1,118 @@ +/* + * Copyright 2014 PRImA Research Lab, University of Salford, United Kingdom + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.primaresearch.dla.page.scanner.element; + +import java.util.HashSet; +import java.util.Iterator; +import java.util.Set; + +import org.primaresearch.dla.page.Page; +import org.primaresearch.dla.page.layout.physical.ContentObject; +import org.primaresearch.dla.page.layout.physical.text.TextObject; +import org.primaresearch.dla.page.scanner.ContentObjectHandler; + +/** + * Creates a list of characters that occur in the text content of the document. + * The characters are output as Unicode number in decimal format. 
+ * + * @author Christian Clausner + * + */ +public class SpecialCharactersScanElement implements ScanElement, ContentObjectHandler { + + /** Switch to optionally output the character codes in one column */ + boolean oneColumn; + Set specialChars = new HashSet(); + //Set normalCharHexCodes = new HashSet(); + + /** + * Default constructor + */ + public SpecialCharactersScanElement() { + this(false); + } + + /** + * Constructor + * @param oneColumn Switch to optionally output the character codes in one column + */ + public SpecialCharactersScanElement(boolean oneColumn) { + this.oneColumn = oneColumn; + } + + /*void initNormalCharacters() { + String normalChars = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890"; + char[] chars = normalChars.toCharArray(); + String hexCode; + for (int i=0; i it = specialChars.iterator(); it.hasNext(); ) { + if (values.length() != 0) { + if (oneColumn) + values.append("\n,"); + else + values.append(','); + } + values.append(it.next()); + } + return values.toString(); + } + + @Override + public void handleContentObject(ContentObject obj) { + if (obj == null) + return; + //Check if text content object + if (obj instanceof TextObject) { + processText(((TextObject)obj).getText()); + } + } + + private void processText(String text) { + if (text == null) + return; + char[] chars = text.toCharArray(); + //String hexCode; + for (int i=0; i