Skip to content

Commit

Permalink
Upgrade to Tika 3.0.0-BETA
Browse files Browse the repository at this point in the history
- Use quarkus-pdfbox since PDFBox 3.0.1 is used
  • Loading branch information
gastaldi committed Jan 11, 2024
1 parent 6ef09f9 commit 6d290bd
Show file tree
Hide file tree
Showing 7 changed files with 24 additions and 63 deletions.
5 changes: 5 additions & 0 deletions deployment/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,11 @@
<artifactId>quarkus-poi-deployment</artifactId>
<version>${quarkus.poi.version}</version>
</dependency>
<dependency>
<groupId>io.quarkiverse.pdfbox</groupId>
<artifactId>quarkus-pdfbox-deployment</artifactId>
<version>${quarkus.pdfbox.version}</version>
</dependency>
<dependency>
<groupId>io.quarkiverse.tika</groupId>
<artifactId>quarkus-tika</artifactId>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,17 +25,13 @@
import io.quarkus.deployment.annotations.Record;
import io.quarkus.deployment.builditem.FeatureBuildItem;
import io.quarkus.deployment.builditem.NativeImageEnableAllCharsetsBuildItem;
import io.quarkus.deployment.builditem.NativeImageFeatureBuildItem;
import io.quarkus.deployment.builditem.nativeimage.NativeImageResourceBuildItem;
import io.quarkus.deployment.builditem.nativeimage.NativeImageResourceDirectoryBuildItem;
import io.quarkus.deployment.builditem.nativeimage.RuntimeInitializedClassBuildItem;
import io.quarkus.deployment.builditem.nativeimage.ServiceProviderBuildItem;
import io.quarkus.deployment.util.ServiceUtil;
import io.quarkus.tika.TikaParseException;
import io.quarkus.tika.runtime.TikaConfiguration;
import io.quarkus.tika.runtime.TikaParserProducer;
import io.quarkus.tika.runtime.TikaRecorder;
import io.quarkus.tika.runtime.graal.TikaFeature;

public class TikaProcessor {

Expand All @@ -59,11 +55,6 @@ FeatureBuildItem feature() {
return new FeatureBuildItem(FEATURE);
}

/**
 * Build step that produces a {@link NativeImageFeatureBuildItem} referencing {@link TikaFeature}.
 *
 * NOTE(review): presumably this registers {@code TikaFeature} as a GraalVM native-image feature;
 * confirm against the Quarkus build item documentation.
 *
 * @return a build item wrapping {@code TikaFeature.class}
 */
@BuildStep
NativeImageFeatureBuildItem tikaParsersFeature() {
return new NativeImageFeatureBuildItem(TikaFeature.class);
}

@BuildStep
AdditionalBeanBuildItem beans() {
return AdditionalBeanBuildItem.unremovableOf(TikaParserProducer.class);
Expand All @@ -77,13 +68,6 @@ NativeImageEnableAllCharsetsBuildItem enableAllCharsets() {
return new NativeImageEnableAllCharsetsBuildItem();
}

/**
 * Build step that produces one {@link RuntimeInitializedClassBuildItem} for each of two PDFBox class names:
 * {@code org.apache.pdfbox.pdmodel.font.PDType1Font} and {@code org.apache.pdfbox.text.LegacyPDFStreamEngine}.
 *
 * NOTE(review): presumably this defers static initialization of those classes to native-image run time;
 * confirm against the Quarkus build item documentation.
 *
 * @param resource producer that receives the build items
 */
@BuildStep
public void registerRuntimeInitializedClasses(BuildProducer<RuntimeInitializedClassBuildItem> resource) {
// Needed for org.apache.tika.parser.pdf.PDFParser; see https://issues.apache.org/jira/browse/PDFBOX-4548
resource.produce(new RuntimeInitializedClassBuildItem("org.apache.pdfbox.pdmodel.font.PDType1Font"));
resource.produce(new RuntimeInitializedClassBuildItem("org.apache.pdfbox.text.LegacyPDFStreamEngine"));
}

@BuildStep
public void registerTikaCoreResources(BuildProducer<NativeImageResourceBuildItem> resource) {
resource.produce(new NativeImageResourceBuildItem("org/apache/tika/mime/tika-mimetypes.xml"));
Expand All @@ -95,14 +79,6 @@ public void registerTikaParsersResources(BuildProducer<NativeImageResourceBuildI
resource.produce(new NativeImageResourceBuildItem("org/apache/tika/parser/pdf/PDFParser.properties"));
}

/**
 * Build step that produces a {@link NativeImageResourceDirectoryBuildItem} for each of four classpath
 * directories: two under {@code org/apache/pdfbox/resources} and two under {@code org/apache/fontbox}.
 *
 * NOTE(review): presumably these directories must be bundled into the native image so PDFBox/FontBox
 * can load them at run time; confirm against the Quarkus build item documentation.
 *
 * @param resource producer that receives the resource directory build items
 */
@BuildStep
public void registerPdfBoxResources(BuildProducer<NativeImageResourceDirectoryBuildItem> resource) {
// PDFBox resources: font metrics (afm) and glyph lists
resource.produce(new NativeImageResourceDirectoryBuildItem("org/apache/pdfbox/resources/afm"));
resource.produce(new NativeImageResourceDirectoryBuildItem("org/apache/pdfbox/resources/glyphlist"));
// FontBox resources: cmap and unicode directories
resource.produce(new NativeImageResourceDirectoryBuildItem("org/apache/fontbox/cmap"));
resource.produce(new NativeImageResourceDirectoryBuildItem("org/apache/fontbox/unicode"));
}

@BuildStep
@Record(ExecutionTime.STATIC_INIT)
void initializeTikaParser(BeanContainerBuildItem beanContainer, TikaRecorder recorder,
Expand Down Expand Up @@ -211,7 +187,7 @@ public static String camelCase(String paramName) {
}

private static String capitalize(String paramName) {
if (paramName == null || paramName.length() == 0) {
if (paramName == null || paramName.isEmpty()) {
return paramName;
}
char[] chars = paramName.toCharArray();
Expand Down
2 changes: 1 addition & 1 deletion docs/modules/ROOT/pages/includes/attributes.adoc
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
:quarkus-version: 3.2.0.Final
:quarkus-version: 3.2.9.Final
:quarkus-tika-version: 2.0.3

:quarkus-org-url: https://github.com/quarkusio
Expand Down
5 changes: 5 additions & 0 deletions docs/modules/ROOT/pages/includes/quarkus-tika.adoc
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ h|Default

a|icon:lock[title=Fixed at build time] [[quarkus-tika_quarkus.tika.tika-config-path]]`link:#quarkus-tika_quarkus.tika.tika-config-path[quarkus.tika.tika-config-path]`


[.description]
--
The resource path within the application artifact to the `tika-config.xml` file.
Expand All @@ -28,6 +29,7 @@ endif::add-copy-button-to-env-var[]

a|icon:lock[title=Fixed at build time] [[quarkus-tika_quarkus.tika.parsers]]`link:#quarkus-tika_quarkus.tika.parsers[quarkus.tika.parsers]`


[.description]
--
Comma-separated list of the parsers which must be supported.
Expand Down Expand Up @@ -57,6 +59,7 @@ endif::add-copy-button-to-env-var[]

a|icon:lock[title=Fixed at build time] [[quarkus-tika_quarkus.tika.append-embedded-content]]`link:#quarkus-tika_quarkus.tika.append-embedded-content[quarkus.tika.append-embedded-content]`


[.description]
--
Controls how the content of the embedded documents is parsed. By default, it is appended to the main document content. Setting this property to `false` makes the content of each of the embedded documents available separately.
Expand All @@ -73,6 +76,7 @@ endif::add-copy-button-to-env-var[]

a|icon:lock[title=Fixed at build time] [[quarkus-tika_quarkus.tika.parser-options-parser-options]]`link:#quarkus-tika_quarkus.tika.parser-options-parser-options[quarkus.tika.parser-options]`


[.description]
--
Configuration of the individual parsers. For example:
Expand All @@ -94,6 +98,7 @@ endif::add-copy-button-to-env-var[]

a|icon:lock[title=Fixed at build time] [[quarkus-tika_quarkus.tika.parser-parser]]`link:#quarkus-tika_quarkus.tika.parser-parser[quarkus.tika.parser]`


[.description]
--
Full parser class name for a given parser abbreviation. For example:
Expand Down
13 changes: 7 additions & 6 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,8 @@
<quarkus.version>3.2.9.Final</quarkus.version>

<quarkus.poi.version>2.0.4</quarkus.poi.version>
<tika.version>2.9.1</tika.version>
<quarkus.pdfbox.version>1.0.0.Alpha1</quarkus.pdfbox.version>
<tika.version>3.0.0-BETA</tika.version>
</properties>
<dependencyManagement>
<dependencies>
Expand All @@ -48,6 +49,11 @@
<artifactId>quarkus-poi</artifactId>
<version>${quarkus.poi.version}</version>
</dependency>
<dependency>
<groupId>io.quarkiverse.pdfbox</groupId>
<artifactId>quarkus-pdfbox</artifactId>
<version>${quarkus.pdfbox.version}</version>
</dependency>
<dependency>
<groupId>org.apache.tika</groupId>
<artifactId>tika-core</artifactId>
Expand Down Expand Up @@ -98,11 +104,6 @@
<artifactId>tika-parser-font-module</artifactId>
<version>${tika.version}</version>
</dependency>
<dependency>
<groupId>org.apache.tika</groupId>
<artifactId>tika-parser-html-commons</artifactId>
<version>${tika.version}</version>
</dependency>
<dependency>
<groupId>org.apache.tika</groupId>
<artifactId>tika-parser-image-module</artifactId>
Expand Down
10 changes: 5 additions & 5 deletions runtime/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,10 @@
<groupId>io.quarkiverse.poi</groupId>
<artifactId>quarkus-poi</artifactId>
</dependency>
<dependency>
<groupId>io.quarkiverse.pdfbox</groupId>
<artifactId>quarkus-pdfbox</artifactId>
</dependency>
<dependency>
<groupId>org.apache.tika</groupId>
<artifactId>tika-core</artifactId>
Expand Down Expand Up @@ -63,10 +67,6 @@
<groupId>org.apache.tika</groupId>
<artifactId>tika-parser-font-module</artifactId>
</dependency>
<dependency>
<groupId>org.apache.tika</groupId>
<artifactId>tika-parser-html-commons</artifactId>
</dependency>
<dependency>
<groupId>org.apache.tika</groupId>
<artifactId>tika-parser-image-module</artifactId>
Expand Down Expand Up @@ -154,4 +154,4 @@
</plugin>
</plugins>
</build>
</project>
</project>

This file was deleted.

0 comments on commit 6d290bd

Please sign in to comment.