Skip to content

Commit

Permalink
Update CommandLineTool.java
Browse files Browse the repository at this point in the history
L and T for text lines.
Ensure image exists (also added tiff and jpeg)
  • Loading branch information
chris1010010 committed Apr 27, 2020
1 parent f485b4c commit 365a072
Showing 1 changed file with 19 additions and 9 deletions.
28 changes: 19 additions & 9 deletions src/org/primaresearch/pdf/CommandLineTool.java
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ else if ("-font".equals(args[i])) {
textSource = textSource.toLowerCase();
if (textSource.equals("r"))
textSourceType = RegionType.TextRegion;
else if (textSource.equals("l"))
else if (textSource.equals("l") || textSource.equals("t"))
textSourceType = LowLevelTextType.TextLine;
else if (textSource.equals("w"))
textSourceType = LowLevelTextType.Word;
Expand All @@ -104,7 +104,7 @@ else if (textSource.equals("g"))
if (outlines != null) {
outlines = outlines.toLowerCase();
addRegionOutlines = outlines.contains("r");
addTextLineOutlines = outlines.contains("l");
addTextLineOutlines = outlines.contains("l") || outlines.contains("t");
addWordOutlines = outlines.contains("w");
addGlyphOutlines = outlines.contains("g");
}
Expand Down Expand Up @@ -154,15 +154,15 @@ public boolean accept(File dir, String name) {
List<Page> pages = new ArrayList<Page>();
List<String> images = new ArrayList<String>();
for (File f : xmlFiles) {
pages.add(PageXmlInputOutput.readPage(f.getAbsolutePath()));
//Image
String path = imageSource + File.separator + f.getName();
if (new File(path.toLowerCase().replace(".xml", ".tif")).exists())
images.add(path.substring(0, path.length()-4) + ".tif");
else if (new File(path.toLowerCase().replace(".xml", ".png")).exists())
images.add(path.substring(0, path.length()-4) + ".png");
else if (new File(path.toLowerCase().replace(".xml", ".jpg")).exists())
images.add(path.substring(0, path.length()-4) + ".jpg");
String imageFilePath = findImage(path.substring(0, path.length()-3));
if (imageFilePath != null) {
pages.add(PageXmlInputOutput.readPage(f.getAbsolutePath()));
images.add(imageFilePath);
} else {
System.err.println("Image not found for: " + f.getAbsolutePath());
}
}

converter.convert(pages, images, pdfFilename);
Expand All @@ -178,6 +178,16 @@ else if (new File(path.toLowerCase().replace(".xml", ".jpg")).exists())
exc.printStackTrace();
}
}

/**
 * Looks up the image file that belongs to the given base path.
 * <p>
 * The extensions tif, tiff, png, jpg and jpeg are appended to the base path
 * (in that order) and the first candidate that is an existing regular file wins.
 *
 * @param baseFilePath File path without extension but including the trailing dot
 *                     (the extension is appended directly, no dot is inserted)
 * @return Path of the first matching image file or <code>null</code> if there is none
 */
private static String findImage(String baseFilePath) {
    // Without a base path there is nothing to look up
    // (string concatenation would otherwise produce paths like "nulltif")
    if (baseFilePath == null)
        return null;

    String[] extensions = new String[] {"tif", "tiff", "png", "jpg", "jpeg"};
    for (String extension : extensions) {
        String filePath = baseFilePath + extension;
        // isFile() instead of exists(): a directory that happens to be called
        // e.g. "page.tif" is not an image and must not shadow a real image file
        if (new File(filePath).isFile())
            return filePath;
    }
    return null;
}

/**
* Prints usage help to stdout
Expand Down

0 comments on commit 365a072

Please sign in to comment.