Skip to content

Commit

Permalink
Improved file extension detection of output
Browse files: browse the repository at this point in the history
  • Loading branch information
ThomasJejkal committed Aug 24, 2023
1 parent 7d1d847 commit 78fae0b
Showing 1 changed file with 4 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ public static Path fixFileExtension(Path pathToFile) {
fin = new FileInputStream(pathToFile.toFile());
byte[] header = fin.readNBytes(FEW_KILO_BYTES_FOR_TIKA);
fin.close();
String newExtension = guessFileExtension(header);
String newExtension = guessFileExtension(pathToFile.getFileName().toString(), header);
if (newExtension != null) {
if (!pathToFile.toString().endsWith(newExtension)) {
renamedFile = Paths.get(pathToFile + newExtension);
Expand Down Expand Up @@ -243,10 +243,11 @@ public static String getExtensionForMimeType(String mimeType) {
/**
* Guess the extension of the file from its name and its first bytes using Apache Tika.
*
* @param filename Name of the file, passed to Tika as an additional hint for MIME type detection.
* @param fewKilobytesOfFile First few kilobytes of the file.
* @return Estimated extension, e.g. '.xml'.
*/
private static String guessFileExtension(byte[] fewKilobytesOfFile) {
private static String guessFileExtension(String filename, byte[] fewKilobytesOfFile) {
String returnValue = null;
String headerAsString = new String(fewKilobytesOfFile, 0, Math.min(fewKilobytesOfFile.length, MAX_LENGTH_OF_HEADER));
LOGGER.trace("Guess type for '{}'", headerAsString);
Expand All @@ -265,7 +266,7 @@ private static String guessFileExtension(byte[] fewKilobytesOfFile) {
LOGGER.trace("Use tika library to estimate extension.");
Tika tika = new Tika();
String mimeType;
mimeType = tika.detect(fewKilobytesOfFile);
mimeType = tika.detect(fewKilobytesOfFile, filename);
MimeTypes allTypes = MimeTypes.getDefaultMimeTypes();
MimeType estimatedMimeType;
try {
Expand Down

0 comments on commit 78fae0b

Please sign in to comment.