Add PdfFont#appendDecodedCodesToGlyphsList methods to get success status of decoding to glyph line

DEVSIX-5102
itext · Mar 25, 2021 · ded92fc · ded92fc
1 parent 79e8eb9
commit ded92fc
Show file tree

Hide file tree

Showing 4 changed files with 110 additions and 37 deletions.
diff --git a/kernel/src/main/java/com/itextpdf/kernel/font/PdfFont.java b/kernel/src/main/java/com/itextpdf/kernel/font/PdfFont.java
@@ -175,12 +175,30 @@ public boolean containsGlyph(int unicode) {
     public abstract String decode(PdfString content);
 
     /**
-     * Decodes a given {@link PdfString} containing encoded string (e.g. from content stream) into a {@link GlyphLine}
+     * Decodes a sequence of character codes (e.g. from content stream) into a {@link GlyphLine}
+     *
+     * @param characterCodes the string which is interpreted as a sequence of character codes. Note that {@link
+     *                       PdfString} acts as storage for char code values specific to the given font, therefore
+     *                       individual character codes must not be interpreted as code units of the UTF-16 encoding
      *
-     * @param content the encoded string
      * @return the {@link GlyphLine} containing the glyphs encoded by the passed string
      */
-    public abstract GlyphLine decodeIntoGlyphLine(PdfString content);
+    public abstract GlyphLine decodeIntoGlyphLine(PdfString characterCodes);
+
+    /**
+     * Decodes a sequence of character codes (e.g. from content stream) into a sequence of glyphs
+     * and appends them to the passed list.
+     *
+     * @param list           the list to the end of which decoded glyphs are to be added
+     * @param characterCodes the string which is interpreted as a sequence of character codes. Note that {@link
+     *                       PdfString} acts as storage for char code values specific to the given font, therefore
+     *                       individual character codes must not be interpreted as code units of the UTF-16 encoding
+     *
+     * @return true if all codes were successfully decoded, false otherwise
+     */
+    public boolean appendDecodedCodesToGlyphsList(List<Glyph> list, PdfString characterCodes) {
+        return false;
+    }
 
     public abstract float getContentWidth(PdfString content);
 

diff --git a/kernel/src/main/java/com/itextpdf/kernel/font/PdfSimpleFont.java b/kernel/src/main/java/com/itextpdf/kernel/font/PdfSimpleFont.java
@@ -43,6 +43,7 @@ This file is part of the iText (R) project.
  */
 package com.itextpdf.kernel.font;
 
+import com.itextpdf.io.LogMessageConstant;
 import com.itextpdf.io.font.FontEncoding;
 import com.itextpdf.io.font.FontMetrics;
 import com.itextpdf.io.font.FontNames;
@@ -53,6 +54,7 @@ This file is part of the iText (R) project.
 import com.itextpdf.io.font.otf.Glyph;
 import com.itextpdf.io.font.otf.GlyphLine;
 import com.itextpdf.io.util.ArrayUtil;
+import com.itextpdf.io.util.MessageFormatUtil;
 import com.itextpdf.io.util.StreamUtil;
 import com.itextpdf.io.util.TextUtil;
 import com.itextpdf.kernel.pdf.PdfArray;
@@ -65,6 +67,8 @@ This file is part of the iText (R) project.
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.List;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 public abstract class PdfSimpleFont<T extends FontProgram> extends PdfFont {
 
@@ -189,6 +193,15 @@ public FontEncoding getFontEncoding() {
         return fontEncoding;
     }
 
+    /**
+     * Gets the mapping of character codes to Unicode values based on the /ToUnicode entry of the font dictionary.
+     *
+     * @return the {@link CMapToUnicode} built based on /ToUnicode, or null if /ToUnicode is not available
+     */
+    public CMapToUnicode getToUnicode() {
+        return toUnicode;
+    }
+
     @Override
     public byte[] convertToBytes(String text) {
         byte[] bytes = fontEncoding.convertToBytes(text);
@@ -279,32 +292,53 @@ public String decode(PdfString content) {
      */
     @Override
     public GlyphLine decodeIntoGlyphLine(PdfString content) {
-        byte[] contentBytes = content.getValueBytes();
-        List<Glyph> glyphs = new ArrayList<>(contentBytes.length);
+        List<Glyph> glyphs = new ArrayList<>(content.getValue().length());
+        appendDecodedCodesToGlyphsList(glyphs, content);
+        return new GlyphLine(glyphs);
+    }
+
+    /**
+     * {@inheritDoc}
+     */
+    @Override
+    public boolean appendDecodedCodesToGlyphsList(List<Glyph> list, PdfString characterCodes) {
+        boolean allCodesDecoded = true;
+
+        FontEncoding enc = getFontEncoding();
+        byte[] contentBytes = characterCodes.getValueBytes();
         for (byte b : contentBytes) {
             int code = b & 0xff;
             Glyph glyph = null;
-            if (toUnicode != null && toUnicode.lookup(code) != null && (glyph = fontProgram.getGlyphByCode(code)) != null) {
-                if (!Arrays.equals(toUnicode.lookup(code), glyph.getChars())) {
+            CMapToUnicode toUnicodeCMap = getToUnicode();
+            if (toUnicodeCMap != null && toUnicodeCMap.lookup(code) != null
+                    && (glyph = getFontProgram().getGlyphByCode(code)) != null) {
+                if (!Arrays.equals(toUnicodeCMap.lookup(code), glyph.getChars())) {
                     // Copy the glyph because the original one may be reused (e.g. standard Helvetica font program)
                     glyph = new Glyph(glyph);
-                    glyph.setChars(toUnicode.lookup(code));
+                    glyph.setChars(toUnicodeCMap.lookup(code));
                 }
             } else {
-                int uni = fontEncoding.getUnicode(code);
+                int uni = enc.getUnicode(code);
                 if (uni > -1) {
                     glyph = getGlyph(uni);
-                } else if (fontEncoding.getBaseEncoding() == null) {
-                    glyph = fontProgram.getGlyphByCode(code);
+                } else if (enc.getBaseEncoding() == null) {
+                    glyph = getFontProgram().getGlyphByCode(code);
                 }
             }
             if (glyph != null) {
-                glyphs.add(glyph);
+                list.add(glyph);
+            } else {
+                Logger logger = LoggerFactory.getLogger(this.getClass());
+                if (logger.isWarnEnabled()) {
+                    logger.warn(MessageFormatUtil.format(LogMessageConstant.COULD_NOT_FIND_GLYPH_WITH_CODE, code));
+                }
+                allCodesDecoded = false;
             }
         }
-        return new GlyphLine(glyphs);
+        return allCodesDecoded;
     }
 
+
     @Override
     public float getContentWidth(PdfString content) {
         float width = 0;

diff --git a/kernel/src/main/java/com/itextpdf/kernel/font/PdfType0Font.java b/kernel/src/main/java/com/itextpdf/kernel/font/PdfType0Font.java
@@ -87,6 +87,10 @@ public class PdfType0Font extends PdfFont {
 
     private static final long serialVersionUID = -8033620300884193397L;
 
+    /**
+     * The code length shall not be greater than 4.
+     */
+    private static final int MAX_CID_CODE_LENGTH = 4;
     private static final byte[] rotbits = {(byte) 0x80, (byte) 0x40, (byte) 0x20, (byte) 0x10, (byte) 0x08, (byte) 0x04, (byte) 0x02, (byte) 0x01};
 
     /**
@@ -523,49 +527,66 @@ public String decode(PdfString content) {
      * {@inheritDoc}
      */
     @Override
-    public GlyphLine decodeIntoGlyphLine(PdfString content) {
-        //A sequence of one or more bytes shall be extracted from the string and matched against the codespace
-        //ranges in the CMap. That is, the first byte shall be matched against 1-byte codespace ranges; if no match is
-        //found, a second byte shall be extracted, and the 2-byte code shall be matched against 2-byte codespace
-        //ranges. This process continues for successively longer codes until a match is found or all codespace ranges
-        //have been tested. There will be at most one match because codespace ranges shall not overlap.
-        String cids = content.getValue();
+    public GlyphLine decodeIntoGlyphLine(PdfString characterCodes) {
         List<Glyph> glyphs = new ArrayList<>();
-        for (int i = 0; i < cids.length(); i++) {
-            //The code length shall not be greater than 4.
+        appendDecodedCodesToGlyphsList(glyphs, characterCodes);
+        return new GlyphLine(glyphs);
+    }
+
+    /**
+     * {@inheritDoc}
+     */
+    @Override
+    public boolean appendDecodedCodesToGlyphsList(List<Glyph> list, PdfString characterCodes) {
+        boolean allCodesDecoded = true;
+
+        String charCodesSequence = characterCodes.getValue();
+        // A sequence of one or more bytes shall be extracted from the string and matched against the codespace
+        // ranges in the CMap. That is, the first byte shall be matched against 1-byte codespace ranges; if no match is
+        // found, a second byte shall be extracted, and the 2-byte code shall be matched against 2-byte codespace
+        // ranges. This process continues for successively longer codes until a match is found or all codespace ranges
+        // have been tested. There will be at most one match because codespace ranges shall not overlap.
+        for (int i = 0; i < charCodesSequence.length(); i++) {
             int code = 0;
             Glyph glyph = null;
             int codeSpaceMatchedLength = 1;
-            for (int codeLength = 1; codeLength <= 4 && i + codeLength <= cids.length(); codeLength++) {
-                code = (code << 8) + cids.charAt(i + codeLength - 1);
-                if (!cmapEncoding.containsCodeInCodeSpaceRange(code, codeLength)) {
+            for (int codeLength = 1; codeLength <= MAX_CID_CODE_LENGTH && i + codeLength <= charCodesSequence.length();
+                    codeLength++) {
+                code = (code << 8) + charCodesSequence.charAt(i + codeLength - 1);
+                if (!getCmap().containsCodeInCodeSpaceRange(code, codeLength)) {
                     continue;
                 } else {
                     codeSpaceMatchedLength = codeLength;
                 }
-                int glyphCode = cmapEncoding.getCidCode(code);
-                glyph = fontProgram.getGlyphByCode(glyphCode);
+                int glyphCode = getCmap().getCidCode(code);
+                glyph = getFontProgram().getGlyphByCode(glyphCode);
                 if (glyph != null) {
                     i += codeLength - 1;
                     break;
                 }
             }
             if (glyph == null) {
-                StringBuilder failedCodes = new StringBuilder();
-                for (int codeLength = 1; codeLength <= 4 && i + codeLength <= cids.length(); codeLength++) {
-                    failedCodes.append((int) cids.charAt(i + codeLength - 1)).append(" ");
-                }
                 Logger logger = LoggerFactory.getLogger(PdfType0Font.class);
-                logger.warn(MessageFormatUtil.format(LogMessageConstant.COULD_NOT_FIND_GLYPH_WITH_CODE, failedCodes.toString()));
+                if (logger.isWarnEnabled()) {
+                    StringBuilder failedCodes = new StringBuilder();
+                    for (int codeLength = 1;
+                            codeLength <= MAX_CID_CODE_LENGTH && i + codeLength <= charCodesSequence.length();
+                            codeLength++) {
+                        failedCodes.append((int) charCodesSequence.charAt(i + codeLength - 1)).append(" ");
+                    }
+                    logger.warn(MessageFormatUtil
+                            .format(LogMessageConstant.COULD_NOT_FIND_GLYPH_WITH_CODE, failedCodes.toString()));
+                }
                 i += codeSpaceMatchedLength - 1;
             }
             if (glyph != null && glyph.getChars() != null) {
-                glyphs.add(glyph);
+                list.add(glyph);
             } else {
-                glyphs.add(new Glyph(0, fontProgram.getGlyphByCode(0).getWidth(), -1));
+                list.add(new Glyph(0, getFontProgram().getGlyphByCode(0).getWidth(), -1));
+                allCodesDecoded = false;
             }
         }
-        return new GlyphLine(glyphs);
+        return allCodesDecoded;
     }
 
     @Override

diff --git a/kernel/src/main/java/com/itextpdf/kernel/font/PdfType3Font.java b/kernel/src/main/java/com/itextpdf/kernel/font/PdfType3Font.java
@@ -422,8 +422,8 @@ private void addGlyphsFromCharProcs(PdfDictionary charProcsDic, int[] widths) {
             return;
         }
         Map<Integer, Integer> unicodeToCode = null;
-        if (toUnicode != null) {
-            try { unicodeToCode = toUnicode.createReverseMapping(); } catch (Exception ignored) {}
+        if (getToUnicode() != null) {
+            try { unicodeToCode = getToUnicode().createReverseMapping(); } catch (Exception ignored) {}
         }
 
         for (PdfName glyphName : charProcsDic.keySet()) {