Skip to content

Commit

Permalink
Add PdfFont#appendDecodedCodesToGlyphsList methods to get success sta…
Browse files Browse the repository at this point in the history
…tus of decoding to glyph line

DEVSIX-5102
  • Loading branch information
yulian-gaponenko authored and iText-CI committed Mar 25, 2021
1 parent 79e8eb9 commit ded92fc
Show file tree
Hide file tree
Showing 4 changed files with 110 additions and 37 deletions.
24 changes: 21 additions & 3 deletions kernel/src/main/java/com/itextpdf/kernel/font/PdfFont.java
Original file line number Diff line number Diff line change
Expand Up @@ -175,12 +175,30 @@ public boolean containsGlyph(int unicode) {
public abstract String decode(PdfString content);

/**
* Decodes a given {@link PdfString} containing encoded string (e.g. from content stream) into a {@link GlyphLine}
* Decodes sequence of character codes (e.g. from content stream) into a {@link GlyphLine}
*
* @param characterCodes the string which is interpreted as a sequence of character codes. Note, that {@link
* PdfString} acts as a storage for char code values specific to given font, therefore
* individual character codes must not be interpreted as code units of the UTF-16 encoding
*
* @param content the encoded string
* @return the {@link GlyphLine} containing the glyphs encoded by the passed string
*/
public abstract GlyphLine decodeIntoGlyphLine(PdfString content);
public abstract GlyphLine decodeIntoGlyphLine(PdfString characterCodes);

/**
* Decodes sequence of character codes (e.g. from content stream) to sequence of glyphs
* and appends them to the passed list.
*
* <p>
* This base implementation does not decode anything: it leaves the passed list untouched and always
* returns {@code false}. Font classes that are able to decode character codes override this method.
*
* @param list the list to the end of which decoded glyphs are to be added
* @param characterCodes the string which is interpreted as a sequence of character codes. Note, that {@link
* PdfString} acts as a storage for char code values specific to given font, therefore
* individual character codes must not be interpreted as code units of the UTF-16 encoding
*
* @return true if all codes were successfully decoded, false otherwise
*/
public boolean appendDecodedCodesToGlyphsList(List<Glyph> list, PdfString characterCodes) {
// Default behaviour: nothing is decoded, so report that decoding did not succeed.
return false;
}

public abstract float getContentWidth(PdfString content);

Expand Down
54 changes: 44 additions & 10 deletions kernel/src/main/java/com/itextpdf/kernel/font/PdfSimpleFont.java
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ This file is part of the iText (R) project.
*/
package com.itextpdf.kernel.font;

import com.itextpdf.io.LogMessageConstant;
import com.itextpdf.io.font.FontEncoding;
import com.itextpdf.io.font.FontMetrics;
import com.itextpdf.io.font.FontNames;
Expand All @@ -53,6 +54,7 @@ This file is part of the iText (R) project.
import com.itextpdf.io.font.otf.Glyph;
import com.itextpdf.io.font.otf.GlyphLine;
import com.itextpdf.io.util.ArrayUtil;
import com.itextpdf.io.util.MessageFormatUtil;
import com.itextpdf.io.util.StreamUtil;
import com.itextpdf.io.util.TextUtil;
import com.itextpdf.kernel.pdf.PdfArray;
Expand All @@ -65,6 +67,8 @@ This file is part of the iText (R) project.
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public abstract class PdfSimpleFont<T extends FontProgram> extends PdfFont {

Expand Down Expand Up @@ -189,6 +193,15 @@ public FontEncoding getFontEncoding() {
return fontEncoding;
}

/**
* Gets the mapping of character codes to Unicode values based on the /ToUnicode entry of the font dictionary.
*
* @return the {@link CMapToUnicode} built based on /ToUnicode, or null if /ToUnicode is not available
*/
public CMapToUnicode getToUnicode() {
return toUnicode;
}

@Override
public byte[] convertToBytes(String text) {
byte[] bytes = fontEncoding.convertToBytes(text);
Expand Down Expand Up @@ -279,32 +292,53 @@ public String decode(PdfString content) {
*/
@Override
public GlyphLine decodeIntoGlyphLine(PdfString content) {
byte[] contentBytes = content.getValueBytes();
List<Glyph> glyphs = new ArrayList<>(contentBytes.length);
List<Glyph> glyphs = new ArrayList<>(content.getValue().length());
appendDecodedCodesToGlyphsList(glyphs, content);
return new GlyphLine(glyphs);
}

/**
* {@inheritDoc}
*/
@Override
public boolean appendDecodedCodesToGlyphsList(List<Glyph> list, PdfString characterCodes) {
boolean allCodesDecoded = true;

FontEncoding enc = getFontEncoding();
byte[] contentBytes = characterCodes.getValueBytes();
for (byte b : contentBytes) {
int code = b & 0xff;
Glyph glyph = null;
if (toUnicode != null && toUnicode.lookup(code) != null && (glyph = fontProgram.getGlyphByCode(code)) != null) {
if (!Arrays.equals(toUnicode.lookup(code), glyph.getChars())) {
CMapToUnicode toUnicodeCMap = getToUnicode();
if (toUnicodeCMap != null && toUnicodeCMap.lookup(code) != null
&& (glyph = getFontProgram().getGlyphByCode(code)) != null) {
if (!Arrays.equals(toUnicodeCMap.lookup(code), glyph.getChars())) {
// Copy the glyph because the original one may be reused (e.g. standard Helvetica font program)
glyph = new Glyph(glyph);
glyph.setChars(toUnicode.lookup(code));
glyph.setChars(toUnicodeCMap.lookup(code));
}
} else {
int uni = fontEncoding.getUnicode(code);
int uni = enc.getUnicode(code);
if (uni > -1) {
glyph = getGlyph(uni);
} else if (fontEncoding.getBaseEncoding() == null) {
glyph = fontProgram.getGlyphByCode(code);
} else if (enc.getBaseEncoding() == null) {
glyph = getFontProgram().getGlyphByCode(code);
}
}
if (glyph != null) {
glyphs.add(glyph);
list.add(glyph);
} else {
Logger logger = LoggerFactory.getLogger(this.getClass());
if (logger.isWarnEnabled()) {
logger.warn(MessageFormatUtil.format(LogMessageConstant.COULD_NOT_FIND_GLYPH_WITH_CODE, code));
}
allCodesDecoded = false;
}
}
return new GlyphLine(glyphs);
return allCodesDecoded;
}


@Override
public float getContentWidth(PdfString content) {
float width = 0;
Expand Down
65 changes: 43 additions & 22 deletions kernel/src/main/java/com/itextpdf/kernel/font/PdfType0Font.java
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,10 @@ public class PdfType0Font extends PdfFont {

private static final long serialVersionUID = -8033620300884193397L;

/**
* The code length shall not be greater than 4.
*/
private static final int MAX_CID_CODE_LENGTH = 4;
private static final byte[] rotbits = {(byte) 0x80, (byte) 0x40, (byte) 0x20, (byte) 0x10, (byte) 0x08, (byte) 0x04, (byte) 0x02, (byte) 0x01};

/**
Expand Down Expand Up @@ -523,49 +527,66 @@ public String decode(PdfString content) {
* {@inheritDoc}
*/
@Override
public GlyphLine decodeIntoGlyphLine(PdfString content) {
//A sequence of one or more bytes shall be extracted from the string and matched against the codespace
//ranges in the CMap. That is, the first byte shall be matched against 1-byte codespace ranges; if no match is
//found, a second byte shall be extracted, and the 2-byte code shall be matched against 2-byte codespace
//ranges. This process continues for successively longer codes until a match is found or all codespace ranges
//have been tested. There will be at most one match because codespace ranges shall not overlap.
String cids = content.getValue();
public GlyphLine decodeIntoGlyphLine(PdfString characterCodes) {
List<Glyph> glyphs = new ArrayList<>();
for (int i = 0; i < cids.length(); i++) {
//The code length shall not be greater than 4.
appendDecodedCodesToGlyphsList(glyphs, characterCodes);
return new GlyphLine(glyphs);
}

/**
* {@inheritDoc}
*/
@Override
public boolean appendDecodedCodesToGlyphsList(List<Glyph> list, PdfString characterCodes) {
boolean allCodesDecoded = true;

String charCodesSequence = characterCodes.getValue();
// A sequence of one or more bytes shall be extracted from the string and matched against the codespace
// ranges in the CMap. That is, the first byte shall be matched against 1-byte codespace ranges; if no match is
// found, a second byte shall be extracted, and the 2-byte code shall be matched against 2-byte codespace
// ranges. This process continues for successively longer codes until a match is found or all codespace ranges
// have been tested. There will be at most one match because codespace ranges shall not overlap.
for (int i = 0; i < charCodesSequence.length(); i++) {
int code = 0;
Glyph glyph = null;
int codeSpaceMatchedLength = 1;
for (int codeLength = 1; codeLength <= 4 && i + codeLength <= cids.length(); codeLength++) {
code = (code << 8) + cids.charAt(i + codeLength - 1);
if (!cmapEncoding.containsCodeInCodeSpaceRange(code, codeLength)) {
for (int codeLength = 1; codeLength <= MAX_CID_CODE_LENGTH && i + codeLength <= charCodesSequence.length();
codeLength++) {
code = (code << 8) + charCodesSequence.charAt(i + codeLength - 1);
if (!getCmap().containsCodeInCodeSpaceRange(code, codeLength)) {
continue;
} else {
codeSpaceMatchedLength = codeLength;
}
int glyphCode = cmapEncoding.getCidCode(code);
glyph = fontProgram.getGlyphByCode(glyphCode);
int glyphCode = getCmap().getCidCode(code);
glyph = getFontProgram().getGlyphByCode(glyphCode);
if (glyph != null) {
i += codeLength - 1;
break;
}
}
if (glyph == null) {
StringBuilder failedCodes = new StringBuilder();
for (int codeLength = 1; codeLength <= 4 && i + codeLength <= cids.length(); codeLength++) {
failedCodes.append((int) cids.charAt(i + codeLength - 1)).append(" ");
}
Logger logger = LoggerFactory.getLogger(PdfType0Font.class);
logger.warn(MessageFormatUtil.format(LogMessageConstant.COULD_NOT_FIND_GLYPH_WITH_CODE, failedCodes.toString()));
if (logger.isWarnEnabled()) {
StringBuilder failedCodes = new StringBuilder();
for (int codeLength = 1;
codeLength <= MAX_CID_CODE_LENGTH && i + codeLength <= charCodesSequence.length();
codeLength++) {
failedCodes.append((int) charCodesSequence.charAt(i + codeLength - 1)).append(" ");
}
logger.warn(MessageFormatUtil
.format(LogMessageConstant.COULD_NOT_FIND_GLYPH_WITH_CODE, failedCodes.toString()));
}
i += codeSpaceMatchedLength - 1;
}
if (glyph != null && glyph.getChars() != null) {
glyphs.add(glyph);
list.add(glyph);
} else {
glyphs.add(new Glyph(0, fontProgram.getGlyphByCode(0).getWidth(), -1));
list.add(new Glyph(0, getFontProgram().getGlyphByCode(0).getWidth(), -1));
allCodesDecoded = false;
}
}
return new GlyphLine(glyphs);
return allCodesDecoded;
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -422,8 +422,8 @@ private void addGlyphsFromCharProcs(PdfDictionary charProcsDic, int[] widths) {
return;
}
Map<Integer, Integer> unicodeToCode = null;
if (toUnicode != null) {
try { unicodeToCode = toUnicode.createReverseMapping(); } catch (Exception ignored) {}
if (getToUnicode() != null) {
try { unicodeToCode = getToUnicode().createReverseMapping(); } catch (Exception ignored) {}
}

for (PdfName glyphName : charProcsDic.keySet()) {
Expand Down

0 comments on commit ded92fc

Please sign in to comment.