Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support format 12 CMAP of TrueType font #3738

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file added NotoSansJP-Regular.pdf
Binary file not shown.
39 changes: 35 additions & 4 deletions src/libs/ttffont.js
Original file line number Diff line number Diff line change
Expand Up @@ -584,11 +584,29 @@ var CmapEntry = (function() {
saveOffset = data.pos;
data.pos = this.offset;
this.format = data.readUInt16();
this.length = data.readUInt16();
this.language = data.readUInt16();
if (this.format === 12) data.readUInt16(); // skip reserved word
this.length = function() {
switch (this.format) {
case 0:
case 4:
return data.readUInt16();
case 12:
return data.readUInt32();
}
}.bind(this)();
this.language = function() {
switch (this.format) {
case 0:
case 4:
return data.readUInt16();
case 12:
return data.readUInt32();
}
}.bind(this)();
this.isUnicode =
(this.platformID === 3 && this.encodingID === 1 && this.format === 4) ||
(this.platformID === 0 && this.format === 4);
(this.platformID === 0 && this.format === 4) ||
(this.platformID === 0 && this.format === 12);
this.codeMap = {};
switch (this.format) {
case 0:
Expand Down Expand Up @@ -682,6 +700,18 @@ var CmapEntry = (function() {
this.codeMap[code] = glyphId & 0xffff;
}
}
break;
case 12:
var nGroups = data.readUInt32();
for (i = 0; i <= nGroups; i++) {
var startCharCode = data.readUInt32();
var endCharCode = data.readUInt32();
var startGlyphID = data.readUInt32();

for (var j = startCharCode; j <= endCharCode; j++) {
this.codeMap[j] = startGlyphID + j - startCharCode;
}
}
}
data.pos = saveOffset;
}
Expand Down Expand Up @@ -898,9 +928,10 @@ var CmapTable = (function(_super) {
i = 0 <= tableCount ? ++i : --i
) {
entry = new CmapEntry(data, this.offset);
if (Object.keys(entry.codeMap).length === 0) continue;
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When a TTF contains a cmap subtable in an unsupported format, an entry with an empty codeMap is created, so I skip it.

this.tables.push(entry);
if (entry.isUnicode) {
if (this.unicode == null) {
if (this.unicode == null || this.unicode.format < entry.format) {
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Some code refers to this.tables[0], so when a TTF has both format 4 and format 12 subtables, the format 4 one is used.

To solve this, I sorted the entries by format version.
Furthermore, I think many people want to use the newer CMAP format version.

this.unicode = entry;
}
}
Expand Down
28 changes: 22 additions & 6 deletions src/modules/utf8.js
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,10 @@ import { toPDFName } from "../libs/pdfname.js";
var padz = ["", "0", "00", "000", "0000"];
var ar = [""];
for (var i = 0, l = text.length, t; i < l; ++i) {
t = font.metadata.characterToGlyph(text.charCodeAt(i));
var codePoint = text.codePointAt(i);
t = font.metadata.characterToGlyph(codePoint);
font.metadata.glyIdsUsed.push(t);
font.metadata.toUnicode[t] = text.charCodeAt(i);
font.metadata.toUnicode[t] = codePoint;
if (widths.indexOf(t) == -1) {
widths.push(t);
widths.push([parseInt(font.metadata.widthOfGlyph(t), 10)]);
Expand All @@ -33,6 +34,9 @@ import { toPDFName } from "../libs/pdfname.js";
t = t.toString(16);
ar.push(padz[4 - t.length], t);
}
if (codePoint > 0xffff) {
i++;
}
Comment on lines +37 to +39
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Skip the latter half of the surrogate pair.

}
return ar.join("");
});
Expand Down Expand Up @@ -63,8 +67,16 @@ import { toPDFName } from "../libs/pdfname.js";
map[code] !== null &&
typeof map[code].toString === "function"
) {
unicode = ("0000" + map[code].toString(16)).slice(-4);
code = ("0000" + (+code).toString(16)).slice(-4);
unicode = map[code];
if (unicode > 0xffff) {
unicode -= 0x10000;
unicode =
((unicode >> 10) + 0xd800).toString(16).padStart(4, "0") +
((unicode % 0x400) + 0xdc00).toString(16).padStart(4, "0");
} else {
unicode = unicode.toString(16).padStart(4, "0");
}
code = (+code).toString(16).padStart(4, "0");
Comment on lines +70 to +79
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is an implementation for outputting surrogate pairs

I consulted the PDF 1.3 specification because the README says to follow the PDF 1.3 specification.

Section 3.8.1 says:

The remainder of the string consists of Unicode character codes, according to the UTF-16 encoding
specified in the Unicode standard, version 2.0.

So I output the code points encoded as UTF-16 surrogate pairs.

range.push("<" + code + "><" + unicode + ">");
}
}
Expand Down Expand Up @@ -271,8 +283,9 @@ import { toPDFName } from "../libs/pdfname.js";
}
for (s = 0; s < strText.length; s += 1) {
if (fonts[key].metadata.hasOwnProperty("cmap")) {
var codePoint = strText.codePointAt(s);
cmapConfirm =
fonts[key].metadata.cmap.unicode.codeMap[strText[s].charCodeAt(0)];
fonts[key].metadata.cmap.unicode.codeMap[strText.codePointAt(s)];
/*
if (Object.prototype.toString.call(text) === '[object Array]') {
var i = 0;
Expand All @@ -298,7 +311,10 @@ import { toPDFName } from "../libs/pdfname.js";
str += "";
}
} else {
str += strText[s];
str += String.fromCodePoint(codePoint);
}
if (codePoint > 0xffff) {
s++;
}
}
var result = "";
Expand Down
3 changes: 3 additions & 0 deletions src/polyfills.js
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@ import "core-js/es/object/create";
import "core-js/es/object/keys";
import "core-js/es/object/values";
import "core-js/es/object/assign";
import "core-js/es/string/code-point-at";
import "core-js/es/string/from-code-point";
import "core-js/es/string/pad-start";
import "core-js/es/string/trim";
import "core-js/es/string/trim-left";
import "core-js/es/string/trim-right";
Expand Down
Binary file not shown.
Binary file added test/reference/ttf-format12.pdf
Binary file not shown.
34 changes: 34 additions & 0 deletions test/specs/ttfsupport.spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -46,4 +46,38 @@ describe("TTFSupport", () => {
doc.text("А ну чики брики и в дамки!", 10, 10);
comparePdf(doc.output(), "russian-1line.pdf", "unicode");
});

// Renders text containing a code point above U+FFFF with the NotoSansJP font
// and compares the generated document against the stored reference PDF.
it("should display glyphs in OpenType Font version 12 for code points beyond 0xFFFF", () => {
  const isNode = typeof global === "object" && global.isNode === true;
  // `navigator` is only touched outside Node thanks to the short-circuit.
  const isInternetExplorer =
    !isNode && navigator.userAgent.indexOf("Trident") !== -1;

  if (isInternetExplorer) {
    // Skipped on IE: the comparison reportedly fails there with a slight
    // numerical error, suspected to be a font / calculation accuracy issue.
    console.warn("Skipping IE this test");
    return;
  }

  // The font path differs between the Node run and the browser run.
  const fontPath = isNode
    ? "./test/reference/fonts/NotoSansJP/NotoSansJP-Regular.ttf"
    : "base/test/reference/fonts/NotoSansJP/NotoSansJP-Regular.ttf";

  const doc = new jsPDF();
  doc.addFont(fontPath, "NotoSansJP-Regular", "normal");
  doc.setFont("NotoSansJP-Regular");
  doc.setFontSize(40);
  doc.text("123abc吉𠮷高髙辺邉", 20, 30);
  comparePdf(doc.output(), "ttf-format12.pdf", "unicode");
});
});