From c4cd405a8fde5ece5c792d3e6dfb9448e02a32d4 Mon Sep 17 00:00:00 2001 From: Jonas Jenwald Date: Sun, 28 Jul 2024 11:51:46 +0200 Subject: [PATCH] Ignore non-dictionary nodes when parsing StructTree data (issue 18503) --- src/core/struct_tree.js | 3 + test/pdfs/issue18503.pdf.link | 1 + test/test_manifest.json | 8 ++ test/unit/api_spec.js | 180 ++++++++++++++++++++++++++++++++++ 4 files changed, 192 insertions(+) create mode 100644 test/pdfs/issue18503.pdf.link diff --git a/src/core/struct_tree.js b/src/core/struct_tree.js index 8b1559c90fc10..49be8eabfb9ed 100644 --- a/src/core/struct_tree.js +++ b/src/core/struct_tree.js @@ -671,6 +671,9 @@ class StructTreePage { warn("StructTree MAX_DEPTH reached."); return null; } + if (!(dict instanceof Dict)) { + return null; + } if (map.has(dict)) { return map.get(dict); diff --git a/test/pdfs/issue18503.pdf.link b/test/pdfs/issue18503.pdf.link new file mode 100644 index 0000000000000..4a704c4ee6cae --- /dev/null +++ b/test/pdfs/issue18503.pdf.link @@ -0,0 +1 @@ +https://github.com/user-attachments/files/16402842/apl_23_003.pdf diff --git a/test/test_manifest.json b/test/test_manifest.json index 9270359ff886e..8845acb992311 100644 --- a/test/test_manifest.json +++ b/test/test_manifest.json @@ -15,6 +15,14 @@ "link": true, "type": "other" }, + { + "id": "issue18503", + "file": "pdfs/issue18503.pdf", + "md5": "b6c7c8db3505d07ce8eabe6712641a94", + "rounds": 1, + "link": true, + "type": "other" + }, { "id": "filled-background-range", "file": "pdfs/filled-background.pdf", diff --git a/test/unit/api_spec.js b/test/unit/api_spec.js index 6d6a2b0bdf5bf..aaa20a7ccbfb8 100644 --- a/test/unit/api_spec.js +++ b/test/unit/api_spec.js @@ -3755,6 +3755,186 @@ Caron Broadcasting, Inc., an Ohio corporation (“Lessee”).`) await loadingTask.destroy(); }); + it("gets corrupt structure tree with non-dictionary nodes (issue 18503)", async function () { + if (isNodeJS) { + pending("Linked test-cases are not supported in Node.js."); + } + + const loadingTask = getDocument(buildGetDocumentParams("issue18503.pdf")); + const pdfDoc = await loadingTask.promise; + const pdfPage = await pdfDoc.getPage(1); + const tree = await pdfPage.getStructTree(); + + expect(tree).toEqual({ + role: "Root", + children: [ + { + role: "Document", + lang: "en-US", + children: [ + { + role: "Sect", + children: [ + { + role: "P", + children: [{ type: "content", id: "p406R_mc2" }], + }, + { + role: "Figure", + children: [{ type: "content", id: "p406R_mc11" }], + alt: "d h c s logo", + }, + { + role: "Figure", + children: [{ type: "content", id: "p406R_mc1" }], + alt: "Great Seal of the State of California", + }, + { + role: "P", + children: [ + { type: "content", id: "p406R_mc3" }, + { type: "content", id: "p406R_mc5" }, + { type: "content", id: "p406R_mc7" }, + ], + }, + { + role: "P", + children: [ + { type: "content", id: "p406R_mc4" }, + { type: "content", id: "p406R_mc6" }, + ], + }, + { + role: "P", + children: [{ type: "content", id: "p406R_mc12" }], + }, + { + role: "P", + children: [{ type: "content", id: "p406R_mc13" }], + }, + { + role: "P", + children: [ + { + role: "Span", + children: [ + { type: "content", id: "p406R_mc15" }, + { + role: "Note", + children: [{ type: "content", id: "p406R_mc32" }], + }, + ], + }, + { type: "content", id: "p406R_mc14" }, + { type: "content", id: "p406R_mc16" }, + ], + }, + { + role: "H1", + children: [{ type: "content", id: "p406R_mc17" }], + }, + ], + }, + { + role: "Sect", + children: [ + { + role: "H2", + children: [{ type: "content", id: "p406R_mc18" }], + }, + { + role: "P", + children: [{ type: "content", id: "p406R_mc19" }], + }, + ], + }, + { + role: "Sect", + children: [ + { + role: "H2", + children: [{ type: "content", id: "p406R_mc20" }], + }, + { + role: "P", + children: [ + { type: "content", id: "p406R_mc21" }, + { + role: "Span", + children: [ + { type: "content", id: "p406R_mc23" }, + { + role: "Note", + children: [ + { type: "content", id: "p406R_mc33" }, + { + role: "Link", + children: [ + { type: "object", id: "432R" }, + { type: "content", id: "p406R_mc34" }, + ], + }, + ], + }, + ], + }, + { type: "content", id: "p406R_mc22" }, + { type: "content", id: "p406R_mc24" }, + { type: "content", id: "p406R_mc25" }, + { type: "content", id: "p406R_mc26" }, + { + role: "Span", + children: [ + { type: "content", id: "p406R_mc28" }, + { + role: "Note", + children: [ + { type: "content", id: "p406R_mc35" }, + { + role: "Link", + children: [ + { type: "object", id: "433R" }, + { type: "content", id: "p406R_mc36" }, + ], + }, + { type: "content", id: "p406R_mc37" }, + ], + }, + ], + }, + { type: "content", id: "p406R_mc29" }, + { type: "content", id: "p406R_mc27" }, + { type: "content", id: "p406R_mc30" }, + ], + }, + { + role: "P", + children: [{ type: "content", id: "p406R_mc31" }], + }, + { + role: "P", + children: [ + { type: "content", id: "p406R_mc8" }, + { type: "content", id: "p406R_mc9" }, + { + role: "Link", + children: [ + { type: "object", id: "434R" }, + { type: "content", id: "p406R_mc10" }, + ], + }, + ], + }, + ], + }, + ], + }, + ], + }); + + await loadingTask.destroy(); + }); + it("gets operator list", async function () { const operatorList = await page.getOperatorList();