Skip to content

Commit

Permalink
Ignore non-dictionary nodes when parsing StructTree data (issue 18503)
Browse files Browse the repository at this point in the history
  • Loading branch information
Snuffleupagus committed Jul 28, 2024
1 parent d3384c0 commit c4cd405
Show file tree
Hide file tree
Showing 4 changed files with 192 additions and 0 deletions.
3 changes: 3 additions & 0 deletions src/core/struct_tree.js
Original file line number Diff line number Diff line change
Expand Up @@ -671,6 +671,9 @@ class StructTreePage {
warn("StructTree MAX_DEPTH reached.");
return null;
}
if (!(dict instanceof Dict)) {
return null;
}

if (map.has(dict)) {
return map.get(dict);
Expand Down
1 change: 1 addition & 0 deletions test/pdfs/issue18503.pdf.link
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
https://github.com/user-attachments/files/16402842/apl_23_003.pdf
8 changes: 8 additions & 0 deletions test/test_manifest.json
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,14 @@
"link": true,
"type": "other"
},
{
"id": "issue18503",
"file": "pdfs/issue18503.pdf",
"md5": "b6c7c8db3505d07ce8eabe6712641a94",
"rounds": 1,
"link": true,
"type": "other"
},
{
"id": "filled-background-range",
"file": "pdfs/filled-background.pdf",
Expand Down
180 changes: 180 additions & 0 deletions test/unit/api_spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -3755,6 +3755,186 @@ Caron Broadcasting, Inc., an Ohio corporation (“Lessee”).`)
await loadingTask.destroy();
});

// Regression test for issue 18503: loads a PDF whose structure tree is
// corrupt (it contains non-dictionary nodes) and checks that the structure
// tree returned for the first page matches the expected tree exactly.
it("gets corrupt structure tree with non-dictionary nodes (issue 18503)", async function () {
// The PDF is a linked test-case, which is not available when running in
// Node.js, so the spec is marked as pending there.
if (isNodeJS) {
pending("Linked test-cases are not supported in Node.js.");
}

// Load the document and fetch the structure tree of page 1.
const loadingTask = getDocument(buildGetDocumentParams("issue18503.pdf"));
const pdfDoc = await loadingTask.promise;
const pdfPage = await pdfDoc.getPage(1);
const tree = await pdfPage.getStructTree();

// Full expected tree for page 1: a "Root" holding a single "Document" node
// (lang "en-US") with three "Sect" children.
// NOTE(review): presumably the non-dictionary nodes of the corrupt tree are
// simply absent from this result - confirm against the parser change.
expect(tree).toEqual({
role: "Root",
children: [
{
role: "Document",
lang: "en-US",
children: [
{
role: "Sect",
children: [
{
role: "P",
children: [{ type: "content", id: "p406R_mc2" }],
},
{
role: "Figure",
children: [{ type: "content", id: "p406R_mc11" }],
alt: "d h c s logo",
},
{
role: "Figure",
children: [{ type: "content", id: "p406R_mc1" }],
alt: "Great Seal of the State of California",
},
{
role: "P",
children: [
{ type: "content", id: "p406R_mc3" },
{ type: "content", id: "p406R_mc5" },
{ type: "content", id: "p406R_mc7" },
],
},
{
role: "P",
children: [
{ type: "content", id: "p406R_mc4" },
{ type: "content", id: "p406R_mc6" },
],
},
{
role: "P",
children: [{ type: "content", id: "p406R_mc12" }],
},
{
role: "P",
children: [{ type: "content", id: "p406R_mc13" }],
},
{
role: "P",
children: [
{
role: "Span",
children: [
{ type: "content", id: "p406R_mc15" },
{
role: "Note",
children: [{ type: "content", id: "p406R_mc32" }],
},
],
},
{ type: "content", id: "p406R_mc14" },
{ type: "content", id: "p406R_mc16" },
],
},
{
role: "H1",
children: [{ type: "content", id: "p406R_mc17" }],
},
],
},
{
role: "Sect",
children: [
{
role: "H2",
children: [{ type: "content", id: "p406R_mc18" }],
},
{
role: "P",
children: [{ type: "content", id: "p406R_mc19" }],
},
],
},
{
role: "Sect",
children: [
{
role: "H2",
children: [{ type: "content", id: "p406R_mc20" }],
},
{
role: "P",
children: [
{ type: "content", id: "p406R_mc21" },
{
role: "Span",
children: [
{ type: "content", id: "p406R_mc23" },
{
role: "Note",
children: [
{ type: "content", id: "p406R_mc33" },
{
role: "Link",
children: [
{ type: "object", id: "432R" },
{ type: "content", id: "p406R_mc34" },
],
},
],
},
],
},
{ type: "content", id: "p406R_mc22" },
{ type: "content", id: "p406R_mc24" },
{ type: "content", id: "p406R_mc25" },
{ type: "content", id: "p406R_mc26" },
{
role: "Span",
children: [
{ type: "content", id: "p406R_mc28" },
{
role: "Note",
children: [
{ type: "content", id: "p406R_mc35" },
{
role: "Link",
children: [
{ type: "object", id: "433R" },
{ type: "content", id: "p406R_mc36" },
],
},
{ type: "content", id: "p406R_mc37" },
],
},
],
},
{ type: "content", id: "p406R_mc29" },
{ type: "content", id: "p406R_mc27" },
{ type: "content", id: "p406R_mc30" },
],
},
{
role: "P",
children: [{ type: "content", id: "p406R_mc31" }],
},
{
role: "P",
children: [
{ type: "content", id: "p406R_mc8" },
{ type: "content", id: "p406R_mc9" },
{
role: "Link",
children: [
{ type: "object", id: "434R" },
{ type: "content", id: "p406R_mc10" },
],
},
],
},
],
},
],
},
],
});

// Tear down the loading task now that the tree has been checked.
await loadingTask.destroy();
});

it("gets operator list", async function () {
const operatorList = await page.getOperatorList();

Expand Down

0 comments on commit c4cd405

Please sign in to comment.