Merge pull request #66 from datopian/refactor-ast

[#65, refactor ast for tags/links]
datopian · Nov 22, 2023 · b26a7e7
2 parents 88662e9 + 9fc56e2
commit b26a7e7
Showing 8 changed files with 200 additions and 230 deletions.
diff --git a/src/lib/process.ts b/src/lib/process.ts
@@ -2,8 +2,9 @@ import crypto from "crypto";
 import fs from "fs";
 import path from "path";
 
-import { parseFile, WikiLink } from "../utils/index.js";
+import { parseFile } from "../utils/index.js";
 import { File } from "./schema.js";
+import { WikiLink } from "../utils/parseFile.js";
 
 export interface FileInfo extends File {
   tags: string[];

diff --git a/src/utils/databaseUtils.ts b/src/utils/databaseUtils.ts
@@ -7,7 +7,7 @@ import {
   File,
 } from "../lib/schema.js";
 import path from "path";
-import { WikiLink } from "./extractWikiLinks.js";
+import { WikiLink } from "./parseFile.js";
 
 export async function resetDatabaseTables(db: Knex) {
   const tableNames = [MddbFile, MddbTag, MddbFileTag, MddbLink];

diff --git a/src/utils/extractTagsFromBody.spec.ts b/src/utils/extractTagsFromBody.spec.ts
@@ -1,114 +1,108 @@
-import { extractTagsFromBody } from "./extractTagsFromBody";
+import { extractTagsFromBody, processAST } from "./parseFile";
+
+const getTagsFromSource = (source: string) => {
+  const ast = processAST(source, {});
+  const tags = extractTagsFromBody(ast);
+  return tags;
+};
 
 describe("extractTagsFromBody", () => {
   test("should extract tags from body", () => {
-    const source = "#tag";
-    const tags = extractTagsFromBody(source);
+    const tags = getTagsFromSource("#tag");
     const expectedTags = ["tag"];
     expect(tags).toEqual(expectedTags);
   });
 
   test("should extract tags from heading", () => {
-    const source = "# heading #tag";
-    const tags = extractTagsFromBody(source);
+    const tags = getTagsFromSource("# heading #tag");
     const expectedTags = ["tag"];
     expect(tags).toEqual(expectedTags);
   });
 
   test("should extract 2 tags from heading", () => {
-    const source = "# heading #tag #tag2";
-    const tags = extractTagsFromBody(source);
+    const tags = getTagsFromSource("# heading #tag #tag2");
     const expectedTags = ["tag", "tag2"];
     expect(tags).toEqual(expectedTags);
   });
 
   test("should extract tags from body text", () => {
-    const source = "This is a #tag in the body text.";
-    const tags = extractTagsFromBody(source);
+    const tags = getTagsFromSource("This is a #tag in the body text.");
     const expectedTags = ["tag"];
     expect(tags).toEqual(expectedTags);
   });
 
   test("should extract 2 tags from body text", () => {
-    const source = "This is #tag1 and #tag2 in the body text.";
-    const tags = extractTagsFromBody(source);
+    const tags = getTagsFromSource("This is #tag1 and #tag2 in the body text.");
     const expectedTags = ["tag1", "tag2"];
     expect(tags).toEqual(expectedTags);
   });
 
   test("should extract tags from both heading and body text", () => {
-    const source = `# head #tag 
-    in heading and also in the #tag-body body text.`;
-    const tags = extractTagsFromBody(source);
+    const tags = getTagsFromSource(`# head #tag 
+    in heading and also in the #tag-body body text.`);
     const expectedTags = ["tag", "tag-body"];
     expect(tags).toEqual(expectedTags);
   });
 
   test("should extract tags with numbers", () => {
-    const source = "This is #tag123 with numbers.";
-    const tags = extractTagsFromBody(source);
+    const tags = getTagsFromSource("This is #tag123 with numbers.");
     const expectedTags = ["tag123"];
     expect(tags).toEqual(expectedTags);
   });
 
   test("should extract tags with special characters", () => {
-    const source =
-      "This is #special-tag #special_tag2 with special characters.";
-    const tags = extractTagsFromBody(source);
+    const tags = getTagsFromSource(
+      "This is #special-tag #special_tag2 with special characters."
+    );
     const expectedTags = ["special-tag", "special_tag2"];
     expect(tags).toEqual(expectedTags);
   });
 
   test("should extract tags with slash", () => {
-    const source = "This is #tag/with/slash.";
-    const tags = extractTagsFromBody(source);
+    const tags = getTagsFromSource("This is #tag/with/slash.");
     const expectedTags = ["tag/with/slash"];
     expect(tags).toEqual(expectedTags);
   });
 
   test("should extract tags with multiple tags in a line", () => {
-    const source = "#tag1 #tag2 #tag3";
-    const tags = extractTagsFromBody(source);
+    const tags = getTagsFromSource("#tag1 #tag2 #tag3");
     const expectedTags = ["tag1", "tag2", "tag3"];
     expect(tags).toEqual(expectedTags);
   });
 
   // for now we will pass the body content only not the whole source
   test("shouldn't extract frontmatter tags", () => {
-    const content = `
+    const tags = getTagsFromSource(`
     No tags in this content.
     #gr3
-    `;
-    const tags = extractTagsFromBody(content);
+    `);
     const expectedTags: string[] = ["gr3"];
     expect(tags).toEqual(expectedTags);
   });
 
   test("should extract tags from multiline text", () => {
-    const source = `This is a multiline text with #tag1 and #tag2.
+    const tags =
+      getTagsFromSource(`This is a multiline text with #tag1 and #tag2.
       Multiple tags on different lines: 
       #tag3
       #tag4
       And another tag: #tag5.
-    `;
-    const tags = extractTagsFromBody(source);
+    `);
     const expectedTags: string[] = ["tag1", "tag2", "tag3", "tag4", "tag5"];
     expect(tags).toEqual(expectedTags);
   });
 
   test("should handle multiple tags in the same line", () => {
-    const source = `#tag1 #tag2 #tag3
-      #tag4 #tag5`;
-    const tags = extractTagsFromBody(source);
+    const tags = getTagsFromSource(`#tag1 #tag2 #tag3
+      #tag4 #tag5`);
     const expectedTags: string[] = ["tag1", "tag2", "tag3", "tag4", "tag5"];
     expect(tags).toEqual(expectedTags);
   });
 
   test("should handle tags with numbers and slashes in multiline text", () => {
-    const source = `Tags with numbers: #tag123 and #tag456.
+    const tags = getTagsFromSource(`Tags with numbers: #tag123 and #tag456.
       Tags with slashes: #tag/one and #tag/two/three.
-    `;
-    const tags = extractTagsFromBody(source);
+    `);
     const expectedTags: string[] = [
       "tag123",
       "tag456",
@@ -119,10 +113,10 @@ describe("extractTagsFromBody", () => {
   });
 
   test("should handle tags with special characters in multiline text", () => {
-    const source = `Tags with special characters: #special-tag and #tag$percent.
+    const tags =
+      getTagsFromSource(`Tags with special characters: #special-tag and #tag$percent.
       Another tag: #tag_with_underscore.
-    `;
-    const tags = extractTagsFromBody(source);
+    `);
     const expectedTags: string[] = [
       "special-tag",
       "tag",
@@ -132,17 +126,15 @@ describe("extractTagsFromBody", () => {
   });
 
   test("should handle edge case with no tags in multiline text", () => {
-    const source = `No tags in this multiline content.
+    const tags = getTagsFromSource(`No tags in this multiline content.
       Another line without tags.
-    `;
-    const tags = extractTagsFromBody(source);
+    `);
     const expectedTags: string[] = [];
     expect(tags).toEqual(expectedTags);
   });
 
   test("should handle edge case with no tags", () => {
-    const source = "No tags in this content.";
-    const tags = extractTagsFromBody(source);
+    const tags = getTagsFromSource("No tags in this content.");
     const expectedTags: string[] = [];
     expect(tags).toEqual(expectedTags);
   });

diff --git a/src/utils/extractTagsFromBody.ts b/src/utils/extractTagsFromBody.ts
diff --git a/src/utils/extractWikiLinks.spec.ts b/src/utils/extractWikiLinks.spec.ts
@@ -1,4 +1,4 @@
-import { extractWikiLinks } from "./extractWikiLinks";
+import { extractWikiLinks, processAST } from "./parseFile";
 
 // TODO test for links with headings and aliases ?
 // TODO test pdf embeds
@@ -7,13 +7,17 @@ import { extractWikiLinks } from "./extractWikiLinks";
 // TODO test custom extractors
 // TODO test with other remark plugins e.g. original wiki links
 
-describe("extractWikiLinks", () => {
+const getLinksFromSource = (source: string, options?) => {
   const from = "abc/foobar.md";
+  const ast = processAST(source, options);
+  const links = extractWikiLinks(ast, { from: from, ...options });
+  return links;
+};
 
+describe("extractWikiLinks", () => {
   describe("Common Mark links", () => {
     test("should extract CommonMark links", () => {
-      const source = "[Page 1](page-1)";
-      const links = extractWikiLinks(from, source);
+      const links = getLinksFromSource("[Page 1](page-1)");
       const expectedLinks = [
         {
           from: "abc/foobar.md",
@@ -28,8 +32,7 @@ describe("extractWikiLinks", () => {
     });
 
     test("should extract CommonMark links with image extension", () => {
-      const source = "[hello](world.png)";
-      const links = extractWikiLinks(from, source);
+      const links = getLinksFromSource("[hello](world.png)");
       const expectedLinks = [
         {
           from: "abc/foobar.md",
@@ -44,8 +47,7 @@ describe("extractWikiLinks", () => {
     });
 
     test("should extract CommonMark links with non-image extension", () => {
-      const source = "[hello](world.mdx)";
-      const links = extractWikiLinks(from, source);
+      const links = getLinksFromSource("[hello](world.mdx)");
       const expectedLinks = [
         {
           from: "abc/foobar.md",
@@ -60,8 +62,7 @@ describe("extractWikiLinks", () => {
     });
 
     test("should extract CommonMark links with absolute path", () => {
-      const source = "[hello](/world)";
-      const links = extractWikiLinks(from, source);
+      const links = getLinksFromSource("[hello](/world)");
       const expectedLinks = [
         {
           from: "abc/foobar.md",
@@ -76,8 +77,7 @@ describe("extractWikiLinks", () => {
     });
 
     test("should extract CommonMark image links", () => {
-      const source = "![hello](world.png)";
-      const links = extractWikiLinks(from, source);
+      const links = getLinksFromSource("![hello](world.png)");
       const expectedLinks = [
         {
           from: "abc/foobar.md",
@@ -92,8 +92,7 @@ describe("extractWikiLinks", () => {
     });
 
     test("should extract CommonMark image links without alt text", () => {
-      const source = "![](world.png)";
-      const links = extractWikiLinks(from, source);
+      const links = getLinksFromSource("![](world.png)");
       const expectedLinks = [
         {
           from: "abc/foobar.md",
@@ -111,7 +110,7 @@ describe("extractWikiLinks", () => {
   // TODO Obsidian wiki links
   describe("Obsidian wiki links", () => {
     test("should extract wiki links", () => {
-      const source = "[[Page 1]] [[Page 2]] [[Page 3]]";
+      const links = getLinksFromSource("[[Page 1]] [[Page 2]] [[Page 3]]");
       const expectedLinks = [
         {
           embed: false,
@@ -138,15 +137,21 @@ describe("extractWikiLinks", () => {
           toRaw: "Page 3",
         },
       ];
-      const links = extractWikiLinks("abc/foobar.md", source);
       expect(links).toHaveLength(expectedLinks.length);
       links.forEach((link) => {
         expect(expectedLinks).toContainEqual(link);
       });
     });
 
     test("should extract wiki links with Obsidian-style shortest path", () => {
-      const source = "[[Page 1]] [[Page 2]] [[Page 3]]";
+      const permalinks = [
+        "/some/folder/Page 1",
+        "/some/folder/Page 2",
+        "/some/folder/Page 3",
+      ];
+      const links = getLinksFromSource("[[Page 1]] [[Page 2]] [[Page 3]]", {
+        permalinks,
+      });
       const expectedLinks = [
         {
           embed: false,
@@ -173,20 +178,14 @@ describe("extractWikiLinks", () => {
           toRaw: "/some/folder/Page 3",
         },
       ];
-      const permalinks = [
-        "/some/folder/Page 1",
-        "/some/folder/Page 2",
-        "/some/folder/Page 3",
-      ];
-      const links = extractWikiLinks("abc/foobar.md", source, { permalinks });
       expect(links).toHaveLength(expectedLinks.length);
       links.forEach((link) => {
         expect(expectedLinks).toContainEqual(link);
       });
     });
 
     test("should extract embedded wiki links", () => {
-      const source = "![[My File.png]]]]";
+      const links = getLinksFromSource("![[My File.png]]]]");
       const expectedLinks = [
         {
           from: "abc/foobar.md",
@@ -197,7 +196,6 @@ describe("extractWikiLinks", () => {
           internal: true,
         },
       ];
-      const links = extractWikiLinks("abc/foobar.md", source);
       expect(links).toEqual(expectedLinks);
     });
   });
@@ -218,8 +216,7 @@ describe("extractWikiLinks", () => {
   // });
 
   test("should extract external links", () => {
-    const source = "[External Link](https://example.com)";
-    const links = extractWikiLinks("abc/foobar.md", source);
+    const links = getLinksFromSource("[External Link](https://example.com)");
     const expectedLinks = [
       {
         from: "abc/foobar.md",
@@ -234,14 +231,12 @@ describe("extractWikiLinks", () => {
   });
 
   test("should return empty array if no links are found", () => {
-    const source = "No links here";
-    const links = extractWikiLinks(from, source);
+    const links = getLinksFromSource("No links here");
     expect(links).toHaveLength(0);
   });
 
   test("should return empty array if page is empty", () => {
-    const source = "";
-    const links = extractWikiLinks(from, source);
+    const links = getLinksFromSource("");
     expect(links).toHaveLength(0);
   });
 });