Skip to content

Commit

Permalink
Merge pull request #66 from datopian/refactor-ast
Browse files Browse the repository at this point in the history
[#65, refactor ast for tags/links]
  • Loading branch information
rufuspollock authored Nov 22, 2023
2 parents 88662e9 + 9fc56e2 commit b26a7e7
Show file tree
Hide file tree
Showing 8 changed files with 200 additions and 230 deletions.
3 changes: 2 additions & 1 deletion src/lib/process.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,9 @@ import crypto from "crypto";
import fs from "fs";
import path from "path";

import { parseFile, WikiLink } from "../utils/index.js";
import { parseFile } from "../utils/index.js";
import { File } from "./schema.js";
import { WikiLink } from "../utils/parseFile.js";

export interface FileInfo extends File {
tags: string[];
Expand Down
2 changes: 1 addition & 1 deletion src/utils/databaseUtils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ import {
File,
} from "../lib/schema.js";
import path from "path";
import { WikiLink } from "./extractWikiLinks.js";
import { WikiLink } from "./parseFile.js";

export async function resetDatabaseTables(db: Knex) {
const tableNames = [MddbFile, MddbTag, MddbFileTag, MddbLink];
Expand Down
78 changes: 35 additions & 43 deletions src/utils/extractTagsFromBody.spec.ts
Original file line number Diff line number Diff line change
@@ -1,114 +1,108 @@
import { extractTagsFromBody } from "./extractTagsFromBody";
import { extractTagsFromBody, processAST } from "./parseFile";

/**
 * Test helper: turns a markdown string into the list of tags found in it.
 *
 * The source is first run through `processAST` (with an empty options
 * object) and the resulting AST is handed to `extractTagsFromBody`, whose
 * return value is passed straight back to the caller.
 *
 * @param source - Markdown text to scan for tags.
 * @returns Whatever `extractTagsFromBody` returns for the parsed AST.
 */
const getTagsFromSource = (source: string) =>
  extractTagsFromBody(processAST(source, {}));

describe("extractTagsFromBody", () => {
test("should extract tags from body", () => {
const source = "#tag";
const tags = extractTagsFromBody(source);
const tags = getTagsFromSource("#tag");
const expectedTags = ["tag"];
expect(tags).toEqual(expectedTags);
});

test("should extract tags from heading", () => {
const source = "# heading #tag";
const tags = extractTagsFromBody(source);
const tags = getTagsFromSource("# heading #tag");
const expectedTags = ["tag"];
expect(tags).toEqual(expectedTags);
});

test("should extract 2 tags from heading", () => {
const source = "# heading #tag #tag2";
const tags = extractTagsFromBody(source);
const tags = getTagsFromSource("# heading #tag #tag2");
const expectedTags = ["tag", "tag2"];
expect(tags).toEqual(expectedTags);
});

test("should extract tags from body text", () => {
const source = "This is a #tag in the body text.";
const tags = extractTagsFromBody(source);
const tags = getTagsFromSource("This is a #tag in the body text.");
const expectedTags = ["tag"];
expect(tags).toEqual(expectedTags);
});

test("should extract 2 tags from body text", () => {
const source = "This is #tag1 and #tag2 in the body text.";
const tags = extractTagsFromBody(source);
const tags = getTagsFromSource("This is #tag1 and #tag2 in the body text.");
const expectedTags = ["tag1", "tag2"];
expect(tags).toEqual(expectedTags);
});

test("should extract tags from both heading and body text", () => {
const source = `# head #tag
in heading and also in the #tag-body body text.`;
const tags = extractTagsFromBody(source);
const tags = getTagsFromSource(`# head #tag
in heading and also in the #tag-body body text.`);
const expectedTags = ["tag", "tag-body"];
expect(tags).toEqual(expectedTags);
});

test("should extract tags with numbers", () => {
const source = "This is #tag123 with numbers.";
const tags = extractTagsFromBody(source);
const tags = getTagsFromSource("This is #tag123 with numbers.");
const expectedTags = ["tag123"];
expect(tags).toEqual(expectedTags);
});

test("should extract tags with special characters", () => {
const source =
"This is #special-tag #special_tag2 with special characters.";
const tags = extractTagsFromBody(source);
const tags = getTagsFromSource(
"This is #special-tag #special_tag2 with special characters."
);
const expectedTags = ["special-tag", "special_tag2"];
expect(tags).toEqual(expectedTags);
});

test("should extract tags with slash", () => {
const source = "This is #tag/with/slash.";
const tags = extractTagsFromBody(source);
const tags = getTagsFromSource("This is #tag/with/slash.");
const expectedTags = ["tag/with/slash"];
expect(tags).toEqual(expectedTags);
});

test("should extract tags with multiple tags in a line", () => {
const source = "#tag1 #tag2 #tag3";
const tags = extractTagsFromBody(source);
const tags = getTagsFromSource("#tag1 #tag2 #tag3");
const expectedTags = ["tag1", "tag2", "tag3"];
expect(tags).toEqual(expectedTags);
});

// For now we pass only the body content, not the whole source.
test("shouldn't extract frontmatter tags", () => {
const content = `
const tags = getTagsFromSource(`
No tags in this content.
#gr3
`;
const tags = extractTagsFromBody(content);
`);
const expectedTags: string[] = ["gr3"];
expect(tags).toEqual(expectedTags);
});

test("should extract tags from multiline text", () => {
const source = `This is a multiline text with #tag1 and #tag2.
const tags =
getTagsFromSource(`This is a multiline text with #tag1 and #tag2.
Multiple tags on different lines:
#tag3
#tag4
And another tag: #tag5.
`;
const tags = extractTagsFromBody(source);
`);
const expectedTags: string[] = ["tag1", "tag2", "tag3", "tag4", "tag5"];
expect(tags).toEqual(expectedTags);
});

test("should handle multiple tags in the same line", () => {
const source = `#tag1 #tag2 #tag3
#tag4 #tag5`;
const tags = extractTagsFromBody(source);
const tags = getTagsFromSource(`#tag1 #tag2 #tag3
#tag4 #tag5`);
const expectedTags: string[] = ["tag1", "tag2", "tag3", "tag4", "tag5"];
expect(tags).toEqual(expectedTags);
});

test("should handle tags with numbers and slashes in multiline text", () => {
const source = `Tags with numbers: #tag123 and #tag456.
const tags = getTagsFromSource(`Tags with numbers: #tag123 and #tag456.
Tags with slashes: #tag/one and #tag/two/three.
`;
const tags = extractTagsFromBody(source);
`);
const expectedTags: string[] = [
"tag123",
"tag456",
Expand All @@ -119,10 +113,10 @@ describe("extractTagsFromBody", () => {
});

test("should handle tags with special characters in multiline text", () => {
const source = `Tags with special characters: #special-tag and #tag$percent.
const tags =
getTagsFromSource(`Tags with special characters: #special-tag and #tag$percent.
Another tag: #tag_with_underscore.
`;
const tags = extractTagsFromBody(source);
`);
const expectedTags: string[] = [
"special-tag",
"tag",
Expand All @@ -132,17 +126,15 @@ describe("extractTagsFromBody", () => {
});

test("should handle edge case with no tags in multiline text", () => {
const source = `No tags in this multiline content.
const tags = getTagsFromSource(`No tags in this multiline content.
Another line without tags.
`;
const tags = extractTagsFromBody(source);
`);
const expectedTags: string[] = [];
expect(tags).toEqual(expectedTags);
});

test("should handle edge case with no tags", () => {
const source = "No tags in this content.";
const tags = extractTagsFromBody(source);
const tags = getTagsFromSource("No tags in this content.");
const expectedTags: string[] = [];
expect(tags).toEqual(expectedTags);
});
Expand Down
29 changes: 0 additions & 29 deletions src/utils/extractTagsFromBody.ts

This file was deleted.

57 changes: 26 additions & 31 deletions src/utils/extractWikiLinks.spec.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { extractWikiLinks } from "./extractWikiLinks";
import { extractWikiLinks, processAST } from "./parseFile";

// TODO test for links with headings and aliases ?
// TODO test pdf embeds
Expand All @@ -7,13 +7,17 @@ import { extractWikiLinks } from "./extractWikiLinks";
// TODO test custom extractors
// TODO test with other remark plugins e.g. original wiki links

describe("extractWikiLinks", () => {
/**
 * Test helper: turns a markdown string into the list of links found in it.
 *
 * The source is parsed with `processAST`, receiving `options` unchanged,
 * and the resulting AST is passed to `extractWikiLinks` together with an
 * options object whose `from` is the fixed path "abc/foobar.md".
 *
 * @param source - Markdown text to scan for links.
 * @param options - Optional settings forwarded to both `processAST` and
 *   `extractWikiLinks`. They are spread after `from`, so a `from` key in
 *   `options` would replace the fixed path.
 * @returns Whatever `extractWikiLinks` returns for the parsed AST.
 */
const getLinksFromSource = (source: string, options?) => {
  const sourcePath = "abc/foobar.md";
  return extractWikiLinks(processAST(source, options), {
    from: sourcePath,
    ...options,
  });
};

describe("extractWikiLinks", () => {
describe("Common Mark links", () => {
test("should extract CommonMark links", () => {
const source = "[Page 1](page-1)";
const links = extractWikiLinks(from, source);
const links = getLinksFromSource("[Page 1](page-1)");
const expectedLinks = [
{
from: "abc/foobar.md",
Expand All @@ -28,8 +32,7 @@ describe("extractWikiLinks", () => {
});

test("should extract CommonMark links with image extension", () => {
const source = "[hello](world.png)";
const links = extractWikiLinks(from, source);
const links = getLinksFromSource("[hello](world.png)");
const expectedLinks = [
{
from: "abc/foobar.md",
Expand All @@ -44,8 +47,7 @@ describe("extractWikiLinks", () => {
});

test("should extract CommonMark links with non-image extension", () => {
const source = "[hello](world.mdx)";
const links = extractWikiLinks(from, source);
const links = getLinksFromSource("[hello](world.mdx)");
const expectedLinks = [
{
from: "abc/foobar.md",
Expand All @@ -60,8 +62,7 @@ describe("extractWikiLinks", () => {
});

test("should extract CommonMark links with absolute path", () => {
const source = "[hello](/world)";
const links = extractWikiLinks(from, source);
const links = getLinksFromSource("[hello](/world)");
const expectedLinks = [
{
from: "abc/foobar.md",
Expand All @@ -76,8 +77,7 @@ describe("extractWikiLinks", () => {
});

test("should extract CommonMark image links", () => {
const source = "![hello](world.png)";
const links = extractWikiLinks(from, source);
const links = getLinksFromSource("![hello](world.png)");
const expectedLinks = [
{
from: "abc/foobar.md",
Expand All @@ -92,8 +92,7 @@ describe("extractWikiLinks", () => {
});

test("should extract CommonMark image links without alt text", () => {
const source = "![](world.png)";
const links = extractWikiLinks(from, source);
const links = getLinksFromSource("![](world.png)");
const expectedLinks = [
{
from: "abc/foobar.md",
Expand All @@ -111,7 +110,7 @@ describe("extractWikiLinks", () => {
// TODO Obsidian wiki links
describe("Obsidian wiki links", () => {
test("should extract wiki links", () => {
const source = "[[Page 1]] [[Page 2]] [[Page 3]]";
const links = getLinksFromSource("[[Page 1]] [[Page 2]] [[Page 3]]");
const expectedLinks = [
{
embed: false,
Expand All @@ -138,15 +137,21 @@ describe("extractWikiLinks", () => {
toRaw: "Page 3",
},
];
const links = extractWikiLinks("abc/foobar.md", source);
expect(links).toHaveLength(expectedLinks.length);
links.forEach((link) => {
expect(expectedLinks).toContainEqual(link);
});
});

test("should extract wiki links with Obsidian-style shortest path", () => {
const source = "[[Page 1]] [[Page 2]] [[Page 3]]";
const permalinks = [
"/some/folder/Page 1",
"/some/folder/Page 2",
"/some/folder/Page 3",
];
const links = getLinksFromSource("[[Page 1]] [[Page 2]] [[Page 3]]", {
permalinks,
});
const expectedLinks = [
{
embed: false,
Expand All @@ -173,20 +178,14 @@ describe("extractWikiLinks", () => {
toRaw: "/some/folder/Page 3",
},
];
const permalinks = [
"/some/folder/Page 1",
"/some/folder/Page 2",
"/some/folder/Page 3",
];
const links = extractWikiLinks("abc/foobar.md", source, { permalinks });
expect(links).toHaveLength(expectedLinks.length);
links.forEach((link) => {
expect(expectedLinks).toContainEqual(link);
});
});

test("should extract embedded wiki links", () => {
const source = "![[My File.png]]]]";
const links = getLinksFromSource("![[My File.png]]]]");
const expectedLinks = [
{
from: "abc/foobar.md",
Expand All @@ -197,7 +196,6 @@ describe("extractWikiLinks", () => {
internal: true,
},
];
const links = extractWikiLinks("abc/foobar.md", source);
expect(links).toEqual(expectedLinks);
});
});
Expand All @@ -218,8 +216,7 @@ describe("extractWikiLinks", () => {
// });

test("should extract external links", () => {
const source = "[External Link](https://example.com)";
const links = extractWikiLinks("abc/foobar.md", source);
const links = getLinksFromSource("[External Link](https://example.com)");
const expectedLinks = [
{
from: "abc/foobar.md",
Expand All @@ -234,14 +231,12 @@ describe("extractWikiLinks", () => {
});

test("should return empty array if no links are found", () => {
const source = "No links here";
const links = extractWikiLinks(from, source);
const links = getLinksFromSource("No links here");
expect(links).toHaveLength(0);
});

test("should return empty array if page is empty", () => {
const source = "";
const links = extractWikiLinks(from, source);
const links = getLinksFromSource("");
expect(links).toHaveLength(0);
});
});
Loading

0 comments on commit b26a7e7

Please sign in to comment.