From 4931e318604049d0a085b2de6136f8311b1f4065 Mon Sep 17 00:00:00 2001 From: Daniel Kuznetsov Date: Fri, 20 Sep 2024 19:09:59 +0300 Subject: [PATCH] feat: liquid markdown front matter separately from the rest of the content --- src/transform/frontmatter/common.ts | 27 ++++++ src/transform/frontmatter/emplace.ts | 26 ++++++ src/transform/frontmatter/extract.ts | 89 +++++++++++++++++++ src/transform/frontmatter/index.ts | 4 + src/transform/frontmatter/transformValues.ts | 22 +++++ src/transform/index.ts | 6 +- src/transform/liquid/index.ts | 68 +++++++++++++- src/transform/utilsFS.ts | 4 +- test/liquid/cycles.test.ts | 22 +++-- test/liquid/substitutions.test.ts | 38 ++++---- .../liquidSourceMap.test.ts | 16 ++-- 11 files changed, 280 insertions(+), 42 deletions(-) create mode 100644 src/transform/frontmatter/common.ts create mode 100644 src/transform/frontmatter/emplace.ts create mode 100644 src/transform/frontmatter/extract.ts create mode 100644 src/transform/frontmatter/index.ts create mode 100644 src/transform/frontmatter/transformValues.ts diff --git a/src/transform/frontmatter/common.ts b/src/transform/frontmatter/common.ts new file mode 100644 index 00000000..8679cdb1 --- /dev/null +++ b/src/transform/frontmatter/common.ts @@ -0,0 +1,27 @@ +export type FrontMatter = { + [key: string]: unknown; + metadata?: Record[]; +}; + +export const frontMatterFence = '---'; + +/** + * Temporary workaround to enable parsing YAML metadata from potentially + * Liquid-aware source files + * @param content Input string which could contain Liquid-style substitution syntax (which clashes with YAML + * object syntax) + * @returns String with every `{{` rewritten to `(({{` and every `}}` to `}}))`, ready to be parsed with `js-yaml` + */ +export const escapeLiquidSubstitutionSyntax = (content: string): string => + content.replace(/{{/g, '(({{').replace(/}}/g, '}}))'); + +/** + * Inverse of a workaround defined above. 
+ * @see `escapeLiquidSubstitutionSyntax` + * @param escapedContent Input string in which `{{` / `}}` were rewritten to `(({{` / `}}))` + * @returns String with the original `{{` / `}}` restored + */ +export const unescapeLiquidSubstitutionSyntax = (escapedContent: string): string => + escapedContent.replace(/\(\({{/g, '{{').replace(/}}\)\)/g, '}}'); + +export const countLineAmount = (str: string) => str.split(/\r?\n/).length; diff --git a/src/transform/frontmatter/emplace.ts b/src/transform/frontmatter/emplace.ts new file mode 100644 index 00000000..d42af7aa --- /dev/null +++ b/src/transform/frontmatter/emplace.ts @@ -0,0 +1,26 @@ +import {dump} from 'js-yaml'; + +import {FrontMatter, frontMatterFence, unescapeLiquidSubstitutionSyntax} from './common'; + +export const serializeFrontMatter = (frontMatter: FrontMatter) => { + const dumped = unescapeLiquidSubstitutionSyntax( + dump(frontMatter, {forceQuotes: true, lineWidth: -1}).trim(), + ); + + // This empty object check is a bit naive + // The other option would be to check if all own fields are `undefined`, + // since we exploit passing in `undefined` to remove a field quite a bit + if (dumped === '{}') { + return ''; + } + + return `${frontMatterFence}\n${dumped}\n${frontMatterFence}`; +}; + +export const emplaceSerializedFrontMatter = ( + frontMatterStrippedContent: string, + frontMatter: string, +) => `${frontMatter}${frontMatterStrippedContent}`; + +export const emplaceFrontMatter = (frontMatterStrippedContent: string, frontMatter: FrontMatter) => + emplaceSerializedFrontMatter(frontMatterStrippedContent, serializeFrontMatter(frontMatter)); diff --git a/src/transform/frontmatter/extract.ts b/src/transform/frontmatter/extract.ts new file mode 100644 index 00000000..5c672dcb --- /dev/null +++ b/src/transform/frontmatter/extract.ts @@ -0,0 +1,89 @@ +import {YAMLException, load} from 'js-yaml'; + +import {log} from '../log'; + +import { + FrontMatter, + countLineAmount, + escapeLiquidSubstitutionSyntax, + frontMatterFence, +} from './common'; + +type 
ParseExistingMetadataReturn = { + frontMatter: FrontMatter; + frontMatterStrippedContent: string; + frontMatterLineCount: number; +}; + +const matchMetadata = (fileContent: string) => { + if (!fileContent.startsWith(frontMatterFence)) { + return null; + } + + // Capture the lines between the opening and the closing fence. Search by format: + // --- + // metaName1: metaValue1 + // metaName2: meta value2 + // incorrectMetadata + // --- + const regexpMetadata = '(?<=-{3}\\r?\\n)((.*\\r?\\n)*?)(?=-{3}\\r?\\n)'; + // Consume the closing fence and capture everything after it. Search by format: + // --- + // main content 123 + const regexpFileContent = '-{3}\\r?\\n((.*[\r?\n]*)*)'; + + const regexpParseFileContent = new RegExp(`${regexpMetadata}${regexpFileContent}`, 'gm'); + + return regexpParseFileContent.exec(fileContent); +}; + +const duplicateKeysCompatibleLoad = (yaml: string, filePath: string | undefined) => { + try { + return load(yaml); + } catch (e) { + if (e instanceof YAMLException) { + const duplicateKeysDeprecationWarning = ` + In ${filePath ?? '(unknown)'}: Encountered a YAML parsing exception when processing file metadata: ${e.reason}. + It's highly possible the input file contains duplicate mapping keys. + Will retry processing with necessary compatibility flags. + Please note that this behaviour is DEPRECATED and WILL be removed in a future version + without further notice, so the build WILL fail when supplied with YAML-incompatible meta. 
+ ` + .replace(/^\s+/gm, '') + .replace(/\n/g, ' ') + .trim(); + + log.warn(duplicateKeysDeprecationWarning); + + return load(yaml, {json: true}); + } + + throw e; + } +}; + +export const separateAndExtractFrontMatter = ( + fileContent: string, + filePath?: string, +): ParseExistingMetadataReturn => { + const matches = matchMetadata(fileContent); + + if (matches && matches.length > 0) { + const [, metadata, , metadataStrippedContent] = matches; + + return { + frontMatter: duplicateKeysCompatibleLoad( + escapeLiquidSubstitutionSyntax(metadata), + filePath, + ) as FrontMatter, + frontMatterStrippedContent: metadataStrippedContent, + frontMatterLineCount: countLineAmount(metadata), + }; + } + + return { + frontMatter: {}, + frontMatterStrippedContent: fileContent, + frontMatterLineCount: 0, + }; +}; diff --git a/src/transform/frontmatter/index.ts b/src/transform/frontmatter/index.ts new file mode 100644 index 00000000..d2ea6608 --- /dev/null +++ b/src/transform/frontmatter/index.ts @@ -0,0 +1,4 @@ +export * from './extract'; +export * from './emplace'; +export * from './transformValues'; +export {countLineAmount} from './common'; diff --git a/src/transform/frontmatter/transformValues.ts b/src/transform/frontmatter/transformValues.ts new file mode 100644 index 00000000..59a4d2fe --- /dev/null +++ b/src/transform/frontmatter/transformValues.ts @@ -0,0 +1,22 @@ +import {FrontMatter} from './common'; + +/** Recursively maps every leaf value of `frontMatter` through `valueMapper`; arrays stay arrays. */ +export const transformFrontMatterValues = ( + frontMatter: FrontMatter, + valueMapper: (v: unknown) => unknown, +): FrontMatter => { + const transformInner = (something: unknown): unknown => { + // Arrays first: `typeof [] === 'object'`, so the object branch would turn them into index-keyed objects + if (Array.isArray(something)) { + return something.map((el) => transformInner(el)); + } + if (typeof something === 'object' && something !== null) { + return Object.fromEntries( + Object.entries(something).map(([k, v]) => [k, transformInner(v)]), + ); + } + return valueMapper(something); + }; + + return transformInner(frontMatter) as FrontMatter; +}; diff --git 
a/src/transform/index.ts b/src/transform/index.ts index f841915e..c46f3663 100644 --- a/src/transform/index.ts +++ b/src/transform/index.ts @@ -3,7 +3,7 @@ import type {EnvType, OptionsType, OutputType} from './typings'; import {bold} from 'chalk'; import {log} from './log'; -import liquid from './liquid'; +import liquidSnippet from './liquid'; import initMarkdownit from './md'; function applyLiquid(input: string, options: OptionsType) { @@ -15,7 +15,9 @@ function applyLiquid(input: string, options: OptionsType) { isLiquided = false, } = options; - return disableLiquid || isLiquided ? input : liquid(input, vars, path, {conditionsInCode}); + return disableLiquid || isLiquided + ? input + : liquidSnippet(input, vars, path, {conditionsInCode}); } function handleError(error: unknown, path?: string): never { diff --git a/src/transform/liquid/index.ts b/src/transform/liquid/index.ts index 7490fe67..7a8c4702 100644 --- a/src/transform/liquid/index.ts +++ b/src/transform/liquid/index.ts @@ -1,5 +1,12 @@ import type {Dictionary} from 'lodash'; +import { + countLineAmount, + separateAndExtractFrontMatter, + serializeFrontMatter, + transformFrontMatterValues, +} from '../frontmatter'; + import applySubstitutions from './substitutions'; import {prepareSourceMap} from './sourceMap'; import applyCycles from './cycles'; @@ -66,7 +73,7 @@ function repairCode(str: string, codes: string[]) { return replace(fence, fence, (code) => codes[Number(code)], str); } -function liquid< +function liquidSnippet< B extends boolean = false, C = B extends false ? string : {output: string; sourceMap: Dictionary}, >( @@ -141,6 +148,59 @@ function liquid< return output as unknown as C; } -// 'export default' instead of 'export = ' because of circular dependency with './cycles.ts'. 
-// somehow it breaks import in './cycles.ts' and imports nothing -export default liquid; +type TransformSourceMapOptions = { + emplacedResultOffset: number; + emplacedSourceOffset: number; +}; + +function transformSourceMap( + sourceMap: Dictionary, + {emplacedResultOffset, emplacedSourceOffset}: TransformSourceMapOptions, +) { + return Object.fromEntries( + Object.entries(sourceMap).map(([lineInResult, lineInSource]) => [ + (Number(lineInResult) + emplacedResultOffset).toString(), + (Number(lineInSource) + emplacedSourceOffset).toString(), + ]), + ); +} + +function liquidDocument< + B extends boolean = false, + C = B extends false ? string : {output: string; sourceMap: Dictionary}, +>( + originInput: string, + vars: Record, + path?: string, + settings?: ArgvSettings & {withSourceMap?: B}, +): C { + const {frontMatter, frontMatterStrippedContent, frontMatterLineCount} = + separateAndExtractFrontMatter(originInput, path); + + const transformedFrontMatter = transformFrontMatterValues(frontMatter, (v) => + typeof v === 'string' + ? liquidSnippet(v, vars, path, {...settings, withSourceMap: false}) + : v, + ); + const transformedAndSerialized = serializeFrontMatter(transformedFrontMatter); + // NOTE(review): `transformedAndSerialized` is only used to compute the offset below; the value returned from this function does not include it — confirm the front matter is meant to be dropped from the output + // -1 comes from the fact that the last line in serialized FM is the same as the first line in stripped content + const resultFrontMatterOffset = Math.max(0, countLineAmount(transformedAndSerialized) - 1); + const sourceFrontMatterOffset = Math.max(0, frontMatterLineCount - 1); + + const liquidProcessedContent = liquidSnippet(frontMatterStrippedContent, vars, path, settings); + + return typeof liquidProcessedContent === 'string' + ? 
(liquidProcessedContent as C) + : ({ + output: liquidProcessedContent.output, + sourceMap: transformSourceMap(liquidProcessedContent.sourceMap, { + emplacedResultOffset: resultFrontMatterOffset, + emplacedSourceOffset: sourceFrontMatterOffset, + }), + } as C); +} + +// both default and named exports for convenience +export {liquidDocument, liquidSnippet}; +export default liquidDocument; diff --git a/src/transform/utilsFS.ts b/src/transform/utilsFS.ts index 761040d8..f8accb8b 100644 --- a/src/transform/utilsFS.ts +++ b/src/transform/utilsFS.ts @@ -4,7 +4,7 @@ import {readFileSync, statSync} from 'fs'; import escapeRegExp from 'lodash/escapeRegExp'; import {join, parse, relative, resolve, sep} from 'path'; -import liquid from './liquid'; +import liquidSnippet from './liquid'; import {StateCore} from './typings'; import {defaultTransformLink} from './utils'; @@ -68,7 +68,7 @@ export function getFileTokens(path: string, state: StateCore, options: GetFileTo let sourceMap; if (!disableLiquid) { - const liquidResult = liquid(content, builtVars, path, { + const liquidResult = liquidSnippet(content, builtVars, path, { withSourceMap: true, conditionsInCode, }); diff --git a/test/liquid/cycles.test.ts b/test/liquid/cycles.test.ts index bddf5c76..f7263a00 100644 --- a/test/liquid/cycles.test.ts +++ b/test/liquid/cycles.test.ts @@ -1,6 +1,6 @@ import dedent from 'ts-dedent'; -import liquid from '../../src/transform/liquid'; +import liquidSnippet from '../../src/transform/liquid'; const commentsByPage = [ { @@ -28,13 +28,17 @@ describe('Cycles', () => { describe('location', () => { test('Inline for block', () => { expect( - liquid('Prefix {% for user in users %} {{user}} {% endfor %} Postfix', vars, ''), + liquidSnippet( + 'Prefix {% for user in users %} {{user}} {% endfor %} Postfix', + vars, + '', + ), ).toEqual('Prefix Alice Ivan Petr Postfix'); }); test('Nested inline for block', () => { expect( - liquid( + liquidSnippet( 'Prefix {% for user1 in users %} {% for user2 in 
users %} {{user1}}+{{user2}} {% endfor %} {% endfor %} Postfix', vars, '', @@ -46,7 +50,7 @@ describe('Cycles', () => { test('Multiline for block', () => { expect( - liquid( + liquidSnippet( dedent` Prefix {% for user in users %} @@ -111,12 +115,12 @@ describe('Cycles', () => { Postfix `; - expect(liquid(input, vars, '')).toEqual(result); + expect(liquidSnippet(input, vars, '')).toEqual(result); }); test('Multiline nested for block without indent', () => { expect( - liquid( + liquidSnippet( dedent` Prefix {% for user1 in users %} @@ -148,7 +152,7 @@ describe('Cycles', () => { describe('with conditions, filters, substitutions', () => { test('Test 1', () => { expect( - liquid( + liquidSnippet( 'Prefix {% for user in users2 %}{% if needCapitalize %} {{user | capitalize}}+{{user2}} {% else %} {{user}} {% endif %}{% endfor %} Postfix', vars, '', @@ -160,7 +164,7 @@ describe('Cycles', () => { describe('with code blocks', () => { test('code block before cycle block', () => { expect( - liquid( + liquidSnippet( '```\nCode block\n```\n\n {% for user in users %} {{user}} {% endfor %}', vars, '', @@ -170,7 +174,7 @@ describe('Cycles', () => { test('cycle block in code block', () => { expect( - liquid('```\n{% for user in users %} {{user}} {% endfor %}\n```', vars, '', { + liquidSnippet('```\n{% for user in users %} {{user}} {% endfor %}\n```', vars, '', { conditionsInCode: true, }), ).toEqual('```\nAlice Ivan Petr\n```'); diff --git a/test/liquid/substitutions.test.ts b/test/liquid/substitutions.test.ts index 57f3a97c..7f71f1c3 100644 --- a/test/liquid/substitutions.test.ts +++ b/test/liquid/substitutions.test.ts @@ -1,20 +1,22 @@ -import liquid from '../../src/transform/liquid'; +import liquidSnippet from '../../src/transform/liquid'; describe('Substitutions', () => { test('Should substitute to inline text', () => { - expect(liquid('Hello {{ user.name }}!', {user: {name: 'Alice'}})).toEqual('Hello Alice!'); + expect(liquidSnippet('Hello {{ user.name }}!', {user: {name: 
'Alice'}})).toEqual( + 'Hello Alice!', + ); }); test('Should not substitute variables start with dot', () => { - expect(liquid('Hello {{ .name }}', {})).toEqual('Hello {{ .name }}'); + expect(liquidSnippet('Hello {{ .name }}', {})).toEqual('Hello {{ .name }}'); }); test('Should not substitute variables wrapped not_var', () => { - expect(liquid('Hello not_var{{ user.name }}!', {user: {name: 'Alice'}})).toEqual( + expect(liquidSnippet('Hello not_var{{ user.name }}!', {user: {name: 'Alice'}})).toEqual( 'Hello {{ user.name }}!', ); }); test('Keep not_var syntax', () => { expect( - liquid('Hello not_var{{ user.name }}!', {user: {name: 'Alice'}}, '', { + liquidSnippet('Hello not_var{{ user.name }}!', {user: {name: 'Alice'}}, '', { keepNotVar: true, }), ).toEqual('Hello not_var{{ user.name }}!'); @@ -22,18 +24,18 @@ describe('Substitutions', () => { test('Should return unchanged string if no variables present', () => { const input = 'This is just a string'; - expect(liquid(input, {})).toEqual(input); + expect(liquidSnippet(input, {})).toEqual(input); }); test('Should return unchanged string if variable not found in context', () => { const input = 'Variable {{ notFound }} not found'; - expect(liquid(input, {})).toEqual(input); + expect(liquidSnippet(input, {})).toEqual(input); }); test('Should substitute multiple occurrences of the same variable', () => { const input = 'Repeated {{ variable }} here and also here: {{ variable }}'; const context = {variable: 'value'}; - expect(liquid(input, context)).toEqual('Repeated value here and also here: value'); + expect(liquidSnippet(input, context)).toEqual('Repeated value here and also here: value'); }); describe('Should save type of variable, if possible', () => { @@ -46,39 +48,41 @@ describe('Substitutions', () => { const undefinedVar = undefined; test('Should substitute to string', () => { - expect(liquid('{{ string }}', {string})).toEqual(string); + expect(liquidSnippet('{{ string }}', {string})).toEqual(string); }); 
test('Should substitute to number', () => { - expect(liquid('{{ number }}', {number})).toEqual(number); + expect(liquidSnippet('{{ number }}', {number})).toEqual(number); }); test('Should substitute to boolean', () => { - expect(liquid('{{ boolean }}', {boolean})).toEqual(boolean); + expect(liquidSnippet('{{ boolean }}', {boolean})).toEqual(boolean); }); test('Should substitute to null', () => { - expect(liquid('{{ nullVar }}', {nullVar})).toEqual(nullVar); + expect(liquidSnippet('{{ nullVar }}', {nullVar})).toEqual(nullVar); }); test('Should substitute to array', () => { - expect(liquid('{{ array }}', {array})).toEqual(array); + expect(liquidSnippet('{{ array }}', {array})).toEqual(array); }); test('Should substitute to object', () => { - expect(liquid('{{ object }}', {object})).toEqual(object); + expect(liquidSnippet('{{ object }}', {object})).toEqual(object); }); test('Should not substitute undefined vars', () => { - expect(liquid('{{ undefinedVar }}', {undefinedVar})).toEqual('{{ undefinedVar }}'); + expect(liquidSnippet('{{ undefinedVar }}', {undefinedVar})).toEqual( + '{{ undefinedVar }}', + ); }); test('Should substitute to string if input contains more than one variable', () => { - expect(liquid('{{ number }} {{ boolean }}', {number, boolean})).toEqual( + expect(liquidSnippet('{{ number }} {{ boolean }}', {number, boolean})).toEqual( `${number} ${boolean}`, ); - expect(liquid('{{ number }} postfix', {number})).toEqual(`${number} postfix`); + expect(liquidSnippet('{{ number }} postfix', {number})).toEqual(`${number} postfix`); }); }); }); diff --git a/test/markdownlint-custom-rules/liquidSourceMap.test.ts b/test/markdownlint-custom-rules/liquidSourceMap.test.ts index 7f3732eb..2fe12de8 100644 --- a/test/markdownlint-custom-rules/liquidSourceMap.test.ts +++ b/test/markdownlint-custom-rules/liquidSourceMap.test.ts @@ -1,4 +1,4 @@ -import liquid from '../../src/transform/liquid'; +import liquidSnippet from '../../src/transform/liquid'; import {log} from 
'../../src/transform/log'; const testFn = 'test.md'; @@ -32,7 +32,7 @@ describe('Check source map after liquid', () => { /*7*/ '{% endif %}\n' + /*8*/ 'Postfix'; - const {sourceMap} = liquid(input, vars, testFn, {withSourceMap: true}); + const {sourceMap} = liquidSnippet(input, vars, testFn, {withSourceMap: true}); /* New line Source line 1 Prefix 1 Prefix @@ -55,7 +55,7 @@ describe('Check source map after liquid', () => { /*5*/ ' How are you?\n' + /*6*/ '{% endif %}'; - const {sourceMap} = liquid(input, vars, testFn, {withSourceMap: true}); + const {sourceMap} = liquidSnippet(input, vars, testFn, {withSourceMap: true}); expect(sourceMap).toEqual({'1': '2', '2': '5'}); }); @@ -74,7 +74,7 @@ describe('Check source map after liquid', () => { /*10*/ '{% endif %}\n' + /*11*/ 'Postfix'; - const {sourceMap} = liquid(input, vars, testFn, {withSourceMap: true}); + const {sourceMap} = liquidSnippet(input, vars, testFn, {withSourceMap: true}); expect(sourceMap).toEqual({'1': '1', '2': '3', '5': '9', '6': '11'}); }); @@ -89,7 +89,7 @@ describe('Check source map after liquid', () => { /*6*/ '{% endfor %}\n' + /*7*/ 'Postfix'; - const {sourceMap} = liquid(input, vars, testFn, {withSourceMap: true}); + const {sourceMap} = liquidSnippet(input, vars, testFn, {withSourceMap: true}); expect(sourceMap).toEqual({'1': '1', '4': '5', '11': '7'}); }); @@ -103,21 +103,21 @@ describe('Check source map after liquid', () => { /*5*/ '```\n' + /*6*/ 'Postfix'; - const {sourceMap} = liquid(input, vars, testFn, {withSourceMap: true}); + const {sourceMap} = liquidSnippet(input, vars, testFn, {withSourceMap: true}); expect(sourceMap).toEqual(getDefaultSourceMap(6)); }); it('Should works with fences: 1 line', () => { const input = 'Prefix\n```some code there\n```\nPostfix'; - const {sourceMap} = liquid(input, vars, testFn, {withSourceMap: true}); + const {sourceMap} = liquidSnippet(input, vars, testFn, {withSourceMap: true}); expect(sourceMap).toEqual(getDefaultSourceMap(5)); }); it('Should works 
with fences: inline', () => { const input = 'Prefix\n```some code there```\nPostfix'; - const {sourceMap} = liquid(input, vars, testFn, {withSourceMap: true}); + const {sourceMap} = liquidSnippet(input, vars, testFn, {withSourceMap: true}); expect(sourceMap).toEqual(getDefaultSourceMap(3)); });