Skip to content

Commit

Permalink
[@huggingface/jinja] Fix escaped characters (#416)
Browse files Browse the repository at this point in the history
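Improves parsing of escaped characters (newlines, tabs, quotes, backslashes, etc.) by resolving them in the lexer instead of at runtime. Also adds a unit test covering escaped characters.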
  • Loading branch information
xenova committed Dec 15, 2023
1 parent 363e8aa commit 1afd5cf
Show file tree
Hide file tree
Showing 3 changed files with 63 additions and 6 deletions.
29 changes: 29 additions & 0 deletions packages/jinja/src/lexer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,18 @@ const ORDERED_MAPPING_TABLE: [string, TokenType][] = [
["=", TOKEN_TYPES.Equals],
];

/**
 * Escape-sequence lookup table: the key is the character that follows a
 * backslash in the source, the value is the character it stands for.
 *
 * `consumeWhile` in the tokenizer consults this table; a character with no
 * entry here is rejected there with a `SyntaxError`.
 */
const ESCAPE_CHARACTERS = new Map<string, string>([
	// Whitespace / control characters
	["n", "\n"], // line feed
	["t", "\t"], // horizontal tab
	["r", "\r"], // carriage return
	["b", "\b"], // backspace
	["f", "\f"], // form feed
	["v", "\v"], // vertical tab

	// Characters that escape to themselves
	["'", "'"], // single quote
	['"', '"'], // double quote
	["\\", "\\"], // backslash
]);

/**
* Generate a list of tokens from a source string.
*/
Expand All @@ -135,6 +147,23 @@ export function tokenize(source: string): Token[] {
const consumeWhile = (predicate: (char: string) => boolean): string => {
let str = "";
while (predicate(src[cursorPosition])) {
// Check for escaped characters
if (src[cursorPosition] === "\\") {
// Consume the backslash
++cursorPosition;
// Check for end of input
if (cursorPosition >= src.length) throw new SyntaxError("Unexpected end of input");

// Add the escaped character
const escaped = src[cursorPosition++];
const unescaped = ESCAPE_CHARACTERS.get(escaped);
if (unescaped === undefined) {
throw new SyntaxError(`Unexpected escaped character: ${escaped}`);
}
str += unescaped;
continue;
}

str += src[cursorPosition++];
if (cursorPosition >= src.length) throw new SyntaxError("Unexpected end of input");
}
Expand Down
7 changes: 1 addition & 6 deletions packages/jinja/src/runtime.ts
Original file line number Diff line number Diff line change
Expand Up @@ -486,12 +486,7 @@ export class Interpreter {
case "NumericLiteral":
return new NumericValue(Number((statement as NumericLiteral).value));
case "StringLiteral":
return new StringValue(
(statement as StringLiteral).value
// Unescape special characters
.replaceAll("\\n", "\n")
.replaceAll("\\t", "\t")
);
return new StringValue((statement as StringLiteral).value);
case "BooleanLiteral":
return new BooleanValue((statement as BooleanLiteral).value);
case "Identifier":
Expand Down
33 changes: 33 additions & 0 deletions packages/jinja/test/templates.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,9 @@ const TEST_STRINGS = {
MEMBERSHIP: `|{{ 0 in arr }}|{{ 1 in arr }}|{{ true in arr }}|{{ false in arr }}|{{ 'a' in arr }}|{{ 'b' in arr }}|`,
MEMBERSHIP_NEGATION_1: `|{{ not 0 in arr }}|{{ not 1 in arr }}|{{ not true in arr }}|{{ not false in arr }}|{{ not 'a' in arr }}|{{ not 'b' in arr }}|`,
MEMBERSHIP_NEGATION_2: `|{{ 0 not in arr }}|{{ 1 not in arr }}|{{ true not in arr }}|{{ false not in arr }}|{{ 'a' not in arr }}|{{ 'b' not in arr }}|`,

// Escaped characters
ESCAPED_CHARS: `{{ '\\n' }}{{ '\\t' }}{{ '\\'' }}{{ '\\"' }}{{ '\\\\' }}{{ '|\\n|\\t|\\'|\\"|\\\\|' }}`,
};

const TEST_PARSED = {
Expand Down Expand Up @@ -1019,6 +1022,28 @@ const TEST_PARSED = {
{ value: "}}", type: "CloseExpression" },
{ value: "|", type: "Text" },
],

// Escaped characters
ESCAPED_CHARS: [
{ value: "{{", type: "OpenExpression" },
{ value: "\n", type: "StringLiteral" },
{ value: "}}", type: "CloseExpression" },
{ value: "{{", type: "OpenExpression" },
{ value: "\t", type: "StringLiteral" },
{ value: "}}", type: "CloseExpression" },
{ value: "{{", type: "OpenExpression" },
{ value: "'", type: "StringLiteral" },
{ value: "}}", type: "CloseExpression" },
{ value: "{{", type: "OpenExpression" },
{ value: '"', type: "StringLiteral" },
{ value: "}}", type: "CloseExpression" },
{ value: "{{", type: "OpenExpression" },
{ value: "\\", type: "StringLiteral" },
{ value: "}}", type: "CloseExpression" },
{ value: "{{", type: "OpenExpression" },
{ value: `|\n|\t|'|"|\\|`, type: "StringLiteral" },
{ value: "}}", type: "CloseExpression" },
],
};

const TEST_CONTEXT = {
Expand Down Expand Up @@ -1115,6 +1140,9 @@ const TEST_CONTEXT = {
MEMBERSHIP_NEGATION_2: {
arr: [0, true, "a"],
},

// Escaped characters
ESCAPED_CHARS: {},
};

const EXPECTED_OUTPUTS = {
Expand Down Expand Up @@ -1173,6 +1201,11 @@ const EXPECTED_OUTPUTS = {
MEMBERSHIP: "|true|false|true|false|true|false|",
MEMBERSHIP_NEGATION_1: "|false|true|false|true|false|true|",
MEMBERSHIP_NEGATION_2: "|false|true|false|true|false|true|",

// Escaped characters
// NOTE: Since `trim_blocks` is enabled, we remove the first newline after the template tag,
// meaning the first newline in the output is not present
ESCAPED_CHARS: `\t'"\\|\n|\t|'|"|\\|`,
};

describe("Templates", () => {
Expand Down

0 comments on commit 1afd5cf

Please sign in to comment.