Skip to content

Commit

Permalink
perf: make parser faster
Browse files Browse the repository at this point in the history
  • Loading branch information
alexander-akait committed Jul 25, 2024
1 parent 6fa80d5 commit 1cf524f
Show file tree
Hide file tree
Showing 2 changed files with 55 additions and 30 deletions.
9 changes: 7 additions & 2 deletions src/plugins/sources-plugin.js
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@ import {
webpackIgnoreCommentRegexp,
} from "../utils";

const DOUBLE_QUOTE = '"'.charCodeAt(0);
const SINGLE_QUOTE = "'".charCodeAt(0);

export default (options) =>
function process(html) {
const sources = [];
Expand Down Expand Up @@ -71,8 +74,10 @@ export default (options) =>
sourceCodeLocation.attrs[name].endOffset,
);
const isValueQuoted =
attributeAndValue[attributeAndValue.length - 1] === '"' ||
attributeAndValue[attributeAndValue.length - 1] === "'";
attributeAndValue.charCodeAt(attributeAndValue.length - 1) ===
DOUBLE_QUOTE ||
attributeAndValue.charCodeAt(attributeAndValue.length - 1) ===
SINGLE_QUOTE;
const valueStartOffset =
sourceCodeLocation.attrs[name].startOffset +
attributeAndValue.indexOf(attribute.value);
Expand Down
76 changes: 48 additions & 28 deletions src/utils.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,18 +2,24 @@ import path from "path";

import HtmlSourceError from "./HtmlSourceError";

const HORIZONTAL_TAB = "\u0009".charCodeAt(0);
const NEWLINE = "\u000A".charCodeAt(0);
const FORM_FEED = "\u000C".charCodeAt(0);
const CARRIAGE_RETURN = "\u000D".charCodeAt(0);
const SPACE = "\u0020".charCodeAt(0);

function isASCIIWhitespace(character) {
return (
// Horizontal tab
character === "\u0009" ||
character === HORIZONTAL_TAB ||
// New line
character === "\u000A" ||
character === NEWLINE ||
// Form feed
character === "\u000C" ||
character === FORM_FEED ||
// Carriage return
character === "\u000D" ||
character === CARRIAGE_RETURN ||
// Space
character === "\u0020"
character === SPACE
);
}

Expand All @@ -26,6 +32,12 @@ const regexLeadingCommasOrSpaces = /^[, \t\n\r\u000c]+/;
const regexLeadingNotSpaces = /^[^ \t\n\r\u000c]+/;
const regexTrailingCommas = /[,]+$/;
const regexNonNegativeInteger = /^\d+$/;
const COMMA = ",".charCodeAt(0);
const LEFT_PARENTHESIS = "(".charCodeAt(0);
const RIGHT_PARENTHESIS = ")".charCodeAt(0);
const SMALL_LETTER_W = "w".charCodeAt(0);
const SMALL_LETTER_X = "x".charCodeAt(0);
const SMALL_LETTER_H = "h".charCodeAt(0);

// ( Positive or negative or unsigned integers or decimals, with or without exponents.
// Must include at least one digit.
Expand Down Expand Up @@ -93,7 +105,7 @@ export function parseSrcset(input) {
// 8. If url ends with a U+002C COMMA character (,), follow these sub steps:
// (1). Remove all trailing U+002C COMMA characters from url. If this removed
// more than one character, that is a parse error.
if (url.slice(-1) === ",") {
if (url.charCodeAt(url.length - 1) === COMMA) {
url = url.replace(regexTrailingCommas, "");

// (Jump ahead to step 9 to skip tokenization and just push the candidate).
Expand Down Expand Up @@ -124,7 +136,7 @@ export function parseSrcset(input) {
// eslint-disable-next-line no-constant-condition
while (true) {
// 8.4. Let c be the character at position.
c = input.charAt(position);
c = input.charCodeAt(position);

// Do the following depending on the value of state.
// For the purpose of this step, "EOF" is a special character representing
Expand All @@ -149,7 +161,7 @@ export function parseSrcset(input) {
// Advance position to the next character in input. If current descriptor
// is not empty, append current descriptor to descriptors. Jump to the step
// labeled descriptor parser.
else if (c === ",") {
else if (c === COMMA) {
position += 1;

if (currentDescriptor) {
Expand All @@ -162,14 +174,14 @@ export function parseSrcset(input) {
}
// U+0028 LEFT PARENTHESIS (()
// Append c to current descriptor. Set state to in parens.
else if (c === "\u0028") {
currentDescriptor += c;
else if (c === LEFT_PARENTHESIS) {
currentDescriptor += input.charAt(position);
state = "in parens";
}
// EOF
// If current descriptor is not empty, append current descriptor to
// descriptors. Jump to the step labeled descriptor parser.
else if (c === "") {
else if (isNaN(c)) {
if (currentDescriptor) {
descriptors.push(currentDescriptor);
}
Expand All @@ -181,29 +193,29 @@ export function parseSrcset(input) {
// Anything else
// Append c to current descriptor.
} else {
currentDescriptor += c;
currentDescriptor += input.charAt(position);
}
}
// In parens
else if (state === "in parens") {
// U+0029 RIGHT PARENTHESIS ())
// Append c to current descriptor. Set state to in descriptor.
if (c === ")") {
currentDescriptor += c;
if (c === RIGHT_PARENTHESIS) {
currentDescriptor += input.charAt(position);
state = "in descriptor";
}
// EOF
// Append current descriptor to descriptors. Jump to the step labeled
// descriptor parser.
else if (c === "") {
else if (isNaN(c)) {
descriptors.push(currentDescriptor);
parseDescriptors();
return;
}
// Anything else
// Append c to current descriptor.
else {
currentDescriptor += c;
currentDescriptor += input.charAt(position);
}
}
// After descriptor
Expand All @@ -213,7 +225,7 @@ export function parseSrcset(input) {
// Space character: Stay in this state.
}
// EOF: Jump to the step labeled descriptor parser.
else if (c === "") {
else if (isNaN(c)) {
parseDescriptors();
return;
}
Expand Down Expand Up @@ -258,14 +270,14 @@ export function parseSrcset(input) {
for (i = 0; i < descriptors.length; i++) {
desc = descriptors[i];

lastChar = desc[desc.length - 1];
lastChar = desc[desc.length - 1].charCodeAt(0);
value = desc.substring(0, desc.length - 1);
intVal = parseInt(value, 10);
floatVal = parseFloat(value);

// If the descriptor consists of a valid non-negative integer followed by
// a U+0077 LATIN SMALL LETTER W character
if (regexNonNegativeInteger.test(value) && lastChar === "w") {
if (regexNonNegativeInteger.test(value) && lastChar === SMALL_LETTER_W) {
// If width and density are not both absent, then let error be yes.
if (w || d) {
pError = true;
Expand All @@ -282,7 +294,7 @@ export function parseSrcset(input) {
}
// If the descriptor consists of a valid floating-point number followed by
// a U+0078 LATIN SMALL LETTER X character
else if (regexFloatingPoint.test(value) && lastChar === "x") {
else if (regexFloatingPoint.test(value) && lastChar === SMALL_LETTER_X) {
// If width, density and future-compat-h are not all absent, then let error
// be yes.
if (w || d || h) {
Expand All @@ -300,7 +312,10 @@ export function parseSrcset(input) {
}
// If the descriptor consists of a valid non-negative integer followed by
// a U+0068 LATIN SMALL LETTER H character
else if (regexNonNegativeInteger.test(value) && lastChar === "h") {
else if (
regexNonNegativeInteger.test(value) &&
lastChar === SMALL_LETTER_H
) {
// If height and density are not both absent, then let error be yes.
if (h || d) {
pError = true;
Expand Down Expand Up @@ -354,14 +369,18 @@ export function parseSrc(input) {
}

let start = 0;
for (; start < input.length && isASCIIWhitespace(input[start]); start++);
for (
;
start < input.length && isASCIIWhitespace(input.charCodeAt(start));
start++
);

if (start === input.length) {
throw new Error("Must be non-empty");
}

let end = input.length - 1;
for (; end > -1 && isASCIIWhitespace(input[end]); end--);
for (; end > -1 && isASCIIWhitespace(input.charCodeAt(end)); end--);
end += 1;

let value = input;
Expand Down Expand Up @@ -430,12 +449,13 @@ export function isURLRequestable(url, options = {}) {

const WINDOWS_PATH_SEPARATOR_REGEXP = /\\/g;
const RELATIVE_PATH_REGEXP = /^\.\.?[/\\]/;
const SLASH = "/".charCodeAt(0);

const absoluteToRequest = (context, maybeAbsolutePath) => {
if (maybeAbsolutePath[0] === "/") {
if (maybeAbsolutePath.charCodeAt(0) === SLASH) {
if (
maybeAbsolutePath.length > 1 &&
maybeAbsolutePath[maybeAbsolutePath.length - 1] === "/"
maybeAbsolutePath.charCodeAt(maybeAbsolutePath.length - 1) === SLASH
) {
// this 'path' is actually a regexp generated by dynamic requires.
// Don't treat it as an absolute path.
Expand Down Expand Up @@ -505,7 +525,7 @@ export function requestify(context, request) {
.replace(/[\t\n\r]/g, "")
.replace(/\\/g, "/");

if (isWindowsAbsolutePath || newRequest[0] === "/") {
if (isWindowsAbsolutePath || newRequest.charCodeAt(0) === SLASH) {
return newRequest;
}

Expand Down Expand Up @@ -1240,7 +1260,7 @@ export function getImportCode(html, loaderContext, imports, options) {
return `// Imports\n${code}`;
}

const SLASH = "\\".charCodeAt(0);
const BACKSLASH = "\\".charCodeAt(0);
const BACKTICK = "`".charCodeAt(0);
const DOLLAR = "$".charCodeAt(0);

Expand All @@ -1251,7 +1271,7 @@ export function convertToTemplateLiteral(str) {
const code = str.charCodeAt(i);

escapedString +=
code === SLASH || code === BACKTICK || code === DOLLAR
code === BACKSLASH || code === BACKTICK || code === DOLLAR
? `\\${str[i]}`
: str[i];
}
Expand Down

0 comments on commit 1cf524f

Please sign in to comment.