Skip to content

Commit

Permalink
Rename unicode helpers (#250)
Browse files Browse the repository at this point in the history
* Rename unicode helpers to make them less confusing

* fix tests
  • Loading branch information
cd1m0 authored Jan 18, 2024
1 parent 6b3cb72 commit a85a990
Show file tree
Hide file tree
Showing 13 changed files with 94 additions and 41 deletions.
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { strByteLen, toUTF8 } from "../../misc";
import { bytesToString, strUTF8Len } from "../../misc";
import { ASTNode } from "../ast_node";
import { ASTContext, ASTNodePostprocessor, FileMap } from "../ast_reader";
import { RawComment, parseComments } from "../comments";
Expand Down Expand Up @@ -31,7 +31,7 @@ export class StructuredDocumentationReconstructor {
source: Uint8Array
): StructuredDocumentation | undefined {
const [from, to, sourceIndex] = coords;
const fragment = toUTF8(source.slice(from, to));
const fragment = bytesToString(source.slice(from, to));

const parsedCommentsSoup = parseComments(fragment);

Expand Down Expand Up @@ -66,9 +66,9 @@ export class StructuredDocumentationReconstructor {
return undefined;
}

const byteOffsetFromFragment = strByteLen(fragment.slice(0, lastComment.loc.start));
const byteOffsetFromFragment = strUTF8Len(fragment.slice(0, lastComment.loc.start));
const offset = from + byteOffsetFromFragment;
const length = strByteLen(lastComment.text);
const length = strUTF8Len(lastComment.text);
const src = `${offset}:${length}:${sourceIndex}`;

return new StructuredDocumentation(0, src, lastComment.internalText.trim());
Expand Down
4 changes: 2 additions & 2 deletions src/ast/writing/writer.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { strByteLen } from "../../misc";
import { strUTF8Len } from "../../misc";
import { ASTNode, ASTNodeConstructor } from "../ast_node";
import { YulNode } from "../implementation/statement/inline_assembly";
import { SourceFormatter } from "./formatter";
Expand Down Expand Up @@ -117,7 +117,7 @@ export class ASTWriter {
for (const element of current) {
if (typeof element === "string") {
source += element;
size += strByteLen(element);
size += strUTF8Len(element);
} else {
const [node, nodeDesc] = element;
const start = size;
Expand Down
4 changes: 2 additions & 2 deletions src/bin/compile.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import {
ASTNodeFormatter,
ASTReader,
ASTWriter,
bytesToString,
CACHE_DIR,
CompilationOutput,
CompileFailedError,
Expand All @@ -32,7 +33,6 @@ import {
PrettyFormatter,
SourceUnit,
StateVariableVisibility,
toUTF8,
VariableDeclaration,
XPath
} from "..";
Expand Down Expand Up @@ -326,7 +326,7 @@ function error(message: string): never {
data.sources[key] = {};
}

data.sources[key].source = toUTF8(value);
data.sources[key].source = bytesToString(value);
}
}

Expand Down
4 changes: 2 additions & 2 deletions src/compile/compiler_selection.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { toUTF8 } from "../misc";
import { bytesToString } from "../misc";
import { CompilerSeries, CompilerVersions } from "./constants";
import { extractSpecifiersFromSource, getCompilerVersionsBySpecifiers } from "./version";

Expand Down Expand Up @@ -80,7 +80,7 @@ export class VersionDetectionStrategy implements CompilerVersionSelectionStrateg
fallback: CompilerVersionSelectionStrategy,
descending = true
) {
this.sources = sources.map(toUTF8);
this.sources = sources.map(bytesToString);
this.fallback = fallback;
this.descending = descending;
}
Expand Down
4 changes: 2 additions & 2 deletions src/compile/inference/imports.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import fse from "fs-extra";
import { dirname, normalize } from "path";
import { CompileInferenceError, ImportResolver, Remapping } from "..";
import { FileMap, assert, toUTF8 } from "../..";
import { FileMap, assert, bytesToString } from "../..";
import {
AnyFileLevelNode,
FileLevelNodeKind,
Expand Down Expand Up @@ -161,7 +161,7 @@ export async function findAllFiles(
let flds: AnyFileLevelNode[];

try {
flds = parseFileLevelDefinitions(toUTF8(content));
flds = parseFileLevelDefinitions(bytesToString(content));
} catch (e: any) {
if (e instanceof PeggySyntaxError) {
const start = e.location.start.offset;
Expand Down
4 changes: 2 additions & 2 deletions src/compile/input.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import { FileMap } from "../ast";
import { toUTF8 } from "../misc";
import { bytesToString } from "../misc";
import { CompilationOutput } from "./constants";

export interface PartialSolcInput {
Expand Down Expand Up @@ -81,7 +81,7 @@ export function createCompilerInput(
partialInp.sources = {};

for (const [fileName, content] of files.entries()) {
partialInp.sources[fileName] = { content: toUTF8(content) };
partialInp.sources[fileName] = { content: bytesToString(content) };
}

const input = partialInp as SolcInput;
Expand Down
8 changes: 4 additions & 4 deletions src/compile/utils.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import fse from "fs-extra";
import path from "path";
import { FileSystemResolver, getCompilerForVersion, LocalNpmResolver } from ".";
import { assert, fromUTF8 } from "../misc";
import { assert, stringToBytes } from "../misc";
import {
CompilerVersionSelectionStrategy,
LatestVersionInEachSeriesStrategy,
Expand Down Expand Up @@ -114,7 +114,7 @@ export function parsePathRemapping(remapping: string[]): Remapping[] {
function fillFilesFromSources(files: FileMap, sources: { [fileName: string]: any }): void {
for (const [fileName, section] of Object.entries(sources)) {
if (section && typeof section.source === "string") {
files.set(fileName, fromUTF8(section.source));
files.set(fileName, stringToBytes(section.source));
}
}
}
Expand Down Expand Up @@ -209,7 +209,7 @@ export async function compileSourceString(
const resolvers = [fsResolver, npmResolver];

const parsedRemapping = parsePathRemapping(remapping);
const files = new Map([[fileName, fromUTF8(sourceCode)]]);
const files = new Map([[fileName, stringToBytes(sourceCode)]]);
const resolvedFileNames = new Map([[fileName, fileName]]);

await findAllFiles(files, resolvedFileNames, parsedRemapping, resolvers);
Expand Down Expand Up @@ -386,7 +386,7 @@ export async function compileJsonData(

if (consistentlyContainsOneOf(sources, "source")) {
for (const [fileName, fileData] of Object.entries<{ source: string }>(sources)) {
files.set(fileName, fromUTF8(fileData.source));
files.set(fileName, stringToBytes(fileData.source));
}

const compilerVersionStrategy = getCompilerVersionStrategy([...files.values()], version);
Expand Down
66 changes: 58 additions & 8 deletions src/misc/unicode.ts
Original file line number Diff line number Diff line change
@@ -1,14 +1,64 @@
const decoder = new TextDecoder();
const encoder = new TextEncoder();
const utf8Enc = new TextEncoder();
const utf8Dec = new TextDecoder();
const scratch = new Uint8Array(4);

export function toUTF8(buf: Uint8Array): string {
return decoder.decode(buf);
/**
* Convert UTF-8 encoded bytes into a JS UTF-16 string
*/
export function bytesToString(buf: Uint8Array): string {
return utf8Dec.decode(buf);
}

export function fromUTF8(str: string): Uint8Array {
return encoder.encode(str);
/**
* Convert a JS UTF-16 string into UTF-8 encoded bytes
*/
export function stringToBytes(str: string): Uint8Array {
return utf8Enc.encode(str);
}

export function strByteLen(str: string): number {
return fromUTF8(str).length;
/**
 * Return how many bytes the JS string `s` occupies once encoded as UTF-8.
 *
 * The string is walked one code point at a time (string iteration, not
 * UTF-16 code unit indexing). Each code point is encoded into the shared
 * `scratch` buffer purely to learn how many bytes were written; the encoded
 * bytes themselves are discarded.
 *
 * @param s - string to measure
 * @returns the UTF-8 byte length of `s` (0 for the empty string)
 */
export function strUTF8Len(s: string): number {
    const codePoints = s[Symbol.iterator]();
    let total = 0;

    for (let step = codePoints.next(); !step.done; step = codePoints.next()) {
        const { written } = utf8Enc.encodeInto(step.value, scratch);

        total += written;
    }

    return total;
}

/**
 * Number of bytes that the first code point of `ch` occupies in UTF-8.
 *
 * The width is derived arithmetically from the code point value, so no
 * encoder or scratch buffer is involved. A lone surrogate (0xD800-0xDFFF)
 * falls in the 3-byte range, which agrees with the 3-byte U+FFFD replacement
 * character that `TextEncoder` emits for unpaired surrogates.
 */
function utf8SizeOfCodePoint(ch: string): number {
    const codePoint = ch.codePointAt(0);

    if (codePoint === undefined) {
        // Empty string: nothing to encode.
        return 0;
    }

    if (codePoint < 0x80) {
        return 1;
    }

    if (codePoint < 0x800) {
        return 2;
    }

    if (codePoint < 0x10000) {
        return 3;
    }

    return 4;
}

/**
 * Given a JS string `s` and a UTF-16 index `idx` into it, compute the byte
 * offset of that position in the UTF-8 encoding of `s`.
 *
 * `idx` must sit on a code point boundary: the index of the first code unit
 * of a character, or `s.length` (one past the end).
 *
 * The UTF-16 position is advanced by the real code unit count of each code
 * point (`ch.length`: 1 for BMP characters, 2 for surrogate pairs). Deriving
 * it from the UTF-8 width is incorrect: characters in U+0800-U+FFFF (e.g.
 * "€", CJK) take 3 bytes in UTF-8 but only a single UTF-16 code unit.
 *
 * @param s - string whose UTF-8 encoding is being indexed
 * @param idx - UTF-16 code unit index into `s`
 * @returns byte offset of position `idx` in the UTF-8 encoding of `s`
 * @throws Error if `idx` is negative, not an integer, greater than
 * `s.length`, or points into the middle of a surrogate pair
 */
export function strUTF16IndexToUTF8Offset(s: string, idx: number): number {
    let utf16Pos = 0;
    let utf8Off = 0;

    for (const ch of s) {
        // Stop as soon as the target is reached or overshot; the check below
        // tells the two cases apart.
        if (utf16Pos >= idx) {
            break;
        }

        utf16Pos += ch.length;
        utf8Off += utf8SizeOfCodePoint(ch);
    }

    if (utf16Pos === idx) {
        return utf8Off;
    }

    throw new Error(`No unicode character index ${idx} in string ${s}.`);
}
6 changes: 3 additions & 3 deletions test/unit/ast/ast_node.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,10 @@ import {
ASTReader,
Block,
compileJson,
fromUTF8,
FunctionDefinition,
Literal,
SourceUnit
SourceUnit,
stringToBytes
} from "../../../src";

describe("ASTNode", () => {
Expand Down Expand Up @@ -102,7 +102,7 @@ describe("ASTNode", () => {
it("extractSourceFragment()", () => {
const increment = nodes[nodes.length - 2];

expect(increment.extractSourceFragment(source)).toEqual(fromUTF8("a++"));
expect(increment.extractSourceFragment(source)).toEqual(stringToBytes("a++"));
});
});
}
Expand Down
8 changes: 4 additions & 4 deletions test/unit/ast/unicode.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,11 @@ import {
SrcRangeMap,
StructuredDocumentation,
assert,
bytesToString,
compileSol,
compileSourceString,
detectCompileErrors,
fromUTF8,
toUTF8
stringToBytes
} from "../../../src";

const samples: string[] = [
Expand All @@ -30,7 +30,7 @@ async function strToAst(
contents: string,
version: string
): Promise<[SourceUnit, ASTReader]> {
const sources: FileMap = new Map([[name, fromUTF8(contents)]]);
const sources: FileMap = new Map([[name, stringToBytes(contents)]]);
const canonicalResult = await compileSourceString(name, contents, version);

const errors = detectCompileErrors(canonicalResult.data);
Expand Down Expand Up @@ -85,7 +85,7 @@ describe("Unicode tests", () => {

for (const doc of docs) {
const coords = doc.sourceInfo;
const actual = toUTF8(
const actual = bytesToString(
contents.slice(coords.offset, coords.offset + coords.length)
).trim();

Expand Down
4 changes: 2 additions & 2 deletions test/unit/compile/compiler_selection.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,11 @@ import {
CompilerVersions07,
CompilerVersions08,
CompilerVersionSelectionStrategy,
fromUTF8,
LatestAndFirstVersionInEachSeriesStrategy,
LatestCompilerVersion,
LatestVersionInEachSeriesStrategy,
RangeVersionStrategy,
stringToBytes,
VersionDetectionStrategy
} from "../../../src";

Expand Down Expand Up @@ -180,7 +180,7 @@ describe("VersionDetectionStrategy", () => {
it(`Returns ${JSON.stringify(range)} for ${JSON.stringify(source)} and ${
fallback.constructor.name
} in constructor`, () => {
const strategy = new VersionDetectionStrategy([fromUTF8(source)], fallback);
const strategy = new VersionDetectionStrategy([stringToBytes(source)], fallback);

expect(strategy.select()).toEqual(range);
});
Expand Down
6 changes: 3 additions & 3 deletions test/unit/compile/inference/findAllFiles.spec.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import expect from "expect";
import fse from "fs-extra";
import { join } from "path";
import { FileMap, FileSystemResolver, findAllFiles, fromUTF8 } from "../../../../src";
import { FileMap, FileSystemResolver, findAllFiles, stringToBytes } from "../../../../src";

const SAMPLES_DIR = join("test", "samples", "solidity");

Expand Down Expand Up @@ -63,7 +63,7 @@ describe("findAllFiles() throws proper errors", () => {
const files: FileMap = new Map([
[
"foo.sol",
fromUTF8(`import a
stringToBytes(`import a
contract Foo {
}
`)
Expand All @@ -79,7 +79,7 @@ contract Foo {
const files: FileMap = new Map([
[
"foo.sol",
fromUTF8(`import "a.sol";
stringToBytes(`import "a.sol";
contract Foo {
}
`)
Expand Down
9 changes: 6 additions & 3 deletions test/unit/compile/utils.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,12 @@ import {
CompilerKind,
detectCompileErrors,
FileMap,
fromUTF8,
getCompilerForVersion,
LatestAndFirstVersionInEachSeriesStrategy,
LatestCompilerVersion,
NativeCompiler,
parsePathRemapping,
stringToBytes,
WasmCompiler
} from "../../../src";

Expand Down Expand Up @@ -102,9 +102,12 @@ describe("Compile general utils", () => {
const expectedFiles: FileMap = new Map([
[
"./test/sol_files/json_code/B.sol",
fromUTF8("import './A.sol';\n\ncontract B {\n int16 test;\n}\n")
stringToBytes("import './A.sol';\n\ncontract B {\n int16 test;\n}\n")
],
["./test/sol_files/json_code/A.sol", fromUTF8("contract A {\n uint8 test;\n}\n")]
[
"./test/sol_files/json_code/A.sol",
stringToBytes("contract A {\n uint8 test;\n}\n")
]
]);

const cases: Array<[string, string | undefined, RegExp | undefined]> = [
Expand Down

0 comments on commit a85a990

Please sign in to comment.