-
Notifications
You must be signed in to change notification settings - Fork 24
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* Rename unicode helpers to make them less confusing * fix tests
- Loading branch information
Showing
13 changed files
with
94 additions
and
41 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,14 +1,64 @@ | ||
const decoder = new TextDecoder(); | ||
const encoder = new TextEncoder(); | ||
// Shared UTF-8 encoder and decoder, constructed once and reused by the
// helpers in this file instead of allocating a new codec per call.
const utf8Enc = new TextEncoder(); | ||
const utf8Dec = new TextDecoder(); | ||
// Throwaway destination for `encodeInto` when only the byte count matters.
// Four bytes is the longest UTF-8 encoding of a single code point; the
// contents are overwritten on every use and never read back.
const scratch = new Uint8Array(4); | ||
|
||
export function toUTF8(buf: Uint8Array): string { | ||
return decoder.decode(buf); | ||
/**
 * Decode UTF-8 encoded bytes into a JS (UTF-16) string.
 *
 * @param buf - The UTF-8 bytes to decode.
 * @returns The decoded string.
 */
export function bytesToString(buf: Uint8Array): string {
  const decoded = utf8Dec.decode(buf);
  return decoded;
}
|
||
export function fromUTF8(str: string): Uint8Array { | ||
return encoder.encode(str); | ||
/**
 * Encode a JS (UTF-16) string as UTF-8 bytes.
 *
 * @param str - The string to encode.
 * @returns The UTF-8 encoding of `str`.
 */
export function stringToBytes(str: string): Uint8Array {
  const encoded = utf8Enc.encode(str);
  return encoded;
}
|
||
export function strByteLen(str: string): number { | ||
return fromUTF8(str).length; | ||
/**
 * Count how many bytes the string `s` occupies once encoded as UTF-8.
 *
 * The string is walked one code point at a time; each code point is encoded
 * into the shared `scratch` buffer and the reported byte counts are summed,
 * so the full encoded output is never materialised.
 *
 * @param s - The string to measure.
 * @returns The UTF-8 byte length of `s`.
 */
export function strUTF8Len(s: string): number {
  let total = 0;

  for (const codePoint of s) {
    const { written } = utf8Enc.encodeInto(codePoint, scratch);
    total += written;
  }

  return total;
}
|
||
/**
 * Translate a UTF-16 index into `s` to the matching byte offset in the UTF-8
 * encoding of `s`.
 *
 * `idx` counts UTF-16 code units, i.e. the same unit as `s.length` and
 * `s.charAt`. It must fall on a code point boundary; `idx === s.length` is
 * accepted and yields the total encoded length.
 *
 * @param s - The string being indexed.
 * @param idx - UTF-16 code unit index into `s`, between 0 and `s.length`.
 * @returns The number of UTF-8 bytes that precede position `idx`.
 * @throws Error if `idx` is negative, past the end of `s`, not an integer, or
 * points into the middle of a surrogate pair.
 */
export function strUTF16IndexToUTF8Offset(s: string, idx: number): number {
  let units = 0; // UTF-16 code units consumed so far
  let off = 0; // UTF-8 bytes consumed so far

  // String iteration yields whole code points, so `units` only ever lands on
  // code point boundaries.
  for (const ch of s) {
    if (units >= idx) {
      break;
    }

    // `ch.length` is the code point's true UTF-16 width: 1 for anything in the
    // BMP (including 3-byte UTF-8 characters such as "€"), 2 for astral code
    // points. Deriving the width from the UTF-8 byte count gets the 3-byte
    // case wrong.
    units += ch.length;
    off += utf8Enc.encodeInto(ch, scratch).written;
  }

  // Anything other than an exact hit means `idx` was out of range or pointed
  // inside a surrogate pair.
  if (units !== idx) {
    throw new Error(`No unicode character index ${idx} in string ${s}.`);
  }

  return off;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters