ISO-2022-JP encoder: reject U+000E, U+000F and U+001B in the ASCII and Roman states.

What this patch does, file by file:

lib/encoding.js
  1) When end-of-stream is reached outside the ASCII state, the encoder state
     is now set back to ASCII before the escape bytes 0x1B 0x28 0x42 are
     returned.
  2) A new step 3 returns an encoder error with U+FFFD when the state is ASCII
     or Roman and the code point is U+000E, U+000F or U+001B.
  3) The Roman-state condition gains an outer pair of parentheses, so the
     state test now guards the U+00A5 / U+203E alternative as well as the
     ASCII alternative.
  4) The step-number comments after the new step are shifted up by one
     (3..13 become 4..14).

test/test-misc.js
  5) Adds a regression test for https://github.com/whatwg/encoding/issues/15
     that expects a TypeError when encoding U+00A5 followed by U+000E.

NOTE(review): this copy was rebuilt from a whitespace-collapsed version of the
patch. Tokens are unchanged and the line counts of every hunk agree with its
header, but leading indentation and the width of blank context lines are a
best guess. Confirm the context against blobs fce81b9 (lib/encoding.js) and
989b0df (test/test-misc.js), or apply with whitespace-insensitive matching.

diff --git a/lib/encoding.js b/lib/encoding.js
index fce81b9..414e863 100644
--- a/lib/encoding.js
+++ b/lib/encoding.js
@@ -2601,6 +2601,7 @@ if (typeof module !== "undefined" && module.exports) {
       if (code_point === end_of_stream &&
           iso2022jp_state !== states.ASCII) {
         stream.prepend(code_point);
+        iso2022jp_state = states.ASCII;
         return [0x1B, 0x28, 0x42];
       }
 
@@ -2609,19 +2610,28 @@ if (typeof module !== "undefined" && module.exports) {
       if (code_point === end_of_stream && iso2022jp_state === states.ASCII)
         return finished;
 
-      // 3. If iso-2022-jp encoder state is ASCII and code point is an
+      // 3. If ISO-2022-JP encoder state is ASCII or Roman, and code
+      // point is U+000E, U+000F, or U+001B, return error with U+FFFD.
+      if ((iso2022jp_state === states.ASCII ||
+           iso2022jp_state === states.Roman) &&
+          (code_point === 0x000E || code_point === 0x000F ||
+           code_point === 0x001B)) {
+        return encoderError(0xFFFD);
+      }
+
+      // 4. If iso-2022-jp encoder state is ASCII and code point is an
       // ASCII code point, return a byte whose value is code point.
       if (iso2022jp_state === states.ASCII &&
           isASCIICodePoint(code_point))
         return code_point;
 
-      // 4. If iso-2022-jp encoder state is Roman and code point is an
+      // 5. If iso-2022-jp encoder state is Roman and code point is an
       // ASCII code point, excluding U+005C and U+007E, or is U+00A5
       // or U+203E, run these substeps:
       if (iso2022jp_state === states.Roman &&
-          (isASCIICodePoint(code_point) &&
+          ((isASCIICodePoint(code_point) &&
            code_point !== 0x005C && code_point !== 0x007E) ||
-          (code_point == 0x00A5 || code_point == 0x203E)) {
+          (code_point == 0x00A5 || code_point == 0x203E))) {
 
         // 1. If code point is an ASCII code point, return a byte
         // whose value is code point.
@@ -2637,7 +2647,7 @@ if (typeof module !== "undefined" && module.exports) {
           return 0x7E;
       }
 
-      // 5. If code point is an ASCII code point, and iso-2022-jp
+      // 6. If code point is an ASCII code point, and iso-2022-jp
       // encoder state is not ASCII, prepend code point to stream, set
       // iso-2022-jp encoder state to ASCII, and return three bytes
       // 0x1B 0x28 0x42.
@@ -2648,7 +2658,7 @@ if (typeof module !== "undefined" && module.exports) {
         return [0x1B, 0x28, 0x42];
       }
 
-      // 6. If code point is either U+00A5 or U+203E, and iso-2022-jp
+      // 7. If code point is either U+00A5 or U+203E, and iso-2022-jp
       // encoder state is not Roman, prepend code point to stream, set
       // iso-2022-jp encoder state to Roman, and return three bytes
       // 0x1B 0x28 0x4A.
@@ -2659,19 +2669,19 @@ if (typeof module !== "undefined" && module.exports) {
         return [0x1B, 0x28, 0x4A];
       }
 
-      // 7. If code point is U+2212, set it to U+FF0D.
+      // 8. If code point is U+2212, set it to U+FF0D.
       if (code_point === 0x2212)
         code_point = 0xFF0D;
 
-      // 8. Let pointer be the index pointer for code point in index
+      // 9. Let pointer be the index pointer for code point in index
       // jis0208.
       var pointer = indexPointerFor(code_point, index('jis0208'));
 
-      // 9. If pointer is null, return error with code point.
+      // 10. If pointer is null, return error with code point.
       if (pointer === null)
         return encoderError(code_point);
 
-      // 10. If iso-2022-jp encoder state is not jis0208, prepend code
+      // 11. If iso-2022-jp encoder state is not jis0208, prepend code
       // point to stream, set iso-2022-jp encoder state to jis0208,
       // and return three bytes 0x1B 0x24 0x42.
       if (iso2022jp_state !== states.jis0208) {
@@ -2680,13 +2690,13 @@ if (typeof module !== "undefined" && module.exports) {
         return [0x1B, 0x24, 0x42];
       }
 
-      // 11. Let lead be floor(pointer / 94) + 0x21.
+      // 12. Let lead be floor(pointer / 94) + 0x21.
       var lead = floor(pointer / 94) + 0x21;
 
-      // 12. Let trail be pointer % 94 + 0x21.
+      // 13. Let trail be pointer % 94 + 0x21.
       var trail = pointer % 94 + 0x21;
 
-      // 13. Return two bytes whose values are lead and trail.
+      // 14. Return two bytes whose values are lead and trail.
       return [lead, trail];
     };
   }
diff --git a/test/test-misc.js b/test/test-misc.js
index 989b0df..c6bf393 100644
--- a/test/test-misc.js
+++ b/test/test-misc.js
@@ -342,3 +342,18 @@ test(function() {
         .encode('\uE5E5');
   });
 }, 'NONSTANDARD - gb18030: U+E5E5 (encoding)');
+
+
+test(function() {
+  // Regression test for https://github.com/whatwg/encoding/issues/15
+  var encoder =
+      new TextEncoder('iso-2022-jp', {NONSTANDARD_allowLegacyEncoding: true});
+
+  [
+    //'\u000E', '\u000F', '\u001B',
+    '\u00A5\u000E', //'\u00A5\u000F', '\u00A5\u001B'
+  ].forEach(function(s) {
+    assert_throws(new TypeError, function() { encoder.encode(s); });
+  });
+
+}, 'NONSTANDARD - iso-2022-jp encoding attack (encoding)');