Skip to content
This repository has been archived by the owner on Sep 28, 2018. It is now read-only.

Commit

Permalink
Fix ISO-2022-JP encoder, and incorporate encoder attack changes from spec
Browse files Browse the repository at this point in the history
  • Loading branch information
inexorabletash committed Feb 12, 2016
1 parent 5716244 commit aaa9e9f
Show file tree
Hide file tree
Showing 2 changed files with 38 additions and 13 deletions.
36 changes: 23 additions & 13 deletions lib/encoding.js
Original file line number Diff line number Diff line change
Expand Up @@ -2601,6 +2601,7 @@ if (typeof module !== "undefined" && module.exports) {
if (code_point === end_of_stream &&
iso2022jp_state !== states.ASCII) {
stream.prepend(code_point);
iso2022jp_state = states.ASCII;
return [0x1B, 0x28, 0x42];
}

Expand All @@ -2609,19 +2610,28 @@ if (typeof module !== "undefined" && module.exports) {
if (code_point === end_of_stream && iso2022jp_state === states.ASCII)
return finished;

// 3. If iso-2022-jp encoder state is ASCII and code point is an
// 3. If ISO-2022-JP encoder state is ASCII or Roman, and code
// point is U+000E, U+000F, or U+001B, return error with U+FFFD.
if ((iso2022jp_state === states.ASCII ||
iso2022jp_state === states.Roman) &&
(code_point === 0x000E || code_point === 0x000F ||
code_point === 0x001B)) {
return encoderError(0xFFFD);
}

// 4. If iso-2022-jp encoder state is ASCII and code point is an
// ASCII code point, return a byte whose value is code point.
if (iso2022jp_state === states.ASCII &&
isASCIICodePoint(code_point))
return code_point;

// 4. If iso-2022-jp encoder state is Roman and code point is an
// 5. If iso-2022-jp encoder state is Roman and code point is an
// ASCII code point, excluding U+005C and U+007E, or is U+00A5
// or U+203E, run these substeps:
if (iso2022jp_state === states.Roman &&
(isASCIICodePoint(code_point) &&
((isASCIICodePoint(code_point) &&
code_point !== 0x005C && code_point !== 0x007E) ||
(code_point == 0x00A5 || code_point == 0x203E)) {
(code_point == 0x00A5 || code_point == 0x203E))) {

// 1. If code point is an ASCII code point, return a byte
// whose value is code point.
Expand All @@ -2637,7 +2647,7 @@ if (typeof module !== "undefined" && module.exports) {
return 0x7E;
}

// 5. If code point is an ASCII code point, and iso-2022-jp
// 6. If code point is an ASCII code point, and iso-2022-jp
// encoder state is not ASCII, prepend code point to stream, set
// iso-2022-jp encoder state to ASCII, and return three bytes
// 0x1B 0x28 0x42.
Expand All @@ -2648,7 +2658,7 @@ if (typeof module !== "undefined" && module.exports) {
return [0x1B, 0x28, 0x42];
}

// 6. If code point is either U+00A5 or U+203E, and iso-2022-jp
// 7. If code point is either U+00A5 or U+203E, and iso-2022-jp
// encoder state is not Roman, prepend code point to stream, set
// iso-2022-jp encoder state to Roman, and return three bytes
// 0x1B 0x28 0x4A.
Expand All @@ -2659,19 +2669,19 @@ if (typeof module !== "undefined" && module.exports) {
return [0x1B, 0x28, 0x4A];
}

// 7. If code point is U+2212, set it to U+FF0D.
// 8. If code point is U+2212, set it to U+FF0D.
if (code_point === 0x2212)
code_point = 0xFF0D;

// 8. Let pointer be the index pointer for code point in index
// 9. Let pointer be the index pointer for code point in index
// jis0208.
var pointer = indexPointerFor(code_point, index('jis0208'));

// 9. If pointer is null, return error with code point.
// 10. If pointer is null, return error with code point.
if (pointer === null)
return encoderError(code_point);

// 10. If iso-2022-jp encoder state is not jis0208, prepend code
// 11. If iso-2022-jp encoder state is not jis0208, prepend code
// point to stream, set iso-2022-jp encoder state to jis0208,
// and return three bytes 0x1B 0x24 0x42.
if (iso2022jp_state !== states.jis0208) {
Expand All @@ -2680,13 +2690,13 @@ if (typeof module !== "undefined" && module.exports) {
return [0x1B, 0x24, 0x42];
}

// 11. Let lead be floor(pointer / 94) + 0x21.
// 12. Let lead be floor(pointer / 94) + 0x21.
var lead = floor(pointer / 94) + 0x21;

// 12. Let trail be pointer % 94 + 0x21.
// 13. Let trail be pointer % 94 + 0x21.
var trail = pointer % 94 + 0x21;

// 13. Return two bytes whose values are lead and trail.
// 14. Return two bytes whose values are lead and trail.
return [lead, trail];
};
}
Expand Down
15 changes: 15 additions & 0 deletions test/test-misc.js
Original file line number Diff line number Diff line change
Expand Up @@ -342,3 +342,18 @@ test(function() {
.encode('\uE5E5');
});
}, 'NONSTANDARD - gb18030: U+E5E5 (encoding)');


test(function() {
  // Regression test for https://github.com/whatwg/encoding/issues/15
  // Every input listed below is expected to make encode() throw a TypeError.
  var legacy_encoder =
      new TextEncoder('iso-2022-jp', {NONSTANDARD_allowLegacyEncoding: true});

  // Only U+00A5 followed by U+000E is exercised. The other candidate inputs
  // ('\u000E', '\u000F', '\u001B', '\u00A5\u000F', '\u00A5\u001B') are
  // currently disabled in this test.
  var attack_inputs = ['\u00A5\u000E'];

  // Asserts that encoding |input| with the shared encoder throws a TypeError.
  function expectEncodeToThrow(input) {
    assert_throws(new TypeError, function() { legacy_encoder.encode(input); });
  }

  for (var i = 0; i < attack_inputs.length; i += 1) {
    expectEncodeToThrow(attack_inputs[i]);
  }

}, 'NONSTANDARD - iso-2022-jp encoding attack (encoding)');

0 comments on commit aaa9e9f

Please sign in to comment.