Skip to content

Commit

Permalink
Error if \u{...} contains more than 6 hex digits
Browse files Browse the repository at this point in the history
  • Loading branch information
eilvelia committed Oct 15, 2023
1 parent 211caf5 commit 1e6d977
Show file tree
Hide file tree
Showing 2 changed files with 4 additions and 4 deletions.
5 changes: 3 additions & 2 deletions src/lexer.ml
Original file line number Diff line number Diff line change
Expand Up @@ -146,10 +146,11 @@ let rec string lexbuf =
| "\\\"" -> Buffer.add_char string_buffer '"'; string lexbuf
| "\\b" -> Buffer.add_char string_buffer '\b'; string lexbuf
| "\\f" -> Buffer.add_char string_buffer '\012'; string lexbuf
- | "\\u{", Rep (hex_digit, 1 .. 6), '}' ->
- (* TODO: Disallow 7+ hex digits inside \u{...}? *)
+ | "\\u{", Plus hex_digit, '}' ->
let code_str =
Sedlexing.Utf8.sub_lexeme lexbuf 3 (Sedlexing.lexeme_length lexbuf - 4) in
+ if String.length code_str > 6 then
+ error "Invalid unicode code point, cannot contain more than 6 hex digits";
let code = int_of_string @@ "0x" ^ code_str in
if code > 0x10FFFF then
error "Invalid unicode code point, cannot be greater than 10FFFF";
Expand Down
3 changes: 1 addition & 2 deletions test/parse.ml
Original file line number Diff line number Diff line change
Expand Up @@ -378,9 +378,8 @@ let%test_module "strings" = (module struct
[%expect {| (- (string "a _ a _ \226\129\159")) |}]

let%expect_test "\\u{...} cannot contain more than 6 hex digits" =
- (* TODO: Raise an error instead? *)
test {|- "\u{1234567}"|};
- [%expect {| (- (string "\\u{1234567}")) |}]
+ [%expect {| Error: :1:4-1:15: Invalid unicode code point, cannot contain more than 6 hex digits |}]

let%expect_test "\\u{...} should not accept a > 0x10FFFF code point" =
test {|- "\u{11FBBF} _"|};
Expand Down

0 comments on commit 1e6d977

Please sign in to comment.