diff --git a/src/lexer.ml b/src/lexer.ml index 6b1580b..663f518 100644 --- a/src/lexer.ml +++ b/src/lexer.ml @@ -146,10 +146,11 @@ let rec string lexbuf = | "\\\"" -> Buffer.add_char string_buffer '"'; string lexbuf | "\\b" -> Buffer.add_char string_buffer '\b'; string lexbuf | "\\f" -> Buffer.add_char string_buffer '\012'; string lexbuf - | "\\u{", Rep (hex_digit, 1 .. 6), '}' -> - (* TODO: Disallow 7+ hex digits inside \u{...}? *) + | "\\u{", Plus hex_digit, '}' -> let code_str = Sedlexing.Utf8.sub_lexeme lexbuf 3 (Sedlexing.lexeme_length lexbuf - 4) in + if String.length code_str > 6 then + error "Invalid unicode code point, cannot contain more than 6 hex digits"; let code = int_of_string @@ "0x" ^ code_str in if code > 0x10FFFF then error "Invalid unicode code point, cannot be greater than 10FFFF"; diff --git a/test/parse.ml b/test/parse.ml index 820f1f0..48b4b45 100644 --- a/test/parse.ml +++ b/test/parse.ml @@ -378,9 +378,8 @@ let%test_module "strings" = (module struct [%expect {| (- (string "a _ a _ \226\129\159")) |}] let%expect_test "\\u{...} cannot contain more than 6 hex digits" = - (* TODO: Raise an error instead? *) test {|- "\u{1234567}"|}; - [%expect {| (- (string "\\u{1234567}")) |}] + [%expect {| Error: :1:4-1:15: Invalid unicode code point, cannot contain more than 6 hex digits |}] let%expect_test "\\u{...} should not accept a > 0x10FFFF code point" = test {|- "\u{11FBBF} _"|};