From f647e3226076a957a3c856074b336ff0c3984846 Mon Sep 17 00:00:00 2001 From: Jonathan Coates Date: Wed, 23 Aug 2023 00:46:03 +0100 Subject: [PATCH] Fix Lua patterns using Unicode As we used Character.isXyz functions, character classes were determined by Unicode character properties rather than ASCII ranges. This meant that, for instance, \xf3 was counted as a letter, when it's definitely not! --- .../org/squiddev/cobalt/lib/StringMatch.java | 25 ++++++++----------- 1 file changed, 10 insertions(+), 15 deletions(-) diff --git a/src/main/java/org/squiddev/cobalt/lib/StringMatch.java b/src/main/java/org/squiddev/cobalt/lib/StringMatch.java index 909cc70b..3c047dd6 100644 --- a/src/main/java/org/squiddev/cobalt/lib/StringMatch.java +++ b/src/main/java/org/squiddev/cobalt/lib/StringMatch.java @@ -30,20 +30,15 @@ class StringMatch { CHAR_TABLE = new byte[256]; for (int i = 0; i < 256; ++i) { - final char c = (char) i; - CHAR_TABLE[i] = (byte) ((Character.isDigit(c) ? MASK_DIGIT : 0) | - (Character.isLowerCase(c) ? MASK_LOWERCASE : 0) | - (Character.isUpperCase(c) ? MASK_UPPERCASE : 0) | - ((c < ' ' || c == 0x7F) ? MASK_CONTROL : 0)); - if ((c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F') || (c >= '0' && c <= '9')) { - CHAR_TABLE[i] |= MASK_HEXDIGIT; - } - if ((c >= '!' && c <= '/') || (c >= ':' && c <= '@')) { - CHAR_TABLE[i] |= MASK_PUNCT; - } - if ((CHAR_TABLE[i] & (MASK_LOWERCASE | MASK_UPPERCASE)) != 0) { - CHAR_TABLE[i] |= MASK_ALPHA; - } + byte mask = 0; + if (i <= ' ' || i == 0x7f) mask |= MASK_CONTROL; + if (i >= '0' && i <= '9') mask |= MASK_DIGIT; + if (i >= 'a' && i <= 'z') mask |= MASK_LOWERCASE; + if (i >= 'A' && i <= 'Z') mask |= MASK_UPPERCASE; + if ((i >= 'a' && i <= 'f') || (i >= 'A' && i <= 'F') || (i >= '0' && i <= '9')) mask |= MASK_HEXDIGIT; + if ((i >= '!' 
&& i <= '/') || (i >= ':' && i <= '@')) mask |= MASK_PUNCT; + if ((mask & (MASK_LOWERCASE | MASK_UPPERCASE)) != 0) mask |= MASK_ALPHA; + CHAR_TABLE[i] = mask; } CHAR_TABLE[' '] = MASK_SPACE; @@ -524,7 +519,7 @@ boolean singlematch(int c, int poff, int ep) { */ int match(int soffset, int poffset) throws LuaError { while (true) { - if(state.isInterrupted()) state.handleInterruptWithoutYield(); + if (state.isInterrupted()) state.handleInterruptWithoutYield(); // Check if we are at the end of the pattern - // equivalent to the '\0' case in the C version, but our pattern