Add support for hex floating point literals

cc-tweaked · Jul 23, 2023 · 4f843b4 · 4f843b4
1 parent 911989b
commit 4f843b4
Show file tree

Hide file tree

Showing 5 changed files with 265 additions and 133 deletions.
diff --git a/src/main/java/cc/tweaked/internal/string/CharProperties.java b/src/main/java/cc/tweaked/internal/string/CharProperties.java
@@ -0,0 +1,39 @@
+package cc.tweaked.internal.string;
+
+/**
+ * Various properties on characters, as used by the lexer and number parser.
+ * <p>
+ * All predicates only recognise ASCII characters, and take the character as an {@code int}.
+ */
+public final class CharProperties {
+	private CharProperties() {
+	}
+
+	/**
+	 * Whether a character is an ASCII letter, an ASCII digit or an underscore.
+	 *
+	 * @param c the character to test
+	 * @return {@code true} for {@code 0-9}, {@code a-z}, {@code A-Z} and {@code _}
+	 */
+	public static boolean isAlphaNum(int c) {
+		return c >= '0' && c <= '9'
+			|| c >= 'a' && c <= 'z'
+			|| c >= 'A' && c <= 'Z'
+			|| c == '_';
+	}
+
+	/**
+	 * Whether a character is an ASCII letter. Unlike {@link #isAlphaNum(int)}, this does not accept an underscore.
+	 *
+	 * @param c the character to test
+	 * @return {@code true} for {@code a-z} and {@code A-Z}
+	 */
+	public static boolean isAlpha(int c) {
+		return c >= 'a' && c <= 'z' || c >= 'A' && c <= 'Z';
+	}
+
+	/**
+	 * Whether a character is an ASCII decimal digit.
+	 *
+	 * @param c the character to test
+	 * @return {@code true} for {@code 0-9}
+	 */
+	public static boolean isDigit(int c) {
+		return c >= '0' && c <= '9';
+	}
+
+	/**
+	 * Whether a character counts as whitespace.
+	 * <p>
+	 * This accepts every value up to and including the space character, so control characters and negative values
+	 * are treated as whitespace too.
+	 *
+	 * @param c the character to test
+	 * @return {@code true} if {@code c} is at most {@code ' '}
+	 */
+	public static boolean isSpace(int c) {
+		return c <= ' ';
+	}
+
+	/**
+	 * Whether a character is an ASCII hexadecimal digit.
+	 *
+	 * @param c the character to test
+	 * @return {@code true} for {@code 0-9}, {@code a-f} and {@code A-F}
+	 */
+	public static boolean isHex(int c) {
+		return c >= '0' && c <= '9' || c >= 'a' && c <= 'f' || c >= 'A' && c <= 'F';
+	}
+
+	/**
+	 * Convert a hexadecimal digit to its numeric value.
+	 * <p>
+	 * No validation is performed: the result is only meaningful when {@link #isHex(int)} holds for {@code c}.
+	 *
+	 * @param c the hexadecimal digit
+	 * @return the digit's value, between 0 and 15 for a valid hexadecimal digit
+	 */
+	public static int hexValue(int c) {
+		// Terrible bit twiddling right here:
+		// 'A'..'F' corresponds to 0x41..0x46, and 'a'..'f' to 0x61..0x66. So bitwise and with 0xf
+		// gives us the last digit, +9 to map from 1..6 to 10..15.
+		return c <= '9' ? c - '0' : (c & 0xf) + 9;
+	}
+}
diff --git a/src/main/java/cc/tweaked/internal/string/NumberParser.java b/src/main/java/cc/tweaked/internal/string/NumberParser.java
@@ -0,0 +1,192 @@
+package cc.tweaked.internal.string;
+
+import org.squiddev.cobalt.lib.StringLib;
+
+/**
+ * Parses numbers, including hexadecimal integers and doubles.
+ * <p>
+ * This is used by the lexer/parser and {@code tonumber}.
+ */
+public final class NumberParser {
+	private NumberParser() {
+	}
+
+	/**
+	 * Parse a number from a range of bytes.
+	 * <p>
+	 * Leading and trailing whitespace (as decided by {@code StringLib.isWhitespace}) is skipped, and a single leading
+	 * {@code +} or {@code -} is accepted. When the base is 10 or 16, a {@code 0x}/{@code 0X} prefix switches to
+	 * base 16. The remainder is first read as an integer in the given base; if that fails, base 10 falls back to a
+	 * decimal floating point number and base 16 to a hexadecimal floating point number. Other bases only accept
+	 * integers.
+	 *
+	 * @param bytes  the bytes to parse
+	 * @param offset the index of the first byte to consider
+	 * @param length the number of bytes to consider
+	 * @param base   the base to parse the number in
+	 * @return the parsed number, or Double.NaN if the range does not hold a valid number
+	 */
+	public static double parse(byte[] bytes, int offset, int length, int base) {
+		// Trim whitespace from both ends of the range.
+		int index = offset, end = offset + length;
+		while (index < end && StringLib.isWhitespace(bytes[index])) index++;
+		while (index < end && StringLib.isWhitespace(bytes[end - 1])) end--;
+
+		// Consume an optional sign. It is applied to the result at the very end.
+		boolean isNeg = false;
+		if (index < end) {
+			switch (bytes[index]) {
+				case '+' -> index++;
+				case '-' -> {
+					index++;
+					isNeg = true;
+				}
+			}
+		}
+
+		// Nothing but whitespace and/or a sign.
+		if (index >= end) return Double.NaN;
+
+		// A 0x prefix forces base 16, but is only recognised when the requested base is 10 or 16.
+		if ((base == 10 || base == 16) && (bytes[index] == '0' && index + 1 < end && (bytes[index + 1] == 'x' || bytes[index + 1] == 'X'))) {
+			base = 16;
+			index += 2;
+
+			// "0x" on its own is not a number.
+			if (index >= end) return Double.NaN;
+		}
+
+		// Try the integer form first, and only fall back to the floating point forms if that fails.
+		double value = scanLong(base, bytes, index, end);
+		if (Double.isNaN(value)) {
+			value = switch (base) {
+				case 10 -> scanDouble(bytes, index, end);
+				case 16 -> scanHexDouble(bytes, index, end);
+				default -> Double.NaN;
+			};
+		}
+
+		return isNeg ? -value : value;
+	}
+
+	/**
+	 * Scan and convert an integer value, or return Double.NaN if not found.
+	 * <p>
+	 * Digits are accumulated exactly in a {@code long} for as long as the value fits. Once a further digit would
+	 * overflow, accumulation continues in a {@code double}, so an over-long literal yields an approximation of its
+	 * value rather than silently wrapping around.
+	 *
+	 * @param base  the base to use, such as 10
+	 * @param bytes the bytes to read digits from
+	 * @param start the index to start searching from
+	 * @param end   the first index beyond the search range
+	 * @return double value if conversion is valid,
+	 * or Double.NaN if not
+	 */
+	private static double scanLong(int base, byte[] bytes, int start, int end) {
+		long exact = 0;
+		double approx = 0;
+		boolean overflowed = false;
+		for (int i = start; i < end; i++) {
+			var chr = bytes[i];
+			int digit;
+			if (CharProperties.isDigit(chr)) {
+				digit = chr - '0';
+			} else if (CharProperties.isAlpha(chr)) {
+				// Fold to lower case, then map 'a'..'z' to 10..35.
+				digit = (chr | 0x20) - 'a' + 10;
+			} else {
+				return Double.NaN;
+			}
+
+			if (digit >= base) return Double.NaN;
+
+			if (overflowed) {
+				approx = approx * base + digit;
+			} else if (exact > (Long.MAX_VALUE - digit) / base) {
+				// exact * base + digit no longer fits in a long: carry on in floating point instead of wrapping.
+				overflowed = true;
+				approx = (double) exact * base + digit;
+			} else {
+				exact = exact * base + digit;
+			}
+		}
+		return overflowed ? approx : exact;
+	}
+
+	/**
+	 * Scan and convert an unsigned decimal double value, or return Double.NaN if not a double.
+	 * <p>
+	 * The range may only hold decimal digits, {@code .}, an {@code e}/{@code E} exponent marker and exponent signs.
+	 * A leading sign is rejected: {@link #parse} consumes the number's sign before calling this, and accepting a
+	 * second one would make inputs such as {@code --1} parse successfully.
+	 *
+	 * @param bytes the bytes to read from
+	 * @param start the index to start searching from
+	 * @param end   the first index beyond the search range
+	 * @return double value if conversion is valid,
+	 * or Double.NaN if not
+	 */
+	private static double scanDouble(byte[] bytes, int start, int end) {
+		// Signs are only valid inside the exponent, never as the first character.
+		if (start < end && (bytes[start] == '+' || bytes[start] == '-')) return Double.NaN;
+
+		// Double.parseDouble is more lenient than we want (it accepts hex floats, type suffixes, "NaN", "Infinity"
+		// and surrounding whitespace), so restrict the input to a known-safe character set first.
+		for (int i = start; i < end; i++) {
+			switch (bytes[i]) {
+				case '-', '+', '.', 'e', 'E', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' -> {
+				}
+				default -> {
+					return Double.NaN;
+				}
+			}
+		}
+
+		// Every byte is ASCII at this point, so widening each one to a char is lossless.
+		char[] c = new char[end - start];
+		for (int i = start; i < end; i++) {
+			c[i - start] = (char) bytes[i];
+		}
+		try {
+			return Double.parseDouble(String.valueOf(c));
+		} catch (NumberFormatException e) {
+			// Right characters, wrong shape (for instance "1e" or "."): not a number.
+			return Double.NaN;
+		}
+	}
+
+	/**
+	 * Scan and convert a hexadecimal floating point value, or return Double.NaN if not one.
+	 * <p>
+	 * The accepted form is a run of hex digits holding at most one {@code .}, optionally followed by {@code p} or
+	 * {@code P} and a signed decimal power-of-two exponent. A {@code 0x} prefix is not accepted here. At most 30
+	 * significant digits (those after any leading zeros) are allowed.
+	 *
+	 * @param bytes the bytes to read from
+	 * @param index the index to start searching from
+	 * @param end   the first index beyond the search range
+	 * @return double value if conversion is valid,
+	 * or Double.NaN if not
+	 */
+	private static double scanHexDouble(byte[] bytes, int index, int end) {
+		// The mantissa. This must be a double rather than a long: 30 hex digits do not fit in 64 bits, and passing a
+		// long to Math.scalb selects the scalb(float, int) overload, which loses both precision and range.
+		double result = 0;
+		int exponent = 0;
+
+		int sigDigits = 0, nonSigDigits = 0; // Number of significant digits and non-significant digits (leading 0s).
+		boolean hasDot = false;
+		for (; index < end; index++) {
+			var s = bytes[index];
+			if (s == '.') {
+				if (hasDot) return Double.NaN;
+				hasDot = true;
+			} else if (CharProperties.isHex(s)) {
+				if (sigDigits == 0 && s == '0') {
+					// Skip leading 0s
+					nonSigDigits++;
+				} else if (++sigDigits <= 30) {
+					// Only allow up-to 30 significant digits.
+					result = result * 16 + CharProperties.hexValue(s);
+				} else {
+					return Double.NaN;
+				}
+
+				// If we're after the decimal, reduce the exponent.
+				if (hasDot) exponent--;
+			} else {
+				// Neither a dot nor a hex digit. Assume this is an exponent separator (Pp), and we'll deal with
+				// this later.
+				break;
+			}
+		}
+
+		// If we've parsed no numbers, bail. "0x." isn't a valid number for instance!
+		if (sigDigits + nonSigDigits == 0) return Double.NaN;
+
+		exponent *= 4; // Compute the "actual" exponent, not just the number of hex digits after the dot.
+
+		// Parse the exponent part.
+		if (index < end) {
+			var expSeparator = bytes[index];
+			if (expSeparator != 'P' && expSeparator != 'p') return Double.NaN;
+
+			index++;
+
+			int givenExponent = 0;
+			boolean expNegative = false;
+
+			// Parse the exponent sign.
+			if (index < end) {
+				switch (bytes[index]) {
+					case '+' -> index++;
+					case '-' -> {
+						index++;
+						expNegative = true;
+					}
+				}
+			}
+
+			if (index >= end) return Double.NaN; // Exponent is empty!
+
+			// Parse the actual exponent.
+			for (; index < end; index++) {
+				var expChar = bytes[index];
+				if (!CharProperties.isDigit(expChar)) return Double.NaN;
+				// Saturate instead of overflowing the int. An exponent this large already pushes any non-zero
+				// mantissa to infinity or zero, so further digits cannot change the result.
+				if (givenExponent < 100_000_000) givenExponent = givenExponent * 10 + (expChar - '0');
+			}
+
+			if (expNegative) givenExponent = -givenExponent;
+			exponent += givenExponent;
+		}
+
+		return Math.scalb(result, exponent);
+	}
+}
diff --git a/src/main/java/org/squiddev/cobalt/LuaString.java b/src/main/java/org/squiddev/cobalt/LuaString.java
@@ -24,9 +24,9 @@
  */
 package org.squiddev.cobalt;
 
+import cc.tweaked.internal.string.NumberParser;
 import org.checkerframework.checker.nullness.qual.NonNull;
 import org.checkerframework.framework.qual.DefaultQualifier;
-import org.squiddev.cobalt.lib.StringLib;
 
 import java.io.*;
 import java.nio.ByteBuffer;
@@ -629,94 +629,8 @@ public LuaValue toNumber(int base) {
 
 	private double scanNumber(int base) {
 		if (base < 2 || base > 36) return Double.NaN;
-
-		return parseNumber(bytes(), offset, length, base);
-	}
-
-	public static double parseNumber(byte[] bytes, int offset, int length, int base) {
-		int i = offset, j = offset + length;
-		while (i < j && StringLib.isWhitespace(bytes[i])) i++;
-		while (i < j && StringLib.isWhitespace(bytes[j - 1])) j--;
-
-		boolean isNeg = i < j && bytes[i] == '-';
-		if (isNeg) i++;
-
-		if (i >= j) return Double.NaN;
-
-		if ((base == 10 || base == 16) && (bytes[i] == '0' && i + 1 < j && (bytes[i + 1] == 'x' || bytes[i + 1] == 'X'))) {
-			base = 16;
-			i += 2;
-
-			if (i >= j) return Double.NaN;
-		}
-
-		double l = scanLong(base, bytes, i, j);
-		double value = Double.isNaN(l) && base == 10 ? scanDouble(bytes, i, j) : l;
-		return isNeg ? -value : value;
-	}
-
-	/**
-	 * Scan and convert a long value, or return Double.NaN if not found.
-	 *
-	 * @param base  the base to use, such as 10
-	 * @param start the index to start searching from
-	 * @param end   the first index beyond the search range
-	 * @return double value if conversion is valid,
-	 * or Double.NaN if not
-	 */
-	private static double scanLong(int base, byte[] bytes, int start, int end) {
-		long x = 0;
-		for (int i = start; i < end; i++) {
-			int digit = bytes[i] - (base <= 10 || (bytes[i] >= '0' && bytes[i] <= '9') ? '0' :
-				bytes[i] >= 'A' && bytes[i] <= 'Z' ? ('A' - 10) : ('a' - 10));
-			if (digit < 0 || digit >= base) {
-				return Double.NaN;
-			}
-			x = x * base + digit;
-		}
-		return x;
+		return NumberParser.parse(bytes(), offset, length, base);
 	}
 
-	/**
-	 * Scan and convert a double value, or return Double.NaN if not a double.
-	 *
-	 * @param start the index to start searching from
-	 * @param end   the first index beyond the search range
-	 * @return double value if conversion is valid,
-	 * or Double.NaN if not
-	 */
-	private static double scanDouble(byte[] bytes, int start, int end) {
-		for (int i = start; i < end; i++) {
-			switch (bytes[i]) {
-				case '-':
-				case '+':
-				case '.':
-				case 'e':
-				case 'E':
-				case '0':
-				case '1':
-				case '2':
-				case '3':
-				case '4':
-				case '5':
-				case '6':
-				case '7':
-				case '8':
-				case '9':
-					break;
-				default:
-					return Double.NaN;
-			}
-		}
-		char[] c = new char[end - start];
-		for (int i = start; i < end; i++) {
-			c[i - start] = (char) bytes[i];
-		}
-		try {
-			return Double.parseDouble(String.valueOf(c));
-		} catch (NumberFormatException e) {
-			return Double.NaN;
-		}
-	}
 	// endregion
 }