From bce90befc0e4b5560489579079d6fbdd80ee72c4 Mon Sep 17 00:00:00 2001 From: Tyler Gregg Date: Tue, 25 Jun 2024 17:07:28 -0700 Subject: [PATCH] Adds support for reading binary tagless types. --- .../com/amazon/ion/impl/IonCursorBinary.java | 210 ++++++++---- .../impl/IonReaderContinuableCoreBinary.java | 247 +++++++++++++- .../amazon/ion/impl/IonCursorBinaryTest.java | 18 +- .../ion/impl/IonCursorTestUtilities.java | 46 +++ .../IonReaderContinuableCoreBinaryTest.java | 310 ++++++++++++++++++ ...onReaderContinuableTopLevelBinaryTest.java | 41 +-- 6 files changed, 741 insertions(+), 131 deletions(-) diff --git a/src/main/java/com/amazon/ion/impl/IonCursorBinary.java b/src/main/java/com/amazon/ion/impl/IonCursorBinary.java index 75374d95a..281067755 100644 --- a/src/main/java/com/amazon/ion/impl/IonCursorBinary.java +++ b/src/main/java/com/amazon/ion/impl/IonCursorBinary.java @@ -25,6 +25,7 @@ import static com.amazon.ion.impl.IonTypeID.SYSTEM_SYMBOL_VALUE; import static com.amazon.ion.impl.IonTypeID.TWO_ANNOTATION_FLEX_SYMS_LOWER_NIBBLE_1_1; import static com.amazon.ion.impl.IonTypeID.TWO_ANNOTATION_SIDS_LOWER_NIBBLE_1_1; +import static com.amazon.ion.impl.IonTypeID.TYPE_IDS_1_1; import static com.amazon.ion.util.IonStreamUtils.throwAsIonException; /** @@ -349,6 +350,11 @@ private static class RefillableState { */ private boolean isSystemInvocation = false; + /** + * The type of the current value, if tagless. Otherwise, null. + */ + PrimitiveType taglessType = null; + /** * @return the given configuration's DataHandler, or null if that DataHandler is a no-op. 
*/ @@ -1232,12 +1238,12 @@ private boolean uncheckedReadAnnotationWrapperHeader_1_1(IonTypeID valueTid) { private boolean slowSkipNonPrefixedAnnotations_1_1(IonTypeID valueTid) { if (valueTid.isInlineable) { // Opcodes 0xE7 (one annotation FlexSym) and 0xE8 (two annotation FlexSyms) - if (slowSkipFlexSym_1_1()) { + if (slowSkipFlexSym_1_1(null) == FlexSymType.INCOMPLETE) { return true; } if (valueTid.lowerNibble == TWO_ANNOTATION_FLEX_SYMS_LOWER_NIBBLE_1_1) { // Opcode 0xE8 (two annotation FlexSyms) - return slowSkipFlexSym_1_1(); + return slowSkipFlexSym_1_1(null) == FlexSymType.INCOMPLETE; } } else { // Opcodes 0xE4 (one annotation SID) and 0xE5 (two annotation SIDs) @@ -1544,46 +1550,127 @@ private long slowReadFlexSym_1_1(Marker markerToSet) { return result; } + /** + * FlexSym encoding types. + */ + private enum FlexSymType { + INCOMPLETE { + @Override + IonTypeID typeIdFor(int length) { + throw new IllegalStateException("The FlexSym is incomplete."); + } + }, + INLINE_TEXT { + @Override + IonTypeID typeIdFor(int length) { + if (length <= 0xF) { + return TYPE_IDS_1_1[0xA0 | length]; + } + return TYPE_IDS_1_1[OpCodes.VARIABLE_LENGTH_INLINE_SYMBOL & SINGLE_BYTE_MASK]; + } + }, + SYMBOL_ID { + @Override + IonTypeID typeIdFor(int length) { + if (length == 0) { + return TYPE_IDS_1_1[OpCodes.SYMBOL_ADDRESS_1_BYTE & SINGLE_BYTE_MASK]; + } + if (length < 3) { + return TYPE_IDS_1_1[0xE0 | length]; + } + return TYPE_IDS_1_1[OpCodes.SYMBOL_ADDRESS_MANY_BYTES & SINGLE_BYTE_MASK]; + } + }, + STRUCT_END { + @Override + IonTypeID typeIdFor(int length) { + throw new IllegalStateException("The special struct end FlexSym is not associated with a type ID."); + } + }; + + /** + * Classifies a special FlexSym (beginning with FlexInt zero) based on the byte that follows. + * @param specialByte the byte that followed FlexInt zero. + * @return the FlexSymType that corresponds to the given special byte. 
+ */ + static FlexSymType classifySpecialFlexSym(int specialByte) { + if (specialByte < 0) { + return FlexSymType.INCOMPLETE; + } + if ((byte) specialByte == OpCodes.INLINE_SYMBOL_ZERO_LENGTH) { + return FlexSymType.SYMBOL_ID; + } + if ((byte) specialByte == OpCodes.STRING_ZERO_LENGTH) { + return FlexSymType.INLINE_TEXT; + } + if ((byte) specialByte == OpCodes.DELIMITED_END_MARKER) { + return FlexSymType.STRUCT_END; + } + throw new IonException("FlexSyms may only wrap symbol zero, empty string, or delimited end."); + } + + /** + * Gets the most appropriate IonTypeID for a FlexSym of this type and the given length. + * @param length the length of the FlexSym. + * @return an Ion 1.1 IonTypeID with appropriate values for 'length' and 'isInlineable'. + */ + abstract IonTypeID typeIdFor(int length); + } + /** * Skips a FlexSym. After this method returns, `peekIndex` points to the first byte after the end of the FlexSym. + * @param markerToSet if the method returns `INLINE_TEXT`, will have `startIndex` and `endIndex` set to the bounds of + * the inline UTF-8 byte sequence. + * @return the type of FlexSym that was skipped. */ - private void uncheckedSkipFlexSym_1_1() { + private FlexSymType uncheckedSkipFlexSym_1_1(Marker markerToSet) { long result = uncheckedReadFlexInt_1_1(); if (result == 0) { - peekIndex++; + markerToSet.startIndex = peekIndex + 1; + markerToSet.endIndex = markerToSet.startIndex; + return FlexSymType.classifySpecialFlexSym(buffer[(int) peekIndex++] & SINGLE_BYTE_MASK); } else if (result < 0) { - peekIndex -= result; + markerToSet.startIndex = peekIndex; + markerToSet.endIndex = peekIndex - result; + peekIndex = markerToSet.endIndex; + return FlexSymType.INLINE_TEXT; } + return FlexSymType.SYMBOL_ID; } /** * Skips a FlexSym, ensuring enough space is available in the buffer. After this method returns, `peekIndex` points * to the first byte after the end of the FlexSym. 
- * @return true if there are not enough bytes in the stream to complete the FlexSym; otherwise, false. + * @param markerToSet if non-null and the method returns `INLINE_TEXT`, will have `startIndex` and `endIndex` set + * to the bounds of the inline UTF-8 byte sequence. + * @return INCOMPLETE if there are not enough bytes in the stream to complete the FlexSym; otherwise, the type + * of FlexSym that was skipped. */ - private boolean slowSkipFlexSym_1_1() { + private FlexSymType slowSkipFlexSym_1_1(Marker markerToSet) { long result = slowReadFlexUInt_1_1(); if (result < 0) { - return true; + return FlexSymType.INCOMPLETE; } if (buffer[(int) peekIndex - 1] < 0) { // Sign extension. result |= ~(-1 >>> Long.numberOfLeadingZeros(result)); } if (result == 0) { - int nextByte = slowReadByte(); - if (nextByte < 0) { - return true; - } - if ((byte) nextByte != OpCodes.INLINE_SYMBOL_ZERO_LENGTH && (byte) nextByte != OpCodes.STRING_ZERO_LENGTH && (byte) nextByte != OpCodes.DELIMITED_END_MARKER) { - throw new IonException("FlexSyms may only wrap symbol zero, empty string, or delimited end."); + FlexSymType flexSymType = FlexSymType.classifySpecialFlexSym(slowReadByte()); + if (markerToSet != null && flexSymType != FlexSymType.INCOMPLETE) { + markerToSet.startIndex = peekIndex; + markerToSet.endIndex = peekIndex; } - return false; + return flexSymType; } else if (result < 0) { + if (markerToSet != null) { + markerToSet.startIndex = peekIndex; + markerToSet.endIndex = peekIndex - result; + } peekIndex -= result; - return false; + return FlexSymType.INLINE_TEXT; } - return false; + return FlexSymType.SYMBOL_ID; } /** @@ -1970,6 +2057,7 @@ private void reset() { annotationSequenceMarker.endIndex = -1; macroInvocationId = -1; isSystemInvocation = false; + taglessType = null; } /** @@ -2850,64 +2938,55 @@ private boolean slowSkipToNextToken() { } /** - * Reads the length of the FlexSym that starts at the given position. - * @param position the start position of the FlexSym. 
- * @return the length of the FlexSym. - */ - private long uncheckedReadLengthOfFlexSym_1_1(long position) { - uncheckedSkipFlexSym_1_1(); - long lengthOfFlexSym = (int) (peekIndex - position); - peekIndex = position; - return lengthOfFlexSym; - } - - /** - * Reads the length of the FlexSym that starts at the given position, ensuring enough bytes are available in the - * stream. - * @param position the start position of the FlexSym. + * Reads the length and type of the FlexSym that starts at the given position, ensuring enough bytes are available + * in the stream. After this method returns with a value greater than or equal to zero, `valueTid` and + * `valueMarker.typeId` will be set to the IonTypeID that most closely corresponds to the length and type of the + * FlexSym. * @return the length of the FlexSym, or -1 if not enough bytes are available in the stream to determine the length. */ - private long slowReadLengthOfFlexSym_1_1(long position) { - if (slowSkipFlexSym_1_1()) { - return -1; + private long readFlexSymLengthAndType_1_1() { + FlexSymType flexSymType; + if (isSlowMode) { + flexSymType = slowSkipFlexSym_1_1(valueMarker); + if (flexSymType == FlexSymType.INCOMPLETE) { + return -1; + } + } else { + flexSymType = uncheckedSkipFlexSym_1_1(valueMarker); } - int lengthOfFlexSym = (int) (peekIndex - position); - peekIndex = position; + int lengthOfFlexSym = (int) (peekIndex - valueMarker.startIndex); + peekIndex = valueMarker.startIndex; + valueTid = flexSymType.typeIdFor(lengthOfFlexSym); + valueMarker.typeId = valueTid; return lengthOfFlexSym; } /** - * Calculates the length of a variable-length primitive value. - * @param primitiveType the variable-length primitive type of the tagless value that starts at `peekIndex`. - * @return the length of the value. 
- */ - private long uncheckedCalculateTaglessLength(PrimitiveType primitiveType) { - switch (primitiveType) { - case FLEX_UINT: - case FLEX_INT: - return uncheckedReadLengthOfFlexUInt_1_1(peekIndex); - case COMPACT_SYMBOL: - return uncheckedReadLengthOfFlexSym_1_1(peekIndex); - default: - throw new IllegalStateException("Length is built into the primitive type's IonTypeID."); - } - } - - /** - * Calculates the length of a variable-length primitive value, ensuring enough bytes are available in the stream. + * Calculates the length and type of variable-length primitive value, ensuring enough bytes are available in the + * stream. * @param primitiveType the variable-length primitive type of the tagless value that starts at `peekIndex`. * @return the length of the value, or -1 if not enough bytes are available in the stream to determine the length. */ - private long slowCalculateTaglessLength(PrimitiveType primitiveType) { + private long calculateTaglessLengthAndType(PrimitiveType primitiveType) { + // TODO length calculation for these types could be deferred until they are consumed to avoid duplicate + // work. This would trade some added complexity for a potential performance gain that would need to be + // quantified. + long length; switch (primitiveType) { case FLEX_UINT: case FLEX_INT: - return slowReadLengthOfFlexUInt_1_1(peekIndex); + length = isSlowMode ? 
slowReadLengthOfFlexUInt_1_1(peekIndex) : uncheckedReadLengthOfFlexUInt_1_1(peekIndex); + break; case COMPACT_SYMBOL: - return slowReadLengthOfFlexSym_1_1(peekIndex); + length = readFlexSymLengthAndType_1_1(); + break; default: throw new IllegalStateException("Length is built into the primitive type's IonTypeID."); } + if (length >= 0) { + valueMarker.endIndex = peekIndex + length; + } + return length; } /** @@ -2938,24 +3017,15 @@ public Event nextTaglessValue(PrimitiveType primitiveType) { reportConsumedData(); } reset(); + taglessType = primitiveType; valueTid = primitiveType.typeID; valueMarker.typeId = valueTid; valueMarker.startIndex = peekIndex; valuePreHeaderIndex = peekIndex; if (valueTid.variableLength) { - // TODO length calculation for these types could be deferred until they are consumed to avoid duplicate - // work. This would trade some added complexity for a potential performance gain that would need to be - // quantified. - long length; - if (isSlowMode) { - length = slowCalculateTaglessLength(primitiveType); - if (length < 0) { - return event; - } - } else { - length = uncheckedCalculateTaglessLength(primitiveType); + if (calculateTaglessLengthAndType(primitiveType) < 0) { + return event; } - valueMarker.endIndex = peekIndex + length; } else { valueMarker.endIndex = peekIndex + valueTid.length; } diff --git a/src/main/java/com/amazon/ion/impl/IonReaderContinuableCoreBinary.java b/src/main/java/com/amazon/ion/impl/IonReaderContinuableCoreBinary.java index 023eb2b39..078a9de90 100644 --- a/src/main/java/com/amazon/ion/impl/IonReaderContinuableCoreBinary.java +++ b/src/main/java/com/amazon/ion/impl/IonReaderContinuableCoreBinary.java @@ -465,16 +465,21 @@ private boolean classifyInteger_1_0() { * @return the value. 
*/ private long readLargeFlexUInt_1_1(int firstByte) { + byte length = 0; + int bitShift = 0; if (firstByte == 0) { - // Note: this is conservative, as 9-byte flex subfields (with a continuation bit in the second byte) can fit - // in a long. However, the flex subfields parsed by the methods in this class are used only in cases that - // require an int anyway (symbol IDs, decimal scale), so the added complexity is not warranted. - throw new IonException("Flex subfield exceeds the length of a long."); + length = 7; // Don't include the skipped zero byte. + bitShift = -7; + firstByte = buffer[(int) peekIndex++] & SINGLE_BYTE_MASK; + if (firstByte == 0) { + throw new IonException("Flex subfield exceeds the length of a long."); + } } - byte length = (byte) (Integer.numberOfTrailingZeros(firstByte) + 1); - long result = firstByte >>> length; + length += (byte) (Integer.numberOfTrailingZeros(firstByte) + 1); + bitShift += length; + long result = firstByte >>> bitShift; for (byte i = 1; i < length; i++) { - result |= ((long) (buffer[(int) (peekIndex++)] & SINGLE_BYTE_MASK) << (8 * i - length)); + result |= ((long) (buffer[(int) (peekIndex++)] & SINGLE_BYTE_MASK) << (8 * i - bitShift)); } return result; } @@ -512,7 +517,7 @@ long readLargeFlexInt_1_1(int firstByte) { long result = readLargeFlexUInt_1_1(firstByte); if (buffer[(int) peekIndex - 1] < 0) { // Sign extension. - result |= ~(-1 >>> Long.numberOfLeadingZeros(result)); + result |= ~(-1L >>> Long.numberOfLeadingZeros(result)); } return result; } @@ -617,6 +622,75 @@ private BigInteger readLargeFixedIntOrFixedUIntAsBigInteger(int length) { return new BigInteger(bytes); } + /** + * Reads a FixedUInt value into a BigInteger. + * @return the value. + */ + private BigInteger readFixedUIntAsBigInteger_1_1(int length) { + if (buffer[(int) valueMarker.endIndex - 1] < 0) { + // The most-significant bit is set; pad the length by one byte so that the value remains unsigned. 
+ length += 1; + } + return readLargeFixedIntOrFixedUIntAsBigInteger(length); + } + + /** + * Reads a FlexUInt or FlexInt value into a BigInteger. + * @param length the byte length of the encoded FlexUInt or FlexInt to read. + * @return the value. + */ + private BigInteger readLargeFlexIntOrFlexUIntAsBigInteger(int length) { + int bitShift = length; + int maskForLength = (SINGLE_BYTE_MASK >>> (8 - bitShift)); + int numberOfLeadingZeroBytes = 0; + // First count the leading zeroes and calculate the number of bits that need to be shifted out of each + // encoded byte. + for (long i = peekIndex; i < valueMarker.endIndex; i++) { + int b = buffer[(int) i] & SINGLE_BYTE_MASK; + if (b == 0) { + bitShift -= 8; + numberOfLeadingZeroBytes++; + maskForLength = (SINGLE_BYTE_MASK >>> (8 - bitShift)); + continue; // Skip over any bytes that contain only continuation bits. + } + break; + } + // FlexInt and FlexUInt are little-endian. Reverse the bytes and shift out the continuation bits. + byte[] bytes = getScratchForSize(length - numberOfLeadingZeroBytes); + int copyIndex = bytes.length; + for (long i = peekIndex + numberOfLeadingZeroBytes; i < valueMarker.endIndex; i++) { + int b = buffer[(int) i] & SINGLE_BYTE_MASK; + if (copyIndex < bytes.length) { + bytes[copyIndex] |= (byte) ((b & maskForLength) << (8 - bitShift)); + } + if (--copyIndex == 0 && !taglessType.isUnsigned) { + bytes[copyIndex] = (byte) ((byte) b >> bitShift); // Sign extend most significant byte. + } else { + bytes[copyIndex] = (byte) (b >>> bitShift); + } + } + peekIndex = valueMarker.endIndex; + return new BigInteger(bytes); + } + + /** + * Reads a tagless int value into a BigInteger. + * @return the value. 
+ */ + private BigInteger readTaglessIntAsBigInteger_1_1() { + BigInteger value; + int length = (int) (valueMarker.endIndex - peekIndex); + if (valueTid.variableLength) { + value = readLargeFlexIntOrFlexUIntAsBigInteger(length); + } else if (length < LONG_SIZE_IN_BYTES || !taglessType.isUnsigned) { + // Note: all fixed-width tagless signed ints fit in a Java long. + value = BigInteger.valueOf(readTaglessInt_1_1()); + } else { + value = readFixedUIntAsBigInteger_1_1(length); + } + return value; + } + /** * Reads a FixedInt value into a BigInteger. * @return the value. @@ -674,12 +748,35 @@ private Decimal readDecimal_1_1() { return Decimal.valueOf(coefficient, scale); } + /** + * Reads the tagless int bounded by `valueMarker` into a long. + * @return the value. + */ + private long readTaglessInt_1_1() { + // TODO performance: the fixed width types all correspond to Java primitives and could therefore be read + // using ByteBuffer, possibly more quickly than using the following methods, especially if several in a row + // can be read without requiring the cursor's state to be modified before each one. + if (taglessType.isUnsigned) { + if (taglessType == PrimitiveType.FLEX_UINT) { + return readFlexUInt_1_1(); + } + return readFixedUInt_1_1(valueMarker.startIndex, valueMarker.endIndex); + } + if (taglessType == PrimitiveType.FLEX_INT) { + return readFlexInt_1_1(); + } + return readFixedInt_1_1(); + } + /** * Reads the FixedInt bounded by `valueMarker` into a `long`. * @return the value. 
*/ private long readLong_1_1() { peekIndex = valueMarker.startIndex; + if (taglessType != null) { + return readTaglessInt_1_1(); + } return readFixedInt_1_1(); } @@ -689,6 +786,9 @@ private long readLong_1_1() { */ private BigInteger readBigInteger_1_1() { peekIndex = valueMarker.startIndex; + if (taglessType != null) { + return readTaglessIntAsBigInteger_1_1(); + } return readFixedIntAsBigInteger_1_1(); } @@ -708,11 +808,7 @@ private BigDecimal readTimestampFraction_1_1() { int scale = (int) readFlexUInt_1_1(); int length = (int) (valueMarker.endIndex - peekIndex); if (length >= LONG_SIZE_IN_BYTES) { - if (buffer[(int) valueMarker.endIndex - 1] < 0) { - // The most-significant bit is set; pad the length by one byte so that the value remains unsigned. - length += 1; - } - value = new BigDecimal(readLargeFixedIntOrFixedUIntAsBigInteger(length), scale); + value = new BigDecimal(readFixedUIntAsBigInteger_1_1(length), scale); } else if (length > 0) { value = BigDecimal.valueOf(readFixedUInt_1_1(peekIndex, valueMarker.endIndex), scale); } else { @@ -974,23 +1070,136 @@ void prepareScalar() { } } + /** + * Determines whether the tagless integer starting at `valueMarker.startIndex` and ending at `valueMarker.endIndex` + * crosses a type boundary. Callers must only invoke this method when the integer's size is known to be either + * 4 or 8 bytes. + * @return true if the value fits in the Java integer type that matches its Ion serialized size; false if it + * requires the next larger size. + */ + private boolean classifyFixedWidthTaglessInteger_1_1() { + if (!taglessType.isUnsigned || taglessType.typeID.variableLength) { + return true; + } + // UInt values with the most significant bit set will not fit in the signed Java primitive of the same width. + return buffer[(int) valueMarker.endIndex - 1] >= 0; + } + + /** + * Selects and returns the size of the current integer value from the given options. 
Callers must only invoke this + * method when the integer's size is known to be either 4 or 8 bytes, and it is the caller's responsibility to + * provide correct values to 'smaller' and 'larger'. + * @param smaller the smaller of the possible sizes. + * @param larger the larger of the possible sizes. + * @return the matching size. + */ + private IntegerSize classifyFixedWidthInteger(IntegerSize smaller, IntegerSize larger) { + if (minorVersion == 0) { + return classifyInteger_1_0() ? smaller : larger; + } + if (taglessType == null) { + return smaller; + } + return classifyFixedWidthTaglessInteger_1_1() ? smaller : larger; + } + + // The maximum most-significant byte of a positive 5-byte FlexInt or FlexUInt value that can fit in + // a Java int. Integer.MAX_VALUE is 0x7FFFFFFF and a 5-byte Flex integer requires a right-shift of 5 bits. + // 0x0FFF... >> 5 == 0x007F..., so all less significant byte values are guaranteed to fit and therefore do not + // need to be examined individually. + private static final int MAX_POSITIVE_FLEX_MSB_JAVA_INT = 0x0F; + + // The maximum most-significant byte of a positive 10-byte FlexInt or FlexUInt value that can fit in + // a Java long. Long.MAX_VALUE is 0x7FFFFFFFFFFFFFFF and a 10-byte Flex integer requires a right-shift of 10 bits. + // 0x01FFFF... >> 10 == 0x00007F..., so all less significant byte values are guaranteed to fit and therefore do not + // need to be examined individually. + private static final int MAX_POSITIVE_FLEX_MSB_JAVA_LONG = 0x01; + + // The minimum most-significant byte of a negative 5-byte FlexInt that can fit in a Java int. + // Integer.MIN_VALUE is 0x80000000 and a 5-byte FlexInt requires a right-shift of 5 bits. + // (int)(0xF000... >> 5) == 0x80... Any bits set in the less significant bytes would lessen the magnitude + // and therefore do not need to be examined individually. 
+ private static final int MIN_NEGATIVE_FLEX_MSB_JAVA_INT = (byte) 0xF0; + + // The minimum most-significant byte of a negative 10-byte FlexInt that can fit in a Java long. + // Long.MIN_VALUE is 0x8000000000000000 and a 10-byte FlexInt requires a right-shift of 10 bits. + // (long) (0xFE0000... >> 10) == 0x80... Any bits set in the less significant bytes would lessen the magnitude + // and therefore do not need to be examined individually. + private static final int MIN_NEGATIVE_FLEX_MSB_JAVA_LONG = (byte) 0xFE; + + /** + * Classifies a 5- or 10-byte FlexInt or FlexUInt according to the Java integer size required to represent it without + * data loss. + * @param maxPositiveMsb the maximum most-significant byte of a positive encoded integer that would allow the + * value to fit in the smaller of the two Java types applicable to the relevant boundary. + * @param minNegativeMsb the minimum most-significant byte of a negative encoded integer that would allow the + * value to fit in the smaller of the two Java types applicable to the relevant boundary. + * @return true if the encoded value fits in the smaller of the two Java types applicable to the relevant boundary; + * otherwise, false. + */ + private boolean classifyVariableWidthTaglessIntegerAtBoundary_1_1(int maxPositiveMsb, int minNegativeMsb) { + int mostSignificantByte = buffer[(int) valueMarker.endIndex - 1]; + if (taglessType.isUnsigned) { + return (mostSignificantByte & SINGLE_BYTE_MASK) <= maxPositiveMsb; + } + return mostSignificantByte >= minNegativeMsb && mostSignificantByte <= maxPositiveMsb; + } + + /** + * Classifies the current variable-length integer (FlexInt or FlexUInt) according to the IntegerSize required to + * represent it without data loss. For efficiency, does not attempt to find the smallest-possible size for + * overpadded representations. + * @param length the byte length of the FlexInt or FlexUInt to classify. + * @return an IntegerSize capable of holding the value without data loss. 
+ */ + private IntegerSize classifyVariableWidthTaglessInteger_1_1(int length) { + if (length < 5) { + // Flex integers of less than 5 bytes cannot hit the Java int boundaries. + return IntegerSize.INT; + } + if (length == 5) { + return classifyVariableWidthTaglessIntegerAtBoundary_1_1(MAX_POSITIVE_FLEX_MSB_JAVA_INT, MIN_NEGATIVE_FLEX_MSB_JAVA_INT) + ? IntegerSize.INT + : IntegerSize.LONG; + } + if (length < 10) { + // Flex integers of less than 10 bytes cannot hit the Java long boundaries. + return IntegerSize.LONG; + } + if (length == 10) { + return classifyVariableWidthTaglessIntegerAtBoundary_1_1(MAX_POSITIVE_FLEX_MSB_JAVA_LONG, MIN_NEGATIVE_FLEX_MSB_JAVA_LONG) + ? IntegerSize.LONG + : IntegerSize.BIG_INTEGER; + } + return IntegerSize.BIG_INTEGER; + } + @Override public IntegerSize getIntegerSize() { if (valueTid == null || valueTid.type != IonType.INT || valueTid.isNull) { return null; } prepareScalar(); - int length = valueTid.variableLength ? ((int) (valueMarker.endIndex - valueMarker.startIndex)) : valueTid.length; + int length; + if (valueTid.variableLength) { + length = (int) (valueMarker.endIndex - valueMarker.startIndex); + if (taglessType != null) { + // FlexUInt or FlexInt + return classifyVariableWidthTaglessInteger_1_1(length); + } + } else { + length = valueTid.length; + } if (length < 0) { return IntegerSize.BIG_INTEGER; } else if (length < INT_SIZE_IN_BYTES) { return IntegerSize.INT; } else if (length == INT_SIZE_IN_BYTES) { - return (minorVersion != 0 || classifyInteger_1_0()) ? IntegerSize.INT : IntegerSize.LONG; + return classifyFixedWidthInteger(IntegerSize.INT, IntegerSize.LONG); } else if (length < LONG_SIZE_IN_BYTES) { return IntegerSize.LONG; } else if (length == LONG_SIZE_IN_BYTES) { - return (minorVersion != 0 || classifyInteger_1_0()) ? 
IntegerSize.LONG : IntegerSize.BIG_INTEGER; + return classifyFixedWidthInteger(IntegerSize.LONG, IntegerSize.BIG_INTEGER); } return IntegerSize.BIG_INTEGER; } @@ -1324,6 +1533,12 @@ public int symbolValueId() { if (minorVersion == 0) { return (int) readUInt(valueMarker.startIndex, valueMarker.endIndex); } else { + if (taglessType != null) { + // It is the caller's responsibility to call 'symbolValueId()' only when 'hasSymbolText()' is false, + // meaning that the tagless FlexSym is encoded as a FlexInt representing a symbol ID. + peekIndex = valueMarker.startIndex; + return (int) readFlexInt_1_1(); + } if (valueTid.length == 1){ return (int) readFixedUInt_1_1(valueMarker.startIndex, valueMarker.endIndex); } else if (valueTid.length == 2){ diff --git a/src/test/java/com/amazon/ion/impl/IonCursorBinaryTest.java b/src/test/java/com/amazon/ion/impl/IonCursorBinaryTest.java index 18437f2fc..7504fc72c 100644 --- a/src/test/java/com/amazon/ion/impl/IonCursorBinaryTest.java +++ b/src/test/java/com/amazon/ion/impl/IonCursorBinaryTest.java @@ -827,7 +827,7 @@ private static ExpectationProvider macroInvocation(int id) { * Provides Expectations that advance the reader to the next value and verify that it is a macro invocation with * the given ID. 
*/ - private static ExpectationProvider nextMacroInvocation(int id) { + static ExpectationProvider nextMacroInvocation(int id) { return consumer -> consumer.accept(new Expectation<>( String.format("next macro invocation %d", id), cursor -> { @@ -939,9 +939,9 @@ public void taglessCompactSymbols(boolean constructFromBytes) throws Exception { assertSequence( cursor, nextMacroInvocation(0), - nextTaglessValue(IonCursorBinary.PrimitiveType.COMPACT_SYMBOL, IonType.SYMBOL, 5, 10), + nextTaglessValue(IonCursorBinary.PrimitiveType.COMPACT_SYMBOL, IonType.SYMBOL, 6, 10), nextTaglessValue(IonCursorBinary.PrimitiveType.COMPACT_SYMBOL, IonType.SYMBOL, 10, 11), - nextTaglessValue(IonCursorBinary.PrimitiveType.COMPACT_SYMBOL, IonType.SYMBOL, 11, 13), + nextTaglessValue(IonCursorBinary.PrimitiveType.COMPACT_SYMBOL, IonType.SYMBOL, 13, 13), endStream() ); } @@ -971,7 +971,7 @@ public void taglessValuesInterspersedWithTaggedValues(boolean constructFromBytes nextTaggedValue(IonType.INT, 7, 7), nextTaglessValue(IonCursorBinary.PrimitiveType.FLOAT32, IonType.FLOAT, 7, 11), nextTaggedValue(IonType.FLOAT, 12, 16), - nextTaglessValue(IonCursorBinary.PrimitiveType.COMPACT_SYMBOL, IonType.SYMBOL, 16, 21), + nextTaglessValue(IonCursorBinary.PrimitiveType.COMPACT_SYMBOL, IonType.SYMBOL, 17, 21), nextTaggedValue(IonType.SYMBOL, 22, 26), endStream() ); @@ -990,7 +990,7 @@ public void fillTaglessValuesInterspersedWithTaggedValues(boolean constructFromB fillScalar(7, 7), type(IonType.INT), fillNextTaglessValue(IonCursorBinary.PrimitiveType.FLOAT32, IonType.FLOAT, 7, 11), fillScalar(12, 16), type(IonType.FLOAT), - fillNextTaglessValue(IonCursorBinary.PrimitiveType.COMPACT_SYMBOL, IonType.SYMBOL, 16, 21), + fillNextTaglessValue(IonCursorBinary.PrimitiveType.COMPACT_SYMBOL, IonType.SYMBOL, 17, 21), fillScalar(22, 26), type(IonType.SYMBOL), endStream() ); @@ -1103,11 +1103,11 @@ public void fillTaglessValuesInterspersedWithTaggedValuesIncremental() throws Ex ), instruction( cursor -> 
cursor.nextTaglessValue(IonCursorBinary.PrimitiveType.COMPACT_SYMBOL), - valueMarker(IonType.SYMBOL, 16, 21) + valueMarker(IonType.SYMBOL, 17, 21) ), instruction( IonCursorBinary::fillValue, - valueReady(IonType.SYMBOL, 16, 21) + valueReady(IonType.SYMBOL, 17, 21) ), instruction( IonCursorBinary::nextValue, @@ -1150,8 +1150,8 @@ public void skipTaglessValuesInterspersedWithTaggedValuesIncremental() throws Ex ), instruction( cursor -> cursor.nextTaglessValue(IonCursorBinary.PrimitiveType.COMPACT_SYMBOL), - // All five bytes are skipped. - valueMarker(IonType.SYMBOL, 7, 12) + // All four bytes are skipped. + valueMarker(IonType.SYMBOL, 8, 12) ), instruction( IonCursorBinary::nextValue, diff --git a/src/test/java/com/amazon/ion/impl/IonCursorTestUtilities.java b/src/test/java/com/amazon/ion/impl/IonCursorTestUtilities.java index 09b7e0105..6e6443fc0 100644 --- a/src/test/java/com/amazon/ion/impl/IonCursorTestUtilities.java +++ b/src/test/java/com/amazon/ion/impl/IonCursorTestUtilities.java @@ -2,9 +2,11 @@ // SPDX-License-Identifier: Apache-2.0 package com.amazon.ion.impl; +import com.amazon.ion.IntegerSize; import com.amazon.ion.IonBufferConfiguration; import com.amazon.ion.IonType; +import java.math.BigInteger; import java.util.ArrayList; import java.util.List; import java.util.function.Consumer; @@ -16,6 +18,7 @@ import static com.amazon.ion.IonCursor.Event.START_SCALAR; import static com.amazon.ion.IonCursor.Event.VALUE_READY; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; public class IonCursorTestUtilities { @@ -223,6 +226,49 @@ static ExpectationProvider fillSym )); } + static ExpectationProvider integerSize(IntegerSize expectedSize) { + return consumer -> consumer.accept(new Expectation<>( + String.format("integerSize(%s)", expectedSize), + reader -> { + assertEquals(expectedSize, reader.getIntegerSize()); + } + )); + } + + static ExpectationProvider intValue(int expectedValue) { + 
return consumer -> consumer.accept(new Expectation<>( + String.format("int(%d)", expectedValue), + reader -> { + assertEquals(IntegerSize.INT, reader.getIntegerSize()); + assertEquals(expectedValue, reader.intValue()); + } + )); + } + + static ExpectationProvider longValue(long expectedValue) { + return consumer -> consumer.accept(new Expectation<>( + String.format("long(%d)", expectedValue), + reader -> { + assertTrue(reader.getIntegerSize().ordinal() <= IntegerSize.LONG.ordinal()); + assertEquals(expectedValue, reader.longValue()); + } + )); + } + + static ExpectationProvider bigIntegerValue(BigInteger expectedValue) { + return consumer -> consumer.accept(new Expectation<>( + String.format("bigInteger(%s)", expectedValue), + reader -> assertEquals(expectedValue, reader.bigIntegerValue()) + )); + } + + static ExpectationProvider doubleValue(double expectedValue) { + return consumer -> consumer.accept(new Expectation<>( + String.format("double(%f)", expectedValue), + reader -> assertEquals(expectedValue, reader.doubleValue(), 1e-9) + )); + } + /** * Provides an Expectation that verifies that advancing the cursor positions it on a container value, without * filling that container. 
diff --git a/src/test/java/com/amazon/ion/impl/IonReaderContinuableCoreBinaryTest.java b/src/test/java/com/amazon/ion/impl/IonReaderContinuableCoreBinaryTest.java index f0e1d2c1e..c831fe672 100644 --- a/src/test/java/com/amazon/ion/impl/IonReaderContinuableCoreBinaryTest.java +++ b/src/test/java/com/amazon/ion/impl/IonReaderContinuableCoreBinaryTest.java @@ -2,6 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 package com.amazon.ion.impl; +import com.amazon.ion.IntegerSize; import com.amazon.ion.IonCursor; import com.amazon.ion.IonException; import com.amazon.ion.IonType; @@ -12,13 +13,27 @@ import org.junit.jupiter.params.provider.ValueSource; import java.io.ByteArrayInputStream; +import java.math.BigInteger; import static com.amazon.ion.BitUtils.bytes; +import static com.amazon.ion.IonCursor.Event.START_SCALAR; +import static com.amazon.ion.IonCursor.Event.VALUE_READY; +import static com.amazon.ion.TestUtils.withIvm; +import static com.amazon.ion.impl.IonCursorBinary.PrimitiveType.FLEX_INT; +import static com.amazon.ion.impl.IonCursorBinary.PrimitiveType.FLEX_UINT; +import static com.amazon.ion.impl.IonCursorBinary.PrimitiveType.INT16; +import static com.amazon.ion.impl.IonCursorBinary.PrimitiveType.INT32; +import static com.amazon.ion.impl.IonCursorBinary.PrimitiveType.INT64; +import static com.amazon.ion.impl.IonCursorBinary.PrimitiveType.UINT32; +import static com.amazon.ion.impl.IonCursorBinary.PrimitiveType.UINT64; +import static com.amazon.ion.impl.IonCursorBinary.PrimitiveType.UINT8; +import static com.amazon.ion.impl.IonCursorBinaryTest.nextMacroInvocation; import static com.amazon.ion.impl.IonCursorTestUtilities.STANDARD_BUFFER_CONFIGURATION; import static com.amazon.ion.impl.IonCursorTestUtilities.Expectation; import static com.amazon.ion.impl.IonCursorTestUtilities.ExpectationProvider; import static com.amazon.ion.impl.IonCursorTestUtilities.assertSequence; import static com.amazon.ion.impl.IonCursorTestUtilities.container; +import static 
com.amazon.ion.impl.IonCursorTestUtilities.doubleValue; import static com.amazon.ion.impl.IonCursorTestUtilities.endContainer; import static com.amazon.ion.impl.IonCursorTestUtilities.endStream; import static com.amazon.ion.impl.IonCursorTestUtilities.fillContainer; @@ -28,8 +43,10 @@ import static com.amazon.ion.impl.IonCursorTestUtilities.scalar; import static com.amazon.ion.impl.IonCursorTestUtilities.startContainer; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertNull; import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; public class IonReaderContinuableCoreBinaryTest { @@ -669,4 +686,297 @@ public void expectIncompleteAnnotationHeaderToFailCleanly() { assertThrows(IonException.class, reader::nextValue); reader.close(); } + + /** + * Provides Expectations that advance the reader to the next tagless value, fill the value, and verify that it has + * the given attributes. + */ + private static ExpectationProvider fillNextTaglessValue(IonCursorBinary.PrimitiveType primitiveType, IonType expectedType) { + return consumer -> consumer.accept(new Expectation<>( + String.format("fill tagless %s", primitiveType.name()), + reader -> { + assertEquals(START_SCALAR, reader.nextTaglessValue(primitiveType)); + assertEquals(VALUE_READY, reader.fillValue()); + assertEquals(expectedType, reader.getType()); + } + )); + } + + /** + * Provides Expectations that advance the reader to the next tagless value, fill the value, and verify that it is + * an integer that fits in a Java int with the expected value. 
+ */ + private static ExpectationProvider nextTaglessIntValue(IonCursorBinary.PrimitiveType primitiveType, int expectedValue) { + return consumer -> consumer.accept(new Expectation<>( + String.format("fill tagless int from %s", primitiveType.name()), + reader -> { + assertEquals(START_SCALAR, reader.nextTaglessValue(primitiveType)); + assertEquals(VALUE_READY, reader.fillValue()); + assertEquals(IonType.INT, reader.getType()); + assertEquals(IntegerSize.INT, reader.getIntegerSize()); + assertEquals(expectedValue, reader.intValue()); + } + )); + } + + /** + * Provides Expectations that advance the reader to the next tagless value, fill the value, and verify that it is + * an integer that fits in a Java long with the expected value. + */ + private static ExpectationProvider nextTaglessLongValue(IonCursorBinary.PrimitiveType primitiveType, long expectedValue) { + return consumer -> consumer.accept(new Expectation<>( + String.format("fill tagless long from %s", primitiveType.name()), + reader -> { + assertEquals(START_SCALAR, reader.nextTaglessValue(primitiveType)); + assertEquals(VALUE_READY, reader.fillValue()); + assertEquals(IonType.INT, reader.getType()); + assertEquals(IntegerSize.LONG, reader.getIntegerSize()); + assertEquals(expectedValue, reader.longValue()); + } + )); + } + + /** + * Provides Expectations that advance the reader to the next tagless value, fill the value, and verify that it is + * an integer that fits in a BigInteger with the expected value. 
+ */ + private static ExpectationProvider nextTaglessBigIntegerValue(IonCursorBinary.PrimitiveType primitiveType, BigInteger expectedValue) { + return consumer -> consumer.accept(new Expectation<>( + String.format("fill tagless BigInteger from %s", primitiveType.name()), + reader -> { + assertEquals(START_SCALAR, reader.nextTaglessValue(primitiveType)); + assertEquals(VALUE_READY, reader.fillValue()); + assertEquals(IonType.INT, reader.getType()); + assertEquals(IntegerSize.BIG_INTEGER, reader.getIntegerSize()); + assertEquals(expectedValue, reader.bigIntegerValue()); + } + )); + } + + @ParameterizedTest(name = "constructFromBytes={0}") + @ValueSource(booleans = {true, false}) + public void taglessInts(boolean constructFromBytes) throws Exception { + byte[] data = withIvm(1, bytes( + 0x00, // User macro ID 0 + 0xFF, // Interpreted as uint8 + 0xFF, 0xFF, // Interpreted as int16 + 0xFF, 0xFF, 0xFF, 0xFF, // Interpreted as uint32 + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // Interpreted as int64 + 0xFC, 0xFF, 0xFF, // Interpreted as flex_uint + 0xFC, 0xFF, 0xFF // Interpreted as flex_int + )); + try (IonReaderContinuableCoreBinary reader = initializeReader(constructFromBytes, data)) { + assertSequence( + reader, + nextMacroInvocation(0), + nextTaglessIntValue(UINT8, 0xFF), + nextTaglessIntValue(INT16, -1), + nextTaglessLongValue(UINT32, 0xFFFFFFFFL), + nextTaglessLongValue(INT64, -1), + nextTaglessIntValue(FLEX_UINT, 0x1FFFFF), + nextTaglessIntValue(FLEX_INT, -1), + endStream() + ); + } + } + + @ParameterizedTest(name = "constructFromBytes={0}") + @ValueSource(booleans = {true, false}) + public void taglessFixedIntBoundaries(boolean constructFromBytes) throws Exception { + byte[] data = withIvm(1, bytes( + 0x00, // User macro ID 0 + 0xFF, 0xFF, 0xFF, 0x7F, // Interpreted as uint32 -- this is Integer.MAX_VALUE + 0xFF, 0xFF, 0xFF, 0x7F, // Interpreted as int32 -- this is Integer.MAX_VALUE + 0x00, 0x00, 0x00, 0x80, // Interpreted as uint32 -- this won't fit in a 
Java int, which is signed + 0x00, 0x00, 0x00, 0x80, // Interpreted as int32 -- this is Integer.MIN_VALUE + 0xFF, 0xFF, 0xFF, 0xFF, // Interpreted as uint32 -- this won't fit in a Java int + 0xFF, 0xFF, 0xFF, 0xFF // Interpreted as int32 -- this is -1 + )); + try (IonReaderContinuableCoreBinary reader = initializeReader(constructFromBytes, data)) { + assertSequence( + reader, + nextMacroInvocation(0), + nextTaglessIntValue(UINT32, Integer.MAX_VALUE), + nextTaglessIntValue(INT32, Integer.MAX_VALUE), + nextTaglessLongValue(UINT32, 0x80000000L), + nextTaglessIntValue(INT32, Integer.MIN_VALUE), + nextTaglessLongValue(UINT32, 0xFFFFFFFFL), + nextTaglessIntValue(INT32, -1), + endStream() + ); + } + } + + @ParameterizedTest(name = "constructFromBytes={0}") + @ValueSource(booleans = {true, false}) + public void taglessFixedLongBoundaries(boolean constructFromBytes) throws Exception { + byte[] data = withIvm(1, bytes( + 0x00, // User macro ID 0 + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x7F, // Interpreted as uint64 -- this is Long.MAX_VALUE + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x7F, // Interpreted as int64 -- this is Long.MAX_VALUE + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, // Interpreted as uint64 -- this won't fit in a Java long + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, // Interpreted as int64 -- this is Long.MIN_VALUE + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // Interpreted as uint64 -- this won't fit in a Java long + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF // Interpreted as int64 -- this is -1 + )); + try (IonReaderContinuableCoreBinary reader = initializeReader(constructFromBytes, data)) { + assertSequence( + reader, + nextMacroInvocation(0), + nextTaglessLongValue(UINT64, Long.MAX_VALUE), + nextTaglessLongValue(INT64, Long.MAX_VALUE), + nextTaglessBigIntegerValue(UINT64, BigInteger.valueOf(Long.MAX_VALUE).add(BigInteger.ONE)), + nextTaglessLongValue(INT64, Long.MIN_VALUE), + nextTaglessBigIntegerValue(UINT64, 
BigInteger.valueOf(Long.MAX_VALUE).shiftLeft(1).add(BigInteger.ONE)), + nextTaglessLongValue(INT64, -1), + endStream() + ); + } + } + + @ParameterizedTest(name = "constructFromBytes={0}") + @ValueSource(booleans = {true, false}) + public void taglessFlexIntBoundaries(boolean constructFromBytes) throws Exception { + byte[] data = withIvm(1, bytes( + 0x00, // User macro ID 0 + 0xF0, 0xFF, 0xFF, 0xFF, 0x0F, // 31 set bits. As flex_uint this is Integer.MAX_VALUE + 0xF0, 0xFF, 0xFF, 0xFF, 0x0F, // 31 set bits. As flex_int this is Integer.MAX_VALUE + 0x10, 0x00, 0x00, 0x00, 0x10, // Bit 31 set. As a flex_uint this is Integer.MAX_VALUE + 1 + 0x10, 0x00, 0x00, 0x00, 0x10, // Bit 31 set (sign not extended). As flex_int this is Integer.MAX_VALUE + 1 + 0xF0, 0xFF, 0xFF, 0xFF, 0x1F, // 32 set bits. As flex_uint this is (Integer.MAX_VALUE << 1) + 1 + 0xF0, 0xFF, 0xFF, 0xFF, 0x1F, // 32 set bits. As flex_int this is (Integer.MAX_VALUE << 1) + 1 + 0x10, 0x00, 0x00, 0x00, 0xF0, // Bits 31+ set. As flex_uint this won't fit in an int + 0x10, 0x00, 0x00, 0x00, 0xF0, // Bits 31+ set (sign extended). As flex_int this is Integer.MIN_VALUE + 0xF0, 0xFF, 0xFF, 0xFF, 0xEF, // All bits except bit 31 set. As flex_uint this won't fit in an int + 0xF0, 0xFF, 0xFF, 0xFF, 0xEF, // All bits except bit 31 set (sign extended). As flex_int this is Integer.MIN_VALUE - 1 + 0xF0, 0xFF, 0xFF, 0xFF, 0xFF, // All bits set. As flex_uint this won't fit in a Java int + 0xF0, 0xFF, 0xFF, 0xFF, 0xFF // All bits set. 
As flex_int this is -1 + )); + try (IonReaderContinuableCoreBinary reader = initializeReader(constructFromBytes, data)) { + assertSequence( + reader, + nextMacroInvocation(0), + nextTaglessIntValue(FLEX_UINT, Integer.MAX_VALUE), + nextTaglessIntValue(FLEX_INT, Integer.MAX_VALUE), + nextTaglessLongValue(FLEX_UINT, Integer.MAX_VALUE + 1L), + nextTaglessLongValue(FLEX_INT, Integer.MAX_VALUE + 1L), + nextTaglessLongValue(FLEX_UINT, 0xFFFFFFFFL), + nextTaglessLongValue(FLEX_INT, 0xFFFFFFFFL), + nextTaglessLongValue(FLEX_UINT, 0x780000000L), // 0xF000... >> 5 == 0x780... + nextTaglessIntValue(FLEX_INT, Integer.MIN_VALUE), + nextTaglessLongValue(FLEX_UINT, 0x77FFFFFFFL), // 0xEFFF... >> 5 == 0x77F... + nextTaglessLongValue(FLEX_INT, Integer.MIN_VALUE - 1L), + nextTaglessLongValue(FLEX_UINT, 0x7FFFFFFFFL), // 0xFFFF... >> 5 == 0x7FF... + nextTaglessIntValue(FLEX_INT, -1), + endStream() + ); + } + } + + @ParameterizedTest(name = "constructFromBytes={0}") + @ValueSource(booleans = {true, false}) + public void taglessFlexLongBoundaries(boolean constructFromBytes) throws Exception { + byte[] data = withIvm(1, bytes( + 0x00, // User macro ID 0 + 0x00, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x01, // 63 set bits. As flex_uint this is Long.MAX_VALUE + 0x00, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x01, // 63 set bits. As flex_int this is Long.MAX_VALUE + 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, // Bit 63 set. As a flex_uint this is Long.MAX_VALUE + 1 + 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, // Bit 63 set (sign not extended). As flex_int this is Long.MAX_VALUE + 1 + 0x00, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x03, // 64 set bits. As flex_uint this is (Long.MAX_VALUE << 1) + 1 + 0x00, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x03, // 64 set bits. As flex_int this is (Long.MAX_VALUE << 1) + 1 + 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFE, // Bits 63+ set. 
As flex_uint this won't fit in a long + 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFE, // Bits 63+ set (sign extended). As flex_int this is Long.MIN_VALUE + 0x00, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFD, // All bits except bit 63 set. As flex_uint this won't fit in a long + 0x00, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFD, // All bits except bit 63 set (sign extended). As flex_int this is Long.MIN_VALUE - 1 + 0x00, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // All bits set. As flex_uint this won't fit in a Java long + 0x00, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF // All bits set. As flex_int this is -1 + )); + try (IonReaderContinuableCoreBinary reader = initializeReader(constructFromBytes, data)) { + assertSequence( + reader, + nextMacroInvocation(0), + nextTaglessLongValue(FLEX_UINT, Long.MAX_VALUE), + nextTaglessLongValue(FLEX_INT, Long.MAX_VALUE), + nextTaglessBigIntegerValue(FLEX_UINT, BigInteger.valueOf(Long.MAX_VALUE).add(BigInteger.ONE)), + nextTaglessBigIntegerValue(FLEX_INT, BigInteger.valueOf(Long.MAX_VALUE).add(BigInteger.ONE)), + nextTaglessBigIntegerValue(FLEX_UINT, BigInteger.valueOf(Long.MAX_VALUE).shiftLeft(1).add(BigInteger.ONE)), + nextTaglessBigIntegerValue(FLEX_INT, BigInteger.valueOf(Long.MAX_VALUE).shiftLeft(1).add(BigInteger.ONE)), + nextTaglessBigIntegerValue(FLEX_UINT, BigInteger.valueOf(0x3F80000000000000L).shiftLeft(8)), // 0xFE00... >>> 2 == 0x3F80... + nextTaglessLongValue(FLEX_INT, Long.MIN_VALUE), + nextTaglessBigIntegerValue(FLEX_UINT, BigInteger.valueOf(0x3F7FFFFFFFFFFFFFL).shiftLeft(8).or(BigInteger.valueOf(0xFF))), // 0xFDFF... >>> 2 == 0x3F7F... + nextTaglessBigIntegerValue(FLEX_INT, BigInteger.valueOf(Long.MIN_VALUE).subtract(BigInteger.ONE)), + nextTaglessBigIntegerValue(FLEX_UINT, BigInteger.valueOf(0x3FFFFFFFFFFFFFFFL).shiftLeft(8).or(BigInteger.valueOf(0xFF))), // 0xFF... >>> 2 == 0x3F... 
+ nextTaglessLongValue(FLEX_INT, -1), + endStream() + ); + } + } + + @ParameterizedTest(name = "constructFromBytes={0}") + @ValueSource(booleans = {true, false}) + public void taglessFloats(boolean constructFromBytes) throws Exception { + byte[] data = withIvm(1, bytes( + 0x00, // User macro ID 0 + 0x00, 0x3C, // Interpreted as float16 (1.0) + 0x00, 0x00, 0x80, 0x3F, // Interpreted as float32 (1.0) + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xF0, 0x3F // Interpreted as float64 (1.0) + )); + try (IonReaderContinuableCoreBinary reader = initializeReader(constructFromBytes, data)) { + assertSequence( + reader, + nextMacroInvocation(0), + fillNextTaglessValue(IonCursorBinary.PrimitiveType.FLOAT16, IonType.FLOAT), + doubleValue(1.0), + fillNextTaglessValue(IonCursorBinary.PrimitiveType.FLOAT32, IonType.FLOAT), + doubleValue(1.0), + fillNextTaglessValue(IonCursorBinary.PrimitiveType.FLOAT64, IonType.FLOAT), + doubleValue(1.0), + endStream() + ); + } + } + + static ExpectationProvider symbolValue(String expectedText) { + return consumer -> consumer.accept(new Expectation<>( + String.format("symbol(%s)", expectedText), + reader -> { + assertTrue(reader.hasSymbolText()); + assertEquals(expectedText, reader.getSymbolText()); + } + )); + } + + static ExpectationProvider symbolValue(int expectedSid) { + return consumer -> consumer.accept(new Expectation<>( + String.format("symbol(%d)", expectedSid), + reader -> { + assertFalse(reader.hasSymbolText()); + assertEquals(expectedSid, reader.symbolValueId()); + } + )); + } + + @ParameterizedTest(name = "constructFromBytes={0}") + @ValueSource(booleans = {true, false}) + public void taglessCompactSymbols(boolean constructFromBytes) throws Exception { + byte[] data = withIvm(1, bytes( + 0x00, // User macro ID 0 + 0xF9, 0x6E, 0x61, 0x6D, 0x65, // interpreted as compact symbol (FlexSym with inline text "name") + 0x09, // interpreted as compact symbol (FlexSym with SID 4) + 0x01, 0x90 // interpreted as compact symbol (special FlexSym) + )); + 
try (IonReaderContinuableCoreBinary reader = initializeReader(constructFromBytes, data)) { + assertSequence( + reader, + nextMacroInvocation(0), + fillNextTaglessValue(IonCursorBinary.PrimitiveType.COMPACT_SYMBOL, IonType.SYMBOL), + symbolValue("name"), + fillNextTaglessValue(IonCursorBinary.PrimitiveType.COMPACT_SYMBOL, IonType.SYMBOL), + symbolValue(4), + fillNextTaglessValue(IonCursorBinary.PrimitiveType.COMPACT_SYMBOL, IonType.SYMBOL), + symbolValue(""), + endStream() + ); + } + } } diff --git a/src/test/java/com/amazon/ion/impl/IonReaderContinuableTopLevelBinaryTest.java b/src/test/java/com/amazon/ion/impl/IonReaderContinuableTopLevelBinaryTest.java index e9a6784e3..9759af8c8 100644 --- a/src/test/java/com/amazon/ion/impl/IonReaderContinuableTopLevelBinaryTest.java +++ b/src/test/java/com/amazon/ion/impl/IonReaderContinuableTopLevelBinaryTest.java @@ -4,7 +4,6 @@ import com.amazon.ion.BufferConfiguration; import com.amazon.ion.Decimal; -import com.amazon.ion.IntegerSize; import com.amazon.ion.IonBufferConfiguration; import com.amazon.ion.IonDatagram; import com.amazon.ion.IonException; @@ -62,6 +61,10 @@ import static com.amazon.ion.TestUtils.*; import static com.amazon.ion.impl.IonCursorTestUtilities.Expectation; import static com.amazon.ion.impl.IonCursorTestUtilities.ExpectationProvider; +import static com.amazon.ion.impl.IonCursorTestUtilities.bigIntegerValue; +import static com.amazon.ion.impl.IonCursorTestUtilities.doubleValue; +import static com.amazon.ion.impl.IonCursorTestUtilities.intValue; +import static com.amazon.ion.impl.IonCursorTestUtilities.longValue; import static com.amazon.ion.impl.IonCursorTestUtilities.type; import static org.junit.jupiter.api.Assertions.assertArrayEquals; import static org.junit.jupiter.api.Assertions.assertEquals; @@ -381,40 +384,6 @@ static ExpectationProvider booleanValue(bool )); } - static ExpectationProvider intValue(int expectedValue) { - return consumer -> consumer.accept(new Expectation<>( - 
String.format("int(%d)", expectedValue), - reader -> { - assertEquals(IntegerSize.INT, reader.getIntegerSize()); - assertEquals(expectedValue, reader.intValue()); - } - )); - } - - static ExpectationProvider longValue(long expectedValue) { - return consumer -> consumer.accept(new Expectation<>( - String.format("long(%d)", expectedValue), - reader -> { - assertTrue(reader.getIntegerSize().ordinal() <= IntegerSize.LONG.ordinal()); - assertEquals(expectedValue, reader.longValue()); - } - )); - } - - static ExpectationProvider bigIntegerValue(BigInteger expectedValue) { - return consumer -> consumer.accept(new Expectation<>( - String.format("bigInteger(%s)", expectedValue), - reader -> assertEquals(expectedValue, reader.bigIntegerValue()) - )); - } - - static ExpectationProvider doubleValue(double expectedValue) { - return consumer -> consumer.accept(new Expectation<>( - String.format("double(%f)", expectedValue), - reader -> assertEquals(expectedValue, reader.doubleValue(), 1e-9) - )); - } - static ExpectationProvider decimalValue(BigDecimal expectedValue) { return consumer -> consumer.accept(new Expectation<>( String.format("decimal(%s)", expectedValue), @@ -1825,7 +1794,7 @@ private final void readIntsIntoOtherType( public void doubleValueOnInt(boolean constructFromBytes) throws Exception { readIntsIntoOtherType( constructFromBytes, - IonReaderContinuableTopLevelBinaryTest::doubleValue, + IonCursorTestUtilities::doubleValue, 0.0, 1.0, -1.0,