Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adds support for reading binary tagless types. #889

Merged
merged 2 commits into from
Jun 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
280 changes: 265 additions & 15 deletions src/main/java/com/amazon/ion/impl/IonCursorBinary.java
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
import static com.amazon.ion.impl.IonTypeID.SYSTEM_SYMBOL_VALUE;
import static com.amazon.ion.impl.IonTypeID.TWO_ANNOTATION_FLEX_SYMS_LOWER_NIBBLE_1_1;
import static com.amazon.ion.impl.IonTypeID.TWO_ANNOTATION_SIDS_LOWER_NIBBLE_1_1;
import static com.amazon.ion.impl.IonTypeID.TYPE_IDS_1_1;
import static com.amazon.ion.util.IonStreamUtils.throwAsIonException;

/**
Expand Down Expand Up @@ -349,6 +350,11 @@ private static class RefillableState {
*/
private boolean isSystemInvocation = false;

/**
* The type of the current value, if tagless. Otherwise, null.
*/
PrimitiveType taglessType = null;

/**
* @return the given configuration's DataHandler, or null if that DataHandler is a no-op.
*/
Expand Down Expand Up @@ -822,6 +828,8 @@ private boolean slowSeek(long numberOfBytes) {
} while (shortfall > 0 && skipped > 0);
if (shortfall <= 0) {
refillableState.bytesRequested = 0;
// The value has been entirely skipped, so its endIndex is now the buffer's limit.
valueMarker.endIndex = limit;
refillableState.state = State.READY;
return false;
}
Expand Down Expand Up @@ -1230,12 +1238,12 @@ private boolean uncheckedReadAnnotationWrapperHeader_1_1(IonTypeID valueTid) {
private boolean slowSkipNonPrefixedAnnotations_1_1(IonTypeID valueTid) {
if (valueTid.isInlineable) {
// Opcodes 0xE7 (one annotation FlexSym) and 0xE8 (two annotation FlexSyms)
if (slowSkipFlexSym_1_1()) {
if (slowSkipFlexSym_1_1(null) == FlexSymType.INCOMPLETE) {
return true;
}
if (valueTid.lowerNibble == TWO_ANNOTATION_FLEX_SYMS_LOWER_NIBBLE_1_1) {
// Opcode 0xE8 (two annotation FlexSyms)
return slowSkipFlexSym_1_1();
return slowSkipFlexSym_1_1(null) == FlexSymType.INCOMPLETE;
}
} else {
// Opcodes 0xE4 (one annotation SID) and 0xE5 (two annotation SIDs)
Expand Down Expand Up @@ -1542,27 +1550,127 @@ private long slowReadFlexSym_1_1(Marker markerToSet) {
return result;
}

/**
 * The kinds of FlexSym encoding that the cursor distinguishes when skipping over a FlexSym.
 */
private enum FlexSymType {
    /** Not enough bytes were available to finish reading the FlexSym; no type ID can be derived. */
    INCOMPLETE {
        @Override
        IonTypeID typeIdFor(int length) {
            throw new IllegalStateException("The FlexSym is incomplete.");
        }
    },
    /** The FlexSym carries its text inline. */
    INLINE_TEXT {
        @Override
        IonTypeID typeIdFor(int length) {
            // Lengths up to 0xF are folded into the low nibble of the table index; anything longer maps to the
            // variable-length inline symbol entry.
            final int index = (length > 0xF)
                ? OpCodes.VARIABLE_LENGTH_INLINE_SYMBOL & SINGLE_BYTE_MASK
                : 0xA0 | length;
            return TYPE_IDS_1_1[index];
        }
    },
    /** The FlexSym refers to a symbol by ID. */
    SYMBOL_ID {
        @Override
        IonTypeID typeIdFor(int length) {
            final int index;
            if (length == 0) {
                index = OpCodes.SYMBOL_ADDRESS_1_BYTE & SINGLE_BYTE_MASK;
            } else if (length < 3) {
                // Lengths 1 and 2 are folded into the low nibble of the table index.
                index = 0xE0 | length;
            } else {
                index = OpCodes.SYMBOL_ADDRESS_MANY_BYTES & SINGLE_BYTE_MASK;
            }
            return TYPE_IDS_1_1[index];
        }
    },
    /** The special FlexSym that wraps the delimited end marker; it has no corresponding type ID. */
    STRUCT_END {
        @Override
        IonTypeID typeIdFor(int length) {
            throw new IllegalStateException("The special struct end FlexSym is not associated with a type ID.");
        }
    };

    /**
     * Determines which kind of special FlexSym (one that begins with FlexInt zero) is present, based on the byte
     * that follows the FlexInt.
     * @param specialByte the byte that followed FlexInt zero, or a negative value if that byte was unavailable.
     * @return INCOMPLETE when `specialByte` is negative; SYMBOL_ID for the zero-length inline symbol opcode;
     *  INLINE_TEXT for the zero-length string opcode; STRUCT_END for the delimited end marker.
     * @throws IonException if the byte is none of the opcodes listed above.
     */
    static FlexSymType classifySpecialFlexSym(int specialByte) {
        if (specialByte < 0) {
            return INCOMPLETE;
        }
        final byte opcode = (byte) specialByte;
        if (opcode == OpCodes.INLINE_SYMBOL_ZERO_LENGTH) {
            return SYMBOL_ID;
        }
        if (opcode == OpCodes.STRING_ZERO_LENGTH) {
            return INLINE_TEXT;
        }
        if (opcode == OpCodes.DELIMITED_END_MARKER) {
            return STRUCT_END;
        }
        throw new IonException("FlexSyms may only wrap symbol zero, empty string, or delimited end.");
    }

    /**
     * Selects the Ion 1.1 IonTypeID that best matches a FlexSym of this kind with the given length.
     * @param length the length of the FlexSym.
     * @return an entry of TYPE_IDS_1_1 chosen from this kind and `length`.
     * @throws IllegalStateException if this kind (INCOMPLETE or STRUCT_END) has no associated type ID.
     */
    abstract IonTypeID typeIdFor(int length);
}

/**
 * Skips a FlexSym, reading directly from the buffer without first ensuring that the bytes are available. After
 * this method returns, `peekIndex` points to the first byte after the end of the FlexSym.
 * @param markerToSet if non-null, receives the bounds of the FlexSym's content. When the method returns
 *                    `INLINE_TEXT` for a FlexSym with inline text, `startIndex` and `endIndex` are set to the
 *                    bounds of the inline UTF-8 byte sequence. For a special FlexSym (FlexInt zero followed by a
 *                    single byte), both indices are set to the position immediately after that byte. The marker
 *                    is left untouched when `SYMBOL_ID` is returned for a positive FlexInt. May be null, matching
 *                    `slowSkipFlexSym_1_1`, in which case no bounds are recorded.
 * @return the type of FlexSym that was skipped.
 * @throws IonException if a special FlexSym wraps a byte other than the ones `classifySpecialFlexSym` accepts.
 */
private FlexSymType uncheckedSkipFlexSym_1_1(Marker markerToSet) {
    long result = uncheckedReadFlexInt_1_1();
    if (result == 0) {
        // Special FlexSym: exactly one byte follows the FlexInt. The (empty) content begins after that byte.
        if (markerToSet != null) {
            markerToSet.startIndex = peekIndex + 1;
            markerToSet.endIndex = markerToSet.startIndex;
        }
        return FlexSymType.classifySpecialFlexSym(buffer[(int) peekIndex++] & SINGLE_BYTE_MASK);
    }
    if (result < 0) {
        // A negative FlexInt is the negated byte length of the inline text that follows.
        long textEndIndex = peekIndex - result;
        if (markerToSet != null) {
            markerToSet.startIndex = peekIndex;
            markerToSet.endIndex = textEndIndex;
        }
        peekIndex = textEndIndex;
        return FlexSymType.INLINE_TEXT;
    }
    // A positive FlexInt is a symbol ID; it was fully consumed by the read above.
    return FlexSymType.SYMBOL_ID;
}

/**
* Skips a FlexSym, ensuring enough space is available in the buffer. After this method returns, `peekIndex` points
* to the first byte after the end of the FlexSym.
* @return true if there are not enough bytes in the stream to complete the FlexSym; otherwise, false.
* @param markerToSet if non-null and the method returns `INLINE_TEXT`, will have `startIndex` and `endIndex` set
* to the bounds of the inline UTF-8 byte sequence.
* @return INCOMPLETE if there are not enough bytes in the stream to complete the FlexSym; otherwise, the type
* of FlexSym that was skipped.
*/
private boolean slowSkipFlexSym_1_1() {
long result = slowReadFlexInt_1_1();
private FlexSymType slowSkipFlexSym_1_1(Marker markerToSet) {
long result = slowReadFlexUInt_1_1();
if (result < 0) {
return FlexSymType.INCOMPLETE;
}
if (buffer[(int) peekIndex - 1] < 0) {
// Sign extension.
result |= ~(-1 >>> Long.numberOfLeadingZeros(result));
}
if (result == 0) {
int nextByte = slowReadByte();
if (nextByte < 0) {
return true;
}
if ((byte) nextByte != OpCodes.INLINE_SYMBOL_ZERO_LENGTH && (byte) nextByte != OpCodes.STRING_ZERO_LENGTH && (byte) nextByte != OpCodes.DELIMITED_END_MARKER) {
throw new IonException("FlexSyms may only wrap symbol zero, empty string, or delimited end.");
FlexSymType flexSymType = FlexSymType.classifySpecialFlexSym(slowReadByte());
if (markerToSet != null && flexSymType != FlexSymType.INCOMPLETE) {
markerToSet.startIndex = peekIndex;
markerToSet.endIndex = peekIndex;
}
return false;
return flexSymType;
} else if (result < 0) {
if (markerToSet != null) {
markerToSet.startIndex = peekIndex;
markerToSet.endIndex = peekIndex - result;
}
peekIndex -= result;
return false;
return FlexSymType.INLINE_TEXT;
}
return false;
return FlexSymType.SYMBOL_ID;
}

/**
Expand Down Expand Up @@ -1949,6 +2057,7 @@ private void reset() {
annotationSequenceMarker.endIndex = -1;
macroInvocationId = -1;
isSystemInvocation = false;
taglessType = null;
}

/**
Expand Down Expand Up @@ -2318,7 +2427,8 @@ private boolean slowReadValueHeader(IonTypeID valueTid, boolean isAnnotated, Mar
}
valueLength = 0;
} else if (minorVersion == 1 && valueTid.isMacroInvocation) {
return slowReadMacroInvocationHeader(valueTid, markerToSet, valueLength);
slowReadMacroInvocationHeader(valueTid, markerToSet, valueLength);
return true;
} else {
setCheckpoint(CheckpointLocation.AFTER_SCALAR_HEADER);
event = Event.START_SCALAR;
Expand Down Expand Up @@ -2784,6 +2894,146 @@ private Event slowNextValue() {
return slowOverflowableNextToken();
}


/**
* The tagless primitive types supported by Ion 1.1+.
* <p>
* Each constant pairs an entry of IonTypeID.TYPE_IDS_1_1 with a signedness flag. The signed and unsigned variants
* of an encoding share the same table entry (for example, UINT8 and INT8 both use index 0x61), so `isUnsigned` is
* the only thing that tells them apart. The declaration order is left as-is because reordering would change the
* constants' ordinals.
*/
enum PrimitiveType {
// Unsigned integer encodings.
UINT8(IonTypeID.TYPE_IDS_1_1[0x61], true),
UINT16(IonTypeID.TYPE_IDS_1_1[0x62], true),
UINT32(IonTypeID.TYPE_IDS_1_1[0x64], true),
UINT64(IonTypeID.TYPE_IDS_1_1[0x68], true),
FLEX_UINT(IonTypeID.TYPE_IDS_1_1[0xF6], true),
// Signed integer encodings; same table entries as their unsigned counterparts above.
INT8(IonTypeID.TYPE_IDS_1_1[0x61], false),
INT16(IonTypeID.TYPE_IDS_1_1[0x62], false),
INT32(IonTypeID.TYPE_IDS_1_1[0x64], false),
INT64(IonTypeID.TYPE_IDS_1_1[0x68], false),
FLEX_INT(IonTypeID.TYPE_IDS_1_1[0xF6], false),
// Floating point encodings.
FLOAT16(IonTypeID.TYPE_IDS_1_1[0x6B], false),
FLOAT32(IonTypeID.TYPE_IDS_1_1[0x6C], false),
FLOAT64(IonTypeID.TYPE_IDS_1_1[0x6D], false),
// Symbol encoded as a FlexSym. nextTaglessValue replaces this initial type ID with a more specific one once the
// FlexSym's type and length are known (see readFlexSymLengthAndType_1_1).
COMPACT_SYMBOL(IonTypeID.TYPE_IDS_1_1[0xFA], false);

/** The type ID that nextTaglessValue initially assigns to the cursor's `valueTid` for a value of this type. */
final IonTypeID typeID;
/**
* True for the unsigned integer variants; false for everything else.
* NOTE(review): presumably consulted by readers when decoding the value's bytes; no consumer is visible here.
*/
final boolean isUnsigned;

/**
* @param typeID the IonTypeID associated with this primitive type.
* @param isUnsigned whether this primitive type is an unsigned integer encoding.
*/
PrimitiveType(IonTypeID typeID, boolean isUnsigned) {
this.typeID = typeID;
this.isUnsigned = isUnsigned;
}
}

/**
 * Moves past whatever remains of the previous value so that the cursor sits on the next token.
 * <p>
 * The buffer is first made ready if the refillable state is not already READY. The remaining value bytes are
 * then skipped only when the last checkpoint was taken right after a scalar or container header.
 * @return true if the stream did not contain enough data to finish skipping the previous value; otherwise, false.
 */
private boolean slowSkipToNextToken() {
    if (refillableState.state != State.READY) {
        if (!slowMakeBufferReady()) {
            return true;
        }
    }
    final boolean positionedAfterHeader =
        checkpointLocation == CheckpointLocation.AFTER_SCALAR_HEADER
            || checkpointLocation == CheckpointLocation.AFTER_CONTAINER_HEADER;
    return positionedAfterHeader && slowSkipRemainingValueBytes();
}

/**
 * Determines the length and type of the FlexSym at the cursor's current position (`peekIndex`). In slow mode,
 * the FlexSym is skipped with `slowSkipFlexSym_1_1`, which reports INCOMPLETE when the stream runs out of bytes;
 * otherwise `uncheckedSkipFlexSym_1_1` is used.
 * <p>
 * When this method returns a value greater than or equal to zero, `valueTid` and `valueMarker.typeId` have been
 * set to the IonTypeID that most closely corresponds to the FlexSym's type and length, and `peekIndex` has been
 * moved back to `valueMarker.startIndex`.
 * @return the length of the FlexSym, or -1 if not enough bytes are available in the stream to determine the length.
 */
private long readFlexSymLengthAndType_1_1() {
    final FlexSymType kind;
    if (!isSlowMode) {
        kind = uncheckedSkipFlexSym_1_1(valueMarker);
    } else {
        kind = slowSkipFlexSym_1_1(valueMarker);
        if (kind == FlexSymType.INCOMPLETE) {
            return -1;
        }
    }
    // The skip left peekIndex just past the FlexSym, so the distance back to the marker's start is the length.
    final long contentStart = valueMarker.startIndex;
    final int length = (int) (peekIndex - contentStart);
    peekIndex = contentStart;
    final IonTypeID resolvedTypeId = kind.typeIdFor(length);
    valueTid = resolvedTypeId;
    valueMarker.typeId = resolvedTypeId;
    return length;
}

/**
 * Computes the length of the variable-length tagless value that begins at `peekIndex` and, on success, records
 * the value's end in `valueMarker.endIndex`. For COMPACT_SYMBOL, the value's type ID is also refined (see
 * `readFlexSymLengthAndType_1_1`).
 * @param primitiveType the variable-length primitive type of the tagless value that starts at `peekIndex`.
 * @return the length of the value, or -1 if not enough bytes are available in the stream to determine the length.
 * @throws IllegalStateException if `primitiveType` is not one of FLEX_UINT, FLEX_INT, or COMPACT_SYMBOL.
 */
private long calculateTaglessLengthAndType(PrimitiveType primitiveType) {
    // TODO length calculation for these types could be deferred until they are consumed to avoid duplicate
    //  work. This would trade some added complexity for a potential performance gain that would need to be
    //  quantified.
    final long encodedLength;
    switch (primitiveType) {
        case COMPACT_SYMBOL:
            encodedLength = readFlexSymLengthAndType_1_1();
            break;
        case FLEX_INT:
        case FLEX_UINT:
            // Both flavors are measured with the same FlexUInt length routine.
            if (isSlowMode) {
                encodedLength = slowReadLengthOfFlexUInt_1_1(peekIndex);
            } else {
                encodedLength = uncheckedReadLengthOfFlexUInt_1_1(peekIndex);
            }
            break;
        default:
            throw new IllegalStateException("Length is built into the primitive type's IonTypeID.");
    }
    if (encodedLength < 0) {
        // Not enough data; leave the marker's end untouched.
        return encodedLength;
    }
    valueMarker.endIndex = peekIndex + encodedLength;
    return encodedLength;
}

/**
* Advances the cursor to the next value, assuming that it is tagless with the given type, skipping the current
* value (if any). This method may return:
* <ul>
* <li>NEEDS_DATA, if not enough data is available in the stream</li>
* <li>START_SCALAR, if the reader is now positioned on a scalar value</li>
* </ul>
* On success, `taglessType`, `valueTid`, and the bounds and type ID of `valueMarker` describe the new value, and
* the checkpoint is set to AFTER_SCALAR_HEADER.
* @param primitiveType the {@link PrimitiveType} of the tagless value on which to position the cursor.
* @return an Event conveying the result of the operation.
*/
public Event nextTaglessValue(PrimitiveType primitiveType) {
// Start from NEEDS_DATA so that every early return below reports that more data is required.
event = Event.NEEDS_DATA;
if (isSlowMode) {
// slowSkipToNextToken returns true when the previous value could not be fully skipped.
if (slowSkipToNextToken()) {
return event;
}
} else {
if (peekIndex < valueMarker.endIndex) {
// Part of the previous value is unread; jump directly to its end.
peekIndex = valueMarker.endIndex;
} else if (valueTid != null && valueTid.isDelimited) {
seekPastDelimitedContainer_1_1();
}
}
// NOTE(review): cleared before reportConsumedData()/reset(); presumably one of them inspects valueTid -- confirm.
valueTid = null;
if (dataHandler != null) {
reportConsumedData();
}
reset();
// reset() clears taglessType, so it has to be assigned after the reset.
taglessType = primitiveType;
valueTid = primitiveType.typeID;
valueMarker.typeId = valueTid;
// A tagless value has no header bytes, so the value and its pre-header position both start at peekIndex.
valueMarker.startIndex = peekIndex;
valuePreHeaderIndex = peekIndex;
if (valueTid.variableLength) {
// Sets valueMarker.endIndex on success; a negative result means the length could not be determined yet.
if (calculateTaglessLengthAndType(primitiveType) < 0) {
return event;
}
} else {
// Fixed-width types carry their length in the IonTypeID.
valueMarker.endIndex = peekIndex + valueTid.length;
}
setCheckpoint(CheckpointLocation.AFTER_SCALAR_HEADER);
event = Event.START_SCALAR;
return event;
}

@Override
public Event fillValue() {
event = Event.VALUE_READY;
Expand Down
Loading
Loading