Skip to content

Commit

Permalink
Adds binary read support for macro invocation headers.
Browse files Browse the repository at this point in the history
  • Loading branch information
tgregg committed Jun 12, 2024
1 parent d103547 commit 81ef1da
Show file tree
Hide file tree
Showing 5 changed files with 342 additions and 31 deletions.
177 changes: 174 additions & 3 deletions src/main/java/com/amazon/ion/impl/IonCursorBinary.java
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import static com.amazon.ion.impl.IonTypeID.DELIMITED_END_ID;
import static com.amazon.ion.impl.IonTypeID.ONE_ANNOTATION_FLEX_SYM_LOWER_NIBBLE_1_1;
import static com.amazon.ion.impl.IonTypeID.ONE_ANNOTATION_SID_LOWER_NIBBLE_1_1;
import static com.amazon.ion.impl.IonTypeID.SYSTEM_SYMBOL_VALUE;
import static com.amazon.ion.impl.IonTypeID.TWO_ANNOTATION_FLEX_SYMS_LOWER_NIBBLE_1_1;
import static com.amazon.ion.impl.IonTypeID.TWO_ANNOTATION_SIDS_LOWER_NIBBLE_1_1;
import static com.amazon.ion.util.IonStreamUtils.throwAsIonException;
Expand Down Expand Up @@ -334,6 +335,19 @@ private static class RefillableState {
*/
private long lastReportedByteTotal = 0;

/**
 * The ID of the current macro invocation. When `isSystemInvocation` is true, a non-negative value indicates a
 * system macro address, while a negative value indicates a system symbol ID. When `isSystemInvocation` is false, a
 * non-negative value indicates a user macro address, while a negative value indicates that the cursor's current
 * token is not a macro invocation. The cursor's reset logic restores this to -1.
 */
private long macroInvocationId = -1;

/**
 * True if the cursor's current token represents a system invocation (either a system macro invocation or a system
 * symbol value). When true, `macroInvocationId` holds the ID of the system token. The cursor's reset logic
 * restores this to false.
 */
private boolean isSystemInvocation = false;

/**
* @return the given configuration's DataHandler, or null if that DataHandler is a no-op.
Expand Down Expand Up @@ -956,6 +970,7 @@ private boolean slowReadAnnotationWrapperHeader_1_0(IonTypeID valueTid) {
if (peekIndex >= valueMarker.endIndex) {
throw new IonException("Annotation wrapper must wrap a value.");
}
valueMarker.typeId = valueTid;
return false;
}

Expand Down Expand Up @@ -1318,6 +1333,7 @@ private boolean slowReadAnnotationWrapperHeader_1_1(IonTypeID valueTid) {
}

}
valueMarker.typeId = valueTid;
return false;
}

Expand Down Expand Up @@ -1931,6 +1947,8 @@ private void reset() {
annotationSequenceMarker.typeId = null;
annotationSequenceMarker.startIndex = -1;
annotationSequenceMarker.endIndex = -1;
macroInvocationId = -1;
isSystemInvocation = false;
}

/**
Expand Down Expand Up @@ -2018,6 +2036,91 @@ private void validateAnnotationWrapperEndIndex(long endIndex) {
}
}

/**
 * Configures the given marker, the current event, and the system-invocation flag for a system token, i.e. either
 * a system macro invocation or a system symbol value. `macroInvocationId` must already hold the value of the
 * one-byte FixedInt that follows the opcode: a negative value denotes a system symbol ID, and any other value
 * denotes a system macro address.
 * @param valueTid the type ID of the system token; applied to the marker only for system macro invocations.
 * @param markerToSet the marker to populate. Its startIndex is set to the current `peekIndex`. For a system
 *                    symbol value its endIndex is also set to `peekIndex`; for a system macro invocation its
 *                    endIndex is set to -1.
 */
private void setSystemTokenMarker(IonTypeID valueTid, Marker markerToSet) {
    final boolean isSystemSymbol = macroInvocationId < 0;
    isSystemInvocation = true;
    markerToSet.startIndex = peekIndex;
    // System symbols surface as scalars using the dummy system symbol type ID; system macro invocations keep
    // their own type ID and have no known end index at this point.
    markerToSet.typeId = isSystemSymbol ? SYSTEM_SYMBOL_VALUE : valueTid;
    markerToSet.endIndex = isSystemSymbol ? peekIndex : -1;
    event = isSystemSymbol ? Event.START_SCALAR : Event.NEEDS_INSTRUCTION;
}

/**
 * Configures the given marker and the current event for a user macro invocation.
 * @param valueTid the type ID of the macro invocation.
 * @param markerToSet the Marker to populate. After return, its type ID is `valueTid`, its startIndex is the
 *                    current `peekIndex` (the first byte of the invocation's body), and its endIndex is either
 *                    `peekIndex + length` (length-prefixed invocations) or -1 (all other invocations).
 * @param length the declared length of the invocation's body. Only consulted when
 *               `valueTid.variableLength == true`, which denotes a length-prefixed invocation.
 */
private void setUserMacroInvocationMarker(IonTypeID valueTid, Marker markerToSet, long length) {
    // Whether the invocation yields a scalar or a container (or is even complete) is not known here; a
    // higher-level reader has to supply further instructions in order to evaluate it.
    event = Event.NEEDS_INSTRUCTION;
    markerToSet.typeId = valueTid;
    markerToSet.startIndex = peekIndex;
    if (valueTid.variableLength) {
        // Length-prefixed: the end of the invocation is known up front.
        markerToSet.endIndex = peekIndex + length;
    } else {
        // Otherwise the end of the invocation cannot be determined until it is evaluated.
        markerToSet.endIndex = -1;
    }
}

/**
 * Reads a macro invocation header directly from the buffer, without checking that enough bytes are available.
 * `peekIndex` must be positioned on the first byte that follows the opcode. After return, `peekIndex` will be
 * positioned after any macro address byte(s), and `macroInvocationId` will hold the ID of the macro (or system
 * symbol) being referenced.
 * @param valueTid the type ID of the macro invocation.
 * @param markerToSet the Marker to populate with information parsed from the invocation. After return, its type
 *                    ID will be set, startIndex will point to the first byte of the invocation's body, and
 *                    endIndex will either be -1 (when not a system symbol or length-prefixed invocation), or
 *                    will be set to the end of the invocation.
 * @param length the declared length of the invocation. Ignored unless this is a length-prefixed invocation
 *               (denoted by `valueTid.variableLength == true`).
 */
private void uncheckedReadMacroInvocationHeader(IonTypeID valueTid, Marker markerToSet, long length) {
    long bodyLength = length;
    if (valueTid.macroId >= 0) {
        // Opcodes 0x00 - 0x3F: the opcode alone is the macro ID, so nothing more is read.
        // Opcodes 0x4_: the rest of the macro ID follows in a 1-byte FixedUInt.
        // Opcodes 0x5_: the rest of the macro ID follows in a 2-byte (little-endian) FixedUInt.
        int addressSuffix = 0;
        if (valueTid.length > 0) {
            addressSuffix = buffer[(int) peekIndex++] & SINGLE_BYTE_MASK;
            if (valueTid.length > 1) {
                addressSuffix |= (buffer[(int) peekIndex++] & SINGLE_BYTE_MASK) << 8;
            }
        }
        macroInvocationId = valueTid.macroId + addressSuffix;
    } else if (valueTid.lowerNibble != 0xE && !valueTid.variableLength) {
        // Opcode 0xEF: system macro invocation or system symbol value. The byte is deliberately left unmasked
        // so that it is sign-extended, i.e. interpreted as a FixedInt.
        macroInvocationId = buffer[(int) peekIndex++];
        setSystemTokenMarker(valueTid, markerToSet);
        return;
    } else {
        // Opcode 0xEE or Opcode 0xF5 (when length > 0): the macro ID is encoded as a FlexUInt.
        final long addressStart = peekIndex;
        macroInvocationId = uncheckedReadFlexUInt_1_1();
        // The declared length covers the macro ID too; remove the ID's size so the end index is computed
        // relative to the start of the invocation's body.
        bodyLength -= peekIndex - addressStart;
    }
    setUserMacroInvocationMarker(valueTid, markerToSet, bodyLength);
}

/**
* Reads a value header, consuming the value's annotation wrapper header, if any. Upon invocation,
* `peekIndex` must be positioned on the first byte that follows the given type ID byte. After return, `peekIndex`
Expand All @@ -2041,7 +2144,10 @@ private boolean uncheckedReadHeader(final int typeIdByte, final boolean isAnnota
return true;
}
hasAnnotations = true;
return uncheckedReadHeader(buffer[(int)(peekIndex++)] & SINGLE_BYTE_MASK, true, valueMarker);
return uncheckedReadHeader(buffer[(int) (peekIndex++)] & SINGLE_BYTE_MASK, true, valueMarker);
} else if (minorVersion == 1 && valueTid.isMacroInvocation) {
uncheckedReadMacroInvocationHeader(valueTid, markerToSet, valueTid.variableLength ? uncheckedReadFlexUInt_1_1() : -1);
return true;
} else {
long endIndex = minorVersion == 0
? calculateEndIndex_1_0(valueTid, isAnnotated)
Expand Down Expand Up @@ -2105,9 +2211,8 @@ private boolean slowReadHeader(final int typeIdByte, final boolean isAnnotated,
if (nullTypeIndex < 0) {
return true;
}
valueTid = IonTypeID.NULL_TYPE_IDS_1_1[nullTypeIndex];
markerToSet.typeId = IonTypeID.NULL_TYPE_IDS_1_1[nullTypeIndex];
}
markerToSet.typeId = valueTid;
if (checkpointLocation == CheckpointLocation.AFTER_SCALAR_HEADER) {
return true;
}
Expand All @@ -2120,6 +2225,61 @@ private boolean slowReadHeader(final int typeIdByte, final boolean isAnnotated,
return false;
}

/**
 * Reads a macro invocation header, ensuring enough bytes are buffered. `peekIndex` must be positioned on the
 * first byte that follows the opcode. After returning `false`, `peekIndex` will be positioned after any macro
 * address byte(s), and `macroInvocationId` will be set to the ID of the macro (or system symbol) being referenced.
 * @param valueTid the type ID of the macro invocation.
 * @param markerToSet the Marker to set with information parsed from the macro invocation. After returning `false`,
 *                    the marker's type ID will be set, startIndex will point to the first byte of the invocation's
 *                    body, and endIndex will either be -1 (when not a system symbol or prefixed invocation), or
 *                    will be set to the end of the invocation.
 * @param length the declared length of the invocation. Ignored unless this is a length-prefixed invocation
 *               (denoted by `valueTid.variableLength == true`).
 * @return true if not enough data was available in the stream to complete the header; otherwise, false.
 */
private boolean slowReadMacroInvocationHeader(IonTypeID valueTid, Marker markerToSet, long length) {
    if (valueTid.macroId < 0) {
        if (valueTid.lowerNibble == 0xE || valueTid.variableLength) {
            // Opcode 0xEE or Opcode 0xF5 (when length > 0): Read the macro ID as a FlexUInt.
            long idStart = peekIndex;
            macroInvocationId = slowReadFlexUInt_1_1();
            if (macroInvocationId < 0) {
                // A negative result means the FlexUInt could not be read; more data is needed.
                return true;
            }
            // The length included the macro ID. Subtract the length of the macro ID so that the end index can
            // be set correctly.
            length -= peekIndex - idStart;
        } else {
            // Opcode 0xEF: system macro invocation or system symbol value.
            int truncatedId = slowReadByte();
            if (truncatedId < 0) {
                return true;
            }
            // The downcast to byte then upcast to long results in sign extension, treating the byte as a FixedInt.
            macroInvocationId = (byte) truncatedId;
            setSystemTokenMarker(valueTid, markerToSet);
            return false;
        }
    } else if (valueTid.length > 0) {
        // Opcodes 0x4_: the rest of the macro ID follows in a 1-byte FixedUInt.
        // Opcodes 0x5_: the rest of the macro ID follows in a 2-byte FixedUInt.
        if (!fillAt(peekIndex, valueTid.length)) {
            return true;
        }
        int remainingId = slowPeekByte();
        if (valueTid.length > 1) {
            // The second byte is the high-order byte of an *unsigned* value. It must be masked, not cast to
            // `byte`: a cast would sign-extend any byte >= 0x80 and corrupt the macro address. This matches the
            // unchecked variant of this method.
            remainingId |= (slowPeekByte() & SINGLE_BYTE_MASK) << 8;
        }
        macroInvocationId = valueTid.macroId + remainingId;
    } else {
        // Opcodes 0x00 - 0x3F -- the opcode is the macro ID.
        macroInvocationId = valueTid.macroId;
    }
    setUserMacroInvocationMarker(valueTid, markerToSet, length);
    return false;
}

/**
* Reads a value header, ensuring enough bytes are buffered. Upon invocation, `peekIndex` must
* be positioned on the first byte that follows the given type ID byte. After return, `peekIndex`
Expand Down Expand Up @@ -2157,6 +2317,8 @@ private boolean slowReadValueHeader(IonTypeID valueTid, boolean isAnnotated, Mar
return true;
}
valueLength = 0;
} else if (minorVersion == 1 && valueTid.isMacroInvocation) {
return slowReadMacroInvocationHeader(valueTid, markerToSet, valueLength);
} else {
setCheckpoint(CheckpointLocation.AFTER_SCALAR_HEADER);
event = Event.START_SCALAR;
Expand All @@ -2180,6 +2342,7 @@ private boolean slowReadValueHeader(IonTypeID valueTid, boolean isAnnotated, Mar
validateAnnotationWrapperEndIndex(endIndex);
}
setMarker(endIndex, markerToSet);
markerToSet.typeId = valueTid;
return false;
}

Expand Down Expand Up @@ -2689,6 +2852,14 @@ Marker getValueMarker() {
return valueMarker;
}

/**
 * Retrieves the ID associated with the cursor's current macro invocation or system token.
 * @return the current value of `macroInvocationId`; -1 after the cursor has been reset.
 */
long getMacroInvocationId() {
    return this.macroInvocationId;
}

/**
 * Indicates whether the cursor's current token was read as a system invocation (a system macro invocation or a
 * system symbol value).
 * @return the current value of the `isSystemInvocation` flag; false after the cursor has been reset.
 */
boolean isSystemInvocation() {
    return this.isSystemInvocation;
}

/**
* Slices the buffer using the given offset and limit. Slices are treated as if they were at the top level. This
* can be used to seek the reader to a "span" of bytes that represent a value in the stream.
Expand Down
29 changes: 17 additions & 12 deletions src/main/java/com/amazon/ion/impl/IonTypeID.java
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ final class IonTypeID {
static final IonTypeID[] NULL_TYPE_IDS_1_1;
static final IonTypeID STRUCT_WITH_FLEX_SYMS_ID;
static final IonTypeID DELIMITED_END_ID;
static final IonTypeID SYSTEM_SYMBOL_VALUE;
static {
TYPE_IDS_NO_IVM = new IonTypeID[NUMBER_OF_BYTES];
TYPE_IDS_1_0 = new IonTypeID[NUMBER_OF_BYTES];
Expand Down Expand Up @@ -134,6 +135,8 @@ final class IonTypeID {
// This is used as a dummy ID when a delimited container reaches its end. The key here is that the type ID's
// lower nibble is OpCodes.DELIMITED_END_MARKER.
DELIMITED_END_ID = TYPE_IDS_1_1[DELIMITED_END_MARKER & 0xFF];
// This is used as a dummy ID when a system symbol value is encoded using the 0xEF opcode in Ion 1.1.
SYSTEM_SYMBOL_VALUE = TYPE_IDS_1_1[SYMBOL_ADDRESS_1_BYTE & 0xFF];
}

final IonType type;
Expand Down Expand Up @@ -266,29 +269,31 @@ private IonTypeID(byte id, int minorVersion) {
boolean isNull = false;
int length = -1;
if (isMacroInvocation) {
if (id == E_EXPRESSION_FLEX_UINT) {
variableLength = true;
macroId = -1;
} else if (upperNibble == 0x5) {
// TODO: For 0x4_ and 0x5_, the bias can be precomputed based on the lower nibble.
// Consider precomputing and adding it to the type id or some other relevant location.
if (upperNibble == 0x5) {
variableLength = false;
length = 2;
// This isn't the whole macro ID, but it's all the relevant bits from the type ID byte (the 4
// least-significant bits).
macroId = lowerNibble;
// least-significant bits), with pre-computed bias.
macroId = (lowerNibble << 16) + 4160;
} else if (upperNibble == 0x4) {
variableLength = false;
length = 1;
// This isn't the whole macro ID, but it's all the relevant bits from the type ID byte (the 4
// least-significant bits).
macroId = lowerNibble;
// least-significant bits), with pre-computed bias.
macroId = (lowerNibble << 8) + 64;
} else if (upperNibble < 0x4){
variableLength = false;
macroId = id;
length = 0;
} else {
// System or flexuint macro invocation.
variableLength = upperNibble == 0xF;
if (upperNibble == 0xF) {
// FlexUInt length-prefixed macro invocation.
variableLength = true;
} else {
// System invocation; ID follows as a 1-byte FixedInt.
variableLength = false;
length = 1;
}
macroId = -1;
}
type = null;
Expand Down
10 changes: 10 additions & 0 deletions src/test/java/com/amazon/ion/TestUtils.java
Original file line number Diff line number Diff line change
Expand Up @@ -589,6 +589,16 @@ public byte[] toByteArray() {
}
}

/**
 * Produces a copy of the given data preceded by the Ion version marker (IVM) for the requested 1.x minor version.
 * @param minorVersion the minor version whose IVM should be prepended.
 * @param data the bytes that should follow the IVM.
 * @return a new byte array containing the IVM followed by the data.
 * @throws Exception if the underlying appender fails.
 */
public static byte[] withIvm(int minorVersion, byte[] data) throws Exception {
    final TestUtils.BinaryIonAppender appender = new TestUtils.BinaryIonAppender(minorVersion);
    return appender.append(data).toByteArray();
}

/**
* Compresses the given bytes using GZIP.
* @param bytes the bytes to compress.
Expand Down
Loading

0 comments on commit 81ef1da

Please sign in to comment.