Skip to content

Commit

Permalink
Adds read support for system symbols
Browse files Browse the repository at this point in the history
  • Loading branch information
popematt committed Sep 24, 2024
1 parent 4b373e5 commit b728bd6
Show file tree
Hide file tree
Showing 16 changed files with 407 additions and 171 deletions.
110 changes: 74 additions & 36 deletions src/main/java/com/amazon/ion/impl/IonCursorBinary.java
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,13 @@
import static com.amazon.ion.impl.IonTypeID.DELIMITED_END_ID;
import static com.amazon.ion.impl.IonTypeID.ONE_ANNOTATION_FLEX_SYM_LOWER_NIBBLE_1_1;
import static com.amazon.ion.impl.IonTypeID.ONE_ANNOTATION_SID_LOWER_NIBBLE_1_1;
import static com.amazon.ion.impl.IonTypeID.SYSTEM_MACRO_INVOCATION_ID;
import static com.amazon.ion.impl.IonTypeID.SYSTEM_SYMBOL_VALUE;
import static com.amazon.ion.impl.IonTypeID.TWO_ANNOTATION_FLEX_SYMS_LOWER_NIBBLE_1_1;
import static com.amazon.ion.impl.IonTypeID.TWO_ANNOTATION_SIDS_LOWER_NIBBLE_1_1;
import static com.amazon.ion.impl.IonTypeID.TYPE_IDS_1_1;
import static com.amazon.ion.impl.bin.Ion_1_1_Constants.FLEX_SYM_MAX_SYSTEM_SYMBOL;
import static com.amazon.ion.impl.bin.Ion_1_1_Constants.FLEX_SYM_SYSTEM_SYMBOL_OFFSET;
import static com.amazon.ion.util.IonStreamUtils.throwAsIonException;

/**
Expand Down Expand Up @@ -306,7 +309,7 @@ private static class ArgumentGroupMarker {
/**
* The major version of the Ion encoding currently being read.
*/
private int majorVersion = -1;
private int majorVersion = 1;

/**
* The minor version of the Ion encoding currently being read.
Expand Down Expand Up @@ -1490,24 +1493,23 @@ private long uncheckedReadFlexInt_1_1() {
* Marker's endIndex is set to the symbol ID value and its startIndex is set to -1. When this FlexSym wraps a
* delimited end marker, neither the Marker's startIndex nor its endIndex is set.
* @param markerToSet the marker to populate.
* @return the symbol ID value if one was present, otherwise -1.
* @return the user-space symbol ID value if one was present, otherwise -1.
*/
private long uncheckedReadFlexSym_1_1(Marker markerToSet) {
long result = uncheckedReadFlexInt_1_1();
if (result == 0) {
int nextByte = buffer[(int)(peekIndex++)];
if (nextByte == OpCodes.INLINE_SYMBOL_ZERO_LENGTH) {
// TODO: We could pretend $0 is a system symbol and consolidate some of the branches here. Is it worth it?
if (nextByte == FLEX_SYM_SYSTEM_SYMBOL_OFFSET) {
// Symbol zero.
markerToSet.endIndex = 0;
return 0;
}
if (nextByte == OpCodes.STRING_ZERO_LENGTH) {
// Inline symbol with zero length.
markerToSet.startIndex = peekIndex;
markerToSet.endIndex = peekIndex;
if (isFlexSymSystemSymbol(nextByte & 0xFF)) {
setSystemSymbolMarker(markerToSet, (byte)(nextByte - FLEX_SYM_SYSTEM_SYMBOL_OFFSET));
return -1;
} else if (nextByte != OpCodes.DELIMITED_END_MARKER) {
throw new IonException("FlexSym 0 may only precede symbol zero, empty string, or delimited end.");
throw new IonException("FlexSym 0 may only precede symbol zero, system symbol, or delimited end.");

Check warning on line 1512 in src/main/java/com/amazon/ion/impl/IonCursorBinary.java

View check run for this annotation

Codecov / codecov/patch

src/main/java/com/amazon/ion/impl/IonCursorBinary.java#L1512

Added line #L1512 was not covered by tests
}
markerToSet.typeId = IonTypeID.DELIMITED_END_ID;
return -1;
Expand All @@ -1523,6 +1525,17 @@ private long uncheckedReadFlexSym_1_1(Marker markerToSet) {
return result;
}

/*
* Determines whether a byte (specifically, the byte following a FlexSym escape byte) represents a system symbol.
*
* @param byteAfterEscapeCode The unsigned value of the byte after the FlexSym escape byte
* @return true if the byte is in the reserved range for system symbols.
*/
private static boolean isFlexSymSystemSymbol(int byteAfterEscapeCode) {
// TODO: We could pretend $0 is a system symbol and consolidate some of the branches elsewhere. Is it worth it?
return byteAfterEscapeCode > FLEX_SYM_SYSTEM_SYMBOL_OFFSET && byteAfterEscapeCode <= FLEX_SYM_MAX_SYSTEM_SYMBOL;
}

/**
* Reads a FlexInt into a long, ensuring enough space is available in the buffer. After this method returns false,
* `peekIndex` points to the first byte after the end of the FlexInt and `markerToSet.endIndex` contains the
Expand Down Expand Up @@ -1589,15 +1602,14 @@ private boolean slowReadFlexSym_1_1(Marker markerToSet) {
if (nextByte < 0) {
return true;
}
if ((byte) nextByte == OpCodes.INLINE_SYMBOL_ZERO_LENGTH) {
// TODO: We could pretend $0 is a system symbol and consolidate some of the branches here. Is it worth it?
if ((byte) nextByte == FLEX_SYM_SYSTEM_SYMBOL_OFFSET) {
// Symbol zero.
markerToSet.endIndex = 0;
return false;
}
if ((byte) nextByte == OpCodes.STRING_ZERO_LENGTH) {
// Inline symbol with zero length.
markerToSet.startIndex = peekIndex;
markerToSet.endIndex = peekIndex;
if (isFlexSymSystemSymbol(nextByte)) {
setSystemSymbolMarker(markerToSet, nextByte - FLEX_SYM_SYSTEM_SYMBOL_OFFSET);
return false;
} else if ((byte) nextByte != OpCodes.DELIMITED_END_MARKER) {
throw new IonException("FlexSyms may only wrap symbol zero, empty string, or delimited end.");
Expand Down Expand Up @@ -1645,6 +1657,13 @@ IonTypeID typeIdFor(int length) {
return TYPE_IDS_1_1[OpCodes.SYMBOL_ADDRESS_MANY_BYTES & SINGLE_BYTE_MASK];
}
},
SYSTEM_SYMBOL_ID {
@Override
IonTypeID typeIdFor(int length) {
// if (length > 1) throw new IllegalStateException("System Symbols always have a length of 1");
return SYSTEM_SYMBOL_VALUE;
}
},
STRUCT_END {
@Override
IonTypeID typeIdFor(int length) {
Expand All @@ -1661,11 +1680,12 @@ static FlexSymType classifySpecialFlexSym(int specialByte) {
if (specialByte < 0) {
return FlexSymType.INCOMPLETE;
}
if ((byte) specialByte == OpCodes.INLINE_SYMBOL_ZERO_LENGTH) {
// TODO: We could pretend $0 is a system symbol and consolidate some of the branches here. Is it worth it?
if ((byte) specialByte == FLEX_SYM_SYSTEM_SYMBOL_OFFSET) {
return FlexSymType.SYMBOL_ID;
}
if ((byte) specialByte == OpCodes.STRING_ZERO_LENGTH) {
return FlexSymType.INLINE_TEXT;
if (isFlexSymSystemSymbol(specialByte)) {
return FlexSymType.SYSTEM_SYMBOL_ID;
}
if ((byte) specialByte == OpCodes.DELIMITED_END_MARKER) {
return FlexSymType.STRUCT_END;
Expand All @@ -1692,7 +1712,12 @@ private FlexSymType uncheckedSkipFlexSym_1_1(Marker markerToSet) {
if (result == 0) {
markerToSet.startIndex = peekIndex + 1;
markerToSet.endIndex = markerToSet.startIndex;
return FlexSymType.classifySpecialFlexSym(buffer[(int) peekIndex++] & SINGLE_BYTE_MASK);
int specialByte = buffer[(int) peekIndex++] & SINGLE_BYTE_MASK;
FlexSymType type = FlexSymType.classifySpecialFlexSym(specialByte);
if (type == FlexSymType.SYSTEM_SYMBOL_ID) {
setSystemSymbolMarker(markerToSet, (byte)(specialByte - FLEX_SYM_SYSTEM_SYMBOL_OFFSET));
}
return type;
} else if (result < 0) {
markerToSet.startIndex = peekIndex;
markerToSet.endIndex = peekIndex - result;
Expand Down Expand Up @@ -1720,11 +1745,19 @@ private FlexSymType slowSkipFlexSym_1_1(Marker markerToSet) {
result |= ~(-1 >>> Long.numberOfLeadingZeros(result));
}
if (result == 0) {
FlexSymType flexSymType = FlexSymType.classifySpecialFlexSym(slowReadByte());
int specialByte = slowReadByte();
FlexSymType flexSymType = FlexSymType.classifySpecialFlexSym(specialByte);
if (markerToSet != null && flexSymType != FlexSymType.INCOMPLETE) {
markerToSet.startIndex = peekIndex;
markerToSet.endIndex = peekIndex;
}
if (markerToSet != null && flexSymType == FlexSymType.SYSTEM_SYMBOL_ID) {
// FIXME: See if we can set the SID in the endIndex here without causing the slow reader to get confused
// about where the end of the value is for tagless symbols.
// I.e. use setSystemSymbolMarker(markerToSet, (byte)(specialByte - FLEX_SYM_SYSTEM_SYMBOL_OFFSET));
markerToSet.typeId = SYSTEM_SYMBOL_VALUE;
markerToSet.startIndex = peekIndex - 1;
}
return flexSymType;
} else if (result < 0) {
if (markerToSet != null) {
Expand Down Expand Up @@ -2229,26 +2262,28 @@ private void validateAnnotationWrapperEndIndex(long endIndex) {
}
}

/*
* The given Marker's endIndex is set to the system symbol ID value and its startIndex is set to -1
* @param markerToSet the marker to set.
*/
private void setSystemSymbolMarker(Marker markerToSet, int systemSid) {
event = Event.START_SCALAR;
markerToSet.typeId = SYSTEM_SYMBOL_VALUE;
markerToSet.startIndex = -1;
markerToSet.endIndex = systemSid;
}

/**
* Sets the given marker to represent the current system token (system macro invocation or system symbol value).
* Before calling this method, `macroInvocationId` must be set from the one-byte FixedInt that represents the ID;
* positive values indicate a macro address, while negative values indicate a system symbol ID.
* @param valueTid the type ID of the system token.
* Sets the given marker to represent the current system macro invocation.
* Before calling this method, `macroInvocationId` must be set from the one-byte FixedUInt that represents the ID.
* @param markerToSet the marker to set.
*/
private void setSystemTokenMarker(IonTypeID valueTid, Marker markerToSet) {
private void setSystemMacroInvocationMarker(Marker markerToSet) {
isSystemInvocation = true;
event = Event.NEEDS_INSTRUCTION;
markerToSet.typeId = SYSTEM_MACRO_INVOCATION_ID;
markerToSet.startIndex = peekIndex;
if (macroInvocationId < 0) {
// This is a system symbol value.
event = Event.START_SCALAR;
markerToSet.typeId = SYSTEM_SYMBOL_VALUE;
markerToSet.endIndex = peekIndex;
} else {
event = Event.NEEDS_INSTRUCTION;
markerToSet.typeId = valueTid;
markerToSet.endIndex = -1;
}
markerToSet.endIndex = -1;
}

/**
Expand Down Expand Up @@ -2293,9 +2328,9 @@ private void uncheckedReadMacroInvocationHeader(IonTypeID valueTid, Marker marke
setUserMacroInvocationMarker(valueTid, markerToSet, uncheckedReadFlexUInt_1_1());
return;
} else {
// Opcode 0xEF: system macro invocation or system symbol value.
// Opcode 0xEF: system macro invocation
macroInvocationId = buffer[(int) peekIndex++];
setSystemTokenMarker(valueTid, markerToSet);
setSystemMacroInvocationMarker(markerToSet);
return;
}
} else if (valueTid.length > 0) {
Expand Down Expand Up @@ -2461,7 +2496,7 @@ private boolean slowReadMacroInvocationHeader(IonTypeID valueTid, Marker markerT
}
// The downcast to byte then upcast to long results in sign extension, treating the byte as a FixedInt.
macroInvocationId = (byte) truncatedId;
setSystemTokenMarker(valueTid, markerToSet);
setSystemMacroInvocationMarker(markerToSet);
return false;
}
} else if (valueTid.length > 0) {
Expand Down Expand Up @@ -3123,6 +3158,9 @@ private long calculateTaglessLengthAndType(TaglessEncoding taglessEncoding) {
default:
throw new IllegalStateException("Length is built into the primitive type's IonTypeID.");
}
if (valueTid == SYSTEM_SYMBOL_VALUE) {
return 1;
}
if (length >= 0) {
valueMarker.endIndex = peekIndex + length;
}
Expand Down
Loading

0 comments on commit b728bd6

Please sign in to comment.