Skip to content

Commit

Permalink
Ensures annotations are not overwritten when filling a delimited cont…
Browse files Browse the repository at this point in the history
…ainer.
  • Loading branch information
tgregg committed May 31, 2024
1 parent 10430b8 commit eb193f0
Show file tree
Hide file tree
Showing 3 changed files with 153 additions and 24 deletions.
94 changes: 87 additions & 7 deletions src/main/java/com/amazon/ion/impl/IonCursorBinary.java
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@
import java.nio.ByteOrder;

import static com.amazon.ion.impl.IonTypeID.DELIMITED_END_ID;
import static com.amazon.ion.impl.IonTypeID.ONE_ANNOTATION_FLEX_SYM_LOWER_NIBBLE_1_1;
import static com.amazon.ion.impl.IonTypeID.ONE_ANNOTATION_SID_LOWER_NIBBLE_1_1;
import static com.amazon.ion.impl.IonTypeID.TWO_ANNOTATION_FLEX_SYMS_LOWER_NIBBLE_1_1;
import static com.amazon.ion.impl.IonTypeID.TWO_ANNOTATION_SIDS_LOWER_NIBBLE_1_1;
import static com.amazon.ion.util.IonStreamUtils.throwAsIonException;
Expand Down Expand Up @@ -155,6 +157,14 @@ private static class RefillableState {
*/
int lastUnbufferedByte = -1;

/**
* Whether to skip over annotation sequences rather than recording them for consumption by the user. This is
* used when probing forward in the stream for the end of a delimited container while remaining logically
* positioned on the current value. This is only needed in 'slow' mode because in quick mode the entire
* container is assumed to be buffered in its entirety and no probing occurs.
*/
private boolean skipAnnotations = false;

RefillableState(InputStream inputStream, int capacity, int maximumBufferSize, State initialState) {
this.inputStream = inputStream;
this.capacity = capacity;
Expand Down Expand Up @@ -1195,6 +1205,38 @@ private boolean uncheckedReadAnnotationWrapperHeader_1_1(IonTypeID valueTid) {
return false;
}

/**
* Skips a non-length-prefixed annotation sequence (opcodes E4, E5, E7, or E8), ensuring enough space is available
* in the buffer. After this method returns, `peekIndex` points to the first byte after the end of the annotation
* sequence.
* @param valueTid the type ID of the annotation sequence to skip.
* @return true if there are not enough bytes in the stream to complete the annotation sequence; otherwise, false.
*/
private boolean slowSkipNonPrefixedAnnotations_1_1(IonTypeID valueTid) {
if (valueTid.isInlineable) {
// Opcodes 0xE7 (one annotation FlexSym) and 0xE8 (two annotation FlexSyms)
if (slowSkipFlexSym_1_1()) {
return true;
}
if (valueTid.lowerNibble == TWO_ANNOTATION_FLEX_SYMS_LOWER_NIBBLE_1_1) {
// Opcode 0xE8 (two annotation FlexSyms)
return slowSkipFlexSym_1_1();
}
} else {
// Opcodes 0xE4 (one annotation SID) and 0xE5 (two annotation SIDs)
int annotationSid = (int) slowReadFlexUInt_1_1();
if (annotationSid < 0) {
return true;
}
if (valueTid.lowerNibble == TWO_ANNOTATION_SIDS_LOWER_NIBBLE_1_1) {
// Opcode 0xE5 (two annotation SIDs)
annotationSid = (int) slowReadFlexUInt_1_1();
return annotationSid < 0;
}
}
return false;
}

/**
* Reads the header of an Ion 1.1 annotation wrapper, ensuring enough data is available in the buffer. Sets
* `valueMarker` with the start and end indices of the wrapped value. Sets `annotationSequenceMarker` with the start
Expand All @@ -1205,7 +1247,9 @@ private boolean uncheckedReadAnnotationWrapperHeader_1_1(IonTypeID valueTid) {
* @return true if there are not enough bytes in the stream to complete the value; otherwise, false.
*/
private boolean slowReadAnnotationWrapperHeader_1_1(IonTypeID valueTid) {
annotationTokenMarkers.clear();
if (!refillableState.skipAnnotations) {
annotationTokenMarkers.clear();
}
if (valueTid.variableLength) {
// Opcodes 0xE6 (variable-length annotation SIDs) and 0xE9 (variable-length annotation FlexSyms)
// At this point the value must be at least 3 more bytes: one for the smallest-possible annotations
Expand All @@ -1221,13 +1265,22 @@ private boolean slowReadAnnotationWrapperHeader_1_1(IonTypeID valueTid) {
if (!fillAt(peekIndex, annotationsLength)) {
return true;
}
annotationSequenceMarker.typeId = valueTid;
annotationSequenceMarker.startIndex = peekIndex;
annotationSequenceMarker.endIndex = annotationSequenceMarker.startIndex + annotationsLength;
peekIndex = annotationSequenceMarker.endIndex;
long annotationsEnd = peekIndex + annotationsLength;
if (!refillableState.skipAnnotations) {
annotationSequenceMarker.typeId = valueTid;
annotationSequenceMarker.startIndex = peekIndex;
annotationSequenceMarker.endIndex = annotationsEnd;
}
peekIndex = annotationsEnd;
} else {
// At this point the value must have at least one more byte for each annotation FlexSym (one for lower
// nibble 7, two for lower nibble 8), plus one for the smallest-possible value representation.
if (!fillAt(peekIndex, (valueTid.lowerNibble == ONE_ANNOTATION_FLEX_SYM_LOWER_NIBBLE_1_1 || valueTid.lowerNibble == ONE_ANNOTATION_SID_LOWER_NIBBLE_1_1) ? 2 : 3)) {
return true;
}
if (refillableState.skipAnnotations) {
return slowSkipNonPrefixedAnnotations_1_1(valueTid);
}
if (valueTid.isInlineable) {
// Opcodes 0xE7 (one annotation FlexSym) and 0xE8 (two annotation FlexSyms)
Marker provisionalMarker = annotationTokenMarkers.provisionalElement();
Expand Down Expand Up @@ -1473,6 +1526,29 @@ private long slowReadFlexSym_1_1(Marker markerToSet) {
return result;
}

/**
* Skips a FlexSym, ensuring enough space is available in the buffer. After this method returns, `peekIndex` points
* to the first byte after the end of the FlexSym.
* @return true if there are not enough bytes in the stream to complete the FlexSym; otherwise, false.
*/
private boolean slowSkipFlexSym_1_1() {
long result = slowReadFlexInt_1_1();
if (result == 0) {
int nextByte = slowReadByte();
if (nextByte < 0) {
return true;
}
if ((byte) nextByte != OpCodes.INLINE_SYMBOL_ZERO_LENGTH && (byte) nextByte != OpCodes.STRING_ZERO_LENGTH && (byte) nextByte != OpCodes.DELIMITED_END_MARKER) {
throw new IonException("FlexSyms may only wrap symbol zero, empty string, or delimited end.");
}
return false;
} else if (result < 0) {
peekIndex -= result;
return false;
}
return false;
}

/**
* Reads the field name at `peekIndex`, ensuring enough bytes are available in the buffer. After this method returns
* `peekIndex` points to the first byte of the value that follows the field name. If the field name contained a
Expand Down Expand Up @@ -1661,7 +1737,11 @@ private boolean slowFindDelimitedEnd_1_1() {
long savedCheckpoint = checkpoint;
int savedContainerIndex = containerIndex;
Marker savedParent = parent;
boolean savedHasAnnotations = hasAnnotations; // TODO have to set aside all annotation markers... or go into a mode where the markers are not recorded
boolean savedHasAnnotations = hasAnnotations;
// The cursor remains logically positioned at the current value despite probing forward for the end of the
// delimited value. Accordingly, do not overwrite the existing annotations with any annotations found during
// the probe.
refillableState.skipAnnotations = true;
// ------------

// TODO performance: the following line causes the end indexes of any child delimited containers that are not
Expand All @@ -1670,6 +1750,7 @@ private boolean slowFindDelimitedEnd_1_1() {
// additional complexity.
seekPastDelimitedContainer_1_1();

refillableState.skipAnnotations = false;
boolean isReady = event != Event.NEEDS_DATA;
if (refillableState.isSkippingCurrentValue) {
// This delimited container is oversized. The cursor must seek past it.
Expand Down Expand Up @@ -1850,7 +1931,6 @@ private void reset() {
annotationSequenceMarker.typeId = null;
annotationSequenceMarker.startIndex = -1;
annotationSequenceMarker.endIndex = -1;
// TOOD annotationTokenMarkers?
}

/**
Expand Down
33 changes: 17 additions & 16 deletions src/test/java/com/amazon/ion/Ion_1_1_RoundTripTest.kt
Original file line number Diff line number Diff line change
Expand Up @@ -441,29 +441,29 @@ abstract class Ion_1_1_RoundTripBase {

@JvmStatic
protected val WRITER_INTERNED_PREFIXED: (OutputStream) -> IonWriter = ION_1_1.binaryWriterBuilder()
.withSymbolInliningStrategy(SymbolInliningStrategy.NEVER_INLINE)
.withDelimitedContainerStrategy(DelimitedContainerStrategy.ALWAYS_PREFIXED)::build
.withSymbolInliningStrategy(SymbolInliningStrategy.NEVER_INLINE)
.withDelimitedContainerStrategy(DelimitedContainerStrategy.ALWAYS_PREFIXED)::build
@JvmStatic
protected val WRITER_INLINE_PREFIXED: (OutputStream) -> IonWriter = ION_1_1.binaryWriterBuilder()
.withSymbolInliningStrategy(SymbolInliningStrategy.ALWAYS_INLINE)
.withDelimitedContainerStrategy(DelimitedContainerStrategy.ALWAYS_PREFIXED)::build
.withSymbolInliningStrategy(SymbolInliningStrategy.ALWAYS_INLINE)
.withDelimitedContainerStrategy(DelimitedContainerStrategy.ALWAYS_PREFIXED)::build
@JvmStatic
protected val WRITER_INTERNED_DELIMITED: (OutputStream) -> IonWriter = ION_1_1.binaryWriterBuilder()
.withSymbolInliningStrategy(SymbolInliningStrategy.NEVER_INLINE)
.withDelimitedContainerStrategy(DelimitedContainerStrategy.ALWAYS_DELIMITED)::build
.withSymbolInliningStrategy(SymbolInliningStrategy.NEVER_INLINE)
.withDelimitedContainerStrategy(DelimitedContainerStrategy.ALWAYS_DELIMITED)::build
@JvmStatic
protected val WRITER_INLINE_DELIMITED: (OutputStream) -> IonWriter = ION_1_1.binaryWriterBuilder()
.withSymbolInliningStrategy(SymbolInliningStrategy.ALWAYS_INLINE)
.withDelimitedContainerStrategy(DelimitedContainerStrategy.ALWAYS_DELIMITED)::build
.withSymbolInliningStrategy(SymbolInliningStrategy.ALWAYS_INLINE)
.withDelimitedContainerStrategy(DelimitedContainerStrategy.ALWAYS_DELIMITED)::build

/**
* Checks if this ByteArray contains Ion Binary.
*/
private fun ByteArray.isIonBinary(): Boolean {
return get(0) == 0xE0.toByte() &&
get(1) == 0x01.toByte() &&
get(2) in setOf<Byte>(0, 1) &&
get(3) == 0xEA.toByte()
get(1) == 0x01.toByte() &&
get(2) in setOf<Byte>(0, 1) &&
get(3) == 0xEA.toByte()
}

/**
Expand All @@ -473,11 +473,11 @@ abstract class Ion_1_1_RoundTripBase {
protected fun ByteArray.printDisplayString() {
if (isIonBinary()) {
map { it.toHexString(HexFormat.UpperCase) }
.windowed(4, 4, partialWindows = true)
.windowed(8, 8, partialWindows = true)
.forEach {
println(it.joinToString(" ") { it.joinToString(" ") })
}
.windowed(4, 4, partialWindows = true)
.windowed(8, 8, partialWindows = true)
.forEach {
println(it.joinToString(" ") { it.joinToString(" ") })
}
} else {
println(toString(Charsets.UTF_8))
}
Expand Down Expand Up @@ -525,6 +525,7 @@ abstract class Ion_1_1_RoundTripBase {
$10 $11 $12 $13 $14
""".trimIndent()
),
ionText("foo::(bar::baz::{abc: zar::qux::xyz::123, def: 456})")
) + files().flatMap { f ->
val ion = ION.loader.load(f)
// If there are embedded documents, flatten them into separate test cases.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5018,7 +5018,6 @@ private void readAnnotationsThatForceBufferShiftInDelimitedStruct_1_1(
byte[] inputBytes,
int initialBufferSize
) throws Exception {
//readerBuilder = readerBuilder.withIncrementalReadingEnabled(false);
reader = boundedReaderFor(constructFromBytes, inputBytes, initialBufferSize, Integer.MAX_VALUE, byteCountingHandler);
assertSequence(
container(IonType.STRUCT,
Expand Down Expand Up @@ -5920,5 +5919,54 @@ public void readDataThatSwitchesVersionsMultipleTimes(boolean constructFromBytes
closeAndCount();
}

@ParameterizedTest(name = "constructFromBytes={0}")
@ValueSource(booleans = {true, false})
public void prefixedAnnotatedContainerInsideDelimitedAnnotatedContainerPreservesSidAnnotations(boolean constructFromBytes) throws Exception {
byte[] input = withIvm(1, hexStringToByteArray(cleanCommentedHexBytes(
"E4 09 | Annotation symbol 4 (name) \n" +
"F1 | Delimited list start \n" +
"E5 0B 0D | Annotation symbol 5 (version), annotation symbol 6 (imports) \n" +
"C7 | Prefixed s-exp length 7 \n" +
"E4 0F | Annotation symbol 7 (symbols) \n" +
"60 | Int 0 \n" +
"E5 11 13 | Annotation symbol 8 (max_id), annotation symbol 9 ($ion_shared_symbol_table) \n" +
"60 | Int 0 \n" +
"F0 | End of delimited list"
)));
reader = readerFor(readerBuilder, constructFromBytes, input);
assertSequence(
next(IonType.LIST), annotations("name"), STEP_IN,
next(IonType.SEXP), annotations("version", "imports"),
STEP_OUT,
next(null)
);
closeAndCount();
}

@ParameterizedTest(name = "constructFromBytes={0}")
@ValueSource(booleans = {true, false})
public void prefixedAnnotatedContainerInsideDelimitedAnnotatedContainerPreservesFlexSymAnnotations(boolean constructFromBytes) throws Exception {
byte[] input = withIvm(1, hexStringToByteArray(cleanCommentedHexBytes(
"E7 FF 61 | Annotation FlexSym 'a' \n" +
"F1 | Delimited list start \n" +
"E8 FF 62 09 | Annotation FlexSym 'b', annotation FlexSym SID 4 (name) \n" +
"C8 | Prefixed s-exp length 8 \n" +
"E4 0F | Annotation symbol 7 (symbols) \n" +
"60 | Int 0 \n" +
"E8 0B FF 63 | Annotation FlexSym SID 5 (version), annotation FlexSym 'c' \n" +
"60 | Int 0 \n" +
"F0 | End of delimited list"
)));
readerBuilder = readerBuilder.withIncrementalReadingEnabled(false);
reader = readerFor(readerBuilder, constructFromBytes, input);
assertSequence(
next(IonType.LIST), annotations("a"), STEP_IN,
next(IonType.SEXP), annotations("b", "name"),
STEP_OUT,
next(null)
);
closeAndCount();
}

// TODO Ion 1.1 symbol tables with all kinds of annotation encodings (opcodes E4 - E9, inline and SID)
}

0 comments on commit eb193f0

Please sign in to comment.