Skip to content

Commit

Permalink
Fixes incremental reading of FlexSyms.
Browse files Browse the repository at this point in the history
  • Loading branch information
tgregg committed Jun 28, 2024
1 parent 4f64e6f commit 5674e82
Show file tree
Hide file tree
Showing 3 changed files with 200 additions and 45 deletions.
112 changes: 72 additions & 40 deletions src/main/java/com/amazon/ion/impl/IonCursorBinary.java
Original file line number Diff line number Diff line change
Expand Up @@ -226,7 +226,7 @@ private static class RefillableState {
* Marker for the sequence of annotation symbol IDs on the current value. If there are no annotations on
* the current value, the startIndex will be negative.
*/
final Marker annotationSequenceMarker = new Marker(-1, 0);
final Marker annotationSequenceMarker = new Marker(-1, -1);

/**
* Holds both inline text markers and symbol IDs. If representing a symbol ID, the symbol ID value will
Expand All @@ -242,7 +242,7 @@ private static class RefillableState {
/**
* Marker representing the current value.
*/
final Marker valueMarker = new Marker(-1, 0);
final Marker valueMarker = new Marker(-1, -1);

/**
* The index of the first byte in the header of the value at which the reader is currently positioned.
Expand Down Expand Up @@ -1307,15 +1307,13 @@ private boolean slowReadAnnotationWrapperHeader_1_1(IonTypeID valueTid) {
if (valueTid.isInlineable) {
// Opcodes 0xE7 (one annotation FlexSym) and 0xE8 (two annotation FlexSyms)
Marker provisionalMarker = annotationTokenMarkers.provisionalElement();
slowReadFlexSym_1_1(provisionalMarker);
if (provisionalMarker.endIndex < 0) {
if (slowReadFlexSym_1_1(provisionalMarker)) {
return true;
}
if (valueTid.lowerNibble == TWO_ANNOTATION_FLEX_SYMS_LOWER_NIBBLE_1_1) {
// Opcode 0xE8 (two annotation FlexSyms)
provisionalMarker = annotationTokenMarkers.provisionalElement();
slowReadFlexSym_1_1(provisionalMarker);
if (provisionalMarker.endIndex < 0) {
if (slowReadFlexSym_1_1(provisionalMarker)) {
return true;
}
annotationTokenMarkers.commit();
Expand Down Expand Up @@ -1478,35 +1476,48 @@ private long uncheckedReadFlexSym_1_1(Marker markerToSet) {
}

/**
* Reads a FlexInt into a long, ensuring enough space is available in the buffer. After this method returns,
* `peekIndex` points to the first byte after the end of the FlexUInt.
* @return the value.
* Reads a FlexInt into a long, ensuring enough space is available in the buffer. After this method returns false,
* `peekIndex` points to the first byte after the end of the FlexInt and `markerToSet.endIndex` contains the
* sign-extended FlexInt value. When this method returns true, `markerToSet` is not modified by this method.
* @param firstByte the first (least-significant) byte of the FlexInt.
* @param markerToSet the marker to populate.
* @return true if there are not enough bytes to complete the FlexInt; otherwise, false.
*/
private long slowReadLargeFlexInt_1_1(int firstByte) {
private boolean slowReadLargeFlexInt_1_1(int firstByte, Marker markerToSet) {
firstByte &= SINGLE_BYTE_MASK;
// FlexInts are essentially just FlexUInts that interpret the most significant bit as a sign that needs to be
// extended.
long result = slowReadLargeFlexUInt_1_1(firstByte);
if (result < 0) {
// A negative FlexUInt result is treated as "not enough bytes available"; nothing is written to the marker.
return true;
}
if (buffer[(int) peekIndex - 1] < 0) {
// Sign extension. The mask must be built from a long (-1L): with an int literal the shift distance is taken
// modulo 32 and the mask is only 32 bits wide before widening, which corrupts negative values that occupy
// more than 32 bits (i.e. FlexInts of five or more bytes).
result |= ~(-1L >>> Long.numberOfLeadingZeros(result));
}
return result;
markerToSet.endIndex = result;
return false;
}

/**
* Reads a FlexInt into a long, ensuring enough space is available in the buffer. After this method returns,
* `peekIndex` points to the first byte after the end of the FlexUInt.
* @return the value.
* Reads a FlexInt into a long, ensuring enough space is available in the buffer. After this method returns false,
* `peekIndex` points to the first byte after the end of the FlexInt and `markerToSet.endIndex` contains the
* FlexInt value.
* @param markerToSet the marker to populate.
* @return true if there are not enough bytes to complete the FlexInt; otherwise, false.
*/
private long slowReadFlexInt_1_1() {
// The following up-cast to int performs sign extension, if applicable.
int currentByte = (byte) slowReadByte();
private boolean slowReadFlexInt_1_1(Marker markerToSet) {
int currentByte = slowReadByte();
if (currentByte < 0) {
// slowReadByte() returned a negative value: treated as not enough bytes available. The value is checked
// before any narrowing to byte, so this cannot be confused with a data byte whose high bit is set.
return true;
}
if ((currentByte & 1) == 1) {
// Single byte; shift out the continuation bit while preserving the sign.
return currentByte >> 1;
// Single byte; shift out the continuation bit while preserving the sign. The downcast to byte and implicit
// upcast back to int results in sign extension.
markerToSet.endIndex = ((byte) currentByte) >> 1;
return false;
}
// Low bit clear: not a single-byte FlexInt, so delegate to the multi-byte reader.
return slowReadLargeFlexInt_1_1(currentByte);
return slowReadLargeFlexInt_1_1(currentByte, markerToSet);
}

/**
Expand All @@ -1517,37 +1528,42 @@ private long slowReadFlexInt_1_1() {
* startIndex is set to -1. When this FlexSym wraps a delimited end marker, neither the Marker's startIndex nor its
* endIndex is set.
* @param markerToSet the marker to populate.
* @return the symbol ID value if one was present, otherwise -1.
* @return true if there are not enough bytes to complete the FlexSym; otherwise, false.
*/
private long slowReadFlexSym_1_1(Marker markerToSet) {
long result = slowReadFlexInt_1_1();
private boolean slowReadFlexSym_1_1(Marker markerToSet) {
// The FlexInt value is handed back through markerToSet.endIndex; a true return means the read is incomplete.
if (slowReadFlexInt_1_1(markerToSet)) {
return true;
}
// Take the FlexInt out of the marker and reset endIndex to -1 before deciding what the FlexSym represents.
long result = markerToSet.endIndex;
markerToSet.endIndex = -1;
if (result == 0) {
// A zero FlexInt is followed by one more byte selecting symbol zero, empty inline text, or delimited end.
int nextByte = (byte) slowReadByte();
if (nextByte == OpCodes.INLINE_SYMBOL_ZERO_LENGTH) {
int nextByte = slowReadByte();
if (nextByte < 0) {
// Negative return from slowReadByte(): treated as not enough bytes available.
return true;
}
// The (byte) casts below narrow the value before comparing; presumably the OpCodes constants are declared as
// (signed) byte values -- TODO confirm.
if ((byte) nextByte == OpCodes.INLINE_SYMBOL_ZERO_LENGTH) {
// Symbol zero.
// NOTE(review): startIndex is not written on this path, unlike the symbol ID branch at the bottom of this
// method -- confirm callers reset the marker's startIndex to -1 before reuse.
markerToSet.endIndex = 0;
return 0;
return false;
}
if (nextByte == OpCodes.STRING_ZERO_LENGTH) {
if ((byte) nextByte == OpCodes.STRING_ZERO_LENGTH) {
// Inline symbol with zero length.
markerToSet.startIndex = peekIndex;
markerToSet.endIndex = peekIndex;
return -1;
} else if (nextByte != OpCodes.DELIMITED_END_MARKER) {
return false;
} else if ((byte) nextByte != OpCodes.DELIMITED_END_MARKER) {
throw new IonException("FlexSyms may only wrap symbol zero, empty string, or delimited end.");
}
// Delimited end: only the marker's typeId is set here; endIndex keeps the -1 assigned above.
markerToSet.typeId = DELIMITED_END_ID;
return -1;
} else if (result < 0) {
// Negative FlexInt: inline text of -result bytes starting at peekIndex; peekIndex is advanced past the text.
// NOTE(review): no check that the buffer already holds bytes up to endIndex is visible in this method --
// confirm the caller ensures availability.
markerToSet.startIndex = peekIndex;
markerToSet.endIndex = peekIndex - result;
peekIndex = markerToSet.endIndex;
return -1;
} else {
// Positive FlexInt: a symbol ID, stored in endIndex with startIndex set to -1.
markerToSet.startIndex = -1;
markerToSet.endIndex = result;
}
return result;
return false;
}

/**
Expand Down Expand Up @@ -1674,10 +1690,27 @@ private FlexSymType slowSkipFlexSym_1_1(Marker markerToSet) {
}

/**
* Reads the field name at `peekIndex`, ensuring enough bytes are available in the buffer. After this method returns
* `peekIndex` points to the first byte of the value that follows the field name. If the field name contained a
* symbol ID, `fieldSid` is set to that symbol ID. If it contained inline text, `fieldSid` is set to -1, and the
* start and end indexes of the inline text are described by `fieldTextMarker`.
* Reads the field name FlexSym at `peekIndex`, ensuring enough bytes are available in the buffer. After this method
* returns false, `peekIndex` points to the first byte of the value that follows the field name. If the field name
* contained a symbol ID, `fieldSid` is set to that symbol ID. If it contained inline text, `fieldSid` is not
* assigned by this method (presumably it is already -1 at this point; confirm against the cursor's reset logic),
* and the start and end indexes of the inline text are described by `fieldTextMarker`.
* @return true if there are not enough bytes in the stream to complete the field name; otherwise, false.
*/
private boolean slowReadFieldNameFlexSym_1_1() {
if (slowReadFlexSym_1_1(fieldTextMarker)) {
return true;
}
if (fieldTextMarker.startIndex < 0) {
// A negative startIndex means no inline text was marked; endIndex then holds the symbol ID, or -1 when
// slowReadFlexSym_1_1 left it at its reset value.
fieldSid = (int) fieldTextMarker.endIndex;
}
return false;
}

/**
* Reads the field name FlexSym or FlexUInt at `peekIndex`, ensuring enough bytes are available in the buffer. After
* this method returns `peekIndex` points to the first byte of the value that follows the field name. If the field
* name contained a symbol ID, `fieldSid` is set to that symbol ID. If it contained inline text, `fieldSid` is set
* to -1, and the start and end indexes of the inline text are described by `fieldTextMarker`.
* @return true if there are not enough bytes in the stream to complete the field name; otherwise, false.
*/
private boolean slowReadFieldName_1_1() {
Expand All @@ -1687,16 +1720,15 @@ private boolean slowReadFieldName_1_1() {
return true;
}
if (parent.typeId.isInlineable) {
fieldSid = (int) slowReadFlexSym_1_1(fieldTextMarker);
return fieldSid < 0 && fieldTextMarker.endIndex < 0;
return slowReadFieldNameFlexSym_1_1();
} else {
// 0 in field name position of a SID struct indicates that all field names that follow are represented as
// using FlexSyms.
if (buffer[(int) peekIndex] == FlexInt.ZERO) {
peekIndex++;
setCheckpoint(CheckpointLocation.BEFORE_UNANNOTATED_TYPE_ID);
parent.typeId = IonTypeID.STRUCT_WITH_FLEX_SYMS_ID;
fieldSid = (int) slowReadFlexSym_1_1(fieldTextMarker);
return fieldSid < 0 && fieldTextMarker.endIndex < 0;
return slowReadFieldNameFlexSym_1_1();
} else {
fieldSid = (int) slowReadFlexUInt_1_1();
return fieldSid < 0;
Expand Down
7 changes: 3 additions & 4 deletions src/main/java/com/amazon/ion/impl/Marker.java
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

package com.amazon.ion.impl;

/**
Expand All @@ -25,11 +24,11 @@ class Marker {

/**
* Creates a marker for the slice that begins at `startIndex` (inclusive) and ends at `endIndex` (exclusive).
* @param startIndex index of the first byte in the slice.
* @param length the number of bytes in the slice.
* @param endIndex index of the first byte after the slice.
*/
Marker(final int startIndex, final int length) {
Marker(final int startIndex, final int endIndex) {
this.startIndex = startIndex;
this.endIndex = startIndex + length;
this.endIndex = endIndex;
}

/**
Expand Down
Loading

0 comments on commit 5674e82

Please sign in to comment.