Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adds support for Ion 1.1 inline symbols to the system-level binary reader. #860

Merged
merged 1 commit into from
May 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 46 additions & 14 deletions src/main/java/com/amazon/ion/impl/IonReaderContinuableCore.java
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

package com.amazon.ion.impl;

import com.amazon.ion.Decimal;
Expand All @@ -9,12 +8,14 @@
import com.amazon.ion.IonInt;
import com.amazon.ion.IonType;
import com.amazon.ion.IvmNotificationConsumer;
import com.amazon.ion.SymbolToken;
import com.amazon.ion.Timestamp;
import com.amazon.ion.UnknownSymbolException;

import java.math.BigDecimal;
import java.math.BigInteger;
import java.util.Date;
import java.util.function.Consumer;

/**
* IonCursor with the core IonReader interface methods. Useful for adapting an IonCursor implementation into a
Expand Down Expand Up @@ -71,24 +72,43 @@ interface IonReaderContinuableCore extends IonCursor {
* @return the symbol ID of the field name, if the current value is a
* field within a struct.
* If the current value is not a field, or if the symbol ID cannot be
* determined, this method returns a value <em>less than one</em>.
* determined, this method returns a value <em>less than zero</em>.
* If this method returns less than zero and the reader is positioned
* on a value with a field name, then the text for the field name can be
* retrieved using {@link #getFieldText()}.
*
*/
@Deprecated
int getFieldId();

/**
* Gets the symbol IDs of the annotations attached to the current value.
* @return true if the value on which the reader is currently positioned has field
* name text available for reading via {@link #getFieldText()}. If this
* method returns false but the reader is positioned on a value with a field name,
* then the field name symbol ID can be retrieved using {@link #getFieldId()}.
*/
boolean hasFieldText();

/**
* Reads the text for the current field name. It is the caller's responsibility to
* ensure {@link #hasFieldText()} returns true before calling this method.
* @return the field name text.
*/
String getFieldText();

/**
* Consumes SymbolTokens representing the annotations attached to the current value.
* Each SymbolToken provided will contain *either* a symbol ID, *or* its symbol
* text, depending on how it was encoded.
* <p>
* <b>This is an "expert method": correct use requires deep understanding
* of the Ion binary format. You almost certainly don't want to use it.</b>
*
* @return the symbol IDs of the annotations on the current value.
* If the current value has no annotations, this method returns an empty array.
*
* <p>
* It is the caller's responsibility to ensure {@link #hasAnnotations()} returns
* true before calling this method.
Comment on lines +107 to +108
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it easier to implement this way, or can we just say that the consumer will receive no values if there are no annotations?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is an internal API, so I'm happy to have a stricter contract that cuts down on extra branching.

*/
@Deprecated
int[] getAnnotationIds();
void consumeAnnotationTokens(Consumer<SymbolToken> consumer);

/**
* Returns the current value as an boolean.
Expand Down Expand Up @@ -180,23 +200,35 @@ interface IonReaderContinuableCore extends IonCursor {
*/
String stringValue();

/**
* Reads the symbol ID of the symbol value that begins at `valueMarker.startIndex` and ends at
* `valueMarker.endIndex`.
* @return -1 if the value is null
*/
/**
* Gets the symbol ID of the current symbol value.
* <p>
* <b>This is an "expert method": correct use requires deep understanding
* of the Ion binary format. You almost certainly don't want to use it.</b>
*
* @return the symbol ID of the value.
* If the symbol ID cannot be determined, this method returns a value <em>less than one</em>.
* If the symbol ID cannot be determined, this method returns a value <em>less than zero</em>.
* If this is the case and the reader is positioned on a symbol value, then the text for the
* symbol can be retrieved using {@link #hasSymbolText()}.
*/
@Deprecated
int symbolValueId();

/**
* @return true if the value on which the reader is currently positioned is a
* symbol with text available for reading via {@link #getSymbolText()}. If this
* method returns false but the reader is positioned on a symbol value, then
* the value's symbol ID can be retrieved using {@link #symbolValueId()}.
*/
boolean hasSymbolText();
popematt marked this conversation as resolved.
Show resolved Hide resolved

/**
* Reads the text for the current symbol value. It is the caller's responsibility to
* ensure {@link #hasSymbolText()} returns true before calling this method.
* @return the symbol value text.
*/
String getSymbolText();

/**
* Gets the size in bytes of the current lob value.
* This is only valid when {@link #getType()} returns {@link IonType#BLOB}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import com.amazon.ion.IonBufferConfiguration;
import com.amazon.ion.IonException;
import com.amazon.ion.IonType;
import com.amazon.ion.SymbolToken;
import com.amazon.ion.Timestamp;
import com.amazon.ion._private.SuppressFBWarnings;
import com.amazon.ion.impl.bin.IntList;
Expand All @@ -19,6 +20,7 @@
import java.math.BigInteger;
import java.nio.ByteBuffer;
import java.util.Date;
import java.util.function.Consumer;

import static com.amazon.ion.impl.bin.Ion_1_1_Constants.*;

Expand Down Expand Up @@ -1297,6 +1299,19 @@ public String stringValue() {
return readString();
}

@Override
public boolean hasSymbolText() {
if (valueTid == null || IonType.SYMBOL != valueTid.type) {
return false;
}
return valueTid.isInlineable;
}

@Override
public String getSymbolText() {
return readString();
}

@Override
public int symbolValueId() {
if (valueTid == null || IonType.SYMBOL != valueTid.type) {
Expand Down Expand Up @@ -1344,13 +1359,28 @@ IntList getAnnotationSidList() {
}

@Override
public int[] getAnnotationIds() {
getAnnotationSidList();
int[] annotationArray = new int[annotationSids.size()];
for (int i = 0; i < annotationArray.length; i++) {
annotationArray[i] = annotationSids.get(i);
public void consumeAnnotationTokens(Consumer<SymbolToken> consumer) {
if (annotationSequenceMarker.startIndex >= 0) {
if (annotationSequenceMarker.typeId != null && annotationSequenceMarker.typeId.isInlineable) {
getAnnotationMarkerList();
} else {
getAnnotationSidList();
for (int i = 0; i < annotationSids.size(); i++) {
consumer.accept(new SymbolTokenImpl(annotationSids.get(i)));
}
}
}
for (int i = 0; i < annotationTokenMarkers.size(); i++) {
Marker marker = annotationTokenMarkers.get(i);
if (marker.startIndex < 0) {
// This means the endIndex represents the token's symbol ID.
consumer.accept(new SymbolTokenImpl((int) marker.endIndex));
} else {
// The token is inline UTF-8 text.
ByteBuffer utf8InputBuffer = prepareByteBuffer(marker.startIndex, marker.endIndex);
consumer.accept(new SymbolTokenImpl(utf8Decoder.decode(utf8InputBuffer, (int) (marker.endIndex - marker.startIndex)), -1));
}
}
return annotationArray;
}

/**
Expand Down Expand Up @@ -1380,11 +1410,13 @@ public int getFieldId() {
return fieldSid;
}

/**
* Reads the text for the current field name.
* @return the field name text.
*/
String getFieldText() {
@Override
public boolean hasFieldText() {
return fieldTextMarker.startIndex > -1;
}

@Override
public String getFieldText() {
ByteBuffer utf8InputBuffer = prepareByteBuffer(fieldTextMarker.startIndex, fieldTextMarker.endIndex);
return utf8Decoder.decode(utf8InputBuffer, (int) (fieldTextMarker.endIndex - fieldTextMarker.startIndex));
}
Expand Down
101 changes: 70 additions & 31 deletions src/main/java/com/amazon/ion/impl/IonReaderNonContinuableSystem.java
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

package com.amazon.ion.impl;

import com.amazon.ion.Decimal;
Expand All @@ -19,8 +18,10 @@
import java.math.BigDecimal;
import java.math.BigInteger;
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Date;
import java.util.Iterator;
import java.util.List;
import java.util.Queue;

import static com.amazon.ion.IonCursor.Event.NEEDS_DATA;
Expand All @@ -46,8 +47,15 @@ final class IonReaderNonContinuableSystem implements IonReader {
*/
IonReaderNonContinuableSystem(IonReaderContinuableCore reader) {
this.reader = reader;
reader.registerIvmNotificationConsumer((x, y) -> {
pendingIvmSids.add(SystemSymbols.ION_1_0_SID); // TODO generalize for Ion 1.1
reader.registerIvmNotificationConsumer((major, minor) -> {
if (major != 1 || minor > 1) {
throw new IllegalStateException("The parser should have already thrown upon encountering this illegal IVM.");
}
if (minor == 0) {
pendingIvmSids.add(SystemSymbols.ION_1_0_SID);
} else {
// TODO how should this be handled for Ion 1.1?
}
Comment on lines +54 to +58
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

how should this be handled for Ion 1.1?

I have some ideas, but I haven't checked them for feasibility w.r.t. backwards compatibility with existing public APIs.

We don't actually need the SID of the IVM. We just need some value that represents which version it is. I don't think we should re-use IonEncodingVersion for this, but we could create an internal-only VersionMarker enum or something like that.

Alternately, maybe we could say that the system reader doesn't give you IVMs as symbols—it requires registering an IVM listener just like the regular reader.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we're on the same page here. I would like if we could avoid requiring the Ion 1.1 writer to recognize SIDs as IVMs, while still allowing IVMs to be propagated from the system reader to a user-level writer. We may have to add some extra logic / callback registration between the writer and system reader to make that work, but it should be doable.

});
}

Expand Down Expand Up @@ -224,10 +232,14 @@ public String stringValue() {
prepareScalar();
String value;
if (type == IonType.SYMBOL) {
int sid = reader.symbolValueId();
value = getSymbolTable().findKnownSymbol(sid);
if (value == null) {
throw new UnknownSymbolException(sid);
if (reader.hasSymbolText()) {
value = reader.getSymbolText();
} else {
int sid = reader.symbolValueId();
value = getSymbolTable().findKnownSymbol(sid);
if (value == null) {
throw new UnknownSymbolException(sid);
}
}
} else {
value = reader.stringValue();
Expand Down Expand Up @@ -269,33 +281,43 @@ public String[] getTypeAnnotations() {
if (pendingIvmSid != -1 || !reader.hasAnnotations()) {
return _Private_Utils.EMPTY_STRING_ARRAY;
}
int[] annotationIds = reader.getAnnotationIds();
String[] annotations = new String[annotationIds.length];
// Note: it is not expected that the system reader is used in performance-sensitive applications; hence,
// no effort is made optimize the following.
List<String> annotations = new ArrayList<>();
SymbolTable symbolTable = getSymbolTable();
for (int i = 0; i < annotationIds.length; i++) {
int sid = annotationIds[i];
String annotation = symbolTable.findKnownSymbol(sid);
if (annotation == null) {
throw new UnknownSymbolException(sid);
reader.consumeAnnotationTokens((token) -> {
String text = token.getText();
if (text == null) {
int sid = token.getSid();
text = symbolTable.findKnownSymbol(sid);
if (text == null) {
throw new UnknownSymbolException(sid);
}
}
annotations[i] = annotation;
}
return annotations;
annotations.add(text);
});
return annotations.toArray(_Private_Utils.EMPTY_STRING_ARRAY);
}

@Override
public SymbolToken[] getTypeAnnotationSymbols() {
if (pendingIvmSid != -1 || !reader.hasAnnotations()) {
return SymbolToken.EMPTY_ARRAY;
}
int[] annotationIds = reader.getAnnotationIds();
SymbolToken[] annotationSymbolTokens = new SymbolToken[annotationIds.length];
// Note: it is not expected that the system reader is used in performance-sensitive applications; hence,
// no effort is made optimize the following.
List<SymbolToken> annotations = new ArrayList<>();
SymbolTable symbolTable = getSymbolTable();
for (int i = 0; i < annotationIds.length; i++) {
int sid = annotationIds[i];
annotationSymbolTokens[i] = new SymbolTokenImpl(symbolTable.findKnownSymbol(sid), sid);
}
return annotationSymbolTokens;
reader.consumeAnnotationTokens((token) -> {
String text = token.getText();
if (text != null) {
annotations.add(token);
} else {
int sid = token.getSid();
annotations.add(new SymbolTokenImpl(symbolTable.findKnownSymbol(sid), sid));
}
});
return annotations.toArray(SymbolToken.EMPTY_ARRAY);
}

@Override
Expand All @@ -313,6 +335,9 @@ public int getFieldId() {

@Override
public String getFieldName() {
if (reader.hasFieldText()) {
return reader.getFieldText();
}
int sid = reader.getFieldId();
if (sid < 0) {
return null;
Expand All @@ -326,23 +351,37 @@ public String getFieldName() {

@Override
public SymbolToken getFieldNameSymbol() {
int sid = reader.getFieldId();
if (sid < 0) {
return null;
String fieldText;
int sid = -1;
if (reader.hasFieldText()) {
fieldText = reader.getFieldText();
} else {
sid = reader.getFieldId();
if (sid < 0) {
return null;
}
fieldText = getSymbolTable().findKnownSymbol(sid);
}
return new SymbolTokenImpl(getSymbolTable().findKnownSymbol(sid), sid);
return new SymbolTokenImpl(fieldText, sid);
}

@Override
public SymbolToken symbolValue() {
int sid;
String symbolText;
int sid = -1;
if (pendingIvmSid != -1) {
sid = pendingIvmSid;
symbolText = getSymbolTable().findKnownSymbol(sid);
} else {
prepareScalar();
sid = reader.symbolValueId();
if (reader.hasSymbolText()) {
symbolText = reader.getSymbolText();
} else {
sid = reader.symbolValueId();
symbolText = getSymbolTable().findKnownSymbol(sid);
}
}
return new SymbolTokenImpl(getSymbolTable().findKnownSymbol(sid), sid);
return new SymbolTokenImpl(symbolText, sid);
}

@Override
Expand Down
Loading
Loading