diff --git a/src/main/java/com/amazon/ion/SystemSymbols.java b/src/main/java/com/amazon/ion/SystemSymbols.java index f39b11744..9da1e41dd 100644 --- a/src/main/java/com/amazon/ion/SystemSymbols.java +++ b/src/main/java/com/amazon/ion/SystemSymbols.java @@ -134,34 +134,4 @@ private SystemSymbols() { } * The name of the macro table s-expression within an Ion encoding directive. */ public static final String MACRO_TABLE = "macro_table"; - - /** - * The name of the "annotate" system macro. - */ - public static final String ANNOTATE = "annotate"; - - /** - * The name of the "literal" special form. - */ - public static final String LITERAL = "literal"; - - /** - * The name of the "macro" s-expression in the macro table. - */ - public static final String MACRO = "macro"; - - /** - * The name of the "export" s-expression in the macro table. - */ - public static final String EXPORT = "export"; - - /** - * The name of the "make_sexp" system macro. - */ - public static final String MAKE_SEXP = "make_sexp"; - - /** - * The sigil used to denote an expression group in TDL. 
- */ - public static final String TDL_EXPRESSION_GROUP = ";"; } diff --git a/src/main/java/com/amazon/ion/impl/IonCursorBinary.java b/src/main/java/com/amazon/ion/impl/IonCursorBinary.java index 7c7a833a2..b9695b70e 100644 --- a/src/main/java/com/amazon/ion/impl/IonCursorBinary.java +++ b/src/main/java/com/amazon/ion/impl/IonCursorBinary.java @@ -22,10 +22,13 @@ import static com.amazon.ion.impl.IonTypeID.DELIMITED_END_ID; import static com.amazon.ion.impl.IonTypeID.ONE_ANNOTATION_FLEX_SYM_LOWER_NIBBLE_1_1; import static com.amazon.ion.impl.IonTypeID.ONE_ANNOTATION_SID_LOWER_NIBBLE_1_1; +import static com.amazon.ion.impl.IonTypeID.SYSTEM_MACRO_INVOCATION_ID; import static com.amazon.ion.impl.IonTypeID.SYSTEM_SYMBOL_VALUE; import static com.amazon.ion.impl.IonTypeID.TWO_ANNOTATION_FLEX_SYMS_LOWER_NIBBLE_1_1; import static com.amazon.ion.impl.IonTypeID.TWO_ANNOTATION_SIDS_LOWER_NIBBLE_1_1; import static com.amazon.ion.impl.IonTypeID.TYPE_IDS_1_1; +import static com.amazon.ion.impl.bin.Ion_1_1_Constants.FLEX_SYM_MAX_SYSTEM_SYMBOL; +import static com.amazon.ion.impl.bin.Ion_1_1_Constants.FLEX_SYM_SYSTEM_SYMBOL_OFFSET; import static com.amazon.ion.util.IonStreamUtils.throwAsIonException; /** @@ -306,7 +309,7 @@ private static class ArgumentGroupMarker { /** * The major version of the Ion encoding currently being read. */ - private int majorVersion = -1; + private int majorVersion = 1; /** * The minor version of the Ion encoding currently being read. @@ -1490,24 +1493,17 @@ private long uncheckedReadFlexInt_1_1() { * Marker's endIndex is set to the symbol ID value and its startIndex is set to -1. When this FlexSym wraps a * delimited end marker, neither the Marker's startIndex nor its endIndex is set. * @param markerToSet the marker to populate. - * @return the symbol ID value if one was present, otherwise -1. + * @return the user-space symbol ID value if one was present, otherwise -1. 
*/ private long uncheckedReadFlexSym_1_1(Marker markerToSet) { long result = uncheckedReadFlexInt_1_1(); if (result == 0) { int nextByte = buffer[(int)(peekIndex++)]; - if (nextByte == OpCodes.INLINE_SYMBOL_ZERO_LENGTH) { - // Symbol zero. - markerToSet.endIndex = 0; - return 0; - } - if (nextByte == OpCodes.STRING_ZERO_LENGTH) { - // Inline symbol with zero length. - markerToSet.startIndex = peekIndex; - markerToSet.endIndex = peekIndex; + if (isFlexSymSystemSymbolOrSid0(nextByte & SINGLE_BYTE_MASK)) { + setSystemSymbolMarker(markerToSet, (byte)(nextByte - FLEX_SYM_SYSTEM_SYMBOL_OFFSET)); return -1; } else if (nextByte != OpCodes.DELIMITED_END_MARKER) { - throw new IonException("FlexSym 0 may only precede symbol zero, empty string, or delimited end."); + throw new IonException("FlexSym 0 may only precede symbol zero, system symbol, or delimited end."); } markerToSet.typeId = IonTypeID.DELIMITED_END_ID; return -1; @@ -1523,6 +1519,16 @@ private long uncheckedReadFlexSym_1_1(Marker markerToSet) { return result; } + /* + * Determines whether a byte (specifically, the byte following a FlexSym escape byte) represents a system symbol. + * + * @param byteAfterEscapeCode The unsigned value of the byte after the FlexSym escape byte + * @return true if the byte is in the reserved range for system symbols or $0. + */ + private static boolean isFlexSymSystemSymbolOrSid0(int byteAfterEscapeCode) { + return byteAfterEscapeCode >= FLEX_SYM_SYSTEM_SYMBOL_OFFSET && byteAfterEscapeCode <= FLEX_SYM_MAX_SYSTEM_SYMBOL; + } + /** * Reads a FlexInt into a long, ensuring enough space is available in the buffer. After this method returns false, * `peekIndex` points to the first byte after the end of the FlexInt and `markerToSet.endIndex` contains the @@ -1589,15 +1595,8 @@ private boolean slowReadFlexSym_1_1(Marker markerToSet) { if (nextByte < 0) { return true; } - if ((byte) nextByte == OpCodes.INLINE_SYMBOL_ZERO_LENGTH) { - // Symbol zero. 
- markerToSet.endIndex = 0; - return false; - } - if ((byte) nextByte == OpCodes.STRING_ZERO_LENGTH) { - // Inline symbol with zero length. - markerToSet.startIndex = peekIndex; - markerToSet.endIndex = peekIndex; + if (isFlexSymSystemSymbolOrSid0(nextByte)) { + setSystemSymbolMarker(markerToSet, nextByte - FLEX_SYM_SYSTEM_SYMBOL_OFFSET); return false; } else if ((byte) nextByte != OpCodes.DELIMITED_END_MARKER) { throw new IonException("FlexSyms may only wrap symbol zero, empty string, or delimited end."); @@ -1645,6 +1644,12 @@ IonTypeID typeIdFor(int length) { return TYPE_IDS_1_1[OpCodes.SYMBOL_ADDRESS_MANY_BYTES & SINGLE_BYTE_MASK]; } }, + SYSTEM_SYMBOL_ID { + @Override + IonTypeID typeIdFor(int length) { + return SYSTEM_SYMBOL_VALUE; + } + }, STRUCT_END { @Override IonTypeID typeIdFor(int length) { @@ -1661,11 +1666,8 @@ static FlexSymType classifySpecialFlexSym(int specialByte) { if (specialByte < 0) { return FlexSymType.INCOMPLETE; } - if ((byte) specialByte == OpCodes.INLINE_SYMBOL_ZERO_LENGTH) { - return FlexSymType.SYMBOL_ID; - } - if ((byte) specialByte == OpCodes.STRING_ZERO_LENGTH) { - return FlexSymType.INLINE_TEXT; + if (isFlexSymSystemSymbolOrSid0(specialByte)) { + return FlexSymType.SYSTEM_SYMBOL_ID; } if ((byte) specialByte == OpCodes.DELIMITED_END_MARKER) { return FlexSymType.STRUCT_END; @@ -1692,7 +1694,12 @@ private FlexSymType uncheckedSkipFlexSym_1_1(Marker markerToSet) { if (result == 0) { markerToSet.startIndex = peekIndex + 1; markerToSet.endIndex = markerToSet.startIndex; - return FlexSymType.classifySpecialFlexSym(buffer[(int) peekIndex++] & SINGLE_BYTE_MASK); + int specialByte = buffer[(int) peekIndex++] & SINGLE_BYTE_MASK; + FlexSymType type = FlexSymType.classifySpecialFlexSym(specialByte); + if (type == FlexSymType.SYSTEM_SYMBOL_ID) { + setSystemSymbolMarker(markerToSet, (byte)(specialByte - FLEX_SYM_SYSTEM_SYMBOL_OFFSET)); + } + return type; } else if (result < 0) { markerToSet.startIndex = peekIndex; markerToSet.endIndex = peekIndex 
- result; @@ -1720,11 +1727,19 @@ private FlexSymType slowSkipFlexSym_1_1(Marker markerToSet) { result |= ~(-1 >>> Long.numberOfLeadingZeros(result)); } if (result == 0) { - FlexSymType flexSymType = FlexSymType.classifySpecialFlexSym(slowReadByte()); + int specialByte = slowReadByte(); + FlexSymType flexSymType = FlexSymType.classifySpecialFlexSym(specialByte); if (markerToSet != null && flexSymType != FlexSymType.INCOMPLETE) { markerToSet.startIndex = peekIndex; markerToSet.endIndex = peekIndex; } + if (markerToSet != null && flexSymType == FlexSymType.SYSTEM_SYMBOL_ID) { + // FIXME: See if we can set the SID in the endIndex here without causing the slow reader to get confused + // about where the end of the value is for tagless symbols. + // I.e. use setSystemSymbolMarker(markerToSet, (byte)(specialByte - FLEX_SYM_SYSTEM_SYMBOL_OFFSET)); + markerToSet.typeId = SYSTEM_SYMBOL_VALUE; + markerToSet.startIndex = peekIndex - 1; + } return flexSymType; } else if (result < 0) { if (markerToSet != null) { @@ -2229,26 +2244,28 @@ private void validateAnnotationWrapperEndIndex(long endIndex) { } } + /* + * The given Marker's endIndex is set to the system symbol ID value and its startIndex is set to -1 + * @param markerToSet the marker to set. + */ + private void setSystemSymbolMarker(Marker markerToSet, int systemSid) { + event = Event.START_SCALAR; + markerToSet.typeId = SYSTEM_SYMBOL_VALUE; + markerToSet.startIndex = -1; + markerToSet.endIndex = systemSid; + } + /** - * Sets the given marker to represent the current system token (system macro invocation or system symbol value). - * Before calling this method, `macroInvocationId` must be set from the one-byte FixedInt that represents the ID; - * positive values indicate a macro address, while negative values indicate a system symbol ID. - * @param valueTid the type ID of the system token. + * Sets the given marker to represent the current system macro invocation. 
+ * Before calling this method, `macroInvocationId` must be set from the one-byte FixedUInt that represents the ID. * @param markerToSet the marker to set. */ - private void setSystemTokenMarker(IonTypeID valueTid, Marker markerToSet) { + private void setSystemMacroInvocationMarker(Marker markerToSet) { isSystemInvocation = true; + event = Event.NEEDS_INSTRUCTION; + markerToSet.typeId = SYSTEM_MACRO_INVOCATION_ID; markerToSet.startIndex = peekIndex; - if (macroInvocationId < 0) { - // This is a system symbol value. - event = Event.START_SCALAR; - markerToSet.typeId = SYSTEM_SYMBOL_VALUE; - markerToSet.endIndex = peekIndex; - } else { - event = Event.NEEDS_INSTRUCTION; - markerToSet.typeId = valueTid; - markerToSet.endIndex = -1; - } + markerToSet.endIndex = -1; } /** @@ -2293,9 +2310,9 @@ private void uncheckedReadMacroInvocationHeader(IonTypeID valueTid, Marker marke setUserMacroInvocationMarker(valueTid, markerToSet, uncheckedReadFlexUInt_1_1()); return; } else { - // Opcode 0xEF: system macro invocation or system symbol value. + // Opcode 0xEF: system macro invocation macroInvocationId = buffer[(int) peekIndex++]; - setSystemTokenMarker(valueTid, markerToSet); + setSystemMacroInvocationMarker(markerToSet); return; } } else if (valueTid.length > 0) { @@ -2461,7 +2478,7 @@ private boolean slowReadMacroInvocationHeader(IonTypeID valueTid, Marker markerT } // The downcast to byte then upcast to long results in sign extension, treating the byte as a FixedInt. 
macroInvocationId = (byte) truncatedId; - setSystemTokenMarker(valueTid, markerToSet); + setSystemMacroInvocationMarker(markerToSet); return false; } } else if (valueTid.length > 0) { @@ -3123,6 +3140,9 @@ private long calculateTaglessLengthAndType(TaglessEncoding taglessEncoding) { default: throw new IllegalStateException("Length is built into the primitive type's IonTypeID."); } + if (valueTid == SYSTEM_SYMBOL_VALUE) { + return 1; + } if (length >= 0) { valueMarker.endIndex = peekIndex + length; } diff --git a/src/main/java/com/amazon/ion/impl/IonReaderContinuableApplicationBinary.java b/src/main/java/com/amazon/ion/impl/IonReaderContinuableApplicationBinary.java index ade9f73b1..6072c05ac 100644 --- a/src/main/java/com/amazon/ion/impl/IonReaderContinuableApplicationBinary.java +++ b/src/main/java/com/amazon/ion/impl/IonReaderContinuableApplicationBinary.java @@ -55,7 +55,7 @@ class IonReaderContinuableApplicationBinary extends IonReaderContinuableCoreBina private static final IonCatalog EMPTY_CATALOG = new SimpleCatalog(); // Initial capacity of the ArrayList used to hold the text in the current symbol table. - private static final int SYMBOLS_LIST_INITIAL_CAPACITY = 128; + static final int SYMBOLS_LIST_INITIAL_CAPACITY = 128; // The imports for Ion 1.0 data with no shared user imports. private static final LocalSymbolTableImports ION_1_0_IMPORTS @@ -70,7 +70,7 @@ class IonReaderContinuableApplicationBinary extends IonReaderContinuableCoreBina // The shared symbol tables imported by the local symbol table that is currently in scope. private LocalSymbolTableImports imports = ION_1_0_IMPORTS; - // The first lowest local symbol ID in the symbol table. + // The first (lowest) local symbol ID in the symbol table. private int firstLocalSymbolId = imports.getMaxId() + 1; // The cached SymbolTable representation of the current local symbol table. 
Invalidated whenever a local @@ -92,14 +92,16 @@ class IonReaderContinuableApplicationBinary extends IonReaderContinuableCoreBina IonReaderContinuableApplicationBinary(IonReaderBuilder builder, byte[] bytes, int offset, int length) { super(builder.getBufferConfiguration(), bytes, offset, length); this.catalog = builder.getCatalog() == null ? EMPTY_CATALOG : builder.getCatalog(); - symbols = new String[SYMBOLS_LIST_INITIAL_CAPACITY]; symbolTableReader = new SymbolTableReader(); - resetImports(); + resetImports(getIonMajorVersion(), getIonMinorVersion()); registerIvmNotificationConsumer((x, y) -> { // Note: for Ion 1.1 support, use the versions to set the proper system symbol table and local symbol table // processing logic. resetSymbolTable(); - resetImports(); + resetImports(x, y); + if (y == 1) { + installSymbols(SystemSymbols_1_1.allSymbolTexts()); + } }); } @@ -113,14 +115,16 @@ class IonReaderContinuableApplicationBinary extends IonReaderContinuableCoreBina IonReaderContinuableApplicationBinary(final IonReaderBuilder builder, final InputStream inputStream, byte[] alreadyRead, int alreadyReadOff, int alreadyReadLen) { super(builder.getBufferConfiguration(), inputStream, alreadyRead, alreadyReadOff, alreadyReadLen); this.catalog = builder.getCatalog() == null ? EMPTY_CATALOG : builder.getCatalog(); - symbols = new String[SYMBOLS_LIST_INITIAL_CAPACITY]; symbolTableReader = new SymbolTableReader(); - resetImports(); + resetImports(getIonMajorVersion(), getIonMinorVersion()); registerIvmNotificationConsumer((x, y) -> { // Note: for Ion 1.1 support, use the versions to set the proper system symbol table and local symbol table // processing logic. 
resetSymbolTable(); - resetImports(); + resetImports(x, y); + if (y == 1) { + installSymbols(SystemSymbols_1_1.allSymbolTexts()); + } }); registerOversizedValueHandler( () -> { @@ -192,8 +196,12 @@ public String next() { } Marker marker = annotationTokenMarkers.get((int) nextAnnotationPeekIndex++); if (marker.startIndex < 0) { - // This means the endIndex represents the token's symbol ID. - return convertToString((int) marker.endIndex); + if (marker.typeId == IonTypeID.SYSTEM_SYMBOL_VALUE) { + return getSystemSymbolToken(marker).assumeText(); + } else { + // This means the endIndex represents the token's symbol ID. + return convertToString((int) marker.endIndex); + } } // The token is inline UTF-8 text. java.nio.ByteBuffer utf8InputBuffer = prepareByteBuffer(marker.startIndex, marker.endIndex); @@ -210,6 +218,13 @@ SymbolToken nextSymbolToken() { return getSymbolToken(sid); } Marker marker = annotationTokenMarkers.get((int) nextAnnotationPeekIndex++); + if (marker.typeId == IonTypeID.SYSTEM_SYMBOL_VALUE) { + if (marker.startIndex < 0) { + return getSystemSymbolToken(marker); + } else { + throw new IllegalStateException("This should be unreachable."); + } + } if (marker.startIndex < 0) { // This means the endIndex represents the token's symbol ID. return getSymbolToken((int) marker.endIndex); @@ -485,10 +500,12 @@ private void resetSymbolTable() { /** * Reset the list of imported shared symbol tables. */ - private void resetImports() { - // Note: when support for the next version of Ion is added, conditionals on 'majorVersion' and 'minorVersion' - // must be added here. - imports = ION_1_0_IMPORTS; + private void resetImports(int major, int minor) { + if (minor == 0) { + imports = ION_1_0_IMPORTS; + } else { + imports = LocalSymbolTableImports.EMPTY; + } firstLocalSymbolId = imports.getMaxId() + 1; } @@ -516,7 +533,8 @@ protected void restoreSymbolTable(SymbolTable symbolTable) { // Note: this will only happen when `symbolTable` is the system symbol table. 
resetSymbolTable(); cachedReadOnlySymbolTable = symbolTable; - resetImports(); + // FIXME: This should take into account the version at the point in the stream. + resetImports(1, 0); localSymbolMaxOffset = -1; } } @@ -575,11 +593,7 @@ private String getSymbolString(int sid, LocalSymbolTableImports importedSymbols, return localSymbols[sid - (importedSymbols.getMaxId() + 1)]; } - /** - * Retrieves the String text for the given symbol ID. - * @param sid a symbol ID. - * @return a String. - */ + @Override String getSymbol(int sid) { if (sid < firstLocalSymbolId) { return imports.findKnownSymbol(sid); @@ -638,7 +652,7 @@ private void finishReadingSymbolTableStruct() { stepOutOfContainer(); if (!hasSeenImports) { resetSymbolTable(); - resetImports(); + resetImports(getIonMajorVersion(), getIonMinorVersion()); } installSymbols(newSymbols); state = State.READING_VALUE; @@ -674,8 +688,9 @@ private int mapInlineTextToSystemSid(Marker marker) { } private void readSymbolTableStructField() { - if (minorVersion > 0 && fieldSid < 0) { - fieldSid = mapInlineTextToSystemSid(fieldTextMarker); + if (minorVersion > 0) { + readSymbolTableStructField_1_1(); + return; } if (fieldSid == SYMBOLS_SID) { state = State.ON_SYMBOL_TABLE_SYMBOLS; @@ -692,22 +707,42 @@ private void readSymbolTableStructField() { } } + private void readSymbolTableStructField_1_1() { + if (matchesSystemSymbol_1_1(fieldTextMarker, SystemSymbols_1_1.SYMBOLS)) { + state = State.ON_SYMBOL_TABLE_SYMBOLS; + if (hasSeenSymbols) { + throw new IonException("Symbol table contained multiple symbols fields."); + } + hasSeenSymbols = true; + } else if (matchesSystemSymbol_1_1(fieldTextMarker, SystemSymbols_1_1.IMPORTS)) { + state = State.ON_SYMBOL_TABLE_IMPORTS; + if (hasSeenImports) { + throw new IonException("Symbol table contained multiple imports fields."); + } + hasSeenImports = true; + } + } + private void startReadingImportsList() { - resetImports(); + resetImports(getIonMajorVersion(), getIonMinorVersion()); 
resetSymbolTable(); newImports = new ArrayList<>(3); - newImports.add(getSystemSymbolTable()); + if (minorVersion == 0) { + newImports.add(getSystemSymbolTable()); + } state = State.READING_SYMBOL_TABLE_IMPORTS_LIST; } private void preparePossibleAppend() { - if (minorVersion > 0 && hasSymbolText()) { + if (minorVersion > 0) { prepareScalar(); - if (!bytesMatch(ION_SYMBOL_TABLE_UTF8, buffer, (int) valueMarker.startIndex, (int) valueMarker.endIndex)) { + if (!matchesSystemSymbol_1_1(valueMarker, SystemSymbols_1_1.ION_SYMBOL_TABLE)) { + resetSymbolTable(); + } + } else { + if (symbolValueId() != ION_SYMBOL_TABLE_SID) { resetSymbolTable(); } - } else if (symbolValueId() != ION_SYMBOL_TABLE_SID) { - resetSymbolTable(); } state = State.ON_SYMBOL_TABLE_FIELD; } @@ -941,13 +976,9 @@ boolean startsWithIonSymbolTable() { int sid = readVarUInt_1_0(); peekIndex = savedPeekIndex; return ION_SYMBOL_TABLE_SID == sid; - } else if (minorVersion > 0) { + } else if (minorVersion == 1) { Marker marker = annotationTokenMarkers.get(0); - if (marker.startIndex < 0) { - return marker.endIndex == ION_SYMBOL_TABLE_SID; - } else { - return bytesMatch(ION_SYMBOL_TABLE_UTF8, buffer, (int) marker.startIndex, (int) marker.endIndex); - } + return matchesSystemSymbol_1_1(marker, SystemSymbols_1_1.ION_SYMBOL_TABLE); } return false; } @@ -1009,6 +1040,8 @@ public String stringValue() { } else if (type == IonType.SYMBOL) { if (valueTid.isInlineable) { value = readString(); + } else if (valueTid == IonTypeID.SYSTEM_SYMBOL_VALUE) { + value = getSymbolText(); } else { int sid = symbolValueId(); if (sid < 0) { @@ -1135,7 +1168,7 @@ public Iterator iterateTypeAnnotations() { @Override public String getFieldName() { - if (fieldTextMarker.startIndex > -1 || isEvaluatingEExpression) { + if (fieldTextMarker.startIndex > -1 || isEvaluatingEExpression || fieldTextMarker.typeId == IonTypeID.SYSTEM_SYMBOL_VALUE) { return getFieldText(); } if (fieldSid < 0) { diff --git 
a/src/main/java/com/amazon/ion/impl/IonReaderContinuableCoreBinary.java b/src/main/java/com/amazon/ion/impl/IonReaderContinuableCoreBinary.java index e3286ef2f..1e36c6d05 100644 --- a/src/main/java/com/amazon/ion/impl/IonReaderContinuableCoreBinary.java +++ b/src/main/java/com/amazon/ion/impl/IonReaderContinuableCoreBinary.java @@ -40,8 +40,8 @@ import java.util.function.Consumer; import static com.amazon.ion.SystemSymbols.ION_ENCODING; -import static com.amazon.ion.SystemSymbols.MACRO_TABLE; -import static com.amazon.ion.SystemSymbols.SYMBOL_TABLE; +import static com.amazon.ion.impl.IonReaderContinuableApplicationBinary.SYMBOLS_LIST_INITIAL_CAPACITY; +import static com.amazon.ion.impl.IonTypeID.SYSTEM_SYMBOL_VALUE; import static com.amazon.ion.impl.bin.Ion_1_1_Constants.*; /** @@ -129,7 +129,7 @@ class IonReaderContinuableCoreBinary extends IonCursorBinary implements IonReade // The text representations of the symbol table that is currently in scope, indexed by symbol ID. If the element at // a particular index is null, that symbol has unknown text. - protected String[] symbols; + protected String[] symbols = new String[SYMBOLS_LIST_INITIAL_CAPACITY]; // The maximum offset into the 'symbols' array that points to a valid local symbol. protected int localSymbolMaxOffset = -1; @@ -607,17 +607,13 @@ private long readFlexSym_1_1(Marker markerToSet) { long result = readFlexInt_1_1(); if (result == 0) { int nextByte = buffer[(int)(peekIndex++)]; - if (nextByte == OpCodes.INLINE_SYMBOL_ZERO_LENGTH) { - // Symbol zero. - markerToSet.endIndex = 0; - return 0; - } - if (nextByte == OpCodes.STRING_ZERO_LENGTH) { - // Inline symbol with zero length. - markerToSet.startIndex = peekIndex; - markerToSet.endIndex = peekIndex; + // We pretend $0 is a system symbol to reduce the number of branches here. 
+ if (nextByte >= FLEX_SYM_SYSTEM_SYMBOL_OFFSET || nextByte <= (byte) (FLEX_SYM_SYSTEM_SYMBOL_OFFSET + Byte.MAX_VALUE)) { + markerToSet.typeId = SYSTEM_SYMBOL_VALUE; + markerToSet.startIndex = -1; + markerToSet.endIndex = (byte)(nextByte - FLEX_SYM_SYSTEM_SYMBOL_OFFSET); } else if (nextByte != OpCodes.DELIMITED_END_MARKER) { - throw new IonException("FlexSym 0 may only precede symbol zero, empty string, or delimited end."); + throw new IonException("FlexSym 0 may only precede symbol zero, system symbol, or delimited end."); } return -1; } else if (result < 0) { @@ -1086,16 +1082,39 @@ static boolean bytesMatch(byte[] target, byte[] buffer, int start, int end) { /** * @return true if current value has a sequence of annotations that begins with `$ion_encoding`; otherwise, false. */ - private boolean startsWithIonEncoding() { - Marker marker = annotationTokenMarkers.get(0); - if (marker.startIndex < 0) { - // TODO this is temporary until the Ion 1.1 system symbol table is finalized. At that point, we will - // look up the symbol ID (held in `marker.endIndex`) in the system symbol table. Below, 10 is the - // number of Ion 1.1 system symbols, providing the conversion from local symbol ID to `symbols` array - // index. - return ION_ENCODING.equals(symbols[(int) (marker.endIndex) - 10]); + boolean startsWithIonEncoding() { + if (minorVersion > 0) { + Marker marker = annotationTokenMarkers.get(0); + return matchesSystemSymbol_1_1(marker, SystemSymbols_1_1.ION_ENCODING); + } + return false; + } + + /** + * Retrieves the String text for the given symbol ID, if the text is available. + * @param sid a symbol ID. + * @return a String. + */ + String getSymbol(int sid) { + return null; // Symbol IDs are not resolved by the core reader. + } + + /** + * Returns true if the symbol at `marker`... + *

 <ul> <li>is a system symbol with the same ID as the expected System Symbol</li> + *

 <li>is an inline symbol with the same utf8 bytes as the expected System Symbol</li> + *

 <li>is a user symbol that maps to the same text as the expected System Symbol</li> + * </ul>

+ */ + boolean matchesSystemSymbol_1_1(Marker marker, SystemSymbols_1_1 systemSymbol) { + if (marker.typeId == IonTypeID.SYSTEM_SYMBOL_VALUE) { + return marker.endIndex == systemSymbol.getId(); + } else if (marker.startIndex < 0) { + // This is a local symbol whose ID is stored in marker.endIndex. + return systemSymbol.getText().equals(getSymbol((int) marker.endIndex)); } else { - return bytesMatch(ION_ENCODING_UTF8, buffer, (int) marker.startIndex, (int) marker.endIndex); + // This is an inline symbol with UTF-8 bytes bounded by the marker. + return bytesMatch(systemSymbol.getUtf8Bytes(), buffer, (int) marker.startIndex, (int) marker.endIndex); } } @@ -1171,13 +1190,13 @@ private boolean valueUnavailable() { } private void classifySexpWithinEncodingDirective() { - String name = stringValue(); - if (SYMBOL_TABLE.equals(name)) { + String name = getSymbolText(); + if (SystemSymbols_1_1.SYMBOL_TABLE.getText().equals(name)) { state = State.IN_SYMBOL_TABLE_SEXP; - } else if (MACRO_TABLE.equals(name)) { + } else if (SystemSymbols_1_1.MACRO_TABLE.getText().equals(name)) { state = State.IN_MACRO_TABLE_SEXP; } else { - throw new IonException(String.format("$ion_encoding expressions %s not supported.", name)); + throw new IonException(String.format("$ion_encoding expressions '%s' not supported.", name)); } } @@ -2347,7 +2366,7 @@ public boolean hasSymbolText() { if (valueTid == null || IonType.SYMBOL != valueTid.type) { return false; } - return valueTid.isInlineable; + return valueTid.isInlineable || valueTid == IonTypeID.SYSTEM_SYMBOL_VALUE; } @Override @@ -2355,6 +2374,9 @@ public String getSymbolText() { if (isEvaluatingEExpression) { return macroEvaluatorIonReader.symbolValue().assumeText(); } + if (valueMarker.typeId == IonTypeID.SYSTEM_SYMBOL_VALUE) { + return getSystemSymbolToken(valueMarker).getText(); + } return readString(); } @@ -2425,6 +2447,32 @@ protected SymbolToken getSymbolToken(int sid) { return new SymbolTokenImpl(sid); } + protected final SymbolToken 
getSystemSymbolToken(Marker marker) { + long id; + if (marker.startIndex == -1) { + id = marker.endIndex; + } else { + id = readFixedUInt_1_1(marker.startIndex, marker.endIndex); + + // FIXME: This is a hack that works as long as our system symbol table doesn't grow to + // more than ~95 symbols. We need this hack because when we have to read the FixedInt, + // we don't know whether it's a tagless FlexSym or a Regular value. + // Possible solutions include: + // * changing the spec so that FlexSym System SIDs line up with the regular System SIDs + // * Introducing a dummy IonTypeID that indicates that we need to add the bias + // * Update IonCursorBinary.slowSkipFlexSym_1_1() to put the id into valueMarker.endIndex, + // though that seems to have its own problems. + if (id >= FLEX_SYM_SYSTEM_SYMBOL_OFFSET) { + id = id - FLEX_SYM_SYSTEM_SYMBOL_OFFSET; + } + } + // In some cases, we pretend that $0 is a system symbol, so we must handle it here. + if (id == 0) { + return _Private_Utils.SYMBOL_0; + } + return SystemSymbols_1_1.get((int) id).getToken(); + } + @Override public void consumeAnnotationTokens(Consumer consumer) { if (annotationSequenceMarker.startIndex >= 0) { @@ -2441,7 +2489,11 @@ public void consumeAnnotationTokens(Consumer consumer) { Marker marker = annotationTokenMarkers.get(i); if (marker.startIndex < 0) { // This means the endIndex represents the token's symbol ID. - consumer.accept(getSymbolToken((int) marker.endIndex)); + if (minorVersion == 1 && marker.typeId == IonTypeID.SYSTEM_SYMBOL_VALUE) { + consumer.accept(getSystemSymbolToken(marker)); + } else { + consumer.accept(getSymbolToken((int) marker.endIndex)); + } } else { // The token is inline UTF-8 text. 
ByteBuffer utf8InputBuffer = prepareByteBuffer(marker.startIndex, marker.endIndex); @@ -2485,7 +2537,7 @@ public boolean hasFieldText() { if (isEvaluatingEExpression) { return macroEvaluatorIonReader.getFieldName() != null; } - return fieldTextMarker.startIndex > -1; + return fieldTextMarker.startIndex > -1 || fieldTextMarker.typeId == IonTypeID.SYSTEM_SYMBOL_VALUE; } @Override @@ -2493,6 +2545,9 @@ public String getFieldText() { if (isEvaluatingEExpression) { return macroEvaluatorIonReader.getFieldName(); } + if (fieldTextMarker.typeId == IonTypeID.SYSTEM_SYMBOL_VALUE) { + return getSystemSymbolToken(fieldTextMarker).getText(); + } ByteBuffer utf8InputBuffer = prepareByteBuffer(fieldTextMarker.startIndex, fieldTextMarker.endIndex); return utf8Decoder.decode(utf8InputBuffer, (int) (fieldTextMarker.endIndex - fieldTextMarker.startIndex)); } @@ -2503,7 +2558,10 @@ public SymbolToken getFieldNameSymbol() { return macroEvaluatorIonReader.getFieldNameSymbol(); } if (fieldTextMarker.startIndex > -1) { - return new SymbolTokenImpl(getFieldText(), -1); + return new SymbolTokenImpl(getFieldText(), SymbolTable.UNKNOWN_SYMBOL_ID); + } + if (fieldTextMarker.typeId == IonTypeID.SYSTEM_SYMBOL_VALUE) { + return getSystemSymbolToken(fieldTextMarker); } if (fieldSid < 0) { return null; @@ -2516,8 +2574,11 @@ public SymbolToken symbolValue() { if (isEvaluatingEExpression) { return macroEvaluatorIonReader.symbolValue(); } + if (valueTid == SYSTEM_SYMBOL_VALUE) { + return getSystemSymbolToken(valueMarker); + } if (valueTid.isInlineable) { - return new SymbolTokenImpl(stringValue(), SymbolTable.UNKNOWN_SYMBOL_ID); + return new SymbolTokenImpl(getSymbolText(), SymbolTable.UNKNOWN_SYMBOL_ID); } int sid = symbolValueId(); diff --git a/src/main/java/com/amazon/ion/impl/IonTypeID.java b/src/main/java/com/amazon/ion/impl/IonTypeID.java index f686f8714..29c840721 100644 --- a/src/main/java/com/amazon/ion/impl/IonTypeID.java +++ b/src/main/java/com/amazon/ion/impl/IonTypeID.java @@ -3,6 +3,7 
@@ package com.amazon.ion.impl; import com.amazon.ion.IonType; +import com.amazon.ion.impl.bin.OpCodes; import static com.amazon.ion.impl.bin.OpCodes.*; @@ -86,6 +87,7 @@ final class IonTypeID { static final IonTypeID STRUCT_WITH_FLEX_SYMS_ID; static final IonTypeID DELIMITED_END_ID; static final IonTypeID SYSTEM_SYMBOL_VALUE; + static final IonTypeID SYSTEM_MACRO_INVOCATION_ID; static { TYPE_IDS_NO_IVM = new IonTypeID[NUMBER_OF_BYTES]; TYPE_IDS_1_0 = new IonTypeID[NUMBER_OF_BYTES]; @@ -135,8 +137,9 @@ final class IonTypeID { // This is used as a dummy ID when a delimited container reaches its end. The key here is that the type ID's // lower nibble is OpCodes.DELIMITED_END_MARKER. DELIMITED_END_ID = TYPE_IDS_1_1[DELIMITED_END_MARKER & 0xFF]; - // This is used as a dummy ID when a system symbol value is encoded using the 0xEF opcode in Ion 1.1. - SYSTEM_SYMBOL_VALUE = TYPE_IDS_1_1[SYMBOL_ADDRESS_1_BYTE & 0xFF]; + + SYSTEM_SYMBOL_VALUE = TYPE_IDS_1_1[SYSTEM_SYMBOL & 0xFF]; + SYSTEM_MACRO_INVOCATION_ID = TYPE_IDS_1_1[OpCodes.SYSTEM_MACRO_INVOCATION & 0xFF]; } final IonType type; @@ -153,6 +156,7 @@ final class IonTypeID { // For structs, denotes whether field names are FlexSyms. For symbols, denotes whether the text is inline. // For annotation wrappers, denotes whether tokens are FlexSyms. final boolean isInlineable; + final int theByte; /** * Determines whether the Ion 1.0 spec allows this particular upperNibble/lowerNibble pair. 
@@ -225,9 +229,11 @@ private IonTypeID( this.macroId = macroId; this.isDelimited = isDelimited; this.isInlineable = isInlineable; + theByte = -1; } private IonTypeID(byte id, int minorVersion) { + theByte = 0xFF & (int) id; if (minorVersion == 0) { byte upperNibble = (byte) ((id >> BITS_PER_NIBBLE) & LOW_NIBBLE_BITMASK); this.lowerNibble = (byte) (id & LOW_NIBBLE_BITMASK); @@ -450,6 +456,6 @@ private IonTypeID(byte id, int minorVersion) { */ @Override public String toString() { - return String.format("%s(%s)", type, length); + return String.format("%02X(%s,%s)>", theByte, type, length); } } diff --git a/src/main/java/com/amazon/ion/impl/LocalSymbolTableImports.java b/src/main/java/com/amazon/ion/impl/LocalSymbolTableImports.java index 3f6efe9d5..6c19756a6 100644 --- a/src/main/java/com/amazon/ion/impl/LocalSymbolTableImports.java +++ b/src/main/java/com/amazon/ion/impl/LocalSymbolTableImports.java @@ -1,18 +1,5 @@ -/* - * Copyright 2007-2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * or in the "license" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ - +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 package com.amazon.ion.impl; import static com.amazon.ion.SymbolTable.UNKNOWN_SYMBOL_ID; @@ -20,6 +7,7 @@ import com.amazon.ion.SymbolTable; import com.amazon.ion.SymbolToken; import java.util.Arrays; +import java.util.Collections; import java.util.List; /** @@ -33,6 +21,8 @@ // there are zero or one imported non-system shared symtab(s). 
final class LocalSymbolTableImports { + public static final LocalSymbolTableImports EMPTY = new LocalSymbolTableImports(Collections.emptyList()); + /** * The symtabs imported by a local symtab, never null or empty. The first * symtab must be a system symtab, the rest must be non-system shared @@ -139,10 +129,11 @@ final class LocalSymbolTableImports */ private static int prepBaseSids(int[] baseSids, SymbolTable[] imports) { - SymbolTable firstImport = imports[0]; + if (imports.length == 0) { + return 0; + } - assert firstImport.isSystemTable() - : "first symtab must be a system symtab"; + SymbolTable firstImport = imports[0]; baseSids[0] = 0; int total = firstImport.getMaxId(); @@ -170,7 +161,7 @@ String findKnownSymbol(int sid) { String name = null; - if (sid <= myMaxId) + if (sid > 0 && sid <= myMaxId) { int i, previousBaseSid = 0; for (i = 1; i < myImports.length; i++) diff --git a/src/main/java/com/amazon/ion/impl/SystemSymbols_1_1.kt b/src/main/java/com/amazon/ion/impl/SystemSymbols_1_1.kt index 81b180b33..61478e03b 100644 --- a/src/main/java/com/amazon/ion/impl/SystemSymbols_1_1.kt +++ b/src/main/java/com/amazon/ion/impl/SystemSymbols_1_1.kt @@ -2,6 +2,8 @@ // SPDX-License-Identifier: Apache-2.0 package com.amazon.ion.impl +import com.amazon.ion.* +import com.amazon.ion.SymbolTable.* import java.util.* enum class SystemSymbols_1_1(val id: Int, val text: String) { @@ -70,6 +72,8 @@ enum class SystemSymbols_1_1(val id: Int, val text: String) { val utf8Bytes = text.encodeToByteArray() + val token: SymbolToken = SymbolTokenImpl(text, UNKNOWN_SYMBOL_ID) + companion object { private val ALL_VALUES: Array = entries.toTypedArray().apply { // Put all system symbol enum values into an array, and ensure that they are sorted by ID in that array. @@ -87,6 +91,16 @@ enum class SystemSymbols_1_1(val id: Int, val text: String) { } } + @JvmStatic + fun size() = ALL_VALUES.size + + // Private to avoid potential clashes with enum member names. 
+ @JvmStatic + private val ALL_SYMBOL_TEXTS = ALL_VALUES.map { it.text } + + @JvmStatic + fun allSymbolTexts() = ALL_SYMBOL_TEXTS + /** * Returns true if the [id] is a valid system symbol ID. */ @@ -99,8 +113,8 @@ enum class SystemSymbols_1_1(val id: Int, val text: String) { * Returns the text of the given system symbol ID, or null if not a valid system symbol ID. */ @JvmStatic - operator fun get(id: Int): String { - return SystemSymbols_1_1.ALL_VALUES[id - 1].text + operator fun get(id: Int): SystemSymbols_1_1 { + return SystemSymbols_1_1.ALL_VALUES[id - 1] } } } diff --git a/src/main/java/com/amazon/ion/impl/_Private_Utils.java b/src/main/java/com/amazon/ion/impl/_Private_Utils.java index 751fb4648..9de2e70ab 100644 --- a/src/main/java/com/amazon/ion/impl/_Private_Utils.java +++ b/src/main/java/com/amazon/ion/impl/_Private_Utils.java @@ -1,18 +1,5 @@ -/* - * Copyright 2007-2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * or in the "license" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ - +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 package com.amazon.ion.impl; import static com.amazon.ion.SymbolTable.UNKNOWN_SYMBOL_ID; @@ -193,6 +180,9 @@ public static SymbolTokenImpl newSymbolToken(String text, int sid) return new SymbolTokenImpl(text, sid); } + /** Cached copy of $0 */ + public static final SymbolTokenImpl SYMBOL_0 = newSymbolToken((String) null, 0); + /** * @return not null */ diff --git a/src/main/java/com/amazon/ion/impl/bin/IonManagedWriter_1_1.kt b/src/main/java/com/amazon/ion/impl/bin/IonManagedWriter_1_1.kt index f13696cfb..8b0e3be78 100644 --- a/src/main/java/com/amazon/ion/impl/bin/IonManagedWriter_1_1.kt +++ b/src/main/java/com/amazon/ion/impl/bin/IonManagedWriter_1_1.kt @@ -649,6 +649,8 @@ internal class IonManagedWriter_1_1( if (sid == UNKNOWN_SYMBOL_ID) { // No (known) SID either. throw UnknownSymbolException("Cannot write a symbol token with unknown text and unknown SID.") + } else if (sid == 0) { + rawWriter.write(kind, 0) } else { rawWriter.write(kind, sidTransformer?.transform(sid) ?: sid) } diff --git a/src/main/java/com/amazon/ion/impl/bin/Ion_1_1_Constants.java b/src/main/java/com/amazon/ion/impl/bin/Ion_1_1_Constants.java index e5ca627a1..5c233dafc 100644 --- a/src/main/java/com/amazon/ion/impl/bin/Ion_1_1_Constants.java +++ b/src/main/java/com/amazon/ion/impl/bin/Ion_1_1_Constants.java @@ -4,6 +4,7 @@ import com.amazon.ion.Timestamp; import com.amazon.ion._private.SuppressFBWarnings; +import com.amazon.ion.impl.SystemSymbols_1_1; /** * Contains constants (other than OpCodes) which are generally applicable to both reading and writing binary Ion 1.1 @@ -14,6 +15,7 @@ private Ion_1_1_Constants() {} // When writing system symbols (or $0) in a flex sym, the SID must be offset to // avoid clashing with E-Expression op codes. 
public static final int FLEX_SYM_SYSTEM_SYMBOL_OFFSET = 0x60; + public static final int FLEX_SYM_MAX_SYSTEM_SYMBOL = 0x60 + SystemSymbols_1_1.size(); static final int FIRST_2_BYTE_SYMBOL_ADDRESS = 256; static final int FIRST_MANY_BYTE_SYMBOL_ADDRESS = 65792; diff --git a/src/test/java/com/amazon/ion/Ion_1_1_RoundTripTest.kt b/src/test/java/com/amazon/ion/Ion_1_1_RoundTripTest.kt index 35fb41983..31eea4df3 100644 --- a/src/test/java/com/amazon/ion/Ion_1_1_RoundTripTest.kt +++ b/src/test/java/com/amazon/ion/Ion_1_1_RoundTripTest.kt @@ -5,6 +5,7 @@ package com.amazon.ion import com.amazon.ion.IonEncodingVersion.* import com.amazon.ion.TestUtils.* import com.amazon.ion.impl._Private_IonSystem +import com.amazon.ion.impl._Private_IonWriter import com.amazon.ion.impl.bin.* import com.amazon.ion.system.* import java.io.ByteArrayInputStream @@ -21,7 +22,6 @@ import org.junit.jupiter.params.provider.MethodSource /** * Suite of tests for running round trip tests on user and system values for various Ion 1.1 encodings. */ -@Disabled("IonCursorBinary has not been updated to read system symbols") class Ion_1_1_RoundTripTest { @Nested @@ -330,10 +330,14 @@ abstract class Ion_1_1_RoundTripBase { @ParameterizedTest(name = "{0}") @MethodSource("testData") + @Disabled("Re-interpreting system directives is not supported yet.") open fun testUserValuesArePreservedWhenTransferringSystemValues(name: String, ion: ByteArray) { // Read and compare the data. 
- val actual = roundTripToByteArray { w -> w.writeValues(newSystemReader(ion)) } + val actual = roundTripToByteArray { w -> + w as _Private_IonWriter + w.writeValues(newSystemReader(ion)) { x -> x - 9 } + } printDebugInfo(ion, actual) @@ -346,10 +350,14 @@ abstract class Ion_1_1_RoundTripBase { @ParameterizedTest(name = "{0}") @MethodSource("testData") + @Disabled("Re-interpreting system directives is not supported yet.") open fun testSystemValuesArePreservedWhenTransferringSystemValues(name: String, ion: ByteArray) { // Read and compare the data. - val actual = roundTripToByteArray { w -> w.writeValues(newSystemReader(ion)) } + val actual = roundTripToByteArray { w -> + w as _Private_IonWriter + w.writeValues(newSystemReader(ion)) { x -> x - 9 } + } printDebugInfo(ion, actual) diff --git a/src/test/java/com/amazon/ion/impl/EncodingDirectiveCompilationTest.java b/src/test/java/com/amazon/ion/impl/EncodingDirectiveCompilationTest.java index f81959667..7951a7913 100644 --- a/src/test/java/com/amazon/ion/impl/EncodingDirectiveCompilationTest.java +++ b/src/test/java/com/amazon/ion/impl/EncodingDirectiveCompilationTest.java @@ -33,6 +33,8 @@ */ public class EncodingDirectiveCompilationTest { + private static final int FIRST_LOCAL_SYMBOL_ID = 1; + private static void assertMacroTablesEqual(IonReader reader, Map expected) { Map actual = ((IonReaderContinuableCoreBinary) reader).getEncodingContext().getMacroTable(); assertEquals(expected, actual); @@ -47,36 +49,8 @@ private static Map newMacroTable(Macro... macros) { return macroTable; } - // Note: this may go away once the Ion 1.1 system symbol table is finalized and implemented, or if we were to - // make use of inline symbols in the encoding directive. - private static Map initializeSymbolTable(IonRawWriter_1_1 writer, String... 
userSymbols) { - Map symbols = new HashMap<>(); - int localSymbolId = SystemSymbols.ION_1_0_MAX_ID; - writer.writeAnnotations(SystemSymbols.ION_SYMBOL_TABLE_SID); - writer.stepInStruct(false); - writer.writeFieldName(SystemSymbols.SYMBOLS); - writer.stepInList(false); - writer.writeString(SystemSymbols.ION_ENCODING); - symbols.put(SystemSymbols.ION_ENCODING, ++localSymbolId); - writer.writeString(SystemSymbols.SYMBOL_TABLE); - symbols.put(SystemSymbols.SYMBOL_TABLE, ++localSymbolId); - writer.writeString(SystemSymbols.MACRO_TABLE); - symbols.put(SystemSymbols.MACRO_TABLE, ++localSymbolId); - writer.writeString("macro"); - symbols.put("macro", ++localSymbolId); - writer.writeString("?"); - symbols.put("?", ++localSymbolId); - for (String userSymbol : userSymbols) { - writer.writeString(userSymbol); - symbols.put(userSymbol, ++localSymbolId); - } - writer.stepOut(); - writer.stepOut(); - return symbols; - } - - private static void startEncodingDirective(IonRawWriter_1_1 writer, Map symbols) { - writer.writeAnnotations(symbols.get(SystemSymbols.ION_ENCODING)); + private static void startEncodingDirective(IonRawWriter_1_1 writer) { + writer.writeAnnotations(SystemSymbols_1_1.ION_ENCODING); writer.stepInSExp(false); } @@ -84,9 +58,9 @@ private static void endEncodingDirective(IonRawWriter_1_1 writer) { writer.stepOut(); } - private static void writeEncodingDirectiveSymbolTable(IonRawWriter_1_1 writer, Map symbols, String... userSymbols) { + private static void writeEncodingDirectiveSymbolTable(IonRawWriter_1_1 writer, String... 
userSymbols) { writer.stepInSExp(false); - writer.writeSymbol(symbols.get(SystemSymbols.SYMBOL_TABLE)); + writer.writeSymbol(SystemSymbols.SYMBOL_TABLE); writer.stepInList(false); for (String userSymbol : userSymbols) { writer.writeString(userSymbol); @@ -95,9 +69,21 @@ private static void writeEncodingDirectiveSymbolTable(IonRawWriter_1_1 writer, M writer.stepOut(); } - private static void startMacroTable(IonRawWriter_1_1 writer, Map symbols) { + private static Map initializeSymbolTable(IonRawWriter_1_1 writer, String... userSymbols) { + startEncodingDirective(writer); + writeEncodingDirectiveSymbolTable(writer, userSymbols); + endEncodingDirective(writer); + Map symbols = new HashMap<>(); + int localSymbolId = FIRST_LOCAL_SYMBOL_ID; + for (String userSymbol : userSymbols) { + symbols.put(userSymbol, localSymbolId++); + } + return symbols; + } + + private static void startMacroTable(IonRawWriter_1_1 writer) { writer.stepInSExp(false); - writer.writeSymbol(symbols.get(SystemSymbols.MACRO_TABLE)); + writer.writeSymbol(SystemSymbols_1_1.MACRO_TABLE); } private static void endMacroTable(IonRawWriter_1_1 writer) { @@ -106,7 +92,7 @@ private static void endMacroTable(IonRawWriter_1_1 writer) { private static void startMacro(IonRawWriter_1_1 writer, Map symbols, String name) { writer.stepInSExp(false); - writer.writeSymbol(symbols.get("macro")); + writer.writeSymbol(SystemSymbols_1_1.MACRO); writer.writeSymbol(symbols.get(name)); } @@ -137,9 +123,9 @@ public void structMacroWithOneOptional() throws Exception { ByteArrayOutputStream out = new ByteArrayOutputStream(); IonRawWriter_1_1 writer = IonRawBinaryWriter_1_1.from(out, 256, 0); writer.writeIVM(); - Map symbols = initializeSymbolTable(writer, "People", "ID", "Name", "Bald", "$ID", "$Name", "$Bald"); - startEncodingDirective(writer, symbols); - startMacroTable(writer, symbols); + Map symbols = initializeSymbolTable(writer, "People", "ID", "Name", "Bald", "$ID", "$Name", "$Bald", "?"); + startEncodingDirective(writer); 
+ startMacroTable(writer); startMacro(writer, symbols, "People"); writeMacroSignature(writer, symbols, "$ID", "$Name", "$Bald", "?"); // The macro body @@ -187,17 +173,16 @@ public void constantMacroWithUserSymbol() throws Exception { IonRawWriter_1_1 writer = IonRawBinaryWriter_1_1.from(out, 256, 0); writer.writeIVM(); Map symbols = initializeSymbolTable(writer, "Pi"); - startEncodingDirective(writer, symbols); - writeEncodingDirectiveSymbolTable(writer, symbols, "foo"); - startMacroTable(writer, symbols); + startEncodingDirective(writer); + writeEncodingDirectiveSymbolTable(writer, "foo"); + startMacroTable(writer); startMacro(writer, symbols, "Pi"); writeMacroSignature(writer, symbols); // Empty signature writer.writeDecimal(new BigDecimal("3.14159")); // The body: a constant endMacro(writer); endMacroTable(writer); endEncodingDirective(writer); - // Note: this will change when the system symbol table is implemented. This is the first local symbol ID. - writer.writeSymbol(10); // foo + writer.writeSymbol(FIRST_LOCAL_SYMBOL_ID); // foo byte[] data = getBytes(writer, out); Macro expectedMacro = new TemplateMacro( @@ -217,9 +202,9 @@ public void structMacroWithOneOptionalInvoked() throws Exception { ByteArrayOutputStream out = new ByteArrayOutputStream(); IonRawWriter_1_1 writer = IonRawBinaryWriter_1_1.from(out, 256, 0); writer.writeIVM(); - Map symbols = initializeSymbolTable(writer, "People", "ID", "Name", "Bald", "$ID", "$Name", "$Bald"); - startEncodingDirective(writer, symbols); - startMacroTable(writer, symbols); + Map symbols = initializeSymbolTable(writer, "People", "ID", "Name", "Bald", "$ID", "$Name", "$Bald", "?"); + startEncodingDirective(writer); + startMacroTable(writer); startMacro(writer, symbols, "People"); writeMacroSignature(writer, symbols, "$ID", "$Name", "$Bald", "?"); // The macro body @@ -306,10 +291,10 @@ public void macroInvocationWithinStruct() throws Exception { ByteArrayOutputStream out = new ByteArrayOutputStream(); IonRawWriter_1_1 
writer = IonRawBinaryWriter_1_1.from(out, 256, 0); writer.writeIVM(); - Map symbols = initializeSymbolTable(writer, "People", "ID", "Name", "Bald", "$ID", "$Name", "$Bald"); - startEncodingDirective(writer, symbols); - writeEncodingDirectiveSymbolTable(writer, symbols, "foo"); - startMacroTable(writer, symbols); + Map symbols = initializeSymbolTable(writer, "People", "ID", "Name", "Bald", "$ID", "$Name", "$Bald", "?"); + startEncodingDirective(writer); + writeEncodingDirectiveSymbolTable(writer, "foo"); + startMacroTable(writer); startMacro(writer, symbols, "People"); writeMacroSignature(writer, symbols, "$ID", "$Name", "?", "$Bald", "?"); // The macro body @@ -344,11 +329,9 @@ public void macroInvocationWithinStruct() throws Exception { ); writer.stepInStruct(true); - // Note: this will change when the system symbol table is implemented. This is the first local symbol ID. - writer.writeFieldName(10); + writer.writeFieldName(FIRST_LOCAL_SYMBOL_ID); writer.stepInEExp(0, false, expectedMacro); - // Note: this will change when the system symbol table is implemented. This is the first local symbol ID. - writer.writeSymbol(10); + writer.writeSymbol(FIRST_LOCAL_SYMBOL_ID); // Two trailing optionals are elided. 
writer.stepOut(); writer.stepOut(); @@ -380,10 +363,10 @@ public void macroInvocationWithOptionalSuppressedBeforeEndWithinStruct() throws ByteArrayOutputStream out = new ByteArrayOutputStream(); IonRawWriter_1_1 writer = IonRawBinaryWriter_1_1.from(out, 256, 0); writer.writeIVM(); - Map symbols = initializeSymbolTable(writer, "People", "ID", "Name", "Bald", "$ID", "$Name", "$Bald"); - startEncodingDirective(writer, symbols); - writeEncodingDirectiveSymbolTable(writer, symbols, "foo"); - startMacroTable(writer, symbols); + Map symbols = initializeSymbolTable(writer, "People", "ID", "Name", "Bald", "$ID", "$Name", "$Bald", "?"); + startEncodingDirective(writer); + writeEncodingDirectiveSymbolTable(writer, "foo"); + startMacroTable(writer); startMacro(writer, symbols, "People"); writeMacroSignature(writer, symbols, "$ID", "$Name", "?", "$Bald", "?"); // The macro body @@ -418,11 +401,9 @@ public void macroInvocationWithOptionalSuppressedBeforeEndWithinStruct() throws ); writer.stepInStruct(true); - // Note: this will change when the system symbol table is implemented. This is the first local symbol ID. - writer.writeFieldName(10); + writer.writeFieldName(FIRST_LOCAL_SYMBOL_ID); writer.stepInEExp(0, false, expectedMacro); - // Note: this will change when the system symbol table is implemented. This is the first local symbol ID. 
- writer.writeSymbol(10); + writer.writeSymbol(FIRST_LOCAL_SYMBOL_ID); // Explicitly elide the optional "Name" writer.stepInExpressionGroup(false); writer.stepOut(); @@ -459,9 +440,9 @@ public void constantMacroInvoked() throws Exception { IonRawWriter_1_1 writer = IonRawBinaryWriter_1_1.from(out, 256, 0); writer.writeIVM(); Map symbols = initializeSymbolTable(writer, "Pi"); - startEncodingDirective(writer, symbols); - writeEncodingDirectiveSymbolTable(writer, symbols, "foo"); - startMacroTable(writer, symbols); + startEncodingDirective(writer); + writeEncodingDirectiveSymbolTable(writer, "foo"); + startMacroTable(writer); startMacro(writer, symbols, "Pi"); writeMacroSignature(writer, symbols); // Empty signature writer.writeDecimal(new BigDecimal("3.14159")); // The body: a constant @@ -489,9 +470,9 @@ public void constantMacroInvoked() throws Exception { private Macro writeSimonSaysMacro(IonRawWriter_1_1 writer) { writer.writeIVM(); Map symbols = initializeSymbolTable(writer, "SimonSays", "anything"); - startEncodingDirective(writer, symbols); - writeEncodingDirectiveSymbolTable(writer, symbols, "foo"); - startMacroTable(writer, symbols); + startEncodingDirective(writer); + writeEncodingDirectiveSymbolTable(writer, "foo"); + startMacroTable(writer); startMacro(writer, symbols, "SimonSays"); writeMacroSignature(writer, symbols, "anything"); writer.writeSymbol(symbols.get("anything")); // The body: a variable @@ -513,8 +494,7 @@ public void structAsParameter() throws Exception { writer.stepInEExp(0, false, expectedMacro); writer.stepInStruct(true); - // Note: this will change when the system symbol table is implemented. This is the first local symbol ID. 
- writer.writeFieldName(10); + writer.writeFieldName(FIRST_LOCAL_SYMBOL_ID); writer.writeInt(123); writer.stepOut(); writer.stepOut(); @@ -591,8 +571,7 @@ public void macroInvocationsNestedWithinParameter() throws Exception { writer.stepInList(true); writer.stepInEExp(0, false, expectedMacro); writer.stepInStruct(true); - // Note: this will change when the system symbol table is implemented. This is the first local symbol ID. - writer.writeFieldName(10); + writer.writeFieldName(FIRST_LOCAL_SYMBOL_ID); writer.writeFloat(1.23); writer.stepOut(); writer.stepOut(); @@ -635,8 +614,7 @@ public void annotationInParameter() throws Exception { Macro expectedMacro = writeSimonSaysMacro(writer); writer.stepInEExp(0, false, expectedMacro); - // Note: this will change when the system symbol table is implemented. This is the first local symbol ID. - writer.writeAnnotations(10); + writer.writeAnnotations(FIRST_LOCAL_SYMBOL_ID); writer.writeNull(IonType.TIMESTAMP); writer.stepOut(); @@ -659,9 +637,9 @@ public void twoArgumentGroups() throws Exception { writer.writeIVM(); Map symbols = initializeSymbolTable(writer, "Groups", "these", "those", "*", "+"); - startEncodingDirective(writer, symbols); - writeEncodingDirectiveSymbolTable(writer, symbols, "foo"); - startMacroTable(writer, symbols); + startEncodingDirective(writer); + writeEncodingDirectiveSymbolTable(writer, "foo"); + startMacroTable(writer); startMacro(writer, symbols, "Groups"); writeMacroSignature(writer, symbols, "these", "*", "those", "+"); writer.stepInList(true); @@ -691,8 +669,7 @@ public void twoArgumentGroups() throws Exception { writer.stepInEExp(0, false, expectedMacro); writer.stepInExpressionGroup(false); // TODO add a test for length-prefixed argument groups - // Note: this will change when the system symbol table is implemented. This is the first local symbol ID. 
- writer.writeSymbol(10); + writer.writeSymbol(FIRST_LOCAL_SYMBOL_ID); writer.writeString("bar"); writer.stepOut(); writer.stepInExpressionGroup(false); @@ -732,9 +709,9 @@ public void macroInvocationInMacroDefinition() throws Exception { writer.writeIVM(); Map symbols = initializeSymbolTable(writer, "SimonSays", "anything", "Echo"); - startEncodingDirective(writer, symbols); - writeEncodingDirectiveSymbolTable(writer, symbols, "foo"); - startMacroTable(writer, symbols); + startEncodingDirective(writer); + writeEncodingDirectiveSymbolTable(writer, "foo"); + startMacroTable(writer); startMacro(writer, symbols, "SimonSays"); writeMacroSignature(writer, symbols, "anything"); writer.writeSymbol(symbols.get("anything")); // The body: a variable diff --git a/src/test/java/com/amazon/ion/impl/IonCursorBinaryTest.java b/src/test/java/com/amazon/ion/impl/IonCursorBinaryTest.java index bb3c16771..ef3c1054c 100644 --- a/src/test/java/com/amazon/ion/impl/IonCursorBinaryTest.java +++ b/src/test/java/com/amazon/ion/impl/IonCursorBinaryTest.java @@ -6,6 +6,7 @@ import com.amazon.ion.IonCursor; import com.amazon.ion.IonException; import com.amazon.ion.IonType; +import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.EnumSource; @@ -43,7 +44,6 @@ import static org.hamcrest.MatcherAssert.assertThat; import static org.hamcrest.Matchers.containsString; import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertNull; import static org.junit.jupiter.api.Assertions.assertThrows; import static org.junit.jupiter.api.Assertions.assertTrue; @@ -841,24 +841,6 @@ public void systemMacroInvocation(InputType inputType) throws Exception { testMacroInvocation(bytes(0xEF, 0x04), inputType, 6, -1, 4, true); } - @ParameterizedTest(name = "inputType={0}") - 
@EnumSource(InputType.class) - public void systemSymbolValue(InputType inputType) throws Exception { - // Opcode 0xEF; 1-byte FixedInt follows. 0xFE (-2) indicates system symbol ID 2. - byte[] data = withIvm(1, bytes(0xEF, 0xFE)); - try (IonCursorBinary cursor = inputType.initializeCursor(data)) { - assertEquals(START_SCALAR, cursor.nextValue()); - assertTrue(cursor.isSystemInvocation()); - Marker invocationMarker = cursor.getValueMarker(); - assertFalse(invocationMarker.typeId.isMacroInvocation); - assertEquals(6, invocationMarker.startIndex); - assertEquals(6, invocationMarker.endIndex); - // Note: a higher-level reader will use the sign to direct the lookup to the system symbol table instead of - // the system macro table. - assertEquals(-2, cursor.getMacroInvocationId()); - } - } - /** * Asserts that the given cursor's current value marker has the given attributes. */ @@ -1116,6 +1098,7 @@ public void taglessFloats(boolean constructFromBytes) throws Exception { } } + @Disabled("Until we fix the 'FIXME' in IonReaderContinuableCore") @ParameterizedTest(name = "constructFromBytes={0}") @ValueSource(booleans = {true, false}) public void taglessCompactSymbols(boolean constructFromBytes) throws Exception { @@ -1123,7 +1106,7 @@ public void taglessCompactSymbols(boolean constructFromBytes) throws Exception { 0x00, // User macro ID 0 0xF9, 0x6E, 0x61, 0x6D, 0x65, // interpreted as compact symbol (FlexSym with inline text "name") 0x09, // interpreted as compact symbol (FlexSym with SID 4) - 0x01, 0x90 // interpreted as compact symbol (special FlexSym) + 0x01, 0x75 // interpreted as compact symbol (special FlexSym) )); try (IonCursorBinary cursor = initializeCursor(STANDARD_BUFFER_CONFIGURATION, constructFromBytes, data)) { assertSequence( diff --git a/src/test/java/com/amazon/ion/impl/IonCursorTestUtilities.java b/src/test/java/com/amazon/ion/impl/IonCursorTestUtilities.java index 6e6443fc0..159a1675f 100644 --- 
a/src/test/java/com/amazon/ion/impl/IonCursorTestUtilities.java +++ b/src/test/java/com/amazon/ion/impl/IonCursorTestUtilities.java @@ -5,9 +5,11 @@ import com.amazon.ion.IntegerSize; import com.amazon.ion.IonBufferConfiguration; import com.amazon.ion.IonType; +import com.amazon.ion.SymbolToken; import java.math.BigInteger; import java.util.ArrayList; +import java.util.Arrays; import java.util.List; import java.util.function.Consumer; @@ -313,6 +315,32 @@ static ExpectationProvider fillContainer(IonType }; } + + static ExpectationProvider fieldName(String expectedValue) { + return consumer -> consumer.accept(new Expectation<>( + String.format("fieldName(%s)", expectedValue), + reader -> { + assertEquals(expectedValue, reader.getFieldText()); + } + )); + } + + static ExpectationProvider annotations(String... expectedAnnotations) { + return consumer -> consumer.accept(new Expectation<>( + String.format("annotations(%s)", Arrays.toString(expectedAnnotations)), + reader -> { + reader.nextValue(); + assertTrue(reader.hasAnnotations(), "Expected there to be annotations"); + List tokens = new ArrayList<>(); + reader.consumeAnnotationTokens(tokens::add); + for (int i = 0; i < Math.min(tokens.size(), expectedAnnotations.length); i++) { + assertEquals(expectedAnnotations[i], tokens.get(i).getText()); + } + assertEquals(expectedAnnotations.length, tokens.size()); + } + )); + } + /** * Provides an Expectation that verifies that advancing the cursor positions it at the current end of the stream. 
*/ diff --git a/src/test/java/com/amazon/ion/impl/IonReaderContinuableApplicationBinaryTest.java b/src/test/java/com/amazon/ion/impl/IonReaderContinuableApplicationBinaryTest.java index 30cac7c32..e7ecd4b29 100644 --- a/src/test/java/com/amazon/ion/impl/IonReaderContinuableApplicationBinaryTest.java +++ b/src/test/java/com/amazon/ion/impl/IonReaderContinuableApplicationBinaryTest.java @@ -100,6 +100,22 @@ public void basicSystemSymbols(boolean constructFromBytes) { ); } + @ParameterizedTest(name = "constructFromBytes={0}") + @ValueSource(booleans = {true, false}) + public void basicSystemSymbols_1_1(boolean constructFromBytes) { + IonReaderContinuableApplicationBinary reader = initializeReader( + constructFromBytes, + 0xE0, 0x01, 0x01, 0xEA, + 0xEE, 0x04, // Symbol value SID 4 ("name") + 0xEE, 0x05 // Symbol value SID 5 ("version") + ); + assertSequence( + reader, + scalar(), fillSymbolValue("name"), + scalar(), fillSymbolValue("version") + ); + } + @ParameterizedTest(name = "constructFromBytes={0}") @ValueSource(booleans = {true, false}) public void basicLocalSymbols(boolean constructFromBytes) { @@ -178,4 +194,29 @@ public void basicStepOutEarly(boolean constructFromBytes) { endStream() ); } + + @ParameterizedTest(name = "constructFromBytes={0}") + @ValueSource(booleans = {true, false}) + public void systemSymbolsEncodedUsingUserIdsAndInlineText_1_1(boolean constructFromBytes) { + IonReaderContinuableApplicationBinary reader = initializeReader( + constructFromBytes, + 0xE0, 0x01, 0x01, 0xEA, + 0xE7, 0xE7, '$', 'i', 'o', 'n', '_', 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g', // $ion_encoding:: + 0xFC, 0x27, // s-expression, length 19 + 0xFC, 0x23, // s-expression, length 17 + 0xEE, 0x0F, // 'symbol_table' (encoded as system symbol ID 15) + 0xBE, 0x9D, '$', 'i', 'o', 'n', '_', 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g', // ["$ion_encoding"] + 0xE4, 0x03, // $1::, where $1 is a local SID that points to the text "$ion_encoding" + 0xC6, // s-expression, length 6 + 0xC5, // 
s-expression, length 5 + 0xEE, 0x0F, // 'symbol_table' (encoded as system symbol ID 15) + 0xB2, 0x91, 'a', // ["a"] + 0xE1, 0x01 // $1, which now points to "a" + ); + assertSequence( + reader, + scalar(), fillSymbolValue("a"), + endStream() + ); + } } diff --git a/src/test/java/com/amazon/ion/impl/IonReaderContinuableCoreBinaryTest.java b/src/test/java/com/amazon/ion/impl/IonReaderContinuableCoreBinaryTest.java index 1de2823db..fd6133274 100644 --- a/src/test/java/com/amazon/ion/impl/IonReaderContinuableCoreBinaryTest.java +++ b/src/test/java/com/amazon/ion/impl/IonReaderContinuableCoreBinaryTest.java @@ -10,6 +10,7 @@ import org.junit.jupiter.api.Test; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.CsvSource; +import org.junit.jupiter.params.provider.EnumSource; import org.junit.jupiter.params.provider.ValueSource; import java.io.ByteArrayInputStream; @@ -19,6 +20,8 @@ import static com.amazon.ion.IonCursor.Event.START_SCALAR; import static com.amazon.ion.IonCursor.Event.VALUE_READY; import static com.amazon.ion.TestUtils.withIvm; +import static com.amazon.ion.impl.IonCursorTestUtilities.annotations; +import static com.amazon.ion.impl.IonCursorTestUtilities.fieldName; import static com.amazon.ion.impl.TaglessEncoding.FLEX_INT; import static com.amazon.ion.impl.TaglessEncoding.FLEX_UINT; import static com.amazon.ion.impl.TaglessEncoding.INT16; @@ -174,6 +177,70 @@ void sidSymbols_1_1_helper(int sid, String bytes, boolean constructFromBytes) { ); } + @ParameterizedTest(name = "constructFromBytes={0}") + @EnumSource(SystemSymbols_1_1.class) + public void systemSymbols_1_1(SystemSymbols_1_1 systemSymbol) { + systemSymbols_1_1_helper(systemSymbol, true); + systemSymbols_1_1_helper(systemSymbol, false); + } + void systemSymbols_1_1_helper(SystemSymbols_1_1 systemSymbol, boolean constructFromBytes) { + String systemSidBytes = Integer.toHexString(systemSymbol.getId()); + IonReaderContinuableCoreBinary reader = initializeReader( 
+ constructFromBytes, + TestUtils.hexStringToByteArray("E0 01 01 EA EE " + systemSidBytes + " 60") + ); + assertSequence( + reader, + scalar(), + symbolValue(systemSymbol.getText()), + scalar(), + fillIntValue(0), + endStream() + ); + } + + @ParameterizedTest(name = "constructFromBytes={0}") + @EnumSource(SystemSymbols_1_1.class) + public void systemSymbols_1_1_fieldNames(SystemSymbols_1_1 systemSymbol) { + systemSymbols_1_1_fieldNamesHelper(systemSymbol, true); + systemSymbols_1_1_fieldNamesHelper(systemSymbol, false); + } + void systemSymbols_1_1_fieldNamesHelper(SystemSymbols_1_1 systemSymbol, boolean constructFromBytes) { + String systemSidBytes = Integer.toHexString(0x60 + systemSymbol.getId()); + IonReaderContinuableCoreBinary reader = initializeReader( + constructFromBytes, + TestUtils.hexStringToByteArray("E0 01 01 EA F3 01 " + systemSidBytes + " 60 01 F0") + ); + assertSequence( + reader, + fillContainer(IonType.STRUCT, + scalar(), + fieldName(systemSymbol.getText()), + fillIntValue(0) + ), + endStream() + ); + } + + @ParameterizedTest(name = "symbol={0}") + @EnumSource(SystemSymbols_1_1.class) + public void systemSymbols_1_1_annotations(SystemSymbols_1_1 systemSymbol) { + systemSymbols_1_1_annotationsHelper(systemSymbol, true); + systemSymbols_1_1_annotationsHelper(systemSymbol, false); + } + void systemSymbols_1_1_annotationsHelper(SystemSymbols_1_1 systemSymbol, boolean constructFromBytes) { + String systemSidBytes = Integer.toHexString(0x60 + systemSymbol.getId()); + IonReaderContinuableCoreBinary reader = initializeReader( + constructFromBytes, + TestUtils.hexStringToByteArray("E0 01 01 EA E7 01 " + systemSidBytes + " 60") + ); + assertSequence( + reader, + annotations(systemSymbol.getText()), + fillIntValue(0), + endStream() + ); + } @ParameterizedTest(name = "constructFromBytes={0}") @ValueSource(booleans = {true, false}) @@ -996,7 +1063,7 @@ public void taglessCompactSymbols(boolean constructFromBytes) throws Exception { 0x00, // User macro ID 0 
0xF9, 0x6E, 0x61, 0x6D, 0x65, // interpreted as compact symbol (FlexSym with inline text "name") 0x09, // interpreted as compact symbol (FlexSym with SID 4) - 0x01, 0x90 // interpreted as compact symbol (special FlexSym) + 0x01, 0x75 // interpreted as compact symbol (special FlexSym) )); try (IonReaderContinuableCoreBinary reader = initializeReader(constructFromBytes, data)) { assertSequence( diff --git a/src/test/java/com/amazon/ion/impl/IonReaderContinuableTopLevelBinaryTest.java b/src/test/java/com/amazon/ion/impl/IonReaderContinuableTopLevelBinaryTest.java index 9759af8c8..ed62b4c30 100644 --- a/src/test/java/com/amazon/ion/impl/IonReaderContinuableTopLevelBinaryTest.java +++ b/src/test/java/com/amazon/ion/impl/IonReaderContinuableTopLevelBinaryTest.java @@ -3288,6 +3288,15 @@ public void multiByteSymbolTokens(boolean constructFromBytes) throws Exception { closeAndCount(); } + @ParameterizedTest(name = "constructFromBytes={0}") + @ValueSource(booleans = {true, false}) + public void systemSymbolAnnotations(boolean constructFromBytes) throws Exception { + reader = readerFor(constructFromBytes,0xE0, 0x01, 0x01, 0xEA, 0xE7, 0x01, 0x61, 0x60); + assertSequence( + next(IonType.INT), annotationSymbols("$ion") + ); + } + @ParameterizedTest(name = "constructFromBytes={0}") @ValueSource(booleans = {true, false}) public void symbolTableWithOpenContentImportsListField(boolean constructFromBytes) throws Exception { @@ -4965,7 +4974,7 @@ public void readOneAnnotationFlexSymThatForcesBufferShift_1_1(int initialBufferS @ParameterizedTest @ValueSource(ints={5, 6, 7, 8, 9, 10}) public void readTwoAnnotationFlexSymsThatForceBufferShift_1_1(int initialBufferSize) throws Exception { - byte[] data = withIvm(1, hexStringToByteArray("E8 F1 61 62 63 64 65 66 67 68 3C 00 00 60")); + byte[] data = withIvm(1, hexStringToByteArray("E8 F1 61 62 63 64 65 66 67 68 01 67 60")); Supplier> annotationExpectation = () -> annotations("abcdefgh", "symbols"); Supplier> valueExpectation = () -> 
intValue(0); readAnnotationsThatForceBufferShift_1_1(true, data, initialBufferSize, annotationExpectation, IonType.INT, valueExpectation); @@ -4991,7 +5000,7 @@ private void readAnnotationsThatForceBufferShiftInDelimitedStruct_1_1( @ParameterizedTest @ValueSource(ints={5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20}) public void readTwoAnnotationFlexSymsThatForceBufferShiftInDelimitedStruct_1_1(int initialBufferSize) throws Exception { - byte[] data = withIvm(1, hexStringToByteArray("F3 FD 61 62 E8 F1 61 62 63 64 65 66 67 68 3C 00 00 60 01 F0")); + byte[] data = withIvm(1, hexStringToByteArray("F3 FD 61 62 E8 F1 61 62 63 64 65 66 67 68 01 67 60 01 F0")); readAnnotationsThatForceBufferShiftInDelimitedStruct_1_1(true, data, initialBufferSize); readAnnotationsThatForceBufferShiftInDelimitedStruct_1_1(false, data, initialBufferSize); } @@ -5053,15 +5062,15 @@ private void readAnnotationsWithSpecialFlexSyms_1_1(boolean constructFromBytes, @ParameterizedTest @ValueSource(strings = { // Minimal representations - "E7 01 90 60 | One empty-text annotation; value int 0 \n" + - "E7 01 A0 60 | One SID 0 annotation; value int 0 \n" + - "E8 01 90 01 A0 60 | Two annotations: empty text, SID 0; value int 0 \n" + - "E9 09 01 A0 01 90 60 | Variable length = 4 annotations: SID 0, empty text; value int 0 \n", + "E7 01 75 60 | One empty-text annotation; value int 0 \n" + + "E7 01 60 60 | One SID 0 annotation; value int 0 \n" + + "E8 01 75 01 60 60 | Two annotations: empty text, SID 0; value int 0 \n" + + "E9 09 01 60 01 75 60 | Variable length = 4 annotations: SID 0, empty text; value int 0 \n", // Overpadded representations - "E7 02 00 90 60 | One overpadded empty-text annotation; value int 0 \n" + - "E7 04 00 00 A0 60 | One overpadded SID 0 annotation; value int 0 \n" + - "E8 08 00 00 00 90 02 00 A0 60 | Two overpadded annotations: empty text, SID 0; value int 0 \n" + - "E9 90 00 00 00 00 01 A0 01 90 60 | Variable overpadded length = 4 annotations: SID 0, empty text; value 
int 0 \n" + "E7 02 00 75 60 | One overpadded empty-text annotation; value int 0 \n" + + "E7 04 00 00 60 60 | One overpadded SID 0 annotation; value int 0 \n" + + "E8 08 00 00 00 75 02 00 60 60 | Two overpadded annotations: empty text, SID 0; value int 0 \n" + + "E9 90 00 00 00 00 01 60 01 75 60 | Variable overpadded length = 4 annotations: SID 0, empty text; value int 0 \n" }) public void readAnnotationsWithSpecialFlexSyms_1_1(String inputBytes) throws Exception { readAnnotationsWithSpecialFlexSyms_1_1(true, inputBytes); @@ -5071,7 +5080,7 @@ public void readAnnotationsWithSpecialFlexSyms_1_1(String inputBytes) throws Exc @ParameterizedTest @ValueSource(booleans = {true, false}) public void getAnnotationsAsStringFailsWhenTextIsUndefined(boolean constructFromBytes) throws Exception { - reader = readerForIon11(hexStringToByteArray("E7 01 A0 60"), constructFromBytes); + reader = readerForIon11(hexStringToByteArray("E7 01 60 60"), constructFromBytes); assertSequence(next(IonType.INT), intValue(0)); assertThrows(IonException.class, () -> reader.getTypeAnnotations()); assertThrows(IonException.class, () -> reader.iterateTypeAnnotations().next()); @@ -5214,35 +5223,35 @@ private void assertStructWithSymbolZeroFieldNamesCorrectlyParsed(boolean constru // SID 0 in fixed-length SID struct "DC | Struct Length = 12 \n" + "01 | Switch to FlexSyms \n" + - "01 A0 | FlexSym 0 \n" + + "01 60 | FlexSym 0 \n" + "6E | true \n" + - "01 A0 | FlexSym SID 0 \n" + + "01 60 | FlexSym SID 0 \n" + "6E | true \n" + "09 | FlexSym SID 4 (name) \n" + "6E | true \n" + - "01 A0 | FlexSym SID 0 \n" + + "01 60 | FlexSym SID 0 \n" + "6E | true", // SID 0 in variable-length SID to FlexSyms "FD | Variable length SID struct \n" + "19 | Length = FlexUInt 12 \n" + "01 | Switch to FlexSyms \n" + - "01 A0 | SID 0 \n" + + "01 60 | SID 0 \n" + "6E | true \n" + - "01 A0 | FlexSym SID 0 \n" + + "01 60 | FlexSym SID 0 \n" + "6E | true \n" + "09 | FlexSym SID 4 (name) \n" + "6E | true \n" + - "01 A0 | FlexSym SID 
0 \n" + + "01 60 | FlexSym SID 0 \n" + "6E | true", // SID 0 in delimited struct "F3 | Delimited struct \n" + - "01 A0 | FlexSym SID 0 \n" + + "01 60 | FlexSym SID 0 \n" + "6E | true \n" + - "01 A0 | FlexSym SID 0 \n" + + "01 60 | FlexSym SID 0 \n" + "6E | true \n" + "09 | FlexSym SID 4 (name) \n" + "6E | true \n" + - "01 A0 | FlexSym SID 0 \n" + + "01 60 | FlexSym SID 0 \n" + "6E | true \n" + "01 F0 | End delimited struct" }) @@ -5299,17 +5308,17 @@ public void assertStructWithEmptyInlineFieldNamesCorrectlyParsed(boolean constru // Empty field name in fixed-length SID struct "D4 | Struct Length = 4 \n" + "01 | switch to FlexSym encoding \n" + - "01 90 | FlexSym empty text \n" + + "01 75 | FlexSym empty text \n" + "6F | false", // Empty field name in variable-length SID struct "FD | Variable length SID struct \n" + "09 | Length = 4 \n" + "01 | switch to FlexSym encoding \n" + - "01 90 | FlexSym empty text \n" + + "01 75 | FlexSym empty text \n" + "6F | false", // Empty field name in delimited struct "F3 | Delimited struct \n" + - "01 90 | FlexSym empty text \n" + + "01 75 | FlexSym empty text \n" + "6F | false \n" + "01 F0 | End delimited struct" }) @@ -5705,14 +5714,14 @@ public void nestedDelimitedContainerInlineFieldNamesIncremental(int initialBuffe private byte[] delimitedSymbolTable() throws Exception { byte[] input = withIvm(1, hexStringToByteArray(cleanCommentedHexBytes( - "E4 07 | Annotation symbol ID 3 ($ion_symbol_table)\n" + + "E7 01 63 | Annotation System SID 3 ($ion_symbol_table)\n" + "F3 | Delimited struct\n" + - "0F | FlexSym SID 7 (symbols)\n" + + "01 67 | FlexSym System SID 7 (symbols)\n" + "F1 | Delimited list\n" + "96 66 6F 6F 62 61 72 | string foobar\n" + "F0 | End delimited list\n" + "01 F0 | End delimited struct\n" + - "E1 0A | Symbol ID 10" + "E1 01 | Symbol ID 1" ))); totalBytesInStream = input.length; return input; @@ -5724,8 +5733,6 @@ public void delimitedSymbolTable(boolean constructFromBytes) throws Exception { for (int 
initialBufferSize = 5; initialBufferSize <= 20; initialBufferSize++) { reader = boundedReaderFor(constructFromBytes, delimitedSymbolTable(), initialBufferSize, Integer.MAX_VALUE, byteCountingHandler); assertSequence( - // Note: this will fail if the Ion 1.1 system symbol table changes because SID 10 will point to something - // else. If that happens, change the input data to point to the first Ion 1.1 local symbol ID. next(IonType.SYMBOL), symbolValue("foobar"), next(null) ); diff --git a/src/test/java/com/amazon/ion/impl/bin/IonManagedWriter_1_1_Test.kt b/src/test/java/com/amazon/ion/impl/bin/IonManagedWriter_1_1_Test.kt index 4d57999b5..0b148e09b 100644 --- a/src/test/java/com/amazon/ion/impl/bin/IonManagedWriter_1_1_Test.kt +++ b/src/test/java/com/amazon/ion/impl/bin/IonManagedWriter_1_1_Test.kt @@ -13,7 +13,6 @@ import java.io.ByteArrayOutputStream import java.math.BigInteger import org.junit.jupiter.api.Assertions.assertEquals import org.junit.jupiter.api.Assertions.assertNull -import org.junit.jupiter.api.Disabled import org.junit.jupiter.api.Test import org.junit.jupiter.api.assertThrows import org.junit.jupiter.params.ParameterizedTest @@ -135,7 +134,6 @@ internal class IonManagedWriter_1_1_Test { } @Test - @Disabled("IonCursorBinary has not been updated to read system symbols in FlexSyms") fun `use writeValues to transform symbol IDS`() { `transform symbol IDS` { reader -> writeValues(reader) { sid -> sid + 32 } @@ -143,7 +141,6 @@ internal class IonManagedWriter_1_1_Test { } @Test - @Disabled("IonCursorBinary has not been updated to read system symbols in FlexSyms") fun `use writeValue to transform symbol IDS`() { `transform symbol IDS` { reader -> while (reader.next() != null) {