From 72c58b019b01aefdc8fed991f806200cdddaed93 Mon Sep 17 00:00:00 2001 From: Matthew Pope <81593196+popematt@users.noreply.github.com> Date: Fri, 24 May 2024 22:59:56 -0700 Subject: [PATCH] Implements writeValues for IonManagedWriter_1_1 (#867) --- .../amazon/ion/impl/IonRawTextWriter_1_1.kt | 11 + .../com/amazon/ion/impl/IonRawWriter_1_1.kt | 6 + ...IonReaderContinuableApplicationBinary.java | 20 +- .../impl/IonReaderNonContinuableSystem.java | 63 +-- .../amazon/ion/impl/IonReaderTextUserX.java | 5 +- .../amazon/ion/impl/IonReaderTreeUserX.java | 34 +- .../ion/impl/bin/IonManagedWriter_1_1.kt | 216 ++++++++--- .../ion/impl/bin/IonRawBinaryWriter_1_1.kt | 11 + ...ByteArrayIteratorSystemProcessingTest.java | 8 + .../ion/BinaryReaderSystemProcessingTest.java | 8 + src/test/java/com/amazon/ion/Ion11Test.kt | 202 ---------- .../com/amazon/ion/Ion_1_1_RoundTripTest.kt | 358 ++++++++++++++++++ .../ion/impl/IonRawTextWriterTest_1_1.kt | 33 +- .../ion/impl/bin/IonManagedWriter_1_1_Test.kt | 46 +++ .../impl/bin/IonRawBinaryWriterTest_1_1.kt | 45 ++- 15 files changed, 761 insertions(+), 305 deletions(-) delete mode 100644 src/test/java/com/amazon/ion/Ion11Test.kt create mode 100644 src/test/java/com/amazon/ion/Ion_1_1_RoundTripTest.kt create mode 100644 src/test/java/com/amazon/ion/impl/bin/IonManagedWriter_1_1_Test.kt diff --git a/src/main/java/com/amazon/ion/impl/IonRawTextWriter_1_1.kt b/src/main/java/com/amazon/ion/impl/IonRawTextWriter_1_1.kt index 4a947e8831..4210fab96e 100644 --- a/src/main/java/com/amazon/ion/impl/IonRawTextWriter_1_1.kt +++ b/src/main/java/com/amazon/ion/impl/IonRawTextWriter_1_1.kt @@ -211,6 +211,17 @@ class IonRawTextWriter_1_1 internal constructor( numAnnotations = 0 } + override fun _private_hasFirstAnnotation(sid: Int, text: String?): Boolean { + if (numAnnotations == 0) return false + if (sid >= 0 && annotationsIdBuffer[0] == sid) { + return true + } + if (text != null && annotationsTextBuffer[0] == text) { + return true + } + return false + } 
+ override fun _private_hasFieldName(): Boolean = hasFieldName override fun writeFieldName(sid: Int) { diff --git a/src/main/java/com/amazon/ion/impl/IonRawWriter_1_1.kt b/src/main/java/com/amazon/ion/impl/IonRawWriter_1_1.kt index e0fad59a67..473e6a988f 100644 --- a/src/main/java/com/amazon/ion/impl/IonRawWriter_1_1.kt +++ b/src/main/java/com/amazon/ion/impl/IonRawWriter_1_1.kt @@ -59,6 +59,12 @@ interface IonRawWriter_1_1 { */ fun _private_clearAnnotations() + /** + * Returns true if the writer has at least one annotation set and the first annotation matches the + * given sid OR text. + */ + fun _private_hasFirstAnnotation(sid: Int, text: String?): Boolean + /** * Writes one annotation for the next value. * [writeAnnotations] may be called more than once to build up a list of annotations. diff --git a/src/main/java/com/amazon/ion/impl/IonReaderContinuableApplicationBinary.java b/src/main/java/com/amazon/ion/impl/IonReaderContinuableApplicationBinary.java index e01580edd4..6301580baf 100644 --- a/src/main/java/com/amazon/ion/impl/IonReaderContinuableApplicationBinary.java +++ b/src/main/java/com/amazon/ion/impl/IonReaderContinuableApplicationBinary.java @@ -891,7 +891,7 @@ void readSymbolTable() { } readImportMaxId(); break; - default: throw new IllegalStateException(); + default: throw new IllegalStateException(state.toString()); } } } @@ -924,14 +924,28 @@ private enum State { * false. 
*/ boolean startsWithIonSymbolTable() { - if (minorVersion == 0 || annotationTokenMarkers.isEmpty()) { + if (minorVersion == 0 && annotationSequenceMarker.startIndex >= 0) { long savedPeekIndex = peekIndex; peekIndex = annotationSequenceMarker.startIndex; int sid = readVarUInt_1_0(); peekIndex = savedPeekIndex; return ION_SYMBOL_TABLE_SID == sid; } - return ION_SYMBOL_TABLE_SID == annotationTokenMarkers.get(0).endIndex; + if (minorVersion == 1) { + Marker marker = annotationTokenMarkers.get(0); + if (marker.startIndex < 0) { + return marker.endIndex == ION_SYMBOL_TABLE_SID; + } else { + if (marker.endIndex - marker.startIndex != ION_SYMBOL_TABLE_UTF8.length) return false; + int start = (int) marker.startIndex; + boolean isIonSymbolTable = true; + for (int i = 0; i < ION_SYMBOL_TABLE_UTF8.length; i++) { + isIonSymbolTable &= buffer[start + i] == ION_SYMBOL_TABLE_UTF8[i]; + } + return isIonSymbolTable; + } + } + return false; } /** diff --git a/src/main/java/com/amazon/ion/impl/IonReaderNonContinuableSystem.java b/src/main/java/com/amazon/ion/impl/IonReaderNonContinuableSystem.java index dda97a3740..5dd4765b3b 100644 --- a/src/main/java/com/amazon/ion/impl/IonReaderNonContinuableSystem.java +++ b/src/main/java/com/amazon/ion/impl/IonReaderNonContinuableSystem.java @@ -35,11 +35,34 @@ */ final class IonReaderNonContinuableSystem implements IonReader { + private static final SymbolToken IVM_1_0 = new SymbolTokenImpl(SystemSymbols.ION_1_0, SystemSymbols.ION_1_0_SID); + private static final SymbolToken IVM_1_1 = new SymbolTokenImpl("$ion_1_1", -1); + + /** + * Represents an IVM that was read that has not yet been exposed as a Symbol value. 
+ */ + private enum PendingIvm { + ION_1_0(IVM_1_0), + ION_1_1(IVM_1_1); + + private final SymbolToken token; + PendingIvm(SymbolToken symbolToken) { + token = symbolToken; + } + + static PendingIvm pendingIvmForVersionOrNull(int major, int minor) { + if (major != 1) return null; + if (minor == 0) return ION_1_0; + if (minor == 1) return ION_1_1; + return null; + } + } + private final IonReaderContinuableCore reader; private IonType type = null; private IonType typeAfterIvm = null; - private final Queue pendingIvmSids = new ArrayDeque<>(1); - private int pendingIvmSid = -1; + private final Queue pendingIvms = new ArrayDeque<>(1); + private PendingIvm pendingIvm = null; /** * Constructs a new non-continuable system-level reader over the given continuable reader. @@ -48,14 +71,11 @@ final class IonReaderNonContinuableSystem implements IonReader { IonReaderNonContinuableSystem(IonReaderContinuableCore reader) { this.reader = reader; reader.registerIvmNotificationConsumer((major, minor) -> { - if (major != 1 || minor > 1) { + PendingIvm ivm = PendingIvm.pendingIvmForVersionOrNull(major, minor); + if (ivm == null) { throw new IllegalStateException("The parser should have already thrown upon encountering this illegal IVM."); } - if (minor == 0) { - pendingIvmSids.add(SystemSymbols.ION_1_0_SID); - } else { - // TODO how should this be handled for Ion 1.1? - } + pendingIvms.add(ivm); }); } @@ -70,8 +90,8 @@ public boolean hasNext() { * @return true if a value is ready to be presented to the user; otherwise, false. */ private boolean handleIvm() { - Integer ivmSid = pendingIvmSids.poll(); - if (ivmSid != null) { + PendingIvm nextPendingIvm = pendingIvms.poll(); + if (nextPendingIvm != null) { // An IVM has been found between values. if (typeAfterIvm == null) { // Only save the type of the next user value the first time an IVM is encountered before that value. 
@@ -79,11 +99,11 @@ private boolean handleIvm() { } // For consistency with the legacy implementation, the system reader surfaces IVMs as symbol values. type = IonType.SYMBOL; - pendingIvmSid = ivmSid; + pendingIvm = nextPendingIvm; return true; - } else if (pendingIvmSid != -1) { + } else if (pendingIvm != null) { // All preceding IVMs have been surfaced. Restore the value that follows. - pendingIvmSid = -1; + pendingIvm = null; type = typeAfterIvm; typeAfterIvm = null; return true; @@ -162,7 +182,7 @@ public IntegerSize getIntegerSize() { @Override public boolean isNullValue() { - return pendingIvmSid == -1 && reader.isNullValue(); + return pendingIvm == null && reader.isNullValue(); } @Override @@ -226,8 +246,8 @@ public Timestamp timestampValue() { @Override public String stringValue() { - if (pendingIvmSid != -1) { - return getSymbolTable().findKnownSymbol(pendingIvmSid); + if (pendingIvm != null) { + return pendingIvm.token.getText(); } prepareScalar(); String value; @@ -278,7 +298,7 @@ public SymbolTable getSymbolTable() { @Override public String[] getTypeAnnotations() { - if (pendingIvmSid != -1 || !reader.hasAnnotations()) { + if (pendingIvm != null || !reader.hasAnnotations()) { return _Private_Utils.EMPTY_STRING_ARRAY; } // Note: it is not expected that the system reader is used in performance-sensitive applications; hence, @@ -301,7 +321,7 @@ public String[] getTypeAnnotations() { @Override public SymbolToken[] getTypeAnnotationSymbols() { - if (pendingIvmSid != -1 || !reader.hasAnnotations()) { + if (pendingIvm != null || !reader.hasAnnotations()) { return SymbolToken.EMPTY_ARRAY; } // Note: it is not expected that the system reader is used in performance-sensitive applications; hence, @@ -322,7 +342,7 @@ public SymbolToken[] getTypeAnnotationSymbols() { @Override public Iterator iterateTypeAnnotations() { - if (pendingIvmSid != -1 || !reader.hasAnnotations()) { + if (pendingIvm != null || !reader.hasAnnotations()) { return 
_Private_Utils.emptyIterator(); } return _Private_Utils.stringIterator(getTypeAnnotations()); @@ -369,9 +389,8 @@ public SymbolToken getFieldNameSymbol() { public SymbolToken symbolValue() { String symbolText; int sid = -1; - if (pendingIvmSid != -1) { - sid = pendingIvmSid; - symbolText = getSymbolTable().findKnownSymbol(sid); + if (pendingIvm != null) { + return pendingIvm.token; } else { prepareScalar(); if (reader.hasSymbolText()) { diff --git a/src/main/java/com/amazon/ion/impl/IonReaderTextUserX.java b/src/main/java/com/amazon/ion/impl/IonReaderTextUserX.java index ca6f0cbd8f..67d27958aa 100644 --- a/src/main/java/com/amazon/ion/impl/IonReaderTextUserX.java +++ b/src/main/java/com/amazon/ion/impl/IonReaderTextUserX.java @@ -4,6 +4,7 @@ import static com.amazon.ion.SystemSymbols.ION_1_0; import static com.amazon.ion.SystemSymbols.ION_SYMBOL_TABLE; +import static com.amazon.ion.SystemSymbols.ION_SYMBOL_TABLE_SID; import com.amazon.ion.IonCatalog; import com.amazon.ion.IonType; @@ -118,7 +119,7 @@ private final boolean has_next_user_value() if (_value_type != null && !isNullValue() && IonType.DATAGRAM.equals(getContainerType())) { switch (_value_type) { case STRUCT: - if (_annotation_count > 0 && ION_SYMBOL_TABLE.equals(_annotations[0].getText())) { + if (_annotation_count > 0 && (ION_SYMBOL_TABLE.equals(_annotations[0].getText()) || ION_SYMBOL_TABLE_SID == _annotations[0].getSid())) { _symbols = _lstFactory.newLocalSymtab(_catalog, this, true); @@ -158,7 +159,7 @@ private final boolean has_next_user_value() return (!_eof); } - private static boolean isIonVersionMarker(String text) + static boolean isIonVersionMarker(String text) { return text != null && ION_VERSION_MARKER_REGEX.matcher(text).matches(); } diff --git a/src/main/java/com/amazon/ion/impl/IonReaderTreeUserX.java b/src/main/java/com/amazon/ion/impl/IonReaderTreeUserX.java index 6977da27a6..d028532115 100644 --- a/src/main/java/com/amazon/ion/impl/IonReaderTreeUserX.java +++ 
b/src/main/java/com/amazon/ion/impl/IonReaderTreeUserX.java @@ -1,23 +1,12 @@ -/* - * Copyright 2007-2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * or in the "license" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ - +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 package com.amazon.ion.impl; import static com.amazon.ion.SymbolTable.UNKNOWN_SYMBOL_ID; import static com.amazon.ion.SystemSymbols.ION_1_0_SID; import static com.amazon.ion.SystemSymbols.ION_SYMBOL_TABLE; +import static com.amazon.ion.SystemSymbols.ION_SYMBOL_TABLE_SID; +import static com.amazon.ion.impl.IonReaderTextUserX.isIonVersionMarker; import com.amazon.ion.IonCatalog; import com.amazon.ion.IonDatagram; @@ -29,6 +18,7 @@ import com.amazon.ion.Span; import com.amazon.ion.SpanProvider; import com.amazon.ion.SymbolTable; +import com.amazon.ion.SymbolToken; final class IonReaderTreeUserX @@ -111,9 +101,9 @@ boolean next_helper_user() sid = _system_symtab.findSymbol(name); } } - if (sid == ION_1_0_SID + if ((sid == ION_1_0_SID || isIonVersionMarker(sym.symbolValue().getText())) && _next.getTypeAnnotationSymbols().length == 0) { - // $ion_1_0 is read as an IVM only if it is not annotated + // $ion_1_0 and other version markers are read as an IVM only if unannotated SymbolTable symbols = _system_symtab; _symbols = symbols; push_symbol_table(symbols); @@ -122,7 +112,7 @@ boolean next_helper_user() } } else if (IonType.STRUCT.equals(next_type) - && 
_next.findTypeAnnotation(ION_SYMBOL_TABLE) == 0 + && _next_has_ion_symbol_table_annotation() ) { assert(_next instanceof IonStruct); // read a local symbol table @@ -143,6 +133,14 @@ else if (IonType.STRUCT.equals(next_type) } return (next_type != null); } + + private boolean _next_has_ion_symbol_table_annotation() { + SymbolToken[] annotations = _next.getTypeAnnotationSymbols(); + if (annotations.length == 0) return false; + return annotations[0].getSid() == ION_SYMBOL_TABLE_SID + || ION_SYMBOL_TABLE.equals(annotations[0].getText()); + } + // // This code handles the skipped symbol table // support - it is cloned in IonReaderTextUserX diff --git a/src/main/java/com/amazon/ion/impl/bin/IonManagedWriter_1_1.kt b/src/main/java/com/amazon/ion/impl/bin/IonManagedWriter_1_1.kt index d914268956..bb2a6aaa13 100644 --- a/src/main/java/com/amazon/ion/impl/bin/IonManagedWriter_1_1.kt +++ b/src/main/java/com/amazon/ion/impl/bin/IonManagedWriter_1_1.kt @@ -3,6 +3,7 @@ package com.amazon.ion.impl.bin import com.amazon.ion.* +import com.amazon.ion.SymbolTable.UNKNOWN_SYMBOL_ID import com.amazon.ion.impl.* import com.amazon.ion.impl.bin.DelimitedContainerStrategy.* import com.amazon.ion.impl.bin.SymbolInliningStrategy.* @@ -10,6 +11,7 @@ import com.amazon.ion.system.* import java.io.OutputStream import java.math.BigDecimal import java.math.BigInteger +import java.util.* /** * A managed writer for Ion 1.1 that is generic over whether the raw encoding is text or binary. 
@@ -27,7 +29,7 @@ internal class IonManagedWriter_1_1( private val systemData: IonRawWriter_1_1, private val options: ManagedWriterOptions_1_1, private val onClose: () -> Unit, -) : _Private_IonManagedWriter, AbstractIonWriter(WriteValueOptimization.NONE) { +) : _Private_IonWriter { private val systemSymbolTableMap = hashMapOf() @@ -41,6 +43,8 @@ internal class IonManagedWriter_1_1( } companion object { + private val ION_VERSION_MARKER_REGEX = Regex("^\\\$ion_\\d+_\\d+$") + @JvmStatic fun textWriter(output: OutputStream, managedWriterOptions: ManagedWriterOptions_1_1, textOptions: _Private_IonTextWriterBuilder_1_1): IonManagedWriter_1_1 { // TODO support all options configurable via IonTextWriterBuilder_1_1 @@ -95,7 +99,7 @@ internal class IonManagedWriter_1_1( ), systemData = IonRawBinaryWriter_1_1( out = output, - buffer = WriteBuffer(BlockAllocatorProviders.basicProvider().vendAllocator(binaryOptions.blockSize),) {}, + buffer = WriteBuffer(BlockAllocatorProviders.basicProvider().vendAllocator(binaryOptions.blockSize)) {}, lengthPrefixPreallocation = 1 ), options = managedWriterOptions.copy(internEncodingDirectiveSymbols = true), @@ -122,7 +126,7 @@ internal class IonManagedWriter_1_1( if (sid != null) return sid // Check the to-be-appended symbols sid = newSymbols.indexOf(text) - if (sid != SymbolTable.UNKNOWN_SYMBOL_ID) return sid + priorMaxId + 1 + if (sid != UNKNOWN_SYMBOL_ID) return sid + priorMaxId + 1 // Add to the to-be-appended symbols sid = priorMaxId + newSymbols.size + 1 newSymbols.add(text) @@ -187,18 +191,15 @@ internal class IonManagedWriter_1_1( } override fun setFieldName(name: String) { - handleSymbolText(name, options.shouldWriteInline(SymbolKind.FIELD_NAME, name), userData::writeFieldName, userData::writeFieldName) + handleSymbolToken(UNKNOWN_SYMBOL_ID, name, SymbolKind.FIELD_NAME, userData) } override fun setFieldNameSymbol(name: SymbolToken) { - handleSymbolToken(name, options.shouldWriteInline(SymbolKind.FIELD_NAME, name), 
userData::writeFieldName, userData::writeFieldName) + handleSymbolToken(name.sid, name.text, SymbolKind.FIELD_NAME, userData) } override fun addTypeAnnotation(annotation: String) { - if (annotation == SystemSymbols.ION_SYMBOL_TABLE && depth == 0) { - throw IonException("User-defined symbol tables not permitted by the managed writer.") - } - handleSymbolText(annotation, options.shouldWriteInline(SymbolKind.ANNOTATION, annotation), userData::writeAnnotations, userData::writeAnnotations) + handleSymbolToken(UNKNOWN_SYMBOL_ID, annotation, SymbolKind.ANNOTATION, userData) } override fun setTypeAnnotations(annotations: Array?) { @@ -209,15 +210,7 @@ internal class IonManagedWriter_1_1( override fun setTypeAnnotationSymbols(annotations: Array?) { userData._private_clearAnnotations() - annotations?.forEachIndexed { i, it -> - // TODO: This is handled inconsistently. If you add annotations one at a time using addTypeAnnotation, - // we don't know whether the $ion_symbol_table annotation is the first one or not. - if (depth == 0 && i == 0) { - if (it.sid == SystemSymbols.ION_SYMBOL_TABLE_SID || it.text == SystemSymbols.ION_SYMBOL_TABLE) - throw IonException("User-defined symbol tables not permitted by the managed writer.") - } - handleSymbolToken(it, options.shouldWriteInline(SymbolKind.ANNOTATION, it), userData::writeAnnotations, userData::writeAnnotations) - } + annotations?.forEach { handleSymbolToken(it.sid, it.text, SymbolKind.ANNOTATION, userData) } } override fun stepIn(containerType: IonType?) 
{ @@ -225,7 +218,12 @@ internal class IonManagedWriter_1_1( when (containerType) { IonType.LIST -> userData.stepInList(options.writeDelimited(ContainerType.LIST, newDepth)) IonType.SEXP -> userData.stepInSExp(options.writeDelimited(ContainerType.SEXP, newDepth)) - IonType.STRUCT -> userData.stepInStruct(options.writeDelimited(ContainerType.STRUCT, newDepth)) + IonType.STRUCT -> { + if (depth == 0 && userData._private_hasFirstAnnotation(SystemSymbols.ION_SYMBOL_TABLE_SID, SystemSymbols.ION_SYMBOL_TABLE)) { + throw IonException("User-defined symbol tables not permitted by the Ion 1.1 managed writer.") + } + userData.stepInStruct(options.writeDelimited(ContainerType.STRUCT, newDepth)) + } else -> throw IllegalArgumentException("Not a container type: $containerType") } } @@ -246,7 +244,7 @@ internal class IonManagedWriter_1_1( if (content == null) { userData.writeNull(IonType.SYMBOL) } else { - handleSymbolText(content, options.shouldWriteInline(SymbolKind.VALUE, content), userData::writeSymbol, userData::writeSymbol) + handleSymbolToken(UNKNOWN_SYMBOL_ID, content, SymbolKind.VALUE, userData) } } @@ -257,31 +255,38 @@ internal class IonManagedWriter_1_1( val text: String? 
= content.text if (content.sid == SystemSymbols.ION_1_0_SID) throw IonException("Can't write a top-level symbol that is the same as the IVM.") if (text == SystemSymbols.ION_1_0) throw IonException("Can't write a top-level symbol that is the same as the IVM.") - handleSymbolToken(content, options.shouldWriteInline(SymbolKind.VALUE, content), userData::writeSymbol, userData::writeSymbol) + handleSymbolToken(content.sid, content.text, SymbolKind.VALUE, userData) } } + private inline fun IonRawWriter_1_1.write(kind: SymbolKind, sid: Int) = when (kind) { + SymbolKind.VALUE -> writeSymbol(sid) + SymbolKind.FIELD_NAME -> writeFieldName(sid) + SymbolKind.ANNOTATION -> writeAnnotations(sid) + } + + private inline fun IonRawWriter_1_1.write(kind: SymbolKind, text: String) = when (kind) { + SymbolKind.VALUE -> writeSymbol(text) + SymbolKind.FIELD_NAME -> writeFieldName(text) + SymbolKind.ANNOTATION -> writeAnnotations(text) + } + /** Helper function that determines whether to write a symbol token as a SID or inline symbol */ - private inline fun handleSymbolToken(sym: SymbolToken, inline: Boolean, writeSymbolText: (String) -> Unit, writeSymbolId: (Int) -> Unit) { - val text: String? = sym.text + private inline fun handleSymbolToken(sid: Int, text: String?, kind: SymbolKind, rawWriter: IonRawWriter_1_1, preserveEncoding: Boolean = false) { if (text == null) { - if (sym.sid < priorMaxId) { - // It's in the system symbol table or local table but was constructed without the text for some reason. - writeSymbolId(sym.sid) + // No text. Decide whether to write $0 or some other SID + if (sid == UNKNOWN_SYMBOL_ID) { + // No (known) SID either. 
+ throw UnknownSymbolException("Cannot write a symbol token with unknown text and unknown SID.") } else { - // Unknown Local Symbol - writeSymbolId(0) + rawWriter.write(kind, sid) } + } else if (preserveEncoding && sid < 0) { + rawWriter.write(kind, text) + } else if (options.shouldWriteInline(kind, text)) { + rawWriter.write(kind, text) } else { - handleSymbolText(text, inline, writeSymbolText, writeSymbolId) - } - } - - private inline fun handleSymbolText(text: String, inline: Boolean, writeSymbolText: (String) -> Unit, writeSymbolId: (Int) -> Unit) { - if (inline) { - writeSymbolText(text) - } else { - writeSymbolId(intern(text)) + rawWriter.write(kind, intern(text)) } } @@ -296,11 +301,6 @@ internal class IonManagedWriter_1_1( override fun writeTimestamp(value: Timestamp?) = value.writeMaybeNull(IonType.TIMESTAMP, userData::writeTimestamp) override fun writeString(value: String?) = value.writeMaybeNull(IonType.STRING, userData::writeString) - override fun writeString(data: ByteArray?, offset: Int, length: Int) = data.writeMaybeNull(IonType.STRING) { bytes -> - // TODO: We should probably plumb this through to the Ion 1.1 raw writer rather than decoding it here - userData.writeString(bytes.decodeToString(offset, length + offset, throwOnInvalidSequence = true)) - } - override fun writeClob(value: ByteArray?) 
= value.writeMaybeNull(IonType.CLOB, userData::writeClob) override fun writeClob(value: ByteArray?, start: Int, len: Int) = value.writeMaybeNull(IonType.CLOB) { userData.writeClob(it, start, len) } @@ -316,20 +316,132 @@ internal class IonManagedWriter_1_1( } override fun writeIonVersionMarker() { - // Make sure we write out any symbol tables and buffered values before the IVM - flush() - userData.writeIVM() + if (depth == 0) { + // Make sure we write out any symbol tables and buffered values before the IVM + finish() + systemData.writeIVM() + } else { + writeSymbol("\$ion_1_1") + } } - override fun writeBytes(data: ByteArray?, off: Int, len: Int) { - TODO("Not implemented. Is this actually needed?") + @Deprecated("Use IonValue.writeTo(IonWriter) instead.") + override fun writeValue(value: IonValue) = value.writeTo(this) + + @Deprecated("Use writeTimestamp instead.") + override fun writeTimestampUTC(value: Date?) { + TODO("Use writeTimestamp instead.") } - override fun getRawWriter(): _Private_IonRawWriter = TODO("Not yet implemented") + override fun isStreamCopyOptimized(): Boolean = false + + override fun writeValues(reader: IonReader) { + // There's a possibility that we could have interference between encoding contexts if we're transferring from a + // system reader. However, this is the same behavior as the other implementations. + + val startingDepth = reader.depth + while (true) { + val nextType = reader.next() + if (nextType == null) { + // Nothing more *and* we're at the starting depth? We're all done. + if (reader.depth == startingDepth) return + // Otherwise, step out and continue. + userData.stepOut() + reader.stepOut() + } else { + transferScalarOrStepIn(reader, nextType) + } + } + } + + override fun writeValue(reader: IonReader) { + // There's a possibility that we could have interference between encoding contexts if we're transferring from a + // system reader. However, this is the same behavior as the other implementations. 
+ + if (reader.type == null) return + val startingDepth = reader.depth + transferScalarOrStepIn(reader, reader.type) + if (reader.depth != startingDepth) { + // We stepped into a container, so write the content of the container and then step out. + writeValues(reader) + reader.stepOut() + userData.stepOut() + } + } + + /** + * Can only be called when the reader is positioned on a value. Having [currentType] in the + * function signature helps to enforce that requirement because [currentType] is not allowed + * to be `null`. + */ + private fun transferScalarOrStepIn(reader: IonReader, currentType: IonType) { + // TODO: If the Ion 1.1 symbol table differs at all from the Ion 1.0 symbol table, and we're copying + // from Ion 1.0, we will have to adjust any SIDs that we are writing. + + reader.typeAnnotationSymbols.forEach { + handleSymbolToken(it.sid, it.text, SymbolKind.ANNOTATION, userData, preserveEncoding = true) + } + if (isInStruct) { + // TODO: Can't use reader.fieldId, reader.fieldName because it will throw UnknownSymbolException. + // However, this might mean we're unnecessarily constructing `SymbolToken` instances. + val fieldName = reader.fieldNameSymbol + handleSymbolToken(fieldName.sid, fieldName.text, SymbolKind.FIELD_NAME, userData, preserveEncoding = true) + } + + if (reader.isNullValue) { + userData.writeNull(currentType) + } else when (currentType) { + IonType.BOOL -> userData.writeBool(reader.booleanValue()) + IonType.INT -> { + if (reader.integerSize == IntegerSize.BIG_INTEGER) { + userData.writeInt(reader.bigIntegerValue()) + } else { + userData.writeInt(reader.longValue()) + } + } + IonType.FLOAT -> userData.writeFloat(reader.doubleValue()) + IonType.DECIMAL -> userData.writeDecimal(reader.decimalValue()) + IonType.TIMESTAMP -> userData.writeTimestamp(reader.timestampValue()) + IonType.SYMBOL -> { + if (reader.isCurrentValueAnIvm()) { + // TODO: What about the case where it's an IVM, but the writer is not at depth==0? 
Should we write + // it as a symbol or just ignore it? (This can only happen if the writer is stepped in, but + // the reader starts at depth==0.) + + // Just in case—call finish to flush the current system values, then user values, and then write the IVM. + finish() + userData.writeIVM() + } else { + val symbol = reader.symbolValue() + handleSymbolToken(symbol.sid, symbol.text, SymbolKind.VALUE, userData, preserveEncoding = true) + } + } + IonType.STRING -> userData.writeString(reader.stringValue()) + IonType.CLOB -> userData.writeClob(reader.newBytes()) + IonType.BLOB -> userData.writeBlob(reader.newBytes()) + // TODO: See if we can preserve the encoding of containers (delimited vs length-prefixed) + IonType.LIST -> { + userData.stepInList(options.writeDelimited(ContainerType.LIST, reader.depth)) + reader.stepIn() + } + IonType.SEXP -> { + userData.stepInSExp(options.writeDelimited(ContainerType.SEXP, reader.depth)) + reader.stepIn() + } + IonType.STRUCT -> { + userData.stepInStruct(options.writeDelimited(ContainerType.STRUCT, reader.depth)) + reader.stepIn() + } + else -> TODO("NULL and DATAGRAM are unreachable.") + } + } - override fun requireLocalSymbolTable() { - // Can this be a no-op? - TODO("Not yet implemented") + private fun IonReader.isCurrentValueAnIvm(): Boolean { + if (depth != 0 || type != IonType.SYMBOL || typeAnnotationSymbols.isNotEmpty()) return false + val symbol = symbolValue() ?: return false + if (symbol.sid == 2) return true + symbol.text ?: return false + return ION_VERSION_MARKER_REGEX.matches(symbol.assumeText()) } // Stream termination @@ -343,7 +455,7 @@ internal class IonManagedWriter_1_1( override fun flush() { writeSymbolTable() - // TODO: This method should probably be called `flush()` instead of `finish()`. + // TODO: This method on the raw writer should probably be called `flush()` instead of `finish()`. 
systemData.finish() userData.finish() } diff --git a/src/main/java/com/amazon/ion/impl/bin/IonRawBinaryWriter_1_1.kt b/src/main/java/com/amazon/ion/impl/bin/IonRawBinaryWriter_1_1.kt index ec430e79b7..15217e3684 100644 --- a/src/main/java/com/amazon/ion/impl/bin/IonRawBinaryWriter_1_1.kt +++ b/src/main/java/com/amazon/ion/impl/bin/IonRawBinaryWriter_1_1.kt @@ -221,6 +221,17 @@ class IonRawBinaryWriter_1_1 internal constructor( annotationFlexSymFlag = 0 } + override fun _private_hasFirstAnnotation(sid: Int, text: String?): Boolean { + if (numAnnotations == 0) return false + if (sid >= 0 && annotationsIdBuffer[0] == sid) { + return true + } + if (text != null && annotationsTextBuffer[0] == text) { + return true + } + return false + } + /** * Helper function for handling annotations and field names when starting a value. */ diff --git a/src/test/java/com/amazon/ion/BinaryByteArrayIteratorSystemProcessingTest.java b/src/test/java/com/amazon/ion/BinaryByteArrayIteratorSystemProcessingTest.java index f905b69acf..f1ad1e0cdc 100644 --- a/src/test/java/com/amazon/ion/BinaryByteArrayIteratorSystemProcessingTest.java +++ b/src/test/java/com/amazon/ion/BinaryByteArrayIteratorSystemProcessingTest.java @@ -62,6 +62,10 @@ public void inlineFieldName() { ); Iterator iterator = systemIterate(); assertTrue(iterator.hasNext()); + IonValue shouldBeAnIvm = iterator.next(); + assertEquals(IonType.SYMBOL, shouldBeAnIvm.getType()); + assertEquals("$ion_1_1", ((IonSymbol) shouldBeAnIvm).stringValue()); + assertTrue(iterator.hasNext()); IonStruct struct = (IonStruct) iterator.next(); assertEquals(1, struct.size()); IonStruct nested = (IonStruct) struct.get("a"); @@ -88,6 +92,10 @@ public void inlineAnnotation() { ); Iterator iterator = systemIterate(); assertTrue(iterator.hasNext()); + IonValue shouldBeAnIvm = iterator.next(); + assertEquals(IonType.SYMBOL, shouldBeAnIvm.getType()); + assertEquals("$ion_1_1", ((IonSymbol) shouldBeAnIvm).stringValue()); + assertTrue(iterator.hasNext()); 
IonBool value = (IonBool) iterator.next(); String[] annotations = value.getTypeAnnotations(); assertEquals(2, annotations.length); diff --git a/src/test/java/com/amazon/ion/BinaryReaderSystemProcessingTest.java b/src/test/java/com/amazon/ion/BinaryReaderSystemProcessingTest.java index e031e7d39f..a109ae5e56 100644 --- a/src/test/java/com/amazon/ion/BinaryReaderSystemProcessingTest.java +++ b/src/test/java/com/amazon/ion/BinaryReaderSystemProcessingTest.java @@ -219,6 +219,10 @@ public void inlineFieldName() throws Exception { "62 | UTF-8 byte 'b' \n" ); IonReader reader = systemRead(); + + assertEquals(IonType.SYMBOL, reader.next()); + assertEquals("$ion_1_1", reader.symbolValue().getText()); + assertEquals(IonType.STRUCT, reader.next()); reader.stepIn(); assertEquals(IonType.STRUCT, reader.next()); @@ -255,6 +259,10 @@ public void inlineAnnotation() throws Exception { "6F | boolean false\n" ); IonReader reader = systemRead(); + + assertEquals(IonType.SYMBOL, reader.next()); + assertEquals("$ion_1_1", reader.symbolValue().getText()); + assertEquals(IonType.BOOL, reader.next()); String[] annotations = reader.getTypeAnnotations(); assertEquals(2, annotations.length); diff --git a/src/test/java/com/amazon/ion/Ion11Test.kt b/src/test/java/com/amazon/ion/Ion11Test.kt deleted file mode 100644 index c9b08438c9..0000000000 --- a/src/test/java/com/amazon/ion/Ion11Test.kt +++ /dev/null @@ -1,202 +0,0 @@ -// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
-// SPDX-License-Identifier: Apache-2.0 -package com.amazon.ion - -import com.amazon.ion.IonEncodingVersion.ION_1_1 -import com.amazon.ion.TestUtils.And -import com.amazon.ion.TestUtils.GLOBAL_SKIP_LIST -import com.amazon.ion.TestUtils.GOOD_IONTESTS_FILES -import com.amazon.ion.TestUtils.TEXT_ONLY_FILTER -import com.amazon.ion.TestUtils.hexStringToByteArray -import com.amazon.ion.TestUtils.testdataFiles -import com.amazon.ion.impl.bin.* -import com.amazon.ion.system.IonBinaryWriterBuilder -import com.amazon.ion.system.IonSystemBuilder -import com.amazon.ion.system.IonTextWriterBuilder -import java.io.ByteArrayOutputStream -import java.io.FilenameFilter -import java.io.OutputStream -import org.junit.jupiter.api.Assertions.assertEquals -import org.junit.jupiter.api.Assertions.assertTrue -import org.junit.jupiter.params.ParameterizedTest -import org.junit.jupiter.params.provider.MethodSource - -/** - * TODO: Clean this up. Document why various tests are skipped. Etc. - */ -class Ion11Test { - - companion object { - val ION = IonSystemBuilder.standard().build() - - fun ionText(text: String): Array = arrayOf(text, text.encodeToByteArray()) - fun ionBinary(name: String, bytes: String): Array = arrayOf(name, hexStringToByteArray(bytes)) - - // Arguments here are an array containing a String for the test case name, and a ByteArray of the test data. - @JvmStatic - fun ionData() = listOf( - ionBinary("{a:{$4:b}}", "E0 01 01 EA FD 0F 01 FF 61 D3 09 A1 62"), - ionText("""a::a::c::a::0 a::a::0"""), - ionText("""a::a::c::a::0 a::0"""), - ionText("""foo::bar::baz::false foo::0"""), - ionText("""a::b::c::0 d::0"""), - ionText("""a::0 b::c::d::0"""), - ionText("""a::b::c::d::0 a::b::c::0"""), - ionText("""a::b::c::d::0 a::0 a::0"""), - ionText("""abc"""), - ) + files().flatMap { f -> - val ion = ION.loader.load(f) - // If there are embedded documents, flatten them into separate test cases. 
- if (ion.size == 1 && ion.first().hasTypeAnnotation("embedded_documents")) { - (ion.first() as IonContainer).mapIndexed { i, ionValue -> - arrayOf("${f.path}[$i]", (ionValue as IonString).stringValue().toByteArray(Charsets.UTF_8)) - } - } else { - listOf(arrayOf(f.path, ion.toString(IonTextWriterBuilder.standard()).toByteArray(Charsets.UTF_8))) - } - } - - @JvmField - val FILES_TO_SKIP = setOf( - "notVersionMarkers.ion", - "symbolTablesUnknownText.ion" - ) - - @JvmStatic - fun files() = testdataFiles( - And( - TEXT_ONLY_FILTER, - GLOBAL_SKIP_LIST, - FilenameFilter { _, name -> name !in FILES_TO_SKIP } - ), - GOOD_IONTESTS_FILES - ) - } - - @ParameterizedTest(name = "{0}") - @MethodSource("ionData") - fun writeIon11Text(name: String, ion: ByteArray) { - textTest(ion) { - ION_1_1.textWriterBuilder() - .withNewLineType(IonTextWriterBuilder.NewLineType.LF) - .build(it) - } - } - - @ParameterizedTest(name = "{0}") - @MethodSource("ionData") - fun writeIon11TextToAppendable(name: String, ion: ByteArray) { - textTestAppendable(ion) { - ION_1_1.textWriterBuilder() - .withNewLineType(IonTextWriterBuilder.NewLineType.LF) - .withSymbolInliningStrategy(SymbolInliningStrategy.NEVER_INLINE) - .build(it) - } - } - - @ParameterizedTest(name = "{0}") - @MethodSource("ionData") - fun writeIon11TextWithSymtab(name: String, ion: ByteArray) { - textTest(ion) { - ION_1_1.textWriterBuilder() - .withNewLineType(IonTextWriterBuilder.NewLineType.LF) - .withSymbolInliningStrategy(SymbolInliningStrategy.NEVER_INLINE) - .build(it) - } - } - - @ParameterizedTest(name = "{0}") - @MethodSource("ionData") - fun writeIon11Binary(name: String, ion: ByteArray) { - binaryTest(ion) { - ION_1_1.binaryWriterBuilder().build(it) - } - } - - @ParameterizedTest(name = "{0}") - @MethodSource("ionData") - fun writeIon11BinaryInlineSymbols(name: String, ion: ByteArray) { - - binaryTest(ion) { - ION_1_1.binaryWriterBuilder() - .withSymbolInliningStrategy(SymbolInliningStrategy.ALWAYS_INLINE) - .build(it) - } - 
} - - @ParameterizedTest(name = "{0}") - @MethodSource("ionData") - fun writeIon11BinaryDelimited(name: String, ion: ByteArray) { - binaryTest(ion) { - ION_1_1.binaryWriterBuilder() - .withDelimitedContainerStrategy(DelimitedContainerStrategy.ALWAYS_DELIMITED) - .build(it) - } - } - - fun textTest(ion: ByteArray, writerFn: (OutputStream) -> IonWriter) { - val data: List = ION.loader.load(ion).map { it } - val baos = ByteArrayOutputStream() - val writer = writerFn(baos) - data.forEach { it.writeTo(writer) } - writer.close() - println(baos.toByteArray().toString(Charsets.UTF_8)) - val loadedData = ION.loader.load(baos.toByteArray()) - println(loadedData) - assertEquals(data, loadedData.toList()) - } - - fun textTestAppendable(ion: ByteArray, writerFn: (Appendable) -> IonWriter) { - val data: List = ION.loader.load(ion).map { it } - val appendable = StringBuilder() - val writer = writerFn(appendable) - data.forEach { it.writeTo(writer) } - writer.close() - println(appendable.toString()) - val loadedData = ION.loader.load(appendable.toString()) - println(loadedData) - assertEquals(data, loadedData.toList()) - } - - fun binaryTest(ion: ByteArray, writerFn: (OutputStream) -> IonWriter) { - val data: List = ION.loader.load(ion).map { it } - val baos = ByteArrayOutputStream() - val writer = writerFn(baos) - data.forEach { it.writeTo(writer) } - writer.close() - - ION.loader.load(ion).dump10Text() - println("Ion 1.1 binary:") - assertTrue(baos.toByteArray().isNotEmpty()) - baos.dump() - val loadedData = ION.loader.load(baos.toByteArray()) - println("Round-tripped data") - println(loadedData) - assertEquals(data, loadedData.toList()) - } - - @OptIn(ExperimentalStdlibApi::class) - fun ByteArrayOutputStream.dump() { - this.toByteArray() - .map { it.toHexString(HexFormat.UpperCase) } - .windowed(4, 4, partialWindows = true) - .windowed(8, 8, partialWindows = true) - .forEach { - println(it.joinToString(" ") { it.joinToString(" ") }) - } - } - - fun List.dump10Binary() { - val 
baos = ByteArrayOutputStream() - val writer = IonBinaryWriterBuilder.standard().build(baos) - forEach { it.writeTo(writer) } - writer.close() - println("Ion 1.0 Binary:") - baos.dump() - } - - fun IonValue.dump10Text() { - // println("Ion 1.0 Text:") - // println(this.toString(IonTextWriterBuilder.standard())) - } -} diff --git a/src/test/java/com/amazon/ion/Ion_1_1_RoundTripTest.kt b/src/test/java/com/amazon/ion/Ion_1_1_RoundTripTest.kt new file mode 100644 index 0000000000..c78569ce21 --- /dev/null +++ b/src/test/java/com/amazon/ion/Ion_1_1_RoundTripTest.kt @@ -0,0 +1,358 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +package com.amazon.ion + +import com.amazon.ion.IonEncodingVersion.* +import com.amazon.ion.TestUtils.* +import com.amazon.ion.impl._Private_IonSystem +import com.amazon.ion.impl.bin.* +import com.amazon.ion.system.* +import java.io.ByteArrayOutputStream +import java.io.FilenameFilter +import java.io.OutputStream +import org.junit.jupiter.api.Assertions.assertEquals +import org.junit.jupiter.api.Disabled +import org.junit.jupiter.api.Nested +import org.junit.jupiter.params.ParameterizedTest +import org.junit.jupiter.params.provider.MethodSource + +/** + * Suite of tests for running round trip tests on user and system values for various Ion 1.1 encodings. 
+ */ +class Ion_1_1_RoundTripTest { + + @Nested + inner class Text : Ion_1_1_RoundTripTextBase() { + private val builder = ION_1_1.textWriterBuilder().withNewLineType(IonTextWriterBuilder.NewLineType.LF) + + override val writerFn: (OutputStream) -> IonWriter = builder::build + override val newWriterForAppendable: (Appendable) -> IonWriter = builder::build + } + + @Nested + inner class TextWithSymbolTable : Ion_1_1_RoundTripTextBase() { + private val builder = ION_1_1.textWriterBuilder() + .withNewLineType(IonTextWriterBuilder.NewLineType.LF) + .withSymbolInliningStrategy(SymbolInliningStrategy.NEVER_INLINE) + + override val writerFn: (OutputStream) -> IonWriter = builder::build + override val newWriterForAppendable: (Appendable) -> IonWriter = builder::build + } + + @Nested + inner class BinaryWithInternedSymbolsAndPrefixedContainers : Ion_1_1_RoundTripBase() { + private val builder = ION_1_1.binaryWriterBuilder() + .withSymbolInliningStrategy(SymbolInliningStrategy.NEVER_INLINE) + .withDelimitedContainerStrategy(DelimitedContainerStrategy.ALWAYS_PREFIXED) + + override val writerFn: (OutputStream) -> IonWriter = builder::build + } + + @Nested + inner class BinaryWithInlineSymbolsAndPrefixedContainers : Ion_1_1_RoundTripBase() { + private val builder = ION_1_1.binaryWriterBuilder() + .withSymbolInliningStrategy(SymbolInliningStrategy.ALWAYS_INLINE) + .withDelimitedContainerStrategy(DelimitedContainerStrategy.ALWAYS_PREFIXED) + + override val writerFn: (OutputStream) -> IonWriter = builder::build + + @Disabled("Ion binary reader can't seem to discover symbol tables with inline annotations") + override fun testUserValuesArePreservedWhenTransferringSystemValues(name: String, ion: ByteArray) { + super.testUserValuesArePreservedWhenTransferringSystemValues(name, ion) + } + } + + @Nested + inner class BinaryWithInlineSymbolsAndDelimitedContainers : Ion_1_1_RoundTripBase() { + private val builder = ION_1_1.binaryWriterBuilder() + 
.withSymbolInliningStrategy(SymbolInliningStrategy.ALWAYS_INLINE) + .withDelimitedContainerStrategy(DelimitedContainerStrategy.ALWAYS_DELIMITED) + + override val writerFn: (OutputStream) -> IonWriter = builder::build + + @Disabled("Ion binary reader can't seem to discover symbol tables with inline annotations") + override fun testUserValuesArePreservedWhenTransferringSystemValues(name: String, ion: ByteArray) { + super.testUserValuesArePreservedWhenTransferringSystemValues(name, ion) + } + } + + @Nested + inner class BinaryWithInternedSymbolsAndDelimitedContainers : Ion_1_1_RoundTripBase() { + private val builder = ION_1_1.binaryWriterBuilder() + .withSymbolInliningStrategy(SymbolInliningStrategy.NEVER_INLINE) + .withDelimitedContainerStrategy(DelimitedContainerStrategy.ALWAYS_DELIMITED) + + override val writerFn: (OutputStream) -> IonWriter = builder::build + } +} + +/** + * Base class that contains text-specific cases + */ +abstract class Ion_1_1_RoundTripTextBase : Ion_1_1_RoundTripBase() { + abstract val newWriterForAppendable: (Appendable) -> IonWriter + + @ParameterizedTest(name = "{0}") + @MethodSource("testData") + fun testUserValuesSurviveRoundTripWrittenToAppendable(name: String, ion: ByteArray) { + val data: List = ION.loader.load(ion) + val appendable = StringBuilder() + val writer = newWriterForAppendable(appendable) + data.forEach { it.writeTo(writer) } + writer.close() + val actual = appendable.toString() + + println("Expected:") + ion.printDisplayString() + println("Actual:") + println(actual) + + assertReadersHaveEquivalentValues( + ION.newReader(ion), + ION.newReader(actual) + ) + } +} + +@OptIn(ExperimentalStdlibApi::class) +abstract class Ion_1_1_RoundTripBase { + + abstract val writerFn: (OutputStream) -> IonWriter + + @ParameterizedTest(name = "{0}") + @MethodSource("testData") + fun testUserValuesArePreservedWhenTransferringUserValues(name: String, ion: ByteArray) { + + // Read and compare the data. 
+ val actual = roundTripToByteArray { w -> newReader(ion).let(::iterate).forEach { it.writeTo(w) } } + println("Expected:") + ion.printDisplayString() + println("Actual:") + actual.printDisplayString() + + assertReadersHaveEquivalentValues( + ION.newReader(ion), + ION.newReader(actual) + ) + } + + @ParameterizedTest(name = "{0}") + @MethodSource("testData") + fun testUserValuesArePreservedWhenTransferringUserValuesUsingWriteValueForReader(name: String, ion: ByteArray) { + + // Read and compare the data. + val actual = roundTripToByteArray { w -> newReader(ion).let { r -> while (r.next() != null) w.writeValue(r) } } + println("Expected:") + ion.printDisplayString() + println("Actual:") + actual.printDisplayString() + + assertReadersHaveEquivalentValues( + ION.newReader(ion), + ION.newReader(actual) + ) + } + + @ParameterizedTest(name = "{0}") + @MethodSource("testData") + fun testUserValuesArePreservedWhenTransferringUserValuesUsingWriteValueForIonValue(name: String, ion: ByteArray) { + // Read and compare the data. + val actual = roundTripToByteArray { w -> newReader(ion).let(::iterate).forEach { w.writeValue(it) } } + println("Expected:") + ion.printDisplayString() + println("Actual:") + actual.printDisplayString() + + assertReadersHaveEquivalentValues( + ION.newReader(ion), + ION.newReader(actual) + ) + } + + @ParameterizedTest(name = "{0}") + @MethodSource("testData") + open fun testUserValuesArePreservedWhenTransferringSystemValues(name: String, ion: ByteArray) { + + // Read and compare the data. 
+ val actual = roundTripToByteArray { w -> w.writeValues(newSystemReader(ion)) } + println("Expected:") + ion.printDisplayString() + println("Actual:") + actual.printDisplayString() + + // Check the user values + assertReadersHaveEquivalentValues( + ION.newReader(ion), + ION.newReader(actual) + ) + } + + @ParameterizedTest(name = "{0}") + @MethodSource("testData") + open fun testSystemValuesArePreservedWhenTransferringSystemValues(name: String, ion: ByteArray) { + + // Read and compare the data. + val actual = roundTripToByteArray { w -> w.writeValues(newSystemReader(ion)) } + println("Expected:") + ion.printDisplayString() + println("Actual:") + actual.printDisplayString() + + // Check the system values + assertReadersHaveEquivalentValues( + ION.newSystemReader(ion), + // Skip the initial IVM since it ends up being doubled when we're copying. + ION.newSystemReader(actual).apply { next() } + ) + } + + private fun roundTripToByteArray(block: _Private_IonSystem.(IonWriter) -> Unit): ByteArray { + // Create a new copy of the data in Ion 1.1 + val baos = ByteArrayOutputStream() + val writer = writerFn(baos) + block(ION, writer) + writer.close() + return baos.toByteArray() + } + + /** + * Prints this ByteArray as hex octets if this contains Ion Binary, otherwise prints as UTF-8 decoded string. + */ + protected fun ByteArray.printDisplayString() { + if (isIonBinary()) { + map { it.toHexString(HexFormat.UpperCase) } + .windowed(4, 4, partialWindows = true) + .windowed(8, 8, partialWindows = true) + .forEach { + println(it.joinToString(" ") { it.joinToString(" ") }) + } + } else { + println(toString(Charsets.UTF_8)) + } + } + + fun assertReadersHaveEquivalentValues(expectedDataReader: IonReader, actualDataReader: IonReader) { + // Read and compare the data. 
+ val expectedData: Iterator = ION.iterate(expectedDataReader) + val actualData: Iterator = ION.iterate(actualDataReader) + + var ie = 0 + while (expectedData.hasNext() && actualData.hasNext()) { + val expected = expectedData.next() + try { + val actual = actualData.next() + + if (expected is IonSymbol && actual is IonSymbol) { + if (expected.typeAnnotationSymbols.isEmpty() && + isIonVersionMarker(expected.symbolValue()) && + actual.typeAnnotationSymbols.isEmpty() && + isIonVersionMarker(actual.symbolValue()) + ) { + // Both are IVMs. We won't actually compare them because we + // could be comparing data from different Ion versions + continue + } + } + + assertEquals(expected, actual, "value $ie is different") + } catch (e: IonException) { + throw AssertionError("Encountered IonException when reading the transcribed version of value #$ie\n$expected", e) + } + ie++ + } + + // Make sure that both are fully consumed. + var ia = ie + while (expectedData.hasNext()) { expectedData.next(); ie++ } + while (actualData.hasNext()) { actualData.next(); ia++ } + + assertEquals(ie, ia, "Data is unequal length") + } + + /** + * Checks if this ByteArray contains Ion Binary. 
+ */ + private fun ByteArray.isIonBinary(): Boolean { + return get(0) == 0xE0.toByte() && + get(1) == 0x01.toByte() && + get(2) in setOf(0, 1) && + get(3) == 0xEA.toByte() + } + + private fun isIonVersionMarker(symbol: SymbolToken?): Boolean { + symbol ?: return false + if (symbol.sid == 2) return true + symbol.text ?: return false + return ION_VERSION_MARKER_REGEX.matches(symbol.assumeText()) + } + + companion object { + @JvmStatic + protected val ION = IonSystemBuilder.standard().build() as _Private_IonSystem + private val ION_VERSION_MARKER_REGEX = Regex("^\\\$ion_[0-9]+_[0-9]+$") + + private fun ionText(text: String): Array = arrayOf(text, text.encodeToByteArray()) + private fun ionBinary(name: String, bytes: String): Array = arrayOf(name, hexStringToByteArray(bytes)) + + // Arguments here are an array containing a String for the test case name, and a ByteArray of the test data. + @JvmStatic + fun testData() = listOf( + ionText("\$ion_1_1 true \$ion_1_0 true \$ion_1_1 true"), + ionBinary("Binary IVMs", "E0 01 01 EA 6F E0 01 00 EA 10 E0 01 01 EA 6F"), + ionBinary("{a:{$4:b}}", "E0 01 01 EA FD 0F 01 FF 61 D3 09 A1 62"), + ionText("""a::a::c::a::0 a::a::0"""), + ionText("""a::a::c::a::0 a::0"""), + ionText("""foo::bar::baz::false foo::0"""), + ionText("""a::b::c::0 d::0"""), + ionText("""a::0 b::c::d::0"""), + ionText("""a::b::c::d::0 a::b::c::0"""), + ionText("""a::b::c::d::0 a::0 a::0"""), + ionText("""abc"""), + // This test case has a top-level annotation that is the same number of utf-8 bytes as $ion_symbol_table + ionText("fake_symbol_table::{}"), + ionText( + """ + ${'$'}ion_1_0 + ${'$'}ion_symbol_table::{ + symbols:[ "a", "b", "c", "d", "e" ] + } + $10 $11 $12 $13 $14 + ${'$'}ion_1_0 + ${'$'}ion_symbol_table::{ + symbols:[ "rock", "paper", "scissors", "lizard", "spock" ] + } + $10 $11 $12 $13 $14 + """.trimIndent() + ), + ) + files().flatMap { f -> + val ion = ION.loader.load(f) + // If there are embedded documents, flatten them into separate test cases. 
+ if (ion.size == 1 && ion.first().hasTypeAnnotation("embedded_documents")) { + (ion.first() as IonContainer).mapIndexed { i, ionValue -> + arrayOf("${f.path}[$i]", (ionValue as IonString).stringValue().toByteArray(Charsets.UTF_8)) + } + } else { + listOf(arrayOf(f.path, f.readBytes())) + } + } + + @JvmStatic + fun files() = testdataFiles( + And(GLOBAL_SKIP_LIST, LOCAL_SKIP_LIST), + GOOD_IONTESTS_FILES + ) + + @JvmField + val LOCAL_SKIP_LIST = setOf( + // Has an unknown, imported symbol + "symbolTablesUnknownText.ion", + // Skipped because there are no user values in these, and IonReaderNonContinuableSystem will throw an exception. + "blank.ion", + "empty.ion", + "emptyThreeByteNopPad.10n", + "nopPad16Bytes.10n", + "nopPadOneByte.10n", + "T15.10n", + ).let { FilenameFilter { _, name -> name !in it } } + } +} diff --git a/src/test/java/com/amazon/ion/impl/IonRawTextWriterTest_1_1.kt b/src/test/java/com/amazon/ion/impl/IonRawTextWriterTest_1_1.kt index 4d7ae773d8..f77c1a5b45 100644 --- a/src/test/java/com/amazon/ion/impl/IonRawTextWriterTest_1_1.kt +++ b/src/test/java/com/amazon/ion/impl/IonRawTextWriterTest_1_1.kt @@ -7,6 +7,7 @@ import com.amazon.ion.system.* import java.math.BigDecimal import java.math.BigInteger import org.junit.jupiter.api.Assertions.assertEquals +import org.junit.jupiter.api.Assertions.assertFalse import org.junit.jupiter.api.Assertions.assertTrue import org.junit.jupiter.api.Test import org.junit.jupiter.api.assertThrows @@ -22,7 +23,7 @@ class IonRawTextWriterTest_1_1 { private inline fun ionWriter( out: StringBuilder = StringBuilder(), builderConfigurator: IonTextWriterBuilder_1_1.() -> Unit = { /* noop */ }, - block: IonRawTextWriter_1_1.() -> Unit, + block: IonRawTextWriter_1_1.() -> Unit = {}, ): IonRawTextWriter_1_1 { val b = standardBuilder() .apply(builderConfigurator) @@ -552,6 +553,36 @@ class IonRawTextWriterTest_1_1 { } } + @Test + fun `_private_hasFirstAnnotation() should return false when there are no annotations`() { + val 
rawWriter = ionWriter() + assertFalse(rawWriter._private_hasFirstAnnotation(SystemSymbols.ION_SID, SystemSymbols.ION)) + } + + @Test + fun `_private_hasFirstAnnotation() should return true if only the sid matches`() { + val rawWriter = ionWriter() + rawWriter.writeAnnotations(SystemSymbols.ION_SID) + assertTrue(rawWriter._private_hasFirstAnnotation(SystemSymbols.ION_SID, null)) + } + + @Test + fun `_private_hasFirstAnnotation() should return true if only the text matches`() { + val rawWriter = ionWriter() + rawWriter.writeAnnotations(SystemSymbols.ION) + assertTrue(rawWriter._private_hasFirstAnnotation(-1, SystemSymbols.ION)) + } + + @Test + fun `_private_hasFirstAnnotation() should return false if the first annotation does not match the sid or text`() { + val rawWriter = ionWriter() + rawWriter.writeAnnotations(SystemSymbols.IMPORTS_SID) + rawWriter.writeAnnotations(SystemSymbols.ION) + rawWriter.writeAnnotations(SystemSymbols.ION_SID) + // Matches the second and third annotations, but not the first one. + assertFalse(rawWriter._private_hasFirstAnnotation(SystemSymbols.ION_SID, SystemSymbols.ION)) + } + @Test fun `write int`() { assertWriterOutputEquals( diff --git a/src/test/java/com/amazon/ion/impl/bin/IonManagedWriter_1_1_Test.kt b/src/test/java/com/amazon/ion/impl/bin/IonManagedWriter_1_1_Test.kt new file mode 100644 index 0000000000..658c6cd879 --- /dev/null +++ b/src/test/java/com/amazon/ion/impl/bin/IonManagedWriter_1_1_Test.kt @@ -0,0 +1,46 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 +package com.amazon.ion.impl.bin + +import com.amazon.ion.* +import com.amazon.ion.IonEncodingVersion.* +import org.junit.jupiter.api.Assertions.assertEquals +import org.junit.jupiter.api.Test +import org.junit.jupiter.api.assertThrows + +internal class IonManagedWriter_1_1_Test { + + val appendable = StringBuilder() + val writer = ION_1_1.textWriterBuilder() + .withSymbolInliningStrategy(SymbolInliningStrategy.ALWAYS_INLINE) + .build(appendable) as IonManagedWriter_1_1 + + @Test + fun `attempting to manually write a symbol table throws an exception`() { + writer.addTypeAnnotation(SystemSymbols.ION_SYMBOL_TABLE) + assertThrows { writer.stepIn(IonType.STRUCT) } + } + + @Test + fun `attempting to step into a scalar type throws an exception`() { + assertThrows { writer.stepIn(IonType.NULL) } + } + + @Test + fun `write an IVM`() { + writer.writeIonVersionMarker() + writer.close() + assertEquals("\$ion_1_1 \$ion_1_1", appendable.toString().trim()) + } + + @Test + fun `write an IVM in a container should write a symbol`() { + with(writer) { + stepIn(IonType.LIST) + writeIonVersionMarker() + stepOut() + close() + } + assertEquals("\$ion_1_1 [\$ion_1_1]", appendable.toString().trim()) + } +} diff --git a/src/test/java/com/amazon/ion/impl/bin/IonRawBinaryWriterTest_1_1.kt b/src/test/java/com/amazon/ion/impl/bin/IonRawBinaryWriterTest_1_1.kt index a726e77acd..1f39798d3c 100644 --- a/src/test/java/com/amazon/ion/impl/bin/IonRawBinaryWriterTest_1_1.kt +++ b/src/test/java/com/amazon/ion/impl/bin/IonRawBinaryWriterTest_1_1.kt @@ -9,6 +9,7 @@ import java.io.ByteArrayOutputStream import java.math.BigDecimal import java.math.BigInteger import org.junit.jupiter.api.Assertions.assertEquals +import org.junit.jupiter.api.Assertions.assertFalse import org.junit.jupiter.api.Assertions.assertTrue import org.junit.jupiter.api.Test import org.junit.jupiter.api.assertThrows @@ -17,13 +18,17 @@ import org.junit.jupiter.params.provider.CsvSource class 
IonRawBinaryWriterTest_1_1 { + private fun ionWriter( + baos: ByteArrayOutputStream = ByteArrayOutputStream() + ) = IonRawBinaryWriter_1_1( + out = baos, + buffer = WriteBuffer(BlockAllocatorProviders.basicProvider().vendAllocator(32)) {}, + lengthPrefixPreallocation = 1, + ) + private inline fun writeAsHexString(autoClose: Boolean = true, block: IonRawBinaryWriter_1_1.() -> Unit): String { val baos = ByteArrayOutputStream() - val rawWriter = IonRawBinaryWriter_1_1( - out = baos, - buffer = WriteBuffer(BlockAllocatorProviders.basicProvider().vendAllocator(32)) {}, - lengthPrefixPreallocation = 1, - ) + val rawWriter = ionWriter(baos) block.invoke(rawWriter) if (autoClose) rawWriter.close() @OptIn(ExperimentalStdlibApi::class) @@ -805,6 +810,36 @@ class IonRawBinaryWriterTest_1_1 { } } + @Test + fun `_private_hasFirstAnnotation() should return false when there are no annotations`() { + val rawWriter = ionWriter() + assertFalse(rawWriter._private_hasFirstAnnotation(SystemSymbols.ION_SID, SystemSymbols.ION)) + } + + @Test + fun `_private_hasFirstAnnotation() should return true if only the sid matches`() { + val rawWriter = ionWriter() + rawWriter.writeAnnotations(SystemSymbols.ION_SID) + assertTrue(rawWriter._private_hasFirstAnnotation(SystemSymbols.ION_SID, null)) + } + + @Test + fun `_private_hasFirstAnnotation() should return true if only the text matches`() { + val rawWriter = ionWriter() + rawWriter.writeAnnotations(SystemSymbols.ION) + assertTrue(rawWriter._private_hasFirstAnnotation(-1, SystemSymbols.ION)) + } + + @Test + fun `_private_hasFirstAnnotation() should return false if the first annotation does not match the sid or text`() { + val rawWriter = ionWriter() + rawWriter.writeAnnotations(SystemSymbols.IMPORTS_SID) + rawWriter.writeAnnotations(SystemSymbols.ION) + rawWriter.writeAnnotations(SystemSymbols.ION_SID) + // Matches the second and third annotations, but not the first one. 
+ assertFalse(rawWriter._private_hasFirstAnnotation(SystemSymbols.ION_SID, SystemSymbols.ION)) + } + @Test fun `write int`() { assertWriterOutputEquals(