diff --git a/src/main/java/com/amazon/ion/impl/_Private_IonWriter.java b/src/main/java/com/amazon/ion/impl/_Private_IonWriter.java
index 519a14f3d..a527aad2a 100644
--- a/src/main/java/com/amazon/ion/impl/_Private_IonWriter.java
+++ b/src/main/java/com/amazon/ion/impl/_Private_IonWriter.java
@@ -1,22 +1,11 @@
-/*
- * Copyright 2007-2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License").
- * You may not use this file except in compliance with the License.
- * A copy of the License is located at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * or in the "license" file accompanying this file. This file is distributed
- * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
- * express or implied. See the License for the specific language governing
- * permissions and limitations under the License.
- */
-
+// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+// SPDX-License-Identifier: Apache-2.0
 package com.amazon.ion.impl;
 
 import com.amazon.ion.IonCatalog;
+import com.amazon.ion.IonReader;
 import com.amazon.ion.IonWriter;
+
 import java.io.IOException;
 
 /**
@@ -62,4 +51,58 @@ public interface _Private_IonWriter
 
     /** Indicates whether the writer is stream copy optimized through {@link #writeValue(com.amazon.ion.IonReader)}. */
     public boolean isStreamCopyOptimized();
+
+    /**
+     * A function from int to int. Within this interface it is used to remap symbol IDs during
+     * {@link #writeValues(IonReader, IntTransformer)} and {@link #writeValue(IonReader, IntTransformer)}.
+     */
+    @FunctionalInterface
+    interface IntTransformer {
+
+        /**
+         * Transforms an int to another int.
+         * @param original the int to transform.
+         * @return the transformed int.
+         */
+        int transform(int original);
+    }
+
+    /**
+     * Returns the provided int unchanged.
+     */
+    IntTransformer IDENTITY_INT_TRANSFORMER = i -> i;
+
+    /**
+     * Transforms Ion 1.0 local symbol IDs to the equivalent Ion 1.1 local symbol IDs. Note: system symbols do not
+     * follow this path.
+     */
+    // TODO change the following once the Ion 1.1 symbol table is finalized. Probably:
+    //  sid10 -> sid10 - SystemSymbols.ION_1_0_MAX_ID;
+    IntTransformer ION_1_0_SID_TO_ION_1_1_SID = IDENTITY_INT_TRANSFORMER;
+
+    /**
+     * Works the same as {@link IonWriter#writeValues(IonReader)}, but transforms all symbol IDs that would otherwise
+     * be written verbatim using the given transform function. This can be used to do a system-level transcode of
+     * Ion 1.0 data to Ion 1.1 while preserving symbol IDs that point to the same text.
+     * @param reader the reader from which to transcode.
+     * @param symbolIdTransformer the symbol ID transform function.
+     * @throws IOException if thrown during write.
+     * @throws UnsupportedOperationException if the writer does not override this default implementation.
+     */
+    default void writeValues(IonReader reader, IntTransformer symbolIdTransformer) throws IOException {
+        throw new UnsupportedOperationException("This writer does not support symbol ID transformation.");
+    }
+
+    /**
+     * Works the same as {@link IonWriter#writeValue(IonReader)}, but transforms all symbol IDs that would otherwise
+     * be written verbatim using the given transform function. This can be used to do a system-level transcode of
+     * Ion 1.0 data to Ion 1.1 while preserving symbol IDs that point to the same text.
+     * @param reader the reader from which to transcode.
+     * @param symbolIdTransformer the symbol ID transform function.
+     * @throws IOException if thrown during write.
+     * @throws UnsupportedOperationException if the writer does not override this default implementation.
+     */
+    default void writeValue(IonReader reader, IntTransformer symbolIdTransformer) throws IOException {
+        throw new UnsupportedOperationException("This writer does not support symbol ID transformation.");
+    }
 }
diff --git a/src/main/java/com/amazon/ion/impl/bin/IonManagedWriter_1_1.kt b/src/main/java/com/amazon/ion/impl/bin/IonManagedWriter_1_1.kt
index bb2a6aaa1..5fdd4edae 100644
--- a/src/main/java/com/amazon/ion/impl/bin/IonManagedWriter_1_1.kt
+++ b/src/main/java/com/amazon/ion/impl/bin/IonManagedWriter_1_1.kt
@@ -5,6 +5,7 @@ package com.amazon.ion.impl.bin
 import com.amazon.ion.*
 import com.amazon.ion.SymbolTable.UNKNOWN_SYMBOL_ID
 import com.amazon.ion.impl.*
+import com.amazon.ion.impl._Private_IonWriter.IntTransformer
 import com.amazon.ion.impl.bin.DelimitedContainerStrategy.*
 import com.amazon.ion.impl.bin.SymbolInliningStrategy.*
 import com.amazon.ion.system.*
@@ -120,6 +121,12 @@ internal class IonManagedWriter_1_1(
     /** Symbols to be interned since the prior encoding context. */
     private var newSymbols = arrayListOf()
 
+    /**
+     * Transformer for symbol IDs encountered during writeValues. Can be used to upgrade Ion 1.0 symbol IDs to the
+     * Ion 1.1 equivalents. When `null`, symbol IDs are written unchanged.
+     */
+    private var sidTransformer: IntTransformer? = null
+
     private fun intern(text: String): Int {
         // Check the current symbol table
         var sid = symbolTable[text]
@@ -279,7 +286,7 @@ internal class IonManagedWriter_1_1(
                 // No (known) SID either.
                 throw UnknownSymbolException("Cannot write a symbol token with unknown text and unknown SID.")
             } else {
-                rawWriter.write(kind, sid)
+                rawWriter.write(kind, sidTransformer?.transform(sid) ?: sid)
             }
         } else if (preserveEncoding && sid < 0) {
             rawWriter.write(kind, text)
@@ -335,6 +342,18 @@ internal class IonManagedWriter_1_1(
 
     override fun isStreamCopyOptimized(): Boolean = false
 
+    override fun writeValues(reader: IonReader, symbolIdTransformer: IntTransformer) {
+        // Restore the previously active transformer (instead of forcing null) so that a nested call cannot
+        // silently disable the transformer installed by an enclosing call.
+        val previousTransformer = sidTransformer
+        sidTransformer = symbolIdTransformer
+        try {
+            writeValues(reader)
+        } finally {
+            sidTransformer = previousTransformer
+        }
+    }
+
     override fun writeValues(reader: IonReader) {
         // There's a possibility that we could have interference between encoding contexts if we're transferring from a
         // system reader. However, this is the same behavior as the other implementations.
@@ -354,6 +373,18 @@ internal class IonManagedWriter_1_1(
         }
     }
 
+    override fun writeValue(reader: IonReader, symbolIdTransformer: IntTransformer) {
+        // Restore the previously active transformer (instead of forcing null) so that a nested call cannot
+        // silently disable the transformer installed by an enclosing call.
+        val previousTransformer = sidTransformer
+        sidTransformer = symbolIdTransformer
+        try {
+            writeValue(reader)
+        } finally {
+            sidTransformer = previousTransformer
+        }
+    }
+
     override fun writeValue(reader: IonReader) {
         // There's a possibility that we could have interference between encoding contexts if we're transferring from a
         // system reader. However, this is the same behavior as the other implementations.
diff --git a/src/test/java/com/amazon/ion/impl/bin/IonManagedWriter_1_1_Test.kt b/src/test/java/com/amazon/ion/impl/bin/IonManagedWriter_1_1_Test.kt
index 658c6cd87..05506df82 100644
--- a/src/test/java/com/amazon/ion/impl/bin/IonManagedWriter_1_1_Test.kt
+++ b/src/test/java/com/amazon/ion/impl/bin/IonManagedWriter_1_1_Test.kt
@@ -4,7 +4,11 @@ package com.amazon.ion.impl.bin
 
 import com.amazon.ion.*
 import com.amazon.ion.IonEncodingVersion.*
+import com.amazon.ion.impl.*
+import com.amazon.ion.system.*
+import java.io.ByteArrayOutputStream
 import org.junit.jupiter.api.Assertions.assertEquals
+import org.junit.jupiter.api.Assertions.assertNull
 import org.junit.jupiter.api.Test
 import org.junit.jupiter.api.assertThrows
 
@@ -43,4 +47,67 @@ internal class IonManagedWriter_1_1_Test {
         }
         assertEquals("\$ion_1_1 [\$ion_1_1]", appendable.toString().trim())
     }
+
+    /** Offset added to every local symbol ID by the transformers under test. */
+    private val sidOffset = 32
+
+    /**
+     * Writes `{a: b::c}` as Ion 1.0 binary, transcodes it at the system level to Ion 1.1 using [writeValuesFn],
+     * and verifies that each local symbol ID in the output is the input symbol ID plus [sidOffset].
+     *
+     * @param writeValuesFn invoked on the Ion 1.1 writer with a system reader over the Ion 1.0 data; expected to
+     *  transfer the reader's values while applying a `sid + sidOffset` transformer.
+     */
+    private fun `transform symbol IDS`(writeValuesFn: _Private_IonWriter.(IonReader) -> Unit) {
+        // Craft the input data: {a: b::c}, encoded as {$10: $11::$12}
+        val input = ByteArrayOutputStream()
+        ION_1_0.binaryWriterBuilder().build(input).use {
+            it.stepIn(IonType.STRUCT)
+            it.setFieldName("a")
+            it.addTypeAnnotation("b")
+            it.writeSymbol("c")
+            it.stepOut()
+        }
+        // Do a system-level transcode of the Ion 1.0 data to Ion 1.1, adding sidOffset to each local symbol ID.
+        val system = IonSystemBuilder.standard().build() as _Private_IonSystem
+        val output = ByteArrayOutputStream()
+        system.newSystemReader(input.toByteArray()).use { reader ->
+            (ION_1_1.binaryWriterBuilder().build(output) as _Private_IonWriter).use {
+                it.writeValuesFn(reader)
+            }
+        }
+        // Verify the transformed symbol IDs using another system read.
+        system.newSystemReader(output.toByteArray()).use {
+            // Skip the leading $ion_1_1 symbols. The next() call that ends this loop lands on the first
+            // non-symbol value (presumably the transcoded local symbol table -- TODO confirm), so the data struct
+            // is expected from the following next() call.
+            while (it.next() == IonType.SYMBOL) {
+                assertEquals("\$ion_1_1", it.stringValue())
+            }
+            assertEquals(IonType.STRUCT, it.next())
+            it.stepIn()
+            assertEquals(IonType.SYMBOL, it.next())
+            assertEquals(10 + sidOffset, it.fieldNameSymbol.sid)
+            assertEquals(11 + sidOffset, it.typeAnnotationSymbols[0].sid)
+            assertEquals(12 + sidOffset, it.symbolValue().sid)
+            assertNull(it.next())
+            it.stepOut()
+        }
+    }
+
+    @Test
+    fun `use writeValues to transform symbol IDS`() {
+        `transform symbol IDS` { reader ->
+            writeValues(reader) { sid -> sid + sidOffset }
+        }
+    }
+
+    @Test
+    fun `use writeValue to transform symbol IDS`() {
+        `transform symbol IDS` { reader ->
+            while (reader.next() != null) {
+                writeValue(reader) { sid -> sid + sidOffset }
+            }
+        }
+    }
 }