Skip to content

Commit

Permalink
Adds the ability to transform Ion 1.0 symbol IDs to Ion 1.1 equivalents when performing a system-level transcoding.
Browse files Browse the repository at this point in the history
  • Loading branch information
tgregg committed Jun 7, 2024
1 parent e5025ea commit 34e4e52
Show file tree
Hide file tree
Showing 3 changed files with 132 additions and 16 deletions.
67 changes: 52 additions & 15 deletions src/main/java/com/amazon/ion/impl/_Private_IonWriter.java
Original file line number Diff line number Diff line change
@@ -1,22 +1,11 @@
/*
* Copyright 2007-2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License").
* You may not use this file except in compliance with the License.
* A copy of the License is located at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* or in the "license" file accompanying this file. This file is distributed
* on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
* express or implied. See the License for the specific language governing
* permissions and limitations under the License.
*/

// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0
package com.amazon.ion.impl;

import com.amazon.ion.IonCatalog;
import com.amazon.ion.IonReader;
import com.amazon.ion.IonWriter;

import java.io.IOException;

/**
Expand Down Expand Up @@ -62,4 +51,52 @@ public interface _Private_IonWriter

/**
* Indicates whether the writer is stream copy optimized through {@link #writeValue(com.amazon.ion.IonReader)}.
* @return true if the writer is stream copy optimized; false otherwise.
*/
public boolean isStreamCopyOptimized();

/**
* A function from int to int. Because it declares a single abstract method it can be supplied as a lambda.
* Within this interface it is the type of the symbol ID transform accepted by
* {@link #writeValues(IonReader, IntTransformer)} and {@link #writeValue(IonReader, IntTransformer)}.
*/
@FunctionalInterface
interface IntTransformer {

/**
* Transforms an int to another int.
* @param original the int to transform.
* @return the transformed int.
*/
int transform(int original);
}

/**
 * An {@link IntTransformer} that performs no transformation: the value passed in is the value returned.
 */
IntTransformer IDENTITY_INT_TRANSFORMER = original -> original;

/**
* Transforms Ion 1.0 local symbol IDs to the equivalent Ion 1.1 local symbol ID. Note: system symbols do not
* follow this path.
* <p>
* This is currently the same instance as {@link #IDENTITY_INT_TRANSFORMER}, so symbol IDs pass through unchanged.
*/
// TODO change the following once the Ion 1.1 symbol table is finalized. Probably:
// sid10 -> sid10 - SystemSymbols.ION_1_0_MAX_ID;
IntTransformer ION_1_0_SID_TO_ION_1_1_SID = IDENTITY_INT_TRANSFORMER;

/**
 * Works the same as {@link IonWriter#writeValues(IonReader)}, but transforms all symbol IDs that would otherwise
 * be written verbatim using the given transform function. This can be used to do a system-level transcode of
 * Ion 1.0 data to Ion 1.1 while preserving symbol IDs that point to the same text.
 * <p>
 * This default implementation does not write anything; it always throws
 * {@link UnsupportedOperationException}. Writers that support symbol ID transformation must override it.
 * @param reader the reader from which to transcode.
 * @param symbolIdTransformer the symbol ID transform function.
 * @throws IOException if thrown during write.
 * @throws UnsupportedOperationException if the writer does not override this method.
 */
default void writeValues(IonReader reader, IntTransformer symbolIdTransformer) throws IOException {
    // Name the concrete writer so the failure can be diagnosed without a debugger.
    throw new UnsupportedOperationException(
        "writeValues(IonReader, IntTransformer) is not supported by " + getClass().getName()
    );
}

/**
 * Works the same as {@link IonWriter#writeValue(IonReader)}, but transforms all symbol IDs that would otherwise
 * be written verbatim using the given transform function. This can be used to do a system-level transcode of
 * Ion 1.0 data to Ion 1.1 while preserving symbol IDs that point to the same text.
 * <p>
 * This default implementation does not write anything; it always throws
 * {@link UnsupportedOperationException}. Writers that support symbol ID transformation must override it.
 * @param reader the reader from which to transcode.
 * @param symbolIdTransformer the symbol ID transform function.
 * @throws IOException if thrown during write.
 * @throws UnsupportedOperationException if the writer does not override this method.
 */
default void writeValue(IonReader reader, IntTransformer symbolIdTransformer) throws IOException {
    // Name the concrete writer so the failure can be diagnosed without a debugger.
    throw new UnsupportedOperationException(
        "writeValue(IonReader, IntTransformer) is not supported by " + getClass().getName()
    );
}
}
27 changes: 26 additions & 1 deletion src/main/java/com/amazon/ion/impl/bin/IonManagedWriter_1_1.kt
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ package com.amazon.ion.impl.bin
import com.amazon.ion.*
import com.amazon.ion.SymbolTable.UNKNOWN_SYMBOL_ID
import com.amazon.ion.impl.*
import com.amazon.ion.impl._Private_IonWriter.IntTransformer
import com.amazon.ion.impl.bin.DelimitedContainerStrategy.*
import com.amazon.ion.impl.bin.SymbolInliningStrategy.*
import com.amazon.ion.system.*
Expand Down Expand Up @@ -120,6 +121,12 @@ internal class IonManagedWriter_1_1(
/** Symbols to be interned since the prior encoding context. */
private var newSymbols = arrayListOf<String>()

/**
* Transformer for symbol IDs encountered during writeValues. Can be used to upgrade Ion 1.0 symbol IDs to the
* Ion 1.1 equivalents.
*
* The transforming `writeValues`/`writeValue` overloads install their argument here for the duration of the call;
* outside of such a call the value is `null`. While it is `null`, symbol IDs are written as-is.
*/
private var sidTransformer: IntTransformer? = null

private fun intern(text: String): Int {
// Check the current symbol table
var sid = symbolTable[text]
Expand Down Expand Up @@ -279,7 +286,7 @@ internal class IonManagedWriter_1_1(
// No (known) SID either.
throw UnknownSymbolException("Cannot write a symbol token with unknown text and unknown SID.")
} else {
rawWriter.write(kind, sid)
rawWriter.write(kind, sidTransformer?.transform(sid) ?: sid)
}
} else if (preserveEncoding && sid < 0) {
rawWriter.write(kind, text)
Expand Down Expand Up @@ -335,6 +342,15 @@ internal class IonManagedWriter_1_1(

/** Always returns `false`: this writer reports that it is not stream copy optimized. */
override fun isStreamCopyOptimized(): Boolean = false

/**
 * Delegates to [writeValues] (the single-argument overload) with [symbolIdTransformer] installed as the active
 * symbol ID transformer for the duration of the call.
 *
 * The transformer that was active before the call is put back afterwards, whether the delegate returns normally
 * or throws, so a nested transforming call cannot clear the transformer of the call that encloses it.
 *
 * @param reader the reader from which to transcode.
 * @param symbolIdTransformer the function applied to symbol IDs that are written verbatim.
 */
override fun writeValues(reader: IonReader, symbolIdTransformer: IntTransformer) {
    // Remember the transformer that was in effect (normally null) so it can be restored instead of discarded.
    val previousTransformer = sidTransformer
    sidTransformer = symbolIdTransformer
    try {
        writeValues(reader)
    } finally {
        sidTransformer = previousTransformer
    }
}

override fun writeValues(reader: IonReader) {
// There's a possibility that we could have interference between encoding contexts if we're transferring from a
// system reader. However, this is the same behavior as the other implementations.
Expand All @@ -354,6 +370,15 @@ internal class IonManagedWriter_1_1(
}
}

/**
 * Delegates to [writeValue] (the single-argument overload) with [symbolIdTransformer] installed as the active
 * symbol ID transformer for the duration of the call.
 *
 * The transformer that was active before the call is put back afterwards, whether the delegate returns normally
 * or throws, so a nested transforming call cannot clear the transformer of the call that encloses it.
 *
 * @param reader the reader positioned on the value to transcode.
 * @param symbolIdTransformer the function applied to symbol IDs that are written verbatim.
 */
override fun writeValue(reader: IonReader, symbolIdTransformer: IntTransformer) {
    // Remember the transformer that was in effect (normally null) so it can be restored instead of discarded.
    val previousTransformer = sidTransformer
    sidTransformer = symbolIdTransformer
    try {
        writeValue(reader)
    } finally {
        sidTransformer = previousTransformer
    }
}

override fun writeValue(reader: IonReader) {
// There's a possibility that we could have interference between encoding contexts if we're transferring from a
// system reader. However, this is the same behavior as the other implementations.
Expand Down
54 changes: 54 additions & 0 deletions src/test/java/com/amazon/ion/impl/bin/IonManagedWriter_1_1_Test.kt
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,11 @@ package com.amazon.ion.impl.bin

import com.amazon.ion.*
import com.amazon.ion.IonEncodingVersion.*
import com.amazon.ion.impl.*
import com.amazon.ion.system.*
import java.io.ByteArrayOutputStream
import org.junit.jupiter.api.Assertions.assertEquals
import org.junit.jupiter.api.Assertions.assertNull
import org.junit.jupiter.api.Test
import org.junit.jupiter.api.assertThrows

Expand Down Expand Up @@ -43,4 +47,54 @@ internal class IonManagedWriter_1_1_Test {
}
assertEquals("\$ion_1_1 [\$ion_1_1]", appendable.toString().trim())
}

/**
 * Shared body of the symbol ID transformation tests.
 *
 * Writes `{a: b::c}` as Ion 1.0 binary, lets [writeValuesFn] copy it from a system reader into an Ion 1.1 binary
 * writer, then system-reads the result and checks the symbol IDs of the field name, annotation and value.
 * The expected IDs (42, 43, 44) assume that [writeValuesFn] applies a transformer that adds 32 to each symbol ID.
 */
private fun `transform symbol IDS`(writeValuesFn: _Private_IonWriter.(IonReader) -> Unit) {
    // Input: {a: b::c}, which the Ion 1.0 binary writer encodes as {$10: $11::$12}.
    val ion10Bytes = ByteArrayOutputStream()
    ION_1_0.binaryWriterBuilder().build(ion10Bytes).use { ion10Writer ->
        ion10Writer.stepIn(IonType.STRUCT)
        ion10Writer.setFieldName("a")
        ion10Writer.addTypeAnnotation("b")
        ion10Writer.writeSymbol("c")
        ion10Writer.stepOut()
    }

    // System-level transcode of the Ion 1.0 bytes to Ion 1.1; the caller decides how the writer is driven.
    val ionSystem = IonSystemBuilder.standard().build() as _Private_IonSystem
    val ion11Bytes = ByteArrayOutputStream()
    ionSystem.newSystemReader(ion10Bytes.toByteArray()).use { ion10Reader ->
        val ion11Writer = ION_1_1.binaryWriterBuilder().build(ion11Bytes) as _Private_IonWriter
        ion11Writer.use { openWriter -> openWriter.writeValuesFn(ion10Reader) }
    }

    // Inspect the output with a second system reader so that symbol IDs are visible.
    ionSystem.newSystemReader(ion11Bytes.toByteArray()).use { ion11Reader ->
        // Every leading top-level symbol must be the Ion 1.1 version marker.
        // NOTE(review): this loop also consumes the first non-symbol value; presumably that is the transcoded
        // symbol table struct, leaving the data struct for the next() call below -- confirm.
        while (ion11Reader.next() == IonType.SYMBOL) {
            assertEquals("\$ion_1_1", ion11Reader.stringValue())
        }
        assertEquals(IonType.STRUCT, ion11Reader.next())
        ion11Reader.stepIn()
        assertEquals(IonType.SYMBOL, ion11Reader.next())
        assertEquals(42, ion11Reader.fieldNameSymbol.sid)
        assertEquals(43, ion11Reader.typeAnnotationSymbols[0].sid)
        assertEquals(44, ion11Reader.symbolValue().sid)
        assertNull(ion11Reader.next())
        ion11Reader.stepOut()
    }
}

/** Transcodes the whole stream with a single transforming `writeValues` call that adds 32 to each symbol ID. */
@Test
fun `use writeValues to transform symbol IDS`() = `transform symbol IDS` { source ->
    writeValues(source) { it + 32 }
}

/** Transcodes the stream one value at a time with transforming `writeValue` calls that add 32 to each symbol ID. */
@Test
fun `use writeValue to transform symbol IDS`() = `transform symbol IDS` { source ->
    // Advance the reader until it reports no more values, copying each value as it is reached.
    generateSequence { source.next() }.forEach { _ ->
        writeValue(source) { it + 32 }
    }
}
}

0 comments on commit 34e4e52

Please sign in to comment.