From cc4cf2cda92be0cd78758929a4a79df3c00e312a Mon Sep 17 00:00:00 2001 From: Matthew Pope <81593196+popematt@users.noreply.github.com> Date: Tue, 10 Sep 2024 11:26:59 -0700 Subject: [PATCH] Adds custom formatting for encoding directives when writing text (#936) --- .../amazon/ion/impl/IonRawTextWriter_1_1.kt | 13 +++- .../ion/impl/PrivateIonRawWriter_1_1.kt | 38 ++++++++++++ .../ion/impl/bin/IonManagedWriter_1_1.kt | 35 +++++++++-- .../ion/impl/bin/IonRawBinaryWriter_1_1.kt | 7 ++- .../ion/impl/bin/IonManagedWriter_1_1_Test.kt | 62 +++++++++++++++++-- 5 files changed, 140 insertions(+), 15 deletions(-) create mode 100644 src/main/java/com/amazon/ion/impl/PrivateIonRawWriter_1_1.kt diff --git a/src/main/java/com/amazon/ion/impl/IonRawTextWriter_1_1.kt b/src/main/java/com/amazon/ion/impl/IonRawTextWriter_1_1.kt index 20f20decb..c6e205f05 100644 --- a/src/main/java/com/amazon/ion/impl/IonRawTextWriter_1_1.kt +++ b/src/main/java/com/amazon/ion/impl/IonRawTextWriter_1_1.kt @@ -21,7 +21,7 @@ import java.math.BigInteger class IonRawTextWriter_1_1 internal constructor( private val options: _Private_IonTextWriterBuilder_1_1, private val output: _Private_IonTextAppender, -) : IonRawWriter_1_1 { +) : IonRawWriter_1_1, `PrivateIonRawWriter_1_1` { companion object { const val IVM = "\$ion_1_1" @@ -63,7 +63,7 @@ class IonRawTextWriter_1_1 internal constructor( Top -> options.topLevelSeparator() } - if (options.isPrettyPrintOn) { + if (options.isPrettyPrintOn && !forceNoNewlines) { if (isPendingSeparator && !IonTextUtils.isAllWhitespace(separatorCharacter)) { // Only bother if the separator is non-whitespace. output.appendAscii(separatorCharacter) @@ -384,11 +384,18 @@ class IonRawTextWriter_1_1 internal constructor( currentContainer = ancestorContainersStack.removeLast() closeValue { - if (options.isPrettyPrintOn && currentContainerHasValues) { + if (options.isPrettyPrintOn && currentContainerHasValues && !forceNoNewlines) { output.appendAscii(options.lineSeparator()) output.appendAscii(" ".repeat(ancestorContainersStack.size * 2)) } output.appendAscii(endChar) } } + + private var forceNoNewlines: Boolean = false + override fun forceNoNewlines(boolean: Boolean) { forceNoNewlines = boolean } + + override fun writeMacroParameterCardinality(cardinality: Macro.ParameterCardinality) { + output.appendAscii(cardinality.sigil) + } } diff --git a/src/main/java/com/amazon/ion/impl/PrivateIonRawWriter_1_1.kt b/src/main/java/com/amazon/ion/impl/PrivateIonRawWriter_1_1.kt new file mode 100644 index 000000000..fd75926ec --- /dev/null +++ b/src/main/java/com/amazon/ion/impl/PrivateIonRawWriter_1_1.kt @@ -0,0 +1,38 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +package com.amazon.ion.impl + +import com.amazon.ion.impl.macro.Macro + +/** + * Allows us to write encoding directives in a more optimized and/or readable way. + * Could be used to construct invalid data if used in the wrong way, so we don't + * expose this to users. + * + * Some functions may be meaningless to a particular underlying implementation. + */ +internal interface PrivateIonRawWriter_1_1 : IonRawWriter_1_1 { + /** + * Writes a parameter cardinality. + */ + fun writeMacroParameterCardinality(cardinality: Macro.ParameterCardinality) + + /** + * Sets a flag that can override the newlines that are normally inserted by a pretty printer. + * + * Ignored by binary implementations. + * + * TODO: Once system symbols are implemented, consider replacing this with dedicated + * `startClause(SystemSymbol)` and `endClause()`, or similar. + * * This will allow the text writer to + * * start the clauses without added newlines + * * Skip checking whether to write annotations + * * This will allow the binary writer to + * * Leverage macros, when possible + * * Skip checking whether to write annotations + * * Skip checking whether a given string or SID is a system symbol + * * E.g. `startClause(SystemSymbol_1_1.MACRO_TABLE)` could directly write the + * bytes `F2 EF` followed by `E3` for "macro_table". + */ + fun forceNoNewlines(boolean: Boolean) = Unit +} diff --git a/src/main/java/com/amazon/ion/impl/bin/IonManagedWriter_1_1.kt b/src/main/java/com/amazon/ion/impl/bin/IonManagedWriter_1_1.kt index 93fd928c8..dd54d4de0 100644 --- a/src/main/java/com/amazon/ion/impl/bin/IonManagedWriter_1_1.kt +++ b/src/main/java/com/amazon/ion/impl/bin/IonManagedWriter_1_1.kt @@ -32,7 +32,7 @@ import kotlin.collections.LinkedHashMap */ internal class IonManagedWriter_1_1( private val userData: IonRawWriter_1_1, - private val systemData: IonRawWriter_1_1, + private val systemData: PrivateIonRawWriter_1_1, private val options: ManagedWriterOptions_1_1, private val onClose: () -> Unit, ) : _Private_IonWriter, MacroAwareIonWriter { @@ -54,6 +54,11 @@ internal class IonManagedWriter_1_1( private val ION_VERSION_MARKER_REGEX = Regex("^\\\$ion_\\d+_\\d+$") + // These are chosen subjectively to be neither too big nor too small. + private const val MAX_PARAMETERS_IN_ONE_LINE_SIGNATURE = 4 + private const val MAX_SYMBOLS_IN_SINGLE_LINE_SYMBOL_TABLE = 10 + private const val MAX_EXPRESSIONS_IN_SINGLE_LINE_MACRO_BODY = 8 + @JvmStatic fun textWriter(output: OutputStream, managedWriterOptions: ManagedWriterOptions_1_1, textOptions: _Private_IonTextWriterBuilder_1_1): IonManagedWriter_1_1 { // TODO support all options configurable via IonTextWriterBuilder_1_1 @@ -189,7 +194,7 @@ internal class IonManagedWriter_1_1( } /** Helper function for writing encoding directives */ - private inline fun writeSystemSexp(content: IonRawWriter_1_1.() -> Unit) { + private inline fun writeSystemSexp(content: PrivateIonRawWriter_1_1.() -> Unit) { systemData.stepInSExp(usingLengthPrefix = false) systemData.content() systemData.stepOut() @@ -228,18 +233,25 @@ internal class IonManagedWriter_1_1( if (!hasSymbolsToAdd && !hasSymbolsToRetain) return writeSystemSexp { + forceNoNewlines(true) systemData.writeSymbol(SystemSymbols.SYMBOL_TABLE) + // Add previous symbol table if (hasSymbolsToRetain) { + if (newSymbols.size > 0) forceNoNewlines(false) writeSymbol(SystemSymbols.ION_ENCODING) } + // Add new symbols if (hasSymbolsToAdd) { stepInList(usingLengthPrefix = false) + if (newSymbols.size <= MAX_SYMBOLS_IN_SINGLE_LINE_SYMBOL_TABLE) forceNoNewlines(true) newSymbols.forEach { (text, _) -> writeString(text) } stepOut() } + forceNoNewlines(true) } + systemData.forceNoNewlines(false) } /** @@ -253,10 +265,13 @@ internal class IonManagedWriter_1_1( if (!hasMacrosToAdd && !hasMacrosToRetain) return writeSystemSexp { + forceNoNewlines(true) writeSymbol(SystemSymbols.MACRO_TABLE) + if (newMacros.size > 0) forceNoNewlines(false) if (hasMacrosToRetain) { writeSymbol(SystemSymbols.ION_ENCODING) } + forceNoNewlines(false) newMacros.forEach { (macro, address) -> val name = macroNames[address] when (macro) { @@ -264,23 +279,30 @@ internal class IonManagedWriter_1_1( is SystemMacro -> exportSystemMacro(macro) } } + forceNoNewlines(true) } + systemData.forceNoNewlines(false) } private fun exportSystemMacro(macro: SystemMacro) { // TODO: Support for aliases writeSystemSexp { + forceNoNewlines(true) writeSymbol(SystemSymbols.EXPORT) writeAnnotations(SystemSymbols.ION) writeSymbol(macro.macroName) } + systemData.forceNoNewlines(false) } private fun writeMacroDefinition(name: String?, macro: TemplateMacro) { writeSystemSexp { + forceNoNewlines(true) writeSymbol(SystemSymbols.MACRO) if (name != null) writeSymbol(name) else writeNull() + if (macro.signature.size > MAX_PARAMETERS_IN_ONE_LINE_SIGNATURE) forceNoNewlines(false) + // Signature writeSystemSexp { macro.signature.forEach { parameter -> @@ -289,14 +311,13 @@ internal class IonManagedWriter_1_1( } writeSymbol(parameter.variableName) if (parameter.cardinality != Macro.ParameterCardinality.ExactlyOne) { - // TODO: Consider adding a method to the raw writer that can write a single-character - // symbol without constructing a string. It might be a minor performance improvement. - // TODO: See if we can write this without a space between the parameter name and the sigil. - writeSymbol(parameter.cardinality.sigil.toString()) + writeMacroParameterCardinality(parameter.cardinality) } } } + if (macro.body.size > MAX_EXPRESSIONS_IN_SINGLE_LINE_MACRO_BODY) forceNoNewlines(false) + // Template Body // TODO: See if there's any benefit to using a smaller number type, if we can @@ -445,7 +466,9 @@ internal class IonManagedWriter_1_1( // Step out for anything where endExclusive is beyond the end of the expression list. repeat(numberOfTimesToStepOut.last()) { stepOut() } + forceNoNewlines(true) } + systemData.forceNoNewlines(false) } override fun getCatalog(): IonCatalog { diff --git a/src/main/java/com/amazon/ion/impl/bin/IonRawBinaryWriter_1_1.kt b/src/main/java/com/amazon/ion/impl/bin/IonRawBinaryWriter_1_1.kt index 59d2f50e1..5729afec3 100644 --- a/src/main/java/com/amazon/ion/impl/bin/IonRawBinaryWriter_1_1.kt +++ b/src/main/java/com/amazon/ion/impl/bin/IonRawBinaryWriter_1_1.kt @@ -21,7 +21,7 @@ class IonRawBinaryWriter_1_1 internal constructor( private val out: OutputStream, private val buffer: WriteBuffer, private val lengthPrefixPreallocation: Int, -) : IonRawWriter_1_1 { +) : IonRawWriter_1_1, PrivateIonRawWriter_1_1 { /** * Types of encoding containers. @@ -889,4 +889,9 @@ class IonRawBinaryWriter_1_1 internal constructor( } } } + + override fun writeMacroParameterCardinality(cardinality: Macro.ParameterCardinality) { + // TODO: Write as a system symbol + writeSymbol(cardinality.sigil.toString()) + } } diff --git a/src/test/java/com/amazon/ion/impl/bin/IonManagedWriter_1_1_Test.kt b/src/test/java/com/amazon/ion/impl/bin/IonManagedWriter_1_1_Test.kt index 2f0390874..aad929269 100644 --- a/src/test/java/com/amazon/ion/impl/bin/IonManagedWriter_1_1_Test.kt +++ b/src/test/java/com/amazon/ion/impl/bin/IonManagedWriter_1_1_Test.kt @@ -36,6 +36,7 @@ internal class IonManagedWriter_1_1_Test { private fun write( topLevelValuesOnNewLines: Boolean = true, closeWriter: Boolean = true, + pretty: Boolean = false, symbolInliningStrategy: SymbolInliningStrategy = SymbolInliningStrategy.ALWAYS_INLINE, block: IonManagedWriter_1_1.() -> Unit ): String { @@ -43,6 +44,7 @@ internal class IonManagedWriter_1_1_Test { val writer = ION_1_1.textWriterBuilder() .withWriteTopLevelValuesOnNewLines(topLevelValuesOnNewLines) .withSymbolInliningStrategy(symbolInliningStrategy) + .apply { if (pretty) withPrettyPrinting() } .build(appendable) as IonManagedWriter_1_1 writer.apply(block) if (closeWriter) writer.close() @@ -204,7 +206,7 @@ internal class IonManagedWriter_1_1_Test { val expected = """ $ion_1_1 - $ion_encoding::((macro_table (macro null (a * b *) "foo"))) + $ion_encoding::((macro_table (macro null (a* b*) "foo"))) (:0 (:) (: 1 2 3)) """.trimIndent() @@ -409,17 +411,17 @@ internal class IonManagedWriter_1_1_Test { case( "optional parameter", signature = listOf(Parameter("x", ParameterEncoding.Tagged, ParameterCardinality.ZeroOrOne)), - expectedSignature = "(x ?)" + expectedSignature = "(x?)" ), case( "zero-to-many parameter", signature = listOf(Parameter("x", ParameterEncoding.Tagged, ParameterCardinality.ZeroOrMore)), - expectedSignature = "(x *)" + expectedSignature = "(x*)" ), case( "one-to-many parameter", signature = listOf(Parameter("x", ParameterEncoding.Tagged, ParameterCardinality.OneOrMore)), - expectedSignature = "(x +)" + expectedSignature = "(x+)" ), case( "tagless parameter", @@ -434,7 +436,7 @@ internal class IonManagedWriter_1_1_Test { Parameter("c", ParameterEncoding.CompactSymbol, ParameterCardinality.ZeroOrMore), Parameter("d", ParameterEncoding.Float64, ParameterCardinality.ZeroOrOne), ), - expectedSignature = "(int32::a b + compact_symbol::c * float64::d ?)" + expectedSignature = "(int32::a b+ compact_symbol::c* float64::d?)" ), case( "null", @@ -646,6 +648,56 @@ internal class IonManagedWriter_1_1_Test { assertEquals(expected, actual) } + @Test + fun `when pretty printing, system s-expressions should have the clause name on the first line`() { + // ...and look reasonably pleasant. + // However, this should be held loosely. + val expected = """ + $ion_1_1 + $ion_encoding::( + (symbol_table ["foo","bar","baz"]) + (macro_table + (macro null () "foo") + (macro null () "bar")) + ) + $10 + $11 + $12 + (:0) + (:1) + $ion_encoding::( + (symbol_table + $ion_encoding + ["a","b","c"]) + (macro_table + $ion_encoding + (macro null () "abc")) + ) + $13 + $14 + $15 + (:2) + """.trimIndent() + + val actual = write(symbolInliningStrategy = SymbolInliningStrategy.NEVER_INLINE, pretty = true) { + writeSymbol("foo") + writeSymbol("bar") + writeSymbol("baz") + startMacro(constantMacro { string("foo") }) + endMacro() + startMacro(constantMacro { string("bar") }) + endMacro() + flush() + writeSymbol("a") + writeSymbol("b") + writeSymbol("c") + startMacro(constantMacro { string("abc") }) + endMacro() + } + + assertEquals(expected, actual) + } + @Test fun `writeObject() should write something with a macro representation`() { val expected = """