Skip to content

Commit

Permalink
Adds support for parsing Ion 1.1 encoding directives in the text reader.
Browse files Browse the repository at this point in the history
  • Loading branch information
tgregg committed Oct 3, 2024
1 parent fc8f49e commit 41a666e
Show file tree
Hide file tree
Showing 6 changed files with 440 additions and 56 deletions.
145 changes: 145 additions & 0 deletions src/main/java/com/amazon/ion/impl/EncodingDirectiveReader.kt
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0
package com.amazon.ion.impl.macro

import com.amazon.ion.*
import com.amazon.ion.impl.*
import com.amazon.ion.impl.macro.MacroRef.Companion.byId

/**
 * Reads Ion 1.1 encoding directives from the given [IonReader].
 *
 * The reader must be positioned on (but not yet stepped into) the directive's top-level s-expression when
 * [readEncodingDirective] is called. The directive is consumed with a small state machine; the symbols and
 * macros it declares are accumulated in [newSymbols] and [newMacros], and [isSymbolTableAppend] records whether
 * the symbol table was declared as an append to the current one.
 *
 * Call [reset] before reading each directive so that results from a previous directive do not leak into the
 * next one.
 */
class EncodingDirectiveReader(private val reader: IonReader) {

    // The lookup lambda reads `newMacros` at call time, so it always sees the property's current map.
    private val macroCompiler: MacroCompiler = MacroCompilerIonReader(reader) { key: Any? -> newMacros[key] }

    // Offset of the most recently declared macro in the directive being read; -1 means "none yet".
    private var localMacroMaxOffset: Int = -1
    private var state: State = State.READING_VALUE

    /** True when the symbol_table s-expression began with `$ion_encoding`, i.e. the symbols are appended. */
    var isSymbolTableAppend = false

    /** Symbol text declared by the directive, in declaration order. */
    var newSymbols: MutableList<String> = ArrayList(8)

    /** Macros declared by the directive, keyed by their id-based [MacroRef]. */
    var newMacros: MutableMap<MacroRef, Macro> = HashMap()

    private enum class State {
        IN_ION_ENCODING_SEXP,
        IN_SYMBOL_TABLE_SEXP,
        IN_SYMBOL_TABLE_LIST,
        IN_MACRO_TABLE_SEXP,
        COMPILING_MACRO,
        READING_VALUE
    }

    /**
     * Chooses the next state from the text token that heads an s-expression inside the directive.
     *
     * @throws IonException if the token is neither `symbol_table` nor `macro_table`.
     */
    private fun classifySexpWithinEncodingDirective() {
        // stringValue() comes from Java and may be null (e.g. for null.symbol). Keep it nullable so that case
        // is reported as an IonException below rather than failing Kotlin's platform-type null check.
        val name: String? = reader.stringValue()
        state = when (name) {
            SystemSymbols_1_1.SYMBOL_TABLE.text -> State.IN_SYMBOL_TABLE_SEXP
            SystemSymbols_1_1.MACRO_TABLE.text -> State.IN_MACRO_TABLE_SEXP
            else -> throw IonException("\$ion_encoding expressions '$name' not supported.")
        }
    }

    /**
     * Validates the value the reader is positioned on inside a symbol_table s-expression and steps into the
     * list of symbols. A leading `$ion_encoding` token (accepted once per directive) marks the table as an
     * append and must be followed immediately by a list.
     *
     * @throws IonException if the value is not `$ion_encoding` or a list, or if `$ion_encoding` is not followed
     *  by a list.
     */
    private fun classifySymbolTable() {
        val type: IonType = reader.type
        if (IonType.isText(type)) {
            if (SystemSymbols.ION_ENCODING != reader.stringValue() || isSymbolTableAppend) {
                throw IonException("symbol_table s-expression must begin with either \$ion_encoding or a list.")
            }
            if (reader.next() == null || reader.type != IonType.LIST) {
                throw IonException("symbol_table s-expression must begin with a list.")
            }
            isSymbolTableAppend = true
        } else if (type != IonType.LIST) {
            throw IonException("symbol_table s-expression must begin with either \$ion_encoding or a list.")
        }
        reader.stepIn()
        state = State.IN_SYMBOL_TABLE_LIST
    }

    /**
     * Reads an encoding directive. After this method returns, the caller should access this class's properties to
     * retrieve the symbols and macros declared within the directive.
     *
     * On return the reader has been stepped back out of the directive's s-expression.
     *
     * @throws IonException if the directive does not have a supported shape.
     */
    fun readEncodingDirective() {
        reader.stepIn()
        state = State.IN_ION_ENCODING_SEXP
        while (true) {
            when (state) {

                State.IN_ION_ENCODING_SEXP -> {
                    if (reader.next() == null) {
                        // End of the directive: leave the reader where the caller can resume normal reading.
                        reader.stepOut()
                        state = State.READING_VALUE
                        return
                    }
                    if (reader.type != IonType.SEXP) {
                        throw IonException("Ion encoding directives must contain only s-expressions.")
                    }
                    reader.stepIn()
                    if (reader.next() == null || !IonType.isText(reader.type)) {
                        throw IonException("S-expressions within encoding directives must begin with a text token.")
                    }
                    classifySexpWithinEncodingDirective()
                }

                State.IN_SYMBOL_TABLE_SEXP -> {
                    if (reader.next() == null) {
                        reader.stepOut()
                        state = State.IN_ION_ENCODING_SEXP
                    } else {
                        classifySymbolTable()
                    }
                }

                State.IN_SYMBOL_TABLE_LIST -> {
                    if (reader.next() == null) {
                        reader.stepOut()
                        state = State.IN_SYMBOL_TABLE_SEXP
                    } else {
                        if (!IonType.isText(reader.type)) {
                            throw IonException("The symbol_table must contain text.")
                        }
                        newSymbols.add(reader.stringValue())
                    }
                }

                State.IN_MACRO_TABLE_SEXP -> {
                    if (reader.next() == null) {
                        reader.stepOut()
                        state = State.IN_ION_ENCODING_SEXP
                    } else {
                        if (reader.type != IonType.SEXP) {
                            throw IonException("macro_table s-expression must contain s-expressions.")
                        }
                        // The compiler drives the shared reader while it runs; the COMPILING_MACRO state is what
                        // isMacroCompilationInProgress() reports during that window.
                        state = State.COMPILING_MACRO
                        val newMacro: Macro = macroCompiler.compileMacro()
                        newMacros[byId(++localMacroMaxOffset)] = newMacro
                        state = State.IN_MACRO_TABLE_SEXP
                    }
                }

                // TODO handle other legal encoding directive s-expression shapes.
                // TODO add strict enforcement of the schema around e.g. repeats

                // Neither state can be observed by this loop: COMPILING_MACRO is always replaced before the
                // branch that set it finishes, and READING_VALUE is only set immediately before returning.
                State.COMPILING_MACRO, State.READING_VALUE -> throw IllegalStateException(state.toString())
            }
        }
    }

    /**
     * @return true if the reader is currently being used by the [MacroCompiler].
     */
    fun isMacroCompilationInProgress(): Boolean = state == State.COMPILING_MACRO

    /**
     * Prepares the EncodingDirectiveReader to read a new encoding directive.
     *
     * Clears the accumulated symbols and macros and rewinds the macro id counter, so that the macros of the
     * next directive are keyed from id 0 again instead of continuing after the previous directive's ids.
     */
    fun reset() {
        isSymbolTableAppend = false
        newSymbols.clear()
        newMacros.clear()
        localMacroMaxOffset = -1
    }
}
16 changes: 16 additions & 0 deletions src/main/java/com/amazon/ion/impl/IonRawTextWriter_1_1.kt
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,12 @@ package com.amazon.ion.impl

import com.amazon.ion.*
import com.amazon.ion.impl.IonRawTextWriter_1_1.ContainerType.*
import com.amazon.ion.impl.IonRawTextWriter_1_1.ContainerType.List
import com.amazon.ion.impl.bin.*
import com.amazon.ion.impl.macro.*
import com.amazon.ion.system.*
import com.amazon.ion.util.*
import java.io.OutputStream
import java.math.BigDecimal
import java.math.BigInteger

Expand All @@ -25,6 +29,18 @@ class IonRawTextWriter_1_1 internal constructor(

companion object {
    const val IVM = "\$ion_1_1"

    /**
     * Creates a raw Ion 1.1 text writer that encodes its output as UTF-8 and sends it to [output].
     *
     * @param output the stream that receives the text.
     * @param blockSize the size passed to the basic block allocator provider when vending the allocator that
     *  backs the output appendable.
     * @param options the writer configuration; it is cast to [_Private_IonTextWriterBuilder_1_1], so any other
     *  implementation fails with a ClassCastException.
     */
    @JvmStatic
    fun from(output: OutputStream, blockSize: Int, options: IonTextWriterBuilder_1_1): IonRawTextWriter_1_1 {
        val allocator = BlockAllocatorProviders.basicProvider().vendAllocator(blockSize)
        val sink = BufferedOutputStreamFastAppendable(output, allocator)
        val privateOptions = options as _Private_IonTextWriterBuilder_1_1
        val appender = _Private_IonTextAppender.forFastAppendable(sink, Charsets.UTF_8)
        return IonRawTextWriter_1_1(privateOptions, appender)
    }
}

enum class ContainerType {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1233,7 +1233,9 @@ private void installMacros() {
* Install any new symbols and macros, step out of the encoding directive, and resume reading raw values.
*/
private void finishEncodingDirective() {
resetSymbolTable(); // TODO handle appended symbols
if (!isSymbolTableAppend) {
resetSymbolTable();
}
installSymbols(newSymbols);
installMacros();
stepOutOfContainer();
Expand Down Expand Up @@ -1361,10 +1363,6 @@ void readEncodingDirective() {
newMacros.put(MacroRef.byId(++localMacroMaxOffset), newMacro);
state = State.IN_MACRO_TABLE_SEXP;
break;
case COMPILING_MACRO:
// This state can only be reached during compilation of a macro. Do nothing, as the reader must
// navigate normally while the macro is compiled.
break;
default:
throw new IllegalStateException(state.toString());
}
Expand Down
133 changes: 117 additions & 16 deletions src/main/java/com/amazon/ion/impl/IonReaderTextSystemX.java
Original file line number Diff line number Diff line change
@@ -1,18 +1,5 @@
/*
* Copyright 2007-2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License").
* You may not use this file except in compliance with the License.
* A copy of the License is located at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* or in the "license" file accompanying this file. This file is distributed
* on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
* express or implied. See the License for the specific language governing
* permissions and limitations under the License.
*/

// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0
package com.amazon.ion.impl;

import static com.amazon.ion.impl._Private_ScalarConversions.getValueTypeName;
Expand All @@ -38,11 +25,17 @@
import com.amazon.ion.impl.IonTokenConstsX.CharacterSequence;
import com.amazon.ion.impl._Private_ScalarConversions.AS_TYPE;
import com.amazon.ion.impl._Private_ScalarConversions.CantConvertException;
import com.amazon.ion.impl.macro.EncodingContext;
import com.amazon.ion.impl.macro.EncodingDirectiveReader;
import com.amazon.ion.impl.macro.MacroEvaluator;
import com.amazon.ion.impl.macro.MacroEvaluatorAsIonReader;

import java.io.IOException;
import java.math.BigDecimal;
import java.math.BigInteger;
import java.util.Collections;
import java.util.Date;
import java.lang.Character;
import java.util.List;

/**
* This reader calls the {@link IonReaderTextRawX} for low level events.
Expand All @@ -62,6 +55,18 @@ class IonReaderTextSystemX

SymbolTable _system_symtab;

// The IonReader-like MacroEvaluator that this reader delegates to when evaluating a macro invocation.
// Starts out null; readEncodingDirective() replaces it with a wrapper around a fresh MacroEvaluator each time a
// directive is read.
protected MacroEvaluatorAsIonReader macroEvaluatorIonReader = null;

// The core MacroEvaluator that this reader delegates to when evaluating a macro invocation.
// Starts out null; readEncodingDirective() creates a new instance each time a directive is read.
private MacroEvaluator macroEvaluator = null;

// The encoding context (macro table) that is currently active.
// Starts out null; readEncodingDirective() rebuilds it from the macros declared by each directive.
private EncodingContext encodingContext = null;

// Reads encoding directives from the stream.
// Created lazily by readEncodingDirective() the first time a directive is encountered, then reused.
private EncodingDirectiveReader encodingDirectiveReader = null;

protected IonReaderTextSystemX(UnifiedInputStreamX iis)
{
_system_symtab = _Private_Utils.systemSymtab(1); // TODO check IVM to determine version: amazon-ion/ion-java/issues/19
Expand Down Expand Up @@ -1009,4 +1014,100 @@ public SymbolTable pop_passed_symbol_table()
{
return null;
}

/**
* Sets the active symbol table. In this class the method is deliberately a no-op: the body is empty, so the
* argument is discarded. It is invoked by readEncodingDirective() when a directive replaces (rather than
* appends to) the symbol table.
* NOTE(review): being protected, this is presumably overridden by readers that do track symbol tables - confirm.
* @param symbolTable the symbol table to make active; ignored by this implementation.
*/
protected void setSymbolTable(SymbolTable symbolTable) {
// System readers don't handle symbol tables.
}

/**
 * Reports whether it is safe to start reading a new encoding directive. While a macro is being compiled the
 * MacroCompiler drives this reader, and during that time the reader must not try to interpret what it sees as
 * another encoding directive.
 * @return true if no macro is currently being compiled; false if compilation is under way.
 */
private boolean macroCompilationNotInProgress() {
    if (encodingDirectiveReader == null) {
        // No directive has been read yet, so nothing can be in the middle of compilation.
        return true;
    }
    return !encodingDirectiveReader.isMacroCompilationInProgress();
}

/**
 * Checks the first annotation on the current value. Callers must ensure the value has at least one annotation.
 * @return true if the text of the first annotation is `$ion_encoding`; otherwise, false.
 */
boolean startsWithIonEncoding() {
    // TODO also resolve symbol identifiers and compare against text that looks like $ion_encoding
    String directiveMarker = SystemSymbols_1_1.ION_ENCODING.getText();
    String firstAnnotationText = _annotations[0].getText();
    return directiveMarker.equals(firstAnnotationText);
}

/**
 * Decides whether the current value is an encoding directive: an annotated, non-null s-expression whose first
 * annotation is `$ion_encoding`, seen while no macro is being compiled.
 * @return true if the reader is positioned on an encoding directive; otherwise, false.
 */
private boolean isPositionedOnEncodingDirective() {
    if (_annotation_count <= 0) {
        return false;
    }
    if (_value_type != IonType.SEXP) {
        return false;
    }
    if (isNullValue()) {
        return false;
    }
    if (!macroCompilationNotInProgress()) {
        return false;
    }
    return startsWithIonEncoding();
}

/**
 * Consumes the encoding directive the reader is positioned on and applies what it declares. The reader must be
 * positioned on an s-expression annotated with `$ion_encoding` when this is called.
 * <p>
 * Declared symbols are either appended to the current local symbol table or installed as a brand new local
 * symbol table, depending on what the directive requested. The declared macros become the new encoding context,
 * and a fresh macro evaluator is created.
 */
private void readEncodingDirective() {
    if (encodingDirectiveReader == null) {
        encodingDirectiveReader = new EncodingDirectiveReader(this);
    }
    EncodingDirectiveReader directive = encodingDirectiveReader;
    directive.reset();
    directive.readEncodingDirective();

    List<String> declaredSymbols = directive.getNewSymbols();
    if (!directive.isSymbolTableAppend()) {
        // TODO handle shared symbol table imports declared in the encoding directive
        LocalSymbolTable replacement = new LocalSymbolTable(LocalSymbolTableImports.EMPTY, declaredSymbols);
        setSymbolTable(replacement);
    } else {
        LocalSymbolTable active = (LocalSymbolTable) getSymbolTable();
        for (String symbolText : declaredSymbols) {
            active.putSymbol(symbolText);
        }
    }

    // NOTE(review): getNewMacros() hands over the directive reader's own map, which reset() clears before the
    // next directive is read. Confirm whether EncodingContext copies it or keeps the reference.
    encodingContext = new EncodingContext(directive.getNewMacros());
    macroEvaluator = new MacroEvaluator();
    macroEvaluatorIonReader = new MacroEvaluatorAsIonReader(macroEvaluator);
}

/**
 * Moves the reader forward, when needed and possible, until it rests on a value that is not an Ion 1.1+
 * encoding directive. Directives met at the top level along the way are read and applied, then skipped.
 * @return true if the reader is positioned on a value; otherwise, false.
 */
protected final boolean has_next_system_value() {
    while (!_has_next_called && !_eof) {
        has_next_raw_value();
        boolean onTopLevelDirective = minorVersion > 0
            && _value_type != null
            && IonType.DATAGRAM.equals(getContainerType())
            && isPositionedOnEncodingDirective();
        if (!onTopLevelDirective) {
            break;
        }
        readEncodingDirective();
        // The directive has been consumed; clear the flag so the loop advances to the value after it.
        _has_next_called = false;
    }
    return !_eof;
}

/**
* Delegates to has_next_system_value(), so any Ion 1.1+ encoding directives that precede the next value are
* read along the way.
* @return the result of has_next_system_value(): true if the reader is positioned on a value; otherwise, false.
*/
@Override
public boolean hasNext()
{
return has_next_system_value();
}

/**
* Returns the encoding context built from the most recently read encoding directive.
* @return the {@link EncodingContext} currently active, or {@code null} if none has been installed (the field
* starts out null and is assigned by readEncodingDirective()).
*/
EncodingContext getEncodingContext() {
return encodingContext;
}
}
Loading

0 comments on commit 41a666e

Please sign in to comment.