Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adds support for parsing Ion 1.1 encoding directives in the text reader. #954

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
145 changes: 145 additions & 0 deletions src/main/java/com/amazon/ion/impl/EncodingDirectiveReader.kt
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0
package com.amazon.ion.impl.macro

import com.amazon.ion.*
import com.amazon.ion.impl.*
import com.amazon.ion.impl.macro.MacroRef.Companion.byId

/**
 * Reads Ion 1.1 encoding directives from the given [IonReader].
 *
 * Usage: position [reader] on the directive's s-expression, call [reset], then call [readEncodingDirective].
 * Afterwards, [newSymbols], [newMacros] and [isSymbolTableAppend] describe what the directive declared.
 *
 * @param reader the reader to pull the directive's contents from. The same reader is handed to the
 *  [MacroCompiler], which drives it directly while a macro definition is being compiled.
 */
class EncodingDirectiveReader(private val reader: IonReader) {

    // Resolves references to macros declared earlier in the directive via `newMacros`. The lambda reads the
    // `newMacros` property lazily, so it always sees the map that is current at lookup time.
    private val macroCompiler: MacroCompiler = MacroCompilerIonReader(reader) { key: Any? -> newMacros[key] }

    // Address of the most recently declared macro; -1 means "none declared yet".
    private var localMacroMaxOffset: Int = -1
    private var state: State = State.READING_VALUE

    /** True if the directive's symbol_table clause started with `$ion_encoding` (append to the current symbols). */
    var isSymbolTableAppend = false

    /** Symbol text declared by the directive, in declaration order. */
    var newSymbols: MutableList<String> = ArrayList(8)

    /** Macros declared by the directive, keyed by their numeric address. */
    var newMacros: MutableMap<MacroRef, Macro> = HashMap()

    private enum class State {
        IN_ION_ENCODING_SEXP,
        IN_SYMBOL_TABLE_SEXP,
        IN_SYMBOL_TABLE_LIST,
        IN_MACRO_TABLE_SEXP,
        COMPILING_MACRO,
        READING_VALUE
    }

    /**
     * Chooses the next state from the leading text token of a clause inside the directive. The reader must be
     * positioned on that token.
     *
     * @throws IonException if the clause is neither a symbol table nor a macro table.
     */
    private fun classifySexpWithinEncodingDirective() {
        val name: String = reader.stringValue()
        state = when (name) {
            SystemSymbols_1_1.SYMBOL_TABLE.text -> State.IN_SYMBOL_TABLE_SEXP
            SystemSymbols_1_1.MACRO_TABLE.text -> State.IN_MACRO_TABLE_SEXP
            else -> throw IonException("\$ion_encoding expressions '$name' not supported.")
        }
    }

    /**
     * Interprets the current value within a symbol_table clause: either the `$ion_encoding` marker (which must be
     * followed by a list and flags the table as an append) or the list of symbols itself. On success the reader is
     * stepped into the list.
     *
     * @throws IonException if the value is neither of the above, or if the append marker appears twice.
     */
    private fun classifySymbolTable() {
        val type: IonType = reader.type
        if (IonType.isText(type)) {
            // The append marker is only accepted once per directive.
            if (SystemSymbols.ION_ENCODING != reader.stringValue() || isSymbolTableAppend) {
                throw IonException("symbol_table s-expression must begin with either \$ion_encoding or a list.")
            }
            if (reader.next() == null || reader.type != IonType.LIST) {
                throw IonException("symbol_table s-expression must begin with a list.")
            }
            isSymbolTableAppend = true
        } else if (type != IonType.LIST) {
            throw IonException("symbol_table s-expression must begin with either \$ion_encoding or a list.")
        }
        reader.stepIn()
        state = State.IN_SYMBOL_TABLE_LIST
    }

    /**
     * Reads an encoding directive. The reader must be positioned on the directive's s-expression; this method
     * steps into it, consumes its clauses, and steps back out before returning. After this method returns, the
     * caller should access this class's properties to retrieve the symbols and macros declared within the
     * directive.
     *
     * @throws IonException if the directive's contents do not have a supported shape.
     */
    fun readEncodingDirective() {
        reader.stepIn()
        state = State.IN_ION_ENCODING_SEXP
        while (true) {
            when (state) {

                State.IN_ION_ENCODING_SEXP -> {
                    if (reader.next() == null) {
                        // End of the directive.
                        reader.stepOut()
                        state = State.READING_VALUE
                        return
                    }
                    if (reader.type != IonType.SEXP) {
                        throw IonException("Ion encoding directives must contain only s-expressions.")
                    }
                    reader.stepIn()
                    if (reader.next() == null || !IonType.isText(reader.type)) {
                        throw IonException("S-expressions within encoding directives must begin with a text token.")
                    }
                    classifySexpWithinEncodingDirective()
                }

                State.IN_SYMBOL_TABLE_SEXP -> {
                    if (reader.next() == null) {
                        reader.stepOut()
                        state = State.IN_ION_ENCODING_SEXP
                    } else {
                        classifySymbolTable()
                    }
                }

                State.IN_SYMBOL_TABLE_LIST -> {
                    if (reader.next() == null) {
                        reader.stepOut()
                        state = State.IN_SYMBOL_TABLE_SEXP
                    } else {
                        if (!IonType.isText(reader.type)) {
                            throw IonException("The symbol_table must contain text.")
                        }
                        newSymbols.add(reader.stringValue())
                    }
                }

                State.IN_MACRO_TABLE_SEXP -> {
                    if (reader.next() == null) {
                        reader.stepOut()
                        state = State.IN_ION_ENCODING_SEXP
                    } else {
                        if (reader.type != IonType.SEXP) {
                            throw IonException("macro_table s-expression must contain s-expressions.")
                        }
                        state = State.COMPILING_MACRO
                        val newMacro: Macro = try {
                            macroCompiler.compileMacro()
                        } finally {
                            // Leave the COMPILING_MACRO state even if compilation throws, so that
                            // isMacroCompilationInProgress() never reports a compilation that has ended.
                            state = State.IN_MACRO_TABLE_SEXP
                        }
                        newMacros[byId(++localMacroMaxOffset)] = newMacro
                    }
                }

                // TODO handle other legal encoding directive s-expression shapes.
                // TODO add strict enforcement of the schema around e.g. repeats

                // Neither state is ever current at the top of this loop.
                State.COMPILING_MACRO, State.READING_VALUE -> throw IllegalStateException(state.toString())
            }
        }
    }

    /**
     * @return true if the reader is currently being used by the [MacroCompiler].
     */
    fun isMacroCompilationInProgress(): Boolean {
        return state == State.COMPILING_MACRO
    }

    /**
     * Prepares the EncodingDirectiveReader to read a new encoding directive. Discards everything collected from
     * the previous directive and restarts macro address assignment, so that the first macro of the next directive
     * is registered at address 0 in the freshly emptied [newMacros].
     */
    fun reset() {
        isSymbolTableAppend = false
        newSymbols.clear()
        newMacros.clear()
        localMacroMaxOffset = -1
        state = State.READING_VALUE
    }
}
16 changes: 16 additions & 0 deletions src/main/java/com/amazon/ion/impl/IonRawTextWriter_1_1.kt
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,12 @@ package com.amazon.ion.impl

import com.amazon.ion.*
import com.amazon.ion.impl.IonRawTextWriter_1_1.ContainerType.*
import com.amazon.ion.impl.IonRawTextWriter_1_1.ContainerType.List
import com.amazon.ion.impl.bin.*
import com.amazon.ion.impl.macro.*
import com.amazon.ion.system.*
import com.amazon.ion.util.*
import java.io.OutputStream
import java.math.BigDecimal
import java.math.BigInteger

Expand All @@ -25,6 +29,18 @@ class IonRawTextWriter_1_1 internal constructor(

companion object {
const val IVM = "\$ion_1_1"

/**
 * Creates a raw Ion 1.1 text writer that writes to [output].
 *
 * The stream is wrapped in a [BufferedOutputStreamFastAppendable] whose block allocator is vended for
 * [blockSize], and text is appended as UTF-8.
 *
 * @param output the stream that receives the written text.
 * @param blockSize the block size passed to the allocator provider.
 * @param options writer configuration; it is cast to [_Private_IonTextWriterBuilder_1_1], so the cast fails
 *  for any other implementation.
 * @return a new [IonRawTextWriter_1_1] backed by [output].
 */
@JvmStatic
fun from(output: OutputStream, blockSize: Int, options: IonTextWriterBuilder_1_1): IonRawTextWriter_1_1 {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This mirrors a utility I added on the binary side for testing.

val bufferedOutput = BufferedOutputStreamFastAppendable(
output,
BlockAllocatorProviders.basicProvider().vendAllocator(blockSize)
)
return IonRawTextWriter_1_1(
options as _Private_IonTextWriterBuilder_1_1,
_Private_IonTextAppender.forFastAppendable(bufferedOutput, Charsets.UTF_8)
)
}
}

enum class ContainerType {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1233,7 +1233,9 @@ private void installMacros() {
* Install any new symbols and macros, step out of the encoding directive, and resume reading raw values.
*/
private void finishEncodingDirective() {
resetSymbolTable(); // TODO handle appended symbols
if (!isSymbolTableAppend) {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a case where writing the code to do what the TODO said to do would have been faster than writing the TODO itself.

resetSymbolTable();
}
installSymbols(newSymbols);
installMacros();
stepOutOfContainer();
Expand Down Expand Up @@ -1361,10 +1363,6 @@ void readEncodingDirective() {
newMacros.put(MacroRef.byId(++localMacroMaxOffset), newMacro);
state = State.IN_MACRO_TABLE_SEXP;
break;
case COMPILING_MACRO:
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I found that this state is unreachable due to a check elsewhere in the reader.

// This state can only be reached during compilation of a macro. Do nothing, as the reader must
// navigate normally while the macro is compiled.
break;
default:
throw new IllegalStateException(state.toString());
}
Expand Down
133 changes: 117 additions & 16 deletions src/main/java/com/amazon/ion/impl/IonReaderTextSystemX.java
Original file line number Diff line number Diff line change
@@ -1,18 +1,5 @@
/*
* Copyright 2007-2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License").
* You may not use this file except in compliance with the License.
* A copy of the License is located at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* or in the "license" file accompanying this file. This file is distributed
* on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
* express or implied. See the License for the specific language governing
* permissions and limitations under the License.
*/

// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0
package com.amazon.ion.impl;

import static com.amazon.ion.impl._Private_ScalarConversions.getValueTypeName;
Expand All @@ -38,11 +25,17 @@
import com.amazon.ion.impl.IonTokenConstsX.CharacterSequence;
import com.amazon.ion.impl._Private_ScalarConversions.AS_TYPE;
import com.amazon.ion.impl._Private_ScalarConversions.CantConvertException;
import com.amazon.ion.impl.macro.EncodingContext;
import com.amazon.ion.impl.macro.EncodingDirectiveReader;
import com.amazon.ion.impl.macro.MacroEvaluator;
import com.amazon.ion.impl.macro.MacroEvaluatorAsIonReader;

import java.io.IOException;
import java.math.BigDecimal;
import java.math.BigInteger;
import java.util.Collections;
import java.util.Date;
import java.lang.Character;
import java.util.List;

/**
* This reader calls the {@link IonReaderTextRawX} for low level events.
Expand All @@ -62,6 +55,18 @@ class IonReaderTextSystemX

SymbolTable _system_symtab;

// The IonReader-like MacroEvaluator that this core reader delegates to when evaluating a macro invocation.
protected MacroEvaluatorAsIonReader macroEvaluatorIonReader = null;

// The core MacroEvaluator that this core reader delegates to when evaluating a macro invocation.
private MacroEvaluator macroEvaluator = null;

// The encoding context (macro table) that is currently active.
private EncodingContext encodingContext = null;

Comment on lines +59 to +65
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These will start to be used in the future PR that adds macro evaluation.

// Reads encoding directives from the stream.
private EncodingDirectiveReader encodingDirectiveReader = null;

protected IonReaderTextSystemX(UnifiedInputStreamX iis)
{
_system_symtab = _Private_Utils.systemSymtab(1); // TODO check IVM to determine version: amazon-ion/ion-java/issues/19
Expand Down Expand Up @@ -1009,4 +1014,100 @@ public SymbolTable pop_passed_symbol_table()
{
return null;
}

/**
 * Hook for installing a new active symbol table. At the system level this does nothing; it exists so
 * that code in this class can request a symbol table change without knowing whether the concrete reader
 * tracks one.
 * @param symbolTable the symbol table to make active.
 */
protected void setSymbolTable(SymbolTable symbolTable) {
    // Intentionally empty: system readers don't handle symbol tables.
}

/**
 * Reports whether it is safe to interpret the current value as an encoding directive. During macro
 * compilation the MacroCompiler drives this reader, and the reader must not start reading another encoding
 * directive until that finishes.
 * @return true if no macro is being compiled (including when no directive has ever been read); false if a
 *  macro compilation is underway.
 */
private boolean macroCompilationNotInProgress() {
    if (encodingDirectiveReader == null) {
        // No directive reader has been created yet, so nothing can be compiling.
        return true;
    }
    return !encodingDirectiveReader.isMacroCompilationInProgress();
}

/**
 * @return true if current value has a sequence of annotations that begins with `$ion_encoding`; otherwise, false.
 *  A value with no annotations never matches.
 */
boolean startsWithIonEncoding() {
    // TODO also resolve symbol identifiers and compare against text that looks like $ion_encoding
    if (_annotation_count <= 0) {
        // Without this check, slot 0 of the annotation array would be read even though the current value
        // has no annotations.
        return false;
    }
    return SystemSymbols_1_1.ION_ENCODING.getText().equals(_annotations[0].getText());
}

/**
 * Decides whether the current value is an encoding directive: an annotated, non-null s-expression whose
 * first annotation is `$ion_encoding`, encountered while no macro is being compiled.
 * @return true if the reader is positioned on an encoding directive; otherwise, false.
 */
private boolean isPositionedOnEncodingDirective() {
    if (_annotation_count <= 0) {
        return false;
    }
    if (_value_type != IonType.SEXP) {
        return false;
    }
    if (isNullValue()) {
        return false;
    }
    if (!macroCompilationNotInProgress()) {
        return false;
    }
    return startsWithIonEncoding();
}

/**
 * Reads an encoding directive and installs any symbols and/or macros found within. Upon calling this method,
 * the reader must be positioned on an s-expression annotated with `$ion_encoding`.
 *
 * Symbols are appended to the active symbol table when the directive requests an append and the active table
 * is a {@link LocalSymbolTable}. In every other case a new local symbol table holding only the directive's
 * symbols is passed to {@link #setSymbolTable(SymbolTable)}.
 */
private void readEncodingDirective() {
    if (encodingDirectiveReader == null) {
        encodingDirectiveReader = new EncodingDirectiveReader(this);
    }
    encodingDirectiveReader.reset();
    encodingDirectiveReader.readEncodingDirective();
    List<String> newSymbols = encodingDirectiveReader.getNewSymbols();
    // Only consult the active table when an append was requested.
    SymbolTable appendTarget = encodingDirectiveReader.isSymbolTableAppend() ? getSymbolTable() : null;
    if (appendTarget instanceof LocalSymbolTable) {
        LocalSymbolTable current = (LocalSymbolTable) appendTarget;
        for (String appendedSymbol : newSymbols) {
            current.putSymbol(appendedSymbol);
        }
    } else {
        // Either the directive replaces the symbol table, or it asked to append but the active table is not
        // a LocalSymbolTable, so there is nothing to append to. An unconditional cast would throw
        // ClassCastException in the latter case; start from a fresh table instead.
        setSymbolTable(new LocalSymbolTable(
            // TODO handle shared symbol table imports declared in the encoding directive
            LocalSymbolTableImports.EMPTY,
            newSymbols
        ));
    }
    // NOTE(review): this passes the directive reader's own macro map, which its reset() clears on the next
    // directive. Confirm that EncodingContext copies it or that the sharing is intended.
    encodingContext = new EncodingContext(encodingDirectiveReader.getNewMacros());
    macroEvaluator = new MacroEvaluator();
    macroEvaluatorIonReader = new MacroEvaluatorAsIonReader(macroEvaluator);
}

/**
 * Advances the reader, if necessary and possible, to the next value, reading any Ion 1.1+ encoding directives
 * found along the way. Encoding directives are consumed here and are not surfaced as values: after one is
 * read, the loop advances again.
 * @return true if the reader is positioned on a value; otherwise, false.
 */
protected final boolean has_next_system_value() {
// Loop until a value that is not an encoding directive is found or the end of input is reached.
while (!_has_next_called && !_eof) {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This mimics the existing naming convention for the has_next methods. Previously, no system-level has_next was needed because the system-level reader did not interpret any system values. However, as in the binary reader, macro compilation and evaluation has to happen one layer below Ion 1.0 system table handling since Ion 1.1 macros can expand to system values.

has_next_raw_value();
// Directives are only recognized when minorVersion > 0, a value is present, and the reader is at the top level.
if (minorVersion > 0 && _value_type != null && IonType.DATAGRAM.equals(getContainerType()) && isPositionedOnEncodingDirective()) {
readEncodingDirective();
// Clear the flag so the loop condition holds again and the next raw value is fetched.
_has_next_called = false;
continue;
}
break;
}
return !_eof;
}

/**
 * Delegates to {@link #has_next_system_value()}, so that encoding directives are consumed before a value
 * is reported.
 * @return true if the reader is positioned on a value; otherwise, false.
 */
@Override
public boolean hasNext()
{
    final boolean positionedOnValue = has_next_system_value();
    return positionedOnValue;
}

/**
 * Accessor for the encoding context built from the most recently read encoding directive.
 * @return the {@link EncodingContext} currently active, or {@code null} if no encoding directive has been
 *  read yet.
 */
EncodingContext getEncodingContext() {
    return this.encodingContext;
}
}
Loading
Loading