-
Notifications
You must be signed in to change notification settings - Fork 110
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Adds support for parsing Ion 1.1 encoding directives in the text reader. #954
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,145 @@ | ||
// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. | ||
// SPDX-License-Identifier: Apache-2.0 | ||
package com.amazon.ion.impl.macro | ||
|
||
import com.amazon.ion.* | ||
import com.amazon.ion.impl.* | ||
import com.amazon.ion.impl.macro.MacroRef.Companion.byId | ||
|
||
/**
 * Reads Ion 1.1 encoding directives from the given [IonReader].
 *
 * The reader must be positioned on the directive's s-expression (not yet stepped in) when
 * [readEncodingDirective] is called. After that call returns, the symbols and macros declared by the
 * directive are available through [newSymbols], [newMacros] and [isSymbolTableAppend]. Call [reset] before
 * reading each subsequent directive.
 */
class EncodingDirectiveReader(private val reader: IonReader) {

    // The lookup callback handed to the compiler reads from [newMacros]; it is only evaluated lazily, so
    // referencing the property before its declaration is safe.
    private val macroCompiler: MacroCompiler = MacroCompilerIonReader(reader) { key: Any? -> newMacros[key] }

    // Id most recently assigned to a compiled macro; -1 until the first macro is compiled.
    // NOTE(review): reset() clears newMacros but leaves this counter untouched, so ids keep increasing
    // across directives - confirm that this is intended.
    private var localMacroMaxOffset: Int = -1
    private var state: State = State.READING_VALUE

    /** True once a `symbol_table` s-expression that starts with `$ion_encoding` has been seen. */
    var isSymbolTableAppend = false

    /** Symbol text collected from the directive's symbol table list(s), in declaration order. */
    var newSymbols: MutableList<String> = ArrayList(8)

    /** Macros compiled from the directive's macro table, keyed by their numeric id. */
    var newMacros: MutableMap<MacroRef, Macro> = HashMap()

    private enum class State {
        IN_ION_ENCODING_SEXP,
        IN_SYMBOL_TABLE_SEXP,
        IN_SYMBOL_TABLE_LIST,
        IN_MACRO_TABLE_SEXP,
        COMPILING_MACRO,
        READING_VALUE
    }

    /**
     * Selects the next state from the leading text token of an s-expression nested directly inside the
     * encoding directive.
     *
     * @throws IonException if the token names anything other than the symbol table or the macro table.
     */
    private fun classifySexpWithinEncodingDirective() {
        val name: String = reader.stringValue()
        state = when (name) {
            SystemSymbols_1_1.SYMBOL_TABLE.text -> State.IN_SYMBOL_TABLE_SEXP
            SystemSymbols_1_1.MACRO_TABLE.text -> State.IN_MACRO_TABLE_SEXP
            else -> throw IonException("\$ion_encoding expressions '$name' not supported.")
        }
    }

    /**
     * Validates the value the reader is positioned on inside a `symbol_table` s-expression and steps into
     * the list of symbols.
     *
     * A text token equal to `$ion_encoding` (accepted at most once) marks the table as an append and must be
     * followed by a list; otherwise the value itself must be a list.
     *
     * @throws IonException if the value is neither of the accepted shapes.
     */
    private fun classifySymbolTable() {
        val type: IonType = reader.type
        if (IonType.isText(type)) {
            if (SystemSymbols.ION_ENCODING != reader.stringValue() || isSymbolTableAppend) {
                throw IonException("symbol_table s-expression must begin with either \$ion_encoding or a list.")
            }
            if (reader.next() == null || reader.type != IonType.LIST) {
                throw IonException("symbol_table s-expression must begin with a list.")
            }
            isSymbolTableAppend = true
        } else if (type != IonType.LIST) {
            throw IonException("symbol_table s-expression must begin with either \$ion_encoding or a list.")
        }
        reader.stepIn()
        state = State.IN_SYMBOL_TABLE_LIST
    }

    /**
     * Reads an encoding directive. After this method returns, the caller should access this class's properties to
     * retrieve the symbols and macros declared within the directive.
     *
     * @throws IonException if the directive does not have one of the supported shapes.
     */
    fun readEncodingDirective() {
        reader.stepIn()
        state = State.IN_ION_ENCODING_SEXP
        while (true) {
            when (state) {

                State.IN_ION_ENCODING_SEXP -> {
                    if (reader.next() == null) {
                        // End of the directive: leave the reader just past it and stop.
                        reader.stepOut()
                        state = State.READING_VALUE
                        return
                    }
                    if (reader.type != IonType.SEXP) {
                        throw IonException("Ion encoding directives must contain only s-expressions.")
                    }
                    reader.stepIn()
                    if (reader.next() == null || !IonType.isText(reader.type)) {
                        throw IonException("S-expressions within encoding directives must begin with a text token.")
                    }
                    classifySexpWithinEncodingDirective()
                }

                State.IN_SYMBOL_TABLE_SEXP -> {
                    if (reader.next() == null) {
                        reader.stepOut()
                        state = State.IN_ION_ENCODING_SEXP
                    } else {
                        classifySymbolTable()
                    }
                }

                State.IN_SYMBOL_TABLE_LIST -> {
                    if (reader.next() == null) {
                        reader.stepOut()
                        state = State.IN_SYMBOL_TABLE_SEXP
                    } else {
                        if (!IonType.isText(reader.type)) {
                            throw IonException("The symbol_table must contain text.")
                        }
                        newSymbols.add(reader.stringValue())
                    }
                }

                State.IN_MACRO_TABLE_SEXP -> {
                    if (reader.next() == null) {
                        reader.stepOut()
                        state = State.IN_ION_ENCODING_SEXP
                    } else {
                        if (reader.type != IonType.SEXP) {
                            throw IonException("macro_table s-expression must contain s-expressions.")
                        }
                        state = State.COMPILING_MACRO
                        val newMacro: Macro = try {
                            macroCompiler.compileMacro()
                        } finally {
                            // Always leave the COMPILING_MACRO state, even when compilation throws; otherwise
                            // isMacroCompilationInProgress() would keep reporting true after the failure.
                            state = State.IN_MACRO_TABLE_SEXP
                        }
                        newMacros[byId(++localMacroMaxOffset)] = newMacro
                    }
                }

                // TODO handle other legal encoding directive s-expression shapes.
                // TODO add strict enforcement of the schema around e.g. repeats

                // Neither state is a valid position for this loop; listing them explicitly (instead of
                // `else`) lets the compiler flag any state added later.
                State.COMPILING_MACRO, State.READING_VALUE -> throw IllegalStateException(state.toString())
            }
        }
    }

    /**
     * @return true if the reader is currently being used by the [MacroCompiler].
     */
    fun isMacroCompilationInProgress(): Boolean = state == State.COMPILING_MACRO

    /**
     * Prepares the EncodingDirectiveReader to read a new encoding directive by clearing the append flag and
     * the collected symbols and macros.
     */
    fun reset() {
        isSymbolTableAppend = false
        newSymbols.clear()
        newMacros.clear()
    }
}
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1233,7 +1233,9 @@ private void installMacros() { | |
* Install any new symbols and macros, step out of the encoding directive, and resume reading raw values. | ||
*/ | ||
private void finishEncodingDirective() { | ||
resetSymbolTable(); // TODO handle appended symbols | ||
if (!isSymbolTableAppend) { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This is a case where writing the code to do what the TODO said to do would have been faster than writing the TODO itself. |
||
resetSymbolTable(); | ||
} | ||
installSymbols(newSymbols); | ||
installMacros(); | ||
stepOutOfContainer(); | ||
|
@@ -1361,10 +1363,6 @@ void readEncodingDirective() { | |
newMacros.put(MacroRef.byId(++localMacroMaxOffset), newMacro); | ||
state = State.IN_MACRO_TABLE_SEXP; | ||
break; | ||
case COMPILING_MACRO: | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I found that this state is unreachable due to a check elsewhere in the reader. |
||
// This state can only be reached during compilation of a macro. Do nothing, as the reader must | ||
// navigate normally while the macro is compiled. | ||
break; | ||
default: | ||
throw new IllegalStateException(state.toString()); | ||
} | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,18 +1,5 @@ | ||
/* | ||
* Copyright 2007-2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"). | ||
* You may not use this file except in compliance with the License. | ||
* A copy of the License is located at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* or in the "license" file accompanying this file. This file is distributed | ||
* on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either | ||
* express or implied. See the License for the specific language governing | ||
* permissions and limitations under the License. | ||
*/ | ||
|
||
// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. | ||
// SPDX-License-Identifier: Apache-2.0 | ||
package com.amazon.ion.impl; | ||
|
||
import static com.amazon.ion.impl._Private_ScalarConversions.getValueTypeName; | ||
|
@@ -38,11 +25,17 @@ | |
import com.amazon.ion.impl.IonTokenConstsX.CharacterSequence; | ||
import com.amazon.ion.impl._Private_ScalarConversions.AS_TYPE; | ||
import com.amazon.ion.impl._Private_ScalarConversions.CantConvertException; | ||
import com.amazon.ion.impl.macro.EncodingContext; | ||
import com.amazon.ion.impl.macro.EncodingDirectiveReader; | ||
import com.amazon.ion.impl.macro.MacroEvaluator; | ||
import com.amazon.ion.impl.macro.MacroEvaluatorAsIonReader; | ||
|
||
import java.io.IOException; | ||
import java.math.BigDecimal; | ||
import java.math.BigInteger; | ||
import java.util.Collections; | ||
import java.util.Date; | ||
import java.lang.Character; | ||
import java.util.List; | ||
|
||
/** | ||
* This reader calls the {@link IonReaderTextRawX} for low level events. | ||
|
@@ -62,6 +55,18 @@ class IonReaderTextSystemX | |
|
||
SymbolTable _system_symtab; | ||
|
||
// The IonReader-like MacroEvaluator that this core reader delegates to when evaluating a macro invocation. | ||
protected MacroEvaluatorAsIonReader macroEvaluatorIonReader = null; | ||
|
||
// The core MacroEvaluator that this core reader delegates to when evaluating a macro invocation. | ||
private MacroEvaluator macroEvaluator = null; | ||
|
||
// The encoding context (macro table) that is currently active. | ||
private EncodingContext encodingContext = null; | ||
|
||
Comment on lines
+59
to
+65
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. These will start to be used in the future PR that adds macro evaluation. |
||
// Reads encoding directives from the stream. | ||
private EncodingDirectiveReader encodingDirectiveReader = null; | ||
|
||
protected IonReaderTextSystemX(UnifiedInputStreamX iis) | ||
{ | ||
_system_symtab = _Private_Utils.systemSymtab(1); // TODO check IVM to determine version: amazon-ion/ion-java/issues/19 | ||
|
@@ -1009,4 +1014,100 @@ public SymbolTable pop_passed_symbol_table() | |
{ | ||
return null; | ||
} | ||
|
||
/** | ||
* Sets the active symbol table. | ||
* @param symbolTable the symbol table to make active. | ||
*/ | ||
protected void setSymbolTable(SymbolTable symbolTable) { | ||
// System readers don't handle symbol tables. | ||
} | ||
|
||
/** | ||
* While reading an encoding directive, the reader allows itself to be controlled by the MacroCompiler during | ||
* compilation of a macro. While this is happening, the reader should never attempt to read another encoding | ||
* directive. | ||
* @return true if the reader is not in the process of compiling a macro; false if it is. | ||
*/ | ||
private boolean macroCompilationNotInProgress() { | ||
return encodingDirectiveReader == null || !encodingDirectiveReader.isMacroCompilationInProgress(); | ||
} | ||
|
||
/** | ||
* @return true if current value has a sequence of annotations that begins with `$ion_encoding`; otherwise, false. | ||
*/ | ||
boolean startsWithIonEncoding() { | ||
// TODO also resolve symbol identifiers and compare against text that looks like $ion_encoding | ||
return SystemSymbols_1_1.ION_ENCODING.getText().equals(_annotations[0].getText()); | ||
} | ||
|
||
/** | ||
* @return true if the reader is positioned on an encoding directive; otherwise, false. | ||
*/ | ||
private boolean isPositionedOnEncodingDirective() { | ||
return _annotation_count > 0 | ||
&& _value_type == IonType.SEXP | ||
&& !isNullValue() | ||
&& macroCompilationNotInProgress() | ||
&& startsWithIonEncoding(); | ||
} | ||
|
||
/** | ||
* Reads an encoding directive and installs any symbols and/or macros found within. Upon calling this method, | ||
* the reader must be positioned on an s-expression annotated with `$ion_encoding`. | ||
*/ | ||
private void readEncodingDirective() { | ||
if (encodingDirectiveReader == null) { | ||
encodingDirectiveReader = new EncodingDirectiveReader(this); | ||
} | ||
encodingDirectiveReader.reset(); | ||
encodingDirectiveReader.readEncodingDirective(); | ||
List<String> newSymbols = encodingDirectiveReader.getNewSymbols(); | ||
if (encodingDirectiveReader.isSymbolTableAppend()) { | ||
LocalSymbolTable current = ((LocalSymbolTable) getSymbolTable()); | ||
for (String appendedSymbol : newSymbols) { | ||
current.putSymbol(appendedSymbol); | ||
} | ||
} else { | ||
setSymbolTable(new LocalSymbolTable( | ||
// TODO handle shared symbol table imports declared in the encoding directive | ||
LocalSymbolTableImports.EMPTY, | ||
newSymbols | ||
)); | ||
} | ||
encodingContext = new EncodingContext(encodingDirectiveReader.getNewMacros()); | ||
macroEvaluator = new MacroEvaluator(); | ||
macroEvaluatorIonReader = new MacroEvaluatorAsIonReader(macroEvaluator); | ||
} | ||
|
||
/** | ||
* Advances the reader, if necessary and possible, to the next value, reading any Ion 1.1+ encoding directives | ||
* found along the way. | ||
* @return true if the reader is positioned on a value; otherwise, false. | ||
*/ | ||
protected final boolean has_next_system_value() { | ||
while (!_has_next_called && !_eof) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This mimics the existing naming convention for the has_next methods. Previously, no |
||
has_next_raw_value(); | ||
if (minorVersion > 0 && _value_type != null && IonType.DATAGRAM.equals(getContainerType()) && isPositionedOnEncodingDirective()) { | ||
readEncodingDirective(); | ||
_has_next_called = false; | ||
continue; | ||
} | ||
break; | ||
} | ||
return !_eof; | ||
} | ||
|
||
@Override | ||
public boolean hasNext() | ||
{ | ||
return has_next_system_value(); | ||
} | ||
|
||
/** | ||
* @return the {@link EncodingContext} currently active, or {@code null}. | ||
*/ | ||
EncodingContext getEncodingContext() { | ||
return encodingContext; | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This mirrors a utility I added on the binary side for testing.