Skip to content

Commit

Permalink
Adds support for text and lob types and some annotations (#637)
Browse files Browse the repository at this point in the history
  • Loading branch information
popematt committed Nov 16, 2023
1 parent 4bebe46 commit b08cbc4
Show file tree
Hide file tree
Showing 4 changed files with 345 additions and 2 deletions.
128 changes: 128 additions & 0 deletions src/com/amazon/ion/impl/bin/IonEncoder_1_1.java
Original file line number Diff line number Diff line change
@@ -1,11 +1,15 @@
package com.amazon.ion.impl.bin;

import com.amazon.ion.Decimal;
import com.amazon.ion.IonText;
import com.amazon.ion.IonType;
import com.amazon.ion.Timestamp;
import com.amazon.ion.impl.bin.utf8.Utf8StringEncoder;
import com.amazon.ion.impl.bin.utf8.Utf8StringEncoderPool;

import java.math.BigDecimal;
import java.math.BigInteger;
import java.nio.charset.StandardCharsets;

import static com.amazon.ion.impl.bin.Ion_1_1_Constants.*;
import static java.lang.Double.doubleToRawLongBits;
Expand Down Expand Up @@ -428,4 +432,128 @@ static int writeLongFormTimestampValue(WriteBuffer buffer, Timestamp value) {
return 1 + WriteBuffer.flexUIntLength(dataLength) + dataLength;
}

/**
 * Encodes an Ion String in the Ion 1.1 binary format and appends it to the given WriteBuffer.
 * The work is delegated to {@code writeInlineText} using the String opcodes.
 *
 * @param buffer the WriteBuffer that receives the encoded bytes
 * @param value the text to encode; {@code null} is handed to {@code writeNullValue} as a STRING null
 * @return the number of bytes written
 */
public static int writeStringValue(WriteBuffer buffer, String value) {
    final int bytesWritten = writeInlineText(
        buffer,
        value,
        IonType.STRING,
        OpCodes.STRING_ZERO_LENGTH,
        OpCodes.VARIABLE_LENGTH_STRING
    );
    return bytesWritten;
}

/**
 * Encodes an Ion Symbol with inline text in the Ion 1.1 binary format and appends it to the
 * given WriteBuffer. The work is delegated to {@code writeInlineText} using the inline-symbol opcodes.
 *
 * @param buffer the WriteBuffer that receives the encoded bytes
 * @param value the symbol text to encode; {@code null} is handed to {@code writeNullValue} as a SYMBOL null
 * @return the number of bytes written
 */
public static int writeSymbolValue(WriteBuffer buffer, String value) {
    final int bytesWritten = writeInlineText(
        buffer,
        value,
        IonType.SYMBOL,
        OpCodes.INLINE_SYMBOL_ZERO_LENGTH,
        OpCodes.VARIABLE_LENGTH_INLINE_SYMBOL
    );
    return bytesWritten;
}

/**
 * Shared implementation for values that carry inline UTF-8 text (Strings and inline Symbols).
 * <p>
 * When the UTF-8 encoding is at most 0xF bytes long, the length is OR-ed into
 * {@code zeroLengthOpCode} and no separate length is written. Otherwise
 * {@code variableLengthOpCode} is written, followed by the length as a FlexUInt.
 * In both cases the UTF-8 bytes follow.
 *
 * @param buffer the WriteBuffer that receives the encoded bytes
 * @param value the text to encode; {@code null} is handed to {@code writeNullValue} with {@code type}
 * @param type the Ion type used when {@code value} is {@code null}
 * @param zeroLengthOpCode the opcode for zero-length text of this type
 * @param variableLengthOpCode the opcode for text whose length is written separately
 * @return the number of bytes written
 */
private static int writeInlineText(WriteBuffer buffer, String value, IonType type, byte zeroLengthOpCode, byte variableLengthOpCode) {
    if (value == null) {
        return writeNullValue(buffer, type);
    }

    // TODO: When merging into the Ion 1.1 raw writer, keep a single instance of the Utf8StringEncoder
    //       instead of fetching one on every call.
    // NOTE(review): the encoder fetched from the pool is never explicitly released in this method;
    //       confirm whether the pool expects it to be closed/returned once the bytes have been copied.
    Utf8StringEncoder.Result encoded = Utf8StringEncoderPool.getInstance().getOrCreate().encode(value);
    final byte[] utf8Bytes = encoded.getBuffer();
    final int utf8Length = encoded.getEncodedLength();

    final int lengthPrefixSize;
    if (utf8Length > 0xF) {
        // Too long for the opcode to carry the length: write it explicitly after the opcode.
        buffer.writeByte(variableLengthOpCode);
        lengthPrefixSize = buffer.writeFlexUInt(utf8Length);
    } else {
        // Short text: the length is carried by the opcode itself.
        buffer.writeByte((byte) (zeroLengthOpCode | utf8Length));
        lengthPrefixSize = 0;
    }

    // Only the encoded length is written, starting at offset 0 of the encoder's buffer.
    buffer.writeBytes(utf8Bytes, 0, utf8Length);
    return 1 + lengthPrefixSize + utf8Length;
}

/**
 * Writes an interned Symbol to the given WriteBuffer by its symbol address, using the
 * Ion 1.1 encoding for Ion Symbols.
 * <p>
 * The form is selected by the size of the address:
 * <ul>
 *   <li>below {@code FIRST_2_BYTE_SYMBOL_ADDRESS}: {@code SYMBOL_ADDRESS_1_BYTE} followed by the
 *       address as a FixedUInt;</li>
 *   <li>below {@code FIRST_MANY_BYTE_SYMBOL_ADDRESS}: {@code SYMBOL_ADDRESS_2_BYTES} followed by
 *       the address minus {@code FIRST_2_BYTE_SYMBOL_ADDRESS} in 2 bytes;</li>
 *   <li>otherwise: {@code SYMBOL_ADDRESS_MANY_BYTES} followed by the address minus
 *       {@code FIRST_MANY_BYTE_SYMBOL_ADDRESS} as a FlexUInt.</li>
 * </ul>
 *
 * @param buffer the WriteBuffer that receives the encoded bytes
 * @param value the symbol address; must not be negative
 * @return the number of bytes written
 * @throws IllegalArgumentException if {@code value} is negative
 */
public static int writeSymbolValue(WriteBuffer buffer, long value) {
    // TODO: Do we need to support Symbol Addresses greater than Long.MAX_VALUE?
    if (value < 0) {
        throw new IllegalArgumentException("Symbol Address cannot be negative; was: " + value);
    }

    if (value >= FIRST_MANY_BYTE_SYMBOL_ADDRESS) {
        buffer.writeByte(OpCodes.SYMBOL_ADDRESS_MANY_BYTES);
        return 1 + buffer.writeFlexUInt(value - FIRST_MANY_BYTE_SYMBOL_ADDRESS);
    }

    if (value >= FIRST_2_BYTE_SYMBOL_ADDRESS) {
        buffer.writeByte(OpCodes.SYMBOL_ADDRESS_2_BYTES);
        buffer.writeFixedIntOrUInt(value - FIRST_2_BYTE_SYMBOL_ADDRESS, 2);
        return 3;
    }

    buffer.writeByte(OpCodes.SYMBOL_ADDRESS_1_BYTE);
    buffer.writeFixedUInt(value);
    return 2;
}

/**
 * Encodes an Ion Blob in the Ion 1.1 binary format and appends it to the given WriteBuffer.
 * A non-null blob is written as the {@code VARIABLE_LENGTH_BLOB} opcode, the byte count as a
 * FlexUInt, and then the bytes themselves.
 *
 * @param buffer the WriteBuffer that receives the encoded bytes
 * @param value the blob content; {@code null} is handed to {@code writeNullValue} as a BLOB null
 * @return the number of bytes written
 */
public static int writeBlobValue(WriteBuffer buffer, byte[] value) {
    if (value != null) {
        buffer.writeByte(OpCodes.VARIABLE_LENGTH_BLOB);
        final int lengthPrefixSize = buffer.writeFlexUInt(value.length);
        buffer.writeBytes(value);
        // one opcode byte + length prefix + payload
        return 1 + lengthPrefixSize + value.length;
    }
    return writeNullValue(buffer, IonType.BLOB);
}

/**
 * Encodes an Ion Clob in the Ion 1.1 binary format and appends it to the given WriteBuffer.
 * A non-null clob is written as the {@code VARIABLE_LENGTH_CLOB} opcode, the byte count as a
 * FlexUInt, and then the bytes themselves.
 *
 * @param buffer the WriteBuffer that receives the encoded bytes
 * @param value the clob content; {@code null} is handed to {@code writeNullValue} as a CLOB null
 * @return the number of bytes written
 */
public static int writeClobValue(WriteBuffer buffer, byte[] value) {
    if (value != null) {
        buffer.writeByte(OpCodes.VARIABLE_LENGTH_CLOB);
        final int lengthPrefixSize = buffer.writeFlexUInt(value.length);
        buffer.writeBytes(value);
        // one opcode byte + length prefix + payload
        return 1 + lengthPrefixSize + value.length;
    }
    return writeNullValue(buffer, IonType.CLOB);
}

// TODO: Implement FlexSym Annotations

/**
 * Writes an annotation sequence, given as symbol addresses, to the given WriteBuffer.
 * <p>
 * One annotation is written as {@code ANNOTATIONS_1_SYMBOL_ADDRESS} followed by the address as a
 * FlexUInt; two annotations as {@code ANNOTATIONS_2_SYMBOL_ADDRESS} followed by both addresses as
 * FlexUInts; three or more as {@code ANNOTATIONS_MANY_SYMBOL_ADDRESS}, then the total byte length
 * of the encoded addresses as a FlexUInt, then each address as a FlexUInt.
 *
 * @param buffer the WriteBuffer that receives the encoded bytes
 * @param annotations the symbol addresses of the annotations; {@code null} or an empty array
 *                    writes nothing
 * @return the number of bytes written; 0 when there are no annotations
 * @throws IllegalArgumentException if any symbol address is negative; nothing is written in that case
 */
public static int writeAnnotations(WriteBuffer buffer, long[] annotations) {
    if (annotations == null || annotations.length == 0) {
        return 0;
    }

    // Reject invalid addresses before the first byte is emitted, so that bad input cannot leave a
    // dangling annotation opcode (or a partially written sequence) in the buffer. The message
    // matches the one used by writeSymbolValue(WriteBuffer, long).
    for (long ann : annotations) {
        if (ann < 0) {
            throw new IllegalArgumentException("Symbol Address cannot be negative; was: " + ann);
        }
    }

    if (annotations.length == 1) {
        buffer.writeByte(OpCodes.ANNOTATIONS_1_SYMBOL_ADDRESS);
        int numAddressBytes = buffer.writeFlexUInt(annotations[0]);
        return 1 + numAddressBytes;
    } else if (annotations.length == 2) {
        buffer.writeByte(OpCodes.ANNOTATIONS_2_SYMBOL_ADDRESS);
        int numAddressBytes = buffer.writeFlexUInt(annotations[0]);
        numAddressBytes += buffer.writeFlexUInt(annotations[1]);
        return 1 + numAddressBytes;
    } else {
        // The many-annotation form is length-prefixed, so the total size of the encoded addresses
        // has to be known before any of them is written.
        int numAddressBytes = 0;
        for (long ann : annotations) {
            numAddressBytes += WriteBuffer.flexUIntLength(ann);
        }
        buffer.writeByte(OpCodes.ANNOTATIONS_MANY_SYMBOL_ADDRESS);
        int numLengthBytes = buffer.writeFlexUInt(numAddressBytes);
        for (long ann : annotations) {
            buffer.writeFlexUInt(ann);
        }
        return 1 + numLengthBytes + numAddressBytes;
    }
}
}
3 changes: 3 additions & 0 deletions src/com/amazon/ion/impl/bin/Ion_1_1_Constants.java
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@
public class Ion_1_1_Constants {
private Ion_1_1_Constants() {}

// Symbol addresses below this value are written by IonEncoder_1_1.writeSymbolValue in the 1-byte
// form. It is also the bias subtracted from addresses written in the 2-byte form.
static final int FIRST_2_BYTE_SYMBOL_ADDRESS = 256;
// Symbol addresses at or above this value (256 + 65536) are written by
// IonEncoder_1_1.writeSymbolValue as a FlexUInt, after subtracting this value as a bias.
static final int FIRST_MANY_BYTE_SYMBOL_ADDRESS = 65792;

//////// Timestamp Field Constants ////////

// S_TIMESTAMP_* is applicable to all short-form timestamps
Expand Down
20 changes: 20 additions & 0 deletions src/com/amazon/ion/impl/bin/OpCodes.java
Original file line number Diff line number Diff line change
Expand Up @@ -35,10 +35,30 @@ private OpCodes() {}
public static final byte TIMESTAMP_NANOS_PRECISION_WITH_OFFSET = 0x7C;
// 0x7D-0x7F Reserved

// Base opcode for Strings whose UTF-8 length fits in 4 bits: IonEncoder_1_1 ORs the byte length
// (0x0-0xF) into this value, so 0x80 itself is the opcode for the empty String.
public static final byte STRING_ZERO_LENGTH = (byte) 0x80;

// Same scheme as STRING_ZERO_LENGTH, but for Symbols whose text is written inline.
public static final byte INLINE_SYMBOL_ZERO_LENGTH = (byte) 0x90;

// Symbol values written by symbol address. IonEncoder_1_1.writeSymbolValue chooses between these
// three based on the thresholds FIRST_2_BYTE_SYMBOL_ADDRESS and FIRST_MANY_BYTE_SYMBOL_ADDRESS.
public static final byte SYMBOL_ADDRESS_1_BYTE = (byte) 0xE1;
public static final byte SYMBOL_ADDRESS_2_BYTES = (byte) 0xE2;
public static final byte SYMBOL_ADDRESS_MANY_BYTES = (byte) 0xE3;
// Annotation sequences given as symbol addresses: exactly one address, exactly two addresses,
// or a length-prefixed run of addresses (see IonEncoder_1_1.writeAnnotations).
public static final byte ANNOTATIONS_1_SYMBOL_ADDRESS = (byte) 0xE4;
public static final byte ANNOTATIONS_2_SYMBOL_ADDRESS = (byte) 0xE5;
public static final byte ANNOTATIONS_MANY_SYMBOL_ADDRESS = (byte) 0xE6;
// FlexSym annotation opcodes. NOTE(review): no writer for these is visible alongside
// writeAnnotations, which carries a "TODO: Implement FlexSym Annotations" - confirm before relying on them.
public static final byte ANNOTATIONS_1_FLEX_SYM = (byte) 0xE7;
public static final byte ANNOTATIONS_2_FLEX_SYM = (byte) 0xE8;
public static final byte ANNOTATIONS_MANY_FLEX_SYM = (byte) 0xE9;
public static final byte NULL_UNTYPED = (byte) 0xEA;
public static final byte NULL_TYPED = (byte) 0xEB;
// 0xEC, 0xED NOP
// 0xEE Reserved
// 0xEF System Macro Invocation

// Opcodes for values whose length does not fit in the opcode. For STRING, INLINE_SYMBOL, BLOB and
// CLOB, IonEncoder_1_1 writes the opcode, then the length as a FlexUInt, then the content.
// NOTE(review): presumably INTEGER, DECIMAL and TIMESTAMP follow the same layout - confirm against their writers.
public static final byte VARIABLE_LENGTH_INTEGER = (byte) 0xF5;
public static final byte VARIABLE_LENGTH_DECIMAL = (byte) 0xF6;
public static final byte VARIABLE_LENGTH_TIMESTAMP = (byte) 0xF7;
public static final byte VARIABLE_LENGTH_STRING = (byte) 0xF8;
public static final byte VARIABLE_LENGTH_INLINE_SYMBOL = (byte) 0xF9;
public static final byte VARIABLE_LENGTH_BLOB = (byte) 0xFE;
public static final byte VARIABLE_LENGTH_CLOB = (byte) 0xFF;
}
Loading

0 comments on commit b08cbc4

Please sign in to comment.