From f165357f23c950723921b26a791da4e874e0b937 Mon Sep 17 00:00:00 2001 From: Shivam Malhotra Date: Wed, 26 Jun 2024 16:33:02 -0500 Subject: [PATCH] Further improvements to parquet page materializaition (#5670) Also fixes the crash when reading from parquet with decoders --- .../io/deephaven/util/codec/MapCodec.java | 16 +- .../deephaven/util/codec/ObjectDecoder.java | 22 ++- .../util/codec/SimpleByteArrayCodec.java | 7 +- .../parquet/base/ColumnChunkReader.java | 6 +- .../parquet/base/ColumnChunkReaderImpl.java | 21 ++- .../parquet/base/ColumnPageReaderImpl.java | 2 +- .../parquet/base/PageMaterializer.java | 140 ------------------ .../parquet/base/PageMaterializerFactory.java | 12 ++ .../BigDecimalFromBytesMaterializer.java | 3 +- .../materializers/BigIntegerMaterializer.java | 3 +- .../base/materializers/BlobMaterializer.java | 5 +- .../base/materializers/BoolMaterializer.java | 2 +- .../base/materializers/ByteMaterializer.java | 2 +- .../base/materializers/CharMaterializer.java | 2 +- .../materializers/DoubleMaterializer.java | 2 +- .../base/materializers/FloatMaterializer.java | 2 +- ...=> InstantNanosFromInt96Materializer.java} | 12 +- .../InstantNanosFromMicrosMaterializer.java | 2 +- .../InstantNanosFromMillisMaterializer.java | 2 +- .../base/materializers/IntMaterializer.java | 2 +- .../materializers/LocalDateMaterializer.java | 2 +- .../LocalDateTimeFromMicrosMaterializer.java | 2 +- .../LocalDateTimeFromMillisMaterializer.java | 2 +- .../LocalDateTimeFromNanosMaterializer.java | 2 +- .../LocalTimeFromMicrosMaterializer.java | 2 +- .../LocalTimeFromMillisMaterializer.java | 2 +- .../LocalTimeFromNanosMaterializer.java | 2 +- .../LongFromUnsignedIntMaterializer.java | 2 +- .../base/materializers/LongMaterializer.java | 2 +- .../materializers/ObjectMaterializer.java | 59 ++++++++ .../base/materializers/ShortMaterializer.java | 2 +- .../materializers/StringMaterializer.java | 2 +- .../parquet/table/ParquetSchemaReader.java | 10 +- .../io/deephaven/parquet/table/TypeInfos.java | 3 +- .../table/location/ParquetColumnLocation.java | 95 ++++++++---- .../table/pagestore/ColumnChunkPageStore.java | 5 +- .../OffsetIndexBasedColumnChunkPageStore.java | 3 +- .../VariablePageSizeColumnChunkPageStore.java | 2 +- .../topage/ToBigDecimalFromNumeric.java | 58 ++++++++ .../pagestore/topage/ToBigDecimalPage.java | 26 ++-- .../pagestore/topage/ToBigIntegerPage.java | 26 ++-- .../pagestore/topage/ToBooleanAsBytePage.java | 8 + .../table/pagestore/topage/ToBytePage.java | 9 ++ .../table/pagestore/topage/ToCharPage.java | 9 ++ .../table/pagestore/topage/ToDoublePage.java | 9 ++ .../table/pagestore/topage/ToFloatPage.java | 9 ++ .../table/pagestore/topage/ToInstantPage.java | 64 ++++++-- .../table/pagestore/topage/ToIntPage.java | 9 ++ .../pagestore/topage/ToLocalDatePage.java | 9 ++ .../pagestore/topage/ToLocalDateTimePage.java | 55 +++++-- .../pagestore/topage/ToLocalTimePage.java | 53 +++++-- .../table/pagestore/topage/ToLongPage.java | 42 ++++-- .../table/pagestore/topage/ToObjectPage.java | 52 +++---- .../table/pagestore/topage/ToPage.java | 13 +- .../topage/ToPageWithDictionary.java | 20 ++- .../table/pagestore/topage/ToShortPage.java | 9 ++ .../table/pagestore/topage/ToStringPage.java | 13 +- .../table/ParquetTableReadWriteTest.java | 30 +++- .../ReferenceDecimalLogicalType.parquet | 4 +- .../ReferenceDecimalLogicalType2.parquet | 4 +- .../replicators/ReplicateToPage.java | 20 +-- .../replication/ReplicatePrimitiveCode.java | 16 ++ 62 files changed, 683 insertions(+), 348 deletions(-) rename extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/{InstantFromInt96Materializer.java => InstantNanosFromInt96Materializer.java} (85%) create mode 100644 extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/ObjectMaterializer.java create mode 100644 extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToBigDecimalFromNumeric.java diff --git a/Util/src/main/java/io/deephaven/util/codec/MapCodec.java b/Util/src/main/java/io/deephaven/util/codec/MapCodec.java index 6bd18bb96da..2c1c85cae69 100644 --- a/Util/src/main/java/io/deephaven/util/codec/MapCodec.java +++ b/Util/src/main/java/io/deephaven/util/codec/MapCodec.java @@ -115,11 +115,10 @@ private ByteBuffer encodeIntoBuffer(final ByteBuffer scratch, @NotNull Map @Nullable @Override - public Map decode(@NotNull final byte[] input, final int offset, final int length) { - if (input.length == 0) { + public Map decode(@NotNull final ByteBuffer byteBuffer) { + if (byteBuffer.remaining() == 0) { return null; } - final ByteBuffer byteBuffer = ByteBuffer.wrap(input); final int size = byteBuffer.getInt(); if (size == 0) { return Collections.emptyMap(); @@ -129,13 +128,22 @@ public Map decode(@NotNull final byte[] input, final int offset, final int final V value = decodeValue(byteBuffer); return Collections.singletonMap(key, value); } - final LinkedHashMap result = new LinkedHashMap<>(size); + final Map result = new LinkedHashMap<>(size); for (int ii = 0; ii < size; ++ii) { result.put(decodeKey(byteBuffer), decodeValue(byteBuffer)); } return Collections.unmodifiableMap(result); } + @Nullable + @Override + public Map decode(@NotNull final byte[] input, final int offset, final int length) { + if (input.length == 0) { + return null; + } + return decode(ByteBuffer.wrap(input, offset, length)); + } + /** * Estimate the size of the encoded map. * diff --git a/Util/src/main/java/io/deephaven/util/codec/ObjectDecoder.java b/Util/src/main/java/io/deephaven/util/codec/ObjectDecoder.java index 6e52078c0d4..72728684abd 100644 --- a/Util/src/main/java/io/deephaven/util/codec/ObjectDecoder.java +++ b/Util/src/main/java/io/deephaven/util/codec/ObjectDecoder.java @@ -7,6 +7,8 @@ import org.jetbrains.annotations.NotNull; import org.jetbrains.annotations.Nullable; +import java.nio.ByteBuffer; + /** *

* Codec superinterface for Object translation from byte arrays for serialization and deserialization. @@ -30,12 +32,30 @@ public interface ObjectDecoder { * * @param input The input byte array containing bytes to decode * @param offset The offset into the byte array to start decoding from - * @param length The length of the byte array to decode from, starting at the offset + * @param length The number of bytes to decode, starting at the offset * @return The output object, possibly null */ @Nullable TYPE decode(@NotNull byte[] input, int offset, int length); + /** + * Decode an object from a ByteBuffer. The position of the input buffer may or may not be modified by this method. + * + * @param buffer The input ByteBuffer containing bytes to decode + * @return The output object, possibly null + */ + @Nullable + default TYPE decode(@NotNull final ByteBuffer buffer) { + if (buffer.hasArray()) { + return decode(buffer.array(), buffer.arrayOffset() + buffer.position(), buffer.remaining()); + } else { + // Make a copy of the buffer's contents + final byte[] bytes = new byte[buffer.remaining()]; + buffer.get(bytes); + return decode(bytes, 0, bytes.length); + } + } + /** * What width byte array does this ObjectCodec expect to encode and decode? * diff --git a/Util/src/main/java/io/deephaven/util/codec/SimpleByteArrayCodec.java b/Util/src/main/java/io/deephaven/util/codec/SimpleByteArrayCodec.java index e315e851b37..8bf70622a03 100644 --- a/Util/src/main/java/io/deephaven/util/codec/SimpleByteArrayCodec.java +++ b/Util/src/main/java/io/deephaven/util/codec/SimpleByteArrayCodec.java @@ -49,7 +49,9 @@ public byte[] encode(@Nullable final byte[] input) { if (input == null) { throw new IllegalArgumentException(SimpleByteArrayCodec.class.getSimpleName() + " cannot encode nulls"); } - return input; + final byte[] output = new byte[input.length]; + System.arraycopy(input, 0, output, 0, input.length); + return output; } @Override @@ -73,9 +75,6 @@ public byte[] decode(@NotNull final byte[] input, final int offset, final int le if (input.length == 0) { return CollectionUtil.ZERO_LENGTH_BYTE_ARRAY; } - if (offset == 0 && length == input.length) { - return input; - } final byte[] output = new byte[length]; System.arraycopy(input, offset, output, 0, length); return output; diff --git a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/ColumnChunkReader.java b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/ColumnChunkReader.java index 00d67f43860..d3cc0657078 100644 --- a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/ColumnChunkReader.java +++ b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/ColumnChunkReader.java @@ -71,9 +71,10 @@ interface ColumnPageReaderIterator { } /** + * @param pageMaterializerFactory The factory to use for constructing page materializers. * @return An iterator over individual parquet pages. */ - ColumnPageReaderIterator getPageIterator() throws IOException; + ColumnPageReaderIterator getPageIterator(PageMaterializerFactory pageMaterializerFactory) throws IOException; interface ColumnPageDirectAccessor { /** @@ -86,9 +87,10 @@ interface ColumnPageDirectAccessor { } /** + * @param pageMaterializerFactory The factory to use for constructing page materializers. * @return An accessor for individual parquet pages which uses the provided offset index. */ - ColumnPageDirectAccessor getPageAccessor(OffsetIndex offsetIndex); + ColumnPageDirectAccessor getPageAccessor(OffsetIndex offsetIndex, PageMaterializerFactory pageMaterializerFactory); /** * @return Whether this column chunk uses a dictionary-based encoding on every page. diff --git a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/ColumnChunkReaderImpl.java b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/ColumnChunkReaderImpl.java index 504b31c17c5..00d35aac8e6 100644 --- a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/ColumnChunkReaderImpl.java +++ b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/ColumnChunkReaderImpl.java @@ -48,7 +48,6 @@ final class ColumnChunkReaderImpl implements ColumnChunkReader { private final OffsetIndexReader offsetIndexReader; private final List fieldTypes; private final Function dictionarySupplier; - private final PageMaterializerFactory pageMaterializerFactory; private final URI columnChunkURI; /** * Number of rows in the row group of this column chunk. @@ -81,7 +80,6 @@ final class ColumnChunkReaderImpl implements ColumnChunkReader { } this.fieldTypes = fieldTypes; this.dictionarySupplier = new SoftCachingFunction<>(this::getDictionary); - this.pageMaterializerFactory = PageMaterializer.factoryForType(path.getPrimitiveType()); this.numRows = numRows; this.version = version; if (columnChunk.isSetFile_path() && FILE_URI_SCHEME.equals(rootURI.getScheme())) { @@ -130,16 +128,18 @@ public OffsetIndex getOffsetIndex(final SeekableChannelContext context) { } @Override - public ColumnPageReaderIterator getPageIterator() { - return new ColumnPageReaderIteratorImpl(); + public ColumnPageReaderIterator getPageIterator(final PageMaterializerFactory pageMaterializerFactory) { + return new ColumnPageReaderIteratorImpl(pageMaterializerFactory); } @Override - public ColumnPageDirectAccessor getPageAccessor(final OffsetIndex offsetIndex) { + public ColumnPageDirectAccessor getPageAccessor( + final OffsetIndex offsetIndex, + final PageMaterializerFactory pageMaterializerFactory) { if (offsetIndex == null) { throw new UnsupportedOperationException("Cannot use direct accessor without offset index"); } - return new ColumnPageDirectAccessorImpl(offsetIndex); + return new ColumnPageDirectAccessorImpl(offsetIndex, pageMaterializerFactory); } @Override @@ -247,10 +247,12 @@ private Dictionary readDictionary(long dictionaryPageOffset, SeekableChannelCont private final class ColumnPageReaderIteratorImpl implements ColumnPageReaderIterator { private long nextHeaderOffset; private long remainingValues; + PageMaterializerFactory pageMaterializerFactory; - ColumnPageReaderIteratorImpl() { + ColumnPageReaderIteratorImpl(final PageMaterializerFactory pageMaterializerFactory) { this.remainingValues = columnChunk.meta_data.getNum_values(); this.nextHeaderOffset = columnChunk.meta_data.getData_page_offset(); + this.pageMaterializerFactory = pageMaterializerFactory; } @Override @@ -335,9 +337,12 @@ private static int getNumValues(PageHeader pageHeader) { private final class ColumnPageDirectAccessorImpl implements ColumnPageDirectAccessor { private final OffsetIndex offsetIndex; + private final PageMaterializerFactory pageMaterializerFactory; - ColumnPageDirectAccessorImpl(final OffsetIndex offsetIndex) { + ColumnPageDirectAccessorImpl(@NotNull final OffsetIndex offsetIndex, + @NotNull final PageMaterializerFactory pageMaterializerFactory) { this.offsetIndex = offsetIndex; + this.pageMaterializerFactory = pageMaterializerFactory; } @Override diff --git a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/ColumnPageReaderImpl.java b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/ColumnPageReaderImpl.java index 14c60369f01..21ecc5cef7e 100644 --- a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/ColumnPageReaderImpl.java +++ b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/ColumnPageReaderImpl.java @@ -345,7 +345,7 @@ private IntBuffer readKeysFromPageCommon( final RunLengthBitPackingHybridBufferDecoder rlDecoder, final RunLengthBitPackingHybridBufferDecoder dlDecoder, final ValuesReader dataReader) throws IOException { - final Object result = materialize(IntMaterializer.Factory, dlDecoder, rlDecoder, dataReader, nullPlaceholder); + final Object result = materialize(IntMaterializer.FACTORY, dlDecoder, rlDecoder, dataReader, nullPlaceholder); if (result instanceof DataWithOffsets) { keyDest.put((int[]) ((DataWithOffsets) result).materializeResult); return ((DataWithOffsets) result).offsets; diff --git a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/PageMaterializer.java b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/PageMaterializer.java index 8aca259445b..8b5039cd7b3 100644 --- a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/PageMaterializer.java +++ b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/PageMaterializer.java @@ -3,148 +3,8 @@ // package io.deephaven.parquet.base; -import io.deephaven.parquet.base.materializers.*; -import org.apache.parquet.schema.LogicalTypeAnnotation; -import org.apache.parquet.schema.PrimitiveType; -import org.jetbrains.annotations.NotNull; - -import java.math.BigDecimal; -import java.math.BigInteger; - -import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.BINARY; - public interface PageMaterializer { - /** - * Get the internal type used by Deephaven to represent a Parquet - * {@link LogicalTypeAnnotation.DecimalLogicalTypeAnnotation Decimal} logical type - */ - static Class resolveDecimalLogicalType( - final LogicalTypeAnnotation.DecimalLogicalTypeAnnotation decimalLogicalType) { - // This pair of values (precision=1, scale=0) is set at write time as a marker so that we can recover - // the fact that the type is a BigInteger, not a BigDecimal when the fies are read. - if (decimalLogicalType.getPrecision() == 1 && decimalLogicalType.getScale() == 0) { - return BigInteger.class; - } - return BigDecimal.class; - } - - static PageMaterializerFactory factoryForType(@NotNull final PrimitiveType primitiveType) { - final PrimitiveType.PrimitiveTypeName primitiveTypeName = primitiveType.getPrimitiveTypeName(); - final LogicalTypeAnnotation logicalTypeAnnotation = primitiveType.getLogicalTypeAnnotation(); - switch (primitiveTypeName) { - case INT32: - if (logicalTypeAnnotation instanceof LogicalTypeAnnotation.IntLogicalTypeAnnotation) { - final LogicalTypeAnnotation.IntLogicalTypeAnnotation intLogicalType = - (LogicalTypeAnnotation.IntLogicalTypeAnnotation) logicalTypeAnnotation; - if (intLogicalType.isSigned()) { - switch (intLogicalType.getBitWidth()) { - case 8: - return ByteMaterializer.Factory; - case 16: - return ShortMaterializer.Factory; - case 32: - return IntMaterializer.Factory; - } - } else { - switch (intLogicalType.getBitWidth()) { - case 8: - case 16: - return CharMaterializer.Factory; - case 32: - return LongFromUnsignedIntMaterializer.Factory; - } - } - } else if (logicalTypeAnnotation instanceof LogicalTypeAnnotation.DateLogicalTypeAnnotation) { - return LocalDateMaterializer.Factory; - } else if (logicalTypeAnnotation instanceof LogicalTypeAnnotation.TimeLogicalTypeAnnotation) { - final LogicalTypeAnnotation.TimeLogicalTypeAnnotation timeLogicalType = - (LogicalTypeAnnotation.TimeLogicalTypeAnnotation) logicalTypeAnnotation; - if (timeLogicalType.getUnit() != LogicalTypeAnnotation.TimeUnit.MILLIS) { - throw new IllegalArgumentException( - "Expected unit type to be MILLIS, found " + timeLogicalType.getUnit()); - } - // isAdjustedToUTC parameter is ignored while reading LocalTime from Parquet files - return LocalTimeFromMillisMaterializer.Factory; - } else if (logicalTypeAnnotation instanceof LogicalTypeAnnotation.DecimalLogicalTypeAnnotation) { - final LogicalTypeAnnotation.DecimalLogicalTypeAnnotation decimalLogicalType = - (LogicalTypeAnnotation.DecimalLogicalTypeAnnotation) logicalTypeAnnotation; - return new BigDecimalFromIntMaterializer.Factory(decimalLogicalType.getScale()); - } - return IntMaterializer.Factory; - case INT64: - if (logicalTypeAnnotation instanceof LogicalTypeAnnotation.TimestampLogicalTypeAnnotation) { - final LogicalTypeAnnotation.TimestampLogicalTypeAnnotation timestampLogicalType = - (LogicalTypeAnnotation.TimestampLogicalTypeAnnotation) logicalTypeAnnotation; - if (timestampLogicalType.isAdjustedToUTC()) { - // The column will store nanoseconds elapsed since epoch as long values - switch (timestampLogicalType.getUnit()) { - case MILLIS: - return InstantNanosFromMillisMaterializer.Factory; - case MICROS: - return InstantNanosFromMicrosMaterializer.Factory; - case NANOS: - return LongMaterializer.Factory; - } - } else { - // The column will be stored as LocalDateTime values - // Ref:https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#local-semantics-timestamps-not-normalized-to-utc - switch (timestampLogicalType.getUnit()) { - case MILLIS: - return LocalDateTimeFromMillisMaterializer.Factory; - case MICROS: - return LocalDateTimeFromMicrosMaterializer.Factory; - case NANOS: - return LocalDateTimeFromNanosMaterializer.Factory; - } - } - } else if (logicalTypeAnnotation instanceof LogicalTypeAnnotation.TimeLogicalTypeAnnotation) { - final LogicalTypeAnnotation.TimeLogicalTypeAnnotation timeLogicalType = - (LogicalTypeAnnotation.TimeLogicalTypeAnnotation) logicalTypeAnnotation; - // isAdjustedToUTC parameter is ignored while reading LocalTime from Parquet files - switch (timeLogicalType.getUnit()) { - case MICROS: - return LocalTimeFromMicrosMaterializer.Factory; - case NANOS: - return LocalTimeFromNanosMaterializer.Factory; - default: - throw new IllegalArgumentException("Unsupported unit=" + timeLogicalType.getUnit()); - } - } else if (logicalTypeAnnotation instanceof LogicalTypeAnnotation.DecimalLogicalTypeAnnotation) { - final LogicalTypeAnnotation.DecimalLogicalTypeAnnotation decimalLogicalType = - (LogicalTypeAnnotation.DecimalLogicalTypeAnnotation) logicalTypeAnnotation; - return new BigDecimalFromLongMaterializer.Factory(decimalLogicalType.getScale()); - } - return LongMaterializer.Factory; - case INT96: - return InstantFromInt96Materializer.Factory; - case FLOAT: - return FloatMaterializer.Factory; - case DOUBLE: - return DoubleMaterializer.Factory; - case BOOLEAN: - return BoolMaterializer.Factory; - case BINARY: - if (logicalTypeAnnotation instanceof LogicalTypeAnnotation.StringLogicalTypeAnnotation) { - return StringMaterializer.Factory; - } - case FIXED_LEN_BYTE_ARRAY: // fall through - if (logicalTypeAnnotation instanceof LogicalTypeAnnotation.DecimalLogicalTypeAnnotation) { - final LogicalTypeAnnotation.DecimalLogicalTypeAnnotation decimalLogicalType = - (LogicalTypeAnnotation.DecimalLogicalTypeAnnotation) logicalTypeAnnotation; - final int encodedSizeInBytes = primitiveTypeName == BINARY ? -1 : primitiveType.getTypeLength(); - if (resolveDecimalLogicalType(decimalLogicalType) == BigInteger.class) { - return new BigIntegerMaterializer.Factory(new BigIntegerParquetBytesCodec(encodedSizeInBytes)); - } - return new BigDecimalFromBytesMaterializer.Factory(new BigDecimalParquetBytesCodec( - decimalLogicalType.getPrecision(), decimalLogicalType.getScale(), encodedSizeInBytes)); - } - return BlobMaterializer.Factory; - default: - throw new RuntimeException("Unexpected type name:" + primitiveTypeName); - } - } - void fillNulls(int startIndex, int endIndex); void fillValues(int startIndex, int endIndex); diff --git a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/PageMaterializerFactory.java b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/PageMaterializerFactory.java index a276ca24ff0..1db402b14d4 100644 --- a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/PageMaterializerFactory.java +++ b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/PageMaterializerFactory.java @@ -9,4 +9,16 @@ public interface PageMaterializerFactory { PageMaterializer makeMaterializerWithNulls(ValuesReader dataReader, Object nullValue, int numValues); PageMaterializer makeMaterializerNonNull(ValuesReader dataReader, int numValues); + + PageMaterializerFactory NULL_FACTORY = new PageMaterializerFactory() { + @Override + public PageMaterializer makeMaterializerWithNulls(ValuesReader dataReader, Object nullValue, int numValues) { + throw new UnsupportedOperationException("Does not support materializing pages"); + } + + @Override + public PageMaterializer makeMaterializerNonNull(ValuesReader dataReader, int numValues) { + throw new UnsupportedOperationException("Does not support materializing pages"); + } + }; } diff --git a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/BigDecimalFromBytesMaterializer.java b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/BigDecimalFromBytesMaterializer.java index 5080ea35dd8..754e4b156f8 100644 --- a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/BigDecimalFromBytesMaterializer.java +++ b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/BigDecimalFromBytesMaterializer.java @@ -49,8 +49,7 @@ private BigDecimalFromBytesMaterializer(ValuesReader dataReader, BigDecimal null @Override public void fillValues(int startIndex, int endIndex) { for (int ii = startIndex; ii < endIndex; ii++) { - final byte[] bytes = dataReader.readBytes().getBytes(); - data[ii] = codec.decode(bytes, 0, bytes.length); + data[ii] = codec.decode(dataReader.readBytes().toByteBuffer()); } } } diff --git a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/BigIntegerMaterializer.java b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/BigIntegerMaterializer.java index 36589a48a41..944b9a8f965 100644 --- a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/BigIntegerMaterializer.java +++ b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/BigIntegerMaterializer.java @@ -53,8 +53,7 @@ private BigIntegerMaterializer(ValuesReader dataReader, BigInteger nullValue, in @Override public void fillValues(int startIndex, int endIndex) { for (int ii = startIndex; ii < endIndex; ii++) { - final byte[] bytes = dataReader.readBytes().getBytes(); - data[ii] = codec.decode(bytes, 0, bytes.length); + data[ii] = codec.decode(dataReader.readBytes().toByteBuffer()); } } } diff --git a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/BlobMaterializer.java b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/BlobMaterializer.java index f796d94979e..7e21a194398 100644 --- a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/BlobMaterializer.java +++ b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/BlobMaterializer.java @@ -8,9 +8,12 @@ import org.apache.parquet.column.values.ValuesReader; import org.apache.parquet.io.api.Binary; +/** + * Materializer for binary data. + */ public class BlobMaterializer extends ObjectMaterializerBase implements PageMaterializer { - public static final PageMaterializerFactory Factory = new PageMaterializerFactory() { + public static final PageMaterializerFactory FACTORY = new PageMaterializerFactory() { @Override public PageMaterializer makeMaterializerWithNulls(ValuesReader dataReader, Object nullValue, int numValues) { return new BlobMaterializer(dataReader, (Binary) nullValue, numValues); diff --git a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/BoolMaterializer.java b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/BoolMaterializer.java index 89871fbf277..6e5b7f56994 100644 --- a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/BoolMaterializer.java +++ b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/BoolMaterializer.java @@ -11,7 +11,7 @@ public class BoolMaterializer implements PageMaterializer { - public static final PageMaterializerFactory Factory = new PageMaterializerFactory() { + public static final PageMaterializerFactory FACTORY = new PageMaterializerFactory() { @Override public PageMaterializer makeMaterializerWithNulls(ValuesReader dataReader, Object nullValue, int numValues) { return new BoolMaterializer(dataReader, (byte) nullValue, numValues); diff --git a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/ByteMaterializer.java b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/ByteMaterializer.java index 66477aed7c5..d110d91e564 100644 --- a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/ByteMaterializer.java +++ b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/ByteMaterializer.java @@ -15,7 +15,7 @@ public class ByteMaterializer implements PageMaterializer { - public static final PageMaterializerFactory Factory = new PageMaterializerFactory() { + public static final PageMaterializerFactory FACTORY = new PageMaterializerFactory() { @Override public PageMaterializer makeMaterializerWithNulls(ValuesReader dataReader, Object nullValue, int numValues) { return new ByteMaterializer(dataReader, (byte) nullValue, numValues); diff --git a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/CharMaterializer.java b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/CharMaterializer.java index ee752bdecd8..ea3a28b4586 100644 --- a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/CharMaterializer.java +++ b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/CharMaterializer.java @@ -11,7 +11,7 @@ public class CharMaterializer implements PageMaterializer { - public static final PageMaterializerFactory Factory = new PageMaterializerFactory() { + public static final PageMaterializerFactory FACTORY = new PageMaterializerFactory() { @Override public PageMaterializer makeMaterializerWithNulls(ValuesReader dataReader, Object nullValue, int numValues) { return new CharMaterializer(dataReader, (char) nullValue, numValues); diff --git a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/DoubleMaterializer.java b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/DoubleMaterializer.java index 2e373e20172..f5143679f1f 100644 --- a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/DoubleMaterializer.java +++ b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/DoubleMaterializer.java @@ -15,7 +15,7 @@ public class DoubleMaterializer implements PageMaterializer { - public static final PageMaterializerFactory Factory = new PageMaterializerFactory() { + public static final PageMaterializerFactory FACTORY = new PageMaterializerFactory() { @Override public PageMaterializer makeMaterializerWithNulls(ValuesReader dataReader, Object nullValue, int numValues) { return new DoubleMaterializer(dataReader, (double) nullValue, numValues); diff --git a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/FloatMaterializer.java b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/FloatMaterializer.java index fff9142984a..4e802b0b73b 100644 --- a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/FloatMaterializer.java +++ b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/FloatMaterializer.java @@ -11,7 +11,7 @@ public class FloatMaterializer implements PageMaterializer { - public static final PageMaterializerFactory Factory = new PageMaterializerFactory() { + public static final PageMaterializerFactory FACTORY = new PageMaterializerFactory() { @Override public PageMaterializer makeMaterializerWithNulls(ValuesReader dataReader, Object nullValue, int numValues) { return new FloatMaterializer(dataReader, (float) nullValue, numValues); diff --git a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/InstantFromInt96Materializer.java b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/InstantNanosFromInt96Materializer.java similarity index 85% rename from extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/InstantFromInt96Materializer.java rename to extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/InstantNanosFromInt96Materializer.java index 4782303467d..eb5f731cc6e 100644 --- a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/InstantFromInt96Materializer.java +++ b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/InstantNanosFromInt96Materializer.java @@ -18,17 +18,17 @@ * {@link PageMaterializer} implementation for {@link Instant Instants} stored as Int96s representing an Impala format * Timestamp (nanoseconds of day and Julian date encoded as 8 bytes and 4 bytes, respectively) */ -public class InstantFromInt96Materializer extends LongMaterializerBase implements PageMaterializer { +public class InstantNanosFromInt96Materializer extends LongMaterializerBase implements PageMaterializer { - public static final PageMaterializerFactory Factory = new PageMaterializerFactory() { + public static final PageMaterializerFactory FACTORY = new PageMaterializerFactory() { @Override public PageMaterializer makeMaterializerWithNulls(ValuesReader dataReader, Object nullValue, int numValues) { - return new InstantFromInt96Materializer(dataReader, (long) nullValue, numValues); + return new InstantNanosFromInt96Materializer(dataReader, (long) nullValue, numValues); } @Override public PageMaterializer makeMaterializerNonNull(ValuesReader dataReader, int numValues) { - return new InstantFromInt96Materializer(dataReader, numValues); + return new InstantNanosFromInt96Materializer(dataReader, numValues); } }; @@ -48,11 +48,11 @@ public PageMaterializer makeMaterializerNonNull(ValuesReader dataReader, int num private final ValuesReader dataReader; - private InstantFromInt96Materializer(ValuesReader dataReader, int numValues) { + private InstantNanosFromInt96Materializer(ValuesReader dataReader, int numValues) { this(dataReader, 0, numValues); } - private InstantFromInt96Materializer(ValuesReader dataReader, long nullValue, int numValues) { + private InstantNanosFromInt96Materializer(ValuesReader dataReader, long nullValue, int numValues) { super(nullValue, numValues); this.dataReader = dataReader; } diff --git a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/InstantNanosFromMicrosMaterializer.java b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/InstantNanosFromMicrosMaterializer.java index 751bf7ab1b0..97a25b6d6e0 100644 --- a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/InstantNanosFromMicrosMaterializer.java +++ b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/InstantNanosFromMicrosMaterializer.java @@ -10,7 +10,7 @@ public class InstantNanosFromMicrosMaterializer extends LongMaterializerBase implements PageMaterializer { - public static final PageMaterializerFactory Factory = new PageMaterializerFactory() { + public static final PageMaterializerFactory FACTORY = new PageMaterializerFactory() { @Override public PageMaterializer makeMaterializerWithNulls(ValuesReader dataReader, Object nullValue, int numValues) { return new InstantNanosFromMicrosMaterializer(dataReader, (long) nullValue, numValues); diff --git a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/InstantNanosFromMillisMaterializer.java b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/InstantNanosFromMillisMaterializer.java index 9e7907a0f82..ee08531571c 100644 --- a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/InstantNanosFromMillisMaterializer.java +++ b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/InstantNanosFromMillisMaterializer.java @@ -14,7 +14,7 @@ public class InstantNanosFromMillisMaterializer extends LongMaterializerBase implements PageMaterializer { - public static final PageMaterializerFactory Factory = new PageMaterializerFactory() { + public static final PageMaterializerFactory FACTORY = new PageMaterializerFactory() { @Override public PageMaterializer makeMaterializerWithNulls(ValuesReader dataReader, Object nullValue, int numValues) { return new InstantNanosFromMillisMaterializer(dataReader, (long) nullValue, numValues); diff --git a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/IntMaterializer.java b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/IntMaterializer.java index 847ea3a3e18..97f47e23ebe 100644 --- a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/IntMaterializer.java +++ b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/IntMaterializer.java @@ -15,7 +15,7 @@ public class IntMaterializer implements PageMaterializer { - public static final PageMaterializerFactory Factory = new PageMaterializerFactory() { + public static final PageMaterializerFactory FACTORY = new PageMaterializerFactory() { @Override public PageMaterializer makeMaterializerWithNulls(ValuesReader dataReader, Object nullValue, int numValues) { return new IntMaterializer(dataReader, (int) nullValue, numValues); diff --git a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/LocalDateMaterializer.java b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/LocalDateMaterializer.java index 202e54d83cb..5cd7a76c298 100644 --- a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/LocalDateMaterializer.java +++ b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/LocalDateMaterializer.java @@ -12,7 +12,7 @@ public class LocalDateMaterializer extends ObjectMaterializerBase implements PageMaterializer { - public static final PageMaterializerFactory Factory = new PageMaterializerFactory() { + public static final PageMaterializerFactory FACTORY = new PageMaterializerFactory() { @Override public PageMaterializer makeMaterializerWithNulls(ValuesReader dataReader, Object nullValue, int numValues) { return new LocalDateMaterializer(dataReader, (LocalDate) nullValue, numValues); diff --git a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/LocalDateTimeFromMicrosMaterializer.java b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/LocalDateTimeFromMicrosMaterializer.java index 022586e6a73..e8b37cc889b 100644 --- a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/LocalDateTimeFromMicrosMaterializer.java +++ b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/LocalDateTimeFromMicrosMaterializer.java @@ -17,7 +17,7 @@ public class LocalDateTimeFromMicrosMaterializer extends ObjectMaterializerBase implements PageMaterializer { - public static final PageMaterializerFactory Factory = new PageMaterializerFactory() { + public static final PageMaterializerFactory FACTORY = new PageMaterializerFactory() { @Override public PageMaterializer makeMaterializerWithNulls(ValuesReader dataReader, Object nullValue, int numValues) { return new LocalDateTimeFromMicrosMaterializer(dataReader, (LocalDateTime) nullValue, numValues); diff --git a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/LocalDateTimeFromMillisMaterializer.java b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/LocalDateTimeFromMillisMaterializer.java index 7f2f5ec1f92..023be3cdd97 100644 --- a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/LocalDateTimeFromMillisMaterializer.java +++ b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/LocalDateTimeFromMillisMaterializer.java @@ -13,7 +13,7 @@ public class LocalDateTimeFromMillisMaterializer extends ObjectMaterializerBase implements PageMaterializer { - public static final PageMaterializerFactory Factory = new PageMaterializerFactory() { + public static final PageMaterializerFactory FACTORY = new PageMaterializerFactory() { @Override public PageMaterializer makeMaterializerWithNulls(ValuesReader dataReader, Object nullValue, int numValues) { return new LocalDateTimeFromMillisMaterializer(dataReader, (LocalDateTime) nullValue, numValues); diff --git a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/LocalDateTimeFromNanosMaterializer.java b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/LocalDateTimeFromNanosMaterializer.java index ae3f66bd813..9fd20c3b6ec 100644 --- a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/LocalDateTimeFromNanosMaterializer.java +++ b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/LocalDateTimeFromNanosMaterializer.java @@ -17,7 +17,7 @@ public class LocalDateTimeFromNanosMaterializer extends ObjectMaterializerBase implements PageMaterializer { - public static final PageMaterializerFactory Factory = new PageMaterializerFactory() { + public static final PageMaterializerFactory FACTORY = new PageMaterializerFactory() { @Override public PageMaterializer makeMaterializerWithNulls(ValuesReader dataReader, Object nullValue, int numValues) { return new LocalDateTimeFromNanosMaterializer(dataReader, (LocalDateTime) nullValue, numValues); diff --git a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/LocalTimeFromMicrosMaterializer.java b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/LocalTimeFromMicrosMaterializer.java index b276c808e4c..9067936bf5b 100644 --- a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/LocalTimeFromMicrosMaterializer.java +++ b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/LocalTimeFromMicrosMaterializer.java @@ -12,7 +12,7 @@ public class LocalTimeFromMicrosMaterializer extends ObjectMaterializerBase implements PageMaterializer { - public static final PageMaterializerFactory Factory = new PageMaterializerFactory() { + public static final PageMaterializerFactory FACTORY = new PageMaterializerFactory() { @Override public PageMaterializer makeMaterializerWithNulls(ValuesReader dataReader, Object nullValue, int numValues) { return new LocalTimeFromMicrosMaterializer(dataReader, (LocalTime) nullValue, numValues); diff --git a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/LocalTimeFromMillisMaterializer.java b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/LocalTimeFromMillisMaterializer.java index aed1d7e6308..aaf3aeaed4e 100644 --- a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/LocalTimeFromMillisMaterializer.java +++ b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/LocalTimeFromMillisMaterializer.java @@ -16,7 +16,7 @@ public class LocalTimeFromMillisMaterializer extends ObjectMaterializerBase implements PageMaterializer { - public static final PageMaterializerFactory Factory = new PageMaterializerFactory() { + public static final PageMaterializerFactory FACTORY = new PageMaterializerFactory() { @Override public PageMaterializer makeMaterializerWithNulls(ValuesReader dataReader, Object nullValue, int numValues) { return new LocalTimeFromMillisMaterializer(dataReader, (LocalTime) nullValue, numValues); diff --git a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/LocalTimeFromNanosMaterializer.java b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/LocalTimeFromNanosMaterializer.java index 75e707442c9..28f34528344 100644 --- a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/LocalTimeFromNanosMaterializer.java +++ b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/LocalTimeFromNanosMaterializer.java @@ -16,7 +16,7 @@ public class LocalTimeFromNanosMaterializer extends ObjectMaterializerBase implements PageMaterializer { - public static final PageMaterializerFactory Factory = new PageMaterializerFactory() { + public static final PageMaterializerFactory FACTORY = new PageMaterializerFactory() { @Override public PageMaterializer makeMaterializerWithNulls(ValuesReader dataReader, Object nullValue, int numValues) { return new LocalTimeFromNanosMaterializer(dataReader, (LocalTime) nullValue, numValues); diff --git a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/LongFromUnsignedIntMaterializer.java b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/LongFromUnsignedIntMaterializer.java index add4270f164..297d7ae6cbb 100644 --- a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/LongFromUnsignedIntMaterializer.java +++ b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/LongFromUnsignedIntMaterializer.java @@ -9,7 +9,7 @@ public class LongFromUnsignedIntMaterializer extends LongMaterializerBase implements PageMaterializer { - public static final PageMaterializerFactory Factory = new PageMaterializerFactory() { + public static final PageMaterializerFactory FACTORY = new PageMaterializerFactory() { @Override public PageMaterializer makeMaterializerWithNulls(ValuesReader dataReader, Object nullValue, int numValues) { return new LongFromUnsignedIntMaterializer(dataReader, (long) nullValue, numValues); diff --git a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/LongMaterializer.java b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/LongMaterializer.java index 9f57f184ddb..f28bc8cdcc7 100644 --- a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/LongMaterializer.java +++ b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/LongMaterializer.java @@ -9,7 +9,7 @@ public class LongMaterializer extends LongMaterializerBase implements PageMaterializer { - public static final PageMaterializerFactory Factory = new PageMaterializerFactory() { + public static final PageMaterializerFactory FACTORY = new PageMaterializerFactory() { @Override public PageMaterializer makeMaterializerWithNulls(ValuesReader dataReader, Object nullValue, int numValues) { return new LongMaterializer(dataReader, (long) nullValue, numValues); diff --git a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/ObjectMaterializer.java b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/ObjectMaterializer.java new file mode 100644 index 00000000000..fe542b83c1b --- /dev/null +++ b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/ObjectMaterializer.java @@ -0,0 +1,59 @@ +// +// Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending +// +package io.deephaven.parquet.base.materializers; + +import io.deephaven.parquet.base.PageMaterializer; +import io.deephaven.parquet.base.PageMaterializerFactory; +import io.deephaven.util.codec.ObjectCodec; +import org.apache.parquet.column.values.ValuesReader; + +import java.lang.reflect.Array; + +public class ObjectMaterializer extends ObjectMaterializerBase implements PageMaterializer { + + public static final class Factory implements PageMaterializerFactory { + + final ObjectCodec codec; + final Class nativeType; + + public Factory(ObjectCodec codec, final Class nativeType) { + this.codec = codec; + this.nativeType = nativeType; + } + + @Override + public PageMaterializer makeMaterializerWithNulls(ValuesReader dataReader, Object nullValue, int numValues) { + // noinspection unchecked + return new ObjectMaterializer<>(dataReader, (TYPE) nullValue, numValues, codec, nativeType); + } + + @Override + public PageMaterializer makeMaterializerNonNull(ValuesReader dataReader, int numValues) { + return new ObjectMaterializer<>(dataReader, numValues, codec, nativeType); + } + }; + + private final ValuesReader dataReader; + private final ObjectCodec codec; + + private ObjectMaterializer(ValuesReader dataReader, int numValues, final ObjectCodec codec, + final Class nativeType) { + this(dataReader, null, numValues, codec, nativeType); + } + + private ObjectMaterializer(ValuesReader dataReader, TYPE nullValue, int numValues, final ObjectCodec codec, + final Class nativeType) { + // noinspection unchecked + super(nullValue, (TYPE[]) Array.newInstance(nativeType, numValues)); + this.dataReader = dataReader; + this.codec = codec; + } + + @Override + public void fillValues(int startIndex, int endIndex) { + for (int ii = startIndex; ii < endIndex; ii++) { + data[ii] = codec.decode(dataReader.readBytes().toByteBuffer()); + } + } +} diff --git a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/ShortMaterializer.java b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/ShortMaterializer.java index cff03881692..b3c5eae6d9a 100644 --- a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/ShortMaterializer.java +++ b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/ShortMaterializer.java @@ -15,7 +15,7 @@ public class ShortMaterializer implements PageMaterializer { - public static final PageMaterializerFactory Factory = new PageMaterializerFactory() { + public static final PageMaterializerFactory FACTORY = new PageMaterializerFactory() { @Override public PageMaterializer makeMaterializerWithNulls(ValuesReader dataReader, Object nullValue, int numValues) { return new ShortMaterializer(dataReader, (short) nullValue, numValues); diff --git a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/StringMaterializer.java b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/StringMaterializer.java index b594aeb351e..b179c5fb5e9 100644 --- a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/StringMaterializer.java +++ b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/StringMaterializer.java @@ -9,7 +9,7 @@ public class StringMaterializer extends ObjectMaterializerBase implements PageMaterializer { - public static final PageMaterializerFactory Factory = new PageMaterializerFactory() { + public static final PageMaterializerFactory FACTORY = new PageMaterializerFactory() { @Override public PageMaterializer makeMaterializerWithNulls(ValuesReader dataReader, Object nullValue, int numValues) { return new StringMaterializer(dataReader, (String) nullValue, numValues); diff --git a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/ParquetSchemaReader.java b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/ParquetSchemaReader.java index 4a61cbef58a..dc99271414b 100644 --- a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/ParquetSchemaReader.java +++ b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/ParquetSchemaReader.java @@ -35,6 +35,8 @@ import org.jetbrains.annotations.NotNull; import java.io.IOException; +import java.math.BigDecimal; +import java.math.BigInteger; import java.time.Instant; import java.time.LocalDate; import java.time.LocalDateTime; @@ -43,7 +45,6 @@ import java.util.function.BiFunction; import java.util.function.Supplier; -import static io.deephaven.parquet.base.PageMaterializer.resolveDecimalLogicalType; import static io.deephaven.parquet.base.ParquetUtils.METADATA_KEY; public class ParquetSchemaReader { @@ -402,7 +403,12 @@ public Optional> visit(final LogicalTypeAnnotation.EnumLogicalTypeAnnot @Override public Optional> visit( final LogicalTypeAnnotation.DecimalLogicalTypeAnnotation decimalLogicalType) { - return Optional.of(resolveDecimalLogicalType(decimalLogicalType)); + // This pair of values (precision=1, scale=0) is set at write time as a marker so that we can recover + // the fact that the type is a BigInteger, not a BigDecimal when the fies are read. + if (decimalLogicalType.getPrecision() == 1 && decimalLogicalType.getScale() == 0) { + return Optional.of(BigInteger.class); + } + return Optional.of(BigDecimal.class); } @Override diff --git a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/TypeInfos.java b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/TypeInfos.java index eecfdde8d1c..36ca92c5116 100644 --- a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/TypeInfos.java +++ b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/TypeInfos.java @@ -8,7 +8,6 @@ import io.deephaven.engine.table.ColumnSource; import io.deephaven.engine.table.impl.CodecLookup; import io.deephaven.engine.table.impl.dataindex.RowSetCodec; -import io.deephaven.parquet.base.PageMaterializer; import io.deephaven.stringset.StringSet; import io.deephaven.util.codec.ExternalizableCodec; import io.deephaven.util.codec.SerializableCodec; @@ -413,7 +412,7 @@ public PrimitiveBuilder getBuilderImpl(boolean required, boolean * external compatibility by encoding them as fixed length decimals of scale 1. Internally, we'll record that we * wrote this as a decimal, so we can properly decode it back to BigInteger. * - * @see PageMaterializer#resolveDecimalLogicalType + * @see ParquetSchemaReader */ private enum BigIntegerType implements TypeInfo { INSTANCE; diff --git a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/location/ParquetColumnLocation.java b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/location/ParquetColumnLocation.java index a131d8d8d06..4c6e53c4b7b 100644 --- a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/location/ParquetColumnLocation.java +++ b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/location/ParquetColumnLocation.java @@ -379,7 +379,7 @@ private static ToPage makeToPage( toPage = ToLongPage.create(pageType); break; case INT96: - toPage = ToInstantPage.create(pageType); + toPage = ToInstantPage.createFromInt96(pageType); break; case DOUBLE: toPage = ToDoublePage.create(pageType); @@ -456,17 +456,37 @@ private static class LogicalTypeVisitor @Override public Optional> visit( final LogicalTypeAnnotation.StringLogicalTypeAnnotation stringLogicalType) { - return Optional - .of(ToStringPage.create(pageType, columnChunkReader.getDictionarySupplier())); + return Optional.of(ToStringPage.create(pageType, columnChunkReader.getDictionarySupplier())); } @Override public Optional> visit( final LogicalTypeAnnotation.TimestampLogicalTypeAnnotation timestampLogicalType) { if (timestampLogicalType.isAdjustedToUTC()) { - return Optional.of(ToInstantPage.create(pageType)); + // The column will be stored as nanoseconds elapsed since epoch as long values + switch (timestampLogicalType.getUnit()) { + case MILLIS: + return Optional.of(ToInstantPage.createFromMillis(pageType)); + case MICROS: + return Optional.of(ToInstantPage.createFromMicros(pageType)); + case NANOS: + return Optional.of(ToInstantPage.createFromNanos(pageType)); + default: + throw new IllegalArgumentException("Unsupported unit=" + timestampLogicalType.getUnit()); + } + } + // The column will be stored as as LocalDateTime + // Ref:https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#local-semantics-timestamps-not-normalized-to-utc + switch (timestampLogicalType.getUnit()) { + case MILLIS: + return Optional.of(ToLocalDateTimePage.createFromMillis(pageType)); + case MICROS: + return Optional.of(ToLocalDateTimePage.createFromMicros(pageType)); + case NANOS: + return Optional.of(ToLocalDateTimePage.createFromNanos(pageType)); + default: + throw new IllegalArgumentException("Unsupported unit=" + timestampLogicalType.getUnit()); } - return Optional.of(ToLocalDateTimePage.create(pageType)); } @Override @@ -488,7 +508,7 @@ private static class LogicalTypeVisitor case 16: return Optional.of(ToCharPage.create(pageType)); case 32: - return Optional.of(ToLongPage.create(pageType)); + return Optional.of(ToLongPage.createFromUnsignedInt(pageType)); } } return Optional.empty(); @@ -501,7 +521,17 @@ private static class LogicalTypeVisitor @Override public Optional> visit(final LogicalTypeAnnotation.TimeLogicalTypeAnnotation timeLogicalType) { - return Optional.of(ToLocalTimePage.create(pageType)); + // isAdjustedToUTC parameter is ignored while reading LocalTime from Parquet files + switch (timeLogicalType.getUnit()) { + case MILLIS: + return Optional.of(ToLocalTimePage.createFromMillis(pageType)); + case MICROS: + return Optional.of(ToLocalTimePage.createFromMicros(pageType)); + case NANOS: + return Optional.of(ToLocalTimePage.createFromNanos(pageType)); + default: + throw new IllegalArgumentException("Unsupported unit=" + timeLogicalType.getUnit()); + } } @Override @@ -509,28 +539,37 @@ private static class LogicalTypeVisitor final LogicalTypeAnnotation.DecimalLogicalTypeAnnotation decimalLogicalType) { final PrimitiveType type = columnChunkReader.getType(); final PrimitiveType.PrimitiveTypeName typeName = type.getPrimitiveTypeName(); - final int encodedSizeInBytes = typeName == BINARY ? -1 : type.getTypeLength(); - if (BigDecimal.class.equals(pageType)) { - final int precision = decimalLogicalType.getPrecision(); - final int scale = decimalLogicalType.getScale(); - try { - verifyPrecisionAndScale(precision, scale, typeName); - } catch (final IllegalArgumentException exception) { - throw new TableDataException( - "Invalid scale and precision for column " + name + ": " + exception.getMessage()); - } - return Optional.of(ToBigDecimalPage.create( - pageType, - new BigDecimalParquetBytesCodec(precision, scale, encodedSizeInBytes), - columnChunkReader.getDictionarySupplier())); - } else if (BigInteger.class.equals(pageType)) { - return Optional.of(ToBigIntegerPage.create( - pageType, - new BigIntegerParquetBytesCodec(encodedSizeInBytes), - columnChunkReader.getDictionarySupplier())); + switch (typeName) { + case INT32: + return Optional.of(ToBigDecimalFromNumeric.createFromInt(pageType, decimalLogicalType.getScale())); + case INT64: + return Optional.of(ToBigDecimalFromNumeric.createFromLong(pageType, decimalLogicalType.getScale())); + case FIXED_LEN_BYTE_ARRAY: // fall through + case BINARY: + final int encodedSizeInBytes = typeName == BINARY ? -1 : type.getTypeLength(); + if (BigDecimal.class.equals(pageType)) { + final int precision = decimalLogicalType.getPrecision(); + final int scale = decimalLogicalType.getScale(); + try { + verifyPrecisionAndScale(precision, scale, typeName); + } catch (final IllegalArgumentException exception) { + throw new TableDataException( + "Invalid scale and precision for column " + name + ": " + exception.getMessage()); + } + return Optional.of(ToBigDecimalPage.create( + pageType, + new BigDecimalParquetBytesCodec(precision, scale, encodedSizeInBytes), + columnChunkReader.getDictionarySupplier())); + } else if (BigInteger.class.equals(pageType)) { + return Optional.of(ToBigIntegerPage.create( + pageType, + new BigIntegerParquetBytesCodec(encodedSizeInBytes), + columnChunkReader.getDictionarySupplier())); + } + // We won't blow up here, Maybe someone will provide us a codec instead. + default: + return Optional.empty(); } - // We won't blow up here, Maybe someone will provide us a codec instead. - return Optional.empty(); } } } diff --git a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/ColumnChunkPageStore.java b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/ColumnChunkPageStore.java index ba3ca21a328..daed8692a55 100644 --- a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/ColumnChunkPageStore.java +++ b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/ColumnChunkPageStore.java @@ -39,7 +39,7 @@ public abstract class ColumnChunkPageStore final PageCache pageCache; final ColumnChunkReader columnChunkReader; private final long mask; - private final ToPage toPage; + final ToPage toPage; private final long numRows; @@ -108,8 +108,7 @@ public static CreatorResult create( final ColumnChunkPageStore columnChunkPageStore = canUseOffsetIndex ? new OffsetIndexBasedColumnChunkPageStore<>(pageCache, columnChunkReader, mask, toPage) : new VariablePageSizeColumnChunkPageStore<>(pageCache, columnChunkReader, mask, toPage); - final ToPage dictionaryKeysToPage = - toPage.getDictionaryKeysToPage(); + final ToPage dictionaryKeysToPage = toPage.getDictionaryKeysToPage(); final ColumnChunkPageStore dictionaryKeysColumnChunkPageStore = dictionaryKeysToPage == null ? null : canUseOffsetIndex diff --git a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/OffsetIndexBasedColumnChunkPageStore.java b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/OffsetIndexBasedColumnChunkPageStore.java index a4721ca2384..30a18dd00f4 100644 --- a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/OffsetIndexBasedColumnChunkPageStore.java +++ b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/OffsetIndexBasedColumnChunkPageStore.java @@ -80,7 +80,8 @@ private void ensureInitialized(@Nullable final FillContext fillContext) { numPages = offsetIndex.getPageCount(); Assert.gtZero(numPages, "numPages"); pageStates = new AtomicReferenceArray<>(numPages); - columnPageDirectAccessor = columnChunkReader.getPageAccessor(offsetIndex); + columnPageDirectAccessor = + columnChunkReader.getPageAccessor(offsetIndex, toPage.getPageMaterializerFactory()); if (numPages == 1) { fixedPageSize = numRows(); diff --git a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/VariablePageSizeColumnChunkPageStore.java b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/VariablePageSizeColumnChunkPageStore.java index 0a0ea268a21..55af02eb5af 100644 --- a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/VariablePageSizeColumnChunkPageStore.java +++ b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/VariablePageSizeColumnChunkPageStore.java @@ -47,7 +47,7 @@ final class VariablePageSizeColumnChunkPageStore extends Colum columnPageReaders = new ColumnPageReader[INIT_ARRAY_SIZE]; // TODO(deephaven-core#4836): We probably need a super-interface of Iterator to allow ourselves to set or clear // the inner fill context to be used by next. - columnPageReaderIterator = columnChunkReader.getPageIterator(); + columnPageReaderIterator = columnChunkReader.getPageIterator(toPage.getPageMaterializerFactory()); // noinspection unchecked pages = (WeakReference>[]) new WeakReference[INIT_ARRAY_SIZE]; diff --git a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToBigDecimalFromNumeric.java b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToBigDecimalFromNumeric.java new file mode 100644 index 00000000000..0cb3ae7477d --- /dev/null +++ b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToBigDecimalFromNumeric.java @@ -0,0 +1,58 @@ +// +// Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending +// +package io.deephaven.parquet.table.pagestore.topage; + +import io.deephaven.chunk.ChunkType; +import io.deephaven.chunk.attributes.Any; +import io.deephaven.parquet.base.PageMaterializerFactory; +import io.deephaven.parquet.base.materializers.BigDecimalFromIntMaterializer; +import io.deephaven.parquet.base.materializers.BigDecimalFromLongMaterializer; +import org.jetbrains.annotations.NotNull; + +import java.math.BigDecimal; + +public class ToBigDecimalFromNumeric implements ToPage { + + public static ToPage createFromInt( + @NotNull final Class nativeType, + final int scale) { + return new ToBigDecimalFromNumeric<>(nativeType, new BigDecimalFromIntMaterializer.Factory(scale)); + } + + public static ToPage createFromLong( + @NotNull final Class nativeType, + final int scale) { + return new ToBigDecimalFromNumeric<>(nativeType, new BigDecimalFromLongMaterializer.Factory(scale)); + } + + private final PageMaterializerFactory pageMaterializerFactory; + + private ToBigDecimalFromNumeric( + @NotNull final Class nativeType, + @NotNull final PageMaterializerFactory pageMaterializerFactory) { + if (!BigDecimal.class.equals(nativeType)) { + throw new IllegalArgumentException( + "The native type for a BigDecimal column is " + nativeType.getCanonicalName()); + } + this.pageMaterializerFactory = pageMaterializerFactory; + } + + @NotNull + @Override + public final Class getNativeType() { + return BigDecimal.class; + } + + @Override + @NotNull + public final ChunkType getChunkType() { + return ChunkType.Object; + } + + @Override + @NotNull + public final PageMaterializerFactory getPageMaterializerFactory() { + return pageMaterializerFactory; + } +} diff --git a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToBigDecimalPage.java b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToBigDecimalPage.java index 0e60264aa3e..07d3f11e999 100644 --- a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToBigDecimalPage.java +++ b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToBigDecimalPage.java @@ -9,7 +9,8 @@ import io.deephaven.chunk.ChunkType; import io.deephaven.chunk.attributes.Any; -import io.deephaven.parquet.base.BigDecimalParquetBytesCodec; +import io.deephaven.parquet.base.PageMaterializerFactory; +import io.deephaven.parquet.base.materializers.BigDecimalFromBytesMaterializer; import io.deephaven.util.channel.SeekableChannelContext; import io.deephaven.util.codec.ObjectCodec; import org.apache.parquet.column.Dictionary; @@ -20,7 +21,7 @@ public class ToBigDecimalPage implements ToPage { - private static final ToBigDecimalPage INSTANCE = new ToBigDecimalPage<>(); + private final PageMaterializerFactory pageMaterializerFactory; public static ToPage create( final Class nativeType, @@ -28,8 +29,7 @@ public static ToPage create( final Function dictionarySupplier) { if (nativeType == null || BigDecimal.class.equals(nativeType)) { if (dictionarySupplier == null) { - // noinspection unchecked - return (ToPage) INSTANCE; + return new ToBigDecimalPage<>(codec); } // Note that dictionary supplier is never null, even if it points to a NULL_DICTIONARY. // So we always use the following dictionary version of ToPage but internally, we check if the dictionary is @@ -37,18 +37,18 @@ public static ToPage create( return new ToPageWithDictionary<>( BigDecimal.class, new ChunkDictionary<>( - (dictionary, key) -> { - final byte[] bytes = dictionary.decodeToBinary(key).getBytes(); - return codec.decode(bytes, 0, bytes.length); - }, + (dictionary, key) -> codec.decode(dictionary.decodeToBinary(key).toByteBuffer()), dictionarySupplier), - INSTANCE::convertResult); + (final Object result) -> (BigDecimal[]) result, + new BigDecimalFromBytesMaterializer.Factory(codec)); } throw new IllegalArgumentException( "The native type for a BigDecimal column is " + nativeType.getCanonicalName()); } - private ToBigDecimalPage() {} + private ToBigDecimalPage(@NotNull final ObjectCodec codec) { + pageMaterializerFactory = new BigDecimalFromBytesMaterializer.Factory(codec); + } @Override @NotNull @@ -61,4 +61,10 @@ public final Class getNativeType() { public final ChunkType getChunkType() { return ChunkType.Object; } + + @Override + @NotNull + public final PageMaterializerFactory getPageMaterializerFactory() { + return pageMaterializerFactory; + } } diff --git a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToBigIntegerPage.java b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToBigIntegerPage.java index 7b38928009d..9597eafb5c7 100644 --- a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToBigIntegerPage.java +++ b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToBigIntegerPage.java @@ -5,7 +5,8 @@ import io.deephaven.chunk.ChunkType; import io.deephaven.chunk.attributes.Any; -import io.deephaven.parquet.base.BigIntegerParquetBytesCodec; +import io.deephaven.parquet.base.PageMaterializerFactory; +import io.deephaven.parquet.base.materializers.BigIntegerMaterializer; import io.deephaven.util.channel.SeekableChannelContext; import io.deephaven.util.codec.ObjectCodec; import org.apache.parquet.column.Dictionary; @@ -16,7 +17,7 @@ public class ToBigIntegerPage implements ToPage { - private static final ToBigIntegerPage INSTANCE = new ToBigIntegerPage<>(); + private final PageMaterializerFactory pageMaterializerFactory; public static ToPage create( final Class nativeType, @@ -24,8 +25,7 @@ public static ToPage create( final Function dictionarySupplier) { if (nativeType == null || BigInteger.class.equals(nativeType)) { if (dictionarySupplier == null) { - // noinspection unchecked - return (ToPage) INSTANCE; + return new ToBigIntegerPage<>(codec); } // Note that dictionary supplier is never null, even if it points to a NULL_DICTIONARY. // So we always use the following dictionary version of ToPage but internally, we check if the dictionary is @@ -33,18 +33,18 @@ public static ToPage create( return new ToPageWithDictionary<>( BigInteger.class, new ChunkDictionary<>( - (dictionary, key) -> { - final byte[] bytes = dictionary.decodeToBinary(key).getBytes(); - return codec.decode(bytes, 0, bytes.length); - }, + (dictionary, key) -> codec.decode(dictionary.decodeToBinary(key).toByteBuffer()), dictionarySupplier), - INSTANCE::convertResult); + (final Object result) -> (BigInteger[]) result, + new BigIntegerMaterializer.Factory(codec)); } throw new IllegalArgumentException( "The native type for a BigInteger column is " + nativeType.getCanonicalName()); } - private ToBigIntegerPage() {} + private ToBigIntegerPage(@NotNull final ObjectCodec codec) { + pageMaterializerFactory = new BigIntegerMaterializer.Factory(codec); + } @Override @NotNull @@ -57,4 +57,10 @@ public final Class getNativeType() { public final ChunkType getChunkType() { return ChunkType.Object; } + + @Override + @NotNull + public final PageMaterializerFactory getPageMaterializerFactory() { + return pageMaterializerFactory; + } } diff --git a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToBooleanAsBytePage.java b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToBooleanAsBytePage.java index f7e13a3e036..8be732df7a2 100644 --- a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToBooleanAsBytePage.java +++ b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToBooleanAsBytePage.java @@ -4,6 +4,8 @@ package io.deephaven.parquet.table.pagestore.topage; import io.deephaven.chunk.attributes.Any; +import io.deephaven.parquet.base.PageMaterializerFactory; +import io.deephaven.parquet.base.materializers.BoolMaterializer; import io.deephaven.vector.ObjectVector; import io.deephaven.vector.ObjectVectorDirect; import io.deephaven.util.BooleanUtils; @@ -14,6 +16,7 @@ public class ToBooleanAsBytePage implements ToPage { + @SuppressWarnings("rawtypes") private static final ToBooleanAsBytePage INSTANCE = new ToBooleanAsBytePage<>(); public static ToBooleanAsBytePage create(Class nativeType) { @@ -51,6 +54,11 @@ public final Object nullValue() { return NULL_BOOLEAN_AS_BYTE_BOXED; } + @Override + public final PageMaterializerFactory getPageMaterializerFactory() { + return BoolMaterializer.FACTORY; + } + @Override @NotNull public ObjectVector makeVector(byte[] result) { diff --git a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToBytePage.java b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToBytePage.java index ff24ec4aeda..f63d9f2ebcb 100644 --- a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToBytePage.java +++ b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToBytePage.java @@ -9,12 +9,15 @@ import io.deephaven.chunk.ChunkType; import io.deephaven.chunk.attributes.Any; +import io.deephaven.parquet.base.PageMaterializerFactory; +import io.deephaven.parquet.base.materializers.ByteMaterializer; import org.jetbrains.annotations.NotNull; import static io.deephaven.util.QueryConstants.NULL_BYTE_BOXED; public class ToBytePage implements ToPage { + @SuppressWarnings("rawtypes") private static final ToBytePage INSTANCE = new ToBytePage<>(); public static ToBytePage create(Class nativeType) { @@ -45,4 +48,10 @@ public final ChunkType getChunkType() { public final Object nullValue() { return NULL_BYTE_BOXED; } + + @Override + @NotNull + public final PageMaterializerFactory getPageMaterializerFactory() { + return ByteMaterializer.FACTORY; + } } diff --git a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToCharPage.java b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToCharPage.java index 45f13abec94..3fbccc9eb95 100644 --- a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToCharPage.java +++ b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToCharPage.java @@ -9,12 +9,15 @@ import io.deephaven.chunk.ChunkType; import io.deephaven.chunk.attributes.Any; +import io.deephaven.parquet.base.PageMaterializerFactory; +import io.deephaven.parquet.base.materializers.CharMaterializer; import org.jetbrains.annotations.NotNull; import static io.deephaven.util.QueryConstants.NULL_CHAR_BOXED; public class ToCharPage implements ToPage { + @SuppressWarnings("rawtypes") private static final ToCharPage INSTANCE = new ToCharPage<>(); public static ToCharPage create(Class nativeType) { @@ -45,4 +48,10 @@ public final ChunkType getChunkType() { public final Object nullValue() { return NULL_CHAR_BOXED; } + + @Override + @NotNull + public final PageMaterializerFactory getPageMaterializerFactory() { + return CharMaterializer.FACTORY; + } } diff --git a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToDoublePage.java b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToDoublePage.java index 3a68addb35e..f5f450f4753 100644 --- a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToDoublePage.java +++ b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToDoublePage.java @@ -9,12 +9,15 @@ import io.deephaven.chunk.ChunkType; import io.deephaven.chunk.attributes.Any; +import io.deephaven.parquet.base.PageMaterializerFactory; +import io.deephaven.parquet.base.materializers.DoubleMaterializer; import org.jetbrains.annotations.NotNull; import static io.deephaven.util.QueryConstants.NULL_DOUBLE_BOXED; public class ToDoublePage implements ToPage { + @SuppressWarnings("rawtypes") private static final ToDoublePage INSTANCE = new ToDoublePage<>(); public static ToDoublePage create(Class nativeType) { @@ -45,4 +48,10 @@ public final ChunkType getChunkType() { public final Object nullValue() { return NULL_DOUBLE_BOXED; } + + @Override + @NotNull + public final PageMaterializerFactory getPageMaterializerFactory() { + return DoubleMaterializer.FACTORY; + } } diff --git a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToFloatPage.java b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToFloatPage.java index 3801e9ae21d..64ef1bd83ed 100644 --- a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToFloatPage.java +++ b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToFloatPage.java @@ -9,12 +9,15 @@ import io.deephaven.chunk.ChunkType; import io.deephaven.chunk.attributes.Any; +import io.deephaven.parquet.base.PageMaterializerFactory; +import io.deephaven.parquet.base.materializers.FloatMaterializer; import org.jetbrains.annotations.NotNull; import static io.deephaven.util.QueryConstants.NULL_FLOAT_BOXED; public class ToFloatPage implements ToPage { + @SuppressWarnings("rawtypes") private static final ToFloatPage INSTANCE = new ToFloatPage<>(); public static ToFloatPage create(Class nativeType) { @@ -45,4 +48,10 @@ public final ChunkType getChunkType() { public final Object nullValue() { return NULL_FLOAT_BOXED; } + + @Override + @NotNull + public final PageMaterializerFactory getPageMaterializerFactory() { + return FloatMaterializer.FACTORY; + } } diff --git a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToInstantPage.java b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToInstantPage.java index 2a961f688fe..ed53a510dc7 100644 --- a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToInstantPage.java +++ b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToInstantPage.java @@ -5,6 +5,11 @@ import io.deephaven.chunk.ChunkType; import io.deephaven.chunk.attributes.Any; +import io.deephaven.parquet.base.PageMaterializerFactory; +import io.deephaven.parquet.base.materializers.InstantNanosFromInt96Materializer; +import io.deephaven.parquet.base.materializers.InstantNanosFromMicrosMaterializer; +import io.deephaven.parquet.base.materializers.InstantNanosFromMillisMaterializer; +import io.deephaven.parquet.base.materializers.LongMaterializer; import io.deephaven.time.DateTimeUtils; import io.deephaven.vector.ObjectVector; import io.deephaven.vector.ObjectVectorDirect; @@ -16,20 +21,51 @@ public class ToInstantPage implements ToPage { - private static final ToInstantPage INSTANCE = new ToInstantPage<>(); + public static ToPage createFromMillis(final Class nativeType) { + verifyNativeType(nativeType); + // noinspection unchecked + return FROM_MILLIS; + } - @SuppressWarnings("unchecked") - public static ToPage create(final Class nativeType) { - if (nativeType == null || Instant.class.equals(nativeType)) { - // noinspection unchecked - return INSTANCE; - } + public static ToPage createFromMicros(final Class nativeType) { + verifyNativeType(nativeType); + // noinspection unchecked + return FROM_MICROS; + } + + public static ToPage createFromNanos(final Class nativeType) { + verifyNativeType(nativeType); + // noinspection unchecked + return FROM_NANOS; + } - throw new IllegalArgumentException( - "The native type for an Instant column is " + nativeType.getCanonicalName()); + public static ToPage createFromInt96(final Class nativeType) { + verifyNativeType(nativeType); + // noinspection unchecked + return FROM_INT96; } - private ToInstantPage() {} + @SuppressWarnings("rawtypes") + private static final ToPage FROM_MILLIS = new ToInstantPage<>(InstantNanosFromMillisMaterializer.FACTORY); + @SuppressWarnings("rawtypes") + private static final ToPage FROM_MICROS = new ToInstantPage<>(InstantNanosFromMicrosMaterializer.FACTORY); + @SuppressWarnings("rawtypes") + private static final ToPage FROM_NANOS = new ToInstantPage<>(LongMaterializer.FACTORY); + @SuppressWarnings("rawtypes") + private static final ToPage FROM_INT96 = new ToInstantPage<>(InstantNanosFromInt96Materializer.FACTORY); + + private static void verifyNativeType(final Class nativeType) { + if (nativeType != null && !Instant.class.equals(nativeType)) { + throw new IllegalArgumentException( + "The native type for an Instant column is " + nativeType.getCanonicalName()); + } + } + + private final PageMaterializerFactory pageMaterializerFactory; + + private ToInstantPage(@NotNull final PageMaterializerFactory pageMaterializerFactory) { + this.pageMaterializerFactory = pageMaterializerFactory; + } @Override @NotNull @@ -55,10 +91,16 @@ public final Object nullValue() { return NULL_LONG_BOXED; } + @Override + @NotNull + public final PageMaterializerFactory getPageMaterializerFactory() { + return pageMaterializerFactory; + } + @Override @NotNull public ObjectVector makeVector(long[] result) { - Instant[] to = new Instant[result.length]; + final Instant[] to = new Instant[result.length]; for (int i = 0; i < result.length; ++i) { to[i] = DateTimeUtils.epochNanosToInstant(result[i]); diff --git a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToIntPage.java b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToIntPage.java index 845ea12ec9a..3217d95c0f5 100644 --- a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToIntPage.java +++ b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToIntPage.java @@ -5,12 +5,15 @@ import io.deephaven.chunk.ChunkType; import io.deephaven.chunk.attributes.Any; +import io.deephaven.parquet.base.PageMaterializerFactory; +import io.deephaven.parquet.base.materializers.IntMaterializer; import org.jetbrains.annotations.NotNull; import static io.deephaven.util.QueryConstants.NULL_INT_BOXED; public class ToIntPage implements ToPage { + @SuppressWarnings("rawtypes") private static final ToIntPage INSTANCE = new ToIntPage<>(); public static ToIntPage create(Class nativeType) { @@ -41,4 +44,10 @@ public final ChunkType getChunkType() { public final Object nullValue() { return NULL_INT_BOXED; } + + @Override + @NotNull + public final PageMaterializerFactory getPageMaterializerFactory() { + return IntMaterializer.FACTORY; + } } diff --git a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToLocalDatePage.java b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToLocalDatePage.java index a2747a8d78e..96dce0d8bfd 100644 --- a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToLocalDatePage.java +++ b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToLocalDatePage.java @@ -5,12 +5,15 @@ import io.deephaven.chunk.ChunkType; import io.deephaven.chunk.attributes.Any; +import io.deephaven.parquet.base.PageMaterializerFactory; +import io.deephaven.parquet.base.materializers.LocalDateMaterializer; import org.jetbrains.annotations.NotNull; import java.time.LocalDate; public class ToLocalDatePage implements ToPage { + @SuppressWarnings("rawtypes") private static final ToLocalDatePage INSTANCE = new ToLocalDatePage<>(); public static ToLocalDatePage create(final Class nativeType) { @@ -35,4 +38,10 @@ public final Class getNativeType() { public final ChunkType getChunkType() { return ChunkType.Object; } + + @Override + @NotNull + public final PageMaterializerFactory getPageMaterializerFactory() { + return LocalDateMaterializer.FACTORY; + } } diff --git a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToLocalDateTimePage.java b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToLocalDateTimePage.java index 97f8ef5842c..3e78bb797dd 100644 --- a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToLocalDateTimePage.java +++ b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToLocalDateTimePage.java @@ -1,32 +1,57 @@ // // Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending // -// ****** AUTO-GENERATED CLASS - DO NOT EDIT MANUALLY -// ****** Edit ToLocalDatePage and run "./gradlew replicateToPage" to regenerate -// -// @formatter:off package io.deephaven.parquet.table.pagestore.topage; import io.deephaven.chunk.ChunkType; import io.deephaven.chunk.attributes.Any; +import io.deephaven.parquet.base.PageMaterializerFactory; +import io.deephaven.parquet.base.materializers.LocalDateTimeFromMillisMaterializer; +import io.deephaven.parquet.base.materializers.LocalDateTimeFromMicrosMaterializer; +import io.deephaven.parquet.base.materializers.LocalDateTimeFromNanosMaterializer; import org.jetbrains.annotations.NotNull; import java.time.LocalDateTime; public class ToLocalDateTimePage implements ToPage { - private static final ToLocalDateTimePage INSTANCE = new ToLocalDateTimePage<>(); + public static ToPage createFromMillis(final Class nativeType) { + verifyNativeType(nativeType); + // noinspection unchecked + return FROM_MILLIS; + } + + public static ToPage createFromMicros(final Class nativeType) { + verifyNativeType(nativeType); + // noinspection unchecked + return FROM_MICROS; + } + + public static ToPage createFromNanos(final Class nativeType) { + verifyNativeType(nativeType); + // noinspection unchecked + return FROM_NANOS; + } + + @SuppressWarnings("rawtypes") + private static final ToPage FROM_MILLIS = new ToLocalDateTimePage<>(LocalDateTimeFromMillisMaterializer.FACTORY); + @SuppressWarnings("rawtypes") + private static final ToPage FROM_MICROS = new ToLocalDateTimePage<>(LocalDateTimeFromMicrosMaterializer.FACTORY); + @SuppressWarnings("rawtypes") + private static final ToPage FROM_NANOS = new ToLocalDateTimePage<>(LocalDateTimeFromNanosMaterializer.FACTORY); - public static ToLocalDateTimePage create(final Class nativeType) { - if (nativeType == null || LocalDateTime.class.equals(nativeType)) { - // noinspection unchecked - return INSTANCE; + private static void verifyNativeType(final Class nativeType) { + if (nativeType != null && !LocalDateTime.class.equals(nativeType)) { + throw new IllegalArgumentException( + "The native type for a LocalDateTime column is " + nativeType.getCanonicalName()); } - throw new IllegalArgumentException( - "The native type for a LocalDateTime column is " + nativeType.getCanonicalName()); } - private ToLocalDateTimePage() {} + private final PageMaterializerFactory pageMaterializerFactory; + + private ToLocalDateTimePage(@NotNull final PageMaterializerFactory pageMaterializerFactory) { + this.pageMaterializerFactory = pageMaterializerFactory; + } @Override @NotNull @@ -39,4 +64,10 @@ public final Class getNativeType() { public final ChunkType getChunkType() { return ChunkType.Object; } + + @Override + @NotNull + public final PageMaterializerFactory getPageMaterializerFactory() { + return pageMaterializerFactory; + } } diff --git a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToLocalTimePage.java b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToLocalTimePage.java index 2c6fa645e27..265b277ff37 100644 --- a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToLocalTimePage.java +++ b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToLocalTimePage.java @@ -2,31 +2,60 @@ // Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending // // ****** AUTO-GENERATED CLASS - DO NOT EDIT MANUALLY -// ****** Edit ToLocalDatePage and run "./gradlew replicateToPage" to regenerate +// ****** Edit ToLocalDateTimePage and run "./gradlew replicateToPage" to regenerate // // @formatter:off package io.deephaven.parquet.table.pagestore.topage; import io.deephaven.chunk.ChunkType; import io.deephaven.chunk.attributes.Any; +import io.deephaven.parquet.base.PageMaterializerFactory; +import io.deephaven.parquet.base.materializers.LocalTimeFromMillisMaterializer; +import io.deephaven.parquet.base.materializers.LocalTimeFromMicrosMaterializer; +import io.deephaven.parquet.base.materializers.LocalTimeFromNanosMaterializer; import org.jetbrains.annotations.NotNull; import java.time.LocalTime; public class ToLocalTimePage implements ToPage { - private static final ToLocalTimePage INSTANCE = new ToLocalTimePage<>(); + public static ToPage createFromMillis(final Class nativeType) { + verifyNativeType(nativeType); + // noinspection unchecked + return FROM_MILLIS; + } + + public static ToPage createFromMicros(final Class nativeType) { + verifyNativeType(nativeType); + // noinspection unchecked + return FROM_MICROS; + } + + public static ToPage createFromNanos(final Class nativeType) { + verifyNativeType(nativeType); + // noinspection unchecked + return FROM_NANOS; + } + + @SuppressWarnings("rawtypes") + private static final ToPage FROM_MILLIS = new ToLocalTimePage<>(LocalTimeFromMillisMaterializer.FACTORY); + @SuppressWarnings("rawtypes") + private static final ToPage FROM_MICROS = new ToLocalTimePage<>(LocalTimeFromMicrosMaterializer.FACTORY); + @SuppressWarnings("rawtypes") + private static final ToPage FROM_NANOS = new ToLocalTimePage<>(LocalTimeFromNanosMaterializer.FACTORY); - public static ToLocalTimePage create(final Class nativeType) { - if (nativeType == null || LocalTime.class.equals(nativeType)) { - // noinspection unchecked - return INSTANCE; + private static void verifyNativeType(final Class nativeType) { + if (nativeType != null && !LocalTime.class.equals(nativeType)) { + throw new IllegalArgumentException( + "The native type for a LocalTime column is " + nativeType.getCanonicalName()); } - throw new IllegalArgumentException( - "The native type for a LocalTime column is " + nativeType.getCanonicalName()); } - private ToLocalTimePage() {} + private final PageMaterializerFactory pageMaterializerFactory; + + private ToLocalTimePage(@NotNull final PageMaterializerFactory pageMaterializerFactory) { + this.pageMaterializerFactory = pageMaterializerFactory; + } @Override @NotNull @@ -39,4 +68,10 @@ public final Class getNativeType() { public final ChunkType getChunkType() { return ChunkType.Object; } + + @Override + @NotNull + public final PageMaterializerFactory getPageMaterializerFactory() { + return pageMaterializerFactory; + } } diff --git a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToLongPage.java b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToLongPage.java index 51f77f9ed24..ca86ced2540 100644 --- a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToLongPage.java +++ b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToLongPage.java @@ -1,32 +1,48 @@ // // Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending // -// ****** AUTO-GENERATED CLASS - DO NOT EDIT MANUALLY -// ****** Edit ToIntPage and run "./gradlew replicateToPage" to regenerate -// -// @formatter:off package io.deephaven.parquet.table.pagestore.topage; import io.deephaven.chunk.ChunkType; import io.deephaven.chunk.attributes.Any; +import io.deephaven.parquet.base.PageMaterializerFactory; +import io.deephaven.parquet.base.materializers.LongFromUnsignedIntMaterializer; +import io.deephaven.parquet.base.materializers.LongMaterializer; import org.jetbrains.annotations.NotNull; import static io.deephaven.util.QueryConstants.NULL_LONG_BOXED; public class ToLongPage implements ToPage { - private static final ToLongPage INSTANCE = new ToLongPage<>(); + public static ToLongPage create(final Class nativeType) { + verifyNativeType(nativeType); + // noinspection unchecked + return FROM_LONG; + } + + public static ToLongPage createFromUnsignedInt(final Class nativeType) { + verifyNativeType(nativeType); + // noinspection unchecked + return FROM_UNSIGNED_INT; + } + + @SuppressWarnings("rawtypes") + private static final ToLongPage FROM_LONG = new ToLongPage<>(LongMaterializer.FACTORY); + @SuppressWarnings("rawtypes") + private static final ToLongPage FROM_UNSIGNED_INT = new ToLongPage<>(LongFromUnsignedIntMaterializer.FACTORY); - public static ToLongPage create(Class nativeType) { + private static void verifyNativeType(final Class nativeType) { if (nativeType == null || long.class.equals(nativeType)) { - // noinspection unchecked - return INSTANCE; + return; } - throw new IllegalArgumentException("The native type for a Long column is " + nativeType.getCanonicalName()); } - private ToLongPage() {} + private final PageMaterializerFactory pageMaterializerFactory; + + private ToLongPage(@NotNull final PageMaterializerFactory pageMaterializerFactory) { + this.pageMaterializerFactory = pageMaterializerFactory; + } @Override @NotNull @@ -45,4 +61,10 @@ public final ChunkType getChunkType() { public final Object nullValue() { return NULL_LONG_BOXED; } + + @Override + @NotNull + public final PageMaterializerFactory getPageMaterializerFactory() { + return pageMaterializerFactory; + } } diff --git a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToObjectPage.java b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToObjectPage.java index e13c7b9fe7f..d03bc420641 100644 --- a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToObjectPage.java +++ b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToObjectPage.java @@ -5,35 +5,39 @@ import io.deephaven.chunk.attributes.Any; import io.deephaven.chunk.ChunkType; +import io.deephaven.parquet.base.PageMaterializerFactory; +import io.deephaven.parquet.base.materializers.ObjectMaterializer; import io.deephaven.util.channel.SeekableChannelContext; import io.deephaven.util.codec.ObjectCodec; import org.apache.parquet.column.Dictionary; -import org.apache.parquet.io.api.Binary; import org.jetbrains.annotations.NotNull; -import java.lang.reflect.Array; import java.util.function.Function; public class ToObjectPage implements ToPage { + private final PageMaterializerFactory pageMaterializerFactory; private final Class nativeType; - private final ObjectCodec codec; public static ToPage create( final Class nativeType, @NotNull final ObjectCodec codec, final Function dictionarySupplier) { if (!nativeType.isPrimitive()) { - return dictionarySupplier == null ? new ToObjectPage<>(nativeType, codec) - : new ToPageWithDictionary<>( - nativeType, - new ChunkDictionary<>( - (dictionary, key) -> { - final byte[] bytes = dictionary.decodeToBinary(key).getBytes(); - return codec.decode(bytes, 0, bytes.length); - }, - dictionarySupplier), - (final Object result) -> convertResult(nativeType, codec, result)); + if (dictionarySupplier == null) { + return new ToObjectPage<>(nativeType, codec); + } + // Note that dictionary supplier is never null, even if it points to a NULL_DICTIONARY. + // So we always use the following dictionary version of ToPage but internally, we check if the dictionary is + // NULL and fall back to the default implementation. + // noinspection unchecked + return new ToPageWithDictionary<>( + nativeType, + new ChunkDictionary<>( + (dictionary, key) -> codec.decode(dictionary.decodeToBinary(key).toByteBuffer()), + dictionarySupplier), + (final Object result) -> (T[]) result, + new ObjectMaterializer.Factory<>(codec, nativeType)); } throw new IllegalArgumentException("The native type for a Object column is " + nativeType.getCanonicalName()); @@ -41,7 +45,7 @@ public static ToPage create( private ToObjectPage(Class nativeType, ObjectCodec codec) { this.nativeType = nativeType; - this.codec = codec; + this.pageMaterializerFactory = new ObjectMaterializer.Factory<>(codec, nativeType); } @Override @@ -58,23 +62,7 @@ public final ChunkType getChunkType() { @Override @NotNull - public final T[] convertResult(Object result) { - return convertResult(nativeType, codec, result); - } - - private static T2[] convertResult(final Class nativeType, final ObjectCodec codec, - final Object result) { - Binary[] from = (Binary[]) result; - // noinspection unchecked - T2[] to = (T2[]) Array.newInstance(nativeType, from.length); - - for (int ri = 0; ri < to.length; ++ri) { - if (from[ri] != null) { - byte[] bytes = from[ri].getBytes(); - to[ri] = codec.decode(bytes, 0, bytes.length); - } - } - - return to; + public final PageMaterializerFactory getPageMaterializerFactory() { + return pageMaterializerFactory; } } diff --git a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToPage.java b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToPage.java index c8bf4476115..0db2710f488 100644 --- a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToPage.java +++ b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToPage.java @@ -6,6 +6,7 @@ import io.deephaven.chunk.attributes.Any; import io.deephaven.engine.page.ChunkPageFactory; import io.deephaven.engine.table.impl.chunkattributes.DictionaryKeys; +import io.deephaven.parquet.base.PageMaterializerFactory; import io.deephaven.util.channel.SeekableChannelContext; import io.deephaven.vector.Vector; import io.deephaven.engine.page.ChunkPage; @@ -105,13 +106,18 @@ default ToPage getDictionaryKeysToPage() { } /** - * @return an reverse lookup map of the dictionary. + * @return a reverse lookup map of the dictionary. * @apiNote null iff {@link #getDictionaryChunk()} is null. */ default LongBitmapStringSet.ReversibleLookup getReversibleLookup() { return null; } + /** + * @return the factory to create the materializers for this column. + */ + PageMaterializerFactory getPageMaterializerFactory(); + abstract class Wrap implements ToPage { @@ -150,5 +156,10 @@ public ToPage getDictionaryKeysToPage() { public LongBitmapStringSet.ReversibleLookup getReversibleLookup() { return toPage.getReversibleLookup(); } + + @Override + public PageMaterializerFactory getPageMaterializerFactory() { + return toPage.getPageMaterializerFactory(); + } } } diff --git a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToPageWithDictionary.java b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToPageWithDictionary.java index 94cb1ccc415..8797282c29d 100644 --- a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToPageWithDictionary.java +++ b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToPageWithDictionary.java @@ -3,6 +3,7 @@ // package io.deephaven.parquet.table.pagestore.topage; +import io.deephaven.parquet.base.PageMaterializerFactory; import io.deephaven.util.channel.SeekableChannelContext; import io.deephaven.stringset.LongBitmapStringSet; import io.deephaven.chunk.attributes.Any; @@ -28,14 +29,17 @@ public class ToPageWithDictionary private final Class nativeType; private final ChunkDictionary chunkDictionary; private final Function convertResultFallbackFun; + private final PageMaterializerFactory pageMaterializerFactory; ToPageWithDictionary( @NotNull final Class nativeType, @NotNull final ChunkDictionary chunkDictionary, - @NotNull final Function convertResultFallbackFun) { + @NotNull final Function convertResultFallbackFun, + @NotNull final PageMaterializerFactory pageMaterializerFactory) { this.nativeType = nativeType; this.chunkDictionary = chunkDictionary; this.convertResultFallbackFun = convertResultFallbackFun; + this.pageMaterializerFactory = pageMaterializerFactory; } @Override @@ -50,6 +54,12 @@ public final ChunkType getChunkType() { return ChunkType.Object; } + @Override + @NotNull + public final PageMaterializerFactory getPageMaterializerFactory() { + return pageMaterializerFactory; + } + @Override @NotNull public final Object getResult(@NotNull final ColumnPageReader columnPageReader, @@ -115,6 +125,14 @@ public Object nullValue() { return NULL_INT; } + @Override + @NotNull + public PageMaterializerFactory getPageMaterializerFactory() { + // This factory should not be used for materializing any pages. + // The factory used for reading dictionary keys is provided inside ColumnPageReader#readKeyValues + return PageMaterializerFactory.NULL_FACTORY; + } + @Override public Object getResult(@NotNull final ColumnPageReader columnPageReader, @NotNull final SeekableChannelContext channelContext) diff --git a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToShortPage.java b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToShortPage.java index 23368baa1b8..ce749f217a4 100644 --- a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToShortPage.java +++ b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToShortPage.java @@ -9,12 +9,15 @@ import io.deephaven.chunk.ChunkType; import io.deephaven.chunk.attributes.Any; +import io.deephaven.parquet.base.PageMaterializerFactory; +import io.deephaven.parquet.base.materializers.ShortMaterializer; import org.jetbrains.annotations.NotNull; import static io.deephaven.util.QueryConstants.NULL_SHORT_BOXED; public class ToShortPage implements ToPage { + @SuppressWarnings("rawtypes") private static final ToShortPage INSTANCE = new ToShortPage<>(); public static ToShortPage create(Class nativeType) { @@ -45,4 +48,10 @@ public final ChunkType getChunkType() { public final Object nullValue() { return NULL_SHORT_BOXED; } + + @Override + @NotNull + public final PageMaterializerFactory getPageMaterializerFactory() { + return ShortMaterializer.FACTORY; + } } diff --git a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToStringPage.java b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToStringPage.java index 83447e68e93..1884fc83832 100644 --- a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToStringPage.java +++ b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToStringPage.java @@ -5,6 +5,8 @@ import io.deephaven.chunk.attributes.Any; import io.deephaven.chunk.ChunkType; +import io.deephaven.parquet.base.PageMaterializerFactory; +import io.deephaven.parquet.base.materializers.StringMaterializer; import io.deephaven.util.channel.SeekableChannelContext; import org.apache.parquet.column.Dictionary; import org.jetbrains.annotations.NotNull; @@ -26,15 +28,14 @@ public static ToPage create( new ChunkDictionary<>( (dictionary, key) -> dictionary.decodeToBinary(key).toStringUsingUTF8(), dictionarySupplier), - INSTANCE::convertResult); + INSTANCE::convertResult, + INSTANCE.getPageMaterializerFactory()); } throw new IllegalArgumentException( "The native type for a String column is " + nativeType.getCanonicalName()); } - private ToStringPage() {} - @Override @NotNull public final Class getNativeType() { @@ -46,4 +47,10 @@ public final Class getNativeType() { public final ChunkType getChunkType() { return ChunkType.Object; } + + @Override + @NotNull + public final PageMaterializerFactory getPageMaterializerFactory() { + return StringMaterializer.FACTORY; + } } diff --git a/extensions/parquet/table/src/test/java/io/deephaven/parquet/table/ParquetTableReadWriteTest.java b/extensions/parquet/table/src/test/java/io/deephaven/parquet/table/ParquetTableReadWriteTest.java index 67bcc373e4c..ee9ab2b081e 100644 --- a/extensions/parquet/table/src/test/java/io/deephaven/parquet/table/ParquetTableReadWriteTest.java +++ b/extensions/parquet/table/src/test/java/io/deephaven/parquet/table/ParquetTableReadWriteTest.java @@ -164,7 +164,7 @@ private static Table getTableFlat(int size, boolean includeSerializable, boolean "someCharColumn = (char)i", "someTime = DateTimeUtils.now() + i", "someKey = `` + (int)(i /100)", - "someBiColumn = java.math.BigInteger.valueOf(ii)", + "someBiColumn = i % 10 == 0 ? null : java.math.BigInteger.valueOf(ii)", "someDateColumn = i % 10 == 0 ? null : java.time.LocalDate.ofEpochDay(i)", "someTimeColumn = i % 10 == 0 ? null : java.time.LocalTime.of(i%24, i%60, (i+10)%60)", "someDateTimeColumn = i % 10 == 0 ? null : java.time.LocalDateTime.of(2000+i%10, i%12+1, i%30+1, (i+4)%24, (i+5)%60, (i+6)%60, i)", @@ -183,10 +183,10 @@ private static Table getTableFlat(int size, boolean includeSerializable, boolean "nullDateColumn = (java.time.LocalDate)null", "nullTimeColumn = (java.time.LocalTime)null")); if (includeBigDecimal) { - columns.add("bdColumn = java.math.BigDecimal.valueOf(ii).stripTrailingZeros()"); + columns.add("bdColumn = i % 10 == 0 ? null : java.math.BigDecimal.valueOf(ii).stripTrailingZeros()"); } if (includeSerializable) { - columns.add("someSerializable = new SomeSillyTest(i)"); + columns.add("someSerializable = i % 10 == 0 ? null : new SomeSillyTest(i)"); } return TableTools.emptyTable(size).select( Selectable.from(columns)); @@ -1369,8 +1369,8 @@ public void testAllNonPartitioningColumnTypes() { @Test public void decimalLogicalTypeTest() { final Table expected = TableTools.emptyTable(100_000).update( - "DecimalIntCol = java.math.BigDecimal.valueOf(ii*12, 5)", - "DecimalLongCol = java.math.BigDecimal.valueOf(ii*212, 8)"); + "DecimalIntCol = ii % 10 == 0 ? null : java.math.BigDecimal.valueOf(ii*12, 5)", + "DecimalLongCol = ii % 10 == 0 ? null : java.math.BigDecimal.valueOf(ii*212, 8)"); { // This reference file has Decimal logical type columns stored as INT32 and INT64 physical types @@ -2349,6 +2349,26 @@ public void partitionedParquetWithDotFilesTest() throws IOException { assertTableEquals(fromDisk, partitionedTable); } + @Test + public void readParquetFilesWithCodec() { + final Table table = TableTools.emptyTable(10000) + .update("bdColumn = java.math.BigDecimal.valueOf(ii).stripTrailingZeros()", + "biColumn = i % 10 == 0 ? null : java.math.BigInteger.valueOf(ii*512)") + .select(); + + // Set codecs for each column + final ParquetInstructions instructions = ParquetInstructions.builder() + .addColumnCodec("bdColumn", "io.deephaven.util.codec.BigDecimalCodec", "20,1,allowrounding") + .addColumnCodec("biColumn", "io.deephaven.util.codec.BigIntegerCodec") + .build(); + + final File parentDir = new File(rootFile, "tempDir"); + parentDir.mkdir(); + final File dest = new File(parentDir, "dataWithCodecInfo.parquet"); + ParquetTools.writeTable(table, dest.getPath(), instructions); + checkSingleTable(table, dest); + } + @Test public void partitionedParquetWithDuplicateDataTest() throws IOException { // Create an empty parent directory diff --git a/extensions/parquet/table/src/test/resources/ReferenceDecimalLogicalType.parquet b/extensions/parquet/table/src/test/resources/ReferenceDecimalLogicalType.parquet index f6cb4cf8507..b64f1f8c01b 100644 --- a/extensions/parquet/table/src/test/resources/ReferenceDecimalLogicalType.parquet +++ b/extensions/parquet/table/src/test/resources/ReferenceDecimalLogicalType.parquet @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e559539ce712961badc6f4af7586f02f281c7164b762ca39bacef962ad70c203 -size 891184 +oid sha256:d4c3cab569de5ba8e8a7648f5047f6e5ba8f2360dd85fe1dc175f1baa95955cc +size 800150 diff --git a/extensions/parquet/table/src/test/resources/ReferenceDecimalLogicalType2.parquet b/extensions/parquet/table/src/test/resources/ReferenceDecimalLogicalType2.parquet index e9aa389d375..47b9cbf3093 100644 --- a/extensions/parquet/table/src/test/resources/ReferenceDecimalLogicalType2.parquet +++ b/extensions/parquet/table/src/test/resources/ReferenceDecimalLogicalType2.parquet @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6b37b67a9b5aa942e23bec6fa9c9db3d1af847626c0a5f0d2c3e6d0c45cd53cd -size 1249011 +oid sha256:81d82b705cec5c7c0c9ac2c4681c5ed857b8dd8f8260d8557eff347cfab74b9a +size 1128004 diff --git a/replication/static/src/main/java/io/deephaven/replicators/ReplicateToPage.java b/replication/static/src/main/java/io/deephaven/replicators/ReplicateToPage.java index 991943678ab..bd76f91cf1a 100644 --- a/replication/static/src/main/java/io/deephaven/replicators/ReplicateToPage.java +++ b/replication/static/src/main/java/io/deephaven/replicators/ReplicateToPage.java @@ -5,7 +5,7 @@ import java.io.IOException; -import static io.deephaven.replication.ReplicatePrimitiveCode.intToAllButBoolean; +import static io.deephaven.replication.ReplicatePrimitiveCode.intToAllButBooleanAndLong; import static io.deephaven.replication.ReplicatePrimitiveCode.replaceAll; /** @@ -19,29 +19,23 @@ public class ReplicateToPage { "extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/"; private static final String TO_INT_PAGE_PATH = TO_PAGE_DIR + "ToIntPage.java"; - private static final String TO_LOCAL_DATE_PAGE_PATH = TO_PAGE_DIR + "ToLocalDatePage.java"; + private static final String TO_LOCAL_DATE_TIME_PAGE_PATH = TO_PAGE_DIR + "ToLocalDateTimePage.java"; private static final String TO_BIG_INTEGER_PAGE_PATH = TO_PAGE_DIR + "ToBigIntegerPage.java"; public static void main(String... args) throws IOException { - intToAllButBoolean(TASK, TO_INT_PAGE_PATH, "interface"); + intToAllButBooleanAndLong(TASK, TO_INT_PAGE_PATH, "interface"); - // LocalDate -> LocalDateTime + // LocalDateTime -> LocalTime String[][] pairs = new String[][] { - {"LocalDate", "LocalDateTime"} + {"LocalDateTime", "LocalTime"} }; - replaceAll(TASK, TO_LOCAL_DATE_PAGE_PATH, null, NO_EXCEPTIONS, pairs); - - // LocalDate -> LocalTime - pairs = new String[][] { - {"LocalDate", "LocalTime"} - }; - replaceAll(TASK, TO_LOCAL_DATE_PAGE_PATH, null, NO_EXCEPTIONS, pairs); + replaceAll(TASK, TO_LOCAL_DATE_TIME_PAGE_PATH, null, NO_EXCEPTIONS, pairs); // BigInteger -> BigDecimal pairs = new String[][] { + {"BigIntegerMaterializer", "BigDecimalFromBytesMaterializer"}, {"BigInteger", "BigDecimal"} }; replaceAll(TASK, TO_BIG_INTEGER_PAGE_PATH, null, NO_EXCEPTIONS, pairs); - } } diff --git a/replication/util/src/main/java/io/deephaven/replication/ReplicatePrimitiveCode.java b/replication/util/src/main/java/io/deephaven/replication/ReplicatePrimitiveCode.java index 921601649ad..2fecc1946ec 100644 --- a/replication/util/src/main/java/io/deephaven/replication/ReplicatePrimitiveCode.java +++ b/replication/util/src/main/java/io/deephaven/replication/ReplicatePrimitiveCode.java @@ -478,6 +478,22 @@ public static List intToAllButBoolean(String gradleTask, String sourceCl return results; } + public static List intToAllButBooleanAndLong(String gradleTask, String sourceClassJavaPath, + String... exemptions) throws IOException { + return intToAllButBooleanAndLong(gradleTask, sourceClassJavaPath, null, exemptions); + } + + public static List intToAllButBooleanAndLong(String gradleTask, String sourceClassJavaPath, + Map serialVersionUIDs, String... exemptions) throws IOException { + final List results = new ArrayList<>(); + results.add(intToChar(gradleTask, sourceClassJavaPath, serialVersionUIDs, exemptions)); + results.add(intToByte(gradleTask, sourceClassJavaPath, serialVersionUIDs, exemptions)); + results.add(intToShort(gradleTask, sourceClassJavaPath, serialVersionUIDs, exemptions)); + results.add(intToDouble(gradleTask, sourceClassJavaPath, serialVersionUIDs, exemptions)); + results.add(intToFloat(gradleTask, sourceClassJavaPath, serialVersionUIDs, exemptions)); + return results; + } + public static List shortToAllIntegralTypes(String gradleTask, String sourceClass, String... exemptions) throws IOException { return shortToAllIntegralTypes(gradleTask, sourceClass, null, exemptions);