From ac121dd42868c9f0104999ed63ac0aa31ddd5814 Mon Sep 17 00:00:00 2001 From: Devin Smith Date: Mon, 5 Aug 2024 14:37:08 -0700 Subject: [PATCH 01/43] feat: Add java client MultiJoin (#5886) Fixes #5883 --- .../sql/TableCreatorTicketInterceptor.java | 7 + .../engine/table/impl/TableCreatorImpl.java | 16 ++- .../client/impl/BarrageSnapshotImpl.java | 9 +- .../client/impl/BarrageSubscriptionImpl.java | 41 +----- .../client/impl/TableSpecLabeler.java | 57 ++++++++ .../client/DeephavenSessionTestBase.java | 9 ++ .../io/deephaven/client/MultiJoinTest.java | 57 ++++++++ .../client/impl/BatchTableRequestBuilder.java | 26 ++++ .../client/impl/TableHandleManagerBase.java | 18 +++ .../impl/TableHandleManagerDelegate.java | 6 + .../io/deephaven/qst/TableAdapterImpl.java | 52 ++++++- .../java/io/deephaven/qst/TableCreator.java | 10 ++ .../deephaven/qst/table/MultiJoinInput.java | 67 +++++++++ .../deephaven/qst/table/MultiJoinTable.java | 89 ++++++++++++ .../deephaven/qst/table/ParentsVisitor.java | 6 +- .../qst/table/StackTraceMixInCreator.java | 18 +++ .../deephaven/qst/table/TableCreatorImpl.java | 9 +- .../qst/table/TableLabelVisitor.java | 5 + .../io/deephaven/qst/table/TableSpec.java | 2 + .../qst/table/TableVisitorGeneric.java | 5 + .../qst/table/MultiJoinTableTest.java | 131 ++++++++++++++++++ 21 files changed, 587 insertions(+), 53 deletions(-) create mode 100644 java-client/barrage/src/main/java/io/deephaven/client/impl/TableSpecLabeler.java create mode 100644 java-client/session-dagger/src/test/java/io/deephaven/client/MultiJoinTest.java create mode 100644 qst/src/main/java/io/deephaven/qst/table/MultiJoinInput.java create mode 100644 qst/src/main/java/io/deephaven/qst/table/MultiJoinTable.java create mode 100644 qst/src/test/java/io/deephaven/qst/table/MultiJoinTableTest.java diff --git a/engine/sql/src/main/java/io/deephaven/engine/sql/TableCreatorTicketInterceptor.java b/engine/sql/src/main/java/io/deephaven/engine/sql/TableCreatorTicketInterceptor.java index 3ef1534ceeb..c3a81c9590b 100644 --- a/engine/sql/src/main/java/io/deephaven/engine/sql/TableCreatorTicketInterceptor.java +++ b/engine/sql/src/main/java/io/deephaven/engine/sql/TableCreatorTicketInterceptor.java @@ -7,10 +7,12 @@ import io.deephaven.qst.TableCreator; import io.deephaven.qst.table.EmptyTable; import io.deephaven.qst.table.InputTable; +import io.deephaven.qst.table.MultiJoinInput; import io.deephaven.qst.table.NewTable; import io.deephaven.qst.table.TicketTable; import io.deephaven.qst.table.TimeTable; +import java.util.List; import java.util.Map; import java.util.Objects; @@ -52,6 +54,11 @@ public Table of(InputTable inputTable) { return delegate.of(inputTable); } + @Override + public Table multiJoin(List> multiJoinInputs) { + return delegate.multiJoin(multiJoinInputs); + } + @Override public Table merge(Iterable tables) { return delegate.merge(tables); diff --git a/engine/table/src/main/java/io/deephaven/engine/table/impl/TableCreatorImpl.java b/engine/table/src/main/java/io/deephaven/engine/table/impl/TableCreatorImpl.java index 8753d531ae4..c1713ba7ff8 100644 --- a/engine/table/src/main/java/io/deephaven/engine/table/impl/TableCreatorImpl.java +++ b/engine/table/src/main/java/io/deephaven/engine/table/impl/TableCreatorImpl.java @@ -5,6 +5,8 @@ import com.google.auto.service.AutoService; import io.deephaven.engine.context.ExecutionContext; +import io.deephaven.engine.table.MultiJoinFactory; +import io.deephaven.engine.table.MultiJoinInput; import io.deephaven.engine.table.Table; import 
io.deephaven.engine.table.TableDefinition; import io.deephaven.engine.table.TableFactory; @@ -13,6 +15,8 @@ import io.deephaven.engine.util.TableTools; import io.deephaven.qst.TableCreator; import io.deephaven.qst.table.BlinkInputTable; +import io.deephaven.qst.table.Clock; +import io.deephaven.qst.table.ClockSystem; import io.deephaven.qst.table.EmptyTable; import io.deephaven.qst.table.InMemoryAppendOnlyInputTable; import io.deephaven.qst.table.InMemoryKeyBackedInputTable; @@ -22,12 +26,11 @@ import io.deephaven.qst.table.TableSchema; import io.deephaven.qst.table.TableSpec; import io.deephaven.qst.table.TicketTable; -import io.deephaven.qst.table.Clock; -import io.deephaven.qst.table.ClockSystem; import io.deephaven.qst.table.TimeTable; import io.deephaven.stream.TablePublisher; import java.util.Arrays; +import java.util.List; import java.util.concurrent.atomic.AtomicInteger; import java.util.stream.Stream; import java.util.stream.StreamSupport; @@ -85,6 +88,15 @@ public final Table of(InputTable inputTable) { return InputTableAdapter.of(inputTable); } + @Override + public final Table multiJoin(List> multiJoinInputs) { + return MultiJoinFactory.of(multiJoinInputs.stream().map(TableCreatorImpl::adapt).toArray(MultiJoinInput[]::new)) + .table(); + } + + private static MultiJoinInput adapt(io.deephaven.qst.table.MultiJoinInput
input) { + return MultiJoinInput.of(input.table(), input.matches(), input.additions()); + } @Override public final Table merge(Iterable
tables) { diff --git a/java-client/barrage/src/main/java/io/deephaven/client/impl/BarrageSnapshotImpl.java b/java-client/barrage/src/main/java/io/deephaven/client/impl/BarrageSnapshotImpl.java index 8ff73e27d93..f34382297e0 100644 --- a/java-client/barrage/src/main/java/io/deephaven/client/impl/BarrageSnapshotImpl.java +++ b/java-client/barrage/src/main/java/io/deephaven/client/impl/BarrageSnapshotImpl.java @@ -22,8 +22,6 @@ import io.deephaven.extensions.barrage.util.*; import io.deephaven.internal.log.LoggerFactory; import io.deephaven.io.logger.Logger; -import io.deephaven.qst.table.TableLabelVisitor; -import io.deephaven.qst.table.TicketTable; import io.grpc.CallOptions; import io.grpc.ClientCall; import io.grpc.Context; @@ -161,12 +159,7 @@ public void onError(final Throwable t) { .append(": Error detected in snapshot: ") .append(t).endl(); - final String label = tableHandle.export().table().walk(new TableLabelVisitor() { - @Override - public String visit(TicketTable ticketTable) { - return BarrageSubscriptionImpl.nameForTableTicket(ticketTable); - } - }); + final String label = TableSpecLabeler.of(tableHandle.export().table()); // this error will always be propagated to our CheckForCompletion#onError callback resultTable.handleBarrageError(new TableDataException( String.format("Barrage snapshot error for %s (%s)", logName, label), t)); diff --git a/java-client/barrage/src/main/java/io/deephaven/client/impl/BarrageSubscriptionImpl.java b/java-client/barrage/src/main/java/io/deephaven/client/impl/BarrageSubscriptionImpl.java index 0ddf9b9e87b..26c0672e649 100644 --- a/java-client/barrage/src/main/java/io/deephaven/client/impl/BarrageSubscriptionImpl.java +++ b/java-client/barrage/src/main/java/io/deephaven/client/impl/BarrageSubscriptionImpl.java @@ -27,12 +27,6 @@ import io.deephaven.extensions.barrage.util.*; import io.deephaven.internal.log.LoggerFactory; import io.deephaven.io.logger.Logger; -import io.deephaven.proto.util.ApplicationTicketHelper; -import io.deephaven.proto.util.ExportTicketHelper; -import io.deephaven.proto.util.ScopeTicketHelper; -import io.deephaven.proto.util.SharedTicketHelper; -import io.deephaven.qst.table.TableLabelVisitor; -import io.deephaven.qst.table.TicketTable; import io.deephaven.util.annotations.FinalDefault; import io.deephaven.util.annotations.VisibleForTesting; import io.grpc.CallOptions; @@ -45,7 +39,6 @@ import io.grpc.stub.ClientResponseObserver; import org.apache.arrow.flight.impl.Flight.FlightData; import org.apache.arrow.flight.impl.FlightServiceGrpc; -import org.apache.commons.codec.binary.Hex; import org.jetbrains.annotations.NotNull; import org.jetbrains.annotations.Nullable; @@ -162,12 +155,7 @@ public void onError(final Throwable t) { .append(": Error detected in subscription: ") .append(t).endl(); - final String label = tableHandle.export().table().walk(new TableLabelVisitor() { - @Override - public String visit(TicketTable ticketTable) { - return nameForTableTicket(ticketTable); - } - }); + final String label = TableSpecLabeler.of(tableHandle.export().table()); resultTable.handleBarrageError(new TableDataException( String.format("Barrage subscription error for %s (%s)", logName, label), t)); cleanup(); @@ -185,33 +173,6 @@ public void onCompleted() { } } - static String nameForTableTicket(TicketTable table) { - byte[] ticket = table.ticket(); - if (ticket.length == 0) { - return "ticketTable(EMPTY)"; - } - - // We'll try our best to decode the ticket, but it's not guaranteed to be a well-known ticket route. 
- try { - switch (ticket[0]) { - case 'a': - return ApplicationTicketHelper.toReadableString(ticket); - case 's': - return ScopeTicketHelper.toReadableString(ticket); - case 'e': - return ExportTicketHelper.toReadableString(ByteBuffer.wrap(ticket), "TicketTable"); - case 'h': - return SharedTicketHelper.toReadableString(ticket); - default: - break; - } - } catch (Exception err) { - // ignore - let's just return the hex representation - } - - return "ticketTable(0x" + Hex.encodeHexString(ticket) + ")"; - } - @Override public Future
entireTable() { return partialTable(null, null, false); diff --git a/java-client/barrage/src/main/java/io/deephaven/client/impl/TableSpecLabeler.java b/java-client/barrage/src/main/java/io/deephaven/client/impl/TableSpecLabeler.java new file mode 100644 index 00000000000..4e2c119d89a --- /dev/null +++ b/java-client/barrage/src/main/java/io/deephaven/client/impl/TableSpecLabeler.java @@ -0,0 +1,57 @@ +// +// Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending +// +package io.deephaven.client.impl; + +import io.deephaven.proto.util.ApplicationTicketHelper; +import io.deephaven.proto.util.ExportTicketHelper; +import io.deephaven.proto.util.ScopeTicketHelper; +import io.deephaven.proto.util.SharedTicketHelper; +import io.deephaven.qst.table.TableLabelVisitor; +import io.deephaven.qst.table.TableSpec; +import io.deephaven.qst.table.TicketTable; +import org.apache.commons.codec.binary.Hex; + +import java.nio.ByteBuffer; + +final class TableSpecLabeler extends TableLabelVisitor { + private static final TableSpecLabeler INSTANCE = new TableSpecLabeler(); + + public static String of(TableSpec tableSpec) { + return tableSpec.walk(INSTANCE); + } + + private TableSpecLabeler() {} + + static String nameForTableTicket(TicketTable table) { + byte[] ticket = table.ticket(); + if (ticket.length == 0) { + return "ticketTable(EMPTY)"; + } + + // We'll try our best to decode the ticket, but it's not guaranteed to be a well-known ticket route. + try { + switch (ticket[0]) { + case 'a': + return ApplicationTicketHelper.toReadableString(ticket); + case 's': + return ScopeTicketHelper.toReadableString(ticket); + case 'e': + return ExportTicketHelper.toReadableString(ByteBuffer.wrap(ticket), "TicketTable"); + case 'h': + return SharedTicketHelper.toReadableString(ticket); + default: + break; + } + } catch (Exception err) { + // ignore - let's just return the hex representation + } + + return "ticketTable(0x" + Hex.encodeHexString(ticket) + ")"; + } + + @Override + public String visit(TicketTable ticketTable) { + return nameForTableTicket(ticketTable); + } +} diff --git a/java-client/session-dagger/src/test/java/io/deephaven/client/DeephavenSessionTestBase.java b/java-client/session-dagger/src/test/java/io/deephaven/client/DeephavenSessionTestBase.java index 08b7fe8673c..5875a99d816 100644 --- a/java-client/session-dagger/src/test/java/io/deephaven/client/DeephavenSessionTestBase.java +++ b/java-client/session-dagger/src/test/java/io/deephaven/client/DeephavenSessionTestBase.java @@ -6,8 +6,12 @@ import io.deephaven.base.verify.Require; import io.deephaven.client.impl.Session; import io.deephaven.client.impl.SessionImpl; +import io.deephaven.engine.table.Table; +import io.deephaven.qst.table.TableSpec; +import io.deephaven.qst.table.TicketTable; import io.deephaven.server.runner.DeephavenApiServerTestBase; import io.deephaven.server.session.SessionState; +import io.deephaven.server.session.SessionState.ExportObject; import io.grpc.ManagedChannel; import org.junit.After; import org.junit.Before; @@ -50,4 +54,9 @@ public void tearDown() throws Exception { } super.tearDown(); } + + public TicketTable ref(Table table) { + final ExportObject
export = serverSessionState.newServerSideExport(table); + return TableSpec.ticket(export.getExportId().getTicket().toByteArray()); + } } diff --git a/java-client/session-dagger/src/test/java/io/deephaven/client/MultiJoinTest.java b/java-client/session-dagger/src/test/java/io/deephaven/client/MultiJoinTest.java new file mode 100644 index 00000000000..6b2bb23897b --- /dev/null +++ b/java-client/session-dagger/src/test/java/io/deephaven/client/MultiJoinTest.java @@ -0,0 +1,57 @@ +// +// Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending +// +package io.deephaven.client; + +import io.deephaven.api.ColumnName; +import io.deephaven.api.JoinMatch; +import io.deephaven.client.impl.TableHandle; +import io.deephaven.client.impl.TableHandle.TableHandleException; +import io.deephaven.engine.table.Table; +import io.deephaven.engine.util.TableTools; +import io.deephaven.qst.table.MultiJoinInput; +import io.deephaven.qst.table.MultiJoinTable; +import io.deephaven.qst.table.TableSpec; +import org.junit.Test; + +import static org.assertj.core.api.Assertions.assertThat; + +public class MultiJoinTest extends DeephavenSessionTestBase { + + @Test + public void multiJoinTableExecute() throws TableHandleException, InterruptedException { + try (final TableHandle handle = session.batch().execute(prototype())) { + assertThat(handle.isSuccessful()).isTrue(); + } + } + + private MultiJoinTable prototype() { + final Table t1 = TableTools.newTable( + TableTools.longCol("Key", 0L), + TableTools.longCol("First", 0L)); + final Table t2 = TableTools.newTable( + TableTools.longCol("Key", 0L), + TableTools.longCol("Second", 1L)); + final Table t3 = TableTools.newTable( + TableTools.longCol("Key", 0L), + TableTools.longCol("Third", 2L)); + return MultiJoinTable.builder() + .addInputs(MultiJoinInput.builder() + .table(ref(t1)) + .addMatches(JoinMatch.parse("OutputKey=Key")) + .addAdditions(ColumnName.of("First")) + .build()) + .addInputs(MultiJoinInput.builder() + .table(ref(t2)) + .addMatches(JoinMatch.parse("OutputKey=Key")) + .addAdditions(ColumnName.of("Second")) + .build()) + .addInputs(MultiJoinInput.builder() + .table(ref(t3)) + .addMatches(JoinMatch.parse("OutputKey=Key")) + .addAdditions(ColumnName.of("Third")) + .build()) + .build(); + + } +} diff --git a/java-client/session/src/main/java/io/deephaven/client/impl/BatchTableRequestBuilder.java b/java-client/session/src/main/java/io/deephaven/client/impl/BatchTableRequestBuilder.java index ee3888560de..136ef3d2fb4 100644 --- a/java-client/session/src/main/java/io/deephaven/client/impl/BatchTableRequestBuilder.java +++ b/java-client/session/src/main/java/io/deephaven/client/impl/BatchTableRequestBuilder.java @@ -53,6 +53,7 @@ import io.deephaven.proto.backplane.grpc.InCondition; import io.deephaven.proto.backplane.grpc.IsNullCondition; import io.deephaven.proto.backplane.grpc.MergeTablesRequest; +import io.deephaven.proto.backplane.grpc.MultiJoinTablesRequest; import io.deephaven.proto.backplane.grpc.NaturalJoinTablesRequest; import io.deephaven.proto.backplane.grpc.NotCondition; import io.deephaven.proto.backplane.grpc.OrCondition; @@ -90,6 +91,8 @@ import io.deephaven.qst.table.JoinTable; import io.deephaven.qst.table.LazyUpdateTable; import io.deephaven.qst.table.MergeTable; +import io.deephaven.qst.table.MultiJoinInput; +import io.deephaven.qst.table.MultiJoinTable; import io.deephaven.qst.table.NaturalJoinTable; import io.deephaven.qst.table.NewTable; import io.deephaven.qst.table.RangeJoinTable; @@ -583,6 +586,29 @@ public Operation 
visit(DropColumnsTable dropColumnsTable) { return op(Builder::setDropColumns, request); } + @Override + public Operation visit(MultiJoinTable multiJoinTable) { + final MultiJoinTablesRequest.Builder request = MultiJoinTablesRequest.newBuilder() + .setResultId(ticket); + for (MultiJoinInput input : multiJoinTable.inputs()) { + request.addMultiJoinInputs(adapt(input)); + } + return op(Builder::setMultiJoin, request); + } + + private io.deephaven.proto.backplane.grpc.MultiJoinInput adapt(MultiJoinInput input) { + io.deephaven.proto.backplane.grpc.MultiJoinInput.Builder builder = + io.deephaven.proto.backplane.grpc.MultiJoinInput.newBuilder() + .setSourceId(ref(input.table())); + for (JoinMatch match : input.matches()) { + builder.addColumnsToMatch(Strings.of(match)); + } + for (JoinAddition addition : input.additions()) { + builder.addColumnsToAdd(Strings.of(addition)); + } + return builder.build(); + } + private SelectOrUpdateRequest selectOrUpdate(SingleParentTable x, Collection columns) { SelectOrUpdateRequest.Builder builder = diff --git a/java-client/session/src/main/java/io/deephaven/client/impl/TableHandleManagerBase.java b/java-client/session/src/main/java/io/deephaven/client/impl/TableHandleManagerBase.java index eb07aade9c2..a813f108864 100644 --- a/java-client/session/src/main/java/io/deephaven/client/impl/TableHandleManagerBase.java +++ b/java-client/session/src/main/java/io/deephaven/client/impl/TableHandleManagerBase.java @@ -9,11 +9,15 @@ import io.deephaven.qst.table.InputTable; import io.deephaven.qst.table.LabeledTables; import io.deephaven.qst.table.MergeTable; +import io.deephaven.qst.table.MultiJoinInput; +import io.deephaven.qst.table.MultiJoinTable; import io.deephaven.qst.table.NewTable; import io.deephaven.qst.table.TableSpec; import io.deephaven.qst.table.TicketTable; import io.deephaven.qst.table.TimeTable; +import java.util.List; + abstract class TableHandleManagerBase implements TableHandleManager { protected abstract TableHandle handle(TableSpec table); @@ -49,6 +53,20 @@ public final TableHandle of(InputTable inputTable) { return handle(inputTable); } + @Override + public final TableHandle multiJoin(List> multiJoinInputs) { + MultiJoinTable.Builder builder = MultiJoinTable.builder(); + for (MultiJoinInput input : multiJoinInputs) { + // noinspection resource We're not making new TableHandles here + builder.addInputs(MultiJoinInput.builder() + .table(input.table().table()) + .addAllMatches(input.matches()) + .addAllAdditions(input.additions()) + .build()); + } + return handle(builder.build()); + } + @Override public final TableHandle merge(Iterable tableProxies) { MergeTable.Builder builder = MergeTable.builder(); diff --git a/java-client/session/src/main/java/io/deephaven/client/impl/TableHandleManagerDelegate.java b/java-client/session/src/main/java/io/deephaven/client/impl/TableHandleManagerDelegate.java index 4bb73bef7c1..3cad339e159 100644 --- a/java-client/session/src/main/java/io/deephaven/client/impl/TableHandleManagerDelegate.java +++ b/java-client/session/src/main/java/io/deephaven/client/impl/TableHandleManagerDelegate.java @@ -13,6 +13,7 @@ import io.deephaven.qst.table.EmptyTable; import io.deephaven.qst.table.InputTable; import io.deephaven.qst.table.LabeledTables; +import io.deephaven.qst.table.MultiJoinInput; import io.deephaven.qst.table.NewTable; import io.deephaven.qst.table.TableSpec; import io.deephaven.qst.table.TicketTable; @@ -100,6 +101,11 @@ public final TableHandle of(InputTable inputTable) { return delegate().of(inputTable); } + 
@Override + public final TableHandle multiJoin(List> multiJoinInputs) { + return delegate().multiJoin(multiJoinInputs); + } + @Override public final TableHandle merge(Iterable tableHandles) { return delegate().merge(tableHandles); diff --git a/qst/src/main/java/io/deephaven/qst/TableAdapterImpl.java b/qst/src/main/java/io/deephaven/qst/TableAdapterImpl.java index b065b14d69c..045f02261c6 100644 --- a/qst/src/main/java/io/deephaven/qst/TableAdapterImpl.java +++ b/qst/src/main/java/io/deephaven/qst/TableAdapterImpl.java @@ -7,8 +7,42 @@ import io.deephaven.api.TableOperations; import io.deephaven.api.agg.spec.AggSpec; import io.deephaven.qst.TableAdapterResults.Output; -import io.deephaven.qst.table.*; +import io.deephaven.qst.table.AggregateAllTable; +import io.deephaven.qst.table.AggregateTable; +import io.deephaven.qst.table.AsOfJoinTable; +import io.deephaven.qst.table.DropColumnsTable; +import io.deephaven.qst.table.EmptyTable; +import io.deephaven.qst.table.ExactJoinTable; +import io.deephaven.qst.table.HeadTable; +import io.deephaven.qst.table.InputTable; +import io.deephaven.qst.table.JoinTable; +import io.deephaven.qst.table.LazyUpdateTable; +import io.deephaven.qst.table.MergeTable; +import io.deephaven.qst.table.MultiJoinInput; +import io.deephaven.qst.table.MultiJoinTable; +import io.deephaven.qst.table.NaturalJoinTable; +import io.deephaven.qst.table.NewTable; +import io.deephaven.qst.table.ParentsVisitor; +import io.deephaven.qst.table.RangeJoinTable; +import io.deephaven.qst.table.ReverseTable; +import io.deephaven.qst.table.SelectDistinctTable; +import io.deephaven.qst.table.SelectTable; +import io.deephaven.qst.table.SingleParentTable; +import io.deephaven.qst.table.SnapshotTable; +import io.deephaven.qst.table.SnapshotWhenTable; +import io.deephaven.qst.table.SortTable; +import io.deephaven.qst.table.TableSpec; import io.deephaven.qst.table.TableSpec.Visitor; +import io.deephaven.qst.table.TailTable; +import io.deephaven.qst.table.TicketTable; +import io.deephaven.qst.table.TimeTable; +import io.deephaven.qst.table.UngroupTable; +import io.deephaven.qst.table.UpdateByTable; +import io.deephaven.qst.table.UpdateTable; +import io.deephaven.qst.table.UpdateViewTable; +import io.deephaven.qst.table.ViewTable; +import io.deephaven.qst.table.WhereInTable; +import io.deephaven.qst.table.WhereTable; import java.util.Collections; import java.util.LinkedHashMap; @@ -98,6 +132,22 @@ public Void visit(TimeTable timeTable) { return null; } + @Override + public Void visit(MultiJoinTable multiJoinTable) { + final List> inputs = + multiJoinTable.inputs().stream().map(this::adapt).collect(Collectors.toList()); + addTable(multiJoinTable, tableCreation.multiJoin(inputs)); + return null; + } + + private MultiJoinInput
adapt(MultiJoinInput input) { + return MultiJoinInput.
builder() + .table(table(input.table())) + .addAllMatches(input.matches()) + .addAllAdditions(input.additions()) + .build(); + } + @Override public Void visit(MergeTable mergeTable) { List
tables = diff --git a/qst/src/main/java/io/deephaven/qst/TableCreator.java b/qst/src/main/java/io/deephaven/qst/TableCreator.java index 30f5a6355f3..c5d8e61a795 100644 --- a/qst/src/main/java/io/deephaven/qst/TableCreator.java +++ b/qst/src/main/java/io/deephaven/qst/TableCreator.java @@ -7,6 +7,7 @@ import io.deephaven.qst.column.Column; import io.deephaven.qst.table.EmptyTable; import io.deephaven.qst.table.InputTable; +import io.deephaven.qst.table.MultiJoinInput; import io.deephaven.qst.table.NewTable; import io.deephaven.qst.table.TableSpec; import io.deephaven.qst.table.TicketTable; @@ -16,6 +17,7 @@ import java.time.Duration; import java.time.Instant; import java.util.Arrays; +import java.util.List; import java.util.stream.Stream; /** @@ -99,6 +101,14 @@ static , TABLE> TableAdapterResults> inputs); + /** * Merges the given {@code tables}. * diff --git a/qst/src/main/java/io/deephaven/qst/table/MultiJoinInput.java b/qst/src/main/java/io/deephaven/qst/table/MultiJoinInput.java new file mode 100644 index 00000000000..61f9fc2bd85 --- /dev/null +++ b/qst/src/main/java/io/deephaven/qst/table/MultiJoinInput.java @@ -0,0 +1,67 @@ +// +// Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending +// +package io.deephaven.qst.table; + +import io.deephaven.annotations.BuildableStyle; +import io.deephaven.api.JoinAddition; +import io.deephaven.api.JoinMatch; +import org.immutables.value.Value.Check; +import org.immutables.value.Value.Immutable; + +import java.util.List; + +@Immutable +@BuildableStyle +public abstract class MultiJoinInput { + + public static Builder builder() { + return ImmutableMultiJoinInput.builder(); + } + + public static MultiJoinInput of(T table, String columnsToMatch, String columnsToAdd) { + final Builder builder = builder(); + builder.table(table); + if (columnsToMatch != null) { + builder.addAllMatches(JoinMatch.from(columnsToMatch.split(","))); + } + if (columnsToAdd != null) { + builder.addAllAdditions(JoinAddition.from(columnsToAdd.split(","))); + } + return builder.build(); + } + + public abstract T table(); + + public abstract List matches(); + + public abstract List additions(); + + @Check + final void checkAdditions() { + if (additions().stream().map(JoinAddition::newColumn).distinct().count() != additions() + .size()) { + throw new IllegalArgumentException( + "Invalid MultiJoinInput additions, must not use the same output column multiple times."); + } + } + + public interface Builder { + + Builder table(T table); + + Builder addMatches(JoinMatch element); + + Builder addMatches(JoinMatch... elements); + + Builder addAllMatches(Iterable elements); + + Builder addAdditions(JoinAddition element); + + Builder addAdditions(JoinAddition... 
elements); + + Builder addAllAdditions(Iterable elements); + + MultiJoinInput build(); + } +} diff --git a/qst/src/main/java/io/deephaven/qst/table/MultiJoinTable.java b/qst/src/main/java/io/deephaven/qst/table/MultiJoinTable.java new file mode 100644 index 00000000000..07c394ce7e5 --- /dev/null +++ b/qst/src/main/java/io/deephaven/qst/table/MultiJoinTable.java @@ -0,0 +1,89 @@ +// +// Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending +// +package io.deephaven.qst.table; + +import io.deephaven.annotations.BuildableStyle; +import io.deephaven.api.JoinAddition; +import io.deephaven.api.JoinMatch; +import org.immutables.value.Value.Check; +import org.immutables.value.Value.Immutable; + +import java.util.Collection; +import java.util.List; + +@BuildableStyle +@Immutable +public abstract class MultiJoinTable extends TableBase { + + public static Builder builder() { + return ImmutableMultiJoinTable.builder(); + } + + public static MultiJoinTable of(MultiJoinInput... elements) { + return builder().addInputs(elements).build(); + } + + public static MultiJoinTable of(Iterable> elements) { + return builder().addAllInputs(elements).build(); + } + + public static MultiJoinTable from(String columnsToMatch, TableSpec... inputs) { + return of(inputs, JoinMatch.from(columnsToMatch.split(","))); + } + + public static MultiJoinTable from(Collection columnsToMatch, TableSpec... inputs) { + return of(inputs, JoinMatch.from(columnsToMatch)); + } + + public abstract List> inputs(); + + @Override + public final T walk(Visitor visitor) { + return visitor.visit(this); + } + + public interface Builder { + Builder addInputs(MultiJoinInput element); + + Builder addInputs(MultiJoinInput... elements); + + Builder addAllInputs(Iterable> elements); + + MultiJoinTable build(); + } + + @Check + final void checkInputs() { + if (inputs().isEmpty()) { + throw new IllegalArgumentException("MultiJoin inputs must be non-empty"); + } + } + + @Check + final void checkAdditions() { + if (inputs().stream() + .map(MultiJoinInput::additions) + .flatMap(Collection::stream) + .map(JoinAddition::newColumn) + .distinct() + .count() != inputs().stream() + .map(MultiJoinInput::additions) + .mapToLong(Collection::size) + .sum()) { + throw new IllegalArgumentException( + "Invalid MultiJoin additions, must not use the same output column multiple times."); + } + } + + private static MultiJoinTable of(TableSpec[] inputs, List matches) { + final Builder builder = builder(); + for (TableSpec input : inputs) { + builder.addInputs(MultiJoinInput.builder() + .table(input) + .addAllMatches(matches) + .build()); + } + return builder.build(); + } +} diff --git a/qst/src/main/java/io/deephaven/qst/table/ParentsVisitor.java b/qst/src/main/java/io/deephaven/qst/table/ParentsVisitor.java index e25f5b0873f..8a6d6bb49e9 100644 --- a/qst/src/main/java/io/deephaven/qst/table/ParentsVisitor.java +++ b/qst/src/main/java/io/deephaven/qst/table/ParentsVisitor.java @@ -8,7 +8,6 @@ import java.util.ArrayDeque; import java.util.ArrayList; import java.util.Comparator; -import java.util.HashSet; import java.util.LinkedHashSet; import java.util.List; import java.util.Objects; @@ -302,6 +301,11 @@ public Stream visit(DropColumnsTable dropColumnsTable) { return single(dropColumnsTable); } + @Override + public Stream visit(MultiJoinTable multiJoinTable) { + return multiJoinTable.inputs().stream().map(MultiJoinInput::table); + } + private static class Search { private final Predicate excludePaths; diff --git 
a/qst/src/main/java/io/deephaven/qst/table/StackTraceMixInCreator.java b/qst/src/main/java/io/deephaven/qst/table/StackTraceMixInCreator.java index 3a7287fa1f5..3b460bcf0b6 100644 --- a/qst/src/main/java/io/deephaven/qst/table/StackTraceMixInCreator.java +++ b/qst/src/main/java/io/deephaven/qst/table/StackTraceMixInCreator.java @@ -8,9 +8,11 @@ import java.util.Arrays; import java.util.HashMap; +import java.util.List; import java.util.Map; import java.util.Objects; import java.util.Optional; +import java.util.stream.Collectors; import java.util.stream.StreamSupport; public final class StackTraceMixInCreator, TABLE> @@ -77,6 +79,22 @@ public synchronized StackTraceMixIn of(InputTable inputTable) { return map.computeIfAbsent(tops, this::mixin); } + @Override + public synchronized StackTraceMixIn multiJoin( + List>> multiJoinInputs) { + final TABLE table = creator.multiJoin(multiJoinInputs.stream().map(this::adapt).collect(Collectors.toList())); + final TOPS tops = toOps.of(table); + return map.computeIfAbsent(tops, this::mixin); + } + + private MultiJoinInput
adapt(MultiJoinInput> input) { + return MultiJoinInput.
builder() + .table(input.table().table()) + .addAllMatches(input.matches()) + .addAllAdditions(input.additions()) + .build(); + } + @Override public synchronized StackTraceMixIn merge( Iterable> stackTraceMixIns) { diff --git a/qst/src/main/java/io/deephaven/qst/table/TableCreatorImpl.java b/qst/src/main/java/io/deephaven/qst/table/TableCreatorImpl.java index b63628d5288..3d45ddcaa21 100644 --- a/qst/src/main/java/io/deephaven/qst/table/TableCreatorImpl.java +++ b/qst/src/main/java/io/deephaven/qst/table/TableCreatorImpl.java @@ -6,6 +6,8 @@ import io.deephaven.api.TableOperations; import io.deephaven.qst.TableCreator; +import java.util.List; + /** * A "no-op" table creator impl, based on the QST structure itself. Mainly useful for testing the equivalence for the * {@link TableOperations} of {@link TableSpec}; but publicly available for functional completeness. @@ -42,8 +44,13 @@ public final TableSpec of(InputTable inputTable) { return inputTable; } + @Override + public final MultiJoinTable multiJoin(List> multiJoinInputs) { + return MultiJoinTable.builder().addAllInputs(multiJoinInputs).build(); + } + @Override public final MergeTable merge(Iterable tables) { - return ImmutableMergeTable.builder().addAllTables(tables).build(); + return MergeTable.builder().addAllTables(tables).build(); } } diff --git a/qst/src/main/java/io/deephaven/qst/table/TableLabelVisitor.java b/qst/src/main/java/io/deephaven/qst/table/TableLabelVisitor.java index 47b3bdec247..488558a0697 100644 --- a/qst/src/main/java/io/deephaven/qst/table/TableLabelVisitor.java +++ b/qst/src/main/java/io/deephaven/qst/table/TableLabelVisitor.java @@ -202,6 +202,11 @@ public String visit(SnapshotTable snapshotTable) { return "snapshot()"; } + @Override + public String visit(MultiJoinTable multiJoinTable) { + return "multiJoin()"; + } + private String join(String name, Join j) { return String.format("%s([%s],[%s])", name, diff --git a/qst/src/main/java/io/deephaven/qst/table/TableSpec.java b/qst/src/main/java/io/deephaven/qst/table/TableSpec.java index f926bb40d39..95784ca388b 100644 --- a/qst/src/main/java/io/deephaven/qst/table/TableSpec.java +++ b/qst/src/main/java/io/deephaven/qst/table/TableSpec.java @@ -149,5 +149,7 @@ interface Visitor { T visit(UngroupTable ungroupTable); T visit(DropColumnsTable dropColumnsTable); + + T visit(MultiJoinTable multiJoinTable); } } diff --git a/qst/src/main/java/io/deephaven/qst/table/TableVisitorGeneric.java b/qst/src/main/java/io/deephaven/qst/table/TableVisitorGeneric.java index cc02f4f46e0..3ef31a5cfc6 100644 --- a/qst/src/main/java/io/deephaven/qst/table/TableVisitorGeneric.java +++ b/qst/src/main/java/io/deephaven/qst/table/TableVisitorGeneric.java @@ -156,4 +156,9 @@ public T visit(UngroupTable ungroupTable) { public T visit(DropColumnsTable dropColumnsTable) { return accept(dropColumnsTable); } + + @Override + public T visit(MultiJoinTable multiJoinTable) { + return accept(multiJoinTable); + } } diff --git a/qst/src/test/java/io/deephaven/qst/table/MultiJoinTableTest.java b/qst/src/test/java/io/deephaven/qst/table/MultiJoinTableTest.java new file mode 100644 index 00000000000..a850bef1e01 --- /dev/null +++ b/qst/src/test/java/io/deephaven/qst/table/MultiJoinTableTest.java @@ -0,0 +1,131 @@ +// +// Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending +// +package io.deephaven.qst.table; + +import io.deephaven.api.ColumnName; +import io.deephaven.api.JoinAddition; +import io.deephaven.api.JoinMatch; +import org.junit.jupiter.api.Test; + +import java.util.Arrays; +import 
java.util.List; +import java.util.function.Supplier; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.failBecauseExceptionWasNotThrown; + +public class MultiJoinTableTest { + + @Test + void inputMissingTable() { + try { + MultiJoinInput.builder().build(); + failBecauseExceptionWasNotThrown(IllegalStateException.class); + } catch (IllegalStateException e) { + assertThat(e).hasMessageContaining("not set [table]"); + } + } + + @Test + void inputOverlappingAdditions() { + try { + MultiJoinInput.builder() + .table(TableSpec.empty(1)) + .addAdditions(ColumnName.of("Foo")) + .addAdditions(ColumnName.of("Foo")) + .build(); + failBecauseExceptionWasNotThrown(IllegalArgumentException.class); + } catch (IllegalArgumentException e) { + assertThat(e).hasMessageContaining("must not use the same output column multiple times"); + } + } + + @Test + void input() { + final MultiJoinInput input = MultiJoinInput.of( + TableSpec.empty(1), + "Foo,Bar=Baz", + "Bap,Bom=Blip"); + assertThat(input.table()).isEqualTo(TableSpec.empty(1)); + assertThat(input.matches()).containsExactly( + ColumnName.of("Foo"), + JoinMatch.of(ColumnName.of("Bar"), ColumnName.of("Baz"))); + assertThat(input.additions()).containsExactly( + ColumnName.of("Bap"), + JoinAddition.of(ColumnName.of("Bom"), ColumnName.of("Blip"))); + } + + @Test + void emptyInputs() { + for (Supplier supplier : Arrays.>asList( + () -> MultiJoinTable.builder().build(), + MultiJoinTable::of, + () -> MultiJoinTable.from("Key"), + () -> MultiJoinTable.from(List.of("Key1", "Key2")))) { + try { + supplier.get(); + failBecauseExceptionWasNotThrown(IllegalArgumentException.class); + } catch (IllegalArgumentException e) { + assertThat(e).hasMessageContaining("MultiJoin inputs must be non-empty"); + } + } + } + + @Test + void overlappingAdditions() { + try { + MultiJoinTable.of( + MultiJoinInput.builder() + .table(TableSpec.empty(1)) + .addAdditions(ColumnName.of("Foo")) + .build(), + MultiJoinInput.builder() + .table(TableSpec.empty(1)) + .addAdditions(ColumnName.of("Foo")) + .build()); + failBecauseExceptionWasNotThrown(IllegalArgumentException.class); + } catch (IllegalArgumentException e) { + assertThat(e).hasMessageContaining("must not use the same output column multiple times"); + } + } + + @Test + void keyHelpers() { + for (MultiJoinTable multiJoinTable : Arrays.asList( + MultiJoinTable.from( + "Key,Key2=Key3", + TableSpec.empty(1), + TableSpec.empty(2)), + MultiJoinTable.from( + List.of("Key", "Key2=Key3"), + TableSpec.empty(1), + TableSpec.empty(2)))) { + assertThat(multiJoinTable.inputs()).containsExactly( + MultiJoinInput.builder() + .table(TableSpec.empty(1)) + .addMatches( + ColumnName.of("Key"), + JoinMatch.of(ColumnName.of("Key2"), ColumnName.of("Key3"))) + .build(), + MultiJoinInput.builder() + .table(TableSpec.empty(2)) + .addMatches( + ColumnName.of("Key"), + JoinMatch.of(ColumnName.of("Key2"), ColumnName.of("Key3"))) + .build()); + } + } + + @Test + void multiJoinTable() { + final MultiJoinInput i1 = MultiJoinInput.of(TableSpec.empty(1), "Foo", "Bar"); + final MultiJoinInput i2 = MultiJoinInput.of(TableSpec.empty(2), "Baz", "Bap"); + for (MultiJoinTable mjt : Arrays.asList( + MultiJoinTable.builder().addInputs(i1).addInputs(i2).build(), + MultiJoinTable.of(i1, i2), + MultiJoinTable.of(List.of(i1, i2)))) { + assertThat(mjt.inputs()).containsExactly(i1, i2); + } + } +} From 74cd34a931e78fcb10133ea1a9b55e33d30cc3b0 Mon Sep 17 00:00:00 2001 From: Jianfeng Mao 
<4297243+jmao-denver@users.noreply.github.com> Date: Mon, 5 Aug 2024 17:14:52 -0600 Subject: [PATCH 02/43] fix: update snapshot_when doc with trigger table being append-only requirement when history is requested. (#5853) Fixes #5841 --- .../java/io/deephaven/engine/table/impl/QueryTable.java | 4 +++- py/server/deephaven/table.py | 4 ++++ py/server/tests/test_table.py | 9 +++++++++ .../io/deephaven/api/snapshot/SnapshotWhenOptions.java | 3 ++- 4 files changed, 18 insertions(+), 2 deletions(-) diff --git a/engine/table/src/main/java/io/deephaven/engine/table/impl/QueryTable.java b/engine/table/src/main/java/io/deephaven/engine/table/impl/QueryTable.java index ea698c1c8e7..be12408a63b 100644 --- a/engine/table/src/main/java/io/deephaven/engine/table/impl/QueryTable.java +++ b/engine/table/src/main/java/io/deephaven/engine/table/impl/QueryTable.java @@ -2401,7 +2401,9 @@ private static long snapshotHistoryInternal( private Table snapshotHistory(final String nuggetName, final Table baseTable, Collection stampColumns) { return QueryPerformanceRecorder.withNugget(nuggetName, baseTable.sizeForInstrumentation(), - () -> maybeViewForSnapshot(stampColumns).snapshotHistoryInternal(baseTable)); + () -> ((QueryTable) withAttributes(Map.of(APPEND_ONLY_TABLE_ATTRIBUTE, TRUE))) + .maybeViewForSnapshot(stampColumns) + .snapshotHistoryInternal(baseTable)); } private Table snapshotHistoryInternal(final Table baseTable) { diff --git a/py/server/deephaven/table.py b/py/server/deephaven/table.py index b29d911801f..72c496b8422 100644 --- a/py/server/deephaven/table.py +++ b/py/server/deephaven/table.py @@ -725,6 +725,10 @@ def snapshot_when(self, trigger_table: Table, stamp_cols: Union[str, List[str]] table. The "stamp key" is the last row of the trigger_table, limited by the stamp_cols. If trigger_table is empty, the "stamp key" will be represented by NULL values. + Note: the trigger_table must be append-only when the history flag is set to True. If the trigger_table is not + append-only and has modified or removed rows in its updates, the result snapshot table will be put in a failure + state and become unusable. + Args: trigger_table (Table): the trigger table stamp_cols (Union[str, Sequence[str]): The columns from trigger_table that form the "stamp key", may be diff --git a/py/server/tests/test_table.py b/py/server/tests/test_table.py index c9abe604a08..a0c4e751827 100644 --- a/py/server/tests/test_table.py +++ b/py/server/tests/test_table.py @@ -1,6 +1,7 @@ # # Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending # +import random import unittest from types import SimpleNamespace from typing import List, Any @@ -13,6 +14,7 @@ from deephaven.html import to_html from deephaven.jcompat import j_hashmap from deephaven.pandas import to_pandas +from deephaven.stream.table_publisher import table_publisher from deephaven.table import Table, SearchDisplayMode from tests.testbase import BaseTestCase, table_equals @@ -551,6 +553,13 @@ def test_snapshot_when_with_history(self): self.assertEqual(1 + len(self.test_table.columns), len(snapshot_hist.columns)) self.assertEqual(self.test_table.size, snapshot_hist.size) + t = time_table("PT0.1S").update("X = i % 2 == 0 ? 
i : i - 1").sort("X").tail(10) + with update_graph.shared_lock(t): + snapshot_hist = self.test_table.snapshot_when(t, history=True) + self.assertFalse(snapshot_hist.j_table.isFailed()) + self.wait_ticking_table_update(t, row_count=10, timeout=2) + self.assertTrue(snapshot_hist.j_table.isFailed()) + def test_agg_all_by(self): test_table = empty_table(10) test_table = test_table.update( diff --git a/table-api/src/main/java/io/deephaven/api/snapshot/SnapshotWhenOptions.java b/table-api/src/main/java/io/deephaven/api/snapshot/SnapshotWhenOptions.java index 7dcd27d7b5a..8a9f83c1228 100644 --- a/table-api/src/main/java/io/deephaven/api/snapshot/SnapshotWhenOptions.java +++ b/table-api/src/main/java/io/deephaven/api/snapshot/SnapshotWhenOptions.java @@ -39,7 +39,8 @@ public enum Flag { INCREMENTAL, /** * Whether the resulting table should keep history. A history table appends a full snapshot of {@code base} and - * the "stamp key" as opposed to updating existing rows. + * the "stamp key" as opposed to updating existing rows. When this flag is used, the trigger table must be + * append-only. * *

* Note: this flag is currently incompatible with {@link #INITIAL} and {@link #INCREMENTAL}. From 863283bed3fcf1daeb197d551f3e211cd66c6fcb Mon Sep 17 00:00:00 2001 From: Ryan Caudy Date: Mon, 5 Aug 2024 19:47:20 -0400 Subject: [PATCH 03/43] fix: Unit test and fix for AIOOBE in multiJoin when adding more than 3072 rows to an initially-empty result (#5906) --- ...ementalMultiJoinStateManagerTypedBase.java | 12 ++++++------ .../table/impl/QueryTableMultiJoinTest.java | 19 +++++++++++++++++++ 2 files changed, 25 insertions(+), 6 deletions(-) diff --git a/engine/table/src/main/java/io/deephaven/engine/table/impl/multijoin/IncrementalMultiJoinStateManagerTypedBase.java b/engine/table/src/main/java/io/deephaven/engine/table/impl/multijoin/IncrementalMultiJoinStateManagerTypedBase.java index ee0c1417aac..4d7f84d6a35 100644 --- a/engine/table/src/main/java/io/deephaven/engine/table/impl/multijoin/IncrementalMultiJoinStateManagerTypedBase.java +++ b/engine/table/src/main/java/io/deephaven/engine/table/impl/multijoin/IncrementalMultiJoinStateManagerTypedBase.java @@ -437,11 +437,6 @@ public boolean doRehash(boolean fullRehash, MutableInt rehashCredits, int nextCh return false; } - Assert.eqZero(rehashPointer, "rehashPointer"); - - if (numEntries == 0) { - return false; - } setupNewAlternate(oldTableSize); adviseNewAlternate(); return true; @@ -454,6 +449,8 @@ public boolean doRehash(boolean fullRehash, MutableInt rehashCredits, int nextCh protected abstract void adviseNewAlternate(); private void setupNewAlternate(int oldTableSize) { + Assert.eqZero(rehashPointer, "rehashPointer"); + alternateSlotToOutputRow = slotToOutputRow; slotToOutputRow = new ImmutableIntArraySource(); slotToOutputRow.ensureCapacity(tableSize); @@ -469,7 +466,10 @@ private void setupNewAlternate(int oldTableSize) { mainKeySources[ii].ensureCapacity(tableSize); } alternateTableSize = oldTableSize; - rehashPointer = alternateTableSize; + + if (numEntries > 0) { + rehashPointer = alternateTableSize; + } } protected void clearAlternate() { diff --git a/engine/table/src/test/java/io/deephaven/engine/table/impl/QueryTableMultiJoinTest.java b/engine/table/src/test/java/io/deephaven/engine/table/impl/QueryTableMultiJoinTest.java index 7d0490ef513..5cd865802de 100644 --- a/engine/table/src/test/java/io/deephaven/engine/table/impl/QueryTableMultiJoinTest.java +++ b/engine/table/src/test/java/io/deephaven/engine/table/impl/QueryTableMultiJoinTest.java @@ -26,6 +26,7 @@ import java.util.*; import java.util.function.IntUnaryOperator; import java.util.stream.Collectors; +import java.util.stream.IntStream; import static io.deephaven.engine.testutil.TstUtils.*; import static io.deephaven.engine.testutil.junit4.EngineCleanup.printTableUpdates; @@ -819,6 +820,24 @@ public void testMultiJoinInputColumnParsing() { Assert.assertEquals(mjiArr[1].columnsToMatch()[1].right().name(), "B"); } + @Test + public void testRehashWhenEmpty() { + final QueryTable t1 = TstUtils.testRefreshingTable(stringCol("Key"), intCol("S1")); + final QueryTable t2 = TstUtils.testRefreshingTable(stringCol("Key"), intCol("S2")); + + final ControlledUpdateGraph updateGraph = ExecutionContext.getContext().getUpdateGraph().cast(); + + final Table result = updateGraph.sharedLock().computeLocked( + () -> MultiJoinFactory.of(new String[] {"Key"}, t1, t2).table()); + + updateGraph.runWithinUnitTestCycle(() -> { + final RowSet additions = RowSetFactory.fromRange(0, 3073); + TstUtils.addToTable(t1, additions, + stringCol("Key", IntStream.rangeClosed(0, 
3073).mapToObj(Integer::toString).toArray(String[]::new)), + intCol("S1", IntStream.rangeClosed(0, 3073).map(i -> i * 2).toArray())); + t1.notifyListeners(additions, RowSetFactory.empty(), RowSetFactory.empty()); + }); + } private Table doIterativeMultiJoin(String[] keyColumns, List inputTables) { final List

keyTables = inputTables.stream() From 58ce658c875497c7588aed969bcbf9f5f130c880 Mon Sep 17 00:00:00 2001 From: Brian Ingles Date: Tue, 6 Aug 2024 08:36:23 -0500 Subject: [PATCH 04/43] feat: Added --browser / --no-browser flag (#5866) Added a `--browser` / `--no-browser` boolean flag to the embedded server cli. resolves #5838 --- py/embedded-server/deephaven_server/cli.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/py/embedded-server/deephaven_server/cli.py b/py/embedded-server/deephaven_server/cli.py index f4e057c38e9..04b499d5d0b 100644 --- a/py/embedded-server/deephaven_server/cli.py +++ b/py/embedded-server/deephaven_server/cli.py @@ -23,7 +23,8 @@ def cli(): @click.option("--jvm-args", default=None, help="The JVM arguments to use.") @click.option("--extra-classpath", default=None, help="The extra classpath to use.") @click.option("--default-jvm-args", default=None, help="The advanced JVM arguments to use in place of the default ones that Deephaven recommends.") -def server(host, port, jvm_args, extra_classpath, default_jvm_args): +@click.option('--browser/--no-browser', default=True, help="Whether to open the browser when the server starts.") +def server(host, port, jvm_args, extra_classpath, default_jvm_args, browser): """ Start the Deephaven server. """ @@ -46,7 +47,8 @@ def server(host, port, jvm_args, extra_classpath, default_jvm_args): authentication_url = authentication_urls[0] break - webbrowser.open(authentication_url) + if browser: + webbrowser.open(authentication_url) click.echo( f"Deephaven is running at {authentication_url} with authentication type {authentication_type}" From 9156dd8c78b9db2dc3d7bf8730f411cb86cf88be Mon Sep 17 00:00:00 2001 From: Devin Smith Date: Tue, 6 Aug 2024 08:34:40 -0700 Subject: [PATCH 05/43] feat: provide TableDefinition functions for Iceberg tables (#5891) Fixes #5868 --------- Co-authored-by: Larry Booker --- .../io/deephaven/engine/util/TableTools.java | 3 +- .../iceberg/util/IcebergToolsTest.java | 419 +++++++++++++----- .../iceberg/util/IcebergCatalogAdapter.java | 356 ++++++++++++--- 3 files changed, 593 insertions(+), 185 deletions(-) diff --git a/engine/table/src/main/java/io/deephaven/engine/util/TableTools.java b/engine/table/src/main/java/io/deephaven/engine/util/TableTools.java index 62a873325e2..924137c6947 100644 --- a/engine/table/src/main/java/io/deephaven/engine/util/TableTools.java +++ b/engine/table/src/main/java/io/deephaven/engine/util/TableTools.java @@ -15,6 +15,7 @@ import io.deephaven.engine.table.*; import io.deephaven.engine.table.impl.InMemoryTable; import io.deephaven.engine.table.impl.perf.QueryPerformanceRecorder; +import io.deephaven.engine.table.impl.sources.NullValueColumnSource; import io.deephaven.internal.log.LoggerFactory; import io.deephaven.time.DateTimeUtils; import io.deephaven.engine.table.impl.QueryTable; @@ -731,7 +732,7 @@ public static Table newTable(long size, Map> columns) { public static Table newTable(TableDefinition definition) { Map> columns = new LinkedHashMap<>(); for (ColumnDefinition columnDefinition : definition.getColumns()) { - columns.put(columnDefinition.getName(), ArrayBackedColumnSource.getMemoryColumnSource(0, + columns.put(columnDefinition.getName(), NullValueColumnSource.getInstance( columnDefinition.getDataType(), columnDefinition.getComponentType())); } return new QueryTable(definition, RowSetFactory.empty().toTracking(), columns) { diff --git a/extensions/iceberg/s3/src/test/java/io/deephaven/iceberg/util/IcebergToolsTest.java 
b/extensions/iceberg/s3/src/test/java/io/deephaven/iceberg/util/IcebergToolsTest.java index da373ef3a37..44a17942cdf 100644 --- a/extensions/iceberg/s3/src/test/java/io/deephaven/iceberg/util/IcebergToolsTest.java +++ b/extensions/iceberg/s3/src/test/java/io/deephaven/iceberg/util/IcebergToolsTest.java @@ -9,36 +9,92 @@ import io.deephaven.engine.table.Table; import io.deephaven.engine.table.TableDefinition; import io.deephaven.engine.table.impl.locations.TableDataException; -import io.deephaven.engine.util.TableTools; import io.deephaven.extensions.s3.S3Instructions; import io.deephaven.iceberg.TestCatalog.IcebergTestCatalog; import io.deephaven.iceberg.TestCatalog.IcebergTestFileIO; -import io.deephaven.time.DateTimeUtils; import org.apache.iceberg.Snapshot; import org.apache.iceberg.catalog.Catalog; import org.apache.iceberg.catalog.Namespace; import org.apache.iceberg.catalog.TableIdentifier; import org.apache.iceberg.io.FileIO; -import org.junit.jupiter.api.Test; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; import software.amazon.awssdk.core.async.AsyncRequestBody; import software.amazon.awssdk.services.s3.S3AsyncClient; -import software.amazon.awssdk.services.s3.model.*; +import software.amazon.awssdk.services.s3.model.CreateBucketRequest; +import software.amazon.awssdk.services.s3.model.DeleteBucketRequest; +import software.amazon.awssdk.services.s3.model.DeleteObjectRequest; +import software.amazon.awssdk.services.s3.model.PutObjectRequest; +import software.amazon.awssdk.services.s3.model.PutObjectResponse; import java.io.File; -import java.time.Instant; +import java.math.BigDecimal; +import java.time.LocalDate; +import java.time.LocalDateTime; +import java.time.LocalTime; import java.util.ArrayList; +import java.util.Arrays; import java.util.Collection; import java.util.List; -import java.util.Map; import java.util.concurrent.CompletableFuture; import java.util.concurrent.ExecutionException; import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; import java.util.stream.Collectors; +import static io.deephaven.iceberg.util.IcebergCatalogAdapter.NAMESPACE_DEFINITION; +import static io.deephaven.iceberg.util.IcebergCatalogAdapter.SNAPSHOT_DEFINITION; +import static io.deephaven.iceberg.util.IcebergCatalogAdapter.TABLES_DEFINITION; + public abstract class IcebergToolsTest { + + private static final TableDefinition SALES_SINGLE_DEFINITION = TableDefinition.of( + ColumnDefinition.ofString("Region"), + ColumnDefinition.ofString("Item_Type"), + ColumnDefinition.ofInt("Units_Sold"), + ColumnDefinition.ofDouble("Unit_Price"), + ColumnDefinition.ofTime("Order_Date")); + + private static final TableDefinition SALES_RENAMED_DEFINITION = TableDefinition.of( + ColumnDefinition.ofString("Region_Name"), + ColumnDefinition.ofString("ItemType"), + ColumnDefinition.ofInt("UnitsSold"), + ColumnDefinition.ofDouble("Unit_Price"), + ColumnDefinition.ofTime("Order_Date")); + + private static final TableDefinition SALES_MULTI_DEFINITION = SALES_SINGLE_DEFINITION; + + private static final TableDefinition SALES_PARTITIONED_DEFINITION = TableDefinition.of( + ColumnDefinition.ofString("Region"), + ColumnDefinition.ofString("Item_Type"), + ColumnDefinition.ofInt("Units_Sold"), + ColumnDefinition.ofDouble("Unit_Price"), + ColumnDefinition.ofTime("Order_Date"), + ColumnDefinition.ofInt("year").withPartitioning(), + ColumnDefinition.ofInt("month").withPartitioning()); + + private static final TableDefinition 
ALL_TYPES_DEF = TableDefinition.of( + ColumnDefinition.ofBoolean("booleanField"), + ColumnDefinition.ofInt("integerField"), + ColumnDefinition.ofLong("longField"), + ColumnDefinition.ofFloat("floatField"), + ColumnDefinition.ofDouble("doubleField"), + ColumnDefinition.ofString("stringField"), + ColumnDefinition.fromGenericType("dateField", LocalDate.class), + ColumnDefinition.fromGenericType("timeField", LocalTime.class), + ColumnDefinition.fromGenericType("timestampField", LocalDateTime.class), + ColumnDefinition.fromGenericType("decimalField", BigDecimal.class), + ColumnDefinition.fromGenericType("fixedField", byte[].class), + ColumnDefinition.fromGenericType("binaryField", byte[].class), + ColumnDefinition.ofTime("instantField")); + + private static final TableDefinition META_DEF = TableDefinition.of( + ColumnDefinition.ofString("Name"), + ColumnDefinition.ofString("DataType"), + ColumnDefinition.ofString("ColumnType"), + ColumnDefinition.ofBoolean("IsPartitioning")); + IcebergInstructions instructions; public abstract S3AsyncClient s3AsyncClient(); @@ -94,6 +150,31 @@ private void uploadParquetFiles(final File root, final String prefixToRemove) } } + private void uploadSalesPartitioned() throws ExecutionException, InterruptedException, TimeoutException { + uploadParquetFiles(new File(IcebergToolsTest.class.getResource("/warehouse/sales/sales_partitioned").getPath()), + warehousePath); + } + + private void uploadAllTypes() throws ExecutionException, InterruptedException, TimeoutException { + uploadParquetFiles(new File(IcebergToolsTest.class.getResource("/warehouse/sample/all_types").getPath()), + warehousePath); + } + + private void uploadSalesSingle() throws ExecutionException, InterruptedException, TimeoutException { + uploadParquetFiles(new File(IcebergToolsTest.class.getResource("/warehouse/sales/sales_single").getPath()), + warehousePath); + } + + private void uploadSalesMulti() throws ExecutionException, InterruptedException, TimeoutException { + uploadParquetFiles(new File(IcebergToolsTest.class.getResource("/warehouse/sales/sales_multi").getPath()), + warehousePath); + } + + private void uploadSalesRenamed() throws ExecutionException, InterruptedException, TimeoutException { + uploadParquetFiles(new File(IcebergToolsTest.class.getResource("/warehouse/sales/sales_renamed").getPath()), + warehousePath); + } + @AfterEach public void tearDown() throws ExecutionException, InterruptedException { for (String key : keys) { @@ -118,9 +199,7 @@ public void testListNamespaces() { final Table table = adapter.listNamespacesAsTable(); Assert.eq(table.size(), "table.size()", 2, "2 namespace in the catalog"); - Assert.eqTrue(table.getColumnSource("Namespace").getType().equals(String.class), "namespace column type"); - Assert.eqTrue(table.getColumnSource("NamespaceObject").getType().equals(Namespace.class), - "namespace_object column type"); + Assert.equals(table.getDefinition(), "table.getDefinition()", NAMESPACE_DEFINITION); } @Test @@ -139,14 +218,12 @@ public void testListTables() { Table table = adapter.listTablesAsTable(ns); Assert.eq(table.size(), "table.size()", 4, "4 tables in the namespace"); - Assert.eqTrue(table.getColumnSource("Namespace").getType().equals(String.class), "namespace column type"); - Assert.eqTrue(table.getColumnSource("TableName").getType().equals(String.class), "table_name column type"); - Assert.eqTrue(table.getColumnSource("TableIdentifierObject").getType().equals(TableIdentifier.class), - "table_identifier_object column type"); + 
Assert.equals(table.getDefinition(), "table.getDefinition()", TABLES_DEFINITION); // Test the string versions of the methods table = adapter.listTablesAsTable("sales"); Assert.eq(table.size(), "table.size()", 4, "4 tables in the namespace"); + Assert.equals(table.getDefinition(), "table.getDefinition()", TABLES_DEFINITION); } @Test @@ -167,22 +244,17 @@ public void testListSnapshots() { Table table = adapter.listSnapshotsAsTable(tableIdentifier); Assert.eq(table.size(), "table.size()", 4, "4 snapshots for sales/sales_multi"); - Assert.eqTrue(table.getColumnSource("Id").getType().equals(long.class), "id column type"); - Assert.eqTrue(table.getColumnSource("Timestamp").getType().equals(Instant.class), "timestamp column type"); - Assert.eqTrue(table.getColumnSource("Operation").getType().equals(String.class), "operation column type"); - Assert.eqTrue(table.getColumnSource("Summary").getType().equals(Map.class), "summary column type"); - Assert.eqTrue(table.getColumnSource("SnapshotObject").getType().equals(Snapshot.class), - "snapshot_object column type"); + Assert.equals(table.getDefinition(), "table.getDefinition()", SNAPSHOT_DEFINITION); // Test the string versions of the methods table = adapter.listSnapshotsAsTable("sales.sales_multi"); Assert.eq(table.size(), "table.size()", 4, "4 snapshots for sales/sales_multi"); + Assert.equals(table.getDefinition(), "table.getDefinition()", SNAPSHOT_DEFINITION); } @Test public void testOpenTableA() throws ExecutionException, InterruptedException, TimeoutException { - uploadParquetFiles(new File(IcebergToolsTest.class.getResource("/warehouse/sales/sales_partitioned").getPath()), - warehousePath); + uploadSalesPartitioned(); final IcebergCatalogAdapter adapter = IcebergTools.createAdapter(resourceCatalog, resourceFileIO); @@ -193,18 +265,19 @@ public void testOpenTableA() throws ExecutionException, InterruptedException, Ti // Verify we retrieved all the rows. Assert.eq(table.size(), "table.size()", 100_000, "100_000 rows in the table"); + Assert.equals(table.getDefinition(), "table.getDefinition()", SALES_PARTITIONED_DEFINITION); // Test the string versions of the methods table = adapter.readTable("sales.sales_partitioned", instructions); // Verify we retrieved all the rows. Assert.eq(table.size(), "table.size()", 100_000, "100_000 rows in the table"); + Assert.equals(table.getDefinition(), "table.getDefinition()", SALES_PARTITIONED_DEFINITION); } @Test public void testOpenTableB() throws ExecutionException, InterruptedException, TimeoutException { - uploadParquetFiles(new File(IcebergToolsTest.class.getResource("/warehouse/sales/sales_multi").getPath()), - warehousePath); + uploadSalesMulti(); final IcebergCatalogAdapter adapter = IcebergTools.createAdapter(resourceCatalog, resourceFileIO); @@ -214,18 +287,19 @@ public void testOpenTableB() throws ExecutionException, InterruptedException, Ti io.deephaven.engine.table.Table table = adapter.readTable(tableId, instructions); Assert.eq(table.size(), "table.size()", 100_000, "100_000 rows in the table"); + Assert.equals(table.getDefinition(), "table.getDefinition()", SALES_MULTI_DEFINITION); // Test the string versions of the methods table = adapter.readTable("sales.sales_multi", instructions); // Verify we retrieved all the rows. 
Assert.eq(table.size(), "table.size()", 100_000, "100_000 rows in the table"); + Assert.equals(table.getDefinition(), "table.getDefinition()", SALES_MULTI_DEFINITION); } @Test public void testOpenTableC() throws ExecutionException, InterruptedException, TimeoutException { - uploadParquetFiles(new File(IcebergToolsTest.class.getResource("/warehouse/sales/sales_single").getPath()), - warehousePath); + uploadSalesSingle(); final IcebergCatalogAdapter adapter = IcebergTools.createAdapter(resourceCatalog, resourceFileIO); @@ -236,18 +310,19 @@ public void testOpenTableC() throws ExecutionException, InterruptedException, Ti // Verify we retrieved all the rows. Assert.eq(table.size(), "table.size()", 100_000, "100_000 rows in the table"); + Assert.equals(table.getDefinition(), "table.getDefinition()", SALES_SINGLE_DEFINITION); // Test the string versions of the methods table = adapter.readTable("sales.sales_single", instructions); // Verify we retrieved all the rows. Assert.eq(table.size(), "table.size()", 100_000, "100_000 rows in the table"); + Assert.equals(table.getDefinition(), "table.getDefinition()", SALES_SINGLE_DEFINITION); } @Test public void testOpenTableS3Only() throws ExecutionException, InterruptedException, TimeoutException { - uploadParquetFiles(new File(IcebergToolsTest.class.getResource("/warehouse/sales/sales_partitioned").getPath()), - warehousePath); + uploadSalesPartitioned(); final IcebergCatalogAdapter adapter = IcebergTools.createAdapter(resourceCatalog, resourceFileIO); @@ -257,24 +332,15 @@ public void testOpenTableS3Only() throws ExecutionException, InterruptedExceptio // Verify we retrieved all the rows. Assert.eq(table.size(), "table.size()", 100_000, "100_000 rows in the table"); + Assert.equals(table.getDefinition(), "table.getDefinition()", SALES_PARTITIONED_DEFINITION); } @Test public void testOpenTableDefinition() throws ExecutionException, InterruptedException, TimeoutException { - uploadParquetFiles(new File(IcebergToolsTest.class.getResource("/warehouse/sales/sales_partitioned").getPath()), - warehousePath); - - final TableDefinition tableDef = TableDefinition.of( - ColumnDefinition.ofInt("year").withPartitioning(), - ColumnDefinition.ofInt("month").withPartitioning(), - ColumnDefinition.ofString("Region"), - ColumnDefinition.ofString("Item_Type"), - ColumnDefinition.ofInt("Units_Sold"), - ColumnDefinition.ofDouble("Unit_Price"), - ColumnDefinition.fromGenericType("Order_Date", Instant.class)); + uploadSalesPartitioned(); final IcebergInstructions localInstructions = IcebergInstructions.builder() - .tableDefinition(tableDef) + .tableDefinition(SALES_PARTITIONED_DEFINITION) .dataInstructions(instructions.dataInstructions().get()) .build(); @@ -287,6 +353,7 @@ public void testOpenTableDefinition() throws ExecutionException, InterruptedExce // Verify we retrieved all the rows. 
Assert.eq(table.size(), "table.size()", 100_000, "100_000 rows in the table"); + Assert.equals(table.getDefinition(), "table.getDefinition()", SALES_PARTITIONED_DEFINITION); } @Test @@ -298,7 +365,7 @@ public void testOpenTablePartitionTypeException() { ColumnDefinition.ofString("Item_Type"), ColumnDefinition.ofDouble("Units_Sold"), ColumnDefinition.ofLong("Unit_Price"), - ColumnDefinition.fromGenericType("Order_Date", Instant.class)); + ColumnDefinition.ofTime("Order_Date")); final IcebergInstructions localInstructions = IcebergInstructions.builder() .tableDefinition(tableDef) @@ -310,31 +377,35 @@ public void testOpenTablePartitionTypeException() { final Namespace ns = Namespace.of("sales"); final TableIdentifier tableId = TableIdentifier.of(ns, "sales_partitioned"); - try { - final io.deephaven.engine.table.Table table = adapter.readTable(tableId, localInstructions); - TableTools.showWithRowSet(table, 100, DateTimeUtils.timeZone(), System.out); - Assert.statementNeverExecuted("Expected an exception for missing columns"); - } catch (final TableDefinition.IncompatibleTableDefinitionException e) { - Assert.eqTrue(e.getMessage().startsWith("Table definition incompatibilities"), "Exception message"); + + for (Runnable runnable : Arrays.asList( + () -> adapter.readTable(tableId, localInstructions), + () -> adapter.getTableDefinition(tableId, localInstructions), + () -> adapter.getTableDefinitionTable(tableId, localInstructions))) { + try { + runnable.run(); + Assert.statementNeverExecuted("Expected an exception for missing columns"); + } catch (final TableDefinition.IncompatibleTableDefinitionException e) { + Assert.eqTrue(e.getMessage().startsWith("Table definition incompatibilities"), "Exception message"); + } } } @Test public void testOpenTableDefinitionRename() throws ExecutionException, InterruptedException, TimeoutException { - uploadParquetFiles(new File(IcebergToolsTest.class.getResource("/warehouse/sales/sales_partitioned").getPath()), - warehousePath); + uploadSalesPartitioned(); - final TableDefinition tableDef = TableDefinition.of( + final TableDefinition renamed = TableDefinition.of( ColumnDefinition.ofInt("__year").withPartitioning(), ColumnDefinition.ofInt("__month").withPartitioning(), ColumnDefinition.ofString("RegionName"), ColumnDefinition.ofString("ItemType"), ColumnDefinition.ofInt("UnitsSold"), ColumnDefinition.ofDouble("UnitPrice"), - ColumnDefinition.fromGenericType("OrderDate", Instant.class)); + ColumnDefinition.ofTime("OrderDate")); final IcebergInstructions localInstructions = IcebergInstructions.builder() - .tableDefinition(tableDef) + .tableDefinition(renamed) .dataInstructions(instructions.dataInstructions().get()) .putColumnRenames("Region", "RegionName") .putColumnRenames("Item_Type", "ItemType") @@ -354,12 +425,12 @@ public void testOpenTableDefinitionRename() throws ExecutionException, Interrupt // Verify we retrieved all the rows. 
Assert.eq(table.size(), "table.size()", 100_000, "100_000 rows in the table"); + Assert.equals(table.getDefinition(), "table.getDefinition()", renamed); } @Test public void testSkippedPartitioningColumn() throws ExecutionException, InterruptedException, TimeoutException { - uploadParquetFiles(new File(IcebergToolsTest.class.getResource("/warehouse/sales/sales_partitioned").getPath()), - warehousePath); + uploadSalesPartitioned(); final TableDefinition tableDef = TableDefinition.of( ColumnDefinition.ofInt("year").withPartitioning(), @@ -368,7 +439,7 @@ public void testSkippedPartitioningColumn() throws ExecutionException, Interrupt ColumnDefinition.ofString("Item_Type"), ColumnDefinition.ofInt("Units_Sold"), ColumnDefinition.ofDouble("Unit_Price"), - ColumnDefinition.fromGenericType("Order_Date", Instant.class)); + ColumnDefinition.ofTime("Order_Date")); final IcebergInstructions localInstructions = IcebergInstructions.builder() .tableDefinition(tableDef) @@ -384,12 +455,12 @@ public void testSkippedPartitioningColumn() throws ExecutionException, Interrupt // Verify we retrieved all the rows. Assert.eq(table.size(), "table.size()", 100_000, "100_000 rows in the table"); + Assert.equals(table.getDefinition(), "table.getDefinition()", tableDef); } @Test public void testReorderedPartitioningColumn() throws ExecutionException, InterruptedException, TimeoutException { - uploadParquetFiles(new File(IcebergToolsTest.class.getResource("/warehouse/sales/sales_partitioned").getPath()), - warehousePath); + uploadSalesPartitioned(); final TableDefinition tableDef = TableDefinition.of( ColumnDefinition.ofInt("month").withPartitioning(), @@ -398,7 +469,7 @@ public void testReorderedPartitioningColumn() throws ExecutionException, Interru ColumnDefinition.ofString("Item_Type"), ColumnDefinition.ofInt("Units_Sold"), ColumnDefinition.ofDouble("Unit_Price"), - ColumnDefinition.fromGenericType("Order_Date", Instant.class)); + ColumnDefinition.ofTime("Order_Date")); final IcebergInstructions localInstructions = IcebergInstructions.builder() .tableDefinition(tableDef) @@ -414,22 +485,15 @@ public void testReorderedPartitioningColumn() throws ExecutionException, Interru // Verify we retrieved all the rows. Assert.eq(table.size(), "table.size()", 100_000, "100_000 rows in the table"); + Assert.equals(table.getDefinition(), "table.getDefinition()", tableDef); } @Test public void testZeroPartitioningColumns() throws ExecutionException, InterruptedException, TimeoutException { - uploadParquetFiles(new File(IcebergToolsTest.class.getResource("/warehouse/sales/sales_partitioned").getPath()), - warehousePath); - - final TableDefinition tableDef = TableDefinition.of( - ColumnDefinition.ofString("Region"), - ColumnDefinition.ofString("Item_Type"), - ColumnDefinition.ofInt("Units_Sold"), - ColumnDefinition.ofDouble("Unit_Price"), - ColumnDefinition.fromGenericType("Order_Date", Instant.class)); + uploadSalesPartitioned(); final IcebergInstructions localInstructions = IcebergInstructions.builder() - .tableDefinition(tableDef) + .tableDefinition(SALES_MULTI_DEFINITION) .dataInstructions(instructions.dataInstructions().get()) .build(); @@ -442,6 +506,7 @@ public void testZeroPartitioningColumns() throws ExecutionException, Interrupted // Verify we retrieved all the rows. 
Assert.eq(table.size(), "table.size()", 100_000, "100_000 rows in the table"); + Assert.equals(table.getDefinition(), "table.getDefinition()", SALES_MULTI_DEFINITION); } @Test @@ -453,7 +518,7 @@ public void testIncorrectPartitioningColumns() throws ExecutionException, Interr ColumnDefinition.ofString("Item_Type"), ColumnDefinition.ofInt("Units_Sold"), ColumnDefinition.ofDouble("Unit_Price"), - ColumnDefinition.fromGenericType("Order_Date", Instant.class)); + ColumnDefinition.ofTime("Order_Date")); final IcebergInstructions localInstructions = IcebergInstructions.builder() .tableDefinition(tableDef) @@ -466,11 +531,17 @@ public void testIncorrectPartitioningColumns() throws ExecutionException, Interr final Namespace ns = Namespace.of("sales"); final TableIdentifier tableId = TableIdentifier.of(ns, "sales_partitioned"); - try { - final io.deephaven.engine.table.Table table = adapter.readTable(tableId, localInstructions); - Assert.statementNeverExecuted("Expected an exception for missing columns"); - } catch (final TableDataException e) { - Assert.eqTrue(e.getMessage().startsWith("The following columns are not partitioned"), "Exception message"); + for (Runnable runnable : Arrays.asList( + () -> adapter.readTable(tableId, localInstructions), + () -> adapter.getTableDefinition(tableId, localInstructions), + () -> adapter.getTableDefinitionTable(tableId, localInstructions))) { + try { + runnable.run(); + Assert.statementNeverExecuted("Expected an exception for missing columns"); + } catch (final TableDataException e) { + Assert.eqTrue(e.getMessage().startsWith("The following columns are not partitioned"), + "Exception message"); + } } } @@ -483,7 +554,7 @@ public void testMissingPartitioningColumns() { ColumnDefinition.ofString("Item_Type"), ColumnDefinition.ofDouble("Units_Sold"), ColumnDefinition.ofLong("Unit_Price"), - ColumnDefinition.fromGenericType("Order_Date", Instant.class)); + ColumnDefinition.ofTime("Order_Date")); final IcebergInstructions localInstructions = IcebergInstructions.builder() .tableDefinition(tableDef) @@ -495,18 +566,23 @@ public void testMissingPartitioningColumns() { final Namespace ns = Namespace.of("sales"); final TableIdentifier tableId = TableIdentifier.of(ns, "sales_partitioned"); - try { - final io.deephaven.engine.table.Table table = adapter.readTable(tableId, localInstructions); - Assert.statementNeverExecuted("Expected an exception for missing columns"); - } catch (final TableDefinition.IncompatibleTableDefinitionException e) { - Assert.eqTrue(e.getMessage().startsWith("Table definition incompatibilities"), "Exception message"); + + for (Runnable runnable : Arrays.asList( + () -> adapter.readTable(tableId, localInstructions), + () -> adapter.getTableDefinition(tableId, localInstructions), + () -> adapter.getTableDefinitionTable(tableId, localInstructions))) { + try { + runnable.run(); + Assert.statementNeverExecuted("Expected an exception for missing columns"); + } catch (final TableDefinition.IncompatibleTableDefinitionException e) { + Assert.eqTrue(e.getMessage().startsWith("Table definition incompatibilities"), "Exception message"); + } } } @Test public void testOpenTableColumnRename() throws ExecutionException, InterruptedException, TimeoutException { - uploadParquetFiles(new File(IcebergToolsTest.class.getResource("/warehouse/sales/sales_partitioned").getPath()), - warehousePath); + uploadSalesPartitioned(); final IcebergInstructions localInstructions = IcebergInstructions.builder() .dataInstructions(instructions.dataInstructions().get()) @@ -527,8 
+603,7 @@ public void testOpenTableColumnRename() throws ExecutionException, InterruptedEx @Test public void testOpenTableColumnLegalization() throws ExecutionException, InterruptedException, TimeoutException { - uploadParquetFiles(new File(IcebergToolsTest.class.getResource("/warehouse/sales/sales_renamed").getPath()), - warehousePath); + uploadSalesRenamed(); final IcebergInstructions localInstructions = IcebergInstructions.builder() .dataInstructions(instructions.dataInstructions().get()) @@ -543,19 +618,13 @@ public void testOpenTableColumnLegalization() throws ExecutionException, Interru // Verify we retrieved all the rows. Assert.eq(table.size(), "table.size()", 100_000, "100_000 rows in the table"); - - Assert.eqTrue(table.getDefinition().getColumn("Region_Name") != null, "'Region Name' renamed"); - Assert.eqTrue(table.getDefinition().getColumn("ItemType") != null, "'Item&Type' renamed"); - Assert.eqTrue(table.getDefinition().getColumn("UnitsSold") != null, "'Units/Sold' renamed"); - Assert.eqTrue(table.getDefinition().getColumn("Unit_Price") != null, "'Unit Pricee' renamed"); - Assert.eqTrue(table.getDefinition().getColumn("Order_Date") != null, "'Order Date' renamed"); + Assert.equals(table.getDefinition(), "table.getDefinition()", SALES_RENAMED_DEFINITION); } @Test public void testOpenTableColumnLegalizationRename() throws ExecutionException, InterruptedException, TimeoutException { - uploadParquetFiles(new File(IcebergToolsTest.class.getResource("/warehouse/sales/sales_renamed").getPath()), - warehousePath); + uploadSalesRenamed(); final IcebergInstructions localInstructions = IcebergInstructions.builder() .dataInstructions(instructions.dataInstructions().get()) @@ -570,19 +639,20 @@ public void testOpenTableColumnLegalizationRename() final TableIdentifier tableId = TableIdentifier.of(ns, "sales_renamed"); final io.deephaven.engine.table.Table table = adapter.readTable(tableId, localInstructions); + final TableDefinition expected = TableDefinition.of( + ColumnDefinition.ofString("Region_Name"), + ColumnDefinition.ofString("Item_Type"), + ColumnDefinition.ofInt("Units_Sold"), + ColumnDefinition.ofDouble("Unit_Price"), + ColumnDefinition.ofTime("Order_Date")); + // Verify we retrieved all the rows. 
Assert.eq(table.size(), "table.size()", 100_000, "100_000 rows in the table"); - - Assert.eqTrue(table.getDefinition().getColumn("Region_Name") != null, "'Region Name' renamed"); - Assert.eqTrue(table.getDefinition().getColumn("Item_Type") != null, "'Item&Type' renamed"); - Assert.eqTrue(table.getDefinition().getColumn("Units_Sold") != null, "'Units/Sold' renamed"); - Assert.eqTrue(table.getDefinition().getColumn("Unit_Price") != null, "'Unit Pricee' renamed"); - Assert.eqTrue(table.getDefinition().getColumn("Order_Date") != null, "'Order Date' renamed"); + Assert.equals(table.getDefinition(), "table.getDefinition()", expected); } @Test - public void testOpenTableColumnLegalizationPartitionException() - throws ExecutionException, InterruptedException, TimeoutException { + public void testOpenTableColumnLegalizationPartitionException() { final TableDefinition tableDef = TableDefinition.of( ColumnDefinition.ofInt("Year").withPartitioning(), ColumnDefinition.ofInt("Month").withPartitioning()); @@ -599,19 +669,24 @@ public void testOpenTableColumnLegalizationPartitionException() final Namespace ns = Namespace.of("sales"); final TableIdentifier tableId = TableIdentifier.of(ns, "sales_partitioned"); - try { - final io.deephaven.engine.table.Table table = adapter.readTable(tableId, localInstructions); - Assert.statementNeverExecuted("Expected an exception for missing columns"); - } catch (final TableDataException e) { - Assert.eqTrue(e.getMessage().contains("invalid column name provided"), "Exception message"); + + for (Runnable runnable : Arrays.asList( + () -> adapter.readTable(tableId, localInstructions), + () -> adapter.getTableDefinition(tableId, localInstructions), + () -> adapter.getTableDefinitionTable(tableId, localInstructions))) { + try { + runnable.run(); + Assert.statementNeverExecuted("Expected an exception for missing columns"); + } catch (final TableDataException e) { + Assert.eqTrue(e.getMessage().contains("invalid column name provided"), "Exception message"); + } } } @Test public void testOpenTableColumnRenamePartitioningColumns() throws ExecutionException, InterruptedException, TimeoutException { - uploadParquetFiles(new File(IcebergToolsTest.class.getResource("/warehouse/sales/sales_partitioned").getPath()), - warehousePath); + uploadSalesPartitioned(); final IcebergInstructions localInstructions = IcebergInstructions.builder() .dataInstructions(instructions.dataInstructions().get()) @@ -627,14 +702,23 @@ public void testOpenTableColumnRenamePartitioningColumns() final TableIdentifier tableId = TableIdentifier.of(ns, "sales_partitioned"); final io.deephaven.engine.table.Table table = adapter.readTable(tableId, localInstructions); + final TableDefinition expected = TableDefinition.of( + ColumnDefinition.ofString("Region"), + ColumnDefinition.ofString("Item_Type"), + ColumnDefinition.ofInt("Units_Sold"), + ColumnDefinition.ofDouble("Unit_Price"), + ColumnDefinition.ofTime("Order_Date"), + ColumnDefinition.ofInt("__year").withPartitioning(), + ColumnDefinition.ofInt("__month").withPartitioning()); + // Verify we retrieved all the rows. 
Assert.eq(table.size(), "table.size()", 100_000, "100_000 rows in the table"); + Assert.equals(table.getDefinition(), "table.getDefinition()", expected); } @Test public void testOpenTableSnapshot() throws ExecutionException, InterruptedException, TimeoutException { - uploadParquetFiles(new File(IcebergToolsTest.class.getResource("/warehouse/sales/sales_multi").getPath()), - warehousePath); + uploadSalesMulti(); final IcebergCatalogAdapter adapter = IcebergTools.createAdapter(resourceCatalog, resourceFileIO); @@ -646,24 +730,27 @@ public void testOpenTableSnapshot() throws ExecutionException, InterruptedExcept final io.deephaven.engine.table.Table table0 = adapter.readTable(tableId, snapshots.get(0).snapshotId(), instructions); Assert.eq(table0.size(), "table0.size()", 18266, "18266 rows in the table"); + Assert.equals(table0.getDefinition(), "table0.getDefinition()", SALES_MULTI_DEFINITION); final io.deephaven.engine.table.Table table1 = adapter.readTable(tableId, snapshots.get(1).snapshotId(), instructions); Assert.eq(table1.size(), "table1.size()", 54373, "54373 rows in the table"); + Assert.equals(table1.getDefinition(), "table1.getDefinition()", SALES_MULTI_DEFINITION); final io.deephaven.engine.table.Table table2 = adapter.readTable(tableId, snapshots.get(2).snapshotId(), instructions); Assert.eq(table2.size(), "table2.size()", 72603, "72603 rows in the table"); + Assert.equals(table2.getDefinition(), "table2.getDefinition()", SALES_MULTI_DEFINITION); final io.deephaven.engine.table.Table table3 = adapter.readTable(tableId, snapshots.get(3).snapshotId(), instructions); Assert.eq(table3.size(), "table3.size()", 100_000, "100_000 rows in the table"); + Assert.equals(table3.getDefinition(), "table3.getDefinition()", SALES_MULTI_DEFINITION); } @Test public void testOpenTableSnapshotByID() throws ExecutionException, InterruptedException, TimeoutException { - uploadParquetFiles(new File(IcebergToolsTest.class.getResource("/warehouse/sales/sales_multi").getPath()), - warehousePath); + uploadSalesMulti(); final IcebergCatalogAdapter adapter = IcebergTools.createAdapter(resourceCatalog, resourceFileIO); @@ -674,45 +761,151 @@ public void testOpenTableSnapshotByID() throws ExecutionException, InterruptedEx // Verify we retrieved all the rows. io.deephaven.engine.table.Table table0 = adapter.readTable(tableId, snapshots.get(0), instructions); Assert.eq(table0.size(), "table0.size()", 18266, "18266 rows in the table"); + Assert.equals(table0.getDefinition(), "table0.getDefinition()", SALES_MULTI_DEFINITION); io.deephaven.engine.table.Table table1 = adapter.readTable(tableId, snapshots.get(1), instructions); Assert.eq(table1.size(), "table1.size()", 54373, "54373 rows in the table"); + Assert.equals(table1.getDefinition(), "table1.getDefinition()", SALES_MULTI_DEFINITION); io.deephaven.engine.table.Table table2 = adapter.readTable(tableId, snapshots.get(2), instructions); Assert.eq(table2.size(), "table2.size()", 72603, "72603 rows in the table"); + Assert.equals(table2.getDefinition(), "table2.getDefinition()", SALES_MULTI_DEFINITION); io.deephaven.engine.table.Table table3 = adapter.readTable(tableId, snapshots.get(3), instructions); Assert.eq(table3.size(), "table3.size()", 100_000, "100_000 rows in the table"); + Assert.equals(table3.getDefinition(), "table3.getDefinition()", SALES_MULTI_DEFINITION); // Test the string versions of the methods // Verify we retrieved all the rows. 
table0 = adapter.readTable("sales.sales_multi", snapshots.get(0).snapshotId(), instructions); Assert.eq(table0.size(), "table0.size()", 18266, "18266 rows in the table"); + Assert.equals(table0.getDefinition(), "table0.getDefinition()", SALES_MULTI_DEFINITION); table1 = adapter.readTable(tableId, snapshots.get(1).snapshotId(), instructions); Assert.eq(table1.size(), "table1.size()", 54373, "54373 rows in the table"); + Assert.equals(table1.getDefinition(), "table1.getDefinition()", SALES_MULTI_DEFINITION); table2 = adapter.readTable(tableId, snapshots.get(2).snapshotId(), instructions); Assert.eq(table2.size(), "table2.size()", 72603, "72603 rows in the table"); + Assert.equals(table2.getDefinition(), "table2.getDefinition()", SALES_MULTI_DEFINITION); table3 = adapter.readTable(tableId, snapshots.get(3).snapshotId(), instructions); Assert.eq(table3.size(), "table3.size()", 100_000, "100_000 rows in the table"); + Assert.equals(table3.getDefinition(), "table0.getDefinition()", SALES_MULTI_DEFINITION); } @Test public void testOpenAllTypesTable() throws ExecutionException, InterruptedException, TimeoutException { - uploadParquetFiles(new File(IcebergToolsTest.class.getResource("/warehouse/sample/all_types").getPath()), - warehousePath); + uploadAllTypes(); final IcebergCatalogAdapter adapter = IcebergTools.createAdapter(resourceCatalog, resourceFileIO); final Namespace ns = Namespace.of("sample"); final TableIdentifier tableId = TableIdentifier.of(ns, "all_types"); - final List snapshots = adapter.listSnapshots(tableId); // Verify we retrieved all the rows. final io.deephaven.engine.table.Table table = adapter.readTable(tableId, instructions); Assert.eq(table.size(), "table.size()", 10, "10 rows in the table"); + Assert.equals(table.getDefinition(), "table.getDefinition()", ALL_TYPES_DEF); + } + + @Test + public void testTableDefinition() { + final IcebergCatalogAdapter adapter = IcebergTools.createAdapter(resourceCatalog, resourceFileIO); + + final Namespace ns = Namespace.of("sales"); + final TableIdentifier tableId = TableIdentifier.of(ns, "sales_multi"); + final List snapshots = adapter.listSnapshots(tableId); + + // Use string and current snapshot + TableDefinition tableDef = adapter.getTableDefinition("sales.sales_multi", null); + Assert.equals(tableDef, "tableDef", SALES_MULTI_DEFINITION); + + // Use TableIdentifier and Snapshot + tableDef = adapter.getTableDefinition(tableId, null); + Assert.equals(tableDef, "tableDef", SALES_MULTI_DEFINITION); + + // Use string and long snapshot ID + tableDef = adapter.getTableDefinition("sales.sales_multi", snapshots.get(0).snapshotId(), null); + Assert.equals(tableDef, "tableDef", SALES_MULTI_DEFINITION); + + // Use TableIdentifier and Snapshot + tableDef = adapter.getTableDefinition(tableId, snapshots.get(0), null); + Assert.equals(tableDef, "tableDef", SALES_MULTI_DEFINITION); + } + + @Test + public void testTableDefinitionTable() { + final IcebergCatalogAdapter adapter = IcebergTools.createAdapter(resourceCatalog, resourceFileIO); + + final Namespace ns = Namespace.of("sales"); + final TableIdentifier tableId = TableIdentifier.of(ns, "sales_multi"); + final List snapshots = adapter.listSnapshots(tableId); + + // Use string and current snapshot + Table tableDefTable = adapter.getTableDefinitionTable("sales.sales_multi", null); + + Assert.eq(tableDefTable.size(), "tableDefTable.size()", 5, "5 rows in the table"); + Assert.equals(tableDefTable.getDefinition(), "tableDefTable.getDefinition()", META_DEF); + + // Use TableIdentifier and Snapshot + 
tableDefTable = adapter.getTableDefinitionTable(tableId, null); + + Assert.eq(tableDefTable.size(), "tableDefTable.size()", 5, "5 rows in the table"); + Assert.equals(tableDefTable.getDefinition(), "tableDefTable.getDefinition()", META_DEF); + + // Use string and long snapshot ID + tableDefTable = adapter.getTableDefinitionTable("sales.sales_multi", snapshots.get(0).snapshotId(), null); + + Assert.eq(tableDefTable.size(), "tableDefTable.size()", 5, "5 rows in the table"); + Assert.equals(tableDefTable.getDefinition(), "tableDefTable.getDefinition()", META_DEF); + + // Use TableIdentifier and Snapshot + tableDefTable = adapter.getTableDefinitionTable(tableId, snapshots.get(0), null); + + Assert.eq(tableDefTable.size(), "tableDefTable.size()", 5, "5 rows in the table"); + Assert.equals(tableDefTable.getDefinition(), "tableDefTable.getDefinition()", META_DEF); + } + + @Test + public void testTableDefinitionWithInstructions() { + final IcebergCatalogAdapter adapter = IcebergTools.createAdapter(resourceCatalog, resourceFileIO); + + IcebergInstructions localInstructions = IcebergInstructions.builder() + .dataInstructions(instructions.dataInstructions().get()) + .putColumnRenames("Region", "Area") + .putColumnRenames("Item_Type", "ItemType") + .putColumnRenames("Units_Sold", "UnitsSold") + .putColumnRenames("Unit_Price", "UnitPrice") + .putColumnRenames("Order_Date", "OrderDate") + .build(); + + final TableDefinition renamed = TableDefinition.of( + ColumnDefinition.ofString("Area"), + ColumnDefinition.ofString("ItemType"), + ColumnDefinition.ofInt("UnitsSold"), + ColumnDefinition.ofDouble("UnitPrice"), + ColumnDefinition.ofTime("OrderDate")); + + // Use string and current snapshot + TableDefinition tableDef = adapter.getTableDefinition("sales.sales_multi", localInstructions); + Assert.equals(tableDef, "tableDef", renamed); + + ///////////////////////////////////////////////////// + + final TableDefinition userTableDef = TableDefinition.of( + ColumnDefinition.ofString("Region"), + ColumnDefinition.ofString("Item_Type"), + ColumnDefinition.ofTime("Order_Date")); + + localInstructions = IcebergInstructions.builder() + .dataInstructions(instructions.dataInstructions().get()) + .tableDefinition(userTableDef) + .build(); + + // Use string and current snapshot + tableDef = adapter.getTableDefinition("sales.sales_multi", localInstructions); + Assert.equals(tableDef, "tableDef", userTableDef); } } diff --git a/extensions/iceberg/src/main/java/io/deephaven/iceberg/util/IcebergCatalogAdapter.java b/extensions/iceberg/src/main/java/io/deephaven/iceberg/util/IcebergCatalogAdapter.java index c3ad3cd4970..486bcf18655 100644 --- a/extensions/iceberg/src/main/java/io/deephaven/iceberg/util/IcebergCatalogAdapter.java +++ b/extensions/iceberg/src/main/java/io/deephaven/iceberg/util/IcebergCatalogAdapter.java @@ -17,11 +17,13 @@ import io.deephaven.engine.table.impl.sources.InMemoryColumnSource; import io.deephaven.engine.table.impl.sources.regioned.RegionedTableComponentFactoryImpl; import io.deephaven.engine.updategraph.UpdateSourceRegistrar; +import io.deephaven.engine.util.TableTools; import io.deephaven.iceberg.layout.IcebergFlatLayout; import io.deephaven.iceberg.layout.IcebergKeyValuePartitionedLayout; import io.deephaven.iceberg.location.IcebergTableLocationFactory; import io.deephaven.iceberg.location.IcebergTableLocationKey; import io.deephaven.time.DateTimeUtils; +import io.deephaven.util.annotations.VisibleForTesting; import org.apache.iceberg.PartitionField; import org.apache.iceberg.PartitionSpec; 
import org.apache.iceberg.Schema; @@ -42,6 +44,26 @@ import java.util.stream.Collectors; public class IcebergCatalogAdapter { + + @VisibleForTesting + static final TableDefinition NAMESPACE_DEFINITION = TableDefinition.of( + ColumnDefinition.ofString("Namespace"), + ColumnDefinition.fromGenericType("NamespaceObject", Namespace.class)); + + @VisibleForTesting + static final TableDefinition TABLES_DEFINITION = TableDefinition.of( + ColumnDefinition.ofString("Namespace"), + ColumnDefinition.ofString("TableName"), + ColumnDefinition.fromGenericType("TableIdentifierObject", TableIdentifier.class)); + + @VisibleForTesting + static final TableDefinition SNAPSHOT_DEFINITION = TableDefinition.of( + ColumnDefinition.ofLong("Id"), + ColumnDefinition.ofTime("Timestamp"), + ColumnDefinition.ofString("Operation"), + ColumnDefinition.fromGenericType("Summary", Map.class), + ColumnDefinition.fromGenericType("SnapshotObject", Snapshot.class)); + private final Catalog catalog; private final FileIO fileIO; @@ -61,18 +83,18 @@ public class IcebergCatalogAdapter { * * @param schema The schema of the table. * @param partitionSpec The partition specification of the table. - * @param tableDefinition The table definition. + * @param userTableDef The table definition. * @param columnRename The map for renaming columns. * @return The generated TableDefinition. */ private static TableDefinition fromSchema( @NotNull final Schema schema, @NotNull final PartitionSpec partitionSpec, - @Nullable final TableDefinition tableDefinition, + @Nullable final TableDefinition userTableDef, @NotNull final Map columnRename) { - final Set columnNames = tableDefinition != null - ? tableDefinition.getColumnNameSet() + final Set columnNames = userTableDef != null + ? userTableDef.getColumnNameSet() : null; final Set partitionNames = @@ -100,7 +122,31 @@ private static TableDefinition fromSchema( columns.add(column); } - return TableDefinition.of(columns); + final TableDefinition icebergTableDef = TableDefinition.of(columns); + if (userTableDef == null) { + return icebergTableDef; + } + + // If the user supplied a table definition, make sure it's fully compatible. + final TableDefinition tableDef = icebergTableDef.checkCompatibility(userTableDef); + + // Ensure that the user has not marked non-partitioned columns as partitioned. + final Set userPartitionColumns = userTableDef.getPartitioningColumns().stream() + .map(ColumnDefinition::getName) + .collect(Collectors.toSet()); + final Set partitionColumns = tableDef.getPartitioningColumns().stream() + .map(ColumnDefinition::getName) + .collect(Collectors.toSet()); + + // The working partitioning column set must be a super-set of the user-supplied set. 
+ if (!partitionColumns.containsAll(userPartitionColumns)) { + final Set invalidColumns = new HashSet<>(userPartitionColumns); + invalidColumns.removeAll(partitionColumns); + + throw new TableDataException("The following columns are not partitioned in the Iceberg table: " + + invalidColumns); + } + return tableDef; } /** @@ -214,7 +260,7 @@ public Table listNamespacesAsTable(@NotNull final Namespace namespace) { } // Create and return the table - return new QueryTable(RowSetFactory.flat(size).toTracking(), columnSourceMap); + return new QueryTable(NAMESPACE_DEFINITION, RowSetFactory.flat(size).toTracking(), columnSourceMap); } /** @@ -273,7 +319,7 @@ public Table listTablesAsTable(@NotNull final Namespace namespace) { } // Create and return the table - return new QueryTable(RowSetFactory.flat(size).toTracking(), columnSourceMap); + return new QueryTable(TABLES_DEFINITION, RowSetFactory.flat(size).toTracking(), columnSourceMap); } public Table listTablesAsTable(@NotNull final String... namespace) { @@ -338,7 +384,7 @@ public Table listSnapshotsAsTable(@NotNull final TableIdentifier tableIdentifier } // Create and return the table - return new QueryTable(RowSetFactory.flat(size).toTracking(), columnSourceMap); + return new QueryTable(SNAPSHOT_DEFINITION, RowSetFactory.flat(size).toTracking(), columnSourceMap); } /** @@ -352,6 +398,228 @@ public Table listSnapshotsAsTable(@NotNull final String tableIdentifier) { return listSnapshotsAsTable(TableIdentifier.parse(tableIdentifier)); } + /** + * Get a specific {@link Snapshot snapshot} of a given Iceberg table (or null if it does not exist). + * + * @param tableIdentifier The identifier of the table from which to gather snapshots + * @param snapshotId The id of the snapshot to retrieve + * @return The snapshot with the given id, or null if it does not exist + */ + private Snapshot getSnapshot(@NotNull final TableIdentifier tableIdentifier, final long snapshotId) { + return listSnapshots(tableIdentifier).stream() + .filter(snapshot -> snapshot.snapshotId() == snapshotId) + .findFirst() + .orElse(null); + } + + /** + * Get a legalized column rename map from a table schema and user instructions. + */ + private Map getRenameColumnMap( + @NotNull final org.apache.iceberg.Table table, + @NotNull final Schema schema, + @NotNull final IcebergInstructions instructions) { + + final Set takenNames = new HashSet<>(); + + // Map all the column names in the schema to their legalized names. + final Map legalizedColumnRenames = new HashMap<>(); + + // Validate user-supplied names meet legalization instructions + for (final Map.Entry entry : instructions.columnRenames().entrySet()) { + final String destinationName = entry.getValue(); + if (!NameValidator.isValidColumnName(destinationName)) { + throw new TableDataException( + String.format("%s - invalid column name provided (%s)", table, destinationName)); + } + // Add these renames to the legalized list. + legalizedColumnRenames.put(entry.getKey(), destinationName); + takenNames.add(destinationName); + } + + for (final Types.NestedField field : schema.columns()) { + final String name = field.name(); + // Do we already have a valid rename for this column from the user or a partitioned column? 
+ if (!legalizedColumnRenames.containsKey(name)) { + final String legalizedName = + NameValidator.legalizeColumnName(name, s -> s.replace(" ", "_"), takenNames); + if (!legalizedName.equals(name)) { + legalizedColumnRenames.put(name, legalizedName); + takenNames.add(legalizedName); + } + } + } + + return legalizedColumnRenames; + } + + /** + * Return {@link TableDefinition table definition} for a given Iceberg table, with optional instructions for + * customizations while reading. + * + * @param tableIdentifier The identifier of the table to load + * @param instructions The instructions for customizations while reading + * @return The table definition + */ + public TableDefinition getTableDefinition( + @NotNull final String tableIdentifier, + @Nullable final IcebergInstructions instructions) { + final TableIdentifier tableId = TableIdentifier.parse(tableIdentifier); + // Load the table from the catalog. + return getTableDefinition(tableId, instructions); + } + + /** + * Return {@link TableDefinition table definition} for a given Iceberg table, with optional instructions for + * customizations while reading. + * + * @param tableIdentifier The identifier of the table to load + * @param instructions The instructions for customizations while reading + * @return The table definition + */ + public TableDefinition getTableDefinition( + @NotNull final TableIdentifier tableIdentifier, + @Nullable final IcebergInstructions instructions) { + // Load the table from the catalog. + return getTableDefinitionInternal(tableIdentifier, null, instructions); + } + + /** + * Return {@link TableDefinition table definition} for a given Iceberg table and snapshot id, with optional + * instructions for customizations while reading. + * + * @param tableIdentifier The identifier of the table to load + * @param snapshotId The identifier of the snapshot to load + * @param instructions The instructions for customizations while reading + * @return The table definition + */ + public TableDefinition getTableDefinition( + @NotNull final String tableIdentifier, + final long snapshotId, + @Nullable final IcebergInstructions instructions) { + final TableIdentifier tableId = TableIdentifier.parse(tableIdentifier); + + // Find the snapshot with the given snapshot id + final Snapshot tableSnapshot = getSnapshot(tableId, snapshotId); + if (tableSnapshot == null) { + throw new IllegalArgumentException("Snapshot with id " + snapshotId + " not found"); + } + + // Load the table from the catalog. + return getTableDefinition(tableId, tableSnapshot, instructions); + } + + /** + * Return {@link TableDefinition table definition} for a given Iceberg table and snapshot id, with optional + * instructions for customizations while reading. + * + * @param tableIdentifier The identifier of the table to load + * @param tableSnapshot The snapshot to load + * @param instructions The instructions for customizations while reading + * @return The table definition + */ + public TableDefinition getTableDefinition( + @NotNull final TableIdentifier tableIdentifier, + @Nullable final Snapshot tableSnapshot, + @Nullable final IcebergInstructions instructions) { + // Load the table from the catalog. + return getTableDefinitionInternal(tableIdentifier, tableSnapshot, instructions); + } + + /** + * Return {@link Table table} containing the {@link TableDefinition definition} of a given Iceberg table, with + * optional instructions for customizations while reading. 
+ * + * @param tableIdentifier The identifier of the table to load + * @param instructions The instructions for customizations while reading + * @return The table definition as a Deephaven table + */ + public Table getTableDefinitionTable( + @NotNull final String tableIdentifier, + @Nullable final IcebergInstructions instructions) { + final TableIdentifier tableId = TableIdentifier.parse(tableIdentifier); + return getTableDefinitionTable(tableId, instructions); + } + + /** + * Return {@link Table table} containing the {@link TableDefinition definition} of a given Iceberg table, with + * optional instructions for customizations while reading. + * + * @param tableIdentifier The identifier of the table to load + * @param instructions The instructions for customizations while reading + * @return The table definition as a Deephaven table + */ + public Table getTableDefinitionTable( + @NotNull final TableIdentifier tableIdentifier, + @Nullable final IcebergInstructions instructions) { + final TableDefinition definition = getTableDefinition(tableIdentifier, instructions); + return TableTools.metaTable(definition); + } + + /** + * Return {@link Table table} containing the {@link TableDefinition definition} of a given Iceberg table and + * snapshot id, with optional instructions for customizations while reading. + * + * @param tableIdentifier The identifier of the table to load + * @param snapshotId The identifier of the snapshot to load + * @param instructions The instructions for customizations while reading + * @return The table definition as a Deephaven table + */ + public Table getTableDefinitionTable( + @NotNull final String tableIdentifier, + final long snapshotId, + @Nullable final IcebergInstructions instructions) { + final TableIdentifier tableId = TableIdentifier.parse(tableIdentifier); + + // Find the snapshot with the given snapshot id + final Snapshot tableSnapshot = getSnapshot(tableId, snapshotId); + if (tableSnapshot == null) { + throw new IllegalArgumentException("Snapshot with id " + snapshotId + " not found"); + } + + return getTableDefinitionTable(tableId, tableSnapshot, instructions); + } + + /** + * Return {@link Table table} containing the {@link TableDefinition definition} of a given Iceberg table and + * snapshot id, with optional instructions for customizations while reading. + * + * @param tableIdentifier The identifier of the table to load + * @param tableSnapshot The snapshot to load + * @param instructions The instructions for customizations while reading + * @return The table definition as a Deephaven table + */ + public Table getTableDefinitionTable( + @NotNull final TableIdentifier tableIdentifier, + @Nullable final Snapshot tableSnapshot, + @Nullable final IcebergInstructions instructions) { + final TableDefinition definition = getTableDefinition(tableIdentifier, tableSnapshot, instructions); + return TableTools.metaTable(definition); + } + + /** + * Internal method to create a {@link TableDefinition} from the table schema, snapshot and user instructions. + */ + private TableDefinition getTableDefinitionInternal( + @NotNull final TableIdentifier tableIdentifier, + @Nullable final Snapshot tableSnapshot, + @Nullable final IcebergInstructions instructions) { + final org.apache.iceberg.Table table = catalog.loadTable(tableIdentifier); + if (table == null) { + throw new IllegalArgumentException("Table not found: " + tableIdentifier); + } + + final Snapshot snapshot = tableSnapshot != null ? 
tableSnapshot : table.currentSnapshot(); + final Schema schema = snapshot != null ? table.schemas().get(snapshot.schemaId()) : table.schema(); + + final IcebergInstructions userInstructions = instructions == null ? IcebergInstructions.DEFAULT : instructions; + + return fromSchema(schema, + table.spec(), + userInstructions.tableDefinition().orElse(null), + getRenameColumnMap(table, schema, userInstructions)); + } + /** * Read the latest static snapshot of an Iceberg table from the Iceberg catalog. * @@ -393,13 +661,11 @@ public Table readTable( @NotNull final TableIdentifier tableIdentifier, final long tableSnapshotId, @Nullable final IcebergInstructions instructions) { - // Find the snapshot with the given snapshot id - final Snapshot tableSnapshot = listSnapshots(tableIdentifier).stream() - .filter(snapshot -> snapshot.snapshotId() == tableSnapshotId) - .findFirst() - .orElseThrow(() -> new IllegalArgumentException("Snapshot with id " + tableSnapshotId + " not found")); - + final Snapshot tableSnapshot = getSnapshot(tableIdentifier, tableSnapshotId); + if (tableSnapshot == null) { + throw new IllegalArgumentException("Snapshot with id " + tableSnapshotId + " not found"); + } return readTableInternal(tableIdentifier, tableSnapshot, instructions); } @@ -439,9 +705,11 @@ private Table readTableInternal( @NotNull final TableIdentifier tableIdentifier, @Nullable final Snapshot tableSnapshot, @Nullable final IcebergInstructions instructions) { - // Load the table from the catalog. final org.apache.iceberg.Table table = catalog.loadTable(tableIdentifier); + if (table == null) { + throw new IllegalArgumentException("Table not found: " + tableIdentifier); + } // Do we want the latest or a specific snapshot? final Snapshot snapshot = tableSnapshot != null ? tableSnapshot : table.currentSnapshot(); @@ -456,66 +724,12 @@ private Table readTableInternal( // Get the user supplied table definition. final TableDefinition userTableDef = userInstructions.tableDefinition().orElse(null); - final Set takenNames = new HashSet<>(); - // Map all the column names in the schema to their legalized names. - final Map legalizedColumnRenames = new HashMap<>(); - - // Validate user-supplied names meet legalization requirements - for (final Map.Entry entry : userInstructions.columnRenames().entrySet()) { - final String destinationName = entry.getValue(); - if (!NameValidator.isValidColumnName(destinationName)) { - throw new TableDataException( - String.format("%s:%d - invalid column name provided (%s)", table, snapshot.snapshotId(), - destinationName)); - } - // Add these renames to the legalized list. - legalizedColumnRenames.put(entry.getKey(), destinationName); - takenNames.add(destinationName); - } - - for (final Types.NestedField field : schema.columns()) { - final String name = field.name(); - // Do we already have a valid rename for this column from the user or a partitioned column? - if (!legalizedColumnRenames.containsKey(name)) { - final String legalizedName = - NameValidator.legalizeColumnName(name, s -> s.replace(" ", "_"), takenNames); - if (!legalizedName.equals(name)) { - legalizedColumnRenames.put(name, legalizedName); - takenNames.add(legalizedName); - } - } - } + final Map legalizedColumnRenames = getRenameColumnMap(table, schema, userInstructions); // Get the table definition from the schema (potentially limited by the user supplied table definition and // applying column renames). 
- final TableDefinition icebergTableDef = fromSchema(schema, partitionSpec, userTableDef, legalizedColumnRenames); - - // If the user supplied a table definition, make sure it's fully compatible. - final TableDefinition tableDef; - if (userTableDef != null) { - tableDef = icebergTableDef.checkCompatibility(userTableDef); - - // Ensure that the user has not marked non-partitioned columns as partitioned. - final Set userPartitionColumns = userTableDef.getPartitioningColumns().stream() - .map(ColumnDefinition::getName) - .collect(Collectors.toSet()); - final Set partitionColumns = tableDef.getPartitioningColumns().stream() - .map(ColumnDefinition::getName) - .collect(Collectors.toSet()); - - // The working partitioning column set must be a super-set of the user-supplied set. - if (!partitionColumns.containsAll(userPartitionColumns)) { - final Set invalidColumns = new HashSet<>(userPartitionColumns); - invalidColumns.removeAll(partitionColumns); - - throw new TableDataException("The following columns are not partitioned in the Iceberg table: " + - invalidColumns); - } - } else { - // Use the snapshot schema as the table definition. - tableDef = icebergTableDef; - } + final TableDefinition tableDef = fromSchema(schema, partitionSpec, userTableDef, legalizedColumnRenames); final String description; final TableLocationKeyFinder keyFinder; From 501f3fb83a051a4c4c296043c85c39a643e11a96 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Tue, 6 Aug 2024 21:13:57 +0000 Subject: [PATCH 06/43] feat: Update web version 0.88.0 (#5907) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Release notes https://github.com/deephaven/web-client-ui/releases/tag/v0.88.0 ## [0.88.0](https://github.com/deephaven/web-client-ui/compare/v0.87.0...v0.88.0) (2024-08-06) ### Features * Allow ref callback for Chart and ChartPanel ([#2174](https://github.com/deephaven/web-client-ui/issues/2174)) ([56d1fa9](https://github.com/deephaven/web-client-ui/commit/56d1fa9ba00d319794d686365be245c757ad2178)) * Export Internationalized Date Types for DatePicker ([#2170](https://github.com/deephaven/web-client-ui/issues/2170)) ([7fb4f64](https://github.com/deephaven/web-client-ui/commit/7fb4f64bf9822c95faa961c53f480da4ea9e0401)) ### Bug Fixes * Check for the getBaseTable API before calling it ([#2168](https://github.com/deephaven/web-client-ui/issues/2168)) ([a5cb947](https://github.com/deephaven/web-client-ui/commit/a5cb94745797e5568826c26ed0cf8e60131326d2)) * DH-17454: Combine modal classes instead of replacing ([#2173](https://github.com/deephaven/web-client-ui/issues/2173)) ([a2d5d5f](https://github.com/deephaven/web-client-ui/commit/a2d5d5f9a63ab2d7ec37b95c716f4bf1ae03b9b8)) * DH-17454: Wrap Modal in SpectrumThemeProvider ([#2169](https://github.com/deephaven/web-client-ui/issues/2169)) ([0058b18](https://github.com/deephaven/web-client-ui/commit/0058b1801c1bfb21e3961a31a8a1c7a27443abb4)) * Input Tables cannot paste more rows than number of visible rows ([#2152](https://github.com/deephaven/web-client-ui/issues/2152)) ([1d51585](https://github.com/deephaven/web-client-ui/commit/1d515850af5affe2ec3ce116cc526097f1c4f389)) * Propogation of Scroll Events when Scroll Position is at a Boundary ([#2166](https://github.com/deephaven/web-client-ui/issues/2166)) ([cb72d29](https://github.com/deephaven/web-client-ui/commit/cb72d294f162a0ca06758692c675b2aeee732a83)) * Restrict officially supported browserlist 
([#2159](https://github.com/deephaven/web-client-ui/issues/2159)) ([5b06ecc](https://github.com/deephaven/web-client-ui/commit/5b06eccca1c2dff625bae34e3801940f19e7bb56)) Release notes https://github.com/deephaven/web-client-ui/releases/tag/v0.87.0 ## [0.87.0](https://github.com/deephaven/web-client-ui/compare/v0.86.1...v0.87.0) (2024-07-22) ### ⚠ BREAKING CHANGES * Fix any try / catch blocks that return non-awaited Promises ### Features * Adjustable grid density ([#2151](https://github.com/deephaven/web-client-ui/issues/2151)) ([6bb11f9](https://github.com/deephaven/web-client-ui/commit/6bb11f9a527310801041011be3be78cae07a8bc8)) ### Bug Fixes * Enabled @typescript-eslint/return-await rule and fixed offending code ([#2157](https://github.com/deephaven/web-client-ui/issues/2157)) ([7875d03](https://github.com/deephaven/web-client-ui/commit/7875d03fdbe2dfa1c051c6dfa42cc1d9e7469afb)) Co-authored-by: deephaven-internal <66694643+deephaven-internal@users.noreply.github.com> --- web/client-ui/Dockerfile | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/web/client-ui/Dockerfile b/web/client-ui/Dockerfile index ea369df0769..85f28a22994 100644 --- a/web/client-ui/Dockerfile +++ b/web/client-ui/Dockerfile @@ -2,10 +2,10 @@ FROM deephaven/node:local-build WORKDIR /usr/src/app # Most of the time, these versions are the same, except in cases where a patch only affects one of the packages -ARG WEB_VERSION=0.86.1 -ARG GRID_VERSION=0.86.0 -ARG CHART_VERSION=0.86.0 -ARG WIDGET_VERSION=0.86.1 +ARG WEB_VERSION=0.88.0 +ARG GRID_VERSION=0.88.0 +ARG CHART_VERSION=0.88.0 +ARG WIDGET_VERSION=0.88.0 # Pull in the published code-studio package from npmjs and extract is RUN set -eux; \ From cfcb494439e85b69e43f218b464acb1bb399ed7f Mon Sep 17 00:00:00 2001 From: Jianfeng Mao <4297243+jmao-denver@users.noreply.github.com> Date: Tue, 6 Aug 2024 17:44:35 -0600 Subject: [PATCH 07/43] fix: make table reader/depending modules work in Py 3.8/3.9 (#5913) Fixes #5912 --- py/server/deephaven/_table_reader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/py/server/deephaven/_table_reader.py b/py/server/deephaven/_table_reader.py index bdb5de8af79..4c9265745c5 100644 --- a/py/server/deephaven/_table_reader.py +++ b/py/server/deephaven/_table_reader.py @@ -72,7 +72,7 @@ def _table_reader_all(table: Table, cols: Optional[Union[str, Sequence[str]]] = def _table_reader_all_dict(table: Table, cols: Optional[Union[str, Sequence[str]]] = None, *, row_set: jpy.JType, - prev: bool = False, to_numpy: bool = True) -> Dict[str, Union[np.ndarray | jpy.JType]]: + prev: bool = False, to_numpy: bool = True) -> Dict[str, Union[np.ndarray, jpy.JType]]: """ Reads all the rows in the given row set of a table into a dictionary. The dictionary is a map of column names to numpy arrays or Java arrays. From bf5bfef85ec309e03383949839255f9964be99a0 Mon Sep 17 00:00:00 2001 From: Devin Smith Date: Wed, 7 Aug 2024 12:09:10 -0700 Subject: [PATCH 08/43] fix: update to setup-gradle (#5905) This upgrades from the no longer maintained action `burrunan/gradle-cache-action` to the gradle maintained action `gradle/actions/setup-gradle`. There is a different usage pattern; with `setup-gradle`, it is invoked first during setup as opposed to at the command site. 
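A minimal before/after sketch of that pattern, using the check job as an example (the step names and Gradle arguments here simply mirror the workflow diffs below):

```yaml
# Before: burrunan/gradle-cache-action restored the cache and ran the build at the call site
- name: Check
  uses: burrunan/gradle-cache-action@v1
  with:
    job-id: checks
    arguments: --scan --continue check
    gradle-version: wrapper

# After: Gradle is configured once during setup, then invoked directly
- name: Setup Gradle
  uses: gradle/actions/setup-gradle@v3

- name: Check
  run: ./gradlew --scan --continue check
```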
We were also using job-ids with `burrunan/gradle-cache-action` to implicitly control cache prefixes; it is not clear if we actually need to separate caches, and so (for now), the default caching behavior of `gradle/actions/setup-gradle` is used, https://github.com/gradle/actions/blob/v3.5.0/docs/setup-gradle.md#caching-build-state-between-jobs. By default, the action will read from cache, but will only write to the cache for jobs on the `main` branch. Fixes #5161 --- .github/workflows/build-ci.yml | 18 ++++---- .github/workflows/check-ci.yml | 9 ++-- .github/workflows/docs-ci.yml | 55 +++++++++++------------ .github/workflows/nightly-check-ci.yml | 9 ++-- .github/workflows/nightly-image-check.yml | 9 ++-- .github/workflows/publish-ci.yml | 17 +++---- .github/workflows/quick-ci.yml | 11 +++-- .github/workflows/tag-base-images.yml | 9 ++-- 8 files changed, 60 insertions(+), 77 deletions(-) diff --git a/.github/workflows/build-ci.yml b/.github/workflows/build-ci.yml index a906ca8c8e2..11c58f512eb 100644 --- a/.github/workflows/build-ci.yml +++ b/.github/workflows/build-ci.yml @@ -29,6 +29,9 @@ jobs: distribution: 'temurin' java-version: '17' + - name: Setup Gradle + uses: gradle/actions/setup-gradle@v3 + - name: Set JAVA_HOME run: echo "JAVA_HOME=${{ steps.setup-java-11.outputs.path }}" >> $GITHUB_ENV @@ -88,11 +91,7 @@ jobs: password: ${{ secrets.GITHUB_TOKEN }} - name: Create Dockerfile and context - uses: burrunan/gradle-cache-action@v1 - with: - job-id: build-server - arguments: --scan outputVersion docker-server-slim:prepareDocker docker-server:prepareDockerAll - gradle-version: wrapper + run: ./gradlew --scan outputVersion docker-server-slim:prepareDocker docker-server:prepareDockerAll - name: Get Deephaven Version id: deephaven_version @@ -158,6 +157,9 @@ jobs: distribution: 'temurin' java-version: '11' + - name: Setup Gradle + uses: gradle/actions/setup-gradle@v3 + - name: Set JAVA_HOME run: echo "JAVA_HOME=${{ steps.setup-java-11.outputs.path }}" >> $GITHUB_ENV @@ -182,11 +184,7 @@ jobs: password: ${{ secrets.GITHUB_TOKEN }} - name: Create Dockerfile and context - uses: burrunan/gradle-cache-action@v1 - with: - job-id: build-web - arguments: --scan outputVersion docker-web-plugin-packager:prepareDocker - gradle-version: wrapper + run: ./gradlew --scan outputVersion docker-web-plugin-packager:prepareDocker - name: Get Deephaven Version id: deephaven_version diff --git a/.github/workflows/check-ci.yml b/.github/workflows/check-ci.yml index 56485ce65bb..6a2ca403494 100644 --- a/.github/workflows/check-ci.yml +++ b/.github/workflows/check-ci.yml @@ -33,6 +33,9 @@ jobs: distribution: 'temurin' java-version: '17' + - name: Setup Gradle + uses: gradle/actions/setup-gradle@v3 + - name: Set JAVA_HOME run: echo "JAVA_HOME=${{ steps.setup-java-11.outputs.path }}" >> $GITHUB_ENV @@ -42,11 +45,7 @@ jobs: cat gradle.properties - name: Check - uses: burrunan/gradle-cache-action@v1 - with: - job-id: checks - arguments: --scan --continue check - gradle-version: wrapper + run: ./gradlew --scan --continue check - name: Upload Test Results uses: actions/upload-artifact@v4 diff --git a/.github/workflows/docs-ci.yml b/.github/workflows/docs-ci.yml index 542e6f92b0a..753c78cf432 100644 --- a/.github/workflows/docs-ci.yml +++ b/.github/workflows/docs-ci.yml @@ -76,6 +76,9 @@ jobs: distribution: 'temurin' java-version: '17' + - name: Setup Gradle + uses: gradle/actions/setup-gradle@v3 + - name: Set JAVA_HOME run: echo "JAVA_HOME=${{ steps.setup-java-11.outputs.path }}" >> $GITHUB_ENV @@ -85,12 +88,8 @@ jobs: 
cat gradle.properties - name: All Javadoc - uses: burrunan/gradle-cache-action@v1 - with: - job-id: allJavadoc - arguments: --scan outputVersion combined-javadoc:allJavadoc - gradle-version: wrapper - + run: ./gradlew --scan outputVersion combined-javadoc:allJavadoc + - name: Get Deephaven Version id: dhc-version run: echo "version=$(cat build/version)" >> $GITHUB_OUTPUT @@ -139,6 +138,9 @@ jobs: distribution: 'temurin' java-version: '17' + - name: Setup Gradle + uses: gradle/actions/setup-gradle@v3 + - name: Set JAVA_HOME run: echo "JAVA_HOME=${{ steps.setup-java-11.outputs.path }}" >> $GITHUB_ENV @@ -148,12 +150,8 @@ jobs: cat gradle.properties - name: Run typedoc on JS API - uses: burrunan/gradle-cache-action@v1 - with: - job-id: typedoc - arguments: --scan outputVersion :web-client-api:types:typedoc - gradle-version: wrapper - + run: ./gradlew --scan outputVersion :web-client-api:types:typedoc + - name: Get Deephaven Version id: dhc-version run: echo "version=$(cat build/version)" >> $GITHUB_OUTPUT @@ -202,6 +200,9 @@ jobs: distribution: 'temurin' java-version: '17' + - name: Setup Gradle + uses: gradle/actions/setup-gradle@v3 + - name: Set JAVA_HOME run: echo "JAVA_HOME=${{ steps.setup-java-11.outputs.path }}" >> $GITHUB_ENV @@ -211,12 +212,8 @@ jobs: cat gradle.properties - name: Generate Python Docs - uses: burrunan/gradle-cache-action@v1 - with: - job-id: pythonDocs - arguments: --scan outputVersion sphinx:pythonDocs sphinx:pydeephavenDocs - gradle-version: wrapper - + run: ./gradlew --scan outputVersion sphinx:pythonDocs sphinx:pydeephavenDocs + - name: Get Deephaven Version id: dhc-version run: echo "version=$(cat build/version)" >> $GITHUB_OUTPUT @@ -285,6 +282,9 @@ jobs: distribution: 'temurin' java-version: '11' + - name: Setup Gradle + uses: gradle/actions/setup-gradle@v3 + - name: Set JAVA_HOME run: echo "JAVA_HOME=${{ steps.setup-java-11.outputs.path }}" >> $GITHUB_ENV @@ -294,12 +294,8 @@ jobs: cat gradle.properties - name: Generate C++ Docs - uses: burrunan/gradle-cache-action@v1 - with: - job-id: cppDocs - arguments: --scan outputVersion sphinx:cppClientDocs sphinx:cppExamplesDocs - gradle-version: wrapper - + run: ./gradlew --scan outputVersion sphinx:cppClientDocs sphinx:cppExamplesDocs + - name: Get Deephaven Version id: dhc-version run: echo "version=$(cat build/version)" >> $GITHUB_OUTPUT @@ -360,6 +356,9 @@ jobs: distribution: 'temurin' java-version: '11' + - name: Setup Gradle + uses: gradle/actions/setup-gradle@v3 + - name: Set JAVA_HOME run: echo "JAVA_HOME=${{ steps.setup-java-11.outputs.path }}" >> $GITHUB_ENV @@ -369,12 +368,8 @@ jobs: cat gradle.properties - name: Generate R Docs - uses: burrunan/gradle-cache-action@v1 - with: - job-id: rDocs - arguments: --scan outputVersion R:rClientSite - gradle-version: wrapper - + run: ./gradlew --scan outputVersion R:rClientSite + - name: Get Deephaven Version id: dhc-version run: echo "version=$(cat build/version)" >> $GITHUB_OUTPUT diff --git a/.github/workflows/nightly-check-ci.yml b/.github/workflows/nightly-check-ci.yml index c306f5bf42e..fd96f8c4e45 100644 --- a/.github/workflows/nightly-check-ci.yml +++ b/.github/workflows/nightly-check-ci.yml @@ -55,6 +55,9 @@ jobs: distribution: 'temurin' java-version: '22' + - name: Setup Gradle + uses: gradle/actions/setup-gradle@v3 + - name: Set JAVA_HOME run: echo "JAVA_HOME=${{ steps.setup-java-11.outputs.path }}" >> $GITHUB_ENV @@ -64,11 +67,7 @@ jobs: cat gradle.properties - name: Run gradle ${{ matrix.gradle-task }} on java ${{ matrix.test-jvm-version }} - uses: 
burrunan/gradle-cache-action@v1 - with: - job-id: gradle-run - arguments: --scan --continue --rerun-tasks ${{ matrix.gradle-task }} -PtestRuntimeVersion=${{ matrix.test-jvm-version }} - gradle-version: wrapper + run: ./gradlew --scan --continue --rerun-tasks ${{ matrix.gradle-task }} -PtestRuntimeVersion=${{ matrix.test-jvm-version }} - name: Upload Test Results uses: actions/upload-artifact@v4 diff --git a/.github/workflows/nightly-image-check.yml b/.github/workflows/nightly-image-check.yml index 310dacb144d..505a64ef223 100644 --- a/.github/workflows/nightly-image-check.yml +++ b/.github/workflows/nightly-image-check.yml @@ -20,6 +20,9 @@ jobs: distribution: 'temurin' java-version: '11' + - name: Setup Gradle + uses: gradle/actions/setup-gradle@v3 + - name: Set JAVA_HOME run: echo "JAVA_HOME=${{ steps.setup-java-11.outputs.path }}" >> $GITHUB_ENV @@ -29,11 +32,7 @@ jobs: cat gradle.properties - name: Run gradle - uses: burrunan/gradle-cache-action@v1 - with: - job-id: image-compare - arguments: --continue pullImage compareImage - gradle-version: wrapper + run: ./gradlew --continue pullImage compareImage - name: Notify Slack uses: slackapi/slack-github-action@v1.26.0 diff --git a/.github/workflows/publish-ci.yml b/.github/workflows/publish-ci.yml index 53dc8335ad0..ca171650623 100644 --- a/.github/workflows/publish-ci.yml +++ b/.github/workflows/publish-ci.yml @@ -33,6 +33,9 @@ jobs: distribution: 'temurin' java-version: '17' + - name: Setup Gradle + uses: gradle/actions/setup-gradle@v3 + - name: Set JAVA_HOME run: echo "JAVA_HOME=${{ steps.setup-java-11.outputs.path }}" >> $GITHUB_ENV @@ -51,20 +54,12 @@ jobs: - name: Build all artifacts, publish to Maven Local if: ${{ !startsWith(github.ref, 'refs/heads/release/v') }} - uses: burrunan/gradle-cache-action@v1 - with: - job-id: publish-local - arguments: server-netty-app:build server-jetty-app:build py-server:build py-embedded-server:build py-client:build py-client-ticking:build web-client-api:types:build publishToMavenLocal - gradle-version: wrapper + run: ./gradlew server-netty-app:build server-jetty-app:build py-server:build py-embedded-server:build py-client:build py-client-ticking:build web-client-api:types:build publishToMavenLocal - name: Build all artifacts, publish to Sonatype for staging to Maven Central if: ${{ startsWith(github.ref, 'refs/heads/release/v') }} - uses: burrunan/gradle-cache-action@v1 - with: - job-id: publish - # We need to be explicit here about no parallelism to ensure we don't create disjointed staging repositories. - arguments: --no-parallel server-netty-app:build server-jetty-app:build py-server:build py-embedded-server:build py-client:build py-client-ticking:build web-client-api:types:build publish - gradle-version: wrapper + # We need to be explicit here about no parallelism to ensure we don't create disjointed staging repositories. 
+ run: ./gradlew --no-parallel server-netty-app:build server-jetty-app:build py-server:build py-embedded-server:build py-client:build py-client-ticking:build web-client-api:types:build publish env: ORG_GRADLE_PROJECT_ossrhUsername: ${{ secrets.SONATYPE_USERNAME }} ORG_GRADLE_PROJECT_ossrhPassword: ${{ secrets.SONATYPE_PASSWORD }} diff --git a/.github/workflows/quick-ci.yml b/.github/workflows/quick-ci.yml index 56c51ede52f..2372f2ad55e 100644 --- a/.github/workflows/quick-ci.yml +++ b/.github/workflows/quick-ci.yml @@ -27,6 +27,9 @@ jobs: distribution: 'temurin' java-version: '11' + - name: Setup Gradle + uses: gradle/actions/setup-gradle@v3 + - name: Set JAVA_HOME run: echo "JAVA_HOME=${{ steps.setup-java-11.outputs.path }}" >> $GITHUB_ENV @@ -36,12 +39,8 @@ jobs: cat gradle.properties - name: Quick Task - uses: burrunan/gradle-cache-action@v1 - with: - job-id: quick-task - # Even though quick includes spotlessCheck, we want to make sure it runs first and fails ASAP for quick feedback - arguments: --scan spotlessCheck quick - gradle-version: wrapper + # Even though quick includes spotlessCheck, we want to make sure it runs first and fails ASAP for quick feedback + run: ./gradlew --scan spotlessCheck quick - name: Upload JVM Error Logs uses: actions/upload-artifact@v4 diff --git a/.github/workflows/tag-base-images.yml b/.github/workflows/tag-base-images.yml index 3ce799ed989..a54d06ec916 100644 --- a/.github/workflows/tag-base-images.yml +++ b/.github/workflows/tag-base-images.yml @@ -29,6 +29,9 @@ jobs: distribution: 'temurin' java-version: '17' + - name: Setup Gradle + uses: gradle/actions/setup-gradle@v3 + - name: Setup Crane if: ${{ startsWith(github.ref, 'refs/heads/release/v') }} uses: imjasonh/setup-crane@v0.3 @@ -50,11 +53,7 @@ jobs: password: ${{ secrets.GITHUB_TOKEN }} - name: Create Crane scripts - uses: burrunan/gradle-cache-action@v1 - with: - job-id: crane-scripts - arguments: createCraneTagScript - gradle-version: wrapper + run: ./gradlew createCraneTagScript - name: Tag upstream images if: ${{ startsWith(github.ref, 'refs/heads/release/v') }} From 3bd5587310b73c9e81bb8b53a55c96aa2a34db06 Mon Sep 17 00:00:00 2001 From: Colin Alworth Date: Wed, 7 Aug 2024 14:13:03 -0500 Subject: [PATCH 09/43] refactor: BaseChunkInputStreamGenerator can be ReferenceCounted (#5889) Reduces some duplication of code, by sharing the ReferenceCounted logic for tracking when a CISG type should close its chunks or other managed resources. 
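To make the new shape concrete, here is a minimal sketch of the pattern the affected generators now follow (illustration only, not code from this patch; `ExampleGenerator` and the `extraResource` field are invented stand-ins for the offsets/inner generators the real subclasses hold): the base class owns the count, `close()` only decrements it, and a subclass frees its additional resources in `onReferenceCountAtZero()` instead of re-implementing the count-and-close logic with its own `AtomicIntegerFieldUpdater`.

```
import io.deephaven.util.referencecounting.ReferenceCounted;

// Sketch of the shared pattern: the ReferenceCounted base class tracks outstanding
// references; subclasses only decide what to release once the count reaches zero.
abstract class ExampleGenerator extends ReferenceCounted implements AutoCloseable {
    ExampleGenerator() {
        super(1); // one reference held by the creator
    }

    @Override
    public final void close() {
        decrementReferenceCount(); // the final decrement triggers onReferenceCountAtZero()
    }
}

final class ResourceBackedGenerator extends ExampleGenerator {
    private final AutoCloseable extraResource; // hypothetical extra resource (offsets, inner generator, ...)

    ResourceBackedGenerator(AutoCloseable extraResource) {
        this.extraResource = extraResource;
    }

    @Override
    protected void onReferenceCountAtZero() {
        try {
            extraResource.close(); // safe: no outstanding streams or copies remain
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }
}
```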
Partial #188 Co-authored-by: Ryan Caudy --- .../chunk/BaseChunkInputStreamGenerator.java | 34 +++++++++---------- .../VarBinaryChunkInputStreamGenerator.java | 13 +++---- .../VarListChunkInputStreamGenerator.java | 19 ++++------- .../VectorChunkInputStreamGenerator.java | 19 ++++------- 4 files changed, 34 insertions(+), 51 deletions(-) diff --git a/extensions/barrage/src/main/java/io/deephaven/extensions/barrage/chunk/BaseChunkInputStreamGenerator.java b/extensions/barrage/src/main/java/io/deephaven/extensions/barrage/chunk/BaseChunkInputStreamGenerator.java index 5f8c65c374a..f51da87e959 100644 --- a/extensions/barrage/src/main/java/io/deephaven/extensions/barrage/chunk/BaseChunkInputStreamGenerator.java +++ b/extensions/barrage/src/main/java/io/deephaven/extensions/barrage/chunk/BaseChunkInputStreamGenerator.java @@ -3,6 +3,7 @@ // package io.deephaven.extensions.barrage.chunk; +import io.deephaven.chunk.Chunk; import io.deephaven.chunk.attributes.Values; import io.deephaven.chunk.util.pools.PoolableChunk; import io.deephaven.engine.rowset.RowSequence; @@ -10,30 +11,24 @@ import io.deephaven.engine.rowset.RowSet; import io.deephaven.extensions.barrage.util.StreamReaderOptions; import io.deephaven.util.datastructures.LongSizedDataStructure; -import io.deephaven.chunk.Chunk; +import io.deephaven.util.referencecounting.ReferenceCounted; import java.io.IOException; -import java.util.concurrent.atomic.AtomicIntegerFieldUpdater; -public abstract class BaseChunkInputStreamGenerator> implements ChunkInputStreamGenerator { +public abstract class BaseChunkInputStreamGenerator> + extends ReferenceCounted + implements ChunkInputStreamGenerator { + public static final byte[] PADDING_BUFFER = new byte[8]; public static final int REMAINDER_MOD_8_MASK = 0x7; - // Ensure that we clean up chunk only after all copies of the update are released. - private volatile int refCount = 1; - - // Field updater for refCount, so we can avoid creating an {@link java.util.concurrent.atomic.AtomicInteger} for - // each instance. - @SuppressWarnings("rawtypes") - protected static final AtomicIntegerFieldUpdater REFERENCE_COUNT_UPDATER = - AtomicIntegerFieldUpdater.newUpdater(BaseChunkInputStreamGenerator.class, "refCount"); - protected final T chunk; protected final int elementSize; private final long rowOffset; BaseChunkInputStreamGenerator(final T chunk, final int elementSize, final long rowOffset) { + super(1); this.chunk = chunk; this.elementSize = elementSize; this.rowOffset = rowOffset; @@ -51,10 +46,13 @@ public long getLastRowOffset() { @Override public void close() { - if (REFERENCE_COUNT_UPDATER.decrementAndGet(this) == 0) { - if (chunk instanceof PoolableChunk) { - ((PoolableChunk) chunk).close(); - } + decrementReferenceCount(); + } + + @Override + protected void onReferenceCountAtZero() { + if (chunk instanceof PoolableChunk) { + ((PoolableChunk) chunk).close(); } } @@ -87,7 +85,7 @@ abstract class BaseChunkInputStream extends DrainableColumn { this.options = options; this.subset = chunk.size() == 0 ? RowSequenceFactory.EMPTY : subset != null ? 
subset.copy() : RowSequenceFactory.forRange(0, chunk.size() - 1); - REFERENCE_COUNT_UPDATER.incrementAndGet(BaseChunkInputStreamGenerator.this); + BaseChunkInputStreamGenerator.this.incrementReferenceCount(); // ignore the empty chunk as these are intentionally empty generators that should work for any subset if (chunk.size() > 0 && this.subset.lastRowKey() >= chunk.size()) { throw new IllegalStateException( @@ -97,7 +95,7 @@ abstract class BaseChunkInputStream extends DrainableColumn { @Override public void close() throws IOException { - BaseChunkInputStreamGenerator.this.close(); + BaseChunkInputStreamGenerator.this.decrementReferenceCount(); subset.close(); } diff --git a/extensions/barrage/src/main/java/io/deephaven/extensions/barrage/chunk/VarBinaryChunkInputStreamGenerator.java b/extensions/barrage/src/main/java/io/deephaven/extensions/barrage/chunk/VarBinaryChunkInputStreamGenerator.java index 34a460c31d0..b6c85018fb6 100644 --- a/extensions/barrage/src/main/java/io/deephaven/extensions/barrage/chunk/VarBinaryChunkInputStreamGenerator.java +++ b/extensions/barrage/src/main/java/io/deephaven/extensions/barrage/chunk/VarBinaryChunkInputStreamGenerator.java @@ -13,7 +13,6 @@ import io.deephaven.extensions.barrage.util.StreamReaderOptions; import io.deephaven.util.SafeCloseable; import io.deephaven.util.datastructures.LongSizedDataStructure; -import io.deephaven.chunk.util.pools.PoolableChunk; import io.deephaven.engine.rowset.RowSet; import io.deephaven.util.mutable.MutableInt; import io.deephaven.util.mutable.MutableLong; @@ -214,14 +213,10 @@ private synchronized void computePayload() throws IOException { } @Override - public void close() { - if (REFERENCE_COUNT_UPDATER.decrementAndGet(this) == 0) { - if (chunk instanceof PoolableChunk) { - ((PoolableChunk) chunk).close(); - } - if (byteStorage != null) { - byteStorage.close(); - } + protected void onReferenceCountAtZero() { + super.onReferenceCountAtZero(); + if (byteStorage != null) { + byteStorage.close(); } } diff --git a/extensions/barrage/src/main/java/io/deephaven/extensions/barrage/chunk/VarListChunkInputStreamGenerator.java b/extensions/barrage/src/main/java/io/deephaven/extensions/barrage/chunk/VarListChunkInputStreamGenerator.java index 20dbea517c0..8142c8d7d22 100644 --- a/extensions/barrage/src/main/java/io/deephaven/extensions/barrage/chunk/VarListChunkInputStreamGenerator.java +++ b/extensions/barrage/src/main/java/io/deephaven/extensions/barrage/chunk/VarListChunkInputStreamGenerator.java @@ -13,7 +13,6 @@ import io.deephaven.chunk.ObjectChunk; import io.deephaven.chunk.WritableChunk; import io.deephaven.chunk.WritableIntChunk; -import io.deephaven.chunk.util.pools.PoolableChunk; import io.deephaven.engine.rowset.RowSet; import io.deephaven.engine.rowset.RowSetBuilderSequential; import io.deephaven.engine.rowset.RowSetFactory; @@ -60,17 +59,13 @@ private synchronized void computePayload() { } @Override - public void close() { - if (REFERENCE_COUNT_UPDATER.decrementAndGet(this) == 0) { - if (chunk instanceof PoolableChunk) { - ((PoolableChunk) chunk).close(); - } - if (offsets != null) { - offsets.close(); - } - if (innerGenerator != null) { - innerGenerator.close(); - } + protected void onReferenceCountAtZero() { + super.onReferenceCountAtZero(); + if (offsets != null) { + offsets.close(); + } + if (innerGenerator != null) { + innerGenerator.close(); } } diff --git a/extensions/barrage/src/main/java/io/deephaven/extensions/barrage/chunk/VectorChunkInputStreamGenerator.java 
b/extensions/barrage/src/main/java/io/deephaven/extensions/barrage/chunk/VectorChunkInputStreamGenerator.java index a2f9378ae4c..6b15bb348a4 100644 --- a/extensions/barrage/src/main/java/io/deephaven/extensions/barrage/chunk/VectorChunkInputStreamGenerator.java +++ b/extensions/barrage/src/main/java/io/deephaven/extensions/barrage/chunk/VectorChunkInputStreamGenerator.java @@ -11,7 +11,6 @@ import io.deephaven.chunk.WritableIntChunk; import io.deephaven.chunk.attributes.ChunkPositions; import io.deephaven.chunk.attributes.Values; -import io.deephaven.chunk.util.pools.PoolableChunk; import io.deephaven.engine.rowset.RowSet; import io.deephaven.engine.rowset.RowSetBuilderSequential; import io.deephaven.engine.rowset.RowSetFactory; @@ -60,17 +59,13 @@ private synchronized void computePayload() { } @Override - public void close() { - if (REFERENCE_COUNT_UPDATER.decrementAndGet(this) == 0) { - if (chunk instanceof PoolableChunk) { - ((PoolableChunk) chunk).close(); - } - if (offsets != null) { - offsets.close(); - } - if (innerGenerator != null) { - innerGenerator.close(); - } + protected void onReferenceCountAtZero() { + super.onReferenceCountAtZero(); + if (offsets != null) { + offsets.close(); + } + if (innerGenerator != null) { + innerGenerator.close(); } } From 9bdbac067b06ab244a2ee2854937a8d8e494fbdc Mon Sep 17 00:00:00 2001 From: Devin Smith Date: Wed, 7 Aug 2024 15:17:29 -0700 Subject: [PATCH 10/43] feat: detect piped union (#5918) This allows vermin to correctly warn us if we are using the piped union syntax. Useful to prevent issues like https://github.com/deephaven/deephaven-core/issues/5912 in the future. In addition, self-documenting fstrings will be reported. Both of these features are labelled as "unstable"; if vermin ever produces a false positive, we can re-consider the inclusion of these features. For example, it would have produced the following error: ``` Detecting python files.. Analyzing 247 files using 24 processes.. !2, 3.10 /home/devin/dev/deephaven/deephaven-core/py/server/deephaven/_table_reader.py union types as `X | Y` require !2, 3.10 Minimum required versions: 3.10 Incompatible versions: 2 Target versions not met: 3.8 ``` Here is the documentation for these feature flags: ``` [--feature ] ... Some features are disabled by default due to being unstable: fstring-self-doc - [Unstable] Detect self-documenting fstrings. Can in some cases wrongly report fstrings as self-documenting. union-types - [Unstable] Detect union types `X | Y`. Can in some cases wrongly report union types due to having to employ heuristics. 
``` --- .github/workflows/quick-ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/quick-ci.yml b/.github/workflows/quick-ci.yml index 2372f2ad55e..a3eb5dca43c 100644 --- a/.github/workflows/quick-ci.yml +++ b/.github/workflows/quick-ci.yml @@ -63,4 +63,4 @@ jobs: run: pip install vermin==1.6.0 - name: Verify minimum version support - run: vermin -t=3.8 --no-tips --eval-annotations --violations py/server/deephaven py/client py/client-ticking py/embedded-server + run: vermin -t=3.8 --no-tips --eval-annotations --violations --feature fstring-self-doc --feature union-types py/server/deephaven py/client py/client-ticking py/embedded-server From 9d8da504f596091588c0340bc376229f6aec9b5a Mon Sep 17 00:00:00 2001 From: elijahpetty <128415452+elijahpetty@users.noreply.github.com> Date: Fri, 9 Aug 2024 13:44:27 -0500 Subject: [PATCH 11/43] fix: `y_twin` pydoc (#5910) This fixes an error in the pydocs where `x_twin` and `y_twin` have the same descriptions, when they should be opposite. --- .../main/java/io/deephaven/plot/util/GeneratePyV2FigureAPI.java | 2 +- py/server/deephaven/plot/figure.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Generators/src/main/java/io/deephaven/plot/util/GeneratePyV2FigureAPI.java b/Generators/src/main/java/io/deephaven/plot/util/GeneratePyV2FigureAPI.java index bcf065d55e5..93db0505dc5 100644 --- a/Generators/src/main/java/io/deephaven/plot/util/GeneratePyV2FigureAPI.java +++ b/Generators/src/main/java/io/deephaven/plot/util/GeneratePyV2FigureAPI.java @@ -1280,7 +1280,7 @@ private static List getPyFuncs() { rst.add(new PyFunc("y_ticks_minor", SEQUENTIAL, new String[] {"yMinorTicks", "yMinorTicksVisible"}, null, "Updates the configuration for minor ticks of the y-Axis.")); rst.add(new PyFunc("y_twin", SINGLETON, new String[] {"twinY"}, null, - "Creates a new Axes which shares the y-Axis with the current Axes. For example, this is used for creating plots with a common x-axis but two different y-axes.")); + "Creates a new Axes which shares the y-Axis with the current Axes. For example, this is used for creating plots with a common y-axis but two different x-axes.")); rst.add(new PyFunc("series", SEQUENTIAL, new String[] {"series", "group", "seriesColor", "toolTipPattern", "xToolTipPattern", "yToolTipPattern", "errorBarColor", "gradientVisible", "seriesNamingFunction"}, null, "Gets a specific data series and updates the data series's configurations.")); diff --git a/py/server/deephaven/plot/figure.py b/py/server/deephaven/plot/figure.py index fdd0a422a69..0caf9017c84 100644 --- a/py/server/deephaven/plot/figure.py +++ b/py/server/deephaven/plot/figure.py @@ -2757,7 +2757,7 @@ def y_twin( self, name: str = None, ) -> Figure: - """Creates a new Axes which shares the y-Axis with the current Axes. For example, this is used for creating plots with a common x-axis but two different y-axes. + """Creates a new Axes which shares the y-Axis with the current Axes. For example, this is used for creating plots with a common y-axis but two different x-axes. Args: name (str): name From f3fb2087befa003673f04f5dd6f06c7d0ac9972f Mon Sep 17 00:00:00 2001 From: Alex Peters <80283343+alexpeters1208@users.noreply.github.com> Date: Fri, 9 Aug 2024 14:07:38 -0500 Subject: [PATCH 12/43] fix: update R DESCRIPTION file to include URL (#5919) Add the URL for Deephaven's RDoc to the `DESCRIPTION` file of the `rdeephaven` package. This change is newly required by `pkgdown` 2.1.0. 
https://github.com/deephaven/deephaven-core/pull/5729 provided a temporary fix for this break, and this PR also reverts that temporary fix. --- R/build.gradle | 5 +---- R/rdeephaven/DESCRIPTION | 1 + 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/R/build.gradle b/R/build.gradle index 68d1b704599..85583260473 100644 --- a/R/build.gradle +++ b/R/build.gradle @@ -179,10 +179,7 @@ def rClientSite = Docker.registerDockerTask(project, 'rClientSite') { runCommand("mkdir -p ${prefix}/src/rdeephaven/docs") runCommand('''echo "status = tryCatch(" \ " {" \ - " install.packages('devtools'); " \ - " require(devtools); " \ - " remove.packages('pkgdown'); " \ - " devtools::install_version('pkgdown', version='2.0.0', repos='https://cran.r-project.org'); " \ + " install.packages('pkgdown', repos='https://cran.r-project.org'); " \ " 0" \ " }," \ " error=function(e) 1," \ diff --git a/R/rdeephaven/DESCRIPTION b/R/rdeephaven/DESCRIPTION index 5a9a85d0dd0..40d96753586 100644 --- a/R/rdeephaven/DESCRIPTION +++ b/R/rdeephaven/DESCRIPTION @@ -5,6 +5,7 @@ Version: 0.36.0 Date: 2023-05-12 Author: Deephaven Data Labs Maintainer: Alex Peters +URL: https://deephaven.io/core/client-api/r/ Description: The `rdeephaven` package provides an R API for communicating with the Deephaven server and working with Deephaven tables. In this release, we support connecting to the Deephaven server with three authentication methods including anonymous, username/password, and general key/value authentication. Once the connection has been established, we provide the tools to From d26e1b251f10a5f3b518b255dcc1a74379114f95 Mon Sep 17 00:00:00 2001 From: Jianfeng Mao <4297243+jmao-denver@users.noreply.github.com> Date: Fri, 9 Aug 2024 22:22:50 -0600 Subject: [PATCH 13/43] feat(table): add Table.diff method (#5814) Fixes #5756 --------- Co-authored-by: Chip Kent <5250374+chipkent@users.noreply.github.com> --- py/server/deephaven/table.py | 68 ++++++++++++++++++++++++++++++++++- py/server/tests/test_table.py | 61 +++++++++++++++++++++++++++++-- py/server/tests/testbase.py | 7 ++-- 3 files changed, 128 insertions(+), 8 deletions(-) diff --git a/py/server/deephaven/table.py b/py/server/deephaven/table.py index 72c496b8422..26557db18a1 100644 --- a/py/server/deephaven/table.py +++ b/py/server/deephaven/table.py @@ -11,7 +11,7 @@ import inspect from enum import Enum from enum import auto -from typing import Any, Optional, Callable, Dict, Generator, Tuple +from typing import Any, Optional, Callable, Dict, Generator, Tuple, Literal from typing import Sequence, List, Union, Protocol import jpy @@ -43,6 +43,8 @@ _JSearchDisplayMode = jpy.get_type("io.deephaven.engine.util.LayoutHintBuilder$SearchDisplayModes") _JSnapshotWhenOptions = jpy.get_type("io.deephaven.api.snapshot.SnapshotWhenOptions") _JBlinkTableTools = jpy.get_type("io.deephaven.engine.table.impl.BlinkTableTools") +_JDiffItems = jpy.get_type("io.deephaven.engine.util.TableDiff$DiffItems") +_JEnumSet = jpy.get_type("java.util.EnumSet") # PartitionedTable _JPartitionedTable = jpy.get_type("io.deephaven.engine.table.PartitionedTable") @@ -3765,3 +3767,67 @@ def multi_join(input: Union[Table, Sequence[Table], MultiJoinInput, Sequence[Mul table() method. """ return MultiJoinTable(input, on) + + +# region utility functions + +def table_diff(t1: Table, t2: Table, max_diffs: int = 1, floating_comparison: Literal['exact', 'absolute', 'relative'] = 'exact', + ignore_column_order: bool = False) -> str: + """Returns the differences between this table and the provided table as a string. 
If the two tables are the same, + an empty string is returned. The differences are returned in a human-readable format. + + This method starts by comparing the table sizes, and then the schema of the two tables, such as the number of + columns, column names, column types, column orders. If the schemas are different, the comparison stops and + the differences are returned. If the schemas are the same, the method proceeds to compare the data in the + tables. The method compares the data in the tables column by column (not row by row) and only records the first + difference found in each column. + + Note, inexact comparison of floating numbers may sometimes be desirable due to their inherent imprecision. + When that is the case, the floating_comparison should be set to either 'absolute' or 'relative'. When it is set + to 'absolute', the absolute value of the difference between two floating numbers is used to compare against a + threshold. The threshold is set to 0.0001 for Doubles and 0.005 for Floats. Only differences that are greater + than the threshold are recorded. When floating_comparison is set to 'relative', the relative difference between + two floating numbers is used to compare against the threshold. The relative difference is calculated as the absolute + difference divided by the smaller absolute value between the two numbers. + + Args: + t1 (Table): the table to compare + t2 (Table): the table to compare against + max_diffs (int): the maximum number of differences to return, default is 1 + floating_comparison (Literal['exact', 'absolute', 'relative']): the type of comparison to use for floating numbers, + default is 'exact' + ignore_column_order (bool): whether columns that exist in both tables but in different orders are + treated as differences. False indicates that column order matters (default), and True indicates that + column order does not matter. 
+ + Returns: + string + + Raises: + DHError + """ + try: + diff_items = [] + if max_diffs < 1: + raise ValueError("max_diffs must be greater than 0.") + + if floating_comparison not in ['exact', 'absolute', 'relative']: + raise ValueError("floating_comparison must be one of 'exact', 'absolute', or 'relative'.") + + if floating_comparison != 'exact': + diff_items.append(_JDiffItems.DoublesExact) + if floating_comparison == 'relative': + diff_items.append(_JDiffItems.DoubleFraction) + if ignore_column_order: + diff_items.append(_JDiffItems.ColumnsOrder) + + with auto_locking_ctx(t1, t2): + if diff_items: + j_diff_items = _JEnumSet.of(*diff_items) + return _JTableTools.diff(t1.j_table, t2.j_table, max_diffs, j_diff_items) + else: + return _JTableTools.diff(t1.j_table, t2.j_table, max_diffs) + except Exception as e: + raise DHError(e, "table diff failed") from e + +# endregion diff --git a/py/server/tests/test_table.py b/py/server/tests/test_table.py index a0c4e751827..6b7ecf2168c 100644 --- a/py/server/tests/test_table.py +++ b/py/server/tests/test_table.py @@ -14,8 +14,7 @@ from deephaven.html import to_html from deephaven.jcompat import j_hashmap from deephaven.pandas import to_pandas -from deephaven.stream.table_publisher import table_publisher -from deephaven.table import Table, SearchDisplayMode +from deephaven.table import Table, SearchDisplayMode, table_diff from tests.testbase import BaseTestCase, table_equals @@ -1124,6 +1123,64 @@ def test_arg_validation(self): t.partition_by("A", "B") self.assertIn("drop_keys must be", str(cm.exception)) + def test_table_diff(self): + with self.subTest("diff"): + t1 = empty_table(10).update(["A = i", "B = i", "C = i"]) + t2 = empty_table(10).update(["A = i", "B = i % 2 == 0? i: i + 1", "C = i % 2 == 0? i + 1: i"]) + d = table_diff(t1, t2, max_diffs=10).split("\n") + self.assertEqual(len(d), 3) + self.assertIn("row 1", d[0]) + self.assertIn("row 0", d[1]) + + d = table_diff(t1, t2).split("\n") + self.assertEqual(len(d), 2) + + with self.subTest("diff - ignore column order"): + t1 = empty_table(10).update(["A = i", "B = i + 1"]) + t2 = empty_table(10).update(["B = i + 1", "A = i"]) + d = table_diff(t1, t2, max_diffs=10).split("\n") + self.assertEqual(len(d), 3) + + t1 = empty_table(10).update(["A = i", "B = i"]) + t2 = empty_table(10).update(["B = i", "A = i"]) + d = table_diff(t1, t2, max_diffs=10, ignore_column_order=True) + self.assertEqual(d, "") + + with self.subTest("diff - floating_comparison = 'absolute'-double"): + t1 = empty_table(10).update(["A = i", "B = i + 1.0"]) + t2 = empty_table(10).update(["A = i", "B = i + 1.00001"]) + d = table_diff(t1, t2, max_diffs=10, floating_comparison='exact').split("\n") + self.assertEqual(len(d), 2) + + t1 = empty_table(10).update(["A = i", "B = i + 1.0"]) + t2 = empty_table(10).update(["A = i", "B = i + 1.00001"]) + d = table_diff(t1, t2, max_diffs=10, floating_comparison='absolute') + self.assertEqual(d, "") + + with self.subTest("diff - floating_comparison = 'absolute'-float"): + t1 = empty_table(10).update(["A = i", "B = (float)(i + 1.0)"]) + t2 = empty_table(10).update(["A = i", "B = (float)(i + 1.005)"]) + d = table_diff(t1, t2, max_diffs=10, floating_comparison='exact').split("\n") + self.assertEqual(len(d), 2) + + t1 = empty_table(10).update(["A = i", "B = (float)(i + 1.0)"]) + # 1.005 would cause the difference to be greater than 0.005, something like 0.00500001144 + t2 = empty_table(10).update(["A = i", "B = (float)(i + 1.004999)"]) + d = table_diff(t1, t2, max_diffs=10, 
floating_comparison='absolute') + self.assertEqual(d, "") + + with self.subTest("diff - floating_comparison='relative'-double"): + t1 = empty_table(10).update(["A = i", "B = i + 1.0"]) + t2 = empty_table(10).update(["A = i", "B = i + 1.00001"]) + d = table_diff(t1, t2, max_diffs=10, floating_comparison='relative') + self.assertEqual(d, "") + + with self.subTest("diff - floating_comparison='relative'-float"): + t1 = empty_table(10).update(["A = i", "B = (float)(i + 1.0)"]) + t2 = empty_table(10).update(["A = i", "B = (float)(i + 1.005)"]) + d = table_diff(t1, t2, max_diffs=10, floating_comparison='relative') + self.assertFalse(d) + if __name__ == "__main__": unittest.main() diff --git a/py/server/tests/testbase.py b/py/server/tests/testbase.py index 14049a62be5..3b26a0bc3b7 100644 --- a/py/server/tests/testbase.py +++ b/py/server/tests/testbase.py @@ -10,16 +10,13 @@ from deephaven.liveness_scope import liveness_scope from deephaven.update_graph import exclusive_lock -from deephaven.table import Table, PartitionedTableProxy +from deephaven.table import Table, PartitionedTableProxy, table_diff from test_helper import py_dh_session -_JTableTools = jpy.get_type("io.deephaven.engine.util.TableTools") - - def table_equals(table_a: Table, table_b: Table) -> bool: try: - return False if _JTableTools.diff(table_a.j_table, table_b.j_table, 1) else True + return False if table_diff(table_a, table_b, 1) else True except Exception as e: raise DHError(e, "table equality test failed.") from e From 71646c35b2987e83cf34b53b632498b8aba21332 Mon Sep 17 00:00:00 2001 From: Cristian Ferretti <37232625+jcferretti@users.noreply.github.com> Date: Mon, 12 Aug 2024 11:20:41 -0400 Subject: [PATCH 14/43] fix(r-client): add missing R package dependencies for building R client in its README. (#5925) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We need knitr, xml2 and rmarkdown. Otherwise, in a fresh install of R on an ubuntu 22.04 machine while trying to follow the instructions in `R/README.md`: ``` cfs@pifilca 19:28:20 ~/dh/oss1/deephaven-core/R $ R CMD build rdeephaven && R CMD INSTALL --no-multiarch --with-keep.source rdeephaven_*.tar.gz && rm rdeephaven_*.tar.gz * checking for file ‘rdeephaven/DESCRIPTION’ ... OK * preparing ‘rdeephaven’: * checking DESCRIPTION meta-information ... OK * cleaning src Warning in system2(command, args, stdout = NULL, stderr = NULL, ...) : error in running command Error in loadVignetteBuilder(pkgdir, TRUE) : vignette builder 'knitr' not found Execution halted ``` Tried adding just knitr but still got fialures, we still miss `xml2` and `rmarkdown`. So add all 3. ``` cfs@pifilca 19:40:15 ~/dh/oss1/deephaven-core/R $ R CMD build rdeephaven && R CMD INSTALL --no-multiarch --with-keep.source rdeephaven_*.tar.gz && rm rdeephaven_*.tar.gz * checking for file ‘rdeephaven/DESCRIPTION’ ... OK * preparing ‘rdeephaven’: * checking DESCRIPTION meta-information ... OK * cleaning src Warning in system2(command, args, stdout = NULL, stderr = NULL, ...) : error in running command * installing the package to build vignettes * creating vignettes ... 
ERROR --- re-building ‘agg_by.Rmd’ using rmarkdown Error: processing vignette 'agg_by.Rmd' failed with diagnostics: there is no package called ‘rmarkdown’ --- failed re-building ‘agg_by.Rmd’ --- re-building ‘rdeephaven.Rmd’ using rmarkdown Error: processing vignette 'rdeephaven.Rmd' failed with diagnostics: there is no package called ‘rmarkdown’ --- failed re-building ‘rdeephaven.Rmd’ --- re-building ‘update_by.Rmd’ using rmarkdown Error: processing vignette 'update_by.Rmd' failed with diagnostics: there is no package called ‘rmarkdown’ --- failed re-building ‘update_by.Rmd’ SUMMARY: processing the following files failed: ‘agg_by.Rmd’ ‘rdeephaven.Rmd’ ‘update_by.Rmd’ Error: Vignette re-building failed. Execution halted ``` --- R/rdeephaven/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/rdeephaven/README.md b/R/rdeephaven/README.md index 0af970d132b..5f81c09ad6c 100644 --- a/R/rdeephaven/README.md +++ b/R/rdeephaven/README.md @@ -118,7 +118,7 @@ Currently, the R client is only supported on Ubuntu 20.04 or 22.04 and must be b 4. Start an R console inside the rdeephaven directory. In that console, install the dephaven client dependencies (since we are building from source, dependencies will not be automatically pulled in): ```r - install.packages(c('Rcpp', 'arrow', 'R6', 'dplyr')) + install.packages(c('Rcpp', 'arrow', 'R6', 'dplyr', 'xml2', 'rmarkdown', 'knitr')) ``` Then, exit the R console with `quit()`. From the rdeephaven directory, build and install the R client: ```r From e53433f777ca78a0cf36a169827b190795ac4eaf Mon Sep 17 00:00:00 2001 From: Devin Smith Date: Tue, 13 Aug 2024 14:46:25 -0700 Subject: [PATCH 15/43] feat: Add versioned gRPC user-agent for java client (#5752) This adds a structured, context-dependenent gRPC user-agent for the java client. Here are some examples using the in-development 0.36.0-SNAPSHOT deephaven version: SessionFactoryConfig client: `grpc-java/1.58.0 (deephaven/0.36.0-SNAPSHOT; deephaven-java-client-session)` FlightSessionFactoryConfig client: `grpc-java/1.58.0 (deephaven/0.36.0-SNAPSHOT; flight/13.0.0; deephaven-java-client-flight)` BarrageSessionFactoryConfig client: `grpc-java/1.58.0 (deephaven/0.36.0-SNAPSHOT; flight/13.0.0; barrage/0.6.0; deephaven-java-client-barrage)` native application server client: `grpc-java/1.58.0 (deephaven/0.36.0-SNAPSHOT; flight/13.0.0; barrage/0.6.0; deephaven-server-jetty)` python embedded server client: `grpc-java/1.58.0 (deephaven/0.36.0-SNAPSHOT; flight/13.0.0; barrage/0.6.0; deephaven-server-embedded)` Integrators will be able to inherit the appropriate grpc-java, deephaven, flight, and barrage versions if they choose to, as well as the ability to append their own versions and contextual properties. 
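As a rough sketch of that integration point (the `my-integration/1.2.3` property and the class name are invented for illustration; the builder calls mirror the updated examples in the diff below), an integrator might build a channel factory like this and hand it to `BarrageSessionFactoryConfig.builder().clientChannelFactory(...)`:

```
import io.deephaven.client.impl.BarrageSessionFactoryConfig;
import io.deephaven.client.impl.ClientChannelFactory;
import io.deephaven.client.impl.ClientChannelFactoryDefaulter;

import java.util.List;

public final class MyIntegrationChannels {
    // Inherits the grpc-java/deephaven/flight/barrage version properties and appends one of our own.
    // The resulting user-agent would look like:
    //   grpc-java/1.58.0 (deephaven/0.36.0-SNAPSHOT; flight/13.0.0; barrage/0.6.0; my-integration/1.2.3)
    private static final ClientChannelFactory CHANNEL_FACTORY = ClientChannelFactoryDefaulter.builder()
            .userAgent(BarrageSessionFactoryConfig.userAgent(List.of("my-integration/1.2.3")))
            .build();

    public static ClientChannelFactory channelFactory() {
        return CHANNEL_FACTORY;
    }
}
```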
Fixes #5860 --- .../examples/BarrageClientExampleBase.java | 8 ++ .../impl/BarrageSessionFactoryConfig.java | 38 +++++++- .../deephaven/client/examples/DoPutSpray.java | 4 + .../client/examples/FlightExampleBase.java | 8 ++ .../impl/FlightSessionFactoryConfig.java | 39 ++++++++- .../client/ClientDefaultsModule.java | 24 ----- .../client/examples/SessionExampleBase.java | 10 ++- .../client/grpc/UserAgentUtility.java | 54 ++++++++++++ .../impl/ClientChannelFactoryDefaulter.java | 87 +++++++++++++++++++ .../deephaven/client/impl/ClientConfig.java | 9 +- .../client/impl/SessionFactoryConfig.java | 35 +++++++- .../python/server/EmbeddedServer.java | 33 +++++-- .../CustomClientChannelFactoryModule.java | 26 ++++++ .../server/custom/CustomComponentFactory.java | 4 + .../jetty/CommunityComponentFactory.java | 1 + .../JettyClientChannelFactoryModule.java | 26 ++++++ .../DeprecatedCommunityComponentFactory.java | 18 ++-- .../NettyClientChannelFactoryModule.java | 26 ++++++ .../client/ClientDefaultsModule.java | 32 +++++++ .../runner/CommunityDefaultsModule.java | 3 - .../runner/DeephavenApiConfigModule.java | 18 ---- .../session/ClientChannelFactoryModule.java | 39 +++++++++ .../server/session/SessionFactoryCreator.java | 17 +--- .../server/session/SslConfigModule.java | 30 +++++++ .../runner/DeephavenApiServerTestBase.java | 36 ++++++-- 25 files changed, 541 insertions(+), 84 deletions(-) delete mode 100644 java-client/session-dagger/src/main/java/io/deephaven/client/ClientDefaultsModule.java create mode 100644 java-client/session/src/main/java/io/deephaven/client/grpc/UserAgentUtility.java create mode 100644 java-client/session/src/main/java/io/deephaven/client/impl/ClientChannelFactoryDefaulter.java create mode 100644 server/jetty-app-custom/src/main/java/io/deephaven/server/custom/CustomClientChannelFactoryModule.java create mode 100644 server/jetty/src/main/java/io/deephaven/server/jetty/JettyClientChannelFactoryModule.java create mode 100644 server/netty/src/main/java/io/deephaven/server/netty/NettyClientChannelFactoryModule.java create mode 100644 server/src/main/java/io/deephaven/client/ClientDefaultsModule.java create mode 100644 server/src/main/java/io/deephaven/server/session/ClientChannelFactoryModule.java create mode 100644 server/src/main/java/io/deephaven/server/session/SslConfigModule.java diff --git a/java-client/barrage-examples/src/main/java/io/deephaven/client/examples/BarrageClientExampleBase.java b/java-client/barrage-examples/src/main/java/io/deephaven/client/examples/BarrageClientExampleBase.java index 26dd4edf098..3902759b93f 100644 --- a/java-client/barrage-examples/src/main/java/io/deephaven/client/examples/BarrageClientExampleBase.java +++ b/java-client/barrage-examples/src/main/java/io/deephaven/client/examples/BarrageClientExampleBase.java @@ -6,6 +6,8 @@ import io.deephaven.client.impl.BarrageSession; import io.deephaven.client.impl.BarrageSessionFactoryConfig; import io.deephaven.client.impl.BarrageSessionFactoryConfig.Factory; +import io.deephaven.client.impl.ClientChannelFactory; +import io.deephaven.client.impl.ClientChannelFactoryDefaulter; import io.deephaven.client.impl.SessionConfig; import io.deephaven.engine.context.ExecutionContext; import io.deephaven.engine.updategraph.impl.PeriodicUpdateGraph; @@ -15,6 +17,7 @@ import org.apache.arrow.memory.RootAllocator; import picocli.CommandLine.ArgGroup; +import java.util.Collections; import java.util.concurrent.Callable; import java.util.concurrent.Executors; import java.util.concurrent.ScheduledExecutorService; @@ 
-22,6 +25,10 @@ abstract class BarrageClientExampleBase implements Callable { + private static final ClientChannelFactory CLIENT_CHANNEL_FACTORY = ClientChannelFactoryDefaulter.builder() + .userAgent(BarrageSessionFactoryConfig.userAgent(Collections.singletonList("deephaven-barrage-examples"))) + .build(); + @ArgGroup(exclusive = false) ConnectOptions connectOptions; @@ -36,6 +43,7 @@ public final Void call() throws Exception { final ScheduledExecutorService scheduler = Executors.newScheduledThreadPool(4); final Factory factory = BarrageSessionFactoryConfig.builder() .clientConfig(ConnectOptions.options(connectOptions).config()) + .clientChannelFactory(CLIENT_CHANNEL_FACTORY) .allocator(bufferAllocator) .scheduler(scheduler) .build() diff --git a/java-client/barrage/src/main/java/io/deephaven/client/impl/BarrageSessionFactoryConfig.java b/java-client/barrage/src/main/java/io/deephaven/client/impl/BarrageSessionFactoryConfig.java index 502d2906f74..f825f9cff06 100644 --- a/java-client/barrage/src/main/java/io/deephaven/client/impl/BarrageSessionFactoryConfig.java +++ b/java-client/barrage/src/main/java/io/deephaven/client/impl/BarrageSessionFactoryConfig.java @@ -4,34 +4,68 @@ package io.deephaven.client.impl; import io.deephaven.annotations.BuildableStyle; +import io.deephaven.barrage.flatbuf.BarrageMessageWrapper; +import io.deephaven.client.grpc.UserAgentUtility; import io.grpc.ManagedChannel; import org.apache.arrow.memory.BufferAllocator; import org.immutables.value.Value.Default; import org.immutables.value.Value.Immutable; +import java.util.Collections; +import java.util.List; import java.util.Objects; import java.util.concurrent.ScheduledExecutorService; +import java.util.stream.Collectors; +import java.util.stream.Stream; @Immutable @BuildableStyle public abstract class BarrageSessionFactoryConfig { private static final SessionConfig SESSION_CONFIG_EMPTY = SessionConfig.builder().build(); + static final List VERSION_PROPERTIES = Collections.unmodifiableList(Stream.concat( + FlightSessionFactoryConfig.VERSION_PROPERTIES.stream(), + Stream.of(UserAgentUtility.versionProperty("barrage", BarrageMessageWrapper.class))) + .collect(Collectors.toUnmodifiableList())); + + private static final String DEEPHAVEN_JAVA_CLIENT_BARRAGE = "deephaven-java-client-barrage"; + + private static final ClientChannelFactory CLIENT_CHANNEL_FACTORY = ClientChannelFactoryDefaulter.builder() + .userAgent(userAgent(List.of(DEEPHAVEN_JAVA_CLIENT_BARRAGE))) + .build(); + public static Builder builder() { return ImmutableBarrageSessionFactoryConfig.builder(); } + /** + * Constructs a grpc + * user-agent with {@code grpc-java}, {@code deephaven}, {@code flight}, and {@code barrage} versions, with the + * addition of {@code extraProperties}. + * + * @param extraProperties the extra properties + * @return the user-agent + * @see UserAgentUtility#userAgent(List) + */ + public static String userAgent(List extraProperties) { + return UserAgentUtility.userAgent(Stream.concat( + VERSION_PROPERTIES.stream(), + extraProperties.stream()) + .collect(Collectors.toList())); + } + /** * The client configuration. */ public abstract ClientConfig clientConfig(); /** - * The client channel factory. By default is {@link ClientChannelFactory#defaultInstance()}. + * The client channel factory. By default, is a factory that sets a user-agent which includes relevant versions (see + * {@link #userAgent(List)}) and the property {@value DEEPHAVEN_JAVA_CLIENT_BARRAGE}. 
*/ @Default public ClientChannelFactory clientChannelFactory() { - return ClientChannelFactory.defaultInstance(); + return CLIENT_CHANNEL_FACTORY; } /** diff --git a/java-client/flight-examples/src/main/java/io/deephaven/client/examples/DoPutSpray.java b/java-client/flight-examples/src/main/java/io/deephaven/client/examples/DoPutSpray.java index 965bda06736..dfa33deb824 100644 --- a/java-client/flight-examples/src/main/java/io/deephaven/client/examples/DoPutSpray.java +++ b/java-client/flight-examples/src/main/java/io/deephaven/client/examples/DoPutSpray.java @@ -26,6 +26,8 @@ import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; +import static io.deephaven.client.examples.FlightExampleBase.CLIENT_CHANNEL_FACTORY; + @Command(name = "do-put-spray", mixinStandardHelpOptions = true, description = "Do Put Spray", version = "0.1.0") class DoPutSpray implements Callable { @@ -48,6 +50,7 @@ public Void call() throws Exception { final Factory sourceFactory = FlightSessionFactoryConfig.builder() .clientConfig(source.config()) + .clientChannelFactory(CLIENT_CHANNEL_FACTORY) .allocator(bufferAllocator) .scheduler(scheduler) .build() @@ -62,6 +65,7 @@ public Void call() throws Exception { for (ConnectOptions other : connects.subList(1, connects.size())) { final Factory otherFactory = FlightSessionFactoryConfig.builder() .clientConfig(other.config()) + .clientChannelFactory(CLIENT_CHANNEL_FACTORY) .allocator(bufferAllocator) .scheduler(scheduler) .build() diff --git a/java-client/flight-examples/src/main/java/io/deephaven/client/examples/FlightExampleBase.java b/java-client/flight-examples/src/main/java/io/deephaven/client/examples/FlightExampleBase.java index b248e2bb5aa..b71805fe64e 100644 --- a/java-client/flight-examples/src/main/java/io/deephaven/client/examples/FlightExampleBase.java +++ b/java-client/flight-examples/src/main/java/io/deephaven/client/examples/FlightExampleBase.java @@ -3,6 +3,8 @@ // package io.deephaven.client.examples; +import io.deephaven.client.impl.ClientChannelFactory; +import io.deephaven.client.impl.ClientChannelFactoryDefaulter; import io.deephaven.client.impl.FlightSession; import io.deephaven.client.impl.FlightSessionFactoryConfig; import io.deephaven.client.impl.FlightSessionFactoryConfig.Factory; @@ -12,6 +14,7 @@ import org.apache.arrow.memory.RootAllocator; import picocli.CommandLine.ArgGroup; +import java.util.Collections; import java.util.concurrent.Callable; import java.util.concurrent.Executors; import java.util.concurrent.ScheduledExecutorService; @@ -19,6 +22,10 @@ abstract class FlightExampleBase implements Callable { + static final ClientChannelFactory CLIENT_CHANNEL_FACTORY = ClientChannelFactoryDefaulter.builder() + .userAgent(FlightSessionFactoryConfig.userAgent(Collections.singletonList("deephaven-flight-examples"))) + .build(); + @ArgGroup(exclusive = false) ConnectOptions connectOptions; @@ -34,6 +41,7 @@ public final Void call() throws Exception { ScheduledExecutorService scheduler = Executors.newScheduledThreadPool(4); final Factory factory = FlightSessionFactoryConfig.builder() .clientConfig(ConnectOptions.options(connectOptions).config()) + .clientChannelFactory(CLIENT_CHANNEL_FACTORY) .allocator(bufferAllocator) .scheduler(scheduler) .build() diff --git a/java-client/flight/src/main/java/io/deephaven/client/impl/FlightSessionFactoryConfig.java b/java-client/flight/src/main/java/io/deephaven/client/impl/FlightSessionFactoryConfig.java index b1b2d3411ee..da94737bc70 100644 --- 
a/java-client/flight/src/main/java/io/deephaven/client/impl/FlightSessionFactoryConfig.java +++ b/java-client/flight/src/main/java/io/deephaven/client/impl/FlightSessionFactoryConfig.java @@ -4,35 +4,68 @@ package io.deephaven.client.impl; import io.deephaven.annotations.BuildableStyle; +import io.deephaven.client.grpc.UserAgentUtility; import io.grpc.ManagedChannel; +import org.apache.arrow.flight.impl.Flight; import org.apache.arrow.memory.BufferAllocator; import org.immutables.value.Value.Default; import org.immutables.value.Value.Immutable; +import java.util.Collections; +import java.util.List; import java.util.Objects; import java.util.concurrent.ScheduledExecutorService; +import java.util.stream.Collectors; +import java.util.stream.Stream; @Immutable @BuildableStyle public abstract class FlightSessionFactoryConfig { private static final SessionConfig SESSION_CONFIG_EMPTY = SessionConfig.builder().build(); - private static final FlightSessionConfig FLIGHT_SESSION_CONFIG_EMPTY = FlightSessionConfig.builder().build(); + + static final List VERSION_PROPERTIES = Collections.unmodifiableList(Stream.concat( + SessionFactoryConfig.VERSION_PROPERTIES.stream(), + Stream.of(UserAgentUtility.versionProperty("flight", Flight.class))) + .collect(Collectors.toList())); + + private static final String DEEPHAVEN_JAVA_CLIENT_FLIGHT = "deephaven-java-client-flight"; + + private static final ClientChannelFactory CLIENT_CHANNEL_FACTORY = ClientChannelFactoryDefaulter.builder() + .userAgent(userAgent(Collections.singletonList(DEEPHAVEN_JAVA_CLIENT_FLIGHT))) + .build(); public static Builder builder() { return ImmutableFlightSessionFactoryConfig.builder(); } + /** + * Constructs a grpc + * user-agent with {@code grpc-java}, {@code deephaven}, and {@code flight} versions, with the addition of + * {@code extraProperties}. + * + * @param extraProperties the extra properties + * @return the user-agent + * @see UserAgentUtility#userAgent(List) + */ + public static String userAgent(List extraProperties) { + return UserAgentUtility.userAgent(Stream.concat( + VERSION_PROPERTIES.stream(), + extraProperties.stream()) + .collect(Collectors.toList())); + } + /** * The client configuration. */ public abstract ClientConfig clientConfig(); /** - * The client channel factory. By default is {@link ClientChannelFactory#defaultInstance()}. + * The client channel factory. By default, is a factory that sets a user-agent which includes relevant versions (see + * {@link #userAgent(List)}) and the property {@value DEEPHAVEN_JAVA_CLIENT_FLIGHT}. */ @Default public ClientChannelFactory clientChannelFactory() { - return ClientChannelFactory.defaultInstance(); + return CLIENT_CHANNEL_FACTORY; } /** diff --git a/java-client/session-dagger/src/main/java/io/deephaven/client/ClientDefaultsModule.java b/java-client/session-dagger/src/main/java/io/deephaven/client/ClientDefaultsModule.java deleted file mode 100644 index 35d6d15b2d9..00000000000 --- a/java-client/session-dagger/src/main/java/io/deephaven/client/ClientDefaultsModule.java +++ /dev/null @@ -1,24 +0,0 @@ -// -// Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending -// -package io.deephaven.client; - -import dagger.Module; -import dagger.Provides; -import io.deephaven.client.impl.ClientChannelFactory; - -/** - * Provides {@link ClientChannelFactory}. - */ -@Module -public interface ClientDefaultsModule { - - - /** - * Equivalent to {@link ClientChannelFactory#defaultInstance()}. 
- */ - @Provides - static ClientChannelFactory providesClientChannelFactory() { - return ClientChannelFactory.defaultInstance(); - } -} diff --git a/java-client/session-examples/src/main/java/io/deephaven/client/examples/SessionExampleBase.java b/java-client/session-examples/src/main/java/io/deephaven/client/examples/SessionExampleBase.java index 26c63518af0..f3ccafd88b6 100644 --- a/java-client/session-examples/src/main/java/io/deephaven/client/examples/SessionExampleBase.java +++ b/java-client/session-examples/src/main/java/io/deephaven/client/examples/SessionExampleBase.java @@ -3,6 +3,8 @@ // package io.deephaven.client.examples; +import io.deephaven.client.impl.ClientChannelFactory; +import io.deephaven.client.impl.ClientChannelFactoryDefaulter; import io.deephaven.client.impl.SessionConfig; import io.deephaven.client.impl.SessionConfig.Builder; import io.deephaven.client.impl.SessionFactory; @@ -11,6 +13,7 @@ import io.grpc.ManagedChannel; import picocli.CommandLine.ArgGroup; +import java.util.Collections; import java.util.concurrent.Callable; import java.util.concurrent.Executors; import java.util.concurrent.ScheduledExecutorService; @@ -18,6 +21,10 @@ abstract class SessionExampleBase implements Callable { + private static final ClientChannelFactory CLIENT_CHANNEL_FACTORY = ClientChannelFactoryDefaulter.builder() + .userAgent(SessionFactoryConfig.userAgent(Collections.singletonList("deephaven-session-examples"))) + .build(); + @ArgGroup(exclusive = false) ConnectOptions connectOptions; @@ -30,7 +37,8 @@ abstract class SessionExampleBase implements Callable { public final Void call() throws Exception { ScheduledExecutorService scheduler = Executors.newScheduledThreadPool(4); final Factory factory = SessionFactoryConfig.builder() - .clientConfig(connectOptions.config()) + .clientConfig(ConnectOptions.options(connectOptions).config()) + .clientChannelFactory(CLIENT_CHANNEL_FACTORY) .scheduler(scheduler) .sessionConfig(sessionConfig()) .build() diff --git a/java-client/session/src/main/java/io/deephaven/client/grpc/UserAgentUtility.java b/java-client/session/src/main/java/io/deephaven/client/grpc/UserAgentUtility.java new file mode 100644 index 00000000000..90af8cf7530 --- /dev/null +++ b/java-client/session/src/main/java/io/deephaven/client/grpc/UserAgentUtility.java @@ -0,0 +1,54 @@ +// +// Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending +// +package io.deephaven.client.grpc; + +import io.grpc.Grpc; + +import java.util.List; + +public final class UserAgentUtility { + + /** + * Constructs a grpc + * user-agent of the form {@code grpc-java/} or + * {@code grpc-java/ (prop1; ...; propN)}. + * + * @param properties the properties + * @return the user-agent + * @see grpc user-agents + * @see #versionProperty(String, String) + * @see #versionProperty(String, Class) + */ + public static String userAgent(List properties) { + final String grpcJavaVersionProperty = versionProperty("grpc-java", Grpc.class); + return properties.isEmpty() + ? grpcJavaVersionProperty + : String.format("%s (%s)", grpcJavaVersionProperty, String.join("; ", properties)); + } + + /** + * Constructs a version property in the + * grpc user-agent style. + * + * @param name the name + * @param version the version + * @return the version property + */ + public static String versionProperty(String name, String version) { + return name + "/" + version; + } + + /** + * Constructs a version property in the + * grpc user-agent style. 
+ * Uses {@code clazz.getPackage().getImplementationVersion()} as the version string. + * + * @param name the name + * @param clazz the class to take the version from + * @return the version property + */ + public static String versionProperty(String name, Class clazz) { + return versionProperty(name, clazz.getPackage().getImplementationVersion()); + } +} diff --git a/java-client/session/src/main/java/io/deephaven/client/impl/ClientChannelFactoryDefaulter.java b/java-client/session/src/main/java/io/deephaven/client/impl/ClientChannelFactoryDefaulter.java new file mode 100644 index 00000000000..56d1b545c6c --- /dev/null +++ b/java-client/session/src/main/java/io/deephaven/client/impl/ClientChannelFactoryDefaulter.java @@ -0,0 +1,87 @@ +// +// Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending +// +package io.deephaven.client.impl; + +import io.deephaven.annotations.BuildableStyle; +import io.deephaven.ssl.config.SSLConfig; +import io.deephaven.uri.DeephavenTarget; +import io.grpc.ManagedChannel; +import org.immutables.value.Value.Default; +import org.immutables.value.Value.Immutable; + +import java.util.Optional; + +/** + * A client channel factory that may update {@link ClientConfig} values before handing off to a {@link #delegate()}. + */ +@Immutable +@BuildableStyle +public abstract class ClientChannelFactoryDefaulter implements ClientChannelFactory { + + public static Builder builder() { + return ImmutableClientChannelFactoryDefaulter.builder(); + } + + /** + * The delegated client channel factory. By default, is {@link ClientChannelFactory#defaultInstance()}. + */ + @Default + public ClientChannelFactory delegate() { + return ClientChannelFactory.defaultInstance(); + } + + /** + * The ssl config. This will set {@link ClientConfig#ssl() ssl} for the {@code config} in + * {@link #create(ClientConfig)} if it has not already been set and the {@link ClientConfig#target() target} is + * {@link DeephavenTarget#isSecure() secure}. + */ + public abstract Optional ssl(); + + /** + * The user-agent. This will set {@link ClientConfig#userAgent()} for the {@code config} in + * {@link #create(ClientConfig)} if it has not already been set. + */ + public abstract Optional userAgent(); + + /** + * Creates a managed channel. Will update {@code config} with the defaults as specified by {@link #ssl()} and + * {@link #userAgent()} before delegating to {@link #delegate()}. + */ + @Override + public final ManagedChannel create(ClientConfig config) { + if (ssl().isPresent() && !config.ssl().isPresent() && config.target().isSecure()) { + config = config.withSsl(ssl().get()); + } + if (userAgent().isPresent() && !config.userAgent().isPresent()) { + config = config.withUserAgent(userAgent().get()); + } + return delegate().create(config); + } + + public interface Builder { + + /** + * Initializes the value for the {@link ClientChannelFactoryDefaulter#delegate() delegate} attribute. + *
+ * If not set, this attribute will have a default value as returned by the initializer of + * {@link ClientChannelFactoryDefaulter#delegate()}. + */ + Builder delegate(ClientChannelFactory delegate); + + /** + * Initializes the optional value {@link ClientChannelFactoryDefaulter#ssl() ssl} to ssl. + */ + Builder ssl(SSLConfig ssl); + + /** + * Initializes the optional value {@link ClientChannelFactoryDefaulter#userAgent() userAgent} to userAgent. + */ + Builder userAgent(String userAgent); + + /** + * Builds a new {@link ClientChannelFactoryDefaulter}. + */ + ClientChannelFactoryDefaulter build(); + } +} diff --git a/java-client/session/src/main/java/io/deephaven/client/impl/ClientConfig.java b/java-client/session/src/main/java/io/deephaven/client/impl/ClientConfig.java index 0cddda3cc2d..a52a7faa8b3 100644 --- a/java-client/session/src/main/java/io/deephaven/client/impl/ClientConfig.java +++ b/java-client/session/src/main/java/io/deephaven/client/impl/ClientConfig.java @@ -36,7 +36,9 @@ public static Builder builder() { public abstract Optional ssl(); /** - * The user agent. + * The user-agent. + * + * @see grpc user-agents */ public abstract Optional userAgent(); @@ -63,6 +65,11 @@ public int maxInboundMessageSize() { */ public abstract ClientConfig withSsl(SSLConfig ssl); + /** + * Returns or creates a client config with {@link #userAgent()} as {@code userAgent}. + */ + public abstract ClientConfig withUserAgent(String userAgent); + public interface Builder { Builder target(DeephavenTarget target); diff --git a/java-client/session/src/main/java/io/deephaven/client/impl/SessionFactoryConfig.java b/java-client/session/src/main/java/io/deephaven/client/impl/SessionFactoryConfig.java index 948e621f1b4..265e95086df 100644 --- a/java-client/session/src/main/java/io/deephaven/client/impl/SessionFactoryConfig.java +++ b/java-client/session/src/main/java/io/deephaven/client/impl/SessionFactoryConfig.java @@ -4,12 +4,17 @@ package io.deephaven.client.impl; import io.deephaven.annotations.BuildableStyle; +import io.deephaven.client.grpc.UserAgentUtility; import io.grpc.ManagedChannel; import org.immutables.value.Value.Default; import org.immutables.value.Value.Immutable; +import java.util.Collections; +import java.util.List; import java.util.Objects; import java.util.concurrent.ScheduledExecutorService; +import java.util.stream.Collectors; +import java.util.stream.Stream; @Immutable @BuildableStyle @@ -17,21 +22,47 @@ public abstract class SessionFactoryConfig { private static final SessionConfig SESSION_CONFIG_EMPTY = SessionConfig.builder().build(); + static final List VERSION_PROPERTIES = + Collections.singletonList(UserAgentUtility.versionProperty("deephaven", SessionFactoryConfig.class)); + + private static final String DEEPHAVEN_JAVA_CLIENT_SESSION = "deephaven-java-client-session"; + + private static final ClientChannelFactory CLIENT_CHANNEL_FACTORY = ClientChannelFactoryDefaulter.builder() + .userAgent(userAgent(Collections.singletonList(DEEPHAVEN_JAVA_CLIENT_SESSION))) + .build(); + public static Builder builder() { return ImmutableSessionFactoryConfig.builder(); } + /** + * Constructs a grpc + * user-agent with {@code grpc-java} and {@code deephaven} versions, with the addition of + * {@code extraProperties}. 
+ * + * @param extraProperties the extra properties + * @return the user-agent + * @see UserAgentUtility#userAgent(List) + */ + public static String userAgent(List extraProperties) { + return UserAgentUtility.userAgent(Stream.concat( + VERSION_PROPERTIES.stream(), + extraProperties.stream()) + .collect(Collectors.toList())); + } + /** * The client configuration. */ public abstract ClientConfig clientConfig(); /** - * The client channel factory. By default is {@link ClientChannelFactory#defaultInstance()}. + * The client channel factory. By default, is a factory that sets a user-agent which includes relevant versions (see + * {@link #userAgent(List)}) and the property {@value DEEPHAVEN_JAVA_CLIENT_SESSION}. */ @Default public ClientChannelFactory clientChannelFactory() { - return ClientChannelFactory.defaultInstance(); + return CLIENT_CHANNEL_FACTORY; } /** diff --git a/py/embedded-server/java-runtime/src/main/java/io/deephaven/python/server/EmbeddedServer.java b/py/embedded-server/java-runtime/src/main/java/io/deephaven/python/server/EmbeddedServer.java index a27e33120d6..7e18ccca1bb 100644 --- a/py/embedded-server/java-runtime/src/main/java/io/deephaven/python/server/EmbeddedServer.java +++ b/py/embedded-server/java-runtime/src/main/java/io/deephaven/python/server/EmbeddedServer.java @@ -4,8 +4,10 @@ package io.deephaven.python.server; import dagger.Component; +import dagger.Module; +import dagger.Provides; import io.deephaven.auth.AuthenticationRequestHandler; -import io.deephaven.client.ClientDefaultsModule; +import io.deephaven.client.impl.BarrageSessionFactoryConfig; import io.deephaven.configuration.Configuration; import io.deephaven.engine.util.ScriptSession; import io.deephaven.integrations.python.PyLogOutputStream; @@ -15,7 +17,6 @@ import io.deephaven.io.logger.LogBufferOutputStream; import io.deephaven.server.auth.CommunityAuthorizationModule; import io.deephaven.server.config.ServerConfig; -import io.deephaven.time.calendar.CalendarsFromConfigurationModule; import io.deephaven.server.console.ExecutionContextModule; import io.deephaven.server.console.groovy.GroovyConsoleSessionModule; import io.deephaven.server.console.python.PythonConsoleSessionModule; @@ -30,7 +31,11 @@ import io.deephaven.server.runner.DeephavenApiServer; import io.deephaven.server.runner.DeephavenApiServerModule; import io.deephaven.server.runner.MainHelper; +import io.deephaven.server.session.ClientChannelFactoryModule; +import io.deephaven.server.session.ClientChannelFactoryModule.UserAgent; import io.deephaven.server.session.ObfuscatingErrorTransformerModule; +import io.deephaven.server.session.SslConfigModule; +import io.deephaven.time.calendar.CalendarsFromConfigurationModule; import org.jpy.PyModule; import org.jpy.PyObject; @@ -41,12 +46,25 @@ import java.io.OutputStream; import java.io.PrintStream; import java.util.Collection; +import java.util.List; import java.util.Map; public class EmbeddedServer { - @Singleton - @Component(modules = { + @Module(includes = { + ClientChannelFactoryModule.class, + SslConfigModule.class + }) + public interface EmbeddedPythonClientChannelFactoryModule { + + @Provides + @UserAgent + static String providesUserAgent() { + return BarrageSessionFactoryConfig.userAgent(List.of("deephaven-server-embedded")); + } + } + + @Module(includes = { DeephavenApiServerModule.class, EmbeddedPyLogModule.class, DeephavenApiConfigModule.class, @@ -59,10 +77,15 @@ public class EmbeddedServer { GroovyConsoleSessionModule.class, ExecutionContextModule.class, 
CommunityAuthorizationModule.class, - ClientDefaultsModule.class, ObfuscatingErrorTransformerModule.class, CalendarsFromConfigurationModule.class, + EmbeddedPythonClientChannelFactoryModule.class, }) + public interface PythonServerModule { + } + + @Singleton + @Component(modules = PythonServerModule.class) public interface PythonServerComponent extends JettyServerComponent { @Component.Builder interface Builder extends JettyServerComponent.Builder { diff --git a/server/jetty-app-custom/src/main/java/io/deephaven/server/custom/CustomClientChannelFactoryModule.java b/server/jetty-app-custom/src/main/java/io/deephaven/server/custom/CustomClientChannelFactoryModule.java new file mode 100644 index 00000000000..b0a01dfd554 --- /dev/null +++ b/server/jetty-app-custom/src/main/java/io/deephaven/server/custom/CustomClientChannelFactoryModule.java @@ -0,0 +1,26 @@ +// +// Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending +// +package io.deephaven.server.custom; + +import dagger.Module; +import dagger.Provides; +import io.deephaven.client.impl.BarrageSessionFactoryConfig; +import io.deephaven.server.session.ClientChannelFactoryModule; +import io.deephaven.server.session.ClientChannelFactoryModule.UserAgent; +import io.deephaven.server.session.SslConfigModule; + +import java.util.List; + +@Module(includes = { + ClientChannelFactoryModule.class, + SslConfigModule.class +}) +public interface CustomClientChannelFactoryModule { + + @Provides + @UserAgent + static String providesUserAgent() { + return BarrageSessionFactoryConfig.userAgent(List.of("deephaven-server-jetty-custom")); + } +} diff --git a/server/jetty-app-custom/src/main/java/io/deephaven/server/custom/CustomComponentFactory.java b/server/jetty-app-custom/src/main/java/io/deephaven/server/custom/CustomComponentFactory.java index 543514b8fee..22cf12662b8 100644 --- a/server/jetty-app-custom/src/main/java/io/deephaven/server/custom/CustomComponentFactory.java +++ b/server/jetty-app-custom/src/main/java/io/deephaven/server/custom/CustomComponentFactory.java @@ -10,6 +10,7 @@ import dagger.Provides; import dagger.multibindings.IntoSet; import io.deephaven.appmode.ApplicationState; +import io.deephaven.client.impl.BarrageSessionFactoryConfig; import io.deephaven.configuration.Configuration; import io.deephaven.server.auth.AuthorizationProvider; import io.deephaven.server.custom.CustomComponentFactory.CustomComponent; @@ -18,9 +19,11 @@ import io.deephaven.server.jetty.JettyServerModule; import io.deephaven.server.runner.CommunityDefaultsModule; import io.deephaven.server.runner.ComponentFactoryBase; +import io.deephaven.server.session.ClientChannelFactoryModule.UserAgent; import javax.inject.Singleton; import java.io.PrintStream; +import java.util.List; /** * An example of a "custom integrator" component factory. 
This is not meant to be an exhaustive example of Deephaven @@ -71,6 +74,7 @@ interface Builder extends JettyServerComponent.Builder @Module(includes = { JettyServerModule.class, + CustomClientChannelFactoryModule.class, CommunityDefaultsModule.class, }) public interface CustomModule { diff --git a/server/jetty/src/main/java/io/deephaven/server/jetty/CommunityComponentFactory.java b/server/jetty/src/main/java/io/deephaven/server/jetty/CommunityComponentFactory.java index 9b91fd187ea..014eaae59eb 100644 --- a/server/jetty/src/main/java/io/deephaven/server/jetty/CommunityComponentFactory.java +++ b/server/jetty/src/main/java/io/deephaven/server/jetty/CommunityComponentFactory.java @@ -69,6 +69,7 @@ interface Builder extends JettyServerComponent.Builder Date: Tue, 13 Aug 2024 18:01:12 -0400 Subject: [PATCH 16/43] fix: Test for proper clock settings before running clock tests (#5932) Clock tests occasionally fail when the system clock is improperly set. Test the system clock's setting before performing more tests. This does not resolve the problem, but it provides clear evidence of the root cause of intermittent clock test failures. --- .../src/test/java/io/deephaven/time/TestDateTimeUtils.java | 7 +++++++ .../deephaven/time/calendar/TestStaticCalendarMethods.java | 7 +++++++ 2 files changed, 14 insertions(+) diff --git a/engine/time/src/test/java/io/deephaven/time/TestDateTimeUtils.java b/engine/time/src/test/java/io/deephaven/time/TestDateTimeUtils.java index 7dcad0ad771..585cde2f96a 100644 --- a/engine/time/src/test/java/io/deephaven/time/TestDateTimeUtils.java +++ b/engine/time/src/test/java/io/deephaven/time/TestDateTimeUtils.java @@ -1933,6 +1933,13 @@ public Instant instantMillis() { TestCase.assertEquals(Instant.ofEpochSecond(0, (nanos / DateTimeUtils.MILLI) * DateTimeUtils.MILLI), DateTimeUtils.nowMillisResolution()); + // Occasionally tests fail because of invalid clocks on the test system + final LocalDate startDate = LocalDate.of(1990, 1, 1); + final LocalDate currentDate = DateTimeUtils.toLocalDate( + DateTimeUtils.epochNanosToInstant(Clock.system().currentTimeNanos()), DateTimeUtils.timeZone()); + assertTrue("Checking for a valid date on the test system: currentDate=" + currentDate, + currentDate.isAfter(startDate)); + TestCase.assertTrue(Math.abs(Clock.system().currentTimeNanos() - DateTimeUtils.epochNanos(DateTimeUtils.nowSystem())) < 1_000_000L); TestCase.assertTrue(Math.abs(Clock.system().currentTimeNanos() diff --git a/engine/time/src/test/java/io/deephaven/time/calendar/TestStaticCalendarMethods.java b/engine/time/src/test/java/io/deephaven/time/calendar/TestStaticCalendarMethods.java index 4140b66cf34..47f6ed9ef5a 100644 --- a/engine/time/src/test/java/io/deephaven/time/calendar/TestStaticCalendarMethods.java +++ b/engine/time/src/test/java/io/deephaven/time/calendar/TestStaticCalendarMethods.java @@ -3,6 +3,7 @@ // package io.deephaven.time.calendar; +import io.deephaven.base.clock.Clock; import io.deephaven.base.testing.BaseArrayTestCase; import io.deephaven.time.DateTimeUtils; @@ -107,6 +108,12 @@ public void testAll() { excludes.add("firstValidDate"); excludes.add("lastValidDate"); + // Occasionally tests fail because of invalid clocks on the test system + final LocalDate startDate = LocalDate.of(1990, 1, 1); + final LocalDate currentDate = DateTimeUtils.todayLocalDate(); + assertTrue("Checking for a valid date on the test system: currentDate=" + currentDate, + currentDate.isAfter(startDate)); + for (Method m1 : BusinessCalendar.class.getMethods()) { if 
(m1.getDeclaringClass() == Object.class || Modifier.isStatic(m1.getModifiers()) || From a922e44d2313aeb8f6ae4a32a745609efd95a8d2 Mon Sep 17 00:00:00 2001 From: Shivam Malhotra Date: Tue, 13 Aug 2024 17:27:58 -0500 Subject: [PATCH 17/43] feat: Throw exception when writing array/vector of big decimals to parquet (#5930) --- .../io/deephaven/parquet/table/TypeInfos.java | 8 +++++-- .../table/ParquetTableReadWriteTest.java | 23 +++++++++++++++++++ 2 files changed, 29 insertions(+), 2 deletions(-) diff --git a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/TypeInfos.java b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/TypeInfos.java index 36ca92c5116..9307d18ad33 100644 --- a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/TypeInfos.java +++ b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/TypeInfos.java @@ -175,10 +175,14 @@ static TypeInfo getTypeInfo( final RowSet rowSet, final Map> columnSourceMap, @NotNull final ParquetInstructions instructions) { - final Class dataType = column.getDataType(); - if (BigDecimal.class.equals(dataType)) { + if (BigDecimal.class.equals(column.getDataType())) { return bigDecimalTypeInfo(computedCache, column, rowSet, columnSourceMap); } + if (BigDecimal.class.equals(column.getComponentType())) { + throw new UnsupportedOperationException("Writing arrays/vector columns for big decimals is currently not " + + "supported"); + // TODO(deephaven-core#4612): Add support for this + } return lookupTypeInfo(column, instructions); } diff --git a/extensions/parquet/table/src/test/java/io/deephaven/parquet/table/ParquetTableReadWriteTest.java b/extensions/parquet/table/src/test/java/io/deephaven/parquet/table/ParquetTableReadWriteTest.java index 416013bf376..9c0746e1850 100644 --- a/extensions/parquet/table/src/test/java/io/deephaven/parquet/table/ParquetTableReadWriteTest.java +++ b/extensions/parquet/table/src/test/java/io/deephaven/parquet/table/ParquetTableReadWriteTest.java @@ -1479,6 +1479,29 @@ private static Table arrayToVectorTable(final Table table) { return arrayToVectorFormulas.isEmpty() ? table : table.updateView(arrayToVectorFormulas); } + @Test + public void testBigDecimalArrayColumn() { + final Table bdArrayTable = TableTools.emptyTable(10000).select(Selectable.from(List.of( + "someBigDecimalArrayColumn = new java.math.BigDecimal[] {i % 10 == 0 ? 
null : " + + "java.math.BigDecimal.valueOf(ii).stripTrailingZeros()}"))); + final File dest = new File(rootFile + File.separator + "testBigDecimalArrayColumn.parquet"); + try { + ParquetTools.writeTable(bdArrayTable, dest.getPath()); + fail("Expected exception because writing arrays of big decimal column types is not supported"); + } catch (final RuntimeException e) { + assertTrue(e.getCause() instanceof UnsupportedOperationException); + } + + // Convert array to vector table + final Table bdVectorTable = arrayToVectorTable(bdArrayTable); + try { + ParquetTools.writeTable(bdVectorTable, dest.getPath()); + fail("Expected exception because writing vectors of big decimal column types is not supported"); + } catch (final RuntimeException e) { + assertTrue(e.getCause() instanceof UnsupportedOperationException); + } + } + @Test public void testArrayColumns() { ArrayList columns = From ae17df265c44857de3b4daa4e56f6f2eade856e5 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 14 Aug 2024 17:10:20 +0000 Subject: [PATCH 18/43] chore(github-actions): bump gradle/actions from 3 to 4 (#5898) Bumps [gradle/actions](https://github.com/gradle/actions) from 3 to 4. - [Release notes](https://github.com/gradle/actions/releases) - [Commits](gradle/actions@v3...v4) --- updated-dependencies: - dependency-name: gradle/actions dependency-type: direct:production update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] --- .github/workflows/build-ci.yml | 4 ++-- .github/workflows/check-ci.yml | 2 +- .github/workflows/dependency-submission.yml | 2 +- .github/workflows/docs-ci.yml | 10 +++++----- .github/workflows/nightly-check-ci.yml | 2 +- .github/workflows/nightly-image-check.yml | 2 +- .github/workflows/nightly-publish-ci.yml | 2 +- .github/workflows/publish-ci.yml | 2 +- .github/workflows/quick-ci.yml | 2 +- .github/workflows/tag-base-images.yml | 2 +- 10 files changed, 15 insertions(+), 15 deletions(-) diff --git a/.github/workflows/build-ci.yml b/.github/workflows/build-ci.yml index 11c58f512eb..a8eecea3f1f 100644 --- a/.github/workflows/build-ci.yml +++ b/.github/workflows/build-ci.yml @@ -30,7 +30,7 @@ jobs: java-version: '17' - name: Setup Gradle - uses: gradle/actions/setup-gradle@v3 + uses: gradle/actions/setup-gradle@v4 - name: Set JAVA_HOME run: echo "JAVA_HOME=${{ steps.setup-java-11.outputs.path }}" >> $GITHUB_ENV @@ -158,7 +158,7 @@ jobs: java-version: '11' - name: Setup Gradle - uses: gradle/actions/setup-gradle@v3 + uses: gradle/actions/setup-gradle@v4 - name: Set JAVA_HOME run: echo "JAVA_HOME=${{ steps.setup-java-11.outputs.path }}" >> $GITHUB_ENV diff --git a/.github/workflows/check-ci.yml b/.github/workflows/check-ci.yml index 6a2ca403494..37b1592322a 100644 --- a/.github/workflows/check-ci.yml +++ b/.github/workflows/check-ci.yml @@ -34,7 +34,7 @@ jobs: java-version: '17' - name: Setup Gradle - uses: gradle/actions/setup-gradle@v3 + uses: gradle/actions/setup-gradle@v4 - name: Set JAVA_HOME run: echo "JAVA_HOME=${{ steps.setup-java-11.outputs.path }}" >> $GITHUB_ENV diff --git a/.github/workflows/dependency-submission.yml b/.github/workflows/dependency-submission.yml index 634846a22af..9d217aa7743 100644 --- a/.github/workflows/dependency-submission.yml +++ b/.github/workflows/dependency-submission.yml @@ -30,7 +30,7 @@ jobs: cat gradle.properties - name: Generate and submit dependency graph - uses: gradle/actions/dependency-submission@v3 + uses: gradle/actions/dependency-submission@v4 env: # Dependencies derived 
from :server-jetty-app runtimeClasspath get a "runtime" scope and everything else gets # a "development" scope. Ideally, gradle would be able to pass along the finer dependency details (for diff --git a/.github/workflows/docs-ci.yml b/.github/workflows/docs-ci.yml index 753c78cf432..5f78269dd94 100644 --- a/.github/workflows/docs-ci.yml +++ b/.github/workflows/docs-ci.yml @@ -77,7 +77,7 @@ jobs: java-version: '17' - name: Setup Gradle - uses: gradle/actions/setup-gradle@v3 + uses: gradle/actions/setup-gradle@v4 - name: Set JAVA_HOME run: echo "JAVA_HOME=${{ steps.setup-java-11.outputs.path }}" >> $GITHUB_ENV @@ -139,7 +139,7 @@ jobs: java-version: '17' - name: Setup Gradle - uses: gradle/actions/setup-gradle@v3 + uses: gradle/actions/setup-gradle@v4 - name: Set JAVA_HOME run: echo "JAVA_HOME=${{ steps.setup-java-11.outputs.path }}" >> $GITHUB_ENV @@ -201,7 +201,7 @@ jobs: java-version: '17' - name: Setup Gradle - uses: gradle/actions/setup-gradle@v3 + uses: gradle/actions/setup-gradle@v4 - name: Set JAVA_HOME run: echo "JAVA_HOME=${{ steps.setup-java-11.outputs.path }}" >> $GITHUB_ENV @@ -283,7 +283,7 @@ jobs: java-version: '11' - name: Setup Gradle - uses: gradle/actions/setup-gradle@v3 + uses: gradle/actions/setup-gradle@v4 - name: Set JAVA_HOME run: echo "JAVA_HOME=${{ steps.setup-java-11.outputs.path }}" >> $GITHUB_ENV @@ -357,7 +357,7 @@ jobs: java-version: '11' - name: Setup Gradle - uses: gradle/actions/setup-gradle@v3 + uses: gradle/actions/setup-gradle@v4 - name: Set JAVA_HOME run: echo "JAVA_HOME=${{ steps.setup-java-11.outputs.path }}" >> $GITHUB_ENV diff --git a/.github/workflows/nightly-check-ci.yml b/.github/workflows/nightly-check-ci.yml index fd96f8c4e45..d8b5c93fec7 100644 --- a/.github/workflows/nightly-check-ci.yml +++ b/.github/workflows/nightly-check-ci.yml @@ -56,7 +56,7 @@ jobs: java-version: '22' - name: Setup Gradle - uses: gradle/actions/setup-gradle@v3 + uses: gradle/actions/setup-gradle@v4 - name: Set JAVA_HOME run: echo "JAVA_HOME=${{ steps.setup-java-11.outputs.path }}" >> $GITHUB_ENV diff --git a/.github/workflows/nightly-image-check.yml b/.github/workflows/nightly-image-check.yml index 505a64ef223..1e868280782 100644 --- a/.github/workflows/nightly-image-check.yml +++ b/.github/workflows/nightly-image-check.yml @@ -21,7 +21,7 @@ jobs: java-version: '11' - name: Setup Gradle - uses: gradle/actions/setup-gradle@v3 + uses: gradle/actions/setup-gradle@v4 - name: Set JAVA_HOME run: echo "JAVA_HOME=${{ steps.setup-java-11.outputs.path }}" >> $GITHUB_ENV diff --git a/.github/workflows/nightly-publish-ci.yml b/.github/workflows/nightly-publish-ci.yml index 48bfd97f417..a4e2bcbb242 100644 --- a/.github/workflows/nightly-publish-ci.yml +++ b/.github/workflows/nightly-publish-ci.yml @@ -32,7 +32,7 @@ jobs: java-version: '17' - name: Setup Gradle - uses: gradle/actions/setup-gradle@v3 + uses: gradle/actions/setup-gradle@v4 - name: Set JAVA_HOME run: echo "JAVA_HOME=${{ steps.setup-java-11.outputs.path }}" >> $GITHUB_ENV diff --git a/.github/workflows/publish-ci.yml b/.github/workflows/publish-ci.yml index ca171650623..65c6ef8b60d 100644 --- a/.github/workflows/publish-ci.yml +++ b/.github/workflows/publish-ci.yml @@ -34,7 +34,7 @@ jobs: java-version: '17' - name: Setup Gradle - uses: gradle/actions/setup-gradle@v3 + uses: gradle/actions/setup-gradle@v4 - name: Set JAVA_HOME run: echo "JAVA_HOME=${{ steps.setup-java-11.outputs.path }}" >> $GITHUB_ENV diff --git a/.github/workflows/quick-ci.yml b/.github/workflows/quick-ci.yml index a3eb5dca43c..8416bf485c5 100644 --- 
a/.github/workflows/quick-ci.yml +++ b/.github/workflows/quick-ci.yml @@ -28,7 +28,7 @@ jobs: java-version: '11' - name: Setup Gradle - uses: gradle/actions/setup-gradle@v3 + uses: gradle/actions/setup-gradle@v4 - name: Set JAVA_HOME run: echo "JAVA_HOME=${{ steps.setup-java-11.outputs.path }}" >> $GITHUB_ENV diff --git a/.github/workflows/tag-base-images.yml b/.github/workflows/tag-base-images.yml index a54d06ec916..b9a83a95eeb 100644 --- a/.github/workflows/tag-base-images.yml +++ b/.github/workflows/tag-base-images.yml @@ -30,7 +30,7 @@ jobs: java-version: '17' - name: Setup Gradle - uses: gradle/actions/setup-gradle@v3 + uses: gradle/actions/setup-gradle@v4 - name: Setup Crane if: ${{ startsWith(github.ref, 'refs/heads/release/v') }} From 82fa1f319020de858c93cc461e56fbb85e594c50 Mon Sep 17 00:00:00 2001 From: Devin Smith Date: Wed, 14 Aug 2024 17:42:50 -0700 Subject: [PATCH 19/43] chore: Bump to jpy 0.18.0 (#5936) --- docker/registry/server-base/gradle.properties | 2 +- docker/server-jetty/src/main/server-jetty/requirements.txt | 4 ++-- docker/server/src/main/server-netty/requirements.txt | 4 ++-- gradle/libs.versions.toml | 2 +- py/embedded-server/setup.py | 2 +- py/server/setup.py | 2 +- 6 files changed, 8 insertions(+), 8 deletions(-) diff --git a/docker/registry/server-base/gradle.properties b/docker/registry/server-base/gradle.properties index d1268475ce4..75412cde87f 100644 --- a/docker/registry/server-base/gradle.properties +++ b/docker/registry/server-base/gradle.properties @@ -1,3 +1,3 @@ io.deephaven.project.ProjectType=DOCKER_REGISTRY deephaven.registry.imageName=ghcr.io/deephaven/server-base:edge -deephaven.registry.imageId=ghcr.io/deephaven/server-base@sha256:736f8fba8433a4823e0169acac8bded28cd82a54e2188d06e8800762c1e4a0a4 +deephaven.registry.imageId=ghcr.io/deephaven/server-base@sha256:3304ec00e18bf86aee18b3491c7d92b326773deacfd98d1556facaddb1957a63 diff --git a/docker/server-jetty/src/main/server-jetty/requirements.txt b/docker/server-jetty/src/main/server-jetty/requirements.txt index d8f9b2ff437..a80149cbaaa 100644 --- a/docker/server-jetty/src/main/server-jetty/requirements.txt +++ b/docker/server-jetty/src/main/server-jetty/requirements.txt @@ -5,10 +5,10 @@ deephaven-plugin==0.6.0 importlib_resources==6.4.0 java-utilities==0.3.0 jedi==0.19.1 -jpy==0.17.0 +jpy==0.18.0 llvmlite==0.43.0 numba==0.60.0 -numpy==2.0.0 +numpy==2.0.1 pandas==2.2.2 parso==0.8.4 pyarrow==17.0.0 diff --git a/docker/server/src/main/server-netty/requirements.txt b/docker/server/src/main/server-netty/requirements.txt index d8f9b2ff437..a80149cbaaa 100644 --- a/docker/server/src/main/server-netty/requirements.txt +++ b/docker/server/src/main/server-netty/requirements.txt @@ -5,10 +5,10 @@ deephaven-plugin==0.6.0 importlib_resources==6.4.0 java-utilities==0.3.0 jedi==0.19.1 -jpy==0.17.0 +jpy==0.18.0 llvmlite==0.43.0 numba==0.60.0 -numpy==2.0.0 +numpy==2.0.1 pandas==2.2.2 parso==0.8.4 pyarrow==17.0.0 diff --git a/gradle/libs.versions.toml b/gradle/libs.versions.toml index 50def99c015..b4fb36cbd85 100644 --- a/gradle/libs.versions.toml +++ b/gradle/libs.versions.toml @@ -51,7 +51,7 @@ javax-validation = "1.0.0.GA" jdom2 = "2.0.6.1" jetbrains = "24.1.0" jetty = "11.0.20" -jpy = "0.17.0" +jpy = "0.18.0" jsinterop = "2.0.2" # google is annoying, and have different versions released for the same groupId jsinterop-base = "1.0.1" diff --git a/py/embedded-server/setup.py b/py/embedded-server/setup.py index b4cdcdcf8a4..6aa8b414457 100644 --- a/py/embedded-server/setup.py +++ b/py/embedded-server/setup.py @@ 
-62,7 +62,7 @@ def _compute_version(): keywords='Deephaven Development', python_requires='>=3.8', install_requires=[ - 'jpy>=0.17.0', + 'jpy>=0.18.0', 'java-utilities', f"deephaven-core[autocomplete]=={_version}", 'click>=8.1.7', diff --git a/py/server/setup.py b/py/server/setup.py index 19ca7e4952f..874787746d8 100644 --- a/py/server/setup.py +++ b/py/server/setup.py @@ -56,7 +56,7 @@ def _compute_version(): keywords='Deephaven Development', python_requires='>=3.8', install_requires=[ - 'jpy>=0.17.0', + 'jpy>=0.18.0', 'deephaven-plugin>=0.6.0', 'numpy', 'pandas>=1.5.0', From bda75effd98c7bb369e0408cd37d7b084f376e04 Mon Sep 17 00:00:00 2001 From: Shivam Malhotra Date: Thu, 15 Aug 2024 11:56:21 -0500 Subject: [PATCH 20/43] feat: Added support to write parquet files to S3 (#5914) Added a new `OutputStream` to write to S3 and moved all parquet writing code from using SeekableByteChannels to OutputStreams. --- .../util/channel/CachedChannelProvider.java | 13 +- .../util/channel/CompletableOutputStream.java | 51 +++ .../util/channel/LocalFSChannelProvider.java | 86 ---- .../channel/SeekableChannelsProvider.java | 16 +- .../channel/CachedChannelProviderTest.java | 74 +--- extensions/parquet/base/build.gradle | 1 + .../parquet/base/ColumnWriterImpl.java | 37 +- .../base/NullParquetMetadataFileWriter.java | 12 +- .../parquet/base/ParquetFileWriter.java | 62 +-- .../base/ParquetMetadataFileWriter.java | 22 +- .../deephaven/parquet/base/ParquetUtils.java | 4 +- .../base/PositionedBufferedOutputStream.java | 29 -- .../parquet/base/RowGroupWriterImpl.java | 13 +- .../table/ParquetMetadataFileWriterImpl.java | 87 ++-- .../parquet/table/ParquetTableWriter.java | 195 ++++----- .../deephaven/parquet/table/ParquetTools.java | 381 ++++++------------ .../table/ParquetTableReadWriteTest.java | 93 +++-- .../parquet/table/S3ParquetTestBase.java | 210 +++++++++- .../parquet/table/TestParquetTools.java | 4 +- .../extensions/s3/S3AsyncClientFactory.java | 14 +- .../s3/S3CompletableOutputStream.java | 362 +++++++++++++++++ .../extensions/s3/S3Instructions.java | 74 +++- .../s3/S3SeekableChannelProvider.java | 7 +- .../extensions/s3/S3InstructionsTest.java | 57 ++- .../s3/S3SeekableChannelSimpleTestBase.java | 58 +++ .../testlib/S3SeekableChannelTestSetup.java | 11 + extensions/trackedfile/build.gradle | 1 + .../LocalCompletableOutputStream.java | 245 +++++++++++ .../TrackedSeekableChannelsProvider.java | 14 +- py/server/deephaven/experimental/s3.py | 22 +- py/server/deephaven/parquet.py | 27 +- 31 files changed, 1538 insertions(+), 744 deletions(-) create mode 100644 Util/channel/src/main/java/io/deephaven/util/channel/CompletableOutputStream.java delete mode 100644 Util/channel/src/main/java/io/deephaven/util/channel/LocalFSChannelProvider.java delete mode 100644 extensions/parquet/base/src/main/java/io/deephaven/parquet/base/PositionedBufferedOutputStream.java create mode 100644 extensions/s3/src/main/java/io/deephaven/extensions/s3/S3CompletableOutputStream.java create mode 100644 extensions/trackedfile/src/main/java/io/deephaven/extensions/trackedfile/LocalCompletableOutputStream.java diff --git a/Util/channel/src/main/java/io/deephaven/util/channel/CachedChannelProvider.java b/Util/channel/src/main/java/io/deephaven/util/channel/CachedChannelProvider.java index 507bf46b286..84c2872c108 100644 --- a/Util/channel/src/main/java/io/deephaven/util/channel/CachedChannelProvider.java +++ b/Util/channel/src/main/java/io/deephaven/util/channel/CachedChannelProvider.java @@ -17,7 +17,6 @@ import java.net.URI; import 
java.nio.ByteBuffer; import java.nio.channels.SeekableByteChannel; -import java.nio.file.Path; import java.util.*; import java.util.stream.Stream; @@ -111,15 +110,9 @@ public InputStream getInputStream(final SeekableByteChannel channel, final int s } @Override - public SeekableByteChannel getWriteChannel(@NotNull final Path path, final boolean append) throws IOException { - final String pathKey = path.toAbsolutePath().toString(); - final ChannelType channelType = append ? ChannelType.WriteAppend : ChannelType.Write; - final KeyedObjectHashMap channelPool = channelPools.get(channelType); - final CachedChannel result = tryGetPooledChannel(pathKey, channelPool); - return result == null - ? new CachedChannel(wrappedProvider.getWriteChannel(path, append), channelType, pathKey) - : result.position(append ? result.size() : 0); // The seek isn't really necessary for append; will be at - // end no matter what. + public final CompletableOutputStream getOutputStream(@NotNull final URI uri, final int bufferSizeHint) + throws IOException { + return wrappedProvider.getOutputStream(uri, bufferSizeHint); } @Override diff --git a/Util/channel/src/main/java/io/deephaven/util/channel/CompletableOutputStream.java b/Util/channel/src/main/java/io/deephaven/util/channel/CompletableOutputStream.java new file mode 100644 index 00000000000..28f3b8af129 --- /dev/null +++ b/Util/channel/src/main/java/io/deephaven/util/channel/CompletableOutputStream.java @@ -0,0 +1,51 @@ +// +// Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending +// +package io.deephaven.util.channel; + +import java.io.IOException; +import java.io.OutputStream; + +/** + * An {@link OutputStream} that can be marked as done, completed, or rolled back. + *
<p>
+ * The {@link #done()} method is used to flush all buffered data to the underlying storage, {@link #complete()} to
+ * finalize the write operation, and {@link #rollback()} to cancel the write. Closing this output stream without calling
+ * complete will not flush data to the underlying storage.
+ * <p>
+ * One usage pattern can be like this:
+ *
+ * <pre>
+ * try (final CompletableOutputStream outputStream = CreateCompletableOutputStream()) {
+ *     try {
+ *         IOUtils.copy(inputStream, outputStream);
+ *         outputStream.done(); // Optional; use this to flush buffered data without completing the stream
+ *         outputStream.complete();
+ *     } catch (IOException e) {
+ *         outputStream.rollback();
+ *     }
+ * }
+ * </pre>
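+ * <p>
+ * For example, a component that only buffers and flushes data (such as a metadata writer) can call {@link #done()}
+ * when it has finished writing, while the code that opened the stream remains responsible for calling
+ * {@link #complete()} on success or {@link #rollback()} on failure.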
+ */ +public abstract class CompletableOutputStream extends OutputStream { + + /** + * Flush all buffered data to the underlying storage. This is optional and should be called after the user is done + * writing to the output stream. All writes to the output stream after calling this method will lead to an + * {@link IOException}. + */ + public abstract void done() throws IOException; + + /** + * Flush all buffered data and save all written data to the underlying storage. This method should be called after + * the user is done writing to the output stream. All writes to the output stream after calling this method will + * lead to an {@link IOException}. + */ + public abstract void complete() throws IOException; + + /** + * Try to roll back any data written to the underlying storage, reverting back to the original state before opening + * this stream. This is an optional operation, as some implementations may not be able to support it. + */ + public abstract void rollback() throws IOException; +} diff --git a/Util/channel/src/main/java/io/deephaven/util/channel/LocalFSChannelProvider.java b/Util/channel/src/main/java/io/deephaven/util/channel/LocalFSChannelProvider.java deleted file mode 100644 index 48083b074c3..00000000000 --- a/Util/channel/src/main/java/io/deephaven/util/channel/LocalFSChannelProvider.java +++ /dev/null @@ -1,86 +0,0 @@ -// -// Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending -// -package io.deephaven.util.channel; - -import io.deephaven.base.FileUtils; -import org.jetbrains.annotations.NotNull; -import org.jetbrains.annotations.Nullable; - -import java.io.BufferedInputStream; -import java.io.IOException; -import java.io.InputStream; -import java.net.URI; -import java.nio.channels.FileChannel; -import java.nio.channels.SeekableByteChannel; -import java.nio.file.Files; -import java.nio.file.Path; -import java.nio.file.StandardOpenOption; -import java.util.stream.Stream; - -public class LocalFSChannelProvider implements SeekableChannelsProvider { - private static final int MAX_READ_BUFFER_SIZE = 1 << 16; // 64 KiB - - @Override - public SeekableChannelContext makeContext() { - // No additional context required for local FS - return SeekableChannelContext.NULL; - } - - @Override - public boolean isCompatibleWith(@Nullable final SeekableChannelContext channelContext) { - // Context is not used, hence always compatible - return true; - } - - @Override - public boolean exists(@NotNull final URI uri) { - return Files.exists(Path.of(uri)); - } - - @Override - public SeekableByteChannel getReadChannel(@Nullable final SeekableChannelContext channelContext, - @NotNull final URI uri) - throws IOException { - // context is unused here - return FileChannel.open(Path.of(uri), StandardOpenOption.READ); - } - - @Override - public InputStream getInputStream(final SeekableByteChannel channel, final int sizeHint) { - // FileChannel is not buffered, need to buffer - final int bufferSize = Math.min(sizeHint, MAX_READ_BUFFER_SIZE); - return new BufferedInputStream(Channels.newInputStreamNoClose(channel), bufferSize); - } - - @Override - public SeekableByteChannel getWriteChannel(@NotNull final Path filePath, final boolean append) throws IOException { - final FileChannel result = FileChannel.open(filePath, - StandardOpenOption.WRITE, - StandardOpenOption.CREATE, - append ? 
StandardOpenOption.APPEND : StandardOpenOption.TRUNCATE_EXISTING); - if (append) { - result.position(result.size()); - } else { - result.position(0); - } - return result; - } - - @Override - public final Stream list(@NotNull final URI directory) throws IOException { - // Assuming that the URI is a file, not a directory. The caller should manage file vs. directory handling in - // the processor. - return Files.list(Path.of(directory)).map(path -> FileUtils.convertToURI(path, false)); - } - - @Override - public final Stream walk(@NotNull final URI directory) throws IOException { - // Assuming that the URI is a file, not a directory. The caller should manage file vs. directory handling in - // the processor. - return Files.walk(Path.of(directory)).map(path -> FileUtils.convertToURI(path, false)); - } - - @Override - public void close() {} -} diff --git a/Util/channel/src/main/java/io/deephaven/util/channel/SeekableChannelsProvider.java b/Util/channel/src/main/java/io/deephaven/util/channel/SeekableChannelsProvider.java index 951224b7d8f..c2adf817e29 100644 --- a/Util/channel/src/main/java/io/deephaven/util/channel/SeekableChannelsProvider.java +++ b/Util/channel/src/main/java/io/deephaven/util/channel/SeekableChannelsProvider.java @@ -10,8 +10,6 @@ import java.io.InputStream; import java.net.URI; import java.nio.channels.SeekableByteChannel; -import java.nio.file.Path; -import java.nio.file.Paths; import java.util.stream.Stream; import static io.deephaven.base.FileUtils.convertToURI; @@ -92,11 +90,17 @@ SeekableByteChannel getReadChannel(@NotNull SeekableChannelContext channelContex */ InputStream getInputStream(SeekableByteChannel channel, int sizeHint) throws IOException; - default SeekableByteChannel getWriteChannel(@NotNull final String path, final boolean append) throws IOException { - return getWriteChannel(Paths.get(path), append); - } + /** + * Creates a {@link CompletableOutputStream} to write to the given URI. + * + * @param uri the URI to write to + * @param bufferSizeHint the number of bytes the caller expects to buffer before flushing + * @return the output stream + * @throws IOException if an IO exception occurs + * @see CompletableOutputStream + */ + CompletableOutputStream getOutputStream(@NotNull final URI uri, int bufferSizeHint) throws IOException; - SeekableByteChannel getWriteChannel(@NotNull Path path, boolean append) throws IOException; /** * Returns a stream of URIs, the elements of which are the entries in the directory. The listing is non-recursive. 
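A minimal caller-side sketch of the stream-based write contract introduced above, assuming the caller supplies a SeekableChannelsProvider, a destination URI, and a byte payload; the argument names and the buffer-size hint are illustrative, not taken from this patch:

import io.deephaven.util.channel.CompletableOutputStream;
import io.deephaven.util.channel.SeekableChannelsProvider;

import java.io.IOException;
import java.net.URI;

final class OutputStreamUsageSketch {
    // Write a byte payload to a destination URI using the stream-based API declared above.
    static void write(final SeekableChannelsProvider provider, final URI uri, final byte[] payload)
            throws IOException {
        try (final CompletableOutputStream out = provider.getOutputStream(uri, 1 << 16)) {
            try {
                out.write(payload);
                out.done(); // optional: flush buffered data to the underlying storage
                out.complete(); // finalize the write
            } catch (final IOException e) {
                out.rollback(); // best-effort cancel; some implementations may not support it
                throw e;
            }
        }
    }
}

The nested try mirrors the CompletableOutputStream javadoc: roll back on failure, and closing without complete does not flush data to the underlying storage.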
diff --git a/Util/channel/src/test/java/io/deephaven/util/channel/CachedChannelProviderTest.java b/Util/channel/src/test/java/io/deephaven/util/channel/CachedChannelProviderTest.java index 0f23fab7d39..5fa45196075 100644 --- a/Util/channel/src/test/java/io/deephaven/util/channel/CachedChannelProviderTest.java +++ b/Util/channel/src/test/java/io/deephaven/util/channel/CachedChannelProviderTest.java @@ -13,7 +13,6 @@ import java.net.URI; import java.nio.ByteBuffer; import java.nio.channels.SeekableByteChannel; -import java.nio.file.Path; import java.util.ArrayList; import java.util.List; import java.util.concurrent.atomic.AtomicInteger; @@ -54,50 +53,17 @@ public void testSimpleRead() throws IOException { } @Test - public void testSimpleReadWrite() throws IOException { + public void testSimplePooledReadChannelClose() throws IOException { SeekableChannelsProvider wrappedProvider = new TestChannelProvider(); CachedChannelProvider cachedChannelProvider = CachedChannelProvider.create(wrappedProvider, 100); for (int i = 0; i < 1000; i++) { - SeekableByteChannel rc = - ((i / 100) % 2 == 0 ? cachedChannelProvider.getReadChannel(wrappedProvider.makeContext(), "r" + i) - : cachedChannelProvider.getWriteChannel("w" + i, false)); + SeekableByteChannel rc = cachedChannelProvider.getReadChannel(wrappedProvider.makeContext(), "r" + i); rc.close(); } assertEquals(900, closed.size()); assertTrue(closed.get(0).endsWith("r0")); } - @Test - public void testSimpleWrite() throws IOException { - SeekableChannelsProvider wrappedProvider = new TestChannelProvider(); - CachedChannelProvider cachedChannelProvider = CachedChannelProvider.create(wrappedProvider, 100); - for (int i = 0; i < 1000; i++) { - SeekableByteChannel rc = cachedChannelProvider.getWriteChannel("w" + i, false); - // Call write to hit the assertions inside the mock channel - final ByteBuffer buffer = ByteBuffer.allocate(1); - rc.write(buffer); - rc.close(); - } - assertEquals(900, closed.size()); - for (int i = 0; i < 900; i++) { - assertTrue(closed.get(i).endsWith("w" + (i))); - } - } - - @Test - public void testSimpleAppend() throws IOException { - SeekableChannelsProvider wrappedProvider = new TestChannelProvider(); - CachedChannelProvider cachedChannelProvider = CachedChannelProvider.create(wrappedProvider, 100); - for (int i = 0; i < 1000; i++) { - SeekableByteChannel rc = cachedChannelProvider.getWriteChannel("a" + i, true); - rc.close(); - } - assertEquals(900, closed.size()); - for (int i = 0; i < 900; i++) { - assertTrue(closed.get(i).endsWith("a" + (i))); - } - } - @Test public void testCloseOrder() throws IOException { SeekableChannelsProvider wrappedProvider = new TestChannelProvider(); @@ -147,33 +113,6 @@ public void testReuse() throws IOException { assertEquals(0, closed.size()); } - @Test - public void testReuse10() throws IOException { - final SeekableChannelsProvider wrappedProvider = new TestChannelProvider(); - final CachedChannelProvider cachedChannelProvider = CachedChannelProvider.create(wrappedProvider, 100); - final SeekableByteChannel[] someResult = new SeekableByteChannel[100]; - for (int pi = 0; pi < 10; ++pi) { - for (int ci = 0; ci < 10; ++ci) { - someResult[pi * 10 + ci] = cachedChannelProvider.getWriteChannel("w" + pi % 10, false); - } - for (int ci = 0; ci < 10; ++ci) { - someResult[pi * 10 + 9 - ci].close(); - } - } - for (int step = 0; step < 10; ++step) { - final SeekableByteChannel[] reused = new SeekableByteChannel[100]; - for (int ri = 0; ri < 100; ++ri) { - SeekableByteChannel rc = 
cachedChannelProvider.getWriteChannel("w" + (ri / 10) % 10, false); - assertSame(rc, someResult[ri % 100]); - reused[ri] = rc; - } - for (int ri = 0; ri < 100; ++ri) { - reused[99 - ri].close(); - } - } - assertEquals(0, closed.size()); - } - @Test void testRewrapCachedChannelProvider() { final SeekableChannelsProvider wrappedProvider = new TestChannelProvider(); @@ -231,13 +170,8 @@ public SeekableByteChannel getReadChannel(@NotNull SeekableChannelContext channe } @Override - public SeekableByteChannel getWriteChannel(@NotNull String path, boolean append) { - return new TestMockChannel(count.getAndIncrement(), path); - } - - @Override - public SeekableByteChannel getWriteChannel(@NotNull Path path, boolean append) { - return new TestMockChannel(count.getAndIncrement(), path.toString()); + public CompletableOutputStream getOutputStream(@NotNull final URI uri, int bufferSizeHint) { + throw new UnsupportedOperationException("getOutputStream"); } @Override diff --git a/extensions/parquet/base/build.gradle b/extensions/parquet/base/build.gradle index b6edd934c89..76bcfd2f7ed 100644 --- a/extensions/parquet/base/build.gradle +++ b/extensions/parquet/base/build.gradle @@ -19,6 +19,7 @@ dependencies { implementation project(':Configuration') implementation project(':DataStructures') implementation libs.commons.io + implementation libs.guava compileOnly libs.jetbrains.annotations testImplementation libs.junit4 diff --git a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/ColumnWriterImpl.java b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/ColumnWriterImpl.java index 7072ab0120f..8d3b523af01 100644 --- a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/ColumnWriterImpl.java +++ b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/ColumnWriterImpl.java @@ -3,6 +3,7 @@ // package io.deephaven.parquet.base; +import com.google.common.io.CountingOutputStream; import org.apache.parquet.format.converter.ParquetMetadataConverter; import io.deephaven.parquet.compress.CompressorAdapter; import io.deephaven.util.QueryConstants; @@ -40,7 +41,7 @@ final class ColumnWriterImpl implements ColumnWriter { private static final int MIN_SLAB_SIZE = 64; - private final PositionedBufferedOutputStream bufferedOutput; + private final CountingOutputStream countingOutput; private final ColumnDescriptor column; private final RowGroupWriterImpl owner; private final CompressorAdapter compressorAdapter; @@ -68,12 +69,12 @@ final class ColumnWriterImpl implements ColumnWriter { ColumnWriterImpl( final RowGroupWriterImpl owner, - final PositionedBufferedOutputStream bufferedOutput, + final CountingOutputStream countingOutput, final ColumnDescriptor column, final CompressorAdapter compressorAdapter, final int targetPageSize, final ByteBufferAllocator allocator) { - this.bufferedOutput = bufferedOutput; + this.countingOutput = countingOutput; this.column = column; this.compressorAdapter = compressorAdapter; this.targetPageSize = targetPageSize; @@ -132,7 +133,7 @@ public void addDictionaryPage(@NotNull final Object dictionaryValues, final int // noinspection unchecked dictionaryWriter.writeBulk(dictionaryValues, valuesCount, NullStatistics.INSTANCE); - dictionaryOffset = bufferedOutput.position(); + dictionaryOffset = countingOutput.getCount(); writeDictionaryPage(dictionaryWriter.getByteBufferView(), valuesCount); pageCount++; hasDictionary = true; @@ -140,7 +141,7 @@ public void addDictionaryPage(@NotNull final Object dictionaryValues, final int } private void 
writeDictionaryPage(final ByteBuffer dictionaryBuffer, final int valuesCount) throws IOException { - final long currentChunkDictionaryPageOffset = bufferedOutput.position(); + final long currentChunkDictionaryPageOffset = countingOutput.getCount(); final int uncompressedSize = dictionaryBuffer.remaining(); final ByteArrayOutputStream baos = new ByteArrayOutputStream(); @@ -157,11 +158,11 @@ private void writeDictionaryPage(final ByteBuffer dictionaryBuffer, final int va compressedPageSize, valuesCount, Encoding.PLAIN, - bufferedOutput); - final long headerSize = bufferedOutput.position() - currentChunkDictionaryPageOffset; + countingOutput); + final long headerSize = countingOutput.getCount() - currentChunkDictionaryPageOffset; this.uncompressedLength += uncompressedSize + headerSize; this.compressedLength += compressedPageSize + headerSize; - compressedBytes.writeAllTo(bufferedOutput); + compressedBytes.writeAllTo(countingOutput); encodings.add(Encoding.PLAIN); } @@ -294,7 +295,7 @@ public void writePageV2( final BytesInput compressedData = BytesInput.from(baos); final int compressedSize = (int) (compressedData.size() + repetitionLevels.size() + definitionLevels.size()); - final long initialOffset = bufferedOutput.position(); + final long initialOffset = countingOutput.getCount(); if (firstDataPageOffset == -1) { firstDataPageOffset = initialOffset; } @@ -303,20 +304,20 @@ public void writePageV2( valueCount, nullCount, rowCount, rlByteLength, dlByteLength, - bufferedOutput); - final long headerSize = bufferedOutput.position() - initialOffset; + countingOutput); + final long headerSize = countingOutput.getCount() - initialOffset; this.uncompressedLength += (uncompressedSize + headerSize); this.compressedLength += (compressedSize + headerSize); this.totalValueCount += valueCount; this.pageCount += 1; - definitionLevels.writeAllTo(bufferedOutput); - compressedData.writeAllTo(bufferedOutput); + definitionLevels.writeAllTo(countingOutput); + compressedData.writeAllTo(countingOutput); } private void writePage(final BytesInput bytes, final int valueCount, final long rowCount, final Encoding valuesEncoding) throws IOException { - final long initialOffset = bufferedOutput.position(); + final long initialOffset = countingOutput.getCount(); if (firstDataPageOffset == -1) { firstDataPageOffset = initialOffset; } @@ -346,15 +347,15 @@ private void writePage(final BytesInput bytes, final int valueCount, final long (int) compressedSize, valueCount, valuesEncoding, - bufferedOutput); - final long headerSize = bufferedOutput.position() - initialOffset; + countingOutput); + final long headerSize = countingOutput.getCount() - initialOffset; this.uncompressedLength += (uncompressedSize + headerSize); this.compressedLength += (compressedSize + headerSize); this.totalValueCount += valueCount; this.pageCount += 1; - compressedBytes.writeAllTo(bufferedOutput); - offsetIndexBuilder.add((int) (bufferedOutput.position() - initialOffset), rowCount); + compressedBytes.writeAllTo(countingOutput); + offsetIndexBuilder.add((int) (countingOutput.getCount() - initialOffset), rowCount); encodings.add(valuesEncoding); encodingStatsBuilder.addDataEncoding(valuesEncoding); } diff --git a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/NullParquetMetadataFileWriter.java b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/NullParquetMetadataFileWriter.java index bb9be1350e6..e0878476b09 100644 --- a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/NullParquetMetadataFileWriter.java 
+++ b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/NullParquetMetadataFileWriter.java @@ -3,8 +3,11 @@ // package io.deephaven.parquet.base; +import io.deephaven.util.channel.CompletableOutputStream; import org.apache.parquet.hadoop.metadata.ParquetMetadata; +import java.net.URI; + /** * A no-op implementation of MetadataFileWriterBase when we don't want to write metadata files for Parquet files. */ @@ -13,11 +16,10 @@ public enum NullParquetMetadataFileWriter implements ParquetMetadataFileWriter { INSTANCE; @Override - public void addParquetFileMetadata(final String parquetFilePath, final ParquetMetadata metadata) {} - - @Override - public void writeMetadataFiles(final String metadataFilePath, final String commonMetadataFilePath) {} + public void addParquetFileMetadata(final URI parquetFileURI, final ParquetMetadata metadata) {} @Override - public void clear() {} + public void writeMetadataFiles( + final CompletableOutputStream metadataOutputStream, + final CompletableOutputStream commonMetadataOutputStream) {} } diff --git a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/ParquetFileWriter.java b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/ParquetFileWriter.java index 81dc13a4430..8cf51a65e7e 100644 --- a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/ParquetFileWriter.java +++ b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/ParquetFileWriter.java @@ -3,13 +3,12 @@ // package io.deephaven.parquet.base; +import com.google.common.io.CountingOutputStream; import org.apache.parquet.format.converter.ParquetMetadataConverter; -import io.deephaven.util.channel.SeekableChannelsProvider; import io.deephaven.parquet.compress.CompressorAdapter; import io.deephaven.parquet.compress.DeephavenCompressorAdapterFactory; import org.apache.parquet.Version; import org.apache.parquet.bytes.ByteBufferAllocator; -import org.apache.parquet.bytes.BytesUtils; import org.apache.parquet.format.Util; import org.apache.parquet.hadoop.metadata.*; @@ -19,20 +18,21 @@ import org.jetbrains.annotations.NotNull; import java.io.IOException; +import java.io.OutputStream; +import java.net.URI; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; import static io.deephaven.parquet.base.ParquetUtils.MAGIC; -import static io.deephaven.parquet.base.ParquetUtils.PARQUET_OUTPUT_BUFFER_SIZE; import static org.apache.parquet.format.Util.writeFileMetaData; -public final class ParquetFileWriter { +public final class ParquetFileWriter implements AutoCloseable { private static final ParquetMetadataConverter metadataConverter = new ParquetMetadataConverter(); private static final int VERSION = 1; - private final PositionedBufferedOutputStream bufferedOutput; + private final CountingOutputStream countingOutput; private final MessageType type; private final int targetPageSize; private final ByteBufferAllocator allocator; @@ -40,13 +40,12 @@ public final class ParquetFileWriter { private final Map extraMetaData; private final List blocks = new ArrayList<>(); private final List> offsetIndexes = new ArrayList<>(); - private final String destFilePathForMetadata; + private final URI dest; private final ParquetMetadataFileWriter metadataFileWriter; public ParquetFileWriter( - final String destFilePath, - final String destFilePathForMetadata, - final SeekableChannelsProvider channelsProvider, + final URI dest, + final OutputStream destOutputStream, final int targetPageSize, final ByteBufferAllocator allocator, 
final MessageType type, @@ -56,44 +55,52 @@ public ParquetFileWriter( this.targetPageSize = targetPageSize; this.allocator = allocator; this.extraMetaData = new HashMap<>(extraMetaData); - bufferedOutput = new PositionedBufferedOutputStream(channelsProvider.getWriteChannel(destFilePath, false), - PARQUET_OUTPUT_BUFFER_SIZE); - bufferedOutput.write(MAGIC); + this.countingOutput = new CountingOutputStream(destOutputStream); + countingOutput.write(MAGIC); this.type = type; this.compressorAdapter = DeephavenCompressorAdapterFactory.getInstance().getByName(codecName); - this.destFilePathForMetadata = destFilePathForMetadata; + this.dest = dest; this.metadataFileWriter = metadataFileWriter; } public RowGroupWriter addRowGroup(final long size) { final RowGroupWriterImpl rowGroupWriter = - new RowGroupWriterImpl(bufferedOutput, type, targetPageSize, allocator, compressorAdapter); + new RowGroupWriterImpl(countingOutput, type, targetPageSize, allocator, compressorAdapter); rowGroupWriter.getBlock().setRowCount(size); blocks.add(rowGroupWriter.getBlock()); offsetIndexes.add(rowGroupWriter.offsetIndexes()); return rowGroupWriter; } + @Override public void close() throws IOException { serializeOffsetIndexes(); final ParquetMetadata footer = new ParquetMetadata(new FileMetaData(type, extraMetaData, Version.FULL_VERSION), blocks); - serializeFooter(footer, bufferedOutput); - metadataFileWriter.addParquetFileMetadata(destFilePathForMetadata, footer); - // Flush any buffered data and close the channel - bufferedOutput.close(); + serializeFooter(footer, countingOutput); + metadataFileWriter.addParquetFileMetadata(dest, footer); + // Flush any buffered data, do not close the stream since it is managed by the layer above + countingOutput.flush(); compressorAdapter.close(); } - public static void serializeFooter(final ParquetMetadata footer, - final PositionedBufferedOutputStream bufferedOutput) + public static void serializeFooter(final ParquetMetadata footer, final CountingOutputStream countingOutput) throws IOException { - final long footerIndex = bufferedOutput.position(); + final long footerIndex = countingOutput.getCount(); final org.apache.parquet.format.FileMetaData parquetMetadata = metadataConverter.toParquetMetadata(VERSION, footer); - writeFileMetaData(parquetMetadata, bufferedOutput); - BytesUtils.writeIntLittleEndian(bufferedOutput, (int) (bufferedOutput.position() - footerIndex)); - bufferedOutput.write(MAGIC); + writeFileMetaData(parquetMetadata, countingOutput); + countingOutput.write(intToLittleEndian((int) (countingOutput.getCount() - footerIndex))); + countingOutput.write(MAGIC); + } + + private static byte[] intToLittleEndian(final int value) { + return new byte[] { + (byte) (value & 0xFF), + (byte) ((value >> 8) & 0xFF), + (byte) ((value >> 16) & 0xFF), + (byte) ((value >> 24) & 0xFF) + }; } private void serializeOffsetIndexes() throws IOException { @@ -106,9 +113,10 @@ private void serializeOffsetIndexes() throws IOException { continue; } final ColumnChunkMetaData column = columns.get(cIndex); - final long offset = bufferedOutput.position(); - Util.writeOffsetIndex(ParquetMetadataConverter.toParquetOffsetIndex(offsetIndex), bufferedOutput); - column.setOffsetIndexReference(new IndexReference(offset, (int) (bufferedOutput.position() - offset))); + final long offset = countingOutput.getCount(); + Util.writeOffsetIndex(ParquetMetadataConverter.toParquetOffsetIndex(offsetIndex), countingOutput); + column.setOffsetIndexReference( + new IndexReference(offset, (int) (countingOutput.getCount() 
- offset))); } } } diff --git a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/ParquetMetadataFileWriter.java b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/ParquetMetadataFileWriter.java index 42a93dc24e1..0a8c9c4ef42 100644 --- a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/ParquetMetadataFileWriter.java +++ b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/ParquetMetadataFileWriter.java @@ -3,9 +3,11 @@ // package io.deephaven.parquet.base; +import io.deephaven.util.channel.CompletableOutputStream; import org.apache.parquet.hadoop.metadata.ParquetMetadata; import java.io.IOException; +import java.net.URI; /** * Used to write {@value ParquetUtils#METADATA_FILE_NAME} and {@value ParquetUtils#COMMON_METADATA_FILE_NAME} files for @@ -17,21 +19,19 @@ public interface ParquetMetadataFileWriter { * Add the parquet metadata for the provided parquet file to the list of metadata to be written to combined metadata * files. * - * @param parquetFilePath The parquet file destination path + * @param parquetFileURI The parquet file destination URI * @param metadata The parquet metadata corresponding to the parquet file */ - void addParquetFileMetadata(String parquetFilePath, ParquetMetadata metadata); + void addParquetFileMetadata(URI parquetFileURI, ParquetMetadata metadata); /** - * Write the combined metadata files for all metadata accumulated so far and clear the list. + * Write the combined metadata to the provided streams and clear the metadata accumulated so far. The output streams + * should be marked as {@link CompletableOutputStream#done()} after writing is finished. * - * @param metadataFilePath The destination path for the {@value ParquetUtils#METADATA_FILE_NAME} file - * @param commonMetadataFilePath The destination path for the {@value ParquetUtils#COMMON_METADATA_FILE_NAME} file + * @param metadataOutputStream The output stream for the {@value ParquetUtils#METADATA_FILE_NAME} file + * @param commonMetadataOutputStream The output stream for the {@value ParquetUtils#COMMON_METADATA_FILE_NAME} file */ - void writeMetadataFiles(String metadataFilePath, String commonMetadataFilePath) throws IOException; - - /** - * Clear the list of metadata accumulated so far. - */ - void clear(); + void writeMetadataFiles( + CompletableOutputStream metadataOutputStream, + CompletableOutputStream commonMetadataOutputStream) throws IOException; } diff --git a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/ParquetUtils.java b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/ParquetUtils.java index 70f83f9adfc..467c7b22d8a 100644 --- a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/ParquetUtils.java +++ b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/ParquetUtils.java @@ -90,7 +90,9 @@ public static boolean isVisibleParquetFile(@NotNull final Path rootDir, @NotNull } /** - * Resolve a relative path against a base URI. The path can be from Windows or Unix systems. + * Resolve a relative path against a base URI. The path can be from Windows or Unix systems. 
This method should be + * used if we expect the relative path to contain file separators or special characters, otherwise use + * {@code base.resolve(relativePath)} */ public static URI resolve(final URI base, final String relativePath) { final URI relativeURI; diff --git a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/PositionedBufferedOutputStream.java b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/PositionedBufferedOutputStream.java deleted file mode 100644 index 3d26162f806..00000000000 --- a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/PositionedBufferedOutputStream.java +++ /dev/null @@ -1,29 +0,0 @@ -// -// Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending -// -package io.deephaven.parquet.base; - -import org.jetbrains.annotations.NotNull; - -import java.io.BufferedOutputStream; -import java.io.IOException; -import java.nio.channels.Channels; -import java.nio.channels.SeekableByteChannel; - -public final class PositionedBufferedOutputStream extends BufferedOutputStream { - - private final SeekableByteChannel writeChannel; - - public PositionedBufferedOutputStream(@NotNull final SeekableByteChannel writeChannel, final int size) { - super(Channels.newOutputStream(writeChannel), size); - this.writeChannel = writeChannel; - } - - /** - * Get the total number of bytes written to this stream - */ - long position() throws IOException { - // Number of bytes buffered in the stream + bytes written to the underlying channel - return this.count + writeChannel.position(); - } -} diff --git a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/RowGroupWriterImpl.java b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/RowGroupWriterImpl.java index c873f15d495..6d387228866 100644 --- a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/RowGroupWriterImpl.java +++ b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/RowGroupWriterImpl.java @@ -3,6 +3,7 @@ // package io.deephaven.parquet.base; +import com.google.common.io.CountingOutputStream; import io.deephaven.parquet.compress.CompressorAdapter; import org.apache.parquet.bytes.ByteBufferAllocator; import org.apache.parquet.hadoop.metadata.BlockMetaData; @@ -17,7 +18,7 @@ import java.util.List; final class RowGroupWriterImpl implements RowGroupWriter { - private final PositionedBufferedOutputStream bufferedOutput; + private final CountingOutputStream countingOutput; private final MessageType type; private final int targetPageSize; private final ByteBufferAllocator allocator; @@ -26,22 +27,22 @@ final class RowGroupWriterImpl implements RowGroupWriter { private final List currentOffsetIndexes = new ArrayList<>(); private final CompressorAdapter compressorAdapter; - RowGroupWriterImpl(PositionedBufferedOutputStream bufferedOutput, + RowGroupWriterImpl(CountingOutputStream countingOutput, MessageType type, int targetPageSize, ByteBufferAllocator allocator, CompressorAdapter compressorAdapter) { - this(bufferedOutput, type, targetPageSize, allocator, new BlockMetaData(), compressorAdapter); + this(countingOutput, type, targetPageSize, allocator, new BlockMetaData(), compressorAdapter); } - private RowGroupWriterImpl(PositionedBufferedOutputStream bufferedOutput, + private RowGroupWriterImpl(CountingOutputStream countingOutput, MessageType type, int targetPageSize, ByteBufferAllocator allocator, BlockMetaData blockMetaData, CompressorAdapter compressorAdapter) { - this.bufferedOutput = bufferedOutput; + this.countingOutput = 
countingOutput; this.type = type; this.targetPageSize = targetPageSize; this.allocator = allocator; @@ -72,7 +73,7 @@ public ColumnWriter addColumn(String columnName) { + " need to close that before opening a writer for " + columnName); } activeWriter = new ColumnWriterImpl(this, - bufferedOutput, + countingOutput, type.getColumnDescription(getPrimitivePath(columnName)), compressorAdapter, targetPageSize, diff --git a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/ParquetMetadataFileWriterImpl.java b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/ParquetMetadataFileWriterImpl.java index 90da731eb0c..c0ea3a6f43c 100644 --- a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/ParquetMetadataFileWriterImpl.java +++ b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/ParquetMetadataFileWriterImpl.java @@ -3,15 +3,14 @@ // package io.deephaven.parquet.table; +import com.google.common.io.CountingOutputStream; import io.deephaven.UncheckedDeephavenException; import io.deephaven.parquet.base.ParquetFileWriter; import io.deephaven.parquet.base.ParquetMetadataFileWriter; import io.deephaven.parquet.base.ParquetUtils; -import io.deephaven.parquet.base.PositionedBufferedOutputStream; import io.deephaven.parquet.table.metadata.ColumnTypeInfo; import io.deephaven.parquet.table.metadata.TableInfo; -import io.deephaven.util.channel.SeekableChannelsProvider; -import io.deephaven.util.channel.SeekableChannelsProviderLoader; +import io.deephaven.util.channel.CompletableOutputStream; import org.apache.parquet.hadoop.metadata.BlockMetaData; import org.apache.parquet.hadoop.metadata.FileMetaData; import org.apache.parquet.hadoop.metadata.ParquetMetadata; @@ -19,9 +18,9 @@ import org.jetbrains.annotations.NotNull; import org.jetbrains.annotations.Nullable; -import java.io.File; import java.io.IOException; -import java.nio.file.Path; +import java.io.OutputStream; +import java.net.URI; import java.util.ArrayList; import java.util.Collection; import java.util.HashMap; @@ -29,7 +28,6 @@ import java.util.List; import java.util.Map; -import static io.deephaven.base.FileUtils.convertToURI; import static io.deephaven.parquet.base.ParquetUtils.MAGIC; import static io.deephaven.parquet.base.ParquetUtils.METADATA_KEY; import static io.deephaven.parquet.base.ParquetUtils.getPerFileMetadataKey; @@ -45,18 +43,17 @@ final class ParquetMetadataFileWriterImpl implements ParquetMetadataFileWriter { * A class to hold the parquet file and its metadata. */ private static class ParquetFileMetadata { - final String filePath; + final URI uri; final ParquetMetadata metadata; - ParquetFileMetadata(final String filePath, final ParquetMetadata metadata) { - this.filePath = filePath; + ParquetFileMetadata(final URI uri, final ParquetMetadata metadata) { + this.uri = uri; this.metadata = metadata; } } - private final Path metadataRootDirAbsPath; + private final URI metadataRootDir; private final List parquetFileMetadataList; - private final SeekableChannelsProvider channelsProvider; private final MessageType partitioningColumnsSchema; // The following fields are used to accumulate metadata for all parquet files @@ -76,23 +73,22 @@ private static class ParquetFileMetadata { * @param partitioningColumnsSchema The common schema for partitioning columns to be included in the * {@value ParquetUtils#COMMON_METADATA_FILE_NAME} file, can be null if there are no partitioning columns. 
*/ - ParquetMetadataFileWriterImpl(@NotNull final File metadataRootDir, @NotNull final File[] destinations, + ParquetMetadataFileWriterImpl( + @NotNull final URI metadataRootDir, + @NotNull final URI[] destinations, @Nullable final MessageType partitioningColumnsSchema) { if (destinations.length == 0) { throw new IllegalArgumentException("No destinations provided"); } - this.metadataRootDirAbsPath = metadataRootDir.getAbsoluteFile().toPath(); - final String metadataRootDirAbsPathString = metadataRootDirAbsPath.toString(); - for (final File destination : destinations) { - if (!destination.getAbsolutePath().startsWith(metadataRootDirAbsPathString)) { + this.metadataRootDir = metadataRootDir; + final String metadataRootDirStr = metadataRootDir.toString(); + for (final URI destination : destinations) { + if (!destination.toString().startsWith(metadataRootDirStr)) { throw new UncheckedDeephavenException("All destinations must be nested under the provided metadata root" - + " directory, provided destination " + destination.getAbsolutePath() + " is not under " + - metadataRootDirAbsPathString); + + " directory, provided destination " + destination + " is not under " + metadataRootDir); } } this.parquetFileMetadataList = new ArrayList<>(destinations.length); - this.channelsProvider = SeekableChannelsProviderLoader.getInstance().fromServiceLoader( - convertToURI(metadataRootDirAbsPathString, true), null); this.partitioningColumnsSchema = partitioningColumnsSchema; this.mergedSchema = null; @@ -106,28 +102,31 @@ private static class ParquetFileMetadata { /** * Add parquet metadata for the provided parquet file to the combined metadata file. * - * @param parquetFilePath The parquet file destination path + * @param parquetFileURI The parquet file destination URI * @param metadata The parquet metadata */ - public void addParquetFileMetadata(final String parquetFilePath, final ParquetMetadata metadata) { - parquetFileMetadataList.add(new ParquetFileMetadata(parquetFilePath, metadata)); + public void addParquetFileMetadata(final URI parquetFileURI, final ParquetMetadata metadata) { + parquetFileMetadataList.add(new ParquetFileMetadata(parquetFileURI, metadata)); } /** - * Write the accumulated metadata to the provided files and clear the metadata accumulated so far. + * Write the combined metadata to the provided streams and clear the metadata accumulated so far. The output streams + * are marked as {@link CompletableOutputStream#done()} after writing is finished. 
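The constructor above validates nesting with a plain string-prefix check on the URIs, and the merge step later derives each file's relative path with URI.relativize instead of Path.relativize. A standalone illustration with hypothetical file URIs (the directory layout is invented for the example):

```java
import java.net.URI;

public class MetadataRootSketch {
    public static void main(String[] args) {
        // Hypothetical destinations; in the real writer these come from ParquetTools.
        final URI metadataRootDir = URI.create("file:/data/output/");
        final URI destination = URI.create("file:/data/output/2024-08-05/table.parquet");
        final URI outsider = URI.create("file:/tmp/elsewhere/table.parquet");

        // The nesting check is a simple prefix test on the string form of the URIs.
        System.out.println(destination.toString().startsWith(metadataRootDir.toString())); // true
        System.out.println(outsider.toString().startsWith(metadataRootDir.toString()));    // false

        // The relative path recorded in the merged metadata is computed with URI.relativize.
        System.out.println(metadataRootDir.relativize(destination).getPath());
        // 2024-08-05/table.parquet
    }
}
```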
* - * @param metadataFilePath The destination path for the {@value ParquetUtils#METADATA_FILE_NAME} file - * @param commonMetadataFilePath The destination path for the {@value ParquetUtils#COMMON_METADATA_FILE_NAME} file + * @param metadataOutputStream The output stream for the {@value ParquetUtils#METADATA_FILE_NAME} file + * @param commonMetadataOutputStream The output stream for the {@value ParquetUtils#COMMON_METADATA_FILE_NAME} file */ - public void writeMetadataFiles(final String metadataFilePath, final String commonMetadataFilePath) - throws IOException { + public void writeMetadataFiles( + final CompletableOutputStream metadataOutputStream, + final CompletableOutputStream commonMetadataOutputStream) throws IOException { if (parquetFileMetadataList.isEmpty()) { throw new UncheckedDeephavenException("No parquet files to write metadata for"); } mergeMetadata(); final ParquetMetadata metadataFooter = new ParquetMetadata(new FileMetaData(mergedSchema, mergedKeyValueMetaData, mergedCreatedByString), mergedBlocks); - writeMetadataFile(metadataFooter, metadataFilePath); + writeMetadataFile(metadataFooter, metadataOutputStream); + metadataOutputStream.done(); // Skip the blocks data and merge schema with partitioning columns' schema to write the common metadata file. // The ordering of arguments in method call is important because we want to keep partitioning columns in the @@ -136,7 +135,8 @@ public void writeMetadataFiles(final String metadataFilePath, final String commo final ParquetMetadata commonMetadataFooter = new ParquetMetadata(new FileMetaData(mergedSchema, mergedKeyValueMetaData, mergedCreatedByString), new ArrayList<>()); - writeMetadataFile(commonMetadataFooter, commonMetadataFilePath); + writeMetadataFile(commonMetadataFooter, commonMetadataOutputStream); + commonMetadataOutputStream.done(); // Clear the accumulated metadata clear(); @@ -150,7 +150,7 @@ private void mergeMetadata() throws IOException { for (final ParquetFileMetadata parquetFileMetadata : parquetFileMetadataList) { final FileMetaData fileMetaData = parquetFileMetadata.metadata.getFileMetaData(); mergedSchema = mergeSchemaInto(fileMetaData.getSchema(), mergedSchema); - final String relativePath = getRelativePath(parquetFileMetadata.filePath, metadataRootDirAbsPath); + final String relativePath = metadataRootDir.relativize(parquetFileMetadata.uri).getPath(); mergeKeyValueMetaData(parquetFileMetadata, relativePath); mergeBlocksInto(parquetFileMetadata, relativePath, mergedBlocks); mergedCreatedBy.add(fileMetaData.getCreatedBy()); @@ -218,7 +218,7 @@ private void mergeKeyValueMetaData(@NotNull final ParquetFileMetadata parquetFil // Assuming the keys are unique for each file because file names are unique, verified in the constructor if (mergedKeyValueMetaData.containsKey(fileKey)) { throw new IllegalStateException("Could not merge metadata for file " + - parquetFileMetadata.filePath + " because it has conflicting file key: " + fileKey); + parquetFileMetadata.uri + " because it has conflicting file key: " + fileKey); } mergedKeyValueMetaData.put(fileKey, entry.getValue()); @@ -253,21 +253,18 @@ private static void mergeBlocksInto(final ParquetFileMetadata parquetFileMetadat } } - private static String getRelativePath(final String parquetFilePath, final Path metadataRootDirAbsPath) { - final Path parquetFileAbsPath = new File(parquetFilePath).getAbsoluteFile().toPath(); - return metadataRootDirAbsPath.relativize(parquetFileAbsPath).toString(); - } - - private void writeMetadataFile(final ParquetMetadata 
metadataFooter, final String outputPath) throws IOException { - final PositionedBufferedOutputStream metadataOutputStream = - new PositionedBufferedOutputStream(channelsProvider.getWriteChannel(outputPath, false), - ParquetUtils.PARQUET_OUTPUT_BUFFER_SIZE); - metadataOutputStream.write(MAGIC); - ParquetFileWriter.serializeFooter(metadataFooter, metadataOutputStream); - metadataOutputStream.close(); + private static void writeMetadataFile(final ParquetMetadata metadataFooter, final OutputStream outputStream) + throws IOException { + final CountingOutputStream countingOutputStream = new CountingOutputStream(outputStream); + countingOutputStream.write(MAGIC); + ParquetFileWriter.serializeFooter(metadataFooter, countingOutputStream); + countingOutputStream.flush(); } - public void clear() { + /** + * Clear the list of metadata accumulated so far. + */ + private void clear() { parquetFileMetadataList.clear(); mergedKeyValueMetaData.clear(); mergedBlocks.clear(); diff --git a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/ParquetTableWriter.java b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/ParquetTableWriter.java index a0c63614a82..ebb1d17571d 100644 --- a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/ParquetTableWriter.java +++ b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/ParquetTableWriter.java @@ -27,8 +27,7 @@ import io.deephaven.stringset.StringSet; import io.deephaven.util.QueryConstants; import io.deephaven.util.SafeCloseable; -import io.deephaven.util.annotations.VisibleForTesting; -import io.deephaven.util.channel.SeekableChannelsProviderLoader; +import io.deephaven.util.channel.CompletableOutputStream; import io.deephaven.vector.Vector; import org.apache.commons.lang3.tuple.Pair; import org.apache.parquet.bytes.HeapByteBufferAllocator; @@ -39,14 +38,13 @@ import org.jetbrains.annotations.NotNull; import org.jetbrains.annotations.Nullable; -import java.io.File; import java.io.IOException; +import java.io.OutputStream; +import java.net.URI; import java.nio.IntBuffer; -import java.nio.file.Path; import java.util.*; import static io.deephaven.parquet.base.ParquetUtils.METADATA_KEY; -import static io.deephaven.base.FileUtils.convertToURI; /** * API for writing DH tables in parquet format @@ -72,25 +70,23 @@ static class IndexWritingInfo { */ final String[] parquetColumnNames; /** - * File path to be added in the index metadata of the main parquet file + * Destination for writing the index file */ - final File destFileForMetadata; + final URI dest; /** - * Destination path for writing the index file. The two filenames can differ because we write index files to - * shadow file paths first and then place them at the final path once the write is complete. The metadata should - * always hold the accurate path. 
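writeMetadataFile above deliberately flushes its CountingOutputStream wrapper rather than closing it, so the underlying stream stays open and the done()/complete() calls remain with the caller. A self-contained sketch of that flush-without-close behaviour, using a small tracking stream as a stand-in for the caller-owned stream (the payload is just the magic bytes for illustration):

```java
import com.google.common.io.CountingOutputStream;

import java.io.ByteArrayOutputStream;
import java.io.FilterOutputStream;
import java.io.IOException;
import java.io.OutputStream;

public class FlushNotCloseSketch {
    /** Stand-in for the caller-owned stream; records whether close() was ever invoked. */
    static final class TrackingStream extends FilterOutputStream {
        boolean closed;
        TrackingStream(OutputStream out) { super(out); }
        @Override public void close() throws IOException { closed = true; super.close(); }
    }

    public static void main(String[] args) throws IOException {
        final TrackingStream callerOwned = new TrackingStream(new ByteArrayOutputStream());
        final CountingOutputStream counting = new CountingOutputStream(callerOwned);
        counting.write(new byte[] {'P', 'A', 'R', '1'}); // magic, then (in the real writer) the footer
        counting.flush();                                // flush the wrapper only
        System.out.println(counting.getCount());         // 4
        System.out.println(callerOwned.closed);          // false: lifecycle stays with the caller
    }
}
```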
+ * Output stream to write the index file */ - final File destFile; + final CompletableOutputStream destOutputStream; IndexWritingInfo( final List indexColumnNames, final String[] parquetColumnNames, - final File destFileForMetadata, - final File destFile) { + final URI dest, + final CompletableOutputStream destOutputStream) { this.indexColumnNames = indexColumnNames; this.parquetColumnNames = parquetColumnNames; - this.destFileForMetadata = destFileForMetadata.getAbsoluteFile(); - this.destFile = destFile.getAbsoluteFile(); + this.dest = dest; + this.destOutputStream = destOutputStream; } } @@ -100,10 +96,9 @@ static class IndexWritingInfo { * @param t The table to write * @param definition Table definition * @param writeInstructions Write instructions for customizations while writing - * @param destFilePath The destination path - * @param destFilePathForMetadata The destination path to store in the metadata files. This can be different from - * {@code destFilePath} if we are writing the parquet file to a shadow location first since the metadata - * should always hold the accurate path. + * @param dest The destination URI to write to + * @param destOutputStream The output stream to write to dest, should be marked as + * {@link CompletableOutputStream#done()} once writing is finished * @param incomingMeta A map of metadata values to be stores in the file footer * @param indexInfoList Arrays containing the column names for indexes to persist as sidecar tables. Indexes that * are specified but missing will be computed on demand. @@ -120,8 +115,8 @@ static void write( @NotNull final Table t, @NotNull final TableDefinition definition, @NotNull final ParquetInstructions writeInstructions, - @NotNull final String destFilePath, - @NotNull final String destFilePathForMetadata, + @NotNull final URI dest, + @NotNull final CompletableOutputStream destOutputStream, @NotNull final Map incomingMeta, @Nullable final List indexInfoList, @NotNull final ParquetMetadataFileWriter metadataFileWriter, @@ -137,66 +132,49 @@ static void write( } final TableInfo.Builder tableInfoBuilder = TableInfo.builder(); - List cleanupFiles = null; - try { - if (indexInfoList != null) { - cleanupFiles = new ArrayList<>(indexInfoList.size()); - final Path destDirPath = new File(destFilePath).getAbsoluteFile().getParentFile().toPath(); - for (final ParquetTableWriter.IndexWritingInfo info : indexInfoList) { - try (final SafeCloseable ignored = t.isRefreshing() ? LivenessScopeStack.open() : null) { - // This will retrieve an existing index if one exists, or create a new one if not - final BasicDataIndex dataIndex = Optional - .ofNullable(DataIndexer.getDataIndex(t, info.indexColumnNames)) - .or(() -> Optional.of(DataIndexer.getOrCreateDataIndex(t, info.indexColumnNames))) - .get() - .transform(DataIndexTransformer.builder().invertRowSet(t.getRowSet()).build()); - final Table indexTable = dataIndex.table().sort(info.indexColumnNames.toArray(new String[0])); - final TableInfo.Builder indexTableInfoBuilder = TableInfo.builder().addSortingColumns( - info.indexColumnNames.stream() - .map(cn -> SortColumnInfo.of(cn, SortColumnInfo.SortDirection.Ascending)) - .toArray(SortColumnInfo[]::new)); + if (indexInfoList != null) { + final URI destDir = dest.resolve("."); + for (final ParquetTableWriter.IndexWritingInfo info : indexInfoList) { + try (final SafeCloseable ignored = t.isRefreshing() ? 
LivenessScopeStack.open() : null) { + // This will retrieve an existing index if one exists, or create a new one if not + final BasicDataIndex dataIndex = Optional + .ofNullable(DataIndexer.getDataIndex(t, info.indexColumnNames)) + .or(() -> Optional.of(DataIndexer.getOrCreateDataIndex(t, info.indexColumnNames))) + .get() + .transform(DataIndexTransformer.builder().invertRowSet(t.getRowSet()).build()); + final Table indexTable = dataIndex.table().sort(info.indexColumnNames.toArray(new String[0])); + final TableInfo.Builder indexTableInfoBuilder = TableInfo.builder().addSortingColumns( + info.indexColumnNames.stream() + .map(cn -> SortColumnInfo.of(cn, SortColumnInfo.SortDirection.Ascending)) + .toArray(SortColumnInfo[]::new)); - cleanupFiles.add(info.destFile); - tableInfoBuilder.addDataIndexes(DataIndexInfo.of( - destDirPath.relativize(info.destFileForMetadata.toPath()).toString(), - info.parquetColumnNames)); - final ParquetInstructions writeInstructionsToUse; - if (INDEX_ROW_SET_COLUMN_NAME.equals(dataIndex.rowSetColumnName())) { - writeInstructionsToUse = writeInstructions; - } else { - writeInstructionsToUse = new ParquetInstructions.Builder(writeInstructions) - .addColumnNameMapping(INDEX_ROW_SET_COLUMN_NAME, dataIndex.rowSetColumnName()) - .build(); - } - write(indexTable, indexTable.getDefinition(), writeInstructionsToUse, - info.destFile.getAbsolutePath(), info.destFileForMetadata.getAbsolutePath(), - Collections.emptyMap(), indexTableInfoBuilder, NullParquetMetadataFileWriter.INSTANCE, - computedCache); + tableInfoBuilder.addDataIndexes(DataIndexInfo.of( + destDir.relativize(info.dest).getPath(), + info.parquetColumnNames)); + final ParquetInstructions writeInstructionsToUse; + if (INDEX_ROW_SET_COLUMN_NAME.equals(dataIndex.rowSetColumnName())) { + writeInstructionsToUse = writeInstructions; + } else { + writeInstructionsToUse = new ParquetInstructions.Builder(writeInstructions) + .addColumnNameMapping(INDEX_ROW_SET_COLUMN_NAME, dataIndex.rowSetColumnName()) + .build(); } + write(indexTable, indexTable.getDefinition(), writeInstructionsToUse, info.dest, + info.destOutputStream, Collections.emptyMap(), indexTableInfoBuilder, + NullParquetMetadataFileWriter.INSTANCE, computedCache); } } + } - // SortedColumnsAttribute effectively only stores (zero or more) individual columns by which the table is - // sorted, rather than ordered sets expressing multi-column sorts. Given that mismatch, we can only reflect - // a single column sort in the metadata at this time. - final List sortedColumns = SortedColumnsAttribute.getSortedColumns(t); - if (!sortedColumns.isEmpty()) { - tableInfoBuilder.addSortingColumns(SortColumnInfo.of(sortedColumns.get(0))); - } - write(t, definition, writeInstructions, destFilePath, destFilePathForMetadata, incomingMeta, - tableInfoBuilder, metadataFileWriter, computedCache); - } catch (Exception e) { - if (cleanupFiles != null) { - for (final File cleanupFile : cleanupFiles) { - try { - // noinspection ResultOfMethodCallIgnored - cleanupFile.delete(); - } catch (Exception ignored) { - } - } - } - throw e; + // SortedColumnsAttribute effectively only stores (zero or more) individual columns by which the table is + // sorted, rather than ordered sets expressing multi-column sorts. Given that mismatch, we can only reflect + // a single column sort in the metadata at this time. 
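The index branch above records each sidecar's path relative to the main file's parent directory, which it obtains with dest.resolve("."). A standalone sketch of that URI arithmetic, using a hypothetical destination and column name and the sidecar layout documented for getRelativeIndexFilePath; note the patch routes the general case through its ParquetUtils.resolve helper so that special characters are handled, while plain URI.resolve is enough for these simple names:

```java
import java.net.URI;

public class IndexPathSketch {
    public static void main(String[] args) {
        // Hypothetical main-table destination.
        final URI dest = URI.create("file:/data/output/table.parquet");

        // resolve(".") strips the file name, yielding the parent directory as a URI.
        final URI destDir = dest.resolve(".");
        System.out.println(destDir); // file:/data/output/

        // Index sidecars live under .dh_metadata/indexes/<cols>/index_<cols>_<fileName>.
        final URI indexFile =
                destDir.resolve(".dh_metadata/indexes/Sym/index_Sym_table.parquet");

        // The relative form that ends up in DataIndexInfo.
        System.out.println(destDir.relativize(indexFile).getPath());
        // .dh_metadata/indexes/Sym/index_Sym_table.parquet
    }
}
```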
+ final List sortedColumns = SortedColumnsAttribute.getSortedColumns(t); + if (!sortedColumns.isEmpty()) { + tableInfoBuilder.addSortingColumns(SortColumnInfo.of(sortedColumns.get(0))); } + write(t, definition, writeInstructions, dest, destOutputStream, incomingMeta, + tableInfoBuilder, metadataFileWriter, computedCache); } /** @@ -205,10 +183,9 @@ static void write( * @param table The table to write * @param definition The table definition * @param writeInstructions Write instructions for customizations while writing - * @param destFilePath The destination path - * @param destFilePathForMetadata The destination path to store in the metadata files. This can be different from - * {@code destFilePath} if we are writing the parquet file to a shadow location first since the metadata - * should always hold the accurate path. + * @param dest The destination URI to write to + * @param destOutputStream The output stream to write to dest, should be marked as + * {@link CompletableOutputStream#done()} once writing is finished * @param tableMeta A map of metadata values to be stores in the file footer * @param tableInfoBuilder A partially constructed builder for the metadata object * @param metadataFileWriter The writer for the {@value ParquetUtils#METADATA_FILE_NAME} and @@ -216,12 +193,12 @@ static void write( * @param computedCache Per column cache tags * @throws IOException For file writing related errors */ - static void write( + private static void write( @NotNull final Table table, @NotNull final TableDefinition definition, @NotNull final ParquetInstructions writeInstructions, - @NotNull final String destFilePath, - @NotNull final String destFilePathForMetadata, + @NotNull final URI dest, + @NotNull final CompletableOutputStream destOutputStream, @NotNull final Map tableMeta, @NotNull final TableInfo.Builder tableInfoBuilder, @NotNull final ParquetMetadataFileWriter metadataFileWriter, @@ -230,11 +207,13 @@ static void write( final Table t = pretransformTable(table, definition); final TrackingRowSet tableRowSet = t.getRowSet(); final Map> columnSourceMap = t.getColumnSourceMap(); - final ParquetFileWriter parquetFileWriter = getParquetFileWriter(computedCache, definition, tableRowSet, - columnSourceMap, destFilePath, destFilePathForMetadata, writeInstructions, tableMeta, - tableInfoBuilder, metadataFileWriter); - // Given the transformation, do not use the original table's "definition" for writing - write(t, writeInstructions, parquetFileWriter, computedCache); + try (final ParquetFileWriter parquetFileWriter = getParquetFileWriter(computedCache, definition, + tableRowSet, columnSourceMap, dest, destOutputStream, writeInstructions, tableMeta, + tableInfoBuilder, metadataFileWriter)) { + // Given the transformation, do not use the original table's "definition" for writing + write(t, writeInstructions, parquetFileWriter, computedCache); + } + destOutputStream.done(); } } @@ -269,8 +248,6 @@ private static void write( } } } - - parquetFileWriter.close(); } /** @@ -336,19 +313,16 @@ private static Table pretransformTable(@NotNull final Table table, @NotNull fina * Create a {@link ParquetFileWriter} for writing the table to disk. * * @param computedCache Per column cache tags - * @param definition the writable definition - * @param tableRowSet the row set being written - * @param columnSourceMap the columns of the table - * @param destFilePath the destination to write to - * @param destFilePathForMetadata The destination path to store in the metadata files. 
This can be different from - * {@code destFilePath} if we are writing the parquet file to a shadow location first since the metadata - * should always hold the accurate path. - * @param writeInstructions write instructions for the file - * @param tableMeta metadata to include in the parquet metadata - * @param tableInfoBuilder a builder for accumulating per-column information to construct the deephaven metadata + * @param definition The writable definition + * @param tableRowSet The row set being written + * @param columnSourceMap The columns of the table + * @param dest The destination URI to write to + * @param destOutputStream The output stream to write to dest + * @param writeInstructions Write instructions for the file + * @param tableMeta Metadata to include in the parquet metadata + * @param tableInfoBuilder Builder for accumulating per-column information to construct the deephaven metadata * @param metadataFileWriter The writer for the {@value ParquetUtils#METADATA_FILE_NAME} and * {@value ParquetUtils#COMMON_METADATA_FILE_NAME} files - * * @return a new file writer */ @NotNull @@ -357,8 +331,8 @@ private static ParquetFileWriter getParquetFileWriter( @NotNull final TableDefinition definition, @NotNull final RowSet tableRowSet, @NotNull final Map> columnSourceMap, - @NotNull final String destFilePath, - @NotNull final String destFilePathForMetadata, + @NotNull final URI dest, + @NotNull final OutputStream destOutputStream, @NotNull final ParquetInstructions writeInstructions, @NotNull final Map tableMeta, @NotNull final TableInfo.Builder tableInfoBuilder, @@ -404,21 +378,18 @@ private static ParquetFileWriter getParquetFileWriter( final Map extraMetaData = new HashMap<>(tableMeta); extraMetaData.put(METADATA_KEY, tableInfoBuilder.build().serializeToJSON()); - return new ParquetFileWriter(destFilePath, destFilePathForMetadata, - SeekableChannelsProviderLoader.getInstance().fromServiceLoader(convertToURI(destFilePath, false), null), - writeInstructions.getTargetPageSize(), + return new ParquetFileWriter(dest, destOutputStream, writeInstructions.getTargetPageSize(), new HeapByteBufferAllocator(), mappedSchema.getParquetSchema(), writeInstructions.getCompressionCodecName(), extraMetaData, metadataFileWriter); } - @VisibleForTesting - static void writeColumnSource( + private static void writeColumnSource( @NotNull final RowSet tableRowSet, @NotNull final ParquetInstructions writeInstructions, @NotNull final RowGroupWriter rowGroupWriter, @NotNull final Map> computedCache, @NotNull final String columnName, - @NotNull ColumnSource columnSource) throws IllegalAccessException, IOException { + @NotNull final ColumnSource columnSource) throws IllegalAccessException, IOException { try (final ColumnWriter columnWriter = rowGroupWriter.addColumn( writeInstructions.getParquetColumnNameFromColumnNameOrDefault(columnName))) { boolean usedDictionary = false; @@ -435,8 +406,8 @@ static void writeColumnSource( /** * Makes a copy of the given buffer */ - private static IntBuffer makeCopy(IntBuffer orig) { - IntBuffer copy = IntBuffer.allocate(orig.capacity()); + private static IntBuffer makeCopy(final IntBuffer orig) { + final IntBuffer copy = IntBuffer.allocate(orig.capacity()); copy.put(orig).flip(); return copy; } @@ -534,9 +505,9 @@ private static void encodePlain( try (final TransferObject transferObject = TransferObject.create( tableRowSet, writeInstructions, computedCache, columnName, columnSource)) { final Statistics statistics = columnWriter.getStats(); - boolean writeVectorPages = 
(transferObject instanceof ArrayAndVectorTransfer); + final boolean writeVectorPages = (transferObject instanceof ArrayAndVectorTransfer); do { - int numValuesBuffered = transferObject.transferOnePageToBuffer(); + final int numValuesBuffered = transferObject.transferOnePageToBuffer(); if (writeVectorPages) { columnWriter.addVectorPage(transferObject.getBuffer(), transferObject.getRepeatCount(), numValuesBuffered, statistics); diff --git a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/ParquetTools.java b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/ParquetTools.java index 5422acf5e96..ab035bdd0c8 100644 --- a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/ParquetTools.java +++ b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/ParquetTools.java @@ -22,9 +22,9 @@ import io.deephaven.parquet.base.ParquetMetadataFileWriter; import io.deephaven.parquet.base.NullParquetMetadataFileWriter; import io.deephaven.util.SafeCloseable; +import io.deephaven.util.channel.CompletableOutputStream; import io.deephaven.util.channel.SeekableChannelsProvider; import io.deephaven.util.channel.SeekableChannelsProviderLoader; -import io.deephaven.util.channel.SeekableChannelsProviderPlugin; import io.deephaven.vector.*; import io.deephaven.engine.table.*; import io.deephaven.engine.table.impl.PartitionAwareSourceTable; @@ -53,14 +53,17 @@ import org.jetbrains.annotations.Nullable; import java.io.File; +import java.io.IOException; import java.math.BigDecimal; import java.net.URI; import java.util.*; import java.util.function.Supplier; import java.util.stream.Collectors; +import static io.deephaven.base.FileUtils.URI_SEPARATOR_CHAR; import static io.deephaven.base.FileUtils.convertToURI; -import static io.deephaven.parquet.base.ParquetFileReader.FILE_URI_SCHEME; +import static io.deephaven.parquet.base.ParquetUtils.PARQUET_OUTPUT_BUFFER_SIZE; +import static io.deephaven.parquet.base.ParquetUtils.resolve; import static io.deephaven.parquet.table.ParquetInstructions.FILE_INDEX_TOKEN; import static io.deephaven.parquet.table.ParquetInstructions.PARTITIONS_TOKEN; import static io.deephaven.parquet.table.ParquetInstructions.UUID_TOKEN; @@ -193,15 +196,6 @@ private static ParquetInstructions ensureTableDefinition( return instructions; } - private static File getShadowFile(final File destFile) { - return new File(destFile.getParent(), ".NEW_" + destFile.getName()); - } - - @VisibleForTesting - static File getBackupFile(final File destFile) { - return new File(destFile.getParent(), ".OLD_" + destFile.getName()); - } - private static String minusParquetSuffix(@NotNull final String s) { if (s.endsWith(PARQUET_FILE_EXTENSION)) { return s.substring(0, s.length() - PARQUET_FILE_EXTENSION.length()); @@ -209,10 +203,22 @@ private static String minusParquetSuffix(@NotNull final String s) { return s; } + /** + * Get the name of the file from the URI. + */ + private static String getFileName(@NotNull final URI uri) { + final String path = uri.getPath(); + final int lastSlash = path.lastIndexOf(URI_SEPARATOR_CHAR); + if (lastSlash == path.length() - 1) { + throw new IllegalArgumentException("Directory URIs are not supported, found" + uri); + } + return lastSlash == -1 ? path : path.substring(lastSlash + 1); + } + /** * Generates the index file path relative to the table destination file path. 
* - * @param tableDest Destination path for the main table containing these indexing columns + * @param destFileName Destination name for the main table containing these indexing columns * @param columnNames Array of indexing column names * * @return The relative index file path. For example, for table with destination {@code "table.parquet"} and @@ -220,10 +226,10 @@ private static String minusParquetSuffix(@NotNull final String s) { * {@code ".dh_metadata/indexes/IndexingColName/index_IndexingColName_table.parquet"} on unix systems. */ @VisibleForTesting - static String getRelativeIndexFilePath(@NotNull final File tableDest, @NotNull final String... columnNames) { + static String getRelativeIndexFilePath(@NotNull final String destFileName, @NotNull final String... columnNames) { final String columns = String.join(",", columnNames); return String.format(".dh_metadata%sindexes%s%s%sindex_%s_%s", File.separator, File.separator, columns, - File.separator, columns, tableDest.getName()); + File.separator, columns, destFileName); } /** @@ -243,139 +249,36 @@ public static String legacyGroupingFileName(@NotNull final File tableDest, @NotN return prefix + "_" + columnName + "_grouping.parquet"; } - /** - * Delete any old backup files created for this destination, and throw an exception on failure - */ - private static void deleteBackupFile(@NotNull final File destFile) { - if (!deleteBackupFileNoExcept(destFile)) { - throw new UncheckedDeephavenException( - String.format("Failed to delete backup file at %s", getBackupFile(destFile).getAbsolutePath())); - } - } - - /** - * Delete any old backup files created for this destination with no exception in case of failure - */ - private static boolean deleteBackupFileNoExcept(@NotNull final File destFile) { - final File backupDestFile = getBackupFile(destFile); - if (backupDestFile.exists() && !backupDestFile.delete()) { - log.error().append("Error in deleting backup file at path ") - .append(backupDestFile.getAbsolutePath()) - .endl(); - return false; - } - return true; - } - - /** - * Backup any existing files at location destFile and rename the shadow file to destFile - */ - private static void installShadowFile(@NotNull final File destFile, @NotNull final File shadowDestFile) { - final File backupDestFile = getBackupFile(destFile); - if (destFile.exists() && !destFile.renameTo(backupDestFile)) { - throw new UncheckedDeephavenException( - String.format( - "Failed to install shadow file at %s because a file already exists at the path which couldn't be renamed to %s", - destFile.getAbsolutePath(), backupDestFile.getAbsolutePath())); - } - if (!shadowDestFile.renameTo(destFile)) { - throw new UncheckedDeephavenException(String.format( - "Failed to install shadow file at %s because couldn't rename temporary shadow file from %s to %s", - destFile.getAbsolutePath(), shadowDestFile.getAbsolutePath(), destFile.getAbsolutePath())); - } - } - - /** - * Roll back any changes made in the {@link #installShadowFile} in best-effort manner - */ - private static void rollbackFile(@NotNull final File destFile) { - final File backupDestFile = getBackupFile(destFile); - final File shadowDestFile = getShadowFile(destFile); - destFile.renameTo(shadowDestFile); - backupDestFile.renameTo(destFile); - } - - /** - * Make any missing ancestor directories of {@code destination}. - * - * @param destination The destination parquet file - * @return The first created directory, or null if no directories were made. 
- */ - private static File prepareDestinationFileLocation(@NotNull File destination) { - destination = destination.getAbsoluteFile(); - if (!destination.getPath().endsWith(PARQUET_FILE_EXTENSION)) { - throw new UncheckedDeephavenException( - String.format("Destination %s does not end in %s extension", destination, PARQUET_FILE_EXTENSION)); - } - if (destination.exists()) { - if (destination.isDirectory()) { - throw new UncheckedDeephavenException( - String.format("Destination %s exists and is a directory", destination)); - } - if (!destination.canWrite()) { - throw new UncheckedDeephavenException( - String.format("Destination %s exists but is not writable", destination)); - } - return null; - } - final File firstParent = destination.getParentFile(); - if (firstParent.isDirectory()) { - if (firstParent.canWrite()) { - return null; - } - throw new UncheckedDeephavenException( - String.format("Destination %s has non writable parent directory", destination)); - } - File firstCreated = firstParent; - File parent; - for (parent = destination.getParentFile(); parent != null && !parent.exists(); parent = - parent.getParentFile()) { - firstCreated = parent; - } - if (parent == null) { - throw new IllegalArgumentException( - String.format("Can't find any existing parent directory for destination path: %s", destination)); - } - if (!parent.isDirectory()) { - throw new IllegalArgumentException( - String.format("Existing parent file %s of %s is not a directory", parent, destination)); - } - if (!firstParent.mkdirs()) { - throw new UncheckedDeephavenException("Couldn't (re)create destination directory " + firstParent); - } - return firstCreated; - } - /** * Helper function for building index column info for writing and deleting any backup index column files * * @param indexColumns Names of index columns, stored as String list for each index * @param parquetColumnNameArr Names of index columns for the parquet file, stored as String[] for each index - * @param destFile The destination path for the main table containing these index columns + * @param dest The destination URI for the main table containing these index columns + * @param channelProvider The channel provider to use for creating channels to the index files */ private static List indexInfoBuilderHelper( @NotNull final Collection> indexColumns, @NotNull final String[][] parquetColumnNameArr, - @NotNull final File destFile) { + @NotNull final URI dest, + @NotNull final SeekableChannelsProvider channelProvider) throws IOException { Require.eq(indexColumns.size(), "indexColumns.size", parquetColumnNameArr.length, "parquetColumnNameArr.length"); final int numIndexes = indexColumns.size(); final List indexInfoList = new ArrayList<>(numIndexes); int gci = 0; + final String destFileName = getFileName(dest); for (final List indexColumnNames : indexColumns) { final String[] parquetColumnNames = parquetColumnNameArr[gci]; - final String indexFileRelativePath = getRelativeIndexFilePath(destFile, parquetColumnNames); - final File indexFile = new File(destFile.getParent(), indexFileRelativePath); - prepareDestinationFileLocation(indexFile); - deleteBackupFile(indexFile); - - final File shadowIndexFile = getShadowFile(indexFile); - + final String indexFileRelativePath = getRelativeIndexFilePath(destFileName, parquetColumnNames); + final URI indexFileURI = resolve(dest, indexFileRelativePath); + final CompletableOutputStream indexFileOutputStream = + channelProvider.getOutputStream(indexFileURI, PARQUET_OUTPUT_BUFFER_SIZE); final 
ParquetTableWriter.IndexWritingInfo info = new ParquetTableWriter.IndexWritingInfo( indexColumnNames, parquetColumnNames, - indexFile, - shadowIndexFile); + indexFileURI, + indexFileOutputStream); indexInfoList.add(info); gci++; } @@ -392,7 +295,7 @@ private static List indexInfoBuilderHelper( * while writing, use {@link ParquetInstructions.Builder#addIndexColumns}. * * @param sourceTable The table to partition and write - * @param destinationDir The path to destination root directory to store partitioned data in nested format. + * @param destinationDir The path or URI to destination root directory to store partitioned data in nested format. * Non-existing directories are created. * @param writeInstructions Write instructions for customizations while writing */ @@ -427,7 +330,7 @@ public static void writeKeyValuePartitionedTable( * {@link ParquetInstructions.Builder#addIndexColumns}. * * @param partitionedTable The partitioned table to write - * @param destinationDir The path to destination root directory to store partitioned data in nested format. + * @param destinationDir The path or URI to destination root directory to store partitioned data in nested format. * Non-existing directories are created. * @param writeInstructions Write instructions for customizations while writing */ @@ -458,7 +361,7 @@ public static void writeKeyValuePartitionedTable( * @param partitionedTable The partitioned table to write * @param keyTableDefinition The definition for key columns * @param leafDefinition The definition for leaf parquet files to be written - * @param destinationRoot The path to destination root directory to store partitioned data in nested format + * @param destinationRoot The path or URI to destination root directory to store partitioned data in nested format * @param writeInstructions Write instructions for customizations while writing * @param indexColumns Collection containing the column names for indexes to persist. The write operation will store * the index info as sidecar tables. This argument is used to narrow the set of indexes to write, or to be @@ -512,14 +415,16 @@ private static void writeKeyValuePartitionedTableImpl( }); // For the constituent column for each row, accumulate the constituent tables and build the final file paths final Collection
partitionedData = new ArrayList<>(); - final Collection destinations = new ArrayList<>(); + final Collection destinations = new ArrayList<>(); try (final CloseableIterator> constituentIterator = withGroupConstituents.objectColumnIterator(partitionedTable.constituentColumnName())) { int row = 0; + final URI destinationDir = convertToURI(destinationRoot, true); while (constituentIterator.hasNext()) { final ObjectVector constituentVector = constituentIterator.next(); final List partitionStrings = partitionStringsList.get(row); - final File relativePath = new File(destinationRoot, String.join(File.separator, partitionStrings)); + final String relativePath = concatenatePartitions(partitionStrings); + final URI partitionDir = resolve(destinationDir, relativePath); int count = 0; for (final Table constituent : constituentVector) { String filename = baseName; @@ -533,7 +438,7 @@ private static void writeKeyValuePartitionedTableImpl( filename = filename.replace(UUID_TOKEN, UUID.randomUUID().toString()); } filename += PARQUET_FILE_EXTENSION; - destinations.add(new File(relativePath, filename)); + destinations.add(resolve(partitionDir, filename)); partitionedData.add(constituent); count++; } @@ -557,14 +462,22 @@ private static void writeKeyValuePartitionedTableImpl( // Store hard reference to prevent indexes from being garbage collected final List dataIndexes = addIndexesToTables(partitionedDataArray, indexColumns); writeTablesImpl(partitionedDataArray, leafDefinition, writeInstructions, - destinations.toArray(File[]::new), indexColumns, partitioningColumnsSchema, - new File(destinationRoot), computedCache); + destinations.toArray(URI[]::new), indexColumns, partitioningColumnsSchema, + convertToURI(destinationRoot, true), computedCache); if (dataIndexes != null) { dataIndexes.clear(); } } } + private static String concatenatePartitions(final List partitions) { + final StringBuilder builder = new StringBuilder(); + for (final String partition : partitions) { + builder.append(partition).append(File.separator); + } + return builder.toString(); + } + /** * Add data indexes to provided tables, if not present, and return a list of hard references to the indexes. 
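For the key-value partitioned layout above, each row of the key table contributes a chain of "key=value" directory names that is concatenated and resolved against the destination root, and each constituent then gets a file name built from the base-name tokens. A sketch of that path construction with made-up partition values; it uses '/' directly where the patch uses File.separator, plain URI.resolve where the patch uses its resolve helper, and does not reproduce the token substitution exactly:

```java
import java.net.URI;
import java.util.List;
import java.util.UUID;

public class PartitionedDestinationSketch {
    public static void main(String[] args) {
        // Hypothetical destination root and partition values for one constituent.
        final URI destinationRoot = URI.create("file:/data/partitioned/");
        final List<String> partitionStrings = List.of("symbol=AAPL", "epic_request_id=42");

        // Mirrors concatenatePartitions: one "key=value" segment per partitioning column.
        final StringBuilder relative = new StringBuilder();
        for (final String partition : partitionStrings) {
            relative.append(partition).append('/');
        }
        final URI partitionDir = destinationRoot.resolve(relative.toString());

        // A UUID-style base name; the other filename tokens are substituted similarly.
        final String filename = UUID.randomUUID() + ".parquet";
        System.out.println(partitionDir.resolve(filename));
        // e.g. file:/data/partitioned/symbol=AAPL/epic_request_id=42/<uuid>.parquet
    }
}
```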
*/ @@ -646,10 +559,10 @@ private static void writeTablesImpl( @NotNull final Table[] sources, @NotNull final TableDefinition definition, @NotNull final ParquetInstructions writeInstructions, - @NotNull final File[] destinations, + @NotNull final URI[] destinations, @NotNull final Collection> indexColumns, @Nullable final MessageType partitioningColumnsSchema, - @Nullable final File metadataRootDir, + @Nullable final URI metadataRootDir, @NotNull final Map> computedCache) { Require.eq(sources.length, "sources.length", destinations.length, "destinations.length"); if (writeInstructions.getFileLayout().isPresent()) { @@ -659,14 +572,9 @@ private static void writeTablesImpl( if (definition.numColumns() == 0) { throw new TableDataException("Cannot write a parquet table with zero columns"); } - Arrays.stream(destinations).forEach(ParquetTools::deleteBackupFile); - - // Write all files at temporary shadow file paths in the same directory to prevent overwriting any existing - // data in case of failure - final File[] shadowDestFiles = - Arrays.stream(destinations).map(ParquetTools::getShadowFile).toArray(File[]::new); - final File[] firstCreatedDirs = - Arrays.stream(shadowDestFiles).map(ParquetTools::prepareDestinationFileLocation).toArray(File[]::new); + // Assuming all destination URIs have the same scheme, and will use the same channels provider instance + final SeekableChannelsProvider channelsProvider = SeekableChannelsProviderLoader.getInstance() + .fromServiceLoader(destinations[0], writeInstructions.getSpecialInstructions()); final ParquetMetadataFileWriter metadataFileWriter; if (writeInstructions.generateMetadataFiles()) { @@ -679,104 +587,74 @@ private static void writeTablesImpl( metadataFileWriter = NullParquetMetadataFileWriter.INSTANCE; } - // List of shadow files, to clean up in case of exceptions - final List shadowFiles = new ArrayList<>(); - // List of all destination files (including index files), to roll back in case of exceptions - final List destFiles = new ArrayList<>(); - try { - final List> indexInfoLists; - if (indexColumns.isEmpty()) { - // Write the tables without any index info - indexInfoLists = null; - for (int tableIdx = 0; tableIdx < sources.length; tableIdx++) { - shadowFiles.add(shadowDestFiles[tableIdx]); - final Table source = sources[tableIdx]; - ParquetTableWriter.write(source, definition, writeInstructions, shadowDestFiles[tableIdx].getPath(), - destinations[tableIdx].getPath(), Collections.emptyMap(), - (List) null, metadataFileWriter, - computedCache); + // List of output streams created, to rollback in case of exceptions + final List outputStreams = new ArrayList<>(destinations.length); + try (final SafeCloseable ignored = () -> SafeCloseable.closeAll(outputStreams.stream())) { + try { + if (indexColumns.isEmpty()) { + // Write the tables without any index info + for (int tableIdx = 0; tableIdx < sources.length; tableIdx++) { + final Table source = sources[tableIdx]; + final CompletableOutputStream outputStream = channelsProvider.getOutputStream( + destinations[tableIdx], PARQUET_OUTPUT_BUFFER_SIZE); + outputStreams.add(outputStream); + ParquetTableWriter.write(source, definition, writeInstructions, destinations[tableIdx], + outputStream, Collections.emptyMap(), (List) null, + metadataFileWriter, computedCache); + } + } else { + // Shared parquet column names across all tables + final String[][] parquetColumnNameArr = indexColumns.stream() + .map((Collection columns) -> columns.stream() + 
.map(writeInstructions::getParquetColumnNameFromColumnNameOrDefault) + .toArray(String[]::new)) + .toArray(String[][]::new); + + for (int tableIdx = 0; tableIdx < sources.length; tableIdx++) { + final URI tableDestination = destinations[tableIdx]; + final List indexInfoList = + indexInfoBuilderHelper(indexColumns, parquetColumnNameArr, tableDestination, + channelsProvider); + final CompletableOutputStream outputStream = channelsProvider.getOutputStream( + destinations[tableIdx], PARQUET_OUTPUT_BUFFER_SIZE); + outputStreams.add(outputStream); + for (final ParquetTableWriter.IndexWritingInfo info : indexInfoList) { + outputStreams.add(info.destOutputStream); + } + final Table sourceTable = sources[tableIdx]; + ParquetTableWriter.write(sourceTable, definition, writeInstructions, destinations[tableIdx], + outputStream, Collections.emptyMap(), indexInfoList, metadataFileWriter, computedCache); + } } - } else { - // Create index info for each table and write the table and index files to shadow path - indexInfoLists = new ArrayList<>(sources.length); - - // Shared parquet column names across all tables - final String[][] parquetColumnNameArr = indexColumns.stream() - .map((Collection columns) -> columns.stream() - .map(writeInstructions::getParquetColumnNameFromColumnNameOrDefault) - .toArray(String[]::new)) - .toArray(String[][]::new); - for (int tableIdx = 0; tableIdx < sources.length; tableIdx++) { - final File tableDestination = destinations[tableIdx]; - final List indexInfoList = - indexInfoBuilderHelper(indexColumns, parquetColumnNameArr, tableDestination); - indexInfoLists.add(indexInfoList); - - shadowFiles.add(shadowDestFiles[tableIdx]); - indexInfoList.forEach(item -> shadowFiles.add(item.destFile)); - - final Table sourceTable = sources[tableIdx]; - ParquetTableWriter.write(sourceTable, definition, writeInstructions, - shadowDestFiles[tableIdx].getPath(), tableDestination.getPath(), Collections.emptyMap(), - indexInfoList, metadataFileWriter, computedCache); + if (writeInstructions.generateMetadataFiles()) { + final URI metadataDest = metadataRootDir.resolve(METADATA_FILE_NAME); + final CompletableOutputStream metadataOutputStream = channelsProvider.getOutputStream( + metadataDest, PARQUET_OUTPUT_BUFFER_SIZE); + outputStreams.add(metadataOutputStream); + final URI commonMetadataDest = metadataRootDir.resolve(COMMON_METADATA_FILE_NAME); + final CompletableOutputStream commonMetadataOutputStream = channelsProvider.getOutputStream( + commonMetadataDest, PARQUET_OUTPUT_BUFFER_SIZE); + outputStreams.add(commonMetadataOutputStream); + metadataFileWriter.writeMetadataFiles(metadataOutputStream, commonMetadataOutputStream); } - } - // Write the combined metadata files to shadow destinations - final File metadataDestFile, shadowMetadataFile, commonMetadataDestFile, shadowCommonMetadataFile; - if (writeInstructions.generateMetadataFiles()) { - metadataDestFile = new File(metadataRootDir, METADATA_FILE_NAME); - shadowMetadataFile = ParquetTools.getShadowFile(metadataDestFile); - shadowFiles.add(shadowMetadataFile); - commonMetadataDestFile = new File(metadataRootDir, COMMON_METADATA_FILE_NAME); - shadowCommonMetadataFile = ParquetTools.getShadowFile(commonMetadataDestFile); - shadowFiles.add(shadowCommonMetadataFile); - metadataFileWriter.writeMetadataFiles(shadowMetadataFile.getAbsolutePath(), - shadowCommonMetadataFile.getAbsolutePath()); - } else { - metadataDestFile = shadowMetadataFile = commonMetadataDestFile = shadowCommonMetadataFile = null; - } - - // Write to shadow files was 
successful, now replace the original files with the shadow files - for (int tableIdx = 0; tableIdx < sources.length; tableIdx++) { - destFiles.add(destinations[tableIdx]); - installShadowFile(destinations[tableIdx], shadowDestFiles[tableIdx]); - if (indexInfoLists != null) { - final List indexInfoList = indexInfoLists.get(tableIdx); - for (final ParquetTableWriter.IndexWritingInfo info : indexInfoList) { - final File indexDestFile = info.destFileForMetadata; - final File shadowIndexFile = info.destFile; - destFiles.add(indexDestFile); - installShadowFile(indexDestFile, shadowIndexFile); - } + // Commit all the writes to underlying file system, to detect any exceptions early before closing + for (final CompletableOutputStream outputStream : outputStreams) { + outputStream.complete(); } - } - if (writeInstructions.generateMetadataFiles()) { - destFiles.add(metadataDestFile); - installShadowFile(metadataDestFile, shadowMetadataFile); - destFiles.add(commonMetadataDestFile); - installShadowFile(commonMetadataDestFile, shadowCommonMetadataFile); - } - } catch (Exception e) { - for (final File file : destFiles) { - rollbackFile(file); - } - for (final File file : shadowFiles) { - file.delete(); - } - for (final File firstCreatedDir : firstCreatedDirs) { - if (firstCreatedDir == null) { - continue; + } catch (final Exception e) { + // Try to rollback all the output streams in reverse order to undo any writes + for (int idx = outputStreams.size() - 1; idx >= 0; idx--) { + try { + outputStreams.get(idx).rollback(); + } catch (IOException e1) { + log.error().append("Error in rolling back output stream ").append(e1).endl(); + } } - log.error().append( - "Error in table writing, cleaning up potentially incomplete table destination path starting from ") - .append(firstCreatedDir.getAbsolutePath()).append(e).endl(); - FileUtils.deleteRecursivelyOnNFS(firstCreatedDir); + throw new UncheckedDeephavenException("Error writing parquet tables", e); } - throw new UncheckedDeephavenException("Error writing parquet tables", e); } - destFiles.forEach(ParquetTools::deleteBackupFileNoExcept); } /** @@ -873,28 +751,35 @@ public static void writeTables( } definition = firstDefinition; } - final File[] destinationFiles = new File[destinations.length]; + final URI[] destinationUris = new URI[destinations.length]; + String firstScheme = null; for (int idx = 0; idx < destinations.length; idx++) { - final URI destinationURI = convertToURI(destinations[idx], false); - if (!FILE_URI_SCHEME.equals(destinationURI.getScheme())) { + if (!destinations[idx].endsWith(PARQUET_FILE_EXTENSION)) { throw new IllegalArgumentException( - "Only file URI scheme is supported for writing parquet files, found" + - "non-file URI: " + destinations[idx]); + String.format("Destination %s does not end in %s extension", destinations[idx], + PARQUET_FILE_EXTENSION)); + } + destinationUris[idx] = convertToURI(destinations[idx], false); + if (idx == 0) { + firstScheme = destinationUris[0].getScheme(); + } else if (!firstScheme.equals(destinationUris[idx].getScheme())) { + throw new IllegalArgumentException("All destination URIs must have the same scheme, expected " + + firstScheme + " found " + destinationUris[idx].getScheme()); } - destinationFiles[idx] = new File(destinationURI); } - final File metadataRootDir; + final URI metadataRootDir; if (writeInstructions.generateMetadataFiles()) { // We insist on writing the metadata file in the same directory as the destination files, thus all // destination files should be in the same directory. 
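The try/catch/rollback structure above replaces the old shadow-file protocol: every destination gets a CompletableOutputStream up front, each writer marks its stream done(), all streams are complete()d only after every write has succeeded, and on failure the streams are rolled back in reverse order. The following is only a sketch of that sequencing, assembled from the calls visible in this patch; the URI, buffer size, and payload are placeholders, and the SafeCloseable-based cleanup is omitted for brevity:

```java
import io.deephaven.util.channel.CompletableOutputStream;
import io.deephaven.util.channel.SeekableChannelsProvider;
import io.deephaven.util.channel.SeekableChannelsProviderLoader;

import java.io.IOException;
import java.net.URI;
import java.util.ArrayList;
import java.util.List;

public class CompletableStreamLifecycleSketch {

    public static void main(final String[] args) throws Exception {
        // Placeholder destination and buffer size; the real code takes these from the caller.
        final URI dest = URI.create("file:/tmp/example.parquet");
        final int bufferSize = 1 << 16;

        // The provider is chosen by URI scheme; null stands in for the special instructions.
        final SeekableChannelsProvider provider =
                SeekableChannelsProviderLoader.getInstance().fromServiceLoader(dest, null);

        final List<CompletableOutputStream> outputStreams = new ArrayList<>();
        try {
            final CompletableOutputStream out = provider.getOutputStream(dest, bufferSize);
            outputStreams.add(out);

            out.write(new byte[] {'P', 'A', 'R', '1'}); // stand-in for the real parquet payload
            out.done();                                 // no further writes for this destination

            // Only after every destination has been written successfully:
            for (final CompletableOutputStream stream : outputStreams) {
                stream.complete(); // push the data to the underlying storage
            }
        } catch (final Exception e) {
            // Undo partial writes, newest stream first, as writeTablesImpl does.
            for (int idx = outputStreams.size() - 1; idx >= 0; idx--) {
                try {
                    outputStreams.get(idx).rollback();
                } catch (final IOException ignored) {
                    // best-effort rollback
                }
            }
            throw e;
        }
    }
}
```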
- final String firstDestinationDir = destinationFiles[0].getAbsoluteFile().getParentFile().getAbsolutePath(); + final URI firstDestinationDir = destinationUris[0].resolve("."); for (int i = 1; i < destinations.length; i++) { - if (!firstDestinationDir.equals(destinationFiles[i].getParentFile().getAbsolutePath())) { + final URI destinationDir = destinationUris[i].resolve("."); + if (!firstDestinationDir.equals(destinationDir)) { throw new IllegalArgumentException("All destination files must be in the same directory for " + - " generating metadata files"); + " generating metadata files, found " + firstDestinationDir + " and " + destinationDir); } } - metadataRootDir = new File(firstDestinationDir); + metadataRootDir = firstDestinationDir; } else { metadataRootDir = null; } @@ -904,7 +789,7 @@ public static void writeTables( buildComputedCache(() -> PartitionedTableFactory.ofTables(definition, sources).merge(), definition); // We do not have any additional schema for partitioning columns in this case. Schema for all columns will be // generated at the time of writing the parquet files and merged to generate the metadata files. - writeTablesImpl(sources, definition, writeInstructions, destinationFiles, indexColumns, null, metadataRootDir, + writeTablesImpl(sources, definition, writeInstructions, destinationUris, indexColumns, null, metadataRootDir, computedCache); } diff --git a/extensions/parquet/table/src/test/java/io/deephaven/parquet/table/ParquetTableReadWriteTest.java b/extensions/parquet/table/src/test/java/io/deephaven/parquet/table/ParquetTableReadWriteTest.java index 9c0746e1850..8a2df002b0e 100644 --- a/extensions/parquet/table/src/test/java/io/deephaven/parquet/table/ParquetTableReadWriteTest.java +++ b/extensions/parquet/table/src/test/java/io/deephaven/parquet/table/ParquetTableReadWriteTest.java @@ -432,7 +432,7 @@ public void testSortingMetadata() { StandaloneTableKey.getInstance(), new ParquetTableLocationKey( convertToURI(new File(rootFile, - ParquetTools.getRelativeIndexFilePath(dest, "someString")), false), + ParquetTools.getRelativeIndexFilePath(dest.getName(), "someString")), false), 0, Map.of(), EMPTY), EMPTY); assertEquals(index1Location.getSortedColumns(), List.of(SortColumn.asc(ColumnName.of("someString")))); @@ -443,7 +443,7 @@ public void testSortingMetadata() { StandaloneTableKey.getInstance(), new ParquetTableLocationKey( convertToURI(new File(rootFile, - ParquetTools.getRelativeIndexFilePath(dest, "someInt", "someString")), false), + ParquetTools.getRelativeIndexFilePath(dest.getName(), "someInt", "someString")), false), 0, Map.of(), EMPTY), EMPTY); assertEquals(index2Location.getSortedColumns(), List.of( @@ -453,7 +453,7 @@ public void testSortingMetadata() { assertTableEquals(index2Table, index2Table.sort("someInt", "someString")); } - private static void verifyIndexingInfoExists(final Table table, final String... columnNames) { + static void verifyIndexingInfoExists(final Table table, final String... 
columnNames) { assertTrue(DataIndexer.hasDataIndex(table, columnNames)); final DataIndex fullIndex = DataIndexer.getDataIndex(table, columnNames); Assert.neqNull(fullIndex, "fullIndex"); @@ -592,6 +592,19 @@ private static void writeReadTableTest(final Table table, final File dest, checkSingleTable(table, dest); } + @Test + public void basicParquetWrongDestinationTest() { + final Table table = TableTools.emptyTable(5).update("A=(int)i"); + final File dest = new File(rootFile, "basicParquetWrongDestinationTest.parquet"); + writeTable(table, dest.getPath()); + final File wrongDest = new File(rootFile, "basicParquetWrongDestinationTest"); + try { + writeTable(table, wrongDest.getPath()); + fail("Expected an exception because destination does not end with .parquet"); + } catch (final IllegalArgumentException expected) { + } + } + @Test public void basicParquetWithMetadataTest() { final Table table = TableTools.emptyTable(5).update("A=(int)i", "B=(long)i", "C=(double)i"); @@ -1114,6 +1127,12 @@ public void writeKeyValuePartitionedDataWithMixedPartitionsTest() { @Test public void someMoreKeyValuePartitionedTestsWithComplexKeys() { + // Verify complex keys both with and without data index + someMoreKeyValuePartitionedTestsWithComplexKeysHelper(true); + someMoreKeyValuePartitionedTestsWithComplexKeysHelper(false); + } + + private void someMoreKeyValuePartitionedTestsWithComplexKeysHelper(final boolean addDataIndex) { final TableDefinition definition = TableDefinition.of( ColumnDefinition.ofString("symbol").withPartitioning(), ColumnDefinition.ofString("epic_collection_id"), @@ -1126,16 +1145,32 @@ public void someMoreKeyValuePartitionedTestsWithComplexKeys() { "I = ii")) .withDefinitionUnsafe(definition); - final File parentDir = new File(rootFile, "someTest"); - final ParquetInstructions writeInstructions = ParquetInstructions.builder() - .setGenerateMetadataFiles(true) - .build(); + final File parentDir = new File(rootFile, "someMoreKeyValuePartitionedTestsWithComplexKeys"); + if (parentDir.exists()) { + FileUtils.deleteRecursively(parentDir); + } + final ParquetInstructions writeInstructions; + if (addDataIndex) { + writeInstructions = ParquetInstructions.builder() + .setGenerateMetadataFiles(true) + .addIndexColumns("I", "epic_request_id") + .build(); + } else { + writeInstructions = ParquetInstructions.builder() + .setGenerateMetadataFiles(true) + .build(); + } final String[] partitioningCols = new String[] {"symbol", "epic_collection_id", "epic_request_id"}; final PartitionedTable partitionedTable = inputData.partitionBy(partitioningCols); writeKeyValuePartitionedTable(partitionedTable, parentDir.getPath(), writeInstructions); final Table fromDisk = readTable(parentDir.getPath(), EMPTY.withLayout(ParquetInstructions.ParquetFileLayout.KV_PARTITIONED)); + if (addDataIndex) { + // Verify if index present on columns "I, epic_request_id" + verifyIndexingInfoExists(fromDisk, "I", "epic_request_id"); + } + for (final String col : partitioningCols) { assertTrue(fromDisk.getDefinition().getColumn(col).isPartitioning()); } @@ -2047,16 +2082,6 @@ public void readFromDirTest() { assertTableEquals(expected, fromDisk); } - /** - * These are tests for writing a table to a parquet file and making sure there are no unnecessary files left in the - * directory after we finish writing. 
- */ - @Test - public void basicWriteTests() { - basicWriteTestsImpl(SINGLE_WRITER); - basicWriteTestsImpl(MULTI_WRITER); - } - @Test public void readPartitionedDataGeneratedOnWindows() { final String path = ParquetTableReadWriteTest.class @@ -2069,6 +2094,16 @@ public void readPartitionedDataGeneratedOnWindows() { assertTableEquals(expected, partitionedDataFromWindows.sort("year")); } + /** + * These are tests for writing a table to a parquet file and making sure there are no unnecessary files left in the + * directory after we finish writing. + */ + @Test + public void basicWriteTests() { + basicWriteTestsImpl(SINGLE_WRITER); + basicWriteTestsImpl(MULTI_WRITER); + } + private static void basicWriteTestsImpl(TestParquetTableWriter writer) { // Create an empty parent directory final File parentDir = new File(rootFile, "tempDir"); @@ -2087,6 +2122,7 @@ private static void basicWriteTestsImpl(TestParquetTableWriter writer) { // This write should fail final Table badTable = TableTools.emptyTable(5) .updateView("InputString = ii % 2 == 0 ? Long.toString(ii) : null", "A=InputString.charAt(0)"); + DataIndexer.getOrCreateDataIndex(badTable, "InputString"); try { writer.writeTable(badTable, destFile); TestCase.fail("Exception expected for invalid formula"); @@ -2191,9 +2227,10 @@ public void writeMultiTableExceptionTest() { final File parentDir = new File(rootFile, "tempDir"); parentDir.mkdir(); - // Write two tables to parquet file and read them back + // Write two tables to parquet file final Table firstTable = TableTools.emptyTable(5) .updateView("InputString = Long.toString(ii)", "A=InputString.charAt(0)"); + DataIndexer.getOrCreateDataIndex(firstTable, "InputString"); final File firstDestFile = new File(parentDir, "firstTable.parquet"); final Table secondTable = TableTools.emptyTable(5) @@ -2203,7 +2240,7 @@ public void writeMultiTableExceptionTest() { final Table[] tablesToSave = new Table[] {firstTable, secondTable}; final String[] destinations = new String[] {firstDestFile.getPath(), secondDestFile.getPath()}; - // This write should fail + // This write should fail because of the null value in the second table try { writeTables(tablesToSave, destinations, ParquetInstructions.EMPTY.withTableDefinition(firstTable.getDefinition())); @@ -2607,6 +2644,10 @@ public void indexOverwritingTests() { indexOverwritingTestsImpl(MULTI_WRITER); } + private static File getBackupFile(final File destFile) { + return new File(destFile.getParent(), ".OLD_" + destFile.getName()); + } + private void indexOverwritingTestsImpl(TestParquetTableWriter writer) { // Create an empty parent directory final File parentDir = new File(rootFile, "tempDir"); @@ -2652,7 +2693,7 @@ private void indexOverwritingTestsImpl(TestParquetTableWriter writer) { // The directory should still contain the updated table, its index file for column xxx, and old index file // for column vvv final File xxxIndexFile = new File(parentDir, xxxIndexFilePath); - final File backupXXXIndexFile = ParquetTools.getBackupFile(xxxIndexFile); + final File backupXXXIndexFile = getBackupFile(xxxIndexFile); final String backupXXXIndexFileName = backupXXXIndexFile.getName(); verifyFilesInDir(parentDir, new String[] {destFilename}, Map.of("vvv", new String[] {vvvIndexFilePath}, @@ -3011,10 +3052,6 @@ public void singleTable() { final TableDefinition fooBarDefinition; final TableDefinition barDefinition; { - fooSource.mkdirs(); - fooBarSource.mkdirs(); - barSource.mkdirs(); - final ColumnHolder fooCol = intCol("Foo", 1, 2, 3); final ColumnHolder barCol = 
stringCol("Bar", "Zip", "Zap", "Zoom"); @@ -3126,8 +3163,6 @@ public void flatPartitionedTable() { final File p1FileEmpty = new File(emptySource, "01.parquet"); final File p2FileEmpty = new File(emptySource, "02.parquet"); - p1File.mkdirs(); - p2File.mkdirs(); emptySource.mkdirs(); final ColumnHolder foo1 = intCol("Foo", 1, 2, 3); @@ -3141,8 +3176,6 @@ public void flatPartitionedTable() { writeTable(p1, p1File.getPath()); writeTable(p2, p2File.getPath()); writeIntoEmptySource = () -> { - p1FileEmpty.mkdirs(); - p2FileEmpty.mkdirs(); writeTable(p1, p1FileEmpty.getPath()); writeTable(p2, p2FileEmpty.getPath()); }; @@ -3245,8 +3278,6 @@ public void keyValuePartitionedTable() { final File p1FileEmpty = new File(emptySource, "Partition=1/z.parquet"); final File p2FileEmpty = new File(emptySource, "Partition=2/a.parquet"); - p1File.mkdirs(); - p2File.mkdirs(); emptySource.mkdirs(); final ColumnHolder part1 = intCol("Partition", 1, 1, 1); @@ -3263,8 +3294,6 @@ public void keyValuePartitionedTable() { writeTable(p1, p1File.getPath()); writeTable(p2, p2File.getPath()); writeIntoEmptySource = () -> { - p1FileEmpty.mkdirs(); - p2FileEmpty.mkdirs(); writeTable(p1, p1FileEmpty.getPath()); writeTable(p2, p2FileEmpty.getPath()); }; diff --git a/extensions/parquet/table/src/test/java/io/deephaven/parquet/table/S3ParquetTestBase.java b/extensions/parquet/table/src/test/java/io/deephaven/parquet/table/S3ParquetTestBase.java index 17e99079e1c..b1ce204a5ea 100644 --- a/extensions/parquet/table/src/test/java/io/deephaven/parquet/table/S3ParquetTestBase.java +++ b/extensions/parquet/table/src/test/java/io/deephaven/parquet/table/S3ParquetTestBase.java @@ -3,16 +3,20 @@ // package io.deephaven.parquet.table; +import io.deephaven.UncheckedDeephavenException; import io.deephaven.engine.table.ColumnDefinition; import io.deephaven.engine.table.Table; import io.deephaven.engine.table.TableDefinition; import io.deephaven.engine.table.impl.QueryTable; +import io.deephaven.engine.table.impl.indexer.DataIndexer; import io.deephaven.engine.table.impl.locations.TableDataException; +import io.deephaven.engine.table.impl.select.FormulaEvaluationException; import io.deephaven.engine.testutil.junit4.EngineCleanup; import io.deephaven.engine.util.TableTools; import io.deephaven.extensions.s3.S3Instructions; import io.deephaven.extensions.s3.testlib.S3SeekableChannelTestSetup; import io.deephaven.test.types.OutOfBandTest; +import junit.framework.TestCase; import org.junit.After; import org.junit.Before; import org.junit.Rule; @@ -30,7 +34,10 @@ import static io.deephaven.engine.testutil.TstUtils.assertTableEquals; import static io.deephaven.engine.util.TableTools.merge; +import static io.deephaven.parquet.table.ParquetTableReadWriteTest.verifyIndexingInfoExists; import static io.deephaven.parquet.table.ParquetTools.writeKeyValuePartitionedTable; +import static io.deephaven.parquet.table.ParquetTools.writeTable; +import static io.deephaven.parquet.table.ParquetTools.writeTables; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; @@ -66,21 +73,135 @@ private static Table getTable(final int numRows) { public final void readSingleParquetFile() throws IOException, ExecutionException, InterruptedException, TimeoutException { final Table table = getTable(500_000); + final URI uri = uri("table.parquet"); + final ParquetInstructions instructions = ParquetInstructions.builder() + .setSpecialInstructions(s3Instructions( + S3Instructions.builder() + .readTimeout(Duration.ofSeconds(10))) + .build()) + .build(); + 
+ // Write the table to S3 using the test async client final File dest = new File(folder.newFolder(), "table.parquet"); ParquetTools.writeTable(table, dest.getAbsolutePath()); putObject("table.parquet", AsyncRequestBody.fromFile(dest)); + final Table fromS3 = ParquetTools.readTable(uri.toString(), instructions); + assertTableEquals(table, fromS3); + } + + @Test + public final void readWriteSingleParquetFile() { + readWriteSingleParquetFileHelper(0); // Empty table + readWriteSingleParquetFileHelper(5_000); + readWriteSingleParquetFileHelper(50_000); + readWriteSingleParquetFileHelper(500_000); + } + private void readWriteSingleParquetFileHelper(final int numRows) { + final Table table = getTable(numRows); final URI uri = uri("table.parquet"); - final ParquetInstructions readInstructions = ParquetInstructions.builder() + final ParquetInstructions instructions = ParquetInstructions.builder() .setSpecialInstructions(s3Instructions( S3Instructions.builder() + .writePartSize(5 << 20) + .numConcurrentWriteParts(5) .readTimeout(Duration.ofSeconds(10))) .build()) .build(); - final Table fromS3 = ParquetTools.readTable(uri.toString(), readInstructions); + + // Write the table to S3 using ParquetTools write API + ParquetTools.writeTable(table, uri.toString(), instructions); + final Table fromS3 = ParquetTools.readTable(uri.toString(), instructions); assertTableEquals(table, fromS3); } + @Test + public final void mixURIWritingTest() { + final Table table1, table2; + table1 = table2 = getTable(5000); + final String uri1 = uri("table1.parquet").toString(); + final String uri2 = new File(folder.getRoot(), "table2.parquet").toURI().toString(); + try { + ParquetTools.writeTables(new Table[] {table1, table2}, new String[] {uri1, uri2}, + ParquetInstructions.EMPTY); + fail("Expected exception because writing to file and to S3 are not allowed in the same call"); + } catch (final IllegalArgumentException e) { + assertTrue(e.getMessage().contains("URIs must have the same scheme")); + } + } + + @Test + public final void writeSingleTableExceptionTest() { + final Table tableToSave = TableTools.emptyTable(5).update("A=(int)i", "B=(long)i", "C=(double)i"); + final URI fileUri = uri("destDir/table.parquet"); + final ParquetInstructions instructions = ParquetInstructions.builder() + .setSpecialInstructions(s3Instructions( + S3Instructions.builder() + .readTimeout(Duration.ofSeconds(10))) + .build()) + .build(); + ParquetTools.writeTable(tableToSave, fileUri.toString(), instructions); + + final URI parentDir = uri("destDir"); + Table fromS3 = ParquetTools.readTable(parentDir.toString(), instructions); + assertTableEquals(tableToSave, fromS3); + + // Try to write a bad table at the same destination. This write should fail midway and the original file should + // be preserved. + final Table badTable = TableTools.emptyTable(5) + .updateView("InputString = ii % 2 == 0 ? Long.toString(ii) : null", "A=InputString.charAt(0)"); + try { + ParquetTools.writeTable(badTable, fileUri.toString(), instructions); + TestCase.fail("Exception expected for invalid formula"); + } catch (UncheckedDeephavenException e) { + assertTrue(e.getCause() instanceof FormulaEvaluationException); + } + + // Make sure that original file is preserved + fromS3 = ParquetTools.readTable(parentDir.toString(), instructions); + assertTableEquals(tableToSave, fromS3); + } + + /** + * These are tests for writing multiple parquet tables such that there is an exception in the second write. 
+ */ + @Test + public void writeMultiTableExceptionTest() { + // Write some initial data to S3 + final Table initialData = TableTools.emptyTable(5).update("A=(int)i", "B=(long)i", "C=(double)i"); + final URI initialDataUri = uri("destDir/initialDate.parquet"); + final ParquetInstructions instructions = ParquetInstructions.builder() + .setSpecialInstructions(s3Instructions( + S3Instructions.builder() + .readTimeout(Duration.ofSeconds(10))) + .build()) + .build(); + ParquetTools.writeTable(initialData, initialDataUri.toString(), instructions); + + // Write two tables to parquet file and read them back + final Table firstTable = TableTools.emptyTable(5) + .updateView("InputString = Long.toString(ii)", "A=InputString.charAt(0)"); + final URI firstFileUri = uri("destDir/table1.parquet"); + final Table badTable = TableTools.emptyTable(5) + .updateView("InputString = ii % 2 == 0 ? Long.toString(ii*5) : null", "A=InputString.charAt(0)"); + final URI secondFileUri = uri("destDir/table2.parquet"); + + // This write should fail for the second table + try { + writeTables(new Table[] {firstTable, badTable}, + new String[] {firstFileUri.toString(), secondFileUri.toString()}, instructions); + TestCase.fail("Exception expected for invalid formula"); + } catch (UncheckedDeephavenException e) { + assertTrue(e.getCause() instanceof FormulaEvaluationException); + } + + // All new files should be deleted even though first table would be written successfully. The directory should + // just have initial data. + final URI parentDir = uri("destDir"); + final Table fromS3 = ParquetTools.readTable(parentDir.toString(), instructions); + assertTableEquals(initialData, fromS3); + } + @Test public final void readFlatPartitionedParquetData() throws ExecutionException, InterruptedException, TimeoutException, IOException { @@ -194,6 +315,33 @@ public void readKeyValuePartitionedParquetData() } } + @Test + public void readWriteKeyValuePartitionedParquetData() { + final TableDefinition definition = TableDefinition.of( + ColumnDefinition.ofInt("PC1").withPartitioning(), + ColumnDefinition.ofInt("PC2").withPartitioning(), + ColumnDefinition.ofInt("someIntColumn"), + ColumnDefinition.ofString("someStringColumn")); + final Table table = ((QueryTable) TableTools.emptyTable(500_000) + .updateView("PC1 = (int)(ii%3)", + "PC2 = (int)(ii%2)", + "someIntColumn = (int) i", + "someStringColumn = String.valueOf(i)")) + .withDefinitionUnsafe(definition); + final URI uri = uri("keyValuePartitionedDataDir"); + final ParquetInstructions instructions = ParquetInstructions.builder() + .setSpecialInstructions(s3Instructions( + S3Instructions.builder() + .readTimeout(Duration.ofSeconds(10))) + .build()) + .setTableDefinition(definition) + .setBaseNameForPartitionedParquetData("data") + .build(); + writeKeyValuePartitionedTable(table, uri.toString(), instructions); + final Table fromS3 = ParquetTools.readTable(uri.toString(), instructions); + assertTableEquals(table.sort("PC1", "PC2"), fromS3.sort("PC1", "PC2")); + } + @Test public void readMetadataPartitionedParquetData() throws ExecutionException, InterruptedException, TimeoutException, IOException { @@ -292,4 +440,62 @@ public void readMetadataPartitionedParquetWithMissingMetadataFile() assertTrue(expected.getMessage().contains("metadata")); } } + + @Test + public void readWriteMetadataPartitionedParquetData() { + final TableDefinition definition = TableDefinition.of( + ColumnDefinition.ofInt("PC1").withPartitioning(), + ColumnDefinition.ofInt("PC2").withPartitioning(), + 
ColumnDefinition.ofInt("someIntColumn"), + ColumnDefinition.ofString("someStringColumn")); + final Table table = ((QueryTable) TableTools.emptyTable(500_000) + .updateView("PC1 = (int)(ii%3)", + "PC2 = (int)(ii%2)", + "someIntColumn = (int) i", + "someStringColumn = String.valueOf(i)")) + .withDefinitionUnsafe(definition); + final URI uri = uri("keyValuePartitionedDataDir"); + final ParquetInstructions instructions = ParquetInstructions.builder() + .setSpecialInstructions(s3Instructions( + S3Instructions.builder() + .readTimeout(Duration.ofSeconds(10))) + .build()) + .setTableDefinition(definition) + .setBaseNameForPartitionedParquetData("data") + .setGenerateMetadataFiles(true) + .build(); + writeKeyValuePartitionedTable(table, uri.toString(), instructions); + final Table fromS3 = ParquetTools.readTable(uri.toString(), instructions.withTableDefinitionAndLayout(null, + ParquetInstructions.ParquetFileLayout.METADATA_PARTITIONED)); + assertTableEquals(table.sort("PC1", "PC2"), fromS3.sort("PC1", "PC2")); + } + + @Test + public void indexByLongKey() { + final TableDefinition definition = TableDefinition.of( + ColumnDefinition.ofInt("someInt"), + ColumnDefinition.ofLong("someLong")); + final Table testTable = + ((QueryTable) TableTools.emptyTable(10).select("someInt = i", "someLong = ii % 3") + .groupBy("someLong").ungroup("someInt")).withDefinitionUnsafe(definition); + DataIndexer.getOrCreateDataIndex(testTable, "someLong"); + DataIndexer.getOrCreateDataIndex(testTable, "someInt", "someLong"); + + final URI uri = uri("table.parquet"); + final ParquetInstructions instructions = ParquetInstructions.builder() + .setSpecialInstructions(s3Instructions( + S3Instructions.builder() + .readTimeout(Duration.ofSeconds(10))) + .build()) + .build(); + + writeTable(testTable, uri.toString(), instructions); + final Table fromS3 = ParquetTools.readTable(uri.toString(), instructions); + assertTableEquals(testTable, fromS3); + + // Validate the indexes and lookup functions. 
+ verifyIndexingInfoExists(fromS3, "someLong"); + verifyIndexingInfoExists(fromS3, "someInt", "someLong"); + verifyIndexingInfoExists(fromS3, "someLong", "someInt"); + } } diff --git a/extensions/parquet/table/src/test/java/io/deephaven/parquet/table/TestParquetTools.java b/extensions/parquet/table/src/test/java/io/deephaven/parquet/table/TestParquetTools.java index de89778aefa..d096669b192 100644 --- a/extensions/parquet/table/src/test/java/io/deephaven/parquet/table/TestParquetTools.java +++ b/extensions/parquet/table/src/test/java/io/deephaven/parquet/table/TestParquetTools.java @@ -259,7 +259,7 @@ public void testWriteTableExceptions() throws IOException { try { ParquetTools.writeTable(table1, testRoot + File.separator + "unexpectedFile" + File.separator + "Table1"); TestCase.fail("Expected exception"); - } catch (UncheckedDeephavenException e) { + } catch (IllegalArgumentException e) { // Expected } @@ -268,7 +268,7 @@ public void testWriteTableExceptions() throws IOException { try { ParquetTools.writeTable(table1, testRoot + File.separator + "Table1"); TestCase.fail("Expected exception"); - } catch (UncheckedDeephavenException e) { + } catch (IllegalArgumentException e) { // Expected } new File(testRoot + File.separator + "Nested").mkdirs(); diff --git a/extensions/s3/src/main/java/io/deephaven/extensions/s3/S3AsyncClientFactory.java b/extensions/s3/src/main/java/io/deephaven/extensions/s3/S3AsyncClientFactory.java index 69150aafa00..f8c0ae3f5b4 100644 --- a/extensions/s3/src/main/java/io/deephaven/extensions/s3/S3AsyncClientFactory.java +++ b/extensions/s3/src/main/java/io/deephaven/extensions/s3/S3AsyncClientFactory.java @@ -9,8 +9,10 @@ import software.amazon.awssdk.core.client.config.ClientOverrideConfiguration; import software.amazon.awssdk.core.client.config.SdkAdvancedAsyncClientOption; import software.amazon.awssdk.core.retry.RetryMode; +import software.amazon.awssdk.http.SdkHttpClient; import software.amazon.awssdk.http.async.SdkAsyncHttpClient; import software.amazon.awssdk.http.crt.AwsCrtAsyncHttpClient; +import software.amazon.awssdk.http.crt.AwsCrtHttpClient; import software.amazon.awssdk.regions.Region; import software.amazon.awssdk.services.s3.S3AsyncClient; import software.amazon.awssdk.services.s3.S3AsyncClientBuilder; @@ -33,7 +35,7 @@ class S3AsyncClientFactory { getOrComputeThreadCountProperty("S3.numScheduledExecutorThreads", 5); private static final Logger log = LoggerFactory.getLogger(S3AsyncClientFactory.class); - private static final Map httpClientCache = new ConcurrentHashMap<>(); + private static final Map httpAsyncClientCache = new ConcurrentHashMap<>(); private static volatile Executor futureCompletionExecutor; private static volatile ScheduledExecutorService scheduledExecutor; @@ -43,7 +45,7 @@ static S3AsyncClient getAsyncClient(@NotNull final S3Instructions instructions) .asyncConfiguration( b -> b.advancedOption(SdkAdvancedAsyncClientOption.FUTURE_COMPLETION_EXECUTOR, ensureAsyncFutureCompletionExecutor())) - .httpClient(getOrBuildHttpClient(instructions)) + .httpClient(getOrBuildHttpAsyncClient(instructions)) .overrideConfiguration(ClientOverrideConfiguration.builder() // If we find that the STANDARD retry policy does not work well in all situations, we might // try experimenting with ADAPTIVE retry policy, potentially with fast fail. 
@@ -58,11 +60,11 @@ static S3AsyncClient getAsyncClient(@NotNull final S3Instructions instructions) .credentialsProvider(instructions.awsV2CredentialsProvider()); instructions.regionName().map(Region::of).ifPresent(builder::region); instructions.endpointOverride().ifPresent(builder::endpointOverride); - final S3AsyncClient ret = builder.build(); + final S3AsyncClient s3AsyncClient = builder.build(); if (log.isDebugEnabled()) { log.debug().append("Building S3AsyncClient with instructions: ").append(instructions).endl(); } - return ret; + return s3AsyncClient; } private static class HttpClientConfig { @@ -103,10 +105,10 @@ public boolean equals(final Object other) { } } - private static SdkAsyncHttpClient getOrBuildHttpClient(@NotNull final S3Instructions instructions) { + private static SdkAsyncHttpClient getOrBuildHttpAsyncClient(@NotNull final S3Instructions instructions) { final HttpClientConfig config = new HttpClientConfig(instructions.maxConcurrentRequests(), instructions.connectionTimeout()); - return httpClientCache.computeIfAbsent(config, key -> AwsCrtAsyncHttpClient.builder() + return httpAsyncClientCache.computeIfAbsent(config, key -> AwsCrtAsyncHttpClient.builder() .maxConcurrency(config.maxConcurrentRequests()) .connectionTimeout(config.connectionTimeout()) .build()); diff --git a/extensions/s3/src/main/java/io/deephaven/extensions/s3/S3CompletableOutputStream.java b/extensions/s3/src/main/java/io/deephaven/extensions/s3/S3CompletableOutputStream.java new file mode 100644 index 00000000000..43004a6ba70 --- /dev/null +++ b/extensions/s3/src/main/java/io/deephaven/extensions/s3/S3CompletableOutputStream.java @@ -0,0 +1,362 @@ +// +// Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending +// +package io.deephaven.extensions.s3; + +import io.deephaven.util.channel.CompletableOutputStream; +import org.jetbrains.annotations.NotNull; +import software.amazon.awssdk.core.async.AsyncRequestBody; +import software.amazon.awssdk.services.s3.S3AsyncClient; +import software.amazon.awssdk.services.s3.S3Uri; +import software.amazon.awssdk.services.s3.internal.multipart.SdkPojoConversionUtils; +import software.amazon.awssdk.services.s3.model.AbortMultipartUploadRequest; +import software.amazon.awssdk.services.s3.model.CompleteMultipartUploadRequest; +import software.amazon.awssdk.services.s3.model.CompletedMultipartUpload; +import software.amazon.awssdk.services.s3.model.CompletedPart; +import software.amazon.awssdk.services.s3.model.CreateMultipartUploadRequest; +import software.amazon.awssdk.services.s3.model.CreateMultipartUploadResponse; +import software.amazon.awssdk.services.s3.model.UploadPartRequest; +import software.amazon.awssdk.services.s3.model.UploadPartResponse; + +import java.io.IOException; +import java.net.URI; +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ExecutionException; + +import static io.deephaven.extensions.s3.S3ChannelContext.handleS3Exception; +import static io.deephaven.extensions.s3.S3Instructions.MIN_WRITE_PART_SIZE; + +class S3CompletableOutputStream extends CompletableOutputStream { + + /** + * @see Amazon S3 User Guide + */ + private static final int MIN_PART_NUMBER = 1; + private static final int MAX_PART_NUMBER = 10000; + private static final int INVALID_PART_NUMBER = -1; + + private enum State { + OPEN, DONE, COMPLETED, ABORTED + } + + private final S3Uri uri; + private final S3AsyncClient s3AsyncClient; + private final S3Instructions 
s3Instructions; + + private final int writePartSize; + private final int numConcurrentWriteParts; + + private final List completedParts; + private final List pendingRequests; + + private int nextPartNumber; + private String uploadId; // Initialized on first write, changed back to null when multipart upload completed/aborted + private State state; + + S3CompletableOutputStream( + @NotNull final URI uri, + @NotNull final S3AsyncClient s3AsyncClient, + @NotNull final S3Instructions s3Instructions) { + this.uri = s3AsyncClient.utilities().parseUri(uri); + this.s3AsyncClient = s3AsyncClient; + this.s3Instructions = s3Instructions; + + this.writePartSize = s3Instructions.writePartSize(); + this.numConcurrentWriteParts = s3Instructions.numConcurrentWriteParts(); + this.pendingRequests = new ArrayList<>(numConcurrentWriteParts); + + this.nextPartNumber = MIN_PART_NUMBER; + this.completedParts = new ArrayList<>(); + this.state = State.OPEN; + } + + @Override + public void write(final int b) throws IOException { + write((dest, destOff, destCount) -> { + dest.put((byte) b); + return 1; + }, 0, 1); + } + + @Override + public void write(final byte @NotNull [] b) throws IOException { + write(b, 0, b.length); + } + + @Override + public void write(final byte @NotNull [] b, final int off, final int len) throws IOException { + write((dest, currentOffset, remainingLength) -> { + final int lengthToWrite = Math.min(remainingLength, dest.remaining()); + dest.put(b, currentOffset, lengthToWrite); + return lengthToWrite; + }, off, len); + } + + @FunctionalInterface + private interface DataWriter { + /** + * Writes source data from a single {@code outputStream.write} call to the given destination buffer, starting + * from the current offset in the source data. + * + * @param dest the destination buffer to write data to + * @param currentOffset the current offset in the source data + * @param remainingLength the remaining number of bytes of source data to write + * @return the number of bytes written to the destination buffer + * + * @throws IOException if an I/O error occurs during the write operation + */ + int write(ByteBuffer dest, int currentOffset, int remainingLength) throws IOException; + } + + /** + * Writes source data from a single {@code outputStream.write} call to S3 using the provided {@link DataWriter}. 
+ * + * @param writer the {@link DataWriter} used to write data to the destination buffer + * @param off the offset in the source data from which to start writing + * @param len the length of the data to be written + * + * @throws IOException if an I/O error occurs during the write operation or if the stream is not {@link State#OPEN} + */ + private void write(@NotNull final DataWriter writer, int off, int len) throws IOException { + if (state != State.OPEN) { + throw new IOException("Cannot write to stream for uri " + uri + " because stream in state " + state + + " instead of OPEN"); + } + while (len != 0) { + if (uploadId == null) { + // Initialize the upload ID for the multipart upload + uploadId = initiateMultipartUpload(); + } + + // We use request slots in a circular queue fashion + final int nextSlotId = (nextPartNumber - 1) % numConcurrentWriteParts; + final OutgoingRequest useRequest; + if (pendingRequests.size() == nextSlotId) { + pendingRequests.add(useRequest = new OutgoingRequest(writePartSize)); + } else if (pendingRequests.size() < nextSlotId) { + throw new IllegalStateException("Unexpected slot ID " + nextSlotId + " for uri " + uri + " with " + + pendingRequests.size() + " pending requests."); + } else { + useRequest = pendingRequests.get(nextSlotId); + // Wait for the oldest upload to complete if no space is available + if (useRequest.future != null) { + waitForCompletion(useRequest); + } + } + + // Write as much as possible to this buffer + final ByteBuffer buffer = useRequest.buffer; + final int lengthWritten = writer.write(buffer, off, len); + if (!buffer.hasRemaining()) { + sendPartRequest(useRequest); + } + off += lengthWritten; + len -= lengthWritten; + } + } + + @Override + public void flush() throws IOException { + // Flush the next part if it is larger than the minimum part size + flushImpl(false); + } + + @Override + public void done() throws IOException { + if (state == State.DONE) { + return; + } + if (state != State.OPEN) { + throw new IOException("Cannot mark stream as done for uri " + uri + " because stream in state " + state + + " instead of OPEN"); + } + flushImpl(true); + state = State.DONE; + } + + @Override + public void complete() throws IOException { + if (state == State.COMPLETED) { + return; + } + done(); + completeMultipartUpload(); + state = State.COMPLETED; + } + + @Override + public void rollback() throws IOException { + if (state == State.COMPLETED || state == State.ABORTED) { + // Cannot roll back a completed or aborted multipart upload + return; + } + abortMultipartUpload(); + state = State.ABORTED; + } + + @Override + public void close() throws IOException { + if (state == State.COMPLETED || state == State.ABORTED) { + return; + } + abortMultipartUpload(); + state = State.ABORTED; + } + + ////////// Helper methods and classes ////////// + + private static class OutgoingRequest { + /** + * The buffer for this request + */ + private final ByteBuffer buffer; + + /** + * The part number for the part to be uploaded + */ + private int partNumber; + + /** + * The future for the part upload + */ + private CompletableFuture future; + + OutgoingRequest(final int writePartSize) { + // TODO(deephaven-core#5935): Experiment with buffer pool here + buffer = ByteBuffer.allocate(writePartSize); + partNumber = INVALID_PART_NUMBER; + } + } + + private String initiateMultipartUpload() throws IOException { + final CreateMultipartUploadRequest createMultipartUploadRequest = CreateMultipartUploadRequest.builder() + .bucket(uri.bucket().orElseThrow()) + 
.key(uri.key().orElseThrow()) + .build(); + // Note: We can add support for other parameters like tagging, storage class, encryption, permissions, etc. in + // future + final CompletableFuture future = + s3AsyncClient.createMultipartUpload(createMultipartUploadRequest); + final CreateMultipartUploadResponse response; + try { + response = future.get(); + } catch (final InterruptedException | ExecutionException e) { + throw handleS3Exception(e, String.format("initiating multipart upload for uri %s", uri), s3Instructions); + } + return response.uploadId(); + } + + /** + * Send a part request for the given buffer. This method assumes that the buffer is non-empty. + */ + private void sendPartRequest(final OutgoingRequest request) throws IOException { + if (nextPartNumber > MAX_PART_NUMBER) { + throw new IOException("Cannot upload more than " + MAX_PART_NUMBER + " parts for uri " + uri + ", please" + + " try again with a larger part size"); + } + if (request.future != null) { + throw new IllegalStateException("Request already in progress for uri " + uri + " with part number " + + nextPartNumber); + } + final UploadPartRequest uploadPartRequest = UploadPartRequest.builder() + .bucket(uri.bucket().orElseThrow()) + .key(uri.key().orElseThrow()) + .uploadId(uploadId) + .partNumber(nextPartNumber) + .build(); + request.buffer.flip(); + request.future = s3AsyncClient.uploadPart(uploadPartRequest, + AsyncRequestBody.fromByteBufferUnsafe(request.buffer)); + request.partNumber = nextPartNumber; + nextPartNumber++; + } + + private void waitForCompletion(final OutgoingRequest request) throws IOException { + final UploadPartResponse uploadPartResponse; + try { + uploadPartResponse = request.future.get(); + } catch (final InterruptedException | ExecutionException e) { + throw handleS3Exception(e, String.format("waiting for part %d for uri %s to complete uploading", + request.partNumber, uri), s3Instructions); + } + completedParts.add(SdkPojoConversionUtils.toCompletedPart(uploadPartResponse, request.partNumber)); + request.buffer.clear(); + request.future = null; + request.partNumber = INVALID_PART_NUMBER; + } + + /** + * Flushes the current buffer to S3. + * + * @param force if true, forces the buffer to be flushed even if it is smaller than the minimum + * {@value S3Instructions#MIN_WRITE_PART_SIZE} MiB threshold, which should only be done for the very last + * part. 
+ * @throws IOException if an I/O error occurs during the flush operation + */ + private void flushImpl(final boolean force) throws IOException { + final int nextSlotId = (nextPartNumber - 1) % numConcurrentWriteParts; + if (pendingRequests.size() == nextSlotId) { + // Nothing to flush + return; + } + final OutgoingRequest request = pendingRequests.get(nextSlotId); + if (request.buffer.position() != 0 + && request.future == null + && (force || request.buffer.position() >= MIN_WRITE_PART_SIZE)) { + sendPartRequest(request); + } + } + + private void completeMultipartUpload() throws IOException { + if (uploadId == null) { + throw new IllegalStateException("Cannot complete multipart upload for uri " + uri + " because upload ID " + + "is null"); + } + // Complete all pending requests in the exact order they were sent + final int partCount = nextPartNumber - 1; + for (int partNumber = completedParts.size() + 1; partNumber <= partCount; partNumber++) { + // Part numbers start from 1, therefore, we use (partNumber - 1) for the slot ID + final int slotId = (partNumber - 1) % numConcurrentWriteParts; + final OutgoingRequest request = pendingRequests.get(slotId); + waitForCompletion(request); + } + final CompleteMultipartUploadRequest completeRequest = CompleteMultipartUploadRequest.builder() + .bucket(uri.bucket().orElseThrow()) + .key(uri.key().orElseThrow()) + .uploadId(uploadId) + .multipartUpload(CompletedMultipartUpload.builder() + .parts(completedParts) + .build()) + .build(); + try { + s3AsyncClient.completeMultipartUpload(completeRequest).get(); + } catch (final InterruptedException | ExecutionException e) { + throw handleS3Exception(e, String.format("completing multipart upload for uri %s", uri), s3Instructions); + } + uploadId = null; + } + + /** + * Abort the multipart upload if it is in progress. 
+ */ + private void abortMultipartUpload() throws IOException { + if (uploadId == null) { + throw new IllegalStateException("Cannot abort multipart upload for uri " + uri + " because upload ID " + + "is null"); + } + final AbortMultipartUploadRequest abortRequest = AbortMultipartUploadRequest.builder() + .bucket(uri.bucket().orElseThrow()) + .key(uri.key().orElseThrow()) + .uploadId(uploadId) + .build(); + try { + s3AsyncClient.abortMultipartUpload(abortRequest).get(); + } catch (final InterruptedException | ExecutionException e) { + throw handleS3Exception(e, String.format("aborting multipart upload for uri %s", uri), s3Instructions); + } + uploadId = null; + } +} diff --git a/extensions/s3/src/main/java/io/deephaven/extensions/s3/S3Instructions.java b/extensions/s3/src/main/java/io/deephaven/extensions/s3/S3Instructions.java index 27d313a235c..f6a259c26aa 100644 --- a/extensions/s3/src/main/java/io/deephaven/extensions/s3/S3Instructions.java +++ b/extensions/s3/src/main/java/io/deephaven/extensions/s3/S3Instructions.java @@ -24,12 +24,23 @@ @CopyableStyle public abstract class S3Instructions implements LogOutputAppendable { - private final static int DEFAULT_MAX_CONCURRENT_REQUESTS = 256; - private final static int DEFAULT_READ_AHEAD_COUNT = 32; - private final static int DEFAULT_FRAGMENT_SIZE = 1 << 16; // 64 KiB - private final static int MIN_FRAGMENT_SIZE = 8 << 10; // 8 KiB - private final static Duration DEFAULT_CONNECTION_TIMEOUT = Duration.ofSeconds(2); - private final static Duration DEFAULT_READ_TIMEOUT = Duration.ofSeconds(2); + private static final int DEFAULT_MAX_CONCURRENT_REQUESTS = 256; + private static final int DEFAULT_READ_AHEAD_COUNT = 32; + private static final int DEFAULT_FRAGMENT_SIZE = 1 << 16; // 64 KiB + private static final int MIN_FRAGMENT_SIZE = 8 << 10; // 8 KiB + private static final Duration DEFAULT_CONNECTION_TIMEOUT = Duration.ofSeconds(2); + private static final Duration DEFAULT_READ_TIMEOUT = Duration.ofSeconds(2); + private static final int DEFAULT_NUM_CONCURRENT_WRITE_PARTS = 64; + + /** + * We set default part size to 10 MiB. The maximum number of parts allowed is 10,000. This means maximum size of a + * single file that we can write is roughly 100k MiB (or about 98 GiB). For uploading larger files, user would need + * to set a larger part size. + * + * @see Amazon S3 User Guide + */ + private static final int DEFAULT_WRITE_PART_SIZE = 10 << 20; // 10 MiB + static final int MIN_WRITE_PART_SIZE = 5 << 20; // 5 MiB static final S3Instructions DEFAULT = builder().build(); @@ -99,6 +110,28 @@ public Credentials credentials() { return Credentials.defaultCredentials(); } + /** + * The size of each part (in bytes) to upload when writing to S3, defaults to {@value #DEFAULT_WRITE_PART_SIZE}. The + * minimum allowed part size is {@value #MIN_WRITE_PART_SIZE}. Setting a higher value may increase throughput, but + * may also increase memory usage. Note that the maximum number of parts allowed for a single file is 10,000. + * Therefore, for {@value #DEFAULT_WRITE_PART_SIZE} part size, the maximum size of a single file that can be written + * is {@value #DEFAULT_WRITE_PART_SIZE} * 10,000 bytes. + */ + @Default + public int writePartSize() { + return DEFAULT_WRITE_PART_SIZE; + } + + /** + * The maximum number of parts that can be uploaded concurrently when writing to S3 without blocking. Setting a + * higher value may increase throughput, but may also increase memory usage. Defaults to + * {@value #DEFAULT_NUM_CONCURRENT_WRITE_PARTS}. 
+ */ + @Default + public int numConcurrentWriteParts() { + return DEFAULT_NUM_CONCURRENT_WRITE_PARTS; + } + @Override public LogOutput append(final LogOutput logOutput) { return logOutput.append(toString()); @@ -129,6 +162,10 @@ public interface Builder { Builder endpointOverride(URI endpointOverride); + Builder writePartSize(int writePartSize); + + Builder numConcurrentWriteParts(int numConcurrentWriteParts); + default Builder endpointOverride(String endpointOverride) { return endpointOverride(URI.create(endpointOverride)); } @@ -174,6 +211,31 @@ final void awsSdkV2Credentials() { } } + @Check + final void boundsCheckWritePartSize() { + if (writePartSize() < MIN_WRITE_PART_SIZE) { + throw new IllegalArgumentException( + "writePartSize(=" + writePartSize() + ") must be >= " + MIN_WRITE_PART_SIZE + " MiB"); + } + } + + @Check + final void boundsCheckMinNumConcurrentWriteParts() { + if (numConcurrentWriteParts() < 1) { + throw new IllegalArgumentException( + "numConcurrentWriteParts(=" + numConcurrentWriteParts() + ") must be >= 1"); + } + } + + @Check + final void boundsCheckMaxNumConcurrentWriteParts() { + if (numConcurrentWriteParts() > maxConcurrentRequests()) { + throw new IllegalArgumentException( + "numConcurrentWriteParts(=" + numConcurrentWriteParts() + ") must be <= " + + "maxConcurrentRequests(=" + maxConcurrentRequests() + ")"); + } + } + final AwsCredentialsProvider awsV2CredentialsProvider() { return ((AwsSdkV2Credentials) credentials()).awsV2CredentialsProvider(); } diff --git a/extensions/s3/src/main/java/io/deephaven/extensions/s3/S3SeekableChannelProvider.java b/extensions/s3/src/main/java/io/deephaven/extensions/s3/S3SeekableChannelProvider.java index 7083e1c22cf..4bd06a1b661 100644 --- a/extensions/s3/src/main/java/io/deephaven/extensions/s3/S3SeekableChannelProvider.java +++ b/extensions/s3/src/main/java/io/deephaven/extensions/s3/S3SeekableChannelProvider.java @@ -11,6 +11,7 @@ import io.deephaven.internal.log.LoggerFactory; import io.deephaven.io.logger.Logger; import io.deephaven.util.channel.Channels; +import io.deephaven.util.channel.CompletableOutputStream; import io.deephaven.util.channel.SeekableChannelContext; import io.deephaven.util.channel.SeekableChannelsProvider; import org.jetbrains.annotations.NotNull; @@ -28,7 +29,6 @@ import java.net.URI; import java.net.URISyntaxException; import java.nio.channels.SeekableByteChannel; -import java.nio.file.Path; import java.util.Iterator; import java.util.Map; import java.util.NoSuchElementException; @@ -130,8 +130,9 @@ public boolean isCompatibleWith(@NotNull final SeekableChannelContext channelCon } @Override - public SeekableByteChannel getWriteChannel(@NotNull final Path path, final boolean append) { - throw new UnsupportedOperationException("Writing to S3 is currently unsupported"); + public CompletableOutputStream getOutputStream(@NotNull final URI uri, final int bufferSizeHint) { + // bufferSizeHint is unused because s3 output stream is buffered internally into parts + return new S3CompletableOutputStream(uri, s3AsyncClient, s3Instructions); } @Override diff --git a/extensions/s3/src/test/java/io/deephaven/extensions/s3/S3InstructionsTest.java b/extensions/s3/src/test/java/io/deephaven/extensions/s3/S3InstructionsTest.java index 521bc02f6be..4d6ef35ce4a 100644 --- a/extensions/s3/src/test/java/io/deephaven/extensions/s3/S3InstructionsTest.java +++ b/extensions/s3/src/test/java/io/deephaven/extensions/s3/S3InstructionsTest.java @@ -22,6 +22,8 @@ void defaults() { 
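To make the part-size guidance above concrete, a back-of-the-envelope calculation (illustrative only, assuming the 10,000-part S3 limit cited in the javadoc):

    // maximum object size = writePartSize * 10,000 parts
    final long defaultCap = 10L * (1 << 20) * 10_000L;  // 104,857,600,000 bytes, roughly 97.7 GiB (the "about 98 GiB" above)
    final long largerCap = 64L * (1 << 20) * 10_000L;   // raising writePartSize to 64 MiB lifts the cap to roughly 625 GiB
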
assertThat(instructions.connectionTimeout()).isEqualTo(Duration.ofSeconds(2)); assertThat(instructions.readTimeout()).isEqualTo(Duration.ofSeconds(2)); assertThat(instructions.credentials()).isEqualTo(Credentials.defaultCredentials()); + assertThat(instructions.writePartSize()).isEqualTo(10485760); + assertThat(instructions.numConcurrentWriteParts()).isEqualTo(64); assertThat(instructions.endpointOverride()).isEmpty(); } @@ -36,13 +38,25 @@ void testSetRegion() { } @Test - void minMaxConcurrentRequests() { + void testSetMaxConcurrentRequests() { assertThat(S3Instructions.builder() .regionName("some-region") - .maxConcurrentRequests(1) + .maxConcurrentRequests(100) .build() .maxConcurrentRequests()) - .isEqualTo(1); + .isEqualTo(100); + } + + @Test + void testMinMaxConcurrentRequests() { + try { + S3Instructions.builder() + .regionName("some-region") + .maxConcurrentRequests(-1) + .build(); + } catch (IllegalArgumentException e) { + assertThat(e).hasMessageContaining("maxConcurrentRequests"); + } } @Test @@ -122,4 +136,41 @@ void badCredentials() { assertThat(e).hasMessageContaining("credentials"); } } + + @Test + void tooSmallWritePartSize() { + try { + S3Instructions.builder() + .regionName("some-region") + .writePartSize(1024) + .build(); + } catch (IllegalArgumentException e) { + assertThat(e).hasMessageContaining("writePartSize"); + } + } + + @Test + void tooSmallNumConcurrentWriteParts() { + try { + S3Instructions.builder() + .regionName("some-region") + .numConcurrentWriteParts(0) + .build(); + } catch (IllegalArgumentException e) { + assertThat(e).hasMessageContaining("numConcurrentWriteParts"); + } + } + + @Test + void tooLargeNumConcurrentWriteParts() { + try { + S3Instructions.builder() + .regionName("some-region") + .numConcurrentWriteParts(1001) + .maxConcurrentRequests(1000) + .build(); + } catch (IllegalArgumentException e) { + assertThat(e).hasMessageContaining("numConcurrentWriteParts"); + } + } } diff --git a/extensions/s3/src/test/java/io/deephaven/extensions/s3/S3SeekableChannelSimpleTestBase.java b/extensions/s3/src/test/java/io/deephaven/extensions/s3/S3SeekableChannelSimpleTestBase.java index 68f6a9042c7..a0cf78b0f3a 100644 --- a/extensions/s3/src/test/java/io/deephaven/extensions/s3/S3SeekableChannelSimpleTestBase.java +++ b/extensions/s3/src/test/java/io/deephaven/extensions/s3/S3SeekableChannelSimpleTestBase.java @@ -5,8 +5,10 @@ import io.deephaven.extensions.s3.testlib.S3SeekableChannelTestSetup; import io.deephaven.util.channel.CachedChannelProvider; +import io.deephaven.util.channel.CompletableOutputStream; import io.deephaven.util.channel.SeekableChannelContext; import io.deephaven.util.channel.SeekableChannelsProvider; +import junit.framework.TestCase; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -88,4 +90,60 @@ public int read() { assertThat(readChannel.read(buffer)).isEqualTo(-1); } } + + @Test + void readWriteTest() throws IOException { + final URI uri = uri("writeReadTest.txt"); + final String content = "Hello, world!"; + final byte[] contentBytes = content.getBytes(StandardCharsets.UTF_8); + try ( + final SeekableChannelsProvider providerImpl = providerImpl(uri); + final SeekableChannelsProvider provider = CachedChannelProvider.create(providerImpl, 32); + final CompletableOutputStream outputStream = provider.getOutputStream(uri, 0)) { + final int numBytes = 36 * 1024 * 1024; // 36 Mib -> Three 10-MiB parts + One 6-MiB part + final int numIters = numBytes / contentBytes.length; + 
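As a reading aid for the write/flush/done/complete sequence exercised below, a sketch of the CompletableOutputStream lifecycle (the provider, uri, and data names are assumed from the surrounding test):

    try (final CompletableOutputStream out = provider.getOutputStream(uri, 0)) {
        out.write(data);   // buffered into parts internally; a full part is uploaded asynchronously
        out.flush();       // best-effort: only sends the current part once it reaches the 5 MiB minimum
        out.done();        // flushes the remainder; further writes throw IOException
        out.complete();    // finishes the multipart upload; closing without complete() aborts it instead
    }
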
for (int i = 0; i < numIters; ++i) { + outputStream.write(contentBytes); + } + outputStream.flush(); + outputStream.flush(); + outputStream.write(contentBytes); + outputStream.flush(); + outputStream.flush(); + outputStream.done(); + outputStream.flush(); + try { + outputStream.write(contentBytes); + TestCase.fail("Failure expected on writing since the stream is marked as done."); + } catch (IOException expected) { + } + + // Push data to S3, but don't close the stream + outputStream.complete(); + try ( + final SeekableChannelContext context = provider.makeContext(); + final SeekableByteChannel readChannel = provider.getReadChannel(context, uri)) { + final ByteBuffer buffer = ByteBuffer.allocate(contentBytes.length); + // We wrote total of numIters + 1 times + for (int i = 0; i < numIters + 1; ++i) { + fillBuffer(readChannel, buffer); + assertThat(buffer).isEqualTo(ByteBuffer.wrap(contentBytes)); + buffer.clear(); + } + // We should have read all the data from the channel + assertThat(readChannel.read(buffer)).isEqualTo(-1); + } + + // Try rollback, should not delete the file + outputStream.rollback(); + try ( + final SeekableChannelContext context = provider.makeContext(); + final SeekableByteChannel readChannel = provider.getReadChannel(context, uri)) { + final ByteBuffer buffer = ByteBuffer.allocate(contentBytes.length); + readChannel.read(buffer); + buffer.flip(); + assertThat(buffer).isEqualTo(ByteBuffer.wrap(contentBytes)); + } + } + } } diff --git a/extensions/s3/src/test/java/io/deephaven/extensions/s3/testlib/S3SeekableChannelTestSetup.java b/extensions/s3/src/test/java/io/deephaven/extensions/s3/testlib/S3SeekableChannelTestSetup.java index 9d4df0a5744..2cb04c83364 100644 --- a/extensions/s3/src/test/java/io/deephaven/extensions/s3/testlib/S3SeekableChannelTestSetup.java +++ b/extensions/s3/src/test/java/io/deephaven/extensions/s3/testlib/S3SeekableChannelTestSetup.java @@ -83,4 +83,15 @@ protected static ByteBuffer readAll(ReadableByteChannel channel, int maxBytes) t dst.flip(); return dst; } + + protected static void fillBuffer(ReadableByteChannel channel, final ByteBuffer dst) throws IOException { + final int numBytes = dst.remaining(); + while (dst.remaining() > 0 && channel.read(dst) != -1) { + // continue + } + if (dst.remaining() > 0) { + throw new RuntimeException(String.format("channel has less than %d bytes", numBytes)); + } + dst.flip(); + } } diff --git a/extensions/trackedfile/build.gradle b/extensions/trackedfile/build.gradle index 3896eb1e4ee..f7e7910289d 100644 --- a/extensions/trackedfile/build.gradle +++ b/extensions/trackedfile/build.gradle @@ -11,6 +11,7 @@ dependencies { implementation project(':Base') implementation project(':Util') implementation project(':engine-table') + implementation project(':log-factory') compileOnly libs.jetbrains.annotations diff --git a/extensions/trackedfile/src/main/java/io/deephaven/extensions/trackedfile/LocalCompletableOutputStream.java b/extensions/trackedfile/src/main/java/io/deephaven/extensions/trackedfile/LocalCompletableOutputStream.java new file mode 100644 index 00000000000..5f6961f8bf6 --- /dev/null +++ b/extensions/trackedfile/src/main/java/io/deephaven/extensions/trackedfile/LocalCompletableOutputStream.java @@ -0,0 +1,245 @@ +// +// Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending +// +package io.deephaven.extensions.trackedfile; + +import io.deephaven.UncheckedDeephavenException; +import io.deephaven.base.FileUtils; +import io.deephaven.internal.log.LoggerFactory; +import 
io.deephaven.io.logger.Logger; +import io.deephaven.util.channel.CompletableOutputStream; +import org.jetbrains.annotations.NotNull; +import org.jetbrains.annotations.Nullable; + +import java.io.BufferedOutputStream; +import java.io.File; +import java.io.IOException; +import java.io.OutputStream; +import java.nio.channels.Channels; + +/** + * A {@link CompletableOutputStream} that writes to a temporary shadow file paths in the same directory to prevent + * overwriting any existing data in case of failure. + */ +class LocalCompletableOutputStream extends CompletableOutputStream { + + private static final Logger log = LoggerFactory.getLogger(LocalCompletableOutputStream.class); + + private enum State { + OPEN, DONE, COMPLETED, ROLLED_BACK + } + + private final File firstCreatedDir; + private final File destFile; + private final File shadowDestFile; + private final OutputStream shadowDelegateStream; // Writes to the shadow file + + private State state; + + LocalCompletableOutputStream( + @NotNull final File destFile, + @NotNull final TrackedSeekableChannelsProvider provider, + final int bufferSizeHint) throws IOException { + this.firstCreatedDir = prepareDestinationFileLocation(destFile); + this.destFile = destFile; + deleteBackupFile(destFile); + this.shadowDestFile = getShadowFile(destFile); + this.shadowDelegateStream = new BufferedOutputStream(Channels.newOutputStream( + provider.getWriteChannel(shadowDestFile)), bufferSizeHint); + this.state = State.OPEN; + } + + @Override + public void write(int b) throws IOException { + verifyOpen(); + shadowDelegateStream.write(b); + } + + @Override + public void write(byte[] b) throws IOException { + verifyOpen(); + shadowDelegateStream.write(b); + } + + @Override + public void write(byte[] b, int off, int len) throws IOException { + verifyOpen(); + shadowDelegateStream.write(b, off, len); + } + + @Override + public void flush() throws IOException { + verifyOpen(); + shadowDelegateStream.flush(); + } + + public void done() throws IOException { + if (state == State.DONE) { + return; + } + if (state != State.OPEN) { + throw new IOException("Cannot mark stream as done for file " + destFile.getAbsolutePath() + " because " + + "stream in state " + state + " instead of OPEN"); + } + flush(); + state = State.DONE; + } + + public void complete() throws IOException { + if (state == State.COMPLETED) { + return; + } + done(); + shadowDelegateStream.close(); + installShadowFile(destFile, shadowDestFile); + state = State.COMPLETED; + } + + @Override + public void rollback() { + if (state == State.ROLLED_BACK) { + return; + } + if (state == State.COMPLETED) { + rollbackShadowFiles(destFile); + } + // noinspection ResultOfMethodCallIgnored + shadowDestFile.delete(); + if (firstCreatedDir != null) { + log.error().append("Cleaning up potentially incomplete table destination path starting from ") + .append(firstCreatedDir.getAbsolutePath()).endl(); + FileUtils.deleteRecursivelyOnNFS(firstCreatedDir); + } + state = State.ROLLED_BACK; + } + + @Override + public void close() throws IOException { + if (state == State.ROLLED_BACK) { + return; + } + if (state != State.COMPLETED) { + rollback(); + return; + } + deleteBackupFileNoExcept(destFile); + } + + ////////////// Helper methods ///////////// + + private void verifyOpen() throws IOException { + if (state != State.OPEN) { + throw new IOException("Cannot write to stream for file " + destFile.getAbsolutePath() + " because stream " + + "in state " + state + " instead of OPEN"); + } + } + + /** + * Delete any old backup 
files created for this destination, and throw an exception on failure. + */ + private static void deleteBackupFile(@NotNull final File destFile) { + if (!deleteBackupFileNoExcept(destFile)) { + throw new UncheckedDeephavenException( + String.format("Failed to delete backup file at %s", getBackupFile(destFile).getAbsolutePath())); + } + } + + /** + * Delete any old backup files created for this destination with no exception in case of failure. + */ + private static boolean deleteBackupFileNoExcept(@NotNull final File destFile) { + final File backupDestFile = getBackupFile(destFile); + if (backupDestFile.exists() && !backupDestFile.delete()) { + log.error().append("Error in deleting backup file at path ") + .append(backupDestFile.getAbsolutePath()) + .endl(); + return false; + } + return true; + } + + private static File getBackupFile(final File destFile) { + return new File(destFile.getParent(), ".OLD_" + destFile.getName()); + } + + private static File getShadowFile(final File destFile) { + return new File(destFile.getParent(), ".NEW_" + destFile.getName()); + } + + /** + * Make any missing ancestor directories of {@code destination}. + * + * @param destination The destination file + * @return The first created directory, or null if no directories were made. + */ + @Nullable + private static File prepareDestinationFileLocation(@NotNull File destination) { + destination = destination.getAbsoluteFile(); + if (destination.exists()) { + if (destination.isDirectory()) { + throw new UncheckedDeephavenException( + String.format("Destination %s exists and is a directory", destination)); + } + if (!destination.canWrite()) { + throw new UncheckedDeephavenException( + String.format("Destination %s exists but is not writable", destination)); + } + return null; + } + final File firstParent = destination.getParentFile(); + if (firstParent.isDirectory()) { + if (firstParent.canWrite()) { + return null; + } + throw new UncheckedDeephavenException( + String.format("Destination %s has non writable parent directory", destination)); + } + File firstCreated = firstParent; + File parent; + for (parent = destination.getParentFile(); parent != null && !parent.exists(); parent = + parent.getParentFile()) { + firstCreated = parent; + } + if (parent == null) { + throw new IllegalArgumentException( + String.format("Can't find any existing parent directory for destination path: %s", destination)); + } + if (!parent.isDirectory()) { + throw new IllegalArgumentException( + String.format("Existing parent file %s of %s is not a directory", parent, destination)); + } + if (!firstParent.mkdirs()) { + throw new UncheckedDeephavenException("Couldn't (re)create destination directory " + firstParent); + } + return firstCreated; + } + + + /** + * Backup any existing files at destination and rename the shadow file to destination file. 
+ */ + private static void installShadowFile(@NotNull final File destFile, @NotNull final File shadowDestFile) { + final File backupDestFile = getBackupFile(destFile); + if (destFile.exists() && !destFile.renameTo(backupDestFile)) { + throw new UncheckedDeephavenException( + String.format("Failed to install shadow file at %s because a file already exists at the path " + + "which couldn't be renamed to %s", destFile.getAbsolutePath(), + backupDestFile.getAbsolutePath())); + } + if (!shadowDestFile.renameTo(destFile)) { + throw new UncheckedDeephavenException(String.format( + "Failed to install shadow file at %s because couldn't rename temporary shadow file from %s to %s", + destFile.getAbsolutePath(), shadowDestFile.getAbsolutePath(), destFile.getAbsolutePath())); + } + } + + /** + * Roll back any changes made in the {@link #installShadowFile} in best-effort manner. + */ + private static void rollbackShadowFiles(@NotNull final File destFile) { + final File backupDestFile = getBackupFile(destFile); + final File shadowDestFile = getShadowFile(destFile); + destFile.renameTo(shadowDestFile); + backupDestFile.renameTo(destFile); + } +} diff --git a/extensions/trackedfile/src/main/java/io/deephaven/extensions/trackedfile/TrackedSeekableChannelsProvider.java b/extensions/trackedfile/src/main/java/io/deephaven/extensions/trackedfile/TrackedSeekableChannelsProvider.java index 345894bba64..b4fdf1b6157 100644 --- a/extensions/trackedfile/src/main/java/io/deephaven/extensions/trackedfile/TrackedSeekableChannelsProvider.java +++ b/extensions/trackedfile/src/main/java/io/deephaven/extensions/trackedfile/TrackedSeekableChannelsProvider.java @@ -10,6 +10,7 @@ import io.deephaven.engine.util.file.TrackedFileHandleFactory; import io.deephaven.engine.util.file.TrackedSeekableByteChannel; import io.deephaven.util.channel.Channels; +import io.deephaven.util.channel.CompletableOutputStream; import io.deephaven.util.channel.SeekableChannelContext; import io.deephaven.util.channel.BaseSeekableChannelContext; import io.deephaven.util.channel.SeekableChannelsProvider; @@ -73,11 +74,8 @@ public InputStream getInputStream(SeekableByteChannel channel, int sizeHint) { } @Override - public SeekableByteChannel getWriteChannel(@NotNull final Path filePath, final boolean append) - throws IOException { - // NB: I'm not sure this is actually the intended behavior; the "truncate-once" is per-handle, not per file. - return new TrackedSeekableByteChannel(append ? 
fileHandleFactory.writeAppendCreateHandleCreator - : new TruncateOnceFileCreator(fileHandleFactory), filePath.toFile()); + public CompletableOutputStream getOutputStream(@NotNull final URI uri, int bufferSizeHint) throws IOException { + return new LocalCompletableOutputStream(new File(uri), this, bufferSizeHint); } @Override @@ -94,6 +92,10 @@ public Stream walk(@NotNull final URI directory) throws IOException { return Files.walk(Path.of(directory)).map(path -> FileUtils.convertToURI(path, false)); } + SeekableByteChannel getWriteChannel(@NotNull final File destFile) throws IOException { + return new TrackedSeekableByteChannel(new TruncateOnceFileCreator(fileHandleFactory), destFile); + } + private static final class TruncateOnceFileCreator implements FileHandleFactory.FileToHandleFunction { private static final AtomicIntegerFieldUpdater FIRST_TIME_UPDATER = @@ -111,7 +113,7 @@ private TruncateOnceFileCreator(@NotNull final TrackedFileHandleFactory fileHand @NotNull @Override - public final FileHandle invoke(@NotNull final File file) throws IOException { + public FileHandle invoke(@NotNull final File file) throws IOException { if (FIRST_TIME_UPDATER.compareAndSet(this, FIRST_TIME_TRUE, FIRST_TIME_FALSE)) { return fileHandleFactory.writeTruncateCreateHandleCreator.invoke(file); } diff --git a/py/server/deephaven/experimental/s3.py b/py/server/deephaven/experimental/s3.py index c19a381b4d0..db6168aca16 100644 --- a/py/server/deephaven/experimental/s3.py +++ b/py/server/deephaven/experimental/s3.py @@ -21,14 +21,14 @@ _JS3Instructions = None """ - This module is useful for reading files stored in S3-compatible APIs. + This module is useful for reading from and writing to S3-compatible APIs. Importing this module requires the S3 specific deephaven extensions (artifact name deephaven-extensions-s3) to be included in the package. This is an opt-out functionality included by default. If not included, importing this module will fail to find the java types. """ class S3Instructions(JObjectWrapper): """ - S3Instructions provides specialized instructions for reading from S3-compatible APIs. + S3Instructions provides specialized instructions for reading from and writing to S3-compatible APIs. """ j_object_type = _JS3Instructions or type(None) @@ -45,7 +45,9 @@ def __init__(self, access_key_id: Optional[str] = None, secret_access_key: Optional[str] = None, anonymous_access: bool = False, - endpoint_override: Optional[str] = None): + endpoint_override: Optional[str] = None, + write_part_size: Optional[int] = None, + num_concurrent_write_parts: Optional[int] = None): """ Initializes the instructions. @@ -76,6 +78,14 @@ def __init__(self, anonymous access. Can't be combined with other credentials. By default, is False. endpoint_override (str): the endpoint to connect to. Callers connecting to AWS do not typically need to set this; it is most useful when connecting to non-AWS, S3-compatible APIs. + write_part_size (int): Writes to S3 are done in parts or chunks, and this value determines the size of each + part (in bytes). The default value is 10485760 (= 10 MiB) and minimum allowed part size is 5 MiB. + Setting a higher value may increase throughput, but may also increase memory usage. + Note that the maximum number of parts allowed for a single file is 10,000. Therefore, for 10 MiB part + size, the maximum size of a single file that can be written is roughly 100k MiB (or about 98 GiB). 
+ num_concurrent_write_parts (int): the maximum number of parts that can be uploaded concurrently when writing + to S3 without blocking, defaults to 64. Setting a higher value may increase throughput, but may also + increase memory usage. Raises: DHError: If unable to build the instructions object. @@ -120,6 +130,12 @@ def __init__(self, if endpoint_override is not None: builder.endpointOverride(endpoint_override) + if write_part_size is not None: + builder.writePartSize(write_part_size) + + if num_concurrent_write_parts is not None: + builder.numConcurrentWriteParts(num_concurrent_write_parts) + self._j_object = builder.build() except Exception as e: raise DHError(e, "Failed to build S3 instructions") from e diff --git a/py/server/deephaven/parquet.py b/py/server/deephaven/parquet.py index dc877660671..61614c37061 100644 --- a/py/server/deephaven/parquet.py +++ b/py/server/deephaven/parquet.py @@ -242,13 +242,14 @@ def write( max_dictionary_size: Optional[int] = None, target_page_size: Optional[int] = None, generate_metadata_files: Optional[bool] = None, - index_columns: Optional[Sequence[Sequence[str]]] = None + index_columns: Optional[Sequence[Sequence[str]]] = None, + special_instructions: Optional[s3.S3Instructions] = None ) -> None: """ Write a table to a Parquet file. Args: table (Table): the source table - path (str): the destination file path; the file name should end in a ".parquet" extension. If the path + path (str): the destination file path or URI; the file name should end in a ".parquet" extension. If the path includes any non-existing directories, they are created. If there is an error, any intermediate directories previously created are removed; note this makes this method unsafe for concurrent use table_definition (Optional[Union[Dict[str, DType], List[Column]]): the table definition to use for writing, @@ -275,6 +276,8 @@ def write( source table. This argument can be used to narrow the set of indexes to write, or to be explicit about the expected set of indexes present on all sources. Indexes that are specified but missing will be computed on demand. + special_instructions (Optional[s3.S3Instructions]): Special instructions for writing parquet files, useful when + writing files to a non-local file system, like S3. By default, None. Raises: DHError """ @@ -289,6 +292,7 @@ def write( generate_metadata_files=generate_metadata_files, table_definition=table_definition, index_columns=index_columns, + special_instructions=special_instructions, ) _JParquetTools.writeTable(table.j_table, path, write_instructions) except Exception as e: @@ -306,7 +310,8 @@ def write_partitioned( target_page_size: Optional[int] = None, base_name: Optional[str] = None, generate_metadata_files: Optional[bool] = None, - index_columns: Optional[Sequence[Sequence[str]]] = None + index_columns: Optional[Sequence[Sequence[str]]] = None, + special_instructions: Optional[s3.S3Instructions] = None ) -> None: """ Write table to disk in parquet format with the partitioning columns written as "key=value" format in a nested directory structure. For example, for a partitioned column "date", we will have a directory structure like @@ -316,8 +321,9 @@ def write_partitioned( Args: table (Table): the source table or partitioned table - destination_dir (str): The path to destination root directory in which the partitioned parquet data will be stored - in a nested directory structure format. Non-existing directories in the provided path will be created. 
+ destination_dir (str): The path or URI to the destination root directory in which the partitioned parquet data + will be stored in a nested directory structure format. Non-existing directories in the provided path will be + created. table_definition (Optional[Union[Dict[str, DType], List[Column]]): the table definition to use for writing, instead of the definitions implied by the table. Default is None, which means use the column definitions implied by the table. This definition can be used to skip some columns or add additional columns with @@ -354,6 +360,8 @@ def write_partitioned( source table. This argument can be used to narrow the set of indexes to write, or to be explicit about the expected set of indexes present on all sources. Indexes that are specified but missing will be computed on demand. + special_instructions (Optional[s3.S3Instructions]): Special instructions for writing parquet files, useful when + writing files to a non-local file system, like S3. By default, None. Raises: DHError @@ -370,6 +378,7 @@ def write_partitioned( base_name=base_name, table_definition=table_definition, index_columns=index_columns, + special_instructions=special_instructions, ) _JParquetTools.writeKeyValuePartitionedTable(table.j_object, destination_dir, write_instructions) except Exception as e: @@ -386,7 +395,8 @@ def batch_write( max_dictionary_size: Optional[int] = None, target_page_size: Optional[int] = None, generate_metadata_files: Optional[bool] = None, - index_columns: Optional[Sequence[Sequence[str]]] = None + index_columns: Optional[Sequence[Sequence[str]]] = None, + special_instructions: Optional[s3.S3Instructions] = None ): """ Writes tables to disk in parquet format to a supplied set of paths. @@ -394,7 +404,7 @@ def batch_write( Args: tables (List[Table]): the source tables - paths (List[str]): the destination paths. Any non-existing directories in the paths provided are + paths (List[str]): the destination paths or URIs. Any non-existing directories in the paths provided are created. If there is an error, any intermediate directories previously created are removed; note this makes this method unsafe for concurrent use table_definition (Optional[Union[Dict[str, DType], List[Column]]]): the table definition to use for writing. @@ -420,6 +430,8 @@ def batch_write( source table. This argument can be used to narrow the set of indexes to write, or to be explicit about the expected set of indexes present on all sources. Indexes that are specified but missing will be computed on demand. + special_instructions (Optional[s3.S3Instructions]): Special instructions for writing parquet files, useful when + writing files to a non-local file system, like S3. By default, None. 
Raises: DHError @@ -435,6 +447,7 @@ def batch_write( generate_metadata_files=generate_metadata_files, table_definition=table_definition, index_columns=index_columns, + special_instructions=special_instructions, ) _JParquetTools.writeTables([t.j_table for t in tables], _j_string_array(paths), write_instructions) except Exception as e: From 57d0188ca8f857dda16e41f66fb254250b094494 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Thu, 15 Aug 2024 13:56:03 -0400 Subject: [PATCH 21/43] fix: Update web version 0.89.0 (#5941) Release notes https://github.com/deephaven/web-client-ui/releases/tag/v0.89.0 [0.89.0](https://github.com/deephaven/web-client-ui/compare/v0.88.0...v0.89.0) (2024-08-15) - Features * Refactor console objects menu ([#2013](https://github.com/deephaven/web-client-ui/issues/2013)) ([8251180](https://github.com/deephaven/web-client-ui/commit/825118048326d3622aec2e4b851d81e8b7d93e35)) - Bug Fixes - Errors thrown during a grid update are not caught ([#2188](https://github.com/deephaven/web-client-ui/issues/2188)) ([2e59b92](https://github.com/deephaven/web-client-ui/commit/2e59b92b8ceac211c1d27931513e57136a9fa42c)) - Proxy model not setting defined values in parent class ([#2187](https://github.com/deephaven/web-client-ui/issues/2187)) ([5f9cf7f](https://github.com/deephaven/web-client-ui/commit/5f9cf7f4f39cb19f680e38f907d67201389fea7f)) - Restrict @adobe/spectrum imports ([#2179](https://github.com/deephaven/web-client-ui/issues/2179)) ([a257296](https://github.com/deephaven/web-client-ui/commit/a257296f1433d158439e6ea1b341c81551a38c11)) Co-authored-by: deephaven-internal <66694643+deephaven-internal@users.noreply.github.com> --- web/client-ui/Dockerfile | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/web/client-ui/Dockerfile b/web/client-ui/Dockerfile index 85f28a22994..4ea68fceda6 100644 --- a/web/client-ui/Dockerfile +++ b/web/client-ui/Dockerfile @@ -2,10 +2,10 @@ FROM deephaven/node:local-build WORKDIR /usr/src/app # Most of the time, these versions are the same, except in cases where a patch only affects one of the packages -ARG WEB_VERSION=0.88.0 -ARG GRID_VERSION=0.88.0 -ARG CHART_VERSION=0.88.0 -ARG WIDGET_VERSION=0.88.0 +ARG WEB_VERSION=0.89.0 +ARG GRID_VERSION=0.89.0 +ARG CHART_VERSION=0.89.0 +ARG WIDGET_VERSION=0.89.0 # Pull in the published code-studio package from npmjs and extract is RUN set -eux; \ From 4d0b82f3dbef8b6d3b8ea832d0627fcc94bb162d Mon Sep 17 00:00:00 2001 From: Ryan Caudy Date: Thu, 15 Aug 2024 14:30:05 -0400 Subject: [PATCH 22/43] fix: Fix PartitioningColumnDataIndex: region RowSet provenance and index RowSet tracking (#5942) --- .../regioned/PartitioningColumnDataIndex.java | 2 +- .../regioned/RegionedColumnSourceManager.java | 16 ++++++++++------ 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/engine/table/src/main/java/io/deephaven/engine/table/impl/sources/regioned/PartitioningColumnDataIndex.java b/engine/table/src/main/java/io/deephaven/engine/table/impl/sources/regioned/PartitioningColumnDataIndex.java index fb972aa92e9..ae95d86b8ee 100644 --- a/engine/table/src/main/java/io/deephaven/engine/table/impl/sources/regioned/PartitioningColumnDataIndex.java +++ b/engine/table/src/main/java/io/deephaven/engine/table/impl/sources/regioned/PartitioningColumnDataIndex.java @@ -216,7 +216,7 @@ private void handleKey( indexKeySource.set(addedKeyPos, locationKey); indexRowSetSource.ensureCapacity(addedKeyPos + 1); - indexRowSetSource.set(addedKeyPos, 
regionRowSet.shift(regionFirstRowKey)); + indexRowSetSource.set(addedKeyPos, regionRowSet.shift(regionFirstRowKey).toTracking()); } else { // noinspection DataFlowIssue final WritableRowSet existingRowSet = indexRowSetSource.get(pos).writableCast(); diff --git a/engine/table/src/main/java/io/deephaven/engine/table/impl/sources/regioned/RegionedColumnSourceManager.java b/engine/table/src/main/java/io/deephaven/engine/table/impl/sources/regioned/RegionedColumnSourceManager.java index 0344e117d75..1889f5dbb25 100644 --- a/engine/table/src/main/java/io/deephaven/engine/table/impl/sources/regioned/RegionedColumnSourceManager.java +++ b/engine/table/src/main/java/io/deephaven/engine/table/impl/sources/regioned/RegionedColumnSourceManager.java @@ -291,11 +291,10 @@ private WritableRowSet update(final boolean initializing) { if (entry.pollUpdates(addedRowSetBuilder)) { // Changes were detected, update the row set in the table and mark the row/column as modified. /* - * Since TableLocationState.getRowSet() returns a copy(), we should consider adding an UpdateCommitter - * to close() the previous row sets for modified locations. This is not important for current - * implementations, since they always allocate new, flat RowSets. + * Since TableLocationState.getRowSet() returns a copy(), we own entry.rowSetAtLastUpdate and can + * propagate it without making another copy(). */ - rowSetSource.set(entry.regionIndex, entry.location.getRowSet()); + rowSetSource.set(entry.regionIndex, entry.rowSetAtLastUpdate); if (modifiedRegionBuilder != null) { modifiedRegionBuilder.appendKey(entry.regionIndex); } @@ -346,7 +345,7 @@ private WritableRowSet update(final boolean initializing) { wcs.set(entry.regionIndex, entry.location.getKey().getPartitionValue(key))); // @formatter:on locationSource.set(entry.regionIndex, entry.location); - rowSetSource.set(entry.regionIndex, entry.location.getRowSet()); + rowSetSource.set(entry.regionIndex, entry.rowSetAtLastUpdate); }); } @@ -574,7 +573,12 @@ private boolean pollUpdates(final RowSetBuilderSequential addedRowSetBuilder) { .appendRange(regionFirstKey + subRegionFirstKey, regionFirstKey + subRegionLastKey)); } } finally { - rowSetAtLastUpdate.close(); + /* + * Since we record rowSetAtLastUpdate in the RowSet column of our includedLocationsTable, we must not + * close() the old rowSetAtLastUpdate here. We should instead consider adding an UpdateCommitter to + * close() the previous RowSets for modified locations, but this is not important for current + * implementations since they always allocate new, flat RowSets. + */ rowSetAtLastUpdate = updateRowSet; } // There was a change to the row set. 
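The py/server changes earlier in this series add write-oriented options (write_part_size, num_concurrent_write_parts) to S3Instructions and a special_instructions argument to the parquet write functions. The following is a minimal sketch of how those options might be combined, not part of any patch in this series: the bucket, endpoint, and example table are hypothetical, and other S3Instructions parameters (credentials, region, and so on) are left at their defaults.

    from deephaven import empty_table, parquet
    from deephaven.experimental import s3

    # Hypothetical source table; any Deephaven table works here.
    table = empty_table(10).update(["X = i"])

    # Multipart upload tuning: 10 MiB parts (the documented default; the minimum
    # allowed is 5 MiB) and up to 32 parts uploaded concurrently without blocking.
    instructions = s3.S3Instructions(
        endpoint_override="https://s3.example.com",  # hypothetical S3-compatible endpoint
        anonymous_access=True,                       # or supply credentials instead
        write_part_size=10 * 1024 * 1024,
        num_concurrent_write_parts=32,
    )

    # special_instructions supplies the S3 configuration used when writing to the URI.
    parquet.write(table, "s3://example-bucket/demo.parquet", special_instructions=instructions)

The same special_instructions argument applies to write_partitioned and batch_write, per the docstring additions above.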
From 00625b240380bedd82f8049f1b90f5d84c238bb8 Mon Sep 17 00:00:00 2001 From: Devin Smith Date: Thu, 15 Aug 2024 12:00:48 -0700 Subject: [PATCH 23/43] fix: Only run nightly-publish-ci for deephaven project (#5943) --- .github/workflows/nightly-publish-ci.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/nightly-publish-ci.yml b/.github/workflows/nightly-publish-ci.yml index a4e2bcbb242..a23f9e04a2d 100644 --- a/.github/workflows/nightly-publish-ci.yml +++ b/.github/workflows/nightly-publish-ci.yml @@ -12,6 +12,7 @@ concurrency: jobs: nightly-publish: runs-on: ubuntu-24.04 + if: ${{ github.repository_owner == 'deephaven' }} steps: - name: Checkout From c74e57000221f0c79aba4833599da13402b04b65 Mon Sep 17 00:00:00 2001 From: Shivam Malhotra Date: Thu, 15 Aug 2024 16:04:55 -0500 Subject: [PATCH 24/43] feat: Added support to upcast numeric types when reading parquet (#5923) --- ...er.java => BooleanAsByteMaterializer.java} | 10 +- .../DoubleFromFloatMaterializer.java | 45 +++ .../materializers/DoubleMaterializer.java | 28 +- .../materializers/DoubleMaterializerBase.java | 39 +++ .../IntFromBooleanMaterializer.java | 45 +++ .../IntFromUnsignedByteMaterializer.java | 45 +++ .../IntFromUnsignedShortMaterializer.java | 45 +++ .../base/materializers/IntMaterializer.java | 28 +- .../materializers/IntMaterializerBase.java | 39 +++ .../LongFromBooleanMaterializer.java | 45 +++ .../LongFromIntMaterializer.java | 45 +++ .../LongFromUnsignedByteMaterializer.java | 45 +++ .../LongFromUnsignedIntMaterializer.java | 6 +- .../LongFromUnsignedShortMaterializer.java | 41 +++ .../ShortFromBooleanMaterializer.java | 45 +++ .../ShortFromUnsignedByteMaterializer.java | 41 +++ .../base/materializers/ShortMaterializer.java | 28 +- .../materializers/ShortMaterializerBase.java | 39 +++ .../table/location/ParquetColumnLocation.java | 108 +++++- .../pagestore/topage/ToBooleanAsBytePage.java | 4 +- .../table/pagestore/topage/ToBytePage.java | 35 +- .../table/pagestore/topage/ToCharPage.java | 4 - .../table/pagestore/topage/ToDoublePage.java | 35 +- .../table/pagestore/topage/ToFloatPage.java | 2 +- .../table/pagestore/topage/ToIntPage.java | 49 ++- .../table/pagestore/topage/ToLongPage.java | 36 ++ .../table/pagestore/topage/ToShortPage.java | 44 ++- .../table/ParquetTableReadWriteTest.java | 311 ++++++++++++++++++ py/server/tests/test_parquet.py | 28 +- .../ReplicatePageMaterializers.java | 133 +++++++- .../replicators/ReplicateToPage.java | 8 +- 31 files changed, 1297 insertions(+), 159 deletions(-) rename extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/{BoolMaterializer.java => BooleanAsByteMaterializer.java} (78%) create mode 100644 extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/DoubleFromFloatMaterializer.java create mode 100644 extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/DoubleMaterializerBase.java create mode 100644 extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/IntFromBooleanMaterializer.java create mode 100644 extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/IntFromUnsignedByteMaterializer.java create mode 100644 extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/IntFromUnsignedShortMaterializer.java create mode 100644 extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/IntMaterializerBase.java create mode 100644 
extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/LongFromBooleanMaterializer.java create mode 100644 extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/LongFromIntMaterializer.java create mode 100644 extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/LongFromUnsignedByteMaterializer.java create mode 100644 extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/LongFromUnsignedShortMaterializer.java create mode 100644 extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/ShortFromBooleanMaterializer.java create mode 100644 extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/ShortFromUnsignedByteMaterializer.java create mode 100644 extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/ShortMaterializerBase.java diff --git a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/BoolMaterializer.java b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/BooleanAsByteMaterializer.java similarity index 78% rename from extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/BoolMaterializer.java rename to extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/BooleanAsByteMaterializer.java index 6e5b7f56994..e3475a5a6d8 100644 --- a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/BoolMaterializer.java +++ b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/BooleanAsByteMaterializer.java @@ -9,17 +9,17 @@ import java.util.Arrays; -public class BoolMaterializer implements PageMaterializer { +public class BooleanAsByteMaterializer implements PageMaterializer { public static final PageMaterializerFactory FACTORY = new PageMaterializerFactory() { @Override public PageMaterializer makeMaterializerWithNulls(ValuesReader dataReader, Object nullValue, int numValues) { - return new BoolMaterializer(dataReader, (byte) nullValue, numValues); + return new BooleanAsByteMaterializer(dataReader, (byte) nullValue, numValues); } @Override public PageMaterializer makeMaterializerNonNull(ValuesReader dataReader, int numValues) { - return new BoolMaterializer(dataReader, numValues); + return new BooleanAsByteMaterializer(dataReader, numValues); } }; @@ -28,11 +28,11 @@ public PageMaterializer makeMaterializerNonNull(ValuesReader dataReader, int num private final byte nullValue; private final byte[] data; - private BoolMaterializer(ValuesReader dataReader, int numValues) { + private BooleanAsByteMaterializer(ValuesReader dataReader, int numValues) { this(dataReader, (byte) 0, numValues); } - private BoolMaterializer(ValuesReader dataReader, byte nullValue, int numValues) { + private BooleanAsByteMaterializer(ValuesReader dataReader, byte nullValue, int numValues) { this.dataReader = dataReader; this.nullValue = nullValue; this.data = new byte[numValues]; diff --git a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/DoubleFromFloatMaterializer.java b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/DoubleFromFloatMaterializer.java new file mode 100644 index 00000000000..4d97c220c7a --- /dev/null +++ b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/DoubleFromFloatMaterializer.java @@ -0,0 +1,45 @@ +// +// Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending +// +// ****** AUTO-GENERATED CLASS - 
DO NOT EDIT MANUALLY +// ****** Edit DoubleMaterializer and run "./gradlew replicatePageMaterializers" to regenerate +// +// @formatter:off +package io.deephaven.parquet.base.materializers; + +import io.deephaven.parquet.base.PageMaterializer; +import io.deephaven.parquet.base.PageMaterializerFactory; +import org.apache.parquet.column.values.ValuesReader; + +public class DoubleFromFloatMaterializer extends DoubleMaterializerBase implements PageMaterializer { + + public static final PageMaterializerFactory FACTORY = new PageMaterializerFactory() { + @Override + public PageMaterializer makeMaterializerWithNulls(ValuesReader dataReader, Object nullValue, int numValues) { + return new DoubleFromFloatMaterializer(dataReader, (double) nullValue, numValues); + } + + @Override + public PageMaterializer makeMaterializerNonNull(ValuesReader dataReader, int numValues) { + return new DoubleFromFloatMaterializer(dataReader, numValues); + } + }; + + private final ValuesReader dataReader; + + private DoubleFromFloatMaterializer(ValuesReader dataReader, int numValues) { + this(dataReader, 0, numValues); + } + + private DoubleFromFloatMaterializer(ValuesReader dataReader, double nullValue, int numValues) { + super(nullValue, numValues); + this.dataReader = dataReader; + } + + @Override + public void fillValues(int startIndex, int endIndex) { + for (int ii = startIndex; ii < endIndex; ii++) { + data[ii] = dataReader.readFloat(); + } + } +} diff --git a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/DoubleMaterializer.java b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/DoubleMaterializer.java index f5143679f1f..c8345eff522 100644 --- a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/DoubleMaterializer.java +++ b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/DoubleMaterializer.java @@ -2,7 +2,7 @@ // Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending // // ****** AUTO-GENERATED CLASS - DO NOT EDIT MANUALLY -// ****** Edit FloatMaterializer and run "./gradlew replicatePageMaterializers" to regenerate +// ****** Edit LongMaterializer and run "./gradlew replicatePageMaterializers" to regenerate // // @formatter:off package io.deephaven.parquet.base.materializers; @@ -11,9 +11,7 @@ import io.deephaven.parquet.base.PageMaterializerFactory; import org.apache.parquet.column.values.ValuesReader; -import java.util.Arrays; - -public class DoubleMaterializer implements PageMaterializer { +public class DoubleMaterializer extends DoubleMaterializerBase implements PageMaterializer { public static final PageMaterializerFactory FACTORY = new PageMaterializerFactory() { @Override @@ -29,22 +27,13 @@ public PageMaterializer makeMaterializerNonNull(ValuesReader dataReader, int num private final ValuesReader dataReader; - private final double nullValue; - private final double[] data; - private DoubleMaterializer(ValuesReader dataReader, int numValues) { this(dataReader, 0, numValues); } private DoubleMaterializer(ValuesReader dataReader, double nullValue, int numValues) { + super(nullValue, numValues); this.dataReader = dataReader; - this.nullValue = nullValue; - this.data = new double[numValues]; - } - - @Override - public void fillNulls(int startIndex, int endIndex) { - Arrays.fill(data, startIndex, endIndex, nullValue); } @Override @@ -53,15 +42,4 @@ public void fillValues(int startIndex, int endIndex) { data[ii] = dataReader.readDouble(); } } - - @Override - public Object fillAll() { - 
fillValues(0, data.length); - return data; - } - - @Override - public Object data() { - return data; - } } diff --git a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/DoubleMaterializerBase.java b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/DoubleMaterializerBase.java new file mode 100644 index 00000000000..22e1c430f33 --- /dev/null +++ b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/DoubleMaterializerBase.java @@ -0,0 +1,39 @@ +// +// Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending +// +// ****** AUTO-GENERATED CLASS - DO NOT EDIT MANUALLY +// ****** Edit LongMaterializerBase and run "./gradlew replicatePageMaterializers" to regenerate +// +// @formatter:off +package io.deephaven.parquet.base.materializers; + +import io.deephaven.parquet.base.PageMaterializer; + +import java.util.Arrays; + +abstract class DoubleMaterializerBase implements PageMaterializer { + + private final double nullValue; + final double[] data; + + DoubleMaterializerBase(double nullValue, int numValues) { + this.nullValue = nullValue; + this.data = new double[numValues]; + } + + @Override + public void fillNulls(int startIndex, int endIndex) { + Arrays.fill(data, startIndex, endIndex, nullValue); + } + + @Override + public Object fillAll() { + fillValues(0, data.length); + return data; + } + + @Override + public Object data() { + return data; + } +} diff --git a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/IntFromBooleanMaterializer.java b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/IntFromBooleanMaterializer.java new file mode 100644 index 00000000000..6255c5d6bde --- /dev/null +++ b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/IntFromBooleanMaterializer.java @@ -0,0 +1,45 @@ +// +// Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending +// +// ****** AUTO-GENERATED CLASS - DO NOT EDIT MANUALLY +// ****** Edit IntMaterializer and run "./gradlew replicatePageMaterializers" to regenerate +// +// @formatter:off +package io.deephaven.parquet.base.materializers; + +import io.deephaven.parquet.base.PageMaterializer; +import io.deephaven.parquet.base.PageMaterializerFactory; +import org.apache.parquet.column.values.ValuesReader; + +public class IntFromBooleanMaterializer extends IntMaterializerBase implements PageMaterializer { + + public static final PageMaterializerFactory FACTORY = new PageMaterializerFactory() { + @Override + public PageMaterializer makeMaterializerWithNulls(ValuesReader dataReader, Object nullValue, int numValues) { + return new IntFromBooleanMaterializer(dataReader, (int) nullValue, numValues); + } + + @Override + public PageMaterializer makeMaterializerNonNull(ValuesReader dataReader, int numValues) { + return new IntFromBooleanMaterializer(dataReader, numValues); + } + }; + + private final ValuesReader dataReader; + + private IntFromBooleanMaterializer(ValuesReader dataReader, int numValues) { + this(dataReader, 0, numValues); + } + + private IntFromBooleanMaterializer(ValuesReader dataReader, int nullValue, int numValues) { + super(nullValue, numValues); + this.dataReader = dataReader; + } + + @Override + public void fillValues(int startIndex, int endIndex) { + for (int ii = startIndex; ii < endIndex; ii++) { + data[ii] = dataReader.readBoolean() ? 
1 : 0; + } + } +} diff --git a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/IntFromUnsignedByteMaterializer.java b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/IntFromUnsignedByteMaterializer.java new file mode 100644 index 00000000000..30b0ae5a86a --- /dev/null +++ b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/IntFromUnsignedByteMaterializer.java @@ -0,0 +1,45 @@ +// +// Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending +// +// ****** AUTO-GENERATED CLASS - DO NOT EDIT MANUALLY +// ****** Edit LongFromUnsignedShortMaterializer and run "./gradlew replicatePageMaterializers" to regenerate +// +// @formatter:off +package io.deephaven.parquet.base.materializers; + +import io.deephaven.parquet.base.PageMaterializer; +import io.deephaven.parquet.base.PageMaterializerFactory; +import org.apache.parquet.column.values.ValuesReader; + +public class IntFromUnsignedByteMaterializer extends IntMaterializerBase implements PageMaterializer { + + public static final PageMaterializerFactory FACTORY = new PageMaterializerFactory() { + @Override + public PageMaterializer makeMaterializerWithNulls(ValuesReader dataReader, Object nullValue, int numValues) { + return new IntFromUnsignedByteMaterializer(dataReader, (int) nullValue, numValues); + } + + @Override + public PageMaterializer makeMaterializerNonNull(ValuesReader dataReader, int numValues) { + return new IntFromUnsignedByteMaterializer(dataReader, numValues); + } + }; + + private final ValuesReader dataReader; + + private IntFromUnsignedByteMaterializer(ValuesReader dataReader, int numValues) { + this(dataReader, 0, numValues); + } + + private IntFromUnsignedByteMaterializer(ValuesReader dataReader, int nullValue, int numValues) { + super(nullValue, numValues); + this.dataReader = dataReader; + } + + @Override + public void fillValues(int startIndex, int endIndex) { + for (int ii = startIndex; ii < endIndex; ii++) { + data[ii] = Byte.toUnsignedInt((byte) dataReader.readInteger()); + } + } +} diff --git a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/IntFromUnsignedShortMaterializer.java b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/IntFromUnsignedShortMaterializer.java new file mode 100644 index 00000000000..369a83d56db --- /dev/null +++ b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/IntFromUnsignedShortMaterializer.java @@ -0,0 +1,45 @@ +// +// Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending +// +// ****** AUTO-GENERATED CLASS - DO NOT EDIT MANUALLY +// ****** Edit LongFromUnsignedShortMaterializer and run "./gradlew replicatePageMaterializers" to regenerate +// +// @formatter:off +package io.deephaven.parquet.base.materializers; + +import io.deephaven.parquet.base.PageMaterializer; +import io.deephaven.parquet.base.PageMaterializerFactory; +import org.apache.parquet.column.values.ValuesReader; + +public class IntFromUnsignedShortMaterializer extends IntMaterializerBase implements PageMaterializer { + + public static final PageMaterializerFactory FACTORY = new PageMaterializerFactory() { + @Override + public PageMaterializer makeMaterializerWithNulls(ValuesReader dataReader, Object nullValue, int numValues) { + return new IntFromUnsignedShortMaterializer(dataReader, (int) nullValue, numValues); + } + + @Override + public PageMaterializer makeMaterializerNonNull(ValuesReader dataReader, int numValues) { + return new 
IntFromUnsignedShortMaterializer(dataReader, numValues); + } + }; + + private final ValuesReader dataReader; + + private IntFromUnsignedShortMaterializer(ValuesReader dataReader, int numValues) { + this(dataReader, 0, numValues); + } + + private IntFromUnsignedShortMaterializer(ValuesReader dataReader, int nullValue, int numValues) { + super(nullValue, numValues); + this.dataReader = dataReader; + } + + @Override + public void fillValues(int startIndex, int endIndex) { + for (int ii = startIndex; ii < endIndex; ii++) { + data[ii] = Short.toUnsignedInt((short) dataReader.readInteger()); + } + } +} diff --git a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/IntMaterializer.java b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/IntMaterializer.java index 97f47e23ebe..9b6f423c08a 100644 --- a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/IntMaterializer.java +++ b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/IntMaterializer.java @@ -2,7 +2,7 @@ // Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending // // ****** AUTO-GENERATED CLASS - DO NOT EDIT MANUALLY -// ****** Edit FloatMaterializer and run "./gradlew replicatePageMaterializers" to regenerate +// ****** Edit LongMaterializer and run "./gradlew replicatePageMaterializers" to regenerate // // @formatter:off package io.deephaven.parquet.base.materializers; @@ -11,9 +11,7 @@ import io.deephaven.parquet.base.PageMaterializerFactory; import org.apache.parquet.column.values.ValuesReader; -import java.util.Arrays; - -public class IntMaterializer implements PageMaterializer { +public class IntMaterializer extends IntMaterializerBase implements PageMaterializer { public static final PageMaterializerFactory FACTORY = new PageMaterializerFactory() { @Override @@ -29,22 +27,13 @@ public PageMaterializer makeMaterializerNonNull(ValuesReader dataReader, int num private final ValuesReader dataReader; - private final int nullValue; - private final int[] data; - private IntMaterializer(ValuesReader dataReader, int numValues) { this(dataReader, 0, numValues); } private IntMaterializer(ValuesReader dataReader, int nullValue, int numValues) { + super(nullValue, numValues); this.dataReader = dataReader; - this.nullValue = nullValue; - this.data = new int[numValues]; - } - - @Override - public void fillNulls(int startIndex, int endIndex) { - Arrays.fill(data, startIndex, endIndex, nullValue); } @Override @@ -53,15 +42,4 @@ public void fillValues(int startIndex, int endIndex) { data[ii] = dataReader.readInteger(); } } - - @Override - public Object fillAll() { - fillValues(0, data.length); - return data; - } - - @Override - public Object data() { - return data; - } } diff --git a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/IntMaterializerBase.java b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/IntMaterializerBase.java new file mode 100644 index 00000000000..a307e0c36d7 --- /dev/null +++ b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/IntMaterializerBase.java @@ -0,0 +1,39 @@ +// +// Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending +// +// ****** AUTO-GENERATED CLASS - DO NOT EDIT MANUALLY +// ****** Edit LongMaterializerBase and run "./gradlew replicatePageMaterializers" to regenerate +// +// @formatter:off +package io.deephaven.parquet.base.materializers; + +import io.deephaven.parquet.base.PageMaterializer; + 
+import java.util.Arrays; + +abstract class IntMaterializerBase implements PageMaterializer { + + private final int nullValue; + final int[] data; + + IntMaterializerBase(int nullValue, int numValues) { + this.nullValue = nullValue; + this.data = new int[numValues]; + } + + @Override + public void fillNulls(int startIndex, int endIndex) { + Arrays.fill(data, startIndex, endIndex, nullValue); + } + + @Override + public Object fillAll() { + fillValues(0, data.length); + return data; + } + + @Override + public Object data() { + return data; + } +} diff --git a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/LongFromBooleanMaterializer.java b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/LongFromBooleanMaterializer.java new file mode 100644 index 00000000000..b3ff8fea210 --- /dev/null +++ b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/LongFromBooleanMaterializer.java @@ -0,0 +1,45 @@ +// +// Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending +// +// ****** AUTO-GENERATED CLASS - DO NOT EDIT MANUALLY +// ****** Edit LongMaterializer and run "./gradlew replicatePageMaterializers" to regenerate +// +// @formatter:off +package io.deephaven.parquet.base.materializers; + +import io.deephaven.parquet.base.PageMaterializer; +import io.deephaven.parquet.base.PageMaterializerFactory; +import org.apache.parquet.column.values.ValuesReader; + +public class LongFromBooleanMaterializer extends LongMaterializerBase implements PageMaterializer { + + public static final PageMaterializerFactory FACTORY = new PageMaterializerFactory() { + @Override + public PageMaterializer makeMaterializerWithNulls(ValuesReader dataReader, Object nullValue, int numValues) { + return new LongFromBooleanMaterializer(dataReader, (long) nullValue, numValues); + } + + @Override + public PageMaterializer makeMaterializerNonNull(ValuesReader dataReader, int numValues) { + return new LongFromBooleanMaterializer(dataReader, numValues); + } + }; + + private final ValuesReader dataReader; + + private LongFromBooleanMaterializer(ValuesReader dataReader, int numValues) { + this(dataReader, 0, numValues); + } + + private LongFromBooleanMaterializer(ValuesReader dataReader, long nullValue, int numValues) { + super(nullValue, numValues); + this.dataReader = dataReader; + } + + @Override + public void fillValues(int startIndex, int endIndex) { + for (int ii = startIndex; ii < endIndex; ii++) { + data[ii] = dataReader.readBoolean() ? 
1 : 0; + } + } +} diff --git a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/LongFromIntMaterializer.java b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/LongFromIntMaterializer.java new file mode 100644 index 00000000000..a2359344597 --- /dev/null +++ b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/LongFromIntMaterializer.java @@ -0,0 +1,45 @@ +// +// Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending +// +// ****** AUTO-GENERATED CLASS - DO NOT EDIT MANUALLY +// ****** Edit LongMaterializer and run "./gradlew replicatePageMaterializers" to regenerate +// +// @formatter:off +package io.deephaven.parquet.base.materializers; + +import io.deephaven.parquet.base.PageMaterializer; +import io.deephaven.parquet.base.PageMaterializerFactory; +import org.apache.parquet.column.values.ValuesReader; + +public class LongFromIntMaterializer extends LongMaterializerBase implements PageMaterializer { + + public static final PageMaterializerFactory FACTORY = new PageMaterializerFactory() { + @Override + public PageMaterializer makeMaterializerWithNulls(ValuesReader dataReader, Object nullValue, int numValues) { + return new LongFromIntMaterializer(dataReader, (long) nullValue, numValues); + } + + @Override + public PageMaterializer makeMaterializerNonNull(ValuesReader dataReader, int numValues) { + return new LongFromIntMaterializer(dataReader, numValues); + } + }; + + private final ValuesReader dataReader; + + private LongFromIntMaterializer(ValuesReader dataReader, int numValues) { + this(dataReader, 0, numValues); + } + + private LongFromIntMaterializer(ValuesReader dataReader, long nullValue, int numValues) { + super(nullValue, numValues); + this.dataReader = dataReader; + } + + @Override + public void fillValues(int startIndex, int endIndex) { + for (int ii = startIndex; ii < endIndex; ii++) { + data[ii] = dataReader.readInteger(); + } + } +} diff --git a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/LongFromUnsignedByteMaterializer.java b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/LongFromUnsignedByteMaterializer.java new file mode 100644 index 00000000000..014d6f2d487 --- /dev/null +++ b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/LongFromUnsignedByteMaterializer.java @@ -0,0 +1,45 @@ +// +// Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending +// +// ****** AUTO-GENERATED CLASS - DO NOT EDIT MANUALLY +// ****** Edit LongFromUnsignedShortMaterializer and run "./gradlew replicatePageMaterializers" to regenerate +// +// @formatter:off +package io.deephaven.parquet.base.materializers; + +import io.deephaven.parquet.base.PageMaterializer; +import io.deephaven.parquet.base.PageMaterializerFactory; +import org.apache.parquet.column.values.ValuesReader; + +public class LongFromUnsignedByteMaterializer extends LongMaterializerBase implements PageMaterializer { + + public static final PageMaterializerFactory FACTORY = new PageMaterializerFactory() { + @Override + public PageMaterializer makeMaterializerWithNulls(ValuesReader dataReader, Object nullValue, int numValues) { + return new LongFromUnsignedByteMaterializer(dataReader, (long) nullValue, numValues); + } + + @Override + public PageMaterializer makeMaterializerNonNull(ValuesReader dataReader, int numValues) { + return new LongFromUnsignedByteMaterializer(dataReader, numValues); + } + }; + + private final ValuesReader dataReader; + + 
private LongFromUnsignedByteMaterializer(ValuesReader dataReader, int numValues) { + this(dataReader, 0, numValues); + } + + private LongFromUnsignedByteMaterializer(ValuesReader dataReader, long nullValue, int numValues) { + super(nullValue, numValues); + this.dataReader = dataReader; + } + + @Override + public void fillValues(int startIndex, int endIndex) { + for (int ii = startIndex; ii < endIndex; ii++) { + data[ii] = Byte.toUnsignedLong((byte) dataReader.readInteger()); + } + } +} diff --git a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/LongFromUnsignedIntMaterializer.java b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/LongFromUnsignedIntMaterializer.java index 297d7ae6cbb..cd37ad39c6a 100644 --- a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/LongFromUnsignedIntMaterializer.java +++ b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/LongFromUnsignedIntMaterializer.java @@ -1,6 +1,10 @@ // // Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending // +// ****** AUTO-GENERATED CLASS - DO NOT EDIT MANUALLY +// ****** Edit LongFromUnsignedShortMaterializer and run "./gradlew replicatePageMaterializers" to regenerate +// +// @formatter:off package io.deephaven.parquet.base.materializers; import io.deephaven.parquet.base.PageMaterializer; @@ -35,7 +39,7 @@ private LongFromUnsignedIntMaterializer(ValuesReader dataReader, long nullValue, @Override public void fillValues(int startIndex, int endIndex) { for (int ii = startIndex; ii < endIndex; ii++) { - data[ii] = Integer.toUnsignedLong(dataReader.readInteger()); + data[ii] = Integer.toUnsignedLong((int) dataReader.readInteger()); } } } diff --git a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/LongFromUnsignedShortMaterializer.java b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/LongFromUnsignedShortMaterializer.java new file mode 100644 index 00000000000..cf6f10049b8 --- /dev/null +++ b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/LongFromUnsignedShortMaterializer.java @@ -0,0 +1,41 @@ +// +// Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending +// +package io.deephaven.parquet.base.materializers; + +import io.deephaven.parquet.base.PageMaterializer; +import io.deephaven.parquet.base.PageMaterializerFactory; +import org.apache.parquet.column.values.ValuesReader; + +public class LongFromUnsignedShortMaterializer extends LongMaterializerBase implements PageMaterializer { + + public static final PageMaterializerFactory FACTORY = new PageMaterializerFactory() { + @Override + public PageMaterializer makeMaterializerWithNulls(ValuesReader dataReader, Object nullValue, int numValues) { + return new LongFromUnsignedShortMaterializer(dataReader, (long) nullValue, numValues); + } + + @Override + public PageMaterializer makeMaterializerNonNull(ValuesReader dataReader, int numValues) { + return new LongFromUnsignedShortMaterializer(dataReader, numValues); + } + }; + + private final ValuesReader dataReader; + + private LongFromUnsignedShortMaterializer(ValuesReader dataReader, int numValues) { + this(dataReader, 0, numValues); + } + + private LongFromUnsignedShortMaterializer(ValuesReader dataReader, long nullValue, int numValues) { + super(nullValue, numValues); + this.dataReader = dataReader; + } + + @Override + public void fillValues(int startIndex, int endIndex) { + for (int ii = startIndex; ii < endIndex; ii++) 
{ + data[ii] = Short.toUnsignedLong((short) dataReader.readInteger()); + } + } +} diff --git a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/ShortFromBooleanMaterializer.java b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/ShortFromBooleanMaterializer.java new file mode 100644 index 00000000000..df09abb1b8f --- /dev/null +++ b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/ShortFromBooleanMaterializer.java @@ -0,0 +1,45 @@ +// +// Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending +// +// ****** AUTO-GENERATED CLASS - DO NOT EDIT MANUALLY +// ****** Edit ShortMaterializer and run "./gradlew replicatePageMaterializers" to regenerate +// +// @formatter:off +package io.deephaven.parquet.base.materializers; + +import io.deephaven.parquet.base.PageMaterializer; +import io.deephaven.parquet.base.PageMaterializerFactory; +import org.apache.parquet.column.values.ValuesReader; + +public class ShortFromBooleanMaterializer extends ShortMaterializerBase implements PageMaterializer { + + public static final PageMaterializerFactory FACTORY = new PageMaterializerFactory() { + @Override + public PageMaterializer makeMaterializerWithNulls(ValuesReader dataReader, Object nullValue, int numValues) { + return new ShortFromBooleanMaterializer(dataReader, (short) nullValue, numValues); + } + + @Override + public PageMaterializer makeMaterializerNonNull(ValuesReader dataReader, int numValues) { + return new ShortFromBooleanMaterializer(dataReader, numValues); + } + }; + + private final ValuesReader dataReader; + + private ShortFromBooleanMaterializer(ValuesReader dataReader, int numValues) { + this(dataReader, (short) 0, numValues); + } + + private ShortFromBooleanMaterializer(ValuesReader dataReader, short nullValue, int numValues) { + super(nullValue, numValues); + this.dataReader = dataReader; + } + + @Override + public void fillValues(int startIndex, int endIndex) { + for (int ii = startIndex; ii < endIndex; ii++) { + data[ii] = (short) (dataReader.readBoolean() ? 
1 : 0); + } + } +} diff --git a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/ShortFromUnsignedByteMaterializer.java b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/ShortFromUnsignedByteMaterializer.java new file mode 100644 index 00000000000..833724a06cc --- /dev/null +++ b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/ShortFromUnsignedByteMaterializer.java @@ -0,0 +1,41 @@ +// +// Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending +// +package io.deephaven.parquet.base.materializers; + +import io.deephaven.parquet.base.PageMaterializer; +import io.deephaven.parquet.base.PageMaterializerFactory; +import org.apache.parquet.column.values.ValuesReader; + +public class ShortFromUnsignedByteMaterializer extends ShortMaterializerBase implements PageMaterializer { + + public static final PageMaterializerFactory FACTORY = new PageMaterializerFactory() { + @Override + public PageMaterializer makeMaterializerWithNulls(ValuesReader dataReader, Object nullValue, int numValues) { + return new ShortFromUnsignedByteMaterializer(dataReader, (short) nullValue, numValues); + } + + @Override + public PageMaterializer makeMaterializerNonNull(ValuesReader dataReader, int numValues) { + return new ShortFromUnsignedByteMaterializer(dataReader, numValues); + } + }; + + private final ValuesReader dataReader; + + private ShortFromUnsignedByteMaterializer(ValuesReader dataReader, int numValues) { + this(dataReader, (short) 0, numValues); + } + + private ShortFromUnsignedByteMaterializer(ValuesReader dataReader, short nullValue, int numValues) { + super(nullValue, numValues); + this.dataReader = dataReader; + } + + @Override + public void fillValues(int startIndex, int endIndex) { + for (int ii = startIndex; ii < endIndex; ii++) { + data[ii] = (short) Byte.toUnsignedInt((byte) dataReader.readInteger()); + } + } +} diff --git a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/ShortMaterializer.java b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/ShortMaterializer.java index b3c5eae6d9a..cfc389081f0 100644 --- a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/ShortMaterializer.java +++ b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/ShortMaterializer.java @@ -2,7 +2,7 @@ // Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending // // ****** AUTO-GENERATED CLASS - DO NOT EDIT MANUALLY -// ****** Edit CharMaterializer and run "./gradlew replicatePageMaterializers" to regenerate +// ****** Edit LongMaterializer and run "./gradlew replicatePageMaterializers" to regenerate // // @formatter:off package io.deephaven.parquet.base.materializers; @@ -11,9 +11,7 @@ import io.deephaven.parquet.base.PageMaterializerFactory; import org.apache.parquet.column.values.ValuesReader; -import java.util.Arrays; - -public class ShortMaterializer implements PageMaterializer { +public class ShortMaterializer extends ShortMaterializerBase implements PageMaterializer { public static final PageMaterializerFactory FACTORY = new PageMaterializerFactory() { @Override @@ -29,22 +27,13 @@ public PageMaterializer makeMaterializerNonNull(ValuesReader dataReader, int num private final ValuesReader dataReader; - private final short nullValue; - private final short[] data; - private ShortMaterializer(ValuesReader dataReader, int numValues) { this(dataReader, (short) 0, numValues); } private ShortMaterializer(ValuesReader 
dataReader, short nullValue, int numValues) { + super(nullValue, numValues); this.dataReader = dataReader; - this.nullValue = nullValue; - this.data = new short[numValues]; - } - - @Override - public void fillNulls(int startIndex, int endIndex) { - Arrays.fill(data, startIndex, endIndex, nullValue); } @Override @@ -53,15 +42,4 @@ public void fillValues(int startIndex, int endIndex) { data[ii] = (short) dataReader.readInteger(); } } - - @Override - public Object fillAll() { - fillValues(0, data.length); - return data; - } - - @Override - public Object data() { - return data; - } } diff --git a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/ShortMaterializerBase.java b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/ShortMaterializerBase.java new file mode 100644 index 00000000000..ff9265748e5 --- /dev/null +++ b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/ShortMaterializerBase.java @@ -0,0 +1,39 @@ +// +// Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending +// +// ****** AUTO-GENERATED CLASS - DO NOT EDIT MANUALLY +// ****** Edit LongMaterializerBase and run "./gradlew replicatePageMaterializers" to regenerate +// +// @formatter:off +package io.deephaven.parquet.base.materializers; + +import io.deephaven.parquet.base.PageMaterializer; + +import java.util.Arrays; + +abstract class ShortMaterializerBase implements PageMaterializer { + + private final short nullValue; + final short[] data; + + ShortMaterializerBase(short nullValue, int numValues) { + this.nullValue = nullValue; + this.data = new short[numValues]; + } + + @Override + public void fillNulls(int startIndex, int endIndex) { + Arrays.fill(data, startIndex, endIndex, nullValue); + } + + @Override + public Object fillAll() { + fillValues(0, data.length); + return data; + } + + @Override + public Object data() { + return data; + } +} diff --git a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/location/ParquetColumnLocation.java b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/location/ParquetColumnLocation.java index 4c6e53c4b7b..4ddebb33685 100644 --- a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/location/ParquetColumnLocation.java +++ b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/location/ParquetColumnLocation.java @@ -37,6 +37,7 @@ import java.io.IOException; import java.math.BigDecimal; import java.math.BigInteger; +import java.time.Instant; import java.util.Arrays; import java.util.Optional; import java.util.function.Function; @@ -370,22 +371,59 @@ private static ToPage makeToPage( final PrimitiveType.PrimitiveTypeName typeName = type.getPrimitiveTypeName(); switch (typeName) { case BOOLEAN: - toPage = ToBooleanAsBytePage.create(pageType); + if (pageType == Boolean.class) { + toPage = ToBooleanAsBytePage.create(pageType); + } else if (pageType == byte.class) { + toPage = ToBytePage.createFromBoolean(pageType); + } else if (pageType == short.class) { + toPage = ToShortPage.createFromBoolean(pageType); + } else if (pageType == int.class) { + toPage = ToIntPage.createFromBoolean(pageType); + } else if (pageType == long.class) { + toPage = ToLongPage.createFromBoolean(pageType); + } else { + throw new IllegalArgumentException( + "Cannot convert parquet BOOLEAN primitive column to " + pageType); + } break; case INT32: - toPage = ToIntPage.create(pageType); + if (pageType == int.class) { + toPage = ToIntPage.create(pageType); + } else if (pageType == 
long.class) { + toPage = ToLongPage.createFromInt(pageType); + } else { + throw new IllegalArgumentException("Cannot convert parquet INT32 column to " + pageType); + } break; case INT64: - toPage = ToLongPage.create(pageType); + if (pageType == long.class) { + toPage = ToLongPage.create(pageType); + } else { + throw new IllegalArgumentException("Cannot convert parquet INT64 column to " + pageType); + } break; case INT96: - toPage = ToInstantPage.createFromInt96(pageType); + if (pageType == Instant.class) { + toPage = ToInstantPage.createFromInt96(pageType); + } else { + throw new IllegalArgumentException("Cannot convert parquet INT96 column to " + pageType); + } break; case DOUBLE: - toPage = ToDoublePage.create(pageType); + if (pageType == double.class) { + toPage = ToDoublePage.create(pageType); + } else { + throw new IllegalArgumentException("Cannot convert parquet DOUBLE column to " + pageType); + } break; case FLOAT: - toPage = ToFloatPage.create(pageType); + if (pageType == float.class) { + toPage = ToFloatPage.create(pageType); + } else if (pageType == double.class) { + toPage = ToDoublePage.createFromFloat(pageType); + } else { + throw new IllegalArgumentException("Cannot convert parquet FLOAT column to " + pageType); + } break; case BINARY: case FIXED_LEN_BYTE_ARRAY: @@ -413,9 +451,9 @@ private static ToPage makeToPage( } if (toPage == null) { - throw new TableDataException( + throw new IllegalArgumentException( "Unsupported parquet column type " + type.getPrimitiveTypeName() + - " with logical type " + logicalTypeAnnotation); + " with logical type " + logicalTypeAnnotation + " and page type " + pageType); } if (specialTypeName == ColumnTypeInfo.SpecialType.StringSet) { @@ -433,7 +471,7 @@ private static ToPage makeToPage( // noinspection unchecked return (ToPage) toPage; - } catch (RuntimeException except) { + } catch (final RuntimeException except) { throw new TableDataException( "Unexpected exception accessing column " + parquetColumnName, except); } @@ -494,19 +532,59 @@ private static class LogicalTypeVisitor if (intLogicalType.isSigned()) { switch (intLogicalType.getBitWidth()) { case 8: - return Optional.of(ToBytePage.create(pageType)); + if (pageType == byte.class) { + return Optional.of(ToBytePage.create(pageType)); + } else if (pageType == short.class) { + return Optional.of(ToShortPage.create(pageType)); + } else if (pageType == int.class) { + return Optional.of(ToIntPage.create(pageType)); + } else if (pageType == long.class) { + return Optional.of(ToLongPage.createFromInt(pageType)); + } + throw new IllegalArgumentException("Cannot convert parquet byte column to " + pageType); case 16: - return Optional.of(ToShortPage.create(pageType)); + if (pageType == short.class) { + return Optional.of(ToShortPage.create(pageType)); + } else if (pageType == int.class) { + return Optional.of(ToIntPage.create(pageType)); + } else if (pageType == long.class) { + return Optional.of(ToLongPage.createFromInt(pageType)); + } + throw new IllegalArgumentException("Cannot convert parquet short column to " + pageType); case 32: - return Optional.of(ToIntPage.create(pageType)); + if (pageType == int.class) { + return Optional.of(ToIntPage.create(pageType)); + } else if (pageType == long.class) { + return Optional.of(ToLongPage.createFromInt(pageType)); + } + throw new IllegalArgumentException("Cannot convert parquet int column to " + pageType); case 64: return Optional.of(ToLongPage.create(pageType)); } } else { switch (intLogicalType.getBitWidth()) { case 8: + if (pageType == char.class) { + 
return Optional.of(ToCharPage.create(pageType)); + } else if (pageType == short.class) { + return Optional.of(ToShortPage.createFromUnsignedByte(pageType)); + } else if (pageType == int.class) { + return Optional.of(ToIntPage.createFromUnsignedByte(pageType)); + } else if (pageType == long.class) { + return Optional.of(ToLongPage.createFromUnsignedByte(pageType)); + } + throw new IllegalArgumentException( + "Cannot convert parquet unsigned byte column to " + pageType); case 16: - return Optional.of(ToCharPage.create(pageType)); + if (pageType == char.class) { + return Optional.of(ToCharPage.create(pageType)); + } else if (pageType == int.class) { + return Optional.of(ToIntPage.createFromUnsignedShort(pageType)); + } else if (pageType == long.class) { + return Optional.of(ToLongPage.createFromUnsignedShort(pageType)); + } + throw new IllegalArgumentException( + "Cannot convert parquet unsigned short column to " + pageType); case 32: return Optional.of(ToLongPage.createFromUnsignedInt(pageType)); } @@ -547,7 +625,7 @@ private static class LogicalTypeVisitor case FIXED_LEN_BYTE_ARRAY: // fall through case BINARY: final int encodedSizeInBytes = typeName == BINARY ? -1 : type.getTypeLength(); - if (BigDecimal.class.equals(pageType)) { + if (pageType == BigDecimal.class) { final int precision = decimalLogicalType.getPrecision(); final int scale = decimalLogicalType.getScale(); try { @@ -560,7 +638,7 @@ private static class LogicalTypeVisitor pageType, new BigDecimalParquetBytesCodec(precision, scale, encodedSizeInBytes), columnChunkReader.getDictionarySupplier())); - } else if (BigInteger.class.equals(pageType)) { + } else if (pageType == BigInteger.class) { return Optional.of(ToBigIntegerPage.create( pageType, new BigIntegerParquetBytesCodec(encodedSizeInBytes), diff --git a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToBooleanAsBytePage.java b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToBooleanAsBytePage.java index 8be732df7a2..529bba27066 100644 --- a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToBooleanAsBytePage.java +++ b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToBooleanAsBytePage.java @@ -5,7 +5,7 @@ import io.deephaven.chunk.attributes.Any; import io.deephaven.parquet.base.PageMaterializerFactory; -import io.deephaven.parquet.base.materializers.BoolMaterializer; +import io.deephaven.parquet.base.materializers.BooleanAsByteMaterializer; import io.deephaven.vector.ObjectVector; import io.deephaven.vector.ObjectVectorDirect; import io.deephaven.util.BooleanUtils; @@ -56,7 +56,7 @@ public final Object nullValue() { @Override public final PageMaterializerFactory getPageMaterializerFactory() { - return BoolMaterializer.FACTORY; + return BooleanAsByteMaterializer.FACTORY; } @Override diff --git a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToBytePage.java b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToBytePage.java index f63d9f2ebcb..1f953292d20 100644 --- a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToBytePage.java +++ b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToBytePage.java @@ -1,15 +1,12 @@ // // Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending // -// ****** AUTO-GENERATED CLASS - DO NOT EDIT MANUALLY -// ****** Edit ToIntPage and run "./gradlew 
replicateToPage" to regenerate -// -// @formatter:off package io.deephaven.parquet.table.pagestore.topage; import io.deephaven.chunk.ChunkType; import io.deephaven.chunk.attributes.Any; import io.deephaven.parquet.base.PageMaterializerFactory; +import io.deephaven.parquet.base.materializers.BooleanAsByteMaterializer; import io.deephaven.parquet.base.materializers.ByteMaterializer; import org.jetbrains.annotations.NotNull; @@ -17,19 +14,35 @@ public class ToBytePage implements ToPage { + public static ToBytePage create(Class nativeType) { + verifyNativeType(nativeType); + // noinspection unchecked + return FROM_BYTE; + } + + public static ToBytePage createFromBoolean(final Class nativeType) { + verifyNativeType(nativeType); + // noinspection unchecked + return FROM_BOOLEAN; + } + @SuppressWarnings("rawtypes") - private static final ToBytePage INSTANCE = new ToBytePage<>(); + private static final ToBytePage FROM_BYTE = new ToBytePage<>(ByteMaterializer.FACTORY); + @SuppressWarnings("rawtypes") + private static final ToBytePage FROM_BOOLEAN = new ToBytePage<>(BooleanAsByteMaterializer.FACTORY); - public static ToBytePage create(Class nativeType) { + private static void verifyNativeType(final Class nativeType) { if (nativeType == null || byte.class.equals(nativeType)) { - // noinspection unchecked - return INSTANCE; + return; } - throw new IllegalArgumentException("The native type for a Byte column is " + nativeType.getCanonicalName()); } - private ToBytePage() {} + private final PageMaterializerFactory pageMaterializerFactory; + + private ToBytePage(@NotNull final PageMaterializerFactory pageMaterializerFactory) { + this.pageMaterializerFactory = pageMaterializerFactory; + } @Override @NotNull @@ -52,6 +65,6 @@ public final Object nullValue() { @Override @NotNull public final PageMaterializerFactory getPageMaterializerFactory() { - return ByteMaterializer.FACTORY; + return pageMaterializerFactory; } } diff --git a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToCharPage.java b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToCharPage.java index 3fbccc9eb95..2248cede91a 100644 --- a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToCharPage.java +++ b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToCharPage.java @@ -1,10 +1,6 @@ // // Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending // -// ****** AUTO-GENERATED CLASS - DO NOT EDIT MANUALLY -// ****** Edit ToIntPage and run "./gradlew replicateToPage" to regenerate -// -// @formatter:off package io.deephaven.parquet.table.pagestore.topage; import io.deephaven.chunk.ChunkType; diff --git a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToDoublePage.java b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToDoublePage.java index f5f450f4753..474b4d3106f 100644 --- a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToDoublePage.java +++ b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToDoublePage.java @@ -1,15 +1,12 @@ // // Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending // -// ****** AUTO-GENERATED CLASS - DO NOT EDIT MANUALLY -// ****** Edit ToIntPage and run "./gradlew replicateToPage" to regenerate -// -// @formatter:off package io.deephaven.parquet.table.pagestore.topage; import io.deephaven.chunk.ChunkType; import 
io.deephaven.chunk.attributes.Any; import io.deephaven.parquet.base.PageMaterializerFactory; +import io.deephaven.parquet.base.materializers.DoubleFromFloatMaterializer; import io.deephaven.parquet.base.materializers.DoubleMaterializer; import org.jetbrains.annotations.NotNull; @@ -17,19 +14,35 @@ public class ToDoublePage implements ToPage { + public static ToDoublePage create(final Class nativeType) { + verifyNativeType(nativeType); + // noinspection unchecked + return FROM_DOUBLE; + } + + public static ToDoublePage createFromFloat(final Class nativeType) { + verifyNativeType(nativeType); + // noinspection unchecked + return FROM_FLOAT; + } + @SuppressWarnings("rawtypes") - private static final ToDoublePage INSTANCE = new ToDoublePage<>(); + private static final ToDoublePage FROM_DOUBLE = new ToDoublePage<>(DoubleMaterializer.FACTORY); + @SuppressWarnings("rawtypes") + private static final ToDoublePage FROM_FLOAT = new ToDoublePage<>(DoubleFromFloatMaterializer.FACTORY); - public static ToDoublePage create(Class nativeType) { + private static void verifyNativeType(final Class nativeType) { if (nativeType == null || double.class.equals(nativeType)) { - // noinspection unchecked - return INSTANCE; + return; } - throw new IllegalArgumentException("The native type for a Double column is " + nativeType.getCanonicalName()); } - private ToDoublePage() {} + private final PageMaterializerFactory pageMaterializerFactory; + + private ToDoublePage(@NotNull final PageMaterializerFactory pageMaterializerFactory) { + this.pageMaterializerFactory = pageMaterializerFactory; + } @Override @NotNull @@ -52,6 +65,6 @@ public final Object nullValue() { @Override @NotNull public final PageMaterializerFactory getPageMaterializerFactory() { - return DoubleMaterializer.FACTORY; + return pageMaterializerFactory; } } diff --git a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToFloatPage.java b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToFloatPage.java index 64ef1bd83ed..63bb095afec 100644 --- a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToFloatPage.java +++ b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToFloatPage.java @@ -2,7 +2,7 @@ // Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending // // ****** AUTO-GENERATED CLASS - DO NOT EDIT MANUALLY -// ****** Edit ToIntPage and run "./gradlew replicateToPage" to regenerate +// ****** Edit ToCharPage and run "./gradlew replicateToPage" to regenerate // // @formatter:off package io.deephaven.parquet.table.pagestore.topage; diff --git a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToIntPage.java b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToIntPage.java index 3217d95c0f5..4b0ef392fd3 100644 --- a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToIntPage.java +++ b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToIntPage.java @@ -6,6 +6,9 @@ import io.deephaven.chunk.ChunkType; import io.deephaven.chunk.attributes.Any; import io.deephaven.parquet.base.PageMaterializerFactory; +import io.deephaven.parquet.base.materializers.IntFromBooleanMaterializer; +import io.deephaven.parquet.base.materializers.IntFromUnsignedByteMaterializer; +import io.deephaven.parquet.base.materializers.IntFromUnsignedShortMaterializer; import 
io.deephaven.parquet.base.materializers.IntMaterializer; import org.jetbrains.annotations.NotNull; @@ -13,19 +16,51 @@ public class ToIntPage implements ToPage { + public static ToIntPage create(Class nativeType) { + verifyNativeType(nativeType); + // noinspection unchecked + return FROM_INT; + } + + public static ToIntPage createFromUnsignedShort(final Class nativeType) { + verifyNativeType(nativeType); + // noinspection unchecked + return FROM_UNSIGNED_SHORT; + } + + public static ToIntPage createFromUnsignedByte(final Class nativeType) { + verifyNativeType(nativeType); + // noinspection unchecked + return FROM_UNSIGNED_BYTE; + } + + public static ToIntPage createFromBoolean(final Class nativeType) { + verifyNativeType(nativeType); + // noinspection unchecked + return FROM_BOOLEAN; + } + + @SuppressWarnings("rawtypes") + private static final ToIntPage FROM_INT = new ToIntPage<>(IntMaterializer.FACTORY); + @SuppressWarnings("rawtypes") + private static final ToIntPage FROM_UNSIGNED_SHORT = new ToIntPage<>(IntFromUnsignedShortMaterializer.FACTORY); + @SuppressWarnings("rawtypes") + private static final ToIntPage FROM_UNSIGNED_BYTE = new ToIntPage<>(IntFromUnsignedByteMaterializer.FACTORY); @SuppressWarnings("rawtypes") - private static final ToIntPage INSTANCE = new ToIntPage<>(); + private static final ToIntPage FROM_BOOLEAN = new ToIntPage<>(IntFromBooleanMaterializer.FACTORY); - public static ToIntPage create(Class nativeType) { + private static void verifyNativeType(final Class nativeType) { if (nativeType == null || int.class.equals(nativeType)) { - // noinspection unchecked - return INSTANCE; + return; } - throw new IllegalArgumentException("The native type for a Int column is " + nativeType.getCanonicalName()); } - private ToIntPage() {} + private final PageMaterializerFactory pageMaterializerFactory; + + private ToIntPage(@NotNull final PageMaterializerFactory pageMaterializerFactory) { + this.pageMaterializerFactory = pageMaterializerFactory; + } @Override @NotNull @@ -48,6 +83,6 @@ public final Object nullValue() { @Override @NotNull public final PageMaterializerFactory getPageMaterializerFactory() { - return IntMaterializer.FACTORY; + return pageMaterializerFactory; } } diff --git a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToLongPage.java b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToLongPage.java index ca86ced2540..befd67571a1 100644 --- a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToLongPage.java +++ b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToLongPage.java @@ -6,7 +6,11 @@ import io.deephaven.chunk.ChunkType; import io.deephaven.chunk.attributes.Any; import io.deephaven.parquet.base.PageMaterializerFactory; +import io.deephaven.parquet.base.materializers.LongFromBooleanMaterializer; +import io.deephaven.parquet.base.materializers.LongFromIntMaterializer; +import io.deephaven.parquet.base.materializers.LongFromUnsignedByteMaterializer; import io.deephaven.parquet.base.materializers.LongFromUnsignedIntMaterializer; +import io.deephaven.parquet.base.materializers.LongFromUnsignedShortMaterializer; import io.deephaven.parquet.base.materializers.LongMaterializer; import org.jetbrains.annotations.NotNull; @@ -26,10 +30,42 @@ public static ToLongPage createFromUnsignedInt(final Cl return FROM_UNSIGNED_INT; } + public static ToLongPage createFromInt(final Class nativeType) { + verifyNativeType(nativeType); + // 
noinspection unchecked + return FROM_INT; + } + + public static ToLongPage createFromUnsignedShort(final Class nativeType) { + verifyNativeType(nativeType); + // noinspection unchecked + return FROM_UNSIGNED_SHORT; + } + + public static ToLongPage createFromUnsignedByte(final Class nativeType) { + verifyNativeType(nativeType); + // noinspection unchecked + return FROM_UNSIGNED_BYTE; + } + + public static ToLongPage createFromBoolean(final Class nativeType) { + verifyNativeType(nativeType); + // noinspection unchecked + return FROM_BOOLEAN; + } + @SuppressWarnings("rawtypes") private static final ToLongPage FROM_LONG = new ToLongPage<>(LongMaterializer.FACTORY); @SuppressWarnings("rawtypes") private static final ToLongPage FROM_UNSIGNED_INT = new ToLongPage<>(LongFromUnsignedIntMaterializer.FACTORY); + @SuppressWarnings("rawtypes") + private static final ToLongPage FROM_INT = new ToLongPage<>(LongFromIntMaterializer.FACTORY); + @SuppressWarnings("rawtypes") + private static final ToLongPage FROM_UNSIGNED_SHORT = new ToLongPage<>(LongFromUnsignedShortMaterializer.FACTORY); + @SuppressWarnings("rawtypes") + private static final ToLongPage FROM_UNSIGNED_BYTE = new ToLongPage<>(LongFromUnsignedByteMaterializer.FACTORY); + @SuppressWarnings("rawtypes") + private static final ToLongPage FROM_BOOLEAN = new ToLongPage<>(LongFromBooleanMaterializer.FACTORY); private static void verifyNativeType(final Class nativeType) { if (nativeType == null || long.class.equals(nativeType)) { diff --git a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToShortPage.java b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToShortPage.java index ce749f217a4..706617025b9 100644 --- a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToShortPage.java +++ b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToShortPage.java @@ -1,15 +1,13 @@ // // Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending // -// ****** AUTO-GENERATED CLASS - DO NOT EDIT MANUALLY -// ****** Edit ToIntPage and run "./gradlew replicateToPage" to regenerate -// -// @formatter:off package io.deephaven.parquet.table.pagestore.topage; import io.deephaven.chunk.ChunkType; import io.deephaven.chunk.attributes.Any; import io.deephaven.parquet.base.PageMaterializerFactory; +import io.deephaven.parquet.base.materializers.ShortFromBooleanMaterializer; +import io.deephaven.parquet.base.materializers.ShortFromUnsignedByteMaterializer; import io.deephaven.parquet.base.materializers.ShortMaterializer; import org.jetbrains.annotations.NotNull; @@ -17,19 +15,43 @@ public class ToShortPage implements ToPage { + public static ToShortPage create(final Class nativeType) { + verifyNativeType(nativeType); + // noinspection unchecked + return FROM_SHORT; + } + + public static ToShortPage createFromUnsignedByte(final Class nativeType) { + verifyNativeType(nativeType); + // noinspection unchecked + return FROM_UNSIGNED_BYTE; + } + + public static ToShortPage createFromBoolean(final Class nativeType) { + verifyNativeType(nativeType); + // noinspection unchecked + return FROM_BOOLEAN; + } + + @SuppressWarnings("rawtypes") + private static final ToShortPage FROM_SHORT = new ToShortPage<>(ShortMaterializer.FACTORY); + @SuppressWarnings("rawtypes") + private static final ToShortPage FROM_UNSIGNED_BYTE = new ToShortPage<>(ShortFromUnsignedByteMaterializer.FACTORY); @SuppressWarnings("rawtypes") - private static final ToShortPage 
INSTANCE = new ToShortPage<>(); + private static final ToShortPage FROM_BOOLEAN = new ToShortPage<>(ShortFromBooleanMaterializer.FACTORY); - public static ToShortPage create(Class nativeType) { + private static void verifyNativeType(final Class nativeType) { if (nativeType == null || short.class.equals(nativeType)) { - // noinspection unchecked - return INSTANCE; + return; } - throw new IllegalArgumentException("The native type for a Short column is " + nativeType.getCanonicalName()); } - private ToShortPage() {} + private final PageMaterializerFactory pageMaterializerFactory; + + private ToShortPage(@NotNull final PageMaterializerFactory pageMaterializerFactory) { + this.pageMaterializerFactory = pageMaterializerFactory; + } @Override @NotNull @@ -52,6 +74,6 @@ public final Object nullValue() { @Override @NotNull public final PageMaterializerFactory getPageMaterializerFactory() { - return ShortMaterializer.FACTORY; + return pageMaterializerFactory; } } diff --git a/extensions/parquet/table/src/test/java/io/deephaven/parquet/table/ParquetTableReadWriteTest.java b/extensions/parquet/table/src/test/java/io/deephaven/parquet/table/ParquetTableReadWriteTest.java index 8a2df002b0e..74eca341e24 100644 --- a/extensions/parquet/table/src/test/java/io/deephaven/parquet/table/ParquetTableReadWriteTest.java +++ b/extensions/parquet/table/src/test/java/io/deephaven/parquet/table/ParquetTableReadWriteTest.java @@ -637,6 +637,317 @@ public void basicParquetWithMetadataTest() { assertTableEquals(table, fromDiskWithCommonMetadata); } + @Test + public void testOverrideBooleanColumnType() { + final Table table = TableTools.emptyTable(5).update("A = i % 3 == 0 ? true : i % 3 == 1 ? false : null"); + final File dest = new File(rootFile, "testOverrideBooleanColumnType.parquet"); + ParquetTools.writeTable(table, dest.getPath()); + assertTableEquals(table, ParquetTools.readTable(dest.getPath())); + + final Table arrayTable = + TableTools.emptyTable(5).update("A = new Boolean[] {i % 3 == 0 ? true : i % 3 == 1 ? false : null}"); + final File arrayTableDest = new File(rootFile, "testOverrideBooleanArrayType.parquet"); + ParquetTools.writeTable(arrayTable, arrayTableDest.getPath()); + assertTableEquals(arrayTable, ParquetTools.readTable(arrayTableDest.getPath())); + // Boolean -> byte + { + final ParquetInstructions readInstructions = ParquetInstructions.builder() + .setTableDefinition(TableDefinition.of(ColumnDefinition.ofByte("A"))) + .build(); + final Table byteTable = + TableTools.emptyTable(5).update("A = i % 3 == 0 ? (byte)1 : i % 3 == 1 ? (byte)0 : null"); + assertTableEquals(byteTable, ParquetTools.readTable(dest.getPath(), readInstructions)); + + final Table byteArrayTable = + TableTools.emptyTable(5) + .update("A = new byte[] {i % 3 == 0 ? (byte)1 : i % 3 == 1 ? (byte)0 : (byte)null}"); + assertTableEquals(byteArrayTable, ParquetTools.readTable(arrayTableDest.getPath(), + readInstructions.withTableDefinition(byteArrayTable.getDefinition())).select()); + } + + // Boolean -> short + { + final ParquetInstructions readInstructions = ParquetInstructions.builder() + .setTableDefinition(TableDefinition.of(ColumnDefinition.ofShort("A"))) + .build(); + final Table shortTable = + TableTools.emptyTable(5).update("A = i % 3 == 0 ? (short)1 : i % 3 == 1 ? (short)0 : null"); + assertTableEquals(shortTable, ParquetTools.readTable(dest.getPath(), readInstructions)); + + final Table shortArrayTable = + TableTools.emptyTable(5) + .update("A = new short[] {i % 3 == 0 ? (short)1 : i % 3 == (short)1 ? 
0 : (short)null}"); + assertTableEquals(shortArrayTable, ParquetTools.readTable(arrayTableDest.getPath(), + readInstructions.withTableDefinition(shortArrayTable.getDefinition())).select()); + } + + // Boolean -> int + { + final ParquetInstructions readInstructions = ParquetInstructions.builder() + .setTableDefinition(TableDefinition.of(ColumnDefinition.ofInt("A"))) + .build(); + final Table intTable = TableTools.emptyTable(5).update("A = i % 3 == 0 ? 1 : i % 3 == 1 ? 0 : null"); + assertTableEquals(intTable, ParquetTools.readTable(dest.getPath(), readInstructions)); + + final Table intArrayTable = + TableTools.emptyTable(5).update("A = new int[] {i % 3 == 0 ? 1 : i % 3 == 1 ? 0 : null}"); + assertTableEquals(intArrayTable, ParquetTools.readTable(arrayTableDest.getPath(), + readInstructions.withTableDefinition(intArrayTable.getDefinition())).select()); + } + // Boolean -> long + { + final ParquetInstructions readInstructions = ParquetInstructions.builder() + .setTableDefinition(TableDefinition.of(ColumnDefinition.ofLong("A"))) + .build(); + final Table longTable = TableTools.emptyTable(5).update("A = i % 3 == 0 ? 1L : i % 3 == 1 ? 0L : null"); + assertTableEquals(longTable, ParquetTools.readTable(dest.getPath(), readInstructions)); + + final Table longArrayTable = + TableTools.emptyTable(5).update("A = new long[] {i % 3 == 0 ? 1L : i % 3 == 1 ? 0L : null}"); + assertTableEquals(longArrayTable, ParquetTools.readTable(arrayTableDest.getPath(), + readInstructions.withTableDefinition(longArrayTable.getDefinition())).select()); + } + } + + @Test + public void testOverrideByteColumnType() { + final Table table = TableTools.emptyTable(5).update("A=(byte)(i-2)"); + final File dest = new File(rootFile, "testOverrideByteColumnType.parquet"); + ParquetTools.writeTable(table, dest.getPath()); + assertTableEquals(table, ParquetTools.readTable(dest.getPath())); + + final Table arrayTable = TableTools.emptyTable(5).update("A = new byte[] {i == 0 ? null : (byte)(i-2)}"); + final File arrayTableDest = new File(rootFile, "testOverrideByteArrayColumnType.parquet"); + ParquetTools.writeTable(arrayTable, arrayTableDest.getPath()); + assertTableEquals(arrayTable, ParquetTools.readTable(arrayTableDest.getPath())); + + // byte -> short + { + final ParquetInstructions readInstructions = ParquetInstructions.builder() + .setTableDefinition(TableDefinition.of(ColumnDefinition.ofShort("A"))) + .build(); + assertTableEquals(table.updateView("A=(short)A"), + ParquetTools.readTable(dest.getPath(), readInstructions)); + + final Table shortArrayTable = + TableTools.emptyTable(5).update("A = new short[] {i == 0 ? null : (short)(i-2)}"); + assertTableEquals(shortArrayTable, ParquetTools.readTable(arrayTableDest.getPath(), + readInstructions.withTableDefinition(shortArrayTable.getDefinition())).select()); + } + // byte -> int + { + final ParquetInstructions readInstructions = ParquetInstructions.builder() + .setTableDefinition(TableDefinition.of(ColumnDefinition.ofInt("A"))) + .build(); + assertTableEquals(table.updateView("A=(int)A"), + ParquetTools.readTable(dest.getPath(), readInstructions)); + + final Table intArrayTable = TableTools.emptyTable(5).update("A = new int[] {i == 0 ? 
null : (int)(i-2)}"); + assertTableEquals(intArrayTable, ParquetTools.readTable(arrayTableDest.getPath(), + readInstructions.withTableDefinition(intArrayTable.getDefinition())).select()); + } + // byte -> long + { + final ParquetInstructions readInstructions = ParquetInstructions.builder() + .setTableDefinition(TableDefinition.of(ColumnDefinition.ofLong("A"))) + .build(); + assertTableEquals(table.updateView("A=(long)A"), + ParquetTools.readTable(dest.getPath(), readInstructions)); + + final Table longArrayTable = + TableTools.emptyTable(5).update("A = new long[] {i == 0 ? null : (long)(i-2)}"); + assertTableEquals(longArrayTable, ParquetTools.readTable(arrayTableDest.getPath(), + readInstructions.withTableDefinition(longArrayTable.getDefinition())).select()); + } + // byte -> char + { + final ParquetInstructions readInstructions = ParquetInstructions.builder() + .setTableDefinition(TableDefinition.of(ColumnDefinition.ofChar("A"))) + .build(); + try { + ParquetTools.readTable(dest.getPath(), readInstructions).select(); + fail("Expected an exception because cannot convert byte to char"); + } catch (final RuntimeException ignored) { + } + } + } + + @Test + public void testOverrideShortColumnType() { + final Table table = TableTools.emptyTable(5).update("A=(short)(i-2)"); + final File dest = new File(rootFile, "testOverrideShortColumnType.parquet"); + ParquetTools.writeTable(table, dest.getPath()); + assertTableEquals(table, ParquetTools.readTable(dest.getPath())); + + final Table arrayTable = TableTools.emptyTable(5).update("A = new short[] {i == 0 ? null : (short)(i-2)}"); + final File arrayTableDest = new File(rootFile, "testOverrideShortArrayColumnType.parquet"); + ParquetTools.writeTable(arrayTable, arrayTableDest.getPath()); + assertTableEquals(arrayTable, ParquetTools.readTable(arrayTableDest.getPath())); + + // short -> int + { + final ParquetInstructions readInstructions = ParquetInstructions.builder() + .setTableDefinition(TableDefinition.of(ColumnDefinition.ofInt("A"))) + .build(); + assertTableEquals(table.updateView("A=(int)A"), + ParquetTools.readTable(dest.getPath(), readInstructions)); + + final Table intArrayTable = TableTools.emptyTable(5).update("A = new int[] {i == 0 ? null : (int)(i-2)}"); + assertTableEquals(intArrayTable, ParquetTools.readTable(arrayTableDest.getPath(), + readInstructions.withTableDefinition(intArrayTable.getDefinition())).select()); + } + // short -> long + { + final ParquetInstructions readInstructions = ParquetInstructions.builder() + .setTableDefinition(TableDefinition.of(ColumnDefinition.ofLong("A"))) + .build(); + assertTableEquals(table.updateView("A=(long)A"), + ParquetTools.readTable(dest.getPath(), readInstructions)); + + final Table longArrayTable = + TableTools.emptyTable(5).update("A = new long[] {i == 0 ? 
null : (long)(i-2)}"); + assertTableEquals(longArrayTable, ParquetTools.readTable(arrayTableDest.getPath(), + readInstructions.withTableDefinition(longArrayTable.getDefinition())).select()); + } + // short -> byte + { + final ParquetInstructions readInstructions = ParquetInstructions.builder() + .setTableDefinition(TableDefinition.of(ColumnDefinition.ofByte("A"))) + .build(); + try { + ParquetTools.readTable(dest.getPath(), readInstructions).select(); + fail("Expected an exception because cannot convert short to byte"); + } catch (final RuntimeException ignored) { + } + } + } + + @Test + public void testOverrideCharColumnType() { + final Table table = TableTools.emptyTable(5).update("A=(char)i"); + final File dest = new File(rootFile, "testOverrideCharColumnType.parquet"); + ParquetTools.writeTable(table, dest.getPath()); + assertTableEquals(table, ParquetTools.readTable(dest.getPath())); + + final Table arrayTable = TableTools.emptyTable(5).update("A = new char[] {i == 0 ? null : (char)i}"); + final File arrayTableDest = new File(rootFile, "testOverrideCharArrayColumnType.parquet"); + ParquetTools.writeTable(arrayTable, arrayTableDest.getPath()); + assertTableEquals(arrayTable, ParquetTools.readTable(arrayTableDest.getPath())); + + // char -> int + { + final ParquetInstructions readInstructions = ParquetInstructions.builder() + .setTableDefinition(TableDefinition.of(ColumnDefinition.ofInt("A"))) + .build(); + assertTableEquals(table.updateView("A=(int)A"), + ParquetTools.readTable(dest.getPath(), readInstructions)); + + final Table intArrayTable = TableTools.emptyTable(5).update("A = new int[] {i == 0 ? null : (int)i}"); + assertTableEquals(intArrayTable, ParquetTools.readTable(arrayTableDest.getPath(), + readInstructions.withTableDefinition(intArrayTable.getDefinition())).select()); + } + // char -> long + { + final ParquetInstructions readInstructions = ParquetInstructions.builder() + .setTableDefinition(TableDefinition.of(ColumnDefinition.ofLong("A"))) + .build(); + assertTableEquals(table.updateView("A=(long)A"), + ParquetTools.readTable(dest.getPath(), readInstructions)); + + final Table longArrayTable = TableTools.emptyTable(5).update("A = new long[] {i == 0 ? null : (long)i}"); + assertTableEquals(longArrayTable, ParquetTools.readTable(arrayTableDest.getPath(), + readInstructions.withTableDefinition(longArrayTable.getDefinition())).select()); + } + // char -> short + { + final ParquetInstructions readInstructions = ParquetInstructions.builder() + .setTableDefinition(TableDefinition.of(ColumnDefinition.ofShort("A"))) + .build(); + try { + ParquetTools.readTable(dest.getPath(), readInstructions).select(); + fail("Expected an exception because cannot convert char to short"); + } catch (final RuntimeException ignored) { + } + } + } + + @Test + public void testOverrideIntColumnType() { + final Table table = TableTools.emptyTable(5).update("A=(int)(i-2)"); + final File dest = new File(rootFile, "testOverrideIntColumnType.parquet"); + ParquetTools.writeTable(table, dest.getPath()); + assertTableEquals(table, ParquetTools.readTable(dest.getPath())); + + final Table arrayTable = TableTools.emptyTable(5).update("A = new int[] {i == 0 ? 
null : (int)(i-2)}"); + final File arrayTableDest = new File(rootFile, "testOverrideIntArrayColumnType.parquet"); + ParquetTools.writeTable(arrayTable, arrayTableDest.getPath()); + assertTableEquals(arrayTable, ParquetTools.readTable(arrayTableDest.getPath())); + + // int -> long + { + final ParquetInstructions readInstructions = ParquetInstructions.builder() + .setTableDefinition(TableDefinition.of(ColumnDefinition.ofLong("A"))) + .build(); + assertTableEquals(table.updateView("A=(long)A"), + ParquetTools.readTable(dest.getPath(), readInstructions)); + final Table longArrayTable = + TableTools.emptyTable(5).update("A = new long[] {i == 0 ? null : (long)(i-2)}"); + assertTableEquals(longArrayTable, ParquetTools.readTable(arrayTableDest.getPath(), + readInstructions.withTableDefinition(longArrayTable.getDefinition())).select()); + } + + // int -> short + { + final ParquetInstructions readInstructions = ParquetInstructions.builder() + .setTableDefinition(TableDefinition.of(ColumnDefinition.ofShort("A"))) + .build(); + try { + ParquetTools.readTable(dest.getPath(), readInstructions).select(); + fail("Expected an exception because cannot convert int to short"); + } catch (final RuntimeException ignored) { + } + } + } + + @Test + public void testOverrideFloatColumnType() { + final Table table = TableTools.emptyTable(5).update("A=(float)(i-2)"); + final File dest = new File(rootFile, "testOverrideFloatColumnType.parquet"); + ParquetTools.writeTable(table, dest.getPath()); + assertTableEquals(table, ParquetTools.readTable(dest.getPath())); + + final Table arrayTable = TableTools.emptyTable(5).update("A = new float[] {i == 0 ? null : (float)(i-2)}"); + final File arrayTableDest = new File(rootFile, "testOverrideFloatArrayColumnType.parquet"); + ParquetTools.writeTable(arrayTable, arrayTableDest.getPath()); + assertTableEquals(arrayTable, ParquetTools.readTable(arrayTableDest.getPath())); + + // float -> double + { + final ParquetInstructions readInstructions = ParquetInstructions.builder() + .setTableDefinition(TableDefinition.of(ColumnDefinition.ofDouble("A"))) + .build(); + assertTableEquals(table.updateView("A=(double)A"), + ParquetTools.readTable(dest.getPath(), readInstructions)); + final Table doubleArrayTable = + TableTools.emptyTable(5).update("A = new double[] {i == 0 ? 
null : (double)(i-2)}"); + assertTableEquals(doubleArrayTable, ParquetTools.readTable(arrayTableDest.getPath(), + readInstructions.withTableDefinition(doubleArrayTable.getDefinition())).select()); + } + + // float -> short + { + final ParquetInstructions readInstructions = ParquetInstructions.builder() + .setTableDefinition(TableDefinition.of(ColumnDefinition.ofShort("A"))) + .build(); + try { + ParquetTools.readTable(dest.getPath(), readInstructions).select(); + fail("Expected an exception because cannot convert float to short"); + } catch (final RuntimeException ignored) { + } + } + } + + @Test public void parquetIndexingBuilderTest() { final Table source = TableTools.emptyTable(1_000_000).updateView( diff --git a/py/server/tests/test_parquet.py b/py/server/tests/test_parquet.py index 2d49f7c82cd..1a5d3b3e31c 100644 --- a/py/server/tests/test_parquet.py +++ b/py/server/tests/test_parquet.py @@ -14,7 +14,7 @@ from deephaven import DHError, empty_table, dtypes, new_table from deephaven import arrow as dharrow -from deephaven.column import InputColumn, Column, ColumnType, string_col, int_col, char_col, long_col +from deephaven.column import InputColumn, Column, ColumnType, string_col, int_col, char_col, long_col, short_col from deephaven.pandas import to_pandas, to_table from deephaven.parquet import (write, batch_write, read, delete, ColumnInstruction, ParquetFileLayout, write_partitioned) @@ -782,6 +782,32 @@ def test_unsigned_ints(self): ]) self.assert_table_equals(table_from_disk, expected) + def test_unsigned_byte_cast(self): + data = {'uint8Col': [255, 2, 0]} + df = pandas.DataFrame(data) + df['uint8Col'] = df['uint8Col'].astype(np.uint8) + pyarrow.parquet.write_table(pyarrow.Table.from_pandas(df), 'data_from_pyarrow.parquet') + + # UByte -> Char + table_from_disk = read("data_from_pyarrow.parquet", table_definition={"uint8Col": dtypes.char}) + expected = new_table([char_col("uint8Col", [255, 2, 0])]) + self.assert_table_equals(table_from_disk, expected) + + # UByte -> Short + table_from_disk = read("data_from_pyarrow.parquet", table_definition={"uint8Col": dtypes.short}) + expected = new_table([short_col("uint8Col", [255, 2, 0])]) + self.assert_table_equals(table_from_disk, expected) + + # UByte -> Int + table_from_disk = read("data_from_pyarrow.parquet", table_definition={"uint8Col": dtypes.int32}) + expected = new_table([int_col("uint8Col", [255, 2, 0])]) + self.assert_table_equals(table_from_disk, expected) + + # UByte -> Long + table_from_disk = read("data_from_pyarrow.parquet", table_definition={"uint8Col": dtypes.long}) + expected = new_table([long_col("uint8Col", [255, 2, 0])]) + self.assert_table_equals(table_from_disk, expected) + def test_v2_pages(self): def test_v2_pages_helper(dh_table): write(dh_table, "data_from_dh.parquet") diff --git a/replication/static/src/main/java/io/deephaven/replicators/ReplicatePageMaterializers.java b/replication/static/src/main/java/io/deephaven/replicators/ReplicatePageMaterializers.java index 13909dd32b3..e4e96e356d7 100644 --- a/replication/static/src/main/java/io/deephaven/replicators/ReplicatePageMaterializers.java +++ b/replication/static/src/main/java/io/deephaven/replicators/ReplicatePageMaterializers.java @@ -5,8 +5,7 @@ import java.io.IOException; -import static io.deephaven.replication.ReplicatePrimitiveCode.charToShortAndByte; -import static io.deephaven.replication.ReplicatePrimitiveCode.floatToAllFloatingPoints; +import static io.deephaven.replication.ReplicatePrimitiveCode.charToByte; import static 
io.deephaven.replication.ReplicatePrimitiveCode.replaceAll; /** @@ -20,7 +19,14 @@ public class ReplicatePageMaterializers { "extensions/parquet/base/src/main/java/io/deephaven/parquet/base/materializers/"; private static final String CHAR_MATERIALIZER_PATH = MATERIALIZER_DIR + "CharMaterializer.java"; - private static final String FLOAT_MATERIALIZER_PATH = MATERIALIZER_DIR + "FloatMaterializer.java"; + private static final String SHORT_MATERIALIZER_PATH = MATERIALIZER_DIR + "ShortMaterializer.java"; + private static final String INT_MATERIALIZER_PATH = MATERIALIZER_DIR + "IntMaterializer.java"; + private static final String LONG_MATERIALIZER_BASE_PATH = MATERIALIZER_DIR + "LongMaterializerBase.java"; + private static final String LONG_MATERIALIZER_PATH = MATERIALIZER_DIR + "LongMaterializer.java"; + private static final String LONG_FROM_INT_MATERIALIZER_PATH = MATERIALIZER_DIR + "LongFromIntMaterializer.java"; + private static final String LONG_FROM_UNSIGNED_SHORT_MATERIALIZER_PATH = + MATERIALIZER_DIR + "LongFromUnsignedShortMaterializer.java"; + private static final String DOUBLE_MATERIALIZER_PATH = MATERIALIZER_DIR + "DoubleMaterializer.java"; private static final String LOCAL_TIME_FROM_MICROS_MATERIALIZER_PATH = MATERIALIZER_DIR + "LocalTimeFromMicrosMaterializer.java"; private static final String LOCAL_DATE_TIME_FROM_MILLIS_MATERIALIZER_PATH = @@ -34,18 +40,121 @@ public class ReplicatePageMaterializers { private static final String BIG_INTEGER_MATERIALIZER_PATH = MATERIALIZER_DIR + "BigIntegerMaterializer.java"; public static void main(String... args) throws IOException { - charToShortAndByte(TASK, CHAR_MATERIALIZER_PATH, NO_EXCEPTIONS); + charToByte(TASK, CHAR_MATERIALIZER_PATH, NO_EXCEPTIONS); - // Float -> Double - floatToAllFloatingPoints(TASK, FLOAT_MATERIALIZER_PATH, NO_EXCEPTIONS); - - // Float -> Int + // LongBase -> IntBase String[][] pairs = new String[][] { - {"readFloat", "readInteger"}, - {"Float", "Int"}, - {"float", "int"} + {"Long", "Int"}, + {"long", "int"}, + }; + replaceAll(TASK, LONG_MATERIALIZER_BASE_PATH, null, NO_EXCEPTIONS, pairs); + + // Long -> Int + pairs = new String[][] { + {"readLong", "readInteger"}, + {"Long", "Int"}, + {"long", "int"} + }; + replaceAll(TASK, LONG_MATERIALIZER_PATH, null, NO_EXCEPTIONS, pairs); + + // Int -> IntFromBoolean + pairs = new String[][] { + {"IntMaterializer", "IntFromBooleanMaterializer"}, + {"readInteger\\(\\)", "readBoolean() ? 1 : 0"} + }; + replaceAll(TASK, INT_MATERIALIZER_PATH, null, new String[] {"IntMaterializerBase"}, pairs); + + // LongBase -> ShortBase + pairs = new String[][] { + {"Long", "Short"}, + {"long", "short"}, + }; + replaceAll(TASK, LONG_MATERIALIZER_BASE_PATH, null, NO_EXCEPTIONS, pairs); + + // Long -> Short + pairs = new String[][] { + {"dataReader.readLong", "(short) dataReader.readInteger"}, + {"dataReader, 0, numValues", "dataReader, (short) 0, numValues"}, + {"Long", "Short"}, + {"long", "short"} + }; + replaceAll(TASK, LONG_MATERIALIZER_PATH, null, NO_EXCEPTIONS, pairs); + + // Long -> LongFromInt + pairs = new String[][] { + {"LongMaterializer", "LongFromIntMaterializer"}, + {"readLong", "readInteger"}, + }; + replaceAll(TASK, LONG_MATERIALIZER_PATH, LONG_FROM_INT_MATERIALIZER_PATH, null, + new String[] {"LongMaterializerBase"}, pairs); + + // Long -> LongFromBoolean + pairs = new String[][] { + {"LongMaterializer", "LongFromBooleanMaterializer"}, + {"readLong\\(\\)", "readBoolean() ? 
1 : 0"}, + }; + replaceAll(TASK, LONG_MATERIALIZER_PATH, null, new String[] {"LongMaterializerBase"}, pairs); + + // LongFromUnsignedShort -> LongFromUnsignedByte + pairs = new String[][] { + {"Short", "Byte"}, + {"short", "byte"} + }; + replaceAll(TASK, LONG_FROM_UNSIGNED_SHORT_MATERIALIZER_PATH, null, NO_EXCEPTIONS, pairs); + + // LongFromUnsignedShort -> LongFromUnsignedInt + pairs = new String[][] { + {"Short.toUnsignedLong", "Integer.toUnsignedLong"}, + {"Short", "Int"}, + {"short", "int"} + }; + replaceAll(TASK, LONG_FROM_UNSIGNED_SHORT_MATERIALIZER_PATH, null, NO_EXCEPTIONS, pairs); + + // LongFromUnsignedShort -> IntFromUnsignedShort + pairs = new String[][] { + {"LongFromUnsignedShort", "IntFromUnsignedShort"}, + {"Long", "Int"}, + {"long", "int"} + }; + replaceAll(TASK, LONG_FROM_UNSIGNED_SHORT_MATERIALIZER_PATH, null, NO_EXCEPTIONS, pairs); + + // LongFromUnsignedShort -> IntFromUnsignedByte + pairs = new String[][] { + {"Short", "Byte"}, + {"short", "byte"}, + {"Long", "Int"}, + {"long", "int"} + }; + replaceAll(TASK, LONG_FROM_UNSIGNED_SHORT_MATERIALIZER_PATH, null, NO_EXCEPTIONS, pairs); + + // LongBase -> DoubleBase + pairs = new String[][] { + {"Long", "Double"}, + {"long", "double"}, + }; + replaceAll(TASK, LONG_MATERIALIZER_BASE_PATH, null, NO_EXCEPTIONS, pairs); + + // Long -> Double + pairs = new String[][] { + {"dataReader.readLong", "dataReader.readDouble"}, + {"Long", "Double"}, + {"long", "double"} + }; + replaceAll(TASK, LONG_MATERIALIZER_PATH, null, NO_EXCEPTIONS, pairs); + + // Double -> DoubleFromFloat + pairs = new String[][] { + {"DoubleMaterializer", "DoubleFromFloatMaterializer"}, + {"Double", "Float"} + }; + replaceAll(TASK, DOUBLE_MATERIALIZER_PATH, null, new String[] {"DoubleMaterializerBase"}, pairs); + + // Short -> ShortFromBoolean + pairs = new String[][] { + {"ShortMaterializer", "ShortFromBooleanMaterializer"}, + {"dataReader.readInteger\\(\\)", "(dataReader.readBoolean() ? 
1 : 0)"} }; - replaceAll(TASK, FLOAT_MATERIALIZER_PATH, null, NO_EXCEPTIONS, pairs); + replaceAll(TASK, SHORT_MATERIALIZER_PATH, null, new String[] {"ShortMaterializerBase"}, pairs); // LocalTimeFromMicros -> LocalTimeFromMillis // We change from Micros to Millis and not the other way since converting from Long to Integer has fewer diff --git a/replication/static/src/main/java/io/deephaven/replicators/ReplicateToPage.java b/replication/static/src/main/java/io/deephaven/replicators/ReplicateToPage.java index bd76f91cf1a..42e4cf0f6cd 100644 --- a/replication/static/src/main/java/io/deephaven/replicators/ReplicateToPage.java +++ b/replication/static/src/main/java/io/deephaven/replicators/ReplicateToPage.java @@ -4,8 +4,10 @@ package io.deephaven.replicators; import java.io.IOException; +import java.util.Map; -import static io.deephaven.replication.ReplicatePrimitiveCode.intToAllButBooleanAndLong; +import static io.deephaven.replication.ReplicatePrimitiveCode.charToByte; +import static io.deephaven.replication.ReplicatePrimitiveCode.charToFloat; import static io.deephaven.replication.ReplicatePrimitiveCode.replaceAll; /** @@ -18,12 +20,12 @@ public class ReplicateToPage { private static final String TO_PAGE_DIR = "extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/"; - private static final String TO_INT_PAGE_PATH = TO_PAGE_DIR + "ToIntPage.java"; + private static final String TO_CHAR_PAGE_PATH = TO_PAGE_DIR + "ToCharPage.java"; private static final String TO_LOCAL_DATE_TIME_PAGE_PATH = TO_PAGE_DIR + "ToLocalDateTimePage.java"; private static final String TO_BIG_INTEGER_PAGE_PATH = TO_PAGE_DIR + "ToBigIntegerPage.java"; public static void main(String... args) throws IOException { - intToAllButBooleanAndLong(TASK, TO_INT_PAGE_PATH, "interface"); + charToFloat(TASK, TO_CHAR_PAGE_PATH, null, "interface"); // LocalDateTime -> LocalTime String[][] pairs = new String[][] { From 9ca4332bf10d143aa0dfe525353e2f94bc347709 Mon Sep 17 00:00:00 2001 From: Shivam Malhotra Date: Thu, 15 Aug 2024 17:13:22 -0500 Subject: [PATCH 25/43] fix: For failure in reading dictionary encoded parquet strings (#5945) --- .../parquet/base/ColumnChunkReaderImpl.java | 4 +--- .../parquet/table/S3ParquetRemoteTest.java | 14 ++++++++++++++ 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/ColumnChunkReaderImpl.java b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/ColumnChunkReaderImpl.java index 042a0b5c72d..47126bc595a 100644 --- a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/ColumnChunkReaderImpl.java +++ b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/ColumnChunkReaderImpl.java @@ -267,9 +267,7 @@ public ColumnPageReader next(@NotNull final SeekableChannelContext channelContex final long dataOffset = ch.position(); nextHeaderOffset = dataOffset + pageHeader.getCompressed_page_size(); final PageType pageType = pageHeader.type; - if (pageType == PageType.DICTIONARY_PAGE && headerOffset == columnChunk.meta_data.getData_page_offset() - && columnChunk.meta_data.getDictionary_page_offset() == 0) { - // https://stackoverflow.com/questions/55225108/why-is-dictionary-page-offset-0-for-plain-dictionary-encoding + if (pageType == PageType.DICTIONARY_PAGE) { // Skip the dictionary page and jump to the data page return next(holder.get()); } diff --git a/extensions/parquet/table/src/test/java/io/deephaven/parquet/table/S3ParquetRemoteTest.java 
b/extensions/parquet/table/src/test/java/io/deephaven/parquet/table/S3ParquetRemoteTest.java index 6d6f1e2803b..53106574553 100644 --- a/extensions/parquet/table/src/test/java/io/deephaven/parquet/table/S3ParquetRemoteTest.java +++ b/extensions/parquet/table/src/test/java/io/deephaven/parquet/table/S3ParquetRemoteTest.java @@ -86,6 +86,20 @@ public void readSampleParquetFilesFromPublicS3Part2() { .head(10).select(); } + @Test + public void readSampleParquetFilesFromPublicS3Part3() { + Assume.assumeTrue("Skipping test because s3 testing disabled.", ENABLE_REMOTE_S3_TESTING); + final S3Instructions s3Instructions = S3Instructions.builder() + .regionName("us-east-1") + .readTimeout(Duration.ofSeconds(60)) + .credentials(Credentials.anonymous()) + .build(); + final ParquetInstructions readInstructions = new ParquetInstructions.Builder() + .setSpecialInstructions(s3Instructions) + .build(); + readTable("s3://redshift-downloads/redset/serverless/full.parquet", readInstructions).head(10).select(); + } + @Test public void readKeyValuePartitionedParquetFromPublicS3() { Assume.assumeTrue("Skipping test because s3 testing disabled.", ENABLE_REMOTE_S3_TESTING); From 07786ef4e0d53afe74e2a7f27cbd5c3bb331416f Mon Sep 17 00:00:00 2001 From: Devin Smith Date: Thu, 15 Aug 2024 16:04:39 -0700 Subject: [PATCH 26/43] feat!: Add TableDefinition wrapper for python (#5892) * `deephaven.table.TableDefinition`: new python wrapper for `io.deephaven.engine.table.TableDefinition` * `deephaven.column.ColumnDefinition`: new python wrapper for `io.deephaven.engine.table.ColumnDefinition` * `deephaven.table.TableDefinitionLike`: new type alias to allow for consistent public APIs and conversions into `deephaven.table.TableDefinition` * `deephaven.column.Column`: deprecated for removal * `deephaven.jcompat.j_table_definition`: deprecated for removal * `deephaven.stream.kafka.consumer.json_spec`: `cols_defs` specified as `List[Tuple[str, DType]]` deprecated for removal Fixes #4822 BREAKING CHANGE: `deephaven.column.InputColumn` no longer inherits from `deephaven.column.Column`; as such, it no longer exposes Column's attributes. This is unlikely to affect users as InputColumn is really a structure meant to support `new_table`. 
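For reviewers, here is a short illustrative sketch of the new Python surface described above. It is not part of this patch and the exact snippet is hypothetical: it assumes a running Deephaven session and sticks to the names this change introduces (`TableDefinition`, `ColumnDefinition`, `col_def`, `Table.definition`, `Table.column_names`).

```python
# Illustrative sketch only; assumes a running Deephaven session.
from deephaven import empty_table, dtypes
from deephaven.column import col_def, ColumnType
from deephaven.table import TableDefinition

# A TableDefinition can be built from a mapping of column names to dtypes...
td = TableDefinition({"Id": dtypes.long, "Value": dtypes.int32})

# ...or from explicit ColumnDefinition objects via the new col_def helper.
td_explicit = TableDefinition([
    col_def("Id", dtypes.long),
    col_def("Value", dtypes.int32, column_type=ColumnType.NORMAL),
])

# TableDefinition is mapping-like: keys are column names,
# values are ColumnDefinition objects with name/data_type/component_type/column_type.
for name in td.keys():
    print(name, td[name].data_type)

# Tables expose their definition directly, replacing most uses of Table.columns.
t = empty_table(5).update(["Id = ii", "Value = i"])
print(t.definition["Value"].data_type)  # the int32 DType
print(t.column_names)                   # ['Id', 'Value']
```

The mapping-style access mirrors how the patch migrates internal call sites (for example `deephaven.numpy.to_numpy` and `deephaven.learn._validate`) from `table.columns` to `table.definition`.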
--- py/server/deephaven/_table_reader.py | 20 +- py/server/deephaven/column.py | 184 +++++++++--- py/server/deephaven/experimental/iceberg.py | 13 +- py/server/deephaven/jcompat.py | 46 +-- py/server/deephaven/learn/__init__.py | 4 +- py/server/deephaven/numpy.py | 18 +- py/server/deephaven/pandas.py | 14 +- py/server/deephaven/parquet.py | 18 +- py/server/deephaven/stream/kafka/consumer.py | 28 +- py/server/deephaven/stream/table_publisher.py | 12 +- py/server/deephaven/table.py | 256 ++++++++++++----- py/server/deephaven/table_factory.py | 13 +- py/server/tests/test_barrage.py | 4 +- py/server/tests/test_column.py | 40 ++- py/server/tests/test_csv.py | 9 +- py/server/tests/test_data_index.py | 2 +- py/server/tests/test_dbc.py | 10 +- py/server/tests/test_experiments.py | 8 +- py/server/tests/test_iceberg.py | 12 +- py/server/tests/test_numpy.py | 26 +- py/server/tests/test_parquet.py | 20 +- py/server/tests/test_partitioned_table.py | 9 +- py/server/tests/test_pt_proxy.py | 18 +- py/server/tests/test_table.py | 58 ++-- py/server/tests/test_table_definition.py | 271 ++++++++++++++++++ py/server/tests/test_table_factory.py | 36 ++- py/server/tests/test_table_iterator.py | 24 +- py/server/tests/test_table_listener.py | 5 +- py/server/tests/test_updateby.py | 12 +- py/server/tests/test_vectorization.py | 2 +- 30 files changed, 854 insertions(+), 338 deletions(-) create mode 100644 py/server/tests/test_table_definition.py diff --git a/py/server/deephaven/_table_reader.py b/py/server/deephaven/_table_reader.py index 4c9265745c5..49fa82c9772 100644 --- a/py/server/deephaven/_table_reader.py +++ b/py/server/deephaven/_table_reader.py @@ -9,7 +9,7 @@ import numpy as np from deephaven import update_graph -from deephaven.column import Column +from deephaven.column import ColumnDefinition from deephaven.jcompat import to_sequence from deephaven.numpy import _column_to_numpy_array from deephaven.table import Table @@ -18,7 +18,7 @@ T = TypeVar('T') -def _col_defs(table: Table, cols: Union[str, Sequence[str]]) -> Sequence[Column]: +def _col_defs(table: Table, cols: Union[str, Sequence[str]]) -> Sequence[ColumnDefinition]: if not cols: col_defs = table.columns else: @@ -31,7 +31,7 @@ def _col_defs(table: Table, cols: Union[str, Sequence[str]]) -> Sequence[Column] def _table_reader_all(table: Table, cols: Optional[Union[str, Sequence[str]]] = None, *, - emitter: Callable[[Sequence[Column], jpy.JType], T], row_set: jpy.JType, + emitter: Callable[[Sequence[ColumnDefinition], jpy.JType], T], row_set: jpy.JType, prev: bool = False) -> T: """ Reads all the rows in the given row set of a table. The emitter converts the Java data into a desired Python object. @@ -103,7 +103,7 @@ def _table_reader_all_dict(table: Table, cols: Optional[Union[str, Sequence[str] def _table_reader_chunk(table: Table, cols: Optional[Union[str, Sequence[str]]] = None, *, - emitter: Callable[[Sequence[Column], jpy.JType], Iterable[T]], row_set: jpy.JType, + emitter: Callable[[Sequence[ColumnDefinition], jpy.JType], Iterable[T]], row_set: jpy.JType, chunk_size: int = 2048, prev: bool = False) \ -> Generator[T, None, None]: """ Returns a generator that reads one chunk of rows at a time from the table. 
The emitter converts the Java chunk @@ -178,7 +178,7 @@ def _table_reader_chunk_dict(table: Table, cols: Optional[Union[str, Sequence[st Raises: ValueError """ - def _emitter(col_defs: Sequence[Column], j_array: jpy.JType) -> Generator[Dict[str, np.ndarray], None, None]: + def _emitter(col_defs: Sequence[ColumnDefinition], j_array: jpy.JType) -> Generator[Dict[str, np.ndarray], None, None]: yield {col_def.name: _column_to_numpy_array(col_def, j_array[i]) for i, col_def in enumerate(col_defs)} return _table_reader_chunk(table, cols, emitter=_emitter, row_set=row_set, chunk_size=chunk_size, prev=prev) @@ -210,9 +210,9 @@ def _table_reader_chunk_tuple(table: Table, cols: Optional[Union[str, Sequence[s Raises: ValueError """ - named_tuple_class = namedtuple(tuple_name, cols or [col.name for col in table.columns], rename=False) + named_tuple_class = namedtuple(tuple_name, cols or table.column_names, rename=False) - def _emitter(col_defs: Sequence[Column], j_array: jpy.JType) -> Generator[Tuple[np.ndarray], None, None]: + def _emitter(col_defs: Sequence[ColumnDefinition], j_array: jpy.JType) -> Generator[Tuple[np.ndarray], None, None]: yield named_tuple_class._make([_column_to_numpy_array(col_def, j_array[i]) for i, col_def in enumerate(col_defs)]) return _table_reader_chunk(table, cols, emitter=_emitter, row_set=table.j_table.getRowSet(), chunk_size=chunk_size, prev=False) @@ -242,7 +242,7 @@ def _table_reader_row_dict(table: Table, cols: Optional[Union[str, Sequence[str] Raises: ValueError """ - def _emitter(col_defs: Sequence[Column], j_array: jpy.JType) -> Iterable[Dict[str, Any]]: + def _emitter(col_defs: Sequence[ColumnDefinition], j_array: jpy.JType) -> Iterable[Dict[str, Any]]: make_dict = lambda values: {col_def.name: value for col_def, value in zip(col_defs, values)} mvs = [memoryview(j_array[i]) if col_def.data_type.is_primitive else j_array[i] for i, col_def in enumerate(col_defs)] return map(make_dict, zip(*mvs)) @@ -275,9 +275,9 @@ def _table_reader_row_tuple(table: Table, cols: Optional[Union[str, Sequence[str Raises: ValueError """ - named_tuple_class = namedtuple(tuple_name, cols or [col.name for col in table.columns], rename=False) + named_tuple_class = namedtuple(tuple_name, cols or table.column_names, rename=False) - def _emitter(col_defs: Sequence[Column], j_array: jpy.JType) -> Iterable[Tuple[Any, ...]]: + def _emitter(col_defs: Sequence[ColumnDefinition], j_array: jpy.JType) -> Iterable[Tuple[Any, ...]]: mvs = [memoryview(j_array[i]) if col_def.data_type.is_primitive else j_array[i] for i, col_def in enumerate(col_defs)] return map(named_tuple_class._make, zip(*mvs)) diff --git a/py/server/deephaven/column.py b/py/server/deephaven/column.py index bbb5f5008b9..88839233ff0 100644 --- a/py/server/deephaven/column.py +++ b/py/server/deephaven/column.py @@ -4,16 +4,17 @@ """ This module implements the Column class and functions that work with Columns. 
""" -from dataclasses import dataclass, field from enum import Enum -from typing import Sequence, Any +from functools import cached_property +from typing import Sequence, Any, Optional +from warnings import warn import jpy import deephaven.dtypes as dtypes from deephaven import DHError -from deephaven.dtypes import DType -from deephaven.dtypes import _instant_array +from deephaven.dtypes import DType, _instant_array, from_jtype +from deephaven._wrapper import JObjectWrapper _JColumnHeader = jpy.get_type("io.deephaven.qst.column.header.ColumnHeader") _JColumn = jpy.get_type("io.deephaven.qst.column.Column") @@ -32,46 +33,151 @@ def __repr__(self): return self.name -@dataclass -class Column: - """ A Column object represents a column definition in a Deephaven Table. """ - name: str - data_type: DType - component_type: DType = None - column_type: ColumnType = ColumnType.NORMAL +class ColumnDefinition(JObjectWrapper): + """A Deephaven column definition.""" - @property - def j_column_header(self): - return _JColumnHeader.of(self.name, self.data_type.qst_type) + j_object_type = _JColumnDefinition + + def __init__(self, j_column_definition: jpy.JType): + self.j_column_definition = j_column_definition @property - def j_column_definition(self): - if hasattr(self.data_type.j_type, 'jclass'): - j_data_type = self.data_type.j_type.jclass - else: - j_data_type = self.data_type.qst_type.clazz() - j_component_type = self.component_type.qst_type.clazz() if self.component_type else None - j_column_type = self.column_type.value - return _JColumnDefinition.fromGenericType(self.name, j_data_type, j_component_type, j_column_type) - - -@dataclass -class InputColumn(Column): - """ An InputColumn represents a user defined column with some input data. """ - input_data: Any = field(default=None) - - def __post_init__(self): + def j_object(self) -> jpy.JType: + return self.j_column_definition + + @cached_property + def name(self) -> str: + """The column name.""" + return self.j_column_definition.getName() + + @cached_property + def data_type(self) -> DType: + """The column data type.""" + return from_jtype(self.j_column_definition.getDataType()) + + @cached_property + def component_type(self) -> Optional[DType]: + """The column component type.""" + return from_jtype(self.j_column_definition.getComponentType()) + + @cached_property + def column_type(self) -> ColumnType: + """The column type.""" + return ColumnType(self.j_column_definition.getColumnType()) + + +class Column(ColumnDefinition): + """A Column object represents a column definition in a Deephaven Table. Deprecated for removal next release, prefer col_def.""" + + def __init__( + self, + name: str, + data_type: DType, + component_type: DType = None, + column_type: ColumnType = ColumnType.NORMAL, + ): + """Deprecated for removal next release, prefer col_def.""" + warn( + "Column is deprecated for removal next release, prefer col_def", + DeprecationWarning, + stacklevel=2, + ) + super().__init__( + col_def(name, data_type, component_type, column_type).j_column_definition + ) + + +class InputColumn: + """An InputColumn represents a user defined column with some input data.""" + + def __init__( + self, + name: str = None, + data_type: DType = None, + component_type: DType = None, + column_type: ColumnType = ColumnType.NORMAL, + input_data: Any = None, + ): + """Creates an InputColumn. 
+ Args: + name (str): the column name + data_type (DType): the column data type + component_type (Optional[DType]): the column component type, None by default + column_type (ColumnType): the column type, NORMAL by default + input_data: Any: the input data, by default is None + + Returns: + a new InputColumn + + Raises: + DHError + """ try: - if self.input_data is None: - self.j_column = _JColumn.empty(self.j_column_header) - else: - if self.data_type.is_primitive: - self.j_column = _JColumn.ofUnsafe(self.name, dtypes.array(self.data_type, self.input_data, - remap=dtypes.null_remap(self.data_type))) - else: - self.j_column = _JColumn.of(self.j_column_header, dtypes.array(self.data_type, self.input_data)) + self._column_definition = col_def( + name, data_type, component_type, column_type + ) + self.j_column = self._to_j_column(input_data) except Exception as e: - raise DHError(e, f"failed to create an InputColumn ({self.name}).") from e + raise DHError(e, f"failed to create an InputColumn ({name}).") from e + + def _to_j_column(self, input_data: Any = None) -> jpy.JType: + if input_data is None: + return _JColumn.empty( + _JColumnHeader.of( + self._column_definition.name, + self._column_definition.data_type.qst_type, + ) + ) + if self._column_definition.data_type.is_primitive: + return _JColumn.ofUnsafe( + self._column_definition.name, + dtypes.array( + self._column_definition.data_type, + input_data, + remap=dtypes.null_remap(self._column_definition.data_type), + ), + ) + return _JColumn.of( + _JColumnHeader.of( + self._column_definition.name, self._column_definition.data_type.qst_type + ), + dtypes.array(self._column_definition.data_type, input_data), + ) + + +def col_def( + name: str, + data_type: DType, + component_type: Optional[DType] = None, + column_type: ColumnType = ColumnType.NORMAL, +) -> ColumnDefinition: + """Creates a ColumnDefinition. 
+ + Args: + name (str): the column name + data_type (DType): the column data type + component_type (Optional[DType]): the column component type, None by default + column_type (ColumnType): the column type, ColumnType.NORMAL by default + + Returns: + a new ColumnDefinition + + Raises: + DHError + """ + try: + return ColumnDefinition( + _JColumnDefinition.fromGenericType( + name, + data_type.j_type.jclass + if hasattr(data_type.j_type, "jclass") + else data_type.qst_type.clazz(), + component_type.qst_type.clazz() if component_type else None, + column_type.value, + ) + ) + except Exception as e: + raise DHError(e, f"failed to create a ColumnDefinition ({name}).") from e def bool_col(name: str, data: Sequence) -> InputColumn: diff --git a/py/server/deephaven/experimental/iceberg.py b/py/server/deephaven/experimental/iceberg.py index 7506bc95a25..0a99d3f1880 100644 --- a/py/server/deephaven/experimental/iceberg.py +++ b/py/server/deephaven/experimental/iceberg.py @@ -8,13 +8,8 @@ from deephaven import DHError from deephaven._wrapper import JObjectWrapper -from deephaven.column import Column -from deephaven.dtypes import DType from deephaven.experimental import s3 - -from deephaven.jcompat import j_table_definition - -from deephaven.table import Table +from deephaven.table import Table, TableDefinition, TableDefinitionLike _JIcebergInstructions = jpy.get_type("io.deephaven.iceberg.util.IcebergInstructions") _JIcebergCatalogAdapter = jpy.get_type("io.deephaven.iceberg.util.IcebergCatalogAdapter") @@ -39,14 +34,14 @@ class IcebergInstructions(JObjectWrapper): j_object_type = _JIcebergInstructions def __init__(self, - table_definition: Optional[Union[Dict[str, DType], List[Column]]] = None, + table_definition: Optional[TableDefinitionLike] = None, data_instructions: Optional[s3.S3Instructions] = None, column_renames: Optional[Dict[str, str]] = None): """ Initializes the instructions using the provided parameters. Args: - table_definition (Optional[Union[Dict[str, DType], List[Column], None]]): the table definition; if omitted, + table_definition (Optional[TableDefinitionLike]): the table definition; if omitted, the definition is inferred from the Iceberg schema. Setting a definition guarantees the returned table will have that definition. This is useful for specifying a subset of the Iceberg schema columns. data_instructions (Optional[s3.S3Instructions]): Special instructions for reading data files, useful when @@ -62,7 +57,7 @@ def __init__(self, builder = self.j_object_type.builder() if table_definition is not None: - builder.tableDefinition(j_table_definition(table_definition)) + builder.tableDefinition(TableDefinition(table_definition).j_table_definition) if data_instructions is not None: builder.dataInstructions(data_instructions.j_object) diff --git a/py/server/deephaven/jcompat.py b/py/server/deephaven/jcompat.py index d807cb472f3..98e6a04565e 100644 --- a/py/server/deephaven/jcompat.py +++ b/py/server/deephaven/jcompat.py @@ -5,7 +5,8 @@ """ This module provides Java compatibility support including convenience functions to create some widely used Java data structures from corresponding Python ones in order to be able to call Java methods. 
""" -from typing import Any, Callable, Dict, Iterable, List, Sequence, Set, TypeVar, Union, Optional +from typing import Any, Callable, Dict, Iterable, List, Sequence, Set, TypeVar, Union, Optional, Mapping +from warnings import warn import jpy import numpy as np @@ -14,7 +15,7 @@ from deephaven import dtypes, DHError from deephaven._wrapper import unwrap, wrap_j_object, JObjectWrapper from deephaven.dtypes import DType, _PRIMITIVE_DTYPE_NULL_MAP -from deephaven.column import Column +from deephaven.column import ColumnDefinition _NULL_BOOLEAN_AS_BYTE = jpy.get_type("io.deephaven.util.BooleanUtils").NULL_BOOLEAN_AS_BYTE _JPrimitiveArrayConversionUtility = jpy.get_type("io.deephaven.integrations.common.PrimitiveArrayConversionUtility") @@ -327,11 +328,20 @@ def _j_array_to_series(dtype: DType, j_array: jpy.JType, conv_null: bool) -> pd. return s -def j_table_definition(table_definition: Union[Dict[str, DType], List[Column], None]) -> Optional[jpy.JType]: - """Produce a Deephaven TableDefinition from user input. +# Note: unable to import TableDefinitionLike due to circular ref (table.py -> agg.py -> jcompat.py) +def j_table_definition( + table_definition: Union[ + "TableDefinition", + Mapping[str, dtypes.DType], + Iterable[ColumnDefinition], + jpy.JType, + None, + ], +) -> Optional[jpy.JType]: + """Deprecated for removal next release, prefer TableDefinition. Produce a Deephaven TableDefinition from user input. Args: - table_definition (Union[Dict[str, DType], List[Column], None]): the table definition as a dictionary of column + table_definition (Optional[TableDefinitionLike]): the table definition as a dictionary of column names and their corresponding data types or a list of Column objects Returns: @@ -340,22 +350,18 @@ def j_table_definition(table_definition: Union[Dict[str, DType], List[Column], N Raises: DHError """ - if table_definition is None: - return None - elif isinstance(table_definition, Dict): - return _JTableDefinition.of( - [ - Column(name=name, data_type=dtype).j_column_definition - for name, dtype in table_definition.items() - ] - ) - elif isinstance(table_definition, List): - return _JTableDefinition.of( - [col.j_column_definition for col in table_definition] - ) - else: - raise DHError(f"Unexpected table_definition type: {type(table_definition)}") + warn( + "j_table_definition is deprecated for removal next release, prefer TableDefinition", + DeprecationWarning, + stacklevel=2, + ) + from deephaven.table import TableDefinition + return ( + TableDefinition(table_definition).j_table_definition + if table_definition + else None + ) class AutoCloseable(JObjectWrapper): """A context manager wrapper to allow Java AutoCloseable to be used in with statements. 
diff --git a/py/server/deephaven/learn/__init__.py b/py/server/deephaven/learn/__init__.py index 54261ad1d21..45439cb6bf0 100644 --- a/py/server/deephaven/learn/__init__.py +++ b/py/server/deephaven/learn/__init__.py @@ -71,7 +71,7 @@ def _validate(inputs: Input, outputs: Output, table: Table): input_columns_list = [input_.input.getColNames()[i] for input_ in inputs for i in range(len(input_.input.getColNames()))] input_columns = set(input_columns_list) - table_columns = {col.name for col in table.columns} + table_columns = set(table.definition.keys()) if table_columns >= input_columns: if outputs is not None: output_columns_list = [output.output.getColName() for output in outputs] @@ -99,7 +99,7 @@ def _create_non_conflicting_col_name(table: Table, base_col_name: str) -> str: Returns: column name that is not present in the table. """ - table_col_names = set([col.name for col in table.columns]) + table_col_names = set(table.definition.keys()) if base_col_name not in table_col_names: return base_col_name else: diff --git a/py/server/deephaven/numpy.py b/py/server/deephaven/numpy.py index c87f24ea40c..33dcef3bc14 100644 --- a/py/server/deephaven/numpy.py +++ b/py/server/deephaven/numpy.py @@ -8,10 +8,10 @@ import jpy import numpy as np -from deephaven.dtypes import DType, BusinessCalendar +from deephaven.dtypes import BusinessCalendar from deephaven import DHError, dtypes, new_table -from deephaven.column import Column, InputColumn +from deephaven.column import InputColumn, ColumnDefinition from deephaven.dtypes import DType from deephaven.jcompat import _j_array_to_numpy_array from deephaven.table import Table @@ -27,11 +27,11 @@ def _to_column_name(name: str) -> str: return re.sub(r"\s+", "_", tmp_name) -def _column_to_numpy_array(col_def: Column, j_array: jpy.JType) -> np.ndarray: +def _column_to_numpy_array(col_def: ColumnDefinition, j_array: jpy.JType) -> np.ndarray: """ Produces a numpy array from the given Java array and the Table column definition. 
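# The learn helpers above now take column names from table.definition instead
# of iterating Column objects; Table.column_names (added to Table in this same
# patch) gives the list of names directly. A sketch of the same validation
# pattern in user code, with illustrative column names.
from deephaven import empty_table

table = empty_table(5).update(["X = i", "Y = i * 2"])

required = {"X", "Y"}
# table.definition is a Mapping keyed by column name, so keys() avoids
# building an intermediate list of column objects.
missing = required - set(table.definition.keys())
if missing:
    raise ValueError(f"missing input columns: {sorted(missing)}")

assert table.column_names == ["X", "Y"]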
Args: - col_def (Column): the column definition + col_def (ColumnDefinition): the column definition j_array (jpy.JType): the Java array Returns: @@ -48,7 +48,7 @@ def _column_to_numpy_array(col_def: Column, j_array: jpy.JType) -> np.ndarray: raise DHError(e, f"failed to create a numpy array for the column {col_def.name}") from e -def _columns_to_2d_numpy_array(col_def: Column, j_arrays: List[jpy.JType]) -> np.ndarray: +def _columns_to_2d_numpy_array(col_def: ColumnDefinition, j_arrays: List[jpy.JType]) -> np.ndarray: """ Produces a 2d numpy array from the given Java arrays of the same component type and the Table column definition """ try: @@ -95,15 +95,15 @@ def to_numpy(table: Table, cols: List[str] = None) -> np.ndarray: if table.is_refreshing: table = table.snapshot() - col_def_dict = {col.name: col for col in table.columns} + table_def = table.definition if not cols: - cols = list(col_def_dict.keys()) + cols = list(table_def.keys()) else: - diff_set = set(cols) - set(col_def_dict.keys()) + diff_set = set(cols) - set(table_def.keys()) if diff_set: raise DHError(message=f"columns - {list(diff_set)} not found") - col_defs = [col_def_dict[col] for col in cols] + col_defs = [table_def[col] for col in cols] if len(set([col_def.data_type for col_def in col_defs])) != 1: raise DHError(message="columns must be of the same data type.") diff --git a/py/server/deephaven/pandas.py b/py/server/deephaven/pandas.py index d946de5e391..8a2be32c53a 100644 --- a/py/server/deephaven/pandas.py +++ b/py/server/deephaven/pandas.py @@ -11,7 +11,7 @@ import pyarrow as pa from deephaven import DHError, new_table, dtypes, arrow -from deephaven.column import Column +from deephaven.column import ColumnDefinition from deephaven.constants import NULL_BYTE, NULL_SHORT, NULL_INT, NULL_LONG, NULL_FLOAT, NULL_DOUBLE, NULL_CHAR from deephaven.jcompat import _j_array_to_series from deephaven.numpy import _make_input_column @@ -22,12 +22,12 @@ _is_dtype_backend_supported = pd.__version__ >= "2.0.0" -def _column_to_series(table: Table, col_def: Column, conv_null: bool) -> pd.Series: +def _column_to_series(table: Table, col_def: ColumnDefinition, conv_null: bool) -> pd.Series: """Produce a copy of the specified column as a pandas.Series object. Args: table (Table): the table - col_def (Column): the column definition + col_def (ColumnDefinition): the column definition conv_null (bool): whether to check for Deephaven nulls in the data and automatically replace them with pd.NA. 
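# to_numpy and to_pandas now index column definitions straight off
# table.definition rather than first building a name -> Column dict. The same
# direct lookup works in user code; the column name below is illustrative.
from deephaven import dtypes, empty_table

table = empty_table(3).update(["Price = i * 1.0"])

col = table.definition["Price"]      # a ColumnDefinition, no intermediate dict
assert col.data_type == dtypes.double
assert "Price" in table.definition   # Mapping-style membership test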
@@ -133,17 +133,17 @@ def to_pandas(table: Table, cols: List[str] = None, if table.is_refreshing: table = table.snapshot() - col_def_dict = {col.name: col for col in table.columns} + table_def = table.definition if not cols: - cols = list(col_def_dict.keys()) + cols = list(table_def.keys()) else: - diff_set = set(cols) - set(col_def_dict.keys()) + diff_set = set(cols) - set(table_def.keys()) if diff_set: raise DHError(message=f"columns - {list(diff_set)} not found") data = {} for col in cols: - series = _column_to_series(table, col_def_dict[col], conv_null) + series = _column_to_series(table, table_def[col], conv_null) data[col] = series return pd.DataFrame(data=data, columns=cols, copy=False) diff --git a/py/server/deephaven/parquet.py b/py/server/deephaven/parquet.py index 61614c37061..0e7de6af920 100644 --- a/py/server/deephaven/parquet.py +++ b/py/server/deephaven/parquet.py @@ -11,10 +11,8 @@ import jpy from deephaven import DHError -from deephaven.column import Column -from deephaven.dtypes import DType -from deephaven.jcompat import j_array_list, j_table_definition -from deephaven.table import Table, PartitionedTable +from deephaven.jcompat import j_array_list +from deephaven.table import Table, TableDefinition, TableDefinitionLike, PartitionedTable from deephaven.experimental import s3 _JParquetTools = jpy.get_type("io.deephaven.parquet.table.ParquetTools") @@ -69,7 +67,7 @@ def _build_parquet_instructions( generate_metadata_files: Optional[bool] = None, base_name: Optional[str] = None, file_layout: Optional[ParquetFileLayout] = None, - table_definition: Optional[Union[Dict[str, DType], List[Column]]] = None, + table_definition: Optional[TableDefinitionLike] = None, index_columns: Optional[Sequence[Sequence[str]]] = None, special_instructions: Optional[s3.S3Instructions] = None, ): @@ -135,7 +133,7 @@ def _build_parquet_instructions( builder.setFileLayout(_j_file_layout(file_layout)) if table_definition is not None: - builder.setTableDefinition(j_table_definition(table_definition)) + builder.setTableDefinition(TableDefinition(table_definition).j_table_definition) if index_columns: builder.addAllIndexColumns(_j_list_of_list_of_string(index_columns)) @@ -166,7 +164,7 @@ def read( is_legacy_parquet: bool = False, is_refreshing: bool = False, file_layout: Optional[ParquetFileLayout] = None, - table_definition: Union[Dict[str, DType], List[Column], None] = None, + table_definition: Optional[TableDefinitionLike] = None, special_instructions: Optional[s3.S3Instructions] = None, ) -> Table: """ Reads in a table from a single parquet, metadata file, or directory with recognized layout. 
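# The parquet read and write entry points now accept any TableDefinitionLike
# for table_definition. A sketch using a plain dict; the file path and column
# names are placeholders, and the write/read round trip assumes a writable
# local filesystem.
from deephaven import dtypes, empty_table
from deephaven.parquet import read, write

defn = {"Id": dtypes.int32, "Value": dtypes.double}
source = empty_table(10).update(["Id = i", "Value = i * 0.5"])

write(source, "/tmp/example.parquet", table_definition=defn)

# Supplying a definition on read pins the resulting schema instead of
# inferring it from the file.
result = read("/tmp/example.parquet", table_definition=defn)
assert list(result.definition.keys()) == ["Id", "Value"]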
@@ -235,7 +233,7 @@ def delete(path: str) -> None: def write( table: Table, path: str, - table_definition: Optional[Union[Dict[str, DType], List[Column]]] = None, + table_definition: Optional[TableDefinitionLike] = None, col_instructions: Optional[List[ColumnInstruction]] = None, compression_codec_name: Optional[str] = None, max_dictionary_keys: Optional[int] = None, @@ -302,7 +300,7 @@ def write( def write_partitioned( table: Union[Table, PartitionedTable], destination_dir: str, - table_definition: Optional[Union[Dict[str, DType], List[Column]]] = None, + table_definition: Optional[TableDefinitionLike] = None, col_instructions: Optional[List[ColumnInstruction]] = None, compression_codec_name: Optional[str] = None, max_dictionary_keys: Optional[int] = None, @@ -388,7 +386,7 @@ def write_partitioned( def batch_write( tables: List[Table], paths: List[str], - table_definition: Optional[Union[Dict[str, DType], List[Column]]] = None, + table_definition: Optional[TableDefinitionLike] = None, col_instructions: Optional[List[ColumnInstruction]] = None, compression_codec_name: Optional[str] = None, max_dictionary_keys: Optional[int] = None, diff --git a/py/server/deephaven/stream/kafka/consumer.py b/py/server/deephaven/stream/kafka/consumer.py index 01d33e7463b..abfe1a4f818 100644 --- a/py/server/deephaven/stream/kafka/consumer.py +++ b/py/server/deephaven/stream/kafka/consumer.py @@ -9,11 +9,11 @@ from deephaven import dtypes from deephaven._wrapper import JObjectWrapper -from deephaven.column import Column +from deephaven.column import col_def from deephaven.dherror import DHError from deephaven.dtypes import DType from deephaven.jcompat import j_hashmap, j_properties, j_array_list -from deephaven.table import Table, PartitionedTable +from deephaven.table import Table, TableDefinition, TableDefinitionLike, PartitionedTable _JKafkaTools = jpy.get_type("io.deephaven.kafka.KafkaTools") _JKafkaTools_Consume = jpy.get_type("io.deephaven.kafka.KafkaTools$Consume") @@ -427,13 +427,13 @@ def avro_spec( raise DHError(e, "failed to create a Kafka key/value spec") from e -def json_spec(col_defs: Union[Dict[str, DType], List[Tuple[str, DType]]], mapping: Dict = None) -> KeyValueSpec: +def json_spec(col_defs: Union[TableDefinitionLike, List[Tuple[str, DType]]], mapping: Dict = None) -> KeyValueSpec: """Creates a spec for how to use JSON data when consuming a Kafka stream to a Deephaven table. Args: - col_defs (Union[Dict[str, DType], List[Tuple[str, DType]]): the column definitions, either a map of column - names and Deephaven types, or a list of tuples with two elements, a string for column name and a Deephaven - type for column data type. + col_defs (Union[TableDefinitionLike, List[Tuple[str, DType]]): the table definition, preferably specified as + TableDefinitionLike. A list of tuples with two elements, a string for column name and a Deephaven type for + column data type also works, but is deprecated for removal. mapping (Dict): a dict mapping JSON fields to column names defined in the col_defs argument. Fields starting with a '/' character are interpreted as a JSON Pointer (see RFC 6901, ISSN: 2070-1721 for details, essentially nested fields are represented like "/parent/nested"). 
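# json_spec now prefers a TableDefinitionLike over the older list of
# (name, DType) tuples. A sketch of building the spec only; the field and
# column names are made up and nothing is actually consumed here.
from deephaven import dtypes
from deephaven.stream.kafka.consumer import json_spec

# Preferred: any TableDefinitionLike (here a dict) for the value columns.
spec = json_spec(
    {"Symbol": dtypes.string, "Price": dtypes.double},
    mapping={"/sym": "Symbol", "/px": "Price"},
)

# Still accepted, but deprecated for removal: a list of (name, DType) tuples.
legacy_spec = json_spec([("Symbol", dtypes.string), ("Price", dtypes.double)])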
@@ -448,10 +448,20 @@ def json_spec(col_defs: Union[Dict[str, DType], List[Tuple[str, DType]]], mappin DHError """ try: - if isinstance(col_defs, dict): - col_defs = [Column(k, v).j_column_definition for k, v in col_defs.items()] + try: + table_def = TableDefinition(col_defs) + except DHError: + table_def = None + + if table_def: + col_defs = [col.j_column_definition for col in table_def.values()] else: - col_defs = [Column(*t).j_column_definition for t in col_defs] + warn( + 'json_spec col_defs for List[Tuple[str, DType]] is deprecated for removal, prefer TableDefinitionLike', + DeprecationWarning, + stacklevel=2, + ) + col_defs = [col_def(*t).j_column_definition for t in col_defs] if mapping is None: return KeyValueSpec(j_spec=_JKafkaTools_Consume.jsonSpec(col_defs)) diff --git a/py/server/deephaven/stream/table_publisher.py b/py/server/deephaven/stream/table_publisher.py index a6c65f47885..57ead700e79 100644 --- a/py/server/deephaven/stream/table_publisher.py +++ b/py/server/deephaven/stream/table_publisher.py @@ -8,11 +8,9 @@ from typing import Callable, Dict, Optional, Tuple, Union, List from deephaven._wrapper import JObjectWrapper -from deephaven.column import Column -from deephaven.dtypes import DType from deephaven.execution_context import get_exec_ctx -from deephaven.jcompat import j_lambda, j_runnable, j_table_definition -from deephaven.table import Table +from deephaven.jcompat import j_lambda, j_runnable +from deephaven.table import Table, TableDefinition, TableDefinitionLike from deephaven.update_graph import UpdateGraph _JTableDefinition = jpy.get_type("io.deephaven.engine.table.TableDefinition") @@ -75,7 +73,7 @@ def publish_failure(self, failure: Exception) -> None: def table_publisher( name: str, - col_defs: Union[Dict[str, DType], List[Column]], + col_defs: TableDefinitionLike, on_flush_callback: Optional[Callable[[TablePublisher], None]] = None, on_shutdown_callback: Optional[Callable[[], None]] = None, update_graph: Optional[UpdateGraph] = None, @@ -85,7 +83,7 @@ def table_publisher( Args: name (str): the name, used for logging - col_defs (Dict[str, DType]): the column definitions for the resulting blink table + col_defs (TableDefinitionLike): the table definition for the resulting blink table on_flush_callback (Optional[Callable[[TablePublisher], None]]): the on-flush callback, if present, is called once at the beginning of each update graph cycle. This is a pattern that allows publishers to add any data they may have been batching. 
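# table_publisher now takes any TableDefinitionLike for col_defs. A minimal
# sketch; the publisher name and columns are illustrative, and the unpacking
# below assumes the function's existing (blink Table, TablePublisher) return
# pair and the existing TablePublisher.add helper.
from deephaven import dtypes, new_table
from deephaven.column import double_col, int_col
from deephaven.stream.table_publisher import table_publisher

blink_table, publisher = table_publisher(
    name="ExamplePublisher",
    col_defs={"Id": dtypes.int32, "Value": dtypes.double},
)

# Rows are published with tables whose definition matches col_defs.
publisher.add(new_table([int_col("Id", [1, 2]), double_col("Value", [0.5, 1.5])]))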
Do note though, this blocks the update cycle from proceeding, so @@ -107,7 +105,7 @@ def adapt_callback(_table_publisher: jpy.JType): j_table_publisher = _JTablePublisher.of( name, - j_table_definition(col_defs), + TableDefinition(col_defs).j_table_definition, j_lambda(adapt_callback, _JConsumer, None) if on_flush_callback else None, j_runnable(on_shutdown_callback) if on_shutdown_callback else None, (update_graph or get_exec_ctx().update_graph).j_update_graph, diff --git a/py/server/deephaven/table.py b/py/server/deephaven/table.py index 26557db18a1..abb22e1031c 100644 --- a/py/server/deephaven/table.py +++ b/py/server/deephaven/table.py @@ -11,11 +11,13 @@ import inspect from enum import Enum from enum import auto +from functools import cached_property from typing import Any, Optional, Callable, Dict, Generator, Tuple, Literal -from typing import Sequence, List, Union, Protocol +from typing import Sequence, List, Union, Protocol, Mapping, Iterable import jpy import numpy as np +import sys from deephaven import DHError from deephaven import dtypes @@ -23,7 +25,7 @@ from deephaven._wrapper import JObjectWrapper from deephaven._wrapper import unwrap from deephaven.agg import Aggregation -from deephaven.column import Column, ColumnType +from deephaven.column import col_def, ColumnDefinition from deephaven.filters import Filter, and_, or_ from deephaven.jcompat import j_unary_operator, j_binary_operator, j_map_to_dict, j_hashmap from deephaven.jcompat import to_sequence, j_array_list @@ -407,19 +409,133 @@ def _sort_column(col, dir_): _JColumnName.of(col))) -def _td_to_columns(table_definition): - cols = [] - j_cols = table_definition.getColumnsArray() - for j_col in j_cols: - cols.append( - Column( - name=j_col.getName(), - data_type=dtypes.from_jtype(j_col.getDataType()), - component_type=dtypes.from_jtype(j_col.getComponentType()), - column_type=ColumnType(j_col.getColumnType()), +if sys.version_info >= (3, 10): + from typing import TypeAlias # novermin + + TableDefinitionLike: TypeAlias = Union[ + "TableDefinition", + Mapping[str, dtypes.DType], + Iterable[ColumnDefinition], + jpy.JType, + ] + """A Union representing objects that can be coerced into a TableDefinition.""" +else: + TableDefinitionLike = Union[ + "TableDefinition", + Mapping[str, dtypes.DType], + Iterable[ColumnDefinition], + jpy.JType, + ] + """A Union representing objects that can be coerced into a TableDefinition.""" + + +class TableDefinition(JObjectWrapper, Mapping): + """A Deephaven table definition, as a mapping from column name to ColumnDefinition.""" + + j_object_type = _JTableDefinition + + @staticmethod + def _to_j_table_definition(table_definition: TableDefinitionLike) -> jpy.JType: + if isinstance(table_definition, TableDefinition): + return table_definition.j_table_definition + if isinstance(table_definition, _JTableDefinition): + return table_definition + if isinstance(table_definition, Mapping): + for name in table_definition.keys(): + if not isinstance(name, str): + raise DHError( + f"Expected TableDefinitionLike Mapping to contain str keys, found type {type(name)}" + ) + for data_type in table_definition.values(): + if not isinstance(data_type, dtypes.DType): + raise DHError( + f"Expected TableDefinitionLike Mapping to contain DType values, found type {type(data_type)}" + ) + column_definitions = [ + col_def(name, data_type) for name, data_type in table_definition.items() + ] + elif isinstance(table_definition, Iterable): + for column_definition in table_definition: + if not isinstance(column_definition, 
ColumnDefinition): + raise DHError( + f"Expected TableDefinitionLike Iterable to contain ColumnDefinition values, found type {type(column_definition)}" + ) + column_definitions = table_definition + else: + raise DHError( + f"Unexpected TableDefinitionLike type: {type(table_definition)}" ) + return _JTableDefinition.of( + [col.j_column_definition for col in column_definitions] ) - return cols + + def __init__(self, table_definition: TableDefinitionLike): + """Construct a TableDefinition. + + Args: + table_definition (TableDefinitionLike): The table definition like object + + Returns: + A new TableDefinition + + Raises: + DHError + """ + self.j_table_definition = TableDefinition._to_j_table_definition( + table_definition + ) + + @property + def j_object(self) -> jpy.JType: + return self.j_table_definition + + @property + def table(self) -> Table: + """This table definition as a table.""" + return Table(_JTableTools.metaTable(self.j_table_definition)) + + def keys(self): + """The column names as a dictview.""" + return self._dict.keys() + + def items(self): + """The column name, column definition tuples as a dictview.""" + return self._dict.items() + + def values(self): + """The column definitions as a dictview.""" + return self._dict.values() + + @cached_property + def _dict(self) -> Dict[str, ColumnDefinition]: + return { + col.name: col + for col in [ + ColumnDefinition(j_col) + for j_col in self.j_table_definition.getColumnsArray() + ] + } + + def __getitem__(self, key) -> ColumnDefinition: + return self._dict[key] + + def __iter__(self): + return iter(self._dict) + + def __len__(self): + return len(self._dict) + + def __contains__(self, item): + return item in self._dict + + def __eq__(self, other): + return JObjectWrapper.__eq__(self, other) + + def __ne__(self, other): + return JObjectWrapper.__ne__(self, other) + + def __hash__(self): + return JObjectWrapper.__hash__(self) class Table(JObjectWrapper): @@ -435,11 +551,7 @@ def __init__(self, j_table: jpy.JType): self.j_table = jpy.cast(j_table, self.j_object_type) if self.j_table is None: raise DHError("j_table type is not io.deephaven.engine.table.Table") - self._definition = self.j_table.getDefinition() - self._schema = None - self._is_refreshing = None - self._update_graph = None - self._is_flat = None + self._definition = TableDefinition(self.j_table.getDefinition()) def __repr__(self): default_repr = super().__repr__() @@ -465,37 +577,37 @@ def size(self) -> int: @property def is_refreshing(self) -> bool: """Whether this table is refreshing.""" - if self._is_refreshing is None: - self._is_refreshing = self.j_table.isRefreshing() - return self._is_refreshing + return self.j_table.isRefreshing() @property def is_blink(self) -> bool: """Whether this table is a blink table.""" return _JBlinkTableTools.isBlink(self.j_table) - @property + @cached_property def update_graph(self) -> UpdateGraph: """The update graph of the table.""" - if self._update_graph is None: - self._update_graph = UpdateGraph(self.j_table.getUpdateGraph()) - return self._update_graph + return UpdateGraph(self.j_table.getUpdateGraph()) @property def is_flat(self) -> bool: """Whether this table is guaranteed to be flat, i.e. 
its row set will be from 0 to number of rows - 1.""" - if self._is_flat is None: - self._is_flat = self.j_table.isFlat() - return self._is_flat + return self.j_table.isFlat() @property - def columns(self) -> List[Column]: - """The column definitions of the table.""" - if self._schema: - return self._schema + def definition(self) -> TableDefinition: + """The table definition.""" + return self._definition + + @property + def column_names(self) -> List[str]: + """The column names of the table.""" + return list(self.definition.keys()) - self._schema = _td_to_columns(self._definition) - return self._schema + @property + def columns(self) -> List[ColumnDefinition]: + """The column definitions of the table.""" + return list(self.definition.values()) @property def meta_table(self) -> Table: @@ -2338,13 +2450,8 @@ def j_object(self) -> jpy.JType: def __init__(self, j_partitioned_table): self.j_partitioned_table = j_partitioned_table - self._schema = None + self._definition = None self._table = None - self._key_columns = None - self._unique_keys = None - self._constituent_column = None - self._constituent_changes_permitted = None - self._is_refreshing = None @classmethod def from_partitioned_table(cls, @@ -2352,18 +2459,18 @@ def from_partitioned_table(cls, key_cols: Union[str, List[str]] = None, unique_keys: bool = None, constituent_column: str = None, - constituent_table_columns: List[Column] = None, + constituent_table_columns: Optional[TableDefinitionLike] = None, constituent_changes_permitted: bool = None) -> PartitionedTable: """Creates a PartitionedTable from the provided underlying partitioned Table. - Note: key_cols, unique_keys, constituent_column, constituent_table_columns, + Note: key_cols, unique_keys, constituent_column, constituent_table_definition, constituent_changes_permitted must either be all None or all have values. When they are None, their values will be inferred as follows: | * key_cols: the names of all columns with a non-Table data type | * unique_keys: False | * constituent_column: the name of the first column with a Table data type - | * constituent_table_columns: the column definitions of the first cell (constituent table) in the constituent + | * constituent_table_definition: the table definitions of the first cell (constituent table) in the constituent column. Consequently, the constituent column can't be empty. 
| * constituent_changes_permitted: the value of table.is_refreshing @@ -2373,7 +2480,7 @@ def from_partitioned_table(cls, key_cols (Union[str, List[str]]): the key column name(s) of 'table' unique_keys (bool): whether the keys in 'table' are guaranteed to be unique constituent_column (str): the constituent column name in 'table' - constituent_table_columns (List[Column]): the column definitions of the constituent table + constituent_table_columns (Optional[TableDefinitionLike]): the table definitions of the constituent table constituent_changes_permitted (bool): whether the values of the constituent column can change Returns: @@ -2390,7 +2497,7 @@ def from_partitioned_table(cls, return PartitionedTable(j_partitioned_table=_JPartitionedTableFactory.of(table.j_table)) if all([arg is not None for arg in none_args]): - table_def = _JTableDefinition.of([col.j_column_definition for col in constituent_table_columns]) + table_def = TableDefinition(constituent_table_columns).j_table_definition j_partitioned_table = _JPartitionedTableFactory.of(table.j_table, j_array_list(to_sequence(key_cols)), unique_keys, @@ -2407,18 +2514,18 @@ def from_partitioned_table(cls, @classmethod def from_constituent_tables(cls, tables: List[Table], - constituent_table_columns: List[Column] = None) -> PartitionedTable: + constituent_table_columns: Optional[TableDefinitionLike] = None) -> PartitionedTable: """Creates a PartitionedTable with a single column named '__CONSTITUENT__' containing the provided constituent tables. The result PartitionedTable has no key columns, and both its unique_keys and constituent_changes_permitted - properties are set to False. When constituent_table_columns isn't provided, it will be set to the column + properties are set to False. When constituent_table_definition isn't provided, it will be set to the table definitions of the first table in the provided constituent tables. 
Args: tables (List[Table]): the constituent tables - constituent_table_columns (List[Column]): a list of column definitions compatible with all the constituent - tables, default is None + constituent_table_columns (Optional[TableDefinitionLike]): the table definition compatible with all the + constituent tables, default is None Returns: a PartitionedTable @@ -2430,37 +2537,31 @@ def from_constituent_tables(cls, if not constituent_table_columns: return PartitionedTable(j_partitioned_table=_JPartitionedTableFactory.ofTables(to_sequence(tables))) else: - table_def = _JTableDefinition.of([col.j_column_definition for col in constituent_table_columns]) + table_def = TableDefinition(constituent_table_columns).j_table_definition return PartitionedTable(j_partitioned_table=_JPartitionedTableFactory.ofTables(table_def, to_sequence(tables))) except Exception as e: raise DHError(e, "failed to create a PartitionedTable from constituent tables.") from e - @property + @cached_property def table(self) -> Table: """The underlying partitioned table.""" - if self._table is None: - self._table = Table(j_table=self.j_partitioned_table.table()) - return self._table + return Table(j_table=self.j_partitioned_table.table()) @property def update_graph(self) -> UpdateGraph: """The underlying partitioned table's update graph.""" return self.table.update_graph - @property + @cached_property def is_refreshing(self) -> bool: """Whether the underlying partitioned table is refreshing.""" - if self._is_refreshing is None: - self._is_refreshing = self.table.is_refreshing - return self._is_refreshing + return self.table.is_refreshing - @property + @cached_property def key_columns(self) -> List[str]: """The partition key column names.""" - if self._key_columns is None: - self._key_columns = list(self.j_partitioned_table.keyColumnNames().toArray()) - return self._key_columns + return list(self.j_partitioned_table.keyColumnNames().toArray()) def keys(self) -> Table: """Returns a Table containing all the keys of the underlying partitioned table.""" @@ -2469,32 +2570,31 @@ def keys(self) -> Table: else: return self.table.select_distinct(self.key_columns) - @property + @cached_property def unique_keys(self) -> bool: """Whether the keys in the underlying table must always be unique. If keys must be unique, one can expect that self.table.select_distinct(self.key_columns) and self.table.view(self.key_columns) operations always produce equivalent tables.""" - if self._unique_keys is None: - self._unique_keys = self.j_partitioned_table.uniqueKeys() - return self._unique_keys + return self.j_partitioned_table.uniqueKeys() - @property + @cached_property def constituent_column(self) -> str: """The name of the column containing constituent tables.""" - if self._constituent_column is None: - self._constituent_column = self.j_partitioned_table.constituentColumnName() - return self._constituent_column + return self.j_partitioned_table.constituentColumnName() + + @cached_property + def constituent_table_definition(self) -> TableDefinition: + """The table definitions for constituent tables. All constituent tables in a partitioned table have the + same table definitions.""" + return TableDefinition(self.j_partitioned_table.constituentDefinition()) @property - def constituent_table_columns(self) -> List[Column]: + def constituent_table_columns(self) -> List[ColumnDefinition]: """The column definitions for constituent tables. 
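# from_constituent_tables likewise accepts a TableDefinitionLike, and the new
# constituent_table_definition property returns it as a TableDefinition. A
# sketch with two small, identically shaped constituent tables; the column
# values are illustrative.
from deephaven import empty_table
from deephaven.table import PartitionedTable

t1 = empty_table(3).update(["X = i"])
t2 = empty_table(5).update(["X = i * 10"])

pt = PartitionedTable.from_constituent_tables([t1, t2], t1.definition)

assert pt.constituent_table_definition == t1.definition
# constituent_table_columns remains available as a plain list of
# ColumnDefinition for existing callers.
assert pt.constituent_table_columns == list(t1.definition.values())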
All constituent tables in a partitioned table have the same column definitions.""" - if not self._schema: - self._schema = _td_to_columns(self.j_partitioned_table.constituentDefinition()) + return list(self.constituent_table_definition.values()) - return self._schema - - @property + @cached_property def constituent_changes_permitted(self) -> bool: """Can the constituents of the underlying partitioned table change? Specifically, can the values of the constituent column change? @@ -2509,9 +2609,7 @@ def constituent_changes_permitted(self) -> bool: if the underlying partitioned table is refreshing. Also note that the underlying partitioned table must be refreshing if it contains any refreshing constituents. """ - if self._constituent_changes_permitted is None: - self._constituent_changes_permitted = self.j_partitioned_table.constituentChangesPermitted() - return self._constituent_changes_permitted + return self.j_partitioned_table.constituentChangesPermitted() def merge(self) -> Table: """Makes a new Table that contains all the rows from all the constituent tables. In the merged result, diff --git a/py/server/deephaven/table_factory.py b/py/server/deephaven/table_factory.py index 7efff4b534c..02316928573 100644 --- a/py/server/deephaven/table_factory.py +++ b/py/server/deephaven/table_factory.py @@ -5,7 +5,7 @@ """ This module provides various ways to make a Deephaven table. """ import datetime -from typing import Callable, List, Dict, Any, Union, Sequence, Tuple, Mapping +from typing import Callable, List, Dict, Any, Union, Sequence, Tuple, Mapping, Optional import jpy import numpy as np @@ -13,11 +13,11 @@ from deephaven import execution_context, DHError, time from deephaven._wrapper import JObjectWrapper -from deephaven.column import InputColumn, Column +from deephaven.column import InputColumn from deephaven.dtypes import DType, Duration, Instant from deephaven.execution_context import ExecutionContext from deephaven.jcompat import j_lambda, j_list_to_list, to_sequence -from deephaven.table import Table +from deephaven.table import Table, TableDefinition, TableDefinitionLike from deephaven.update_graph import auto_locking_ctx _JTableFactory = jpy.get_type("io.deephaven.engine.table.TableFactory") @@ -285,7 +285,7 @@ def value_names(self) -> List[str]: return j_list_to_list(self.j_input_table.getValueNames()) -def input_table(col_defs: Dict[str, DType] = None, init_table: Table = None, +def input_table(col_defs: Optional[TableDefinitionLike] = None, init_table: Table = None, key_cols: Union[str, Sequence[str]] = None) -> InputTable: """Creates an in-memory InputTable from either column definitions or an initial table. When key columns are provided, the InputTable will be keyed, otherwise it will be append-only. @@ -298,7 +298,7 @@ def input_table(col_defs: Dict[str, DType] = None, init_table: Table = None, The keyed input table has keys for each row and supports addition/deletion/modification of rows by the keys. 
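# input_table accepts any TableDefinitionLike for col_defs; with key_cols it
# is keyed, otherwise append-only. A sketch with illustrative columns; the
# row append assumes the existing InputTable.add helper.
from deephaven import dtypes, new_table
from deephaven.column import double_col, string_col
from deephaven.table_factory import input_table

it = input_table(
    col_defs={"Sym": dtypes.string, "Price": dtypes.double},
    key_cols="Sym",
)

# Rows are added with tables whose definition matches col_defs.
it.add(new_table([string_col("Sym", ["AAPL"]), double_col("Price", [189.5])]))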
Args: - col_defs (Dict[str, DType]): the column definitions + col_defs (Optional[TableDefinitionLike]): the table definition init_table (Table): the initial table key_cols (Union[str, Sequence[str]): the name(s) of the key column(s) @@ -316,8 +316,7 @@ def input_table(col_defs: Dict[str, DType] = None, init_table: Table = None, raise ValueError("both column definitions and init table are provided.") if col_defs: - j_arg_1 = _JTableDefinition.of( - [Column(name=n, data_type=t).j_column_definition for n, t in col_defs.items()]) + j_arg_1 = TableDefinition(col_defs).j_table_definition else: j_arg_1 = init_table.j_table diff --git a/py/server/tests/test_barrage.py b/py/server/tests/test_barrage.py index 28c0a49b6aa..7049d2566ff 100644 --- a/py/server/tests/test_barrage.py +++ b/py/server/tests/test_barrage.py @@ -76,7 +76,7 @@ def test_subscribe(self): session = barrage_session(host="localhost", port=10000, auth_type="Anonymous") t = session.subscribe(ticket=self.shared_ticket.bytes) self.assertEqual(t.size, 1000) - self.assertEqual(len(t.columns), 2) + self.assertEqual(len(t.definition), 2) sp = t.snapshot() self.assertEqual(sp.size, 1000) t1 = t.update("Z = X + Y") @@ -119,7 +119,7 @@ def test_snapshot(self): session = barrage_session(host="localhost", port=10000, auth_type="Anonymous") t = session.snapshot(self.shared_ticket.bytes) self.assertEqual(t.size, 1000) - self.assertEqual(len(t.columns), 2) + self.assertEqual(len(t.definition), 2) t1 = t.update("Z = X + Y") self.assertEqual(t1.size, 1000) t2 = session.snapshot(self.shared_ticket.bytes) diff --git a/py/server/tests/test_column.py b/py/server/tests/test_column.py index 57c94d53fc1..6531b6c3fa3 100644 --- a/py/server/tests/test_column.py +++ b/py/server/tests/test_column.py @@ -12,7 +12,7 @@ from deephaven import DHError, dtypes, new_table, time as dhtime from deephaven import empty_table from deephaven.column import byte_col, char_col, short_col, bool_col, int_col, long_col, float_col, double_col, \ - string_col, datetime_col, jobj_col, ColumnType + string_col, datetime_col, jobj_col, ColumnType, col_def from deephaven.constants import MAX_BYTE, MAX_SHORT, MAX_INT, MAX_LONG from deephaven.jcompat import j_array_list from tests.testbase import BaseTestCase @@ -136,7 +136,8 @@ def test_datetime_col(self): inst = dhtime.to_j_instant(round(time.time())) dt = datetime.datetime.now() _ = datetime_col(name="Datetime", data=[inst, dt, None]) - self.assertEqual(_.data_type, dtypes.Instant) + self.assertEqual(_._column_definition.name, "Datetime") + self.assertEqual(_._column_definition.data_type, dtypes.Instant) ts = pd.Timestamp(dt) np_dt = np.datetime64(dt) @@ -144,17 +145,46 @@ def test_datetime_col(self): # test if we can convert to numpy datetime64 array np.array([pd.Timestamp(dt).to_numpy() for dt in data], dtype=np.datetime64) _ = datetime_col(name="Datetime", data=data) - self.assertEqual(_.data_type, dtypes.Instant) + self.assertEqual(_._column_definition.name, "Datetime") + self.assertEqual(_._column_definition.data_type, dtypes.Instant) data = np.array(['1970-01-01T00:00:00.000-07:00', '2020-01-01T01:00:00.000+07:00']) np.array([pd.Timestamp(str(dt)).to_numpy() for dt in data], dtype=np.datetime64) _ = datetime_col(name="Datetime", data=data) - self.assertEqual(_.data_type, dtypes.Instant) + self.assertEqual(_._column_definition.name, "Datetime") + self.assertEqual(_._column_definition.data_type, dtypes.Instant) data = np.array([1, -1]) data = data.astype(np.int64) _ = datetime_col(name="Datetime", data=data) - 
self.assertEqual(_.data_type, dtypes.Instant) + self.assertEqual(_._column_definition.name, "Datetime") + self.assertEqual(_._column_definition.data_type, dtypes.Instant) + + def test_col_def_simple(self): + foo_def = col_def("Foo", dtypes.int32) + self.assertEquals(foo_def.name, "Foo") + self.assertEquals(foo_def.data_type, dtypes.int32) + self.assertEquals(foo_def.component_type, None) + self.assertEquals(foo_def.column_type, ColumnType.NORMAL) + + def test_col_def_array(self): + foo_def = col_def("Foo", dtypes.int32_array) + self.assertEquals(foo_def.name, "Foo") + self.assertEquals(foo_def.data_type, dtypes.int32_array) + self.assertEquals(foo_def.component_type, dtypes.int32) + self.assertEquals(foo_def.column_type, ColumnType.NORMAL) + + def test_col_def_partitioning(self): + foo_def = col_def("Foo", dtypes.string, column_type=ColumnType.PARTITIONING) + self.assertEquals(foo_def.name, "Foo") + self.assertEquals(foo_def.data_type, dtypes.string) + self.assertEquals(foo_def.component_type, None) + self.assertEquals(foo_def.column_type, ColumnType.PARTITIONING) + + def test_col_def_invalid_component_type(self): + with self.assertRaises(DHError): + col_def("Foo", dtypes.int32_array, component_type=dtypes.int64) + @dataclass class CustomClass: diff --git a/py/server/tests/test_csv.py b/py/server/tests/test_csv.py index 3de09e9a570..88e291092cd 100644 --- a/py/server/tests/test_csv.py +++ b/py/server/tests/test_csv.py @@ -20,8 +20,7 @@ def test_read_header(self): col_types = [dtypes.string, dtypes.long, dtypes.float64] table_header = {k: v for k, v in zip(col_names, col_types)} t = read_csv('tests/data/test_csv.csv', header=table_header) - t_col_names = [col.name for col in t.columns] - self.assertEqual(col_names, t_col_names) + self.assertEqual(col_names, t.column_names) def test_read_error_col_type(self): col_names = ["Strings", "Longs", "Floats"] @@ -44,9 +43,9 @@ def test_read_error_quote(self): def test_write(self): t = read_csv("tests/data/small_sample.csv") write_csv(t, "./test_write.csv") - t_cols = [col.name for col in t.columns] + t_cols = t.column_names t = read_csv("./test_write.csv") - self.assertEqual(t_cols, [col.name for col in t.columns]) + self.assertEqual(t_cols, t.column_names) col_names = ["Strings", "Longs", "Floats"] col_types = [dtypes.string, dtypes.long, dtypes.float64] @@ -54,7 +53,7 @@ def test_write(self): t = read_csv('tests/data/test_csv.csv', header=table_header) write_csv(t, "./test_write.csv", cols=col_names) t = read_csv('./test_write.csv') - self.assertEqual(col_names, [c.name for c in t.columns]) + self.assertEqual(col_names, t.column_names) import os os.remove("./test_write.csv") diff --git a/py/server/tests/test_data_index.py b/py/server/tests/test_data_index.py index 5b3aad01391..14b21407f7c 100644 --- a/py/server/tests/test_data_index.py +++ b/py/server/tests/test_data_index.py @@ -47,7 +47,7 @@ def test_keys(self): self.assertEqual(["X", "Y"], self.data_index.keys) def test_backing_table(self): - self.assertEqual(3, len(self.data_index.table.columns)) + self.assertEqual(3, len(self.data_index.table.definition)) self.assertEqual(10, self.data_index.table.size) di = data_index(self.data_index.table, self.data_index.keys[0:1]) self.assertEqual(1, len(di.keys)) diff --git a/py/server/tests/test_dbc.py b/py/server/tests/test_dbc.py index 31868028e08..fd3cba4fbf2 100644 --- a/py/server/tests/test_dbc.py +++ b/py/server/tests/test_dbc.py @@ -50,7 +50,7 @@ def test_read_sql_connectorx(self): query = "SELECT t_ts, t_id, t_instrument, t_exchange, t_price, 
t_size FROM CRYPTO_TRADES LIMIT 10" postgres_url = "postgresql://test:test@postgres:5432/test" dh_table = read_sql(conn=postgres_url, query=query) - self.assertEqual(len(dh_table.columns), 6) + self.assertEqual(len(dh_table.definition), 6) self.assertEqual(dh_table.size, 10) with self.assertRaises(DHError) as cm: @@ -63,13 +63,13 @@ def test_read_sql(self): with self.subTest("odbc"): connection_string = 'Driver={PostgreSQL};Server=postgres;Port=5432;Database=test;Uid=test;Pwd=test;' dh_table = read_sql(conn=connection_string, query=query, driver="odbc") - self.assertEqual(len(dh_table.columns), 6) + self.assertEqual(len(dh_table.definition), 6) self.assertEqual(dh_table.size, 10) with self.subTest("adbc"): uri = "postgresql://postgres:5432/test?user=test&password=test" dh_table = read_sql(conn=uri, query=query, driver="adbc") - self.assertEqual(len(dh_table.columns), 6) + self.assertEqual(len(dh_table.definition), 6) self.assertEqual(dh_table.size, 10) if turbodbc_installed(): @@ -79,7 +79,7 @@ def test_read_sql(self): connection_string = "Driver={PostgreSQL};Server=postgres;Port=5432;Database=test;Uid=test;Pwd=test;" with turbodbc.connect(connection_string=connection_string) as conn: dh_table = read_sql(conn=conn, query=query, driver="odbc") - self.assertEqual(len(dh_table.columns), 6) + self.assertEqual(len(dh_table.definition), 6) self.assertEqual(dh_table.size, 10) with self.subTest("adbc-connection"): @@ -87,7 +87,7 @@ def test_read_sql(self): uri = "postgresql://postgres:5432/test?user=test&password=test" with adbc_driver_postgresql.dbapi.connect(uri) as conn: dh_table = read_sql(conn=conn, query=query, driver="adbc") - self.assertEqual(len(dh_table.columns), 6) + self.assertEqual(len(dh_table.definition), 6) self.assertEqual(dh_table.size, 10) with self.assertRaises(DHError) as cm: diff --git a/py/server/tests/test_experiments.py b/py/server/tests/test_experiments.py index 6de28871a98..fd04cce0c65 100644 --- a/py/server/tests/test_experiments.py +++ b/py/server/tests/test_experiments.py @@ -31,13 +31,13 @@ def test_full_outer_join(self): rt = full_outer_join(t1, t2, on="a = c") self.assertTrue(rt.is_refreshing) self.wait_ticking_table_update(rt, row_count=100, timeout=5) - self.assertEqual(len(rt.columns), len(t1.columns) + len(t2.columns)) + self.assertEqual(len(rt.definition), len(t1.definition) + len(t2.definition)) with self.subTest("full outer join with no matching keys"): t1 = empty_table(2).update(["X = i", "a = i"]) rt = full_outer_join(self.test_table, t1, joins=["Y = a"]) self.assertEqual(rt.size, t1.size * self.test_table.size) - self.assertEqual(len(rt.columns), 1 + len(self.test_table.columns)) + self.assertEqual(len(rt.definition), 1 + len(self.test_table.definition)) with self.subTest("Conflicting column names"): with self.assertRaises(DHError) as cm: @@ -52,13 +52,13 @@ def test_left_outer_join(self): rt = left_outer_join(t1, t2, on="a = c") self.assertTrue(rt.is_refreshing) self.wait_ticking_table_update(rt, row_count=100, timeout=5) - self.assertEqual(len(rt.columns), len(t1.columns) + len(t2.columns)) + self.assertEqual(len(rt.definition), len(t1.definition) + len(t2.definition)) with self.subTest("left outer join with no matching keys"): t1 = empty_table(2).update(["X = i", "a = i"]) rt = left_outer_join(self.test_table, t1, joins=["Y = a"]) self.assertEqual(rt.size, t1.size * self.test_table.size) - self.assertEqual(len(rt.columns), 1 + len(self.test_table.columns)) + self.assertEqual(len(rt.definition), 1 + len(self.test_table.definition)) with 
self.subTest("Conflicting column names"): with self.assertRaises(DHError) as cm: diff --git a/py/server/tests/test_iceberg.py b/py/server/tests/test_iceberg.py index 62ba31e6636..8934299b74d 100644 --- a/py/server/tests/test_iceberg.py +++ b/py/server/tests/test_iceberg.py @@ -4,7 +4,7 @@ import jpy from deephaven import dtypes -from deephaven.column import Column, ColumnType +from deephaven.column import col_def, ColumnType from tests.testbase import BaseTestCase from deephaven.experimental import s3, iceberg @@ -60,12 +60,10 @@ def test_instruction_create_with_table_definition_dict(self): def test_instruction_create_with_table_definition_list(self): table_def=[ - Column( - "Partition", dtypes.int32, column_type=ColumnType.PARTITIONING - ), - Column("x", dtypes.int32), - Column("y", dtypes.double), - Column("z", dtypes.double), + col_def("Partition", dtypes.int32, column_type=ColumnType.PARTITIONING), + col_def("x", dtypes.int32), + col_def("y", dtypes.double), + col_def("z", dtypes.double), ] iceberg_instructions = iceberg.IcebergInstructions(table_definition=table_def) diff --git a/py/server/tests/test_numpy.py b/py/server/tests/test_numpy.py index 725e69602f1..1c935ed04f7 100644 --- a/py/server/tests/test_numpy.py +++ b/py/server/tests/test_numpy.py @@ -71,14 +71,14 @@ def tearDown(self) -> None: super().tearDown() def test_to_numpy(self): - for col in self.test_table.columns: - with self.subTest(f"test single column to numpy- {col.name}"): - np_array = to_numpy(self.test_table, [col.name]) + for col_name in self.test_table.definition: + with self.subTest(f"test single column to numpy- {col_name}"): + np_array = to_numpy(self.test_table, [col_name]) self.assertEqual((2, 1), np_array.shape) - np.array_equal(np_array, self.np_array_dict[col.name]) + np.array_equal(np_array, self.np_array_dict[col_name]) try: - to_numpy(self.test_table, [col.name for col in self.test_table.columns]) + to_numpy(self.test_table, self.test_table.column_names) except DHError as e: self.assertIn("same data type", e.root_cause) @@ -90,17 +90,17 @@ def test_to_numpy(self): float_col(name="Float3", data=[1111.01111, -1111.01111]), float_col(name="Float4", data=[11111.011111, -11111.011111])] tmp_table = new_table(cols=input_cols) - np_array = to_numpy(tmp_table, [col.name for col in tmp_table.columns]) + np_array = to_numpy(tmp_table, tmp_table.column_names) self.assertEqual((2, 5), np_array.shape) def test_to_numpy_remap(self): - for col in self.test_table.columns: - with self.subTest(f"test single column to numpy - {col.name}"): - np_array = to_numpy(self.test_table, [col.name]) + for col_name in self.test_table.definition: + with self.subTest(f"test single column to numpy - {col_name}"): + np_array = to_numpy(self.test_table, [col_name]) self.assertEqual((2, 1), np_array.shape) try: - to_numpy(self.test_table, [col.name for col in self.test_table.columns]) + to_numpy(self.test_table, self.test_table.column_names) except DHError as e: self.assertIn("same data type", e.root_cause) @@ -140,12 +140,12 @@ def test_to_table(self): float_col(name="Float3", data=[1111.01111, -1111.01111]), float_col(name="Float4", data=[11111.011111, -11111.011111])] tmp_table = new_table(cols=input_cols) - np_array = to_numpy(tmp_table, [col.name for col in tmp_table.columns]) - tmp_table2 = to_table(np_array, [col.name for col in tmp_table.columns]) + np_array = to_numpy(tmp_table, tmp_table.column_names) + tmp_table2 = to_table(np_array, tmp_table.column_names) self.assert_table_equals(tmp_table2, tmp_table) with 
self.assertRaises(DHError) as cm: - tmp_table3 = to_table(np_array[:, [0, 1, 3]], [col.name for col in tmp_table.columns]) + tmp_table3 = to_table(np_array[:, [0, 1, 3]], tmp_table.column_names) self.assertIn("doesn't match", cm.exception.root_cause) def get_resource_path(self, resource_path) -> str: diff --git a/py/server/tests/test_parquet.py b/py/server/tests/test_parquet.py index 1a5d3b3e31c..50c8cf6f68e 100644 --- a/py/server/tests/test_parquet.py +++ b/py/server/tests/test_parquet.py @@ -14,7 +14,7 @@ from deephaven import DHError, empty_table, dtypes, new_table from deephaven import arrow as dharrow -from deephaven.column import InputColumn, Column, ColumnType, string_col, int_col, char_col, long_col, short_col +from deephaven.column import InputColumn, ColumnType, col_def, string_col, int_col, char_col, long_col, short_col from deephaven.pandas import to_pandas, to_table from deephaven.parquet import (write, batch_write, read, delete, ColumnInstruction, ParquetFileLayout, write_partitioned) @@ -597,12 +597,10 @@ def test_read_kv_partitioned(self): actual = read( kv_dir, table_definition=[ - Column( - "Partition", dtypes.int32, column_type=ColumnType.PARTITIONING - ), - Column("x", dtypes.int32), - Column("y", dtypes.double), - Column("z", dtypes.double), + col_def("Partition", dtypes.int32, column_type=ColumnType.PARTITIONING), + col_def("x", dtypes.int32), + col_def("y", dtypes.double), + col_def("z", dtypes.double), ], file_layout=ParquetFileLayout.KV_PARTITIONED, ) @@ -655,7 +653,7 @@ def test_write_partitioned_data(self): shutil.rmtree(root_dir) def verify_table_from_disk(table): - self.assertTrue(len(table.columns)) + self.assertTrue(len(table.definition)) self.assertTrue(table.columns[0].name == "X") self.assertTrue(table.columns[0].column_type == ColumnType.PARTITIONING) self.assert_table_equals(table.select().sort(["X", "Y"]), source.sort(["X", "Y"])) @@ -696,9 +694,9 @@ def verify_file_names(): shutil.rmtree(root_dir) table_definition = [ - Column("X", dtypes.string, column_type=ColumnType.PARTITIONING), - Column("Y", dtypes.string), - Column("Number", dtypes.int32) + col_def("X", dtypes.string, column_type=ColumnType.PARTITIONING), + col_def("Y", dtypes.string), + col_def("Number", dtypes.int32) ] write_partitioned(source, table_definition=table_definition, destination_dir=root_dir, base_name=base_name, max_dictionary_keys=max_dictionary_keys) diff --git a/py/server/tests/test_partitioned_table.py b/py/server/tests/test_partitioned_table.py index 8da63d726a8..3059a6c40fb 100644 --- a/py/server/tests/test_partitioned_table.py +++ b/py/server/tests/test_partitioned_table.py @@ -65,6 +65,9 @@ def test_constituent_change_permitted(self): def test_constituent_table_columns(self): self.assertEqual(self.test_table.columns, self.partitioned_table.constituent_table_columns) + def test_constituent_table_definition(self): + self.assertEqual(self.test_table.definition, self.partitioned_table.constituent_table_definition) + def test_merge(self): t = self.partitioned_table.merge() self.assert_table_equals(t, self.test_table) @@ -188,7 +191,7 @@ def test_from_partitioned_table(self): key_cols="Y", unique_keys=True, constituent_column="aggPartition", - constituent_table_columns=test_table.columns, + constituent_table_columns=test_table.definition, constituent_changes_permitted=True, ) self.assertEqual(pt.key_columns, pt1.key_columns) @@ -201,7 +204,7 @@ def test_from_partitioned_table(self): key_cols="Y", unique_keys=True, constituent_column="Non-existing", - 
constituent_table_columns=test_table.columns, + constituent_table_columns=test_table.definition, constituent_changes_permitted=True, ) self.assertIn("no column named", str(cm.exception)) @@ -222,7 +225,7 @@ def test_from_constituent_tables(self): self.assertIn("IncompatibleTableDefinitionException", str(cm.exception)) with self.subTest("Compatible table definition"): - pt = PartitionedTable.from_constituent_tables([test_table, test_table1, test_table3], test_table.columns) + pt = PartitionedTable.from_constituent_tables([test_table, test_table1, test_table3], test_table.definition) def test_keys(self): keys_table = self.partitioned_table.keys() diff --git a/py/server/tests/test_pt_proxy.py b/py/server/tests/test_pt_proxy.py index 982a582fa6f..5cbe973dc0c 100644 --- a/py/server/tests/test_pt_proxy.py +++ b/py/server/tests/test_pt_proxy.py @@ -127,7 +127,7 @@ def test_USV(self): result_pt_proxy = op( self.pt_proxy, formulas=["a", "c", "Sum = a + b + c + d"]) for rct, ct in zip(result_pt_proxy.target.constituent_tables, self.pt_proxy.target.constituent_tables): - self.assertTrue(len(rct.columns) >= 3) + self.assertTrue(len(rct.definition) >= 3) self.assertLessEqual(rct.size, ct.size) def test_select_distinct(self): @@ -144,7 +144,7 @@ def test_natural_join(self): right_table = self.test_table.drop_columns(["b", "c"]).head(5) joined_pt_proxy = pt_proxy.natural_join(right_table, on="a", joins=["d", "e"]) for ct in joined_pt_proxy.target.constituent_tables: - self.assertEqual(len(ct.columns), 5) + self.assertEqual(len(ct.definition), 5) with self.subTest("Join with another Proxy"): with self.assertRaises(DHError) as cm: @@ -163,7 +163,7 @@ def test_natural_join(self): right_proxy = self.test_table.drop_columns(["b", "d"]).partition_by("c").proxy() joined_pt_proxy = pt_proxy.natural_join(right_proxy, on="a", joins="e") for ct in joined_pt_proxy.target.constituent_tables: - self.assertEqual(len(ct.columns), 4) + self.assertEqual(len(ct.definition), 4) def test_exact_join(self): with self.subTest("Join with a Table"): @@ -171,7 +171,7 @@ def test_exact_join(self): right_table = self.test_table.drop_columns(["b", "c"]).group_by('a') joined_pt_proxy = pt_proxy.exact_join(right_table, on="a", joins=["d", "e"]) for ct, jct in zip(pt_proxy.target.constituent_tables, joined_pt_proxy.target.constituent_tables): - self.assertEqual(len(jct.columns), 5) + self.assertEqual(len(jct.definition), 5) self.assertEqual(ct.size, jct.size) self.assertLessEqual(jct.size, right_table.size) @@ -180,7 +180,7 @@ def test_exact_join(self): right_proxy = self.test_table.drop_columns(["b", "d"]).partition_by("c").proxy() joined_pt_proxy = pt_proxy.exact_join(right_proxy, on="a", joins="e") for ct, jct in zip(pt_proxy.target.constituent_tables, joined_pt_proxy.target.constituent_tables): - self.assertEqual(len(jct.columns), 4) + self.assertEqual(len(jct.definition), 4) self.assertEqual(ct.size, jct.size) self.assertLessEqual(jct.size, right_table.size) @@ -247,7 +247,7 @@ def test_count_by(self): agg_pt_proxy = self.pt_proxy.count_by(col="cnt", by=["a"]) for gct, ct in zip(agg_pt_proxy.target.constituent_tables, self.pt_proxy.target.constituent_tables): self.assertLessEqual(gct.size, ct.size) - self.assertEqual(len(gct.columns), 2) + self.assertEqual(len(gct.definition), 2) def test_dedicated_agg(self): ops = [ @@ -268,7 +268,7 @@ def test_dedicated_agg(self): agg_pt_proxy = op(self.pt_proxy, by=["a", "b"]) for gct, ct in zip(agg_pt_proxy.target.constituent_tables, self.pt_proxy.target.constituent_tables): 
self.assertLessEqual(gct.size, ct.size) - self.assertEqual(len(gct.columns), len(ct.columns)) + self.assertEqual(len(gct.definition), len(ct.definition)) wops = [PartitionedTableProxy.weighted_avg_by, PartitionedTableProxy.weighted_sum_by, @@ -279,7 +279,7 @@ def test_dedicated_agg(self): agg_pt_proxy = wop(self.pt_proxy, wcol="e", by=["a", "b"]) for gct, ct in zip(agg_pt_proxy.target.constituent_tables, self.pt_proxy.target.constituent_tables): self.assertLessEqual(gct.size, ct.size) - self.assertEqual(len(gct.columns), len(ct.columns) - 1) + self.assertEqual(len(gct.definition), len(ct.definition) - 1) def test_agg_by(self): aggs = [ @@ -295,7 +295,7 @@ def test_agg_by(self): agg_pt_proxy = self.pt_proxy.agg_by(aggs=aggs, by=["a"]) for gct, ct in zip(agg_pt_proxy.target.constituent_tables, self.pt_proxy.target.constituent_tables): self.assertLessEqual(gct.size, ct.size) - self.assertEqual(len(gct.columns), 8) + self.assertEqual(len(gct.definition), 8) def test_agg_all_by(self): aggs = [ diff --git a/py/server/tests/test_table.py b/py/server/tests/test_table.py index 6b7ecf2168c..06e0ab2355a 100644 --- a/py/server/tests/test_table.py +++ b/py/server/tests/test_table.py @@ -14,7 +14,7 @@ from deephaven.html import to_html from deephaven.jcompat import j_hashmap from deephaven.pandas import to_pandas -from deephaven.table import Table, SearchDisplayMode, table_diff +from deephaven.table import Table, TableDefinition, SearchDisplayMode, table_diff from tests.testbase import BaseTestCase, table_equals @@ -84,9 +84,19 @@ def test_eq(self): t = self.test_table.where(["a > 500"]) self.assertNotEqual(t, self.test_table) + def test_definition(self): + expected = TableDefinition({ + "a": dtypes.int32, + "b": dtypes.int32, + "c": dtypes.int32, + "d": dtypes.int32, + "e": dtypes.int32 + }) + self.assertEquals(expected, self.test_table.definition) + def test_meta_table(self): t = self.test_table.meta_table - self.assertEqual(len(self.test_table.columns), t.size) + self.assertEqual(len(self.test_table.definition), t.size) def test_coalesce(self): t = self.test_table.update_view(["A = a * b"]) @@ -100,45 +110,45 @@ def test_flatten(self): self.assertTrue(ct.is_flat) def test_drop_columns(self): - column_names = [f.name for f in self.test_table.columns] + column_names = self.test_table.column_names result_table = self.test_table.drop_columns(cols=column_names[:-1]) - self.assertEqual(1, len(result_table.columns)) + self.assertEqual(1, len(result_table.definition)) result_table = self.test_table.drop_columns(cols=column_names[-1]) - self.assertEqual(1, len(self.test_table.columns) - len(result_table.columns)) + self.assertEqual(1, len(self.test_table.definition) - len(result_table.definition)) def test_move_columns(self): - column_names = [f.name for f in self.test_table.columns] + column_names = self.test_table.column_names cols_to_move = column_names[::2] with self.subTest("move-columns"): result_table = self.test_table.move_columns(1, cols_to_move) - result_cols = [f.name for f in result_table.columns] + result_cols = result_table.column_names self.assertEqual(cols_to_move, result_cols[1: len(cols_to_move) + 1]) with self.subTest("move-columns-up"): result_table = self.test_table.move_columns_up(cols_to_move) - result_cols = [f.name for f in result_table.columns] + result_cols = result_table.column_names self.assertEqual(cols_to_move, result_cols[: len(cols_to_move)]) with self.subTest("move-columns-down"): result_table = self.test_table.move_columns_down(cols_to_move) - result_cols = [f.name for f in 
result_table.columns] + result_cols = result_table.column_names self.assertEqual(cols_to_move, result_cols[-len(cols_to_move):]) cols_to_move = column_names[-1] with self.subTest("move-column"): result_table = self.test_table.move_columns(1, cols_to_move) - result_cols = [f.name for f in result_table.columns] + result_cols = result_table.column_names self.assertEqual([cols_to_move], result_cols[1: len(cols_to_move) + 1]) with self.subTest("move-column-up"): result_table = self.test_table.move_columns_up(cols_to_move) - result_cols = [f.name for f in result_table.columns] + result_cols = result_table.column_names self.assertEqual([cols_to_move], result_cols[: len(cols_to_move)]) with self.subTest("move-column-down"): result_table = self.test_table.move_columns_down(cols_to_move) - result_cols = [f.name for f in result_table.columns] + result_cols = result_table.column_names self.assertEqual([cols_to_move], result_cols[-len(cols_to_move):]) def test_rename_columns(self): @@ -147,10 +157,10 @@ def test_rename_columns(self): ] new_names = [cn.split("=")[0].strip() for cn in cols_to_rename] result_table = self.test_table.rename_columns(cols_to_rename) - result_cols = [f.name for f in result_table.columns] + result_cols = result_table.column_names self.assertEqual(new_names, result_cols[::2]) result_table = self.test_table.rename_columns(cols_to_rename[0]) - result_cols = [f.name for f in result_table.columns] + result_cols = result_table.column_names self.assertEqual(new_names[0], result_cols[::2][0]) def test_update_error(self): @@ -174,14 +184,14 @@ def test_USV(self): result_table = op( self.test_table, formulas=["a", "c", "Sum = a + b + c + d"]) self.assertIsNotNone(result_table) - self.assertTrue(len(result_table.columns) >= 3) + self.assertTrue(len(result_table.definition) >= 3) self.assertLessEqual(result_table.size, self.test_table.size) for op in ops: with self.subTest(op=op): result_table = op(self.test_table, formulas="Sum = a + b + c + d") self.assertIsNotNone(result_table) - self.assertTrue(len(result_table.columns) >= 1) + self.assertTrue(len(result_table.definition) >= 1) self.assertLessEqual(result_table.size, self.test_table.size) def test_select_distinct(self): @@ -430,10 +440,10 @@ def test_dedicated_agg(self): for wop in wops: with self.subTest(wop): result_table = wop(self.test_table, wcol='e', by=["a", "b"]) - self.assertEqual(len(result_table.columns), len(self.test_table.columns) - 1) + self.assertEqual(len(result_table.definition), len(self.test_table.definition) - 1) result_table = wop(self.test_table, wcol='e') - self.assertEqual(len(result_table.columns), len(self.test_table.columns) - 1) + self.assertEqual(len(result_table.definition), len(self.test_table.definition) - 1) def test_count_by(self): num_distinct_a = self.test_table.select_distinct(formulas=["a"]).size @@ -530,26 +540,26 @@ def test_snapshot_when(self): snapshot = self.test_table.snapshot_when(t) self.wait_ticking_table_update(snapshot, row_count=1, timeout=5) self.assertEqual(self.test_table.size, snapshot.size) - self.assertEqual(len(t.columns) + len(self.test_table.columns), len(snapshot.columns)) + self.assertEqual(len(t.definition) + len(self.test_table.definition), len(snapshot.definition)) with self.subTest("initial=True"): snapshot = self.test_table.snapshot_when(t, initial=True) self.assertEqual(self.test_table.size, snapshot.size) - self.assertEqual(len(t.columns) + len(self.test_table.columns), len(snapshot.columns)) + self.assertEqual(len(t.definition) + len(self.test_table.definition), 
len(snapshot.definition)) with self.subTest("stamp_cols=\"X\""): snapshot = self.test_table.snapshot_when(t, stamp_cols="X") - self.assertEqual(len(snapshot.columns), len(self.test_table.columns) + 1) + self.assertEqual(len(snapshot.definition), len(self.test_table.definition) + 1) with self.subTest("stamp_cols=[\"X\", \"Y\"]"): snapshot = self.test_table.snapshot_when(t, stamp_cols=["X", "Y"]) - self.assertEqual(len(snapshot.columns), len(self.test_table.columns) + 2) + self.assertEqual(len(snapshot.definition), len(self.test_table.definition) + 2) def test_snapshot_when_with_history(self): t = time_table("PT00:00:01") snapshot_hist = self.test_table.snapshot_when(t, history=True) self.wait_ticking_table_update(snapshot_hist, row_count=1, timeout=5) - self.assertEqual(1 + len(self.test_table.columns), len(snapshot_hist.columns)) + self.assertEqual(1 + len(self.test_table.definition), len(snapshot_hist.definition)) self.assertEqual(self.test_table.size, snapshot_hist.size) t = time_table("PT0.1S").update("X = i % 2 == 0 ? i : i - 1").sort("X").tail(10) @@ -1020,7 +1030,7 @@ def test_range_join(self): right_table = self.test_table.select_distinct().sort("b").drop_columns("e") result_table = left_table.range_join(right_table, on=["a = a", "c < b < e"], aggs=aggs) self.assertEqual(result_table.size, left_table.size) - self.assertEqual(len(result_table.columns), len(left_table.columns) + len(aggs)) + self.assertEqual(len(result_table.definition), len(left_table.definition) + len(aggs)) with self.assertRaises(DHError): time_table("PT00:00:00.001").update("a = i").range_join(right_table, on=["a = a", "a < b < c"], aggs=aggs) diff --git a/py/server/tests/test_table_definition.py b/py/server/tests/test_table_definition.py new file mode 100644 index 00000000000..d4d2cd34f86 --- /dev/null +++ b/py/server/tests/test_table_definition.py @@ -0,0 +1,271 @@ +# +# Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending +# +import unittest +from typing import Mapping +from deephaven import dtypes, new_table, DHError +from deephaven.table import TableDefinition +from deephaven.column import col_def, string_col, bool_col +from tests.testbase import BaseTestCase + + +class TableDefinitionTestCase(BaseTestCase): + def setUp(self): + super().setUp() + self.test_definition = TableDefinition( + { + "Bool": dtypes.bool_, + "Char": dtypes.char, + "Short": dtypes.short, + "Int": dtypes.int32, + "Long": dtypes.int64, + "Float": dtypes.float32, + "Double": dtypes.float64, + "String": dtypes.string, + "Instant": dtypes.Instant, + } + ) + + def tearDown(self) -> None: + self.test_definition = None + super().tearDown() + + def test_is_mapping(self): + self.assertTrue(isinstance(self.test_definition, Mapping)) + + def test_length(self): + self.assertEquals(9, len(self.test_definition)) + + def test_contains(self): + self.assertTrue("Bool" in self.test_definition) + self.assertTrue("Char" in self.test_definition) + self.assertTrue("Short" in self.test_definition) + self.assertTrue("Int" in self.test_definition) + self.assertTrue("Long" in self.test_definition) + self.assertTrue("Float" in self.test_definition) + self.assertTrue("Double" in self.test_definition) + self.assertTrue("String" in self.test_definition) + self.assertTrue("Instant" in self.test_definition) + self.assertFalse("FooBarBaz" in self.test_definition) + + def test_getitem(self): + self.assertEquals(col_def("Bool", dtypes.bool_), self.test_definition["Bool"]) + self.assertEquals(col_def("Char", dtypes.char), self.test_definition["Char"]) + 
self.assertEquals(col_def("Short", dtypes.short), self.test_definition["Short"]) + self.assertEquals(col_def("Int", dtypes.int32), self.test_definition["Int"]) + self.assertEquals(col_def("Long", dtypes.int64), self.test_definition["Long"]) + self.assertEquals( + col_def("Float", dtypes.float32), self.test_definition["Float"] + ) + self.assertEquals( + col_def("Double", dtypes.float64), + self.test_definition["Double"], + ) + self.assertEquals( + col_def("String", dtypes.string), self.test_definition["String"] + ) + self.assertEquals( + col_def("Instant", dtypes.Instant), + self.test_definition["Instant"], + ) + with self.assertRaises(KeyError): + self.test_definition["FooBarBaz"] + + def test_get(self): + self.assertEquals( + col_def("Bool", dtypes.bool_), self.test_definition.get("Bool") + ) + self.assertEquals( + col_def("Char", dtypes.char), self.test_definition.get("Char") + ) + self.assertEquals( + col_def("Short", dtypes.short), + self.test_definition.get("Short"), + ) + self.assertEquals(col_def("Int", dtypes.int32), self.test_definition.get("Int")) + self.assertEquals( + col_def("Long", dtypes.int64), self.test_definition.get("Long") + ) + self.assertEquals( + col_def("Float", dtypes.float32), + self.test_definition.get("Float"), + ) + self.assertEquals( + col_def("Double", dtypes.float64), + self.test_definition.get("Double"), + ) + self.assertEquals( + col_def("String", dtypes.string), + self.test_definition.get("String"), + ) + self.assertEquals( + col_def("Instant", dtypes.Instant), + self.test_definition.get("Instant"), + ) + self.assertEquals(None, self.test_definition.get("FooBarBaz")) + + def test_iter(self): + self.assertEquals( + [ + "Bool", + "Char", + "Short", + "Int", + "Long", + "Float", + "Double", + "String", + "Instant", + ], + list(iter(self.test_definition)), + ) + + def test_keys(self): + self.assertEquals( + [ + "Bool", + "Char", + "Short", + "Int", + "Long", + "Float", + "Double", + "String", + "Instant", + ], + list(self.test_definition.keys()), + ) + + def test_values(self): + self.assertEquals( + [ + col_def("Bool", dtypes.bool_), + col_def("Char", dtypes.char), + col_def("Short", dtypes.short), + col_def("Int", dtypes.int32), + col_def("Long", dtypes.int64), + col_def("Float", dtypes.float32), + col_def("Double", dtypes.float64), + col_def("String", dtypes.string), + col_def("Instant", dtypes.Instant), + ], + list(self.test_definition.values()), + ) + + def test_items(self): + self.assertEquals( + [ + ("Bool", col_def("Bool", dtypes.bool_)), + ("Char", col_def("Char", dtypes.char)), + ("Short", col_def("Short", dtypes.short)), + ("Int", col_def("Int", dtypes.int32)), + ("Long", col_def("Long", dtypes.int64)), + ("Float", col_def("Float", dtypes.float32)), + ("Double", col_def("Double", dtypes.float64)), + ("String", col_def("String", dtypes.string)), + ("Instant", col_def("Instant", dtypes.Instant)), + ], + list(self.test_definition.items()), + ) + + def test_equals_hash_and_from_columns(self): + expected_hash = hash(self.test_definition) + for actual in [ + # should be equal to the same exact object + self.test_definition, + # should be equal to a new python object, but same underlying java object + TableDefinition(self.test_definition), + # should be equal to a new python object and new underlying java object + TableDefinition(self.test_definition.values()), + ]: + self.assertEquals(actual, self.test_definition) + self.assertEquals(hash(actual), expected_hash) + + def test_meta_table(self): + expected = new_table( + [ + string_col( + "Name", + [ + 
"Bool", + "Char", + "Short", + "Int", + "Long", + "Float", + "Double", + "String", + "Instant", + ], + ), + string_col( + "DataType", + [ + "java.lang.Boolean", + "char", + "short", + "int", + "long", + "float", + "double", + "java.lang.String", + "java.time.Instant", + ], + ), + string_col("ColumnType", ["Normal"] * 9), + bool_col("IsPartitioning", [False] * 9), + ] + ) + + self.assert_table_equals(self.test_definition.table, expected) + + def test_from_TableDefinition(self): + self.assertEquals(TableDefinition(self.test_definition), self.test_definition) + + def test_from_JpyJType(self): + self.assertEquals( + TableDefinition(self.test_definition.j_table_definition), + self.test_definition, + ) + + def test_from_Mapping(self): + # This case is already tested, it's how self.test_definition is created + pass + + def test_from_Iterable(self): + self.assertEquals( + TableDefinition(self.test_definition.values()), self.test_definition + ) + self.assertEquals( + TableDefinition(list(self.test_definition.values())), self.test_definition + ) + + def test_from_unexpected_type(self): + with self.assertRaises(DHError): + TableDefinition(42) + + def test_bad_Mapping_key(self): + with self.assertRaises(DHError): + TableDefinition( + { + "Foo": dtypes.int32, + 42: dtypes.string, + } + ) + + def test_bad_Mapping_value(self): + with self.assertRaises(DHError): + TableDefinition( + { + "Foo": dtypes.int32, + "Bar": 42, + } + ) + + def test_bad_Iterable(self): + with self.assertRaises(DHError): + TableDefinition([col_def("Foo", dtypes.int32), 42]) + + +if __name__ == "__main__": + unittest.main() diff --git a/py/server/tests/test_table_factory.py b/py/server/tests/test_table_factory.py index 3b1cfe55062..fe66ba992ef 100644 --- a/py/server/tests/test_table_factory.py +++ b/py/server/tests/test_table_factory.py @@ -41,7 +41,7 @@ def tearDown(self) -> None: def test_empty_table(self): t = empty_table(10) - self.assertEqual(0, len(t.columns)) + self.assertEqual(0, len(t.definition)) def test_empty_table_error(self): with self.assertRaises(DHError) as cm: @@ -52,22 +52,22 @@ def test_empty_table_error(self): def test_time_table(self): t = time_table("PT00:00:01") - self.assertEqual(1, len(t.columns)) + self.assertEqual(1, len(t.definition)) self.assertTrue(t.is_refreshing) t = time_table("PT00:00:01", start_time="2021-11-06T13:21:00 ET") - self.assertEqual(1, len(t.columns)) + self.assertEqual(1, len(t.definition)) self.assertTrue(t.is_refreshing) self.assertEqual("2021-11-06T13:21:00.000000000 ET", _JDateTimeUtils.formatDateTime(t.j_table.getColumnSource("Timestamp").get(0), time.to_j_time_zone('ET'))) t = time_table(1000_000_000) - self.assertEqual(1, len(t.columns)) + self.assertEqual(1, len(t.definition)) self.assertTrue(t.is_refreshing) t = time_table(1000_1000_1000, start_time="2021-11-06T13:21:00 ET") - self.assertEqual(1, len(t.columns)) + self.assertEqual(1, len(t.definition)) self.assertTrue(t.is_refreshing) self.assertEqual("2021-11-06T13:21:00.000000000 ET", _JDateTimeUtils.formatDateTime(t.j_table.getColumnSource("Timestamp").get(0), @@ -75,12 +75,12 @@ def test_time_table(self): p = time.to_timedelta(time.to_j_duration("PT1s")) t = time_table(p) - self.assertEqual(1, len(t.columns)) + self.assertEqual(1, len(t.definition)) self.assertTrue(t.is_refreshing) st = time.to_datetime(time.to_j_instant("2021-11-06T13:21:00 ET")) t = time_table(p, start_time=st) - self.assertEqual(1, len(t.columns)) + self.assertEqual(1, len(t.definition)) self.assertTrue(t.is_refreshing) 
self.assertEqual("2021-11-06T13:21:00.000000000 ET", _JDateTimeUtils.formatDateTime(t.j_table.getColumnSource("Timestamp").get(0), @@ -88,7 +88,7 @@ def test_time_table(self): def test_time_table_blink(self): t = time_table("PT1s", blink_table=True) - self.assertEqual(1, len(t.columns)) + self.assertEqual(1, len(t.definition)) self.assertTrue(t.is_blink) def test_time_table_error(self): @@ -325,19 +325,18 @@ def test_input_table(self): ] t = new_table(cols=cols) self.assertEqual(t.size, 2) - col_defs = {c.name: c.data_type for c in t.columns} with self.subTest("from table definition"): - append_only_input_table = input_table(col_defs=col_defs) + append_only_input_table = input_table(col_defs=t.definition) self.assertEqual(append_only_input_table.key_names, []) - self.assertEqual(append_only_input_table.value_names, [col.name for col in cols]) + self.assertEqual(append_only_input_table.value_names, [col._column_definition.name for col in cols]) append_only_input_table.add(t) self.assertEqual(append_only_input_table.size, 2) append_only_input_table.add(t) self.assertEqual(append_only_input_table.size, 4) - keyed_input_table = input_table(col_defs=col_defs, key_cols="String") + keyed_input_table = input_table(col_defs=t.definition, key_cols="String") self.assertEqual(keyed_input_table.key_names, ["String"]) - self.assertEqual(keyed_input_table.value_names, [col.name for col in cols if col.name != "String"]) + self.assertEqual(keyed_input_table.value_names, [col._column_definition.name for col in cols if col._column_definition.name != "String"]) keyed_input_table.add(t) self.assertEqual(keyed_input_table.size, 2) keyed_input_table.add(t) @@ -346,14 +345,14 @@ def test_input_table(self): with self.subTest("from init table"): append_only_input_table = input_table(init_table=t) self.assertEqual(append_only_input_table.key_names, []) - self.assertEqual(append_only_input_table.value_names, [col.name for col in cols]) + self.assertEqual(append_only_input_table.value_names, [col._column_definition.name for col in cols]) self.assertEqual(append_only_input_table.size, 2) append_only_input_table.add(t) self.assertEqual(append_only_input_table.size, 4) keyed_input_table = input_table(init_table=t, key_cols="String") self.assertEqual(keyed_input_table.key_names, ["String"]) - self.assertEqual(keyed_input_table.value_names, [col.name for col in cols if col.name != "String"]) + self.assertEqual(keyed_input_table.value_names, [col._column_definition.name for col in cols if col._column_definition.name != "String"]) self.assertEqual(keyed_input_table.size, 2) keyed_input_table.add(t) self.assertEqual(keyed_input_table.size, 2) @@ -368,7 +367,7 @@ def test_input_table(self): keyed_input_table = input_table(init_table=t, key_cols=["String", "Double"]) self.assertEqual(keyed_input_table.key_names, ["String", "Double"]) - self.assertEqual(keyed_input_table.value_names, [col.name for col in cols if col.name != "String" and col.name != "Double"]) + self.assertEqual(keyed_input_table.value_names, [col._column_definition.name for col in cols if col._column_definition.name != "String" and col._column_definition.name != "Double"]) self.assertEqual(keyed_input_table.size, 2) keyed_input_table.delete(t.select(["String", "Double"])) self.assertEqual(keyed_input_table.size, 0) @@ -449,7 +448,7 @@ def test_input_table_empty_data(self): with cm: t = time_table("PT1s", blink_table=True) - it = input_table({c.name: c.data_type for c in t.columns}, key_cols="Timestamp") + it = input_table(t.definition, key_cols="Timestamp") 
it.add(t) self.assertEqual(it.size, 0) it.delete(t) @@ -467,8 +466,7 @@ def test_j_input_wrapping(self): string_col(name="String", data=["foo", "bar"]), ] t = new_table(cols=cols) - col_defs = {c.name: c.data_type for c in t.columns} - append_only_input_table = input_table(col_defs=col_defs) + append_only_input_table = input_table(col_defs=t.definition) it = _wrapper.wrap_j_object(append_only_input_table.j_table) self.assertTrue(isinstance(it, InputTable)) diff --git a/py/server/tests/test_table_iterator.py b/py/server/tests/test_table_iterator.py index 465ba913453..0bb617fa6f5 100644 --- a/py/server/tests/test_table_iterator.py +++ b/py/server/tests/test_table_iterator.py @@ -22,7 +22,7 @@ def test_iteration_in_chunks(self): test_table = read_csv("tests/data/test_table.csv") total_read_size = 0 for d in test_table.iter_chunk_dict(chunk_size=10): - self.assertEqual(len(d), len(test_table.columns)) + self.assertEqual(len(d), len(test_table.definition)) for col in test_table.columns: self.assertIn(col.name, d) self.assertEqual(d[col.name].dtype, col.data_type.np_type) @@ -36,7 +36,7 @@ def test_iteration_in_chunks(self): test_table.await_update() total_read_size = 0 for d in test_table.iter_chunk_dict(chunk_size=100): - self.assertEqual(len(d), len(test_table.columns)) + self.assertEqual(len(d), len(test_table.definition)) for col in test_table.columns: self.assertIn(col.name, d) self.assertEqual(d[col.name].dtype, col.data_type.np_type) @@ -65,7 +65,7 @@ def test_iteration_in_rows(self): test_table = read_csv("tests/data/test_table.csv") total_read_size = 0 for d in test_table.iter_dict(): - self.assertEqual(len(d), len(test_table.columns)) + self.assertEqual(len(d), len(test_table.definition)) for col in test_table.columns: self.assertIn(col.name, d) self.assertTrue(np.can_cast(col.data_type.np_type, np.dtype(type(d[col.name])))) @@ -77,7 +77,7 @@ def test_iteration_in_rows(self): test_table.await_update() total_read_size = 0 for d in test_table.iter_dict(): - self.assertEqual(len(d), len(test_table.columns)) + self.assertEqual(len(d), len(test_table.definition)) for col in test_table.columns: self.assertIn(col.name, d) v_type = type(d[col.name]) @@ -108,7 +108,7 @@ def test_direct_call_chunks(self): test_table = read_csv("tests/data/test_table.csv") t_iter = test_table.iter_chunk_dict(chunk_size=10) for d in t_iter: - self.assertEqual(len(d), len(test_table.columns)) + self.assertEqual(len(d), len(test_table.definition)) for col in test_table.columns: self.assertIn(col.name, d) self.assertEqual(d[col.name].dtype, col.data_type.np_type) @@ -159,7 +159,7 @@ def test_direct_call_rows(self): test_table = read_csv("tests/data/test_table.csv") t_iter = test_table.iter_dict() for d in t_iter: - self.assertEqual(len(d), len(test_table.columns)) + self.assertEqual(len(d), len(test_table.definition)) for col in test_table.columns: self.assertIn(col.name, d) self.assertTrue(np.can_cast(col.data_type.np_type, np.dtype(type(d[col.name])))) @@ -232,7 +232,7 @@ class CustomClass: with self.subTest("Chunks"): for d in test_table.iter_chunk_dict(chunk_size=10): - self.assertEqual(len(d), len(test_table.columns)) + self.assertEqual(len(d), len(test_table.definition)) for col in test_table.columns: self.assertIn(col.name, d) self.assertEqual(dtypes.from_np_dtype(d[col.name].dtype).np_type, col.data_type.np_type) @@ -240,7 +240,7 @@ class CustomClass: with self.subTest("Rows"): for d in test_table.iter_dict(): - self.assertEqual(len(d), len(test_table.columns)) + self.assertEqual(len(d), 
len(test_table.definition)) for col in test_table.columns: self.assertIn(col.name, d) v_type = type(d[col.name]) @@ -258,7 +258,7 @@ def test_iteration_in_chunks_tuple(self): test_table = read_csv("tests/data/test_table.csv") total_read_size = 0 for d in test_table.iter_chunk_tuple(chunk_size=10): - self.assertEqual(len(d), len(test_table.columns)) + self.assertEqual(len(d), len(test_table.definition)) for i, col in enumerate(test_table.columns): self.assertEqual(col.name, d._fields[i]) self.assertEqual(d[i].dtype, col.data_type.np_type) @@ -272,7 +272,7 @@ def test_iteration_in_chunks_tuple(self): test_table.await_update() total_read_size = 0 for d in test_table.iter_chunk_tuple(chunk_size=100): - self.assertEqual(len(d), len(test_table.columns)) + self.assertEqual(len(d), len(test_table.definition)) for i, col in enumerate(test_table.columns): self.assertEqual(col.name, d._fields[i]) self.assertEqual(d[i].dtype, col.data_type.np_type) @@ -301,7 +301,7 @@ def test_iteration_in_rows_tuple(self): test_table = read_csv("tests/data/test_table.csv") total_read_size = 0 for d in test_table.iter_tuple(): - self.assertEqual(len(d), len(test_table.columns)) + self.assertEqual(len(d), len(test_table.definition)) for i, col in enumerate(test_table.columns): self.assertEqual(col.name, d._fields[i]) self.assertTrue(np.can_cast(col.data_type.np_type, np.dtype(type(d[i])))) @@ -313,7 +313,7 @@ def test_iteration_in_rows_tuple(self): test_table.await_update() total_read_size = 0 for d in test_table.iter_tuple(): - self.assertEqual(len(d), len(test_table.columns)) + self.assertEqual(len(d), len(test_table.definition)) for i, col in enumerate(test_table.columns): self.assertEqual(col.name, d._fields[i]) v_type = type(d[i]) diff --git a/py/server/tests/test_table_listener.py b/py/server/tests/test_table_listener.py index db570b77414..48915a9c277 100644 --- a/py/server/tests/test_table_listener.py +++ b/py/server/tests/test_table_listener.py @@ -104,7 +104,7 @@ def verify_data_changes(self, changes, cols: Union[str, List[str]]): for change in changes: self.assertTrue(isinstance(change, dict)) if not cols: - cols = [col.name for col in self.test_table.columns] + cols = self.test_table.column_names for col in cols: self.assertIn(col, change.keys()) self.assertTrue(isinstance(change[col], numpy.ndarray)) @@ -274,8 +274,7 @@ def test_listener_func_with_deps(self): ] t = new_table(cols=cols) self.assertEqual(t.size, 2) - col_defs = {c.name: c.data_type for c in t.columns} - dep_table = input_table(col_defs=col_defs) + dep_table = input_table(col_defs=t.definition) def listener_func(update, is_replay): table_update_recorder.record(update, is_replay) diff --git a/py/server/tests/test_updateby.py b/py/server/tests/test_updateby.py index e4ecbc2aae2..e58ce542539 100644 --- a/py/server/tests/test_updateby.py +++ b/py/server/tests/test_updateby.py @@ -177,7 +177,7 @@ def test_em(self): for t in (self.static_table, self.ticking_table): rt = t.update_by(ops=op, by="b") self.assertTrue(rt.is_refreshing is t.is_refreshing) - self.assertEqual(len(rt.columns), 1 + len(t.columns)) + self.assertEqual(len(rt.definition), 1 + len(t.definition)) with update_graph.exclusive_lock(self.test_update_graph): self.assertEqual(rt.size, t.size) @@ -192,7 +192,7 @@ def test_em_proxy(self): rt_proxy = pt_proxy.update_by(op, by="e") for ct, rct in zip(pt_proxy.target.constituent_tables, rt_proxy.target.constituent_tables): self.assertTrue(rct.is_refreshing is ct.is_refreshing) - self.assertEqual(len(rct.columns), 1 + len(ct.columns)) + 
self.assertEqual(len(rct.definition), 1 + len(ct.definition)) with update_graph.exclusive_lock(self.test_update_graph): self.assertEqual(ct.size, rct.size) @@ -202,7 +202,7 @@ def test_simple_ops(self): for t in (self.static_table, self.ticking_table): rt = t.update_by(ops=op, by="e") self.assertTrue(rt.is_refreshing is t.is_refreshing) - self.assertEqual(len(rt.columns), 2 + len(t.columns)) + self.assertEqual(len(rt.definition), 2 + len(t.definition)) with update_graph.exclusive_lock(self.test_update_graph): self.assertEqual(rt.size, t.size) @@ -230,7 +230,7 @@ def test_rolling_ops(self): for t in (self.static_table, self.ticking_table): rt = t.update_by(ops=op, by="c") self.assertTrue(rt.is_refreshing is t.is_refreshing) - self.assertEqual(len(rt.columns), 2 + len(t.columns)) + self.assertEqual(len(rt.definition), 2 + len(t.definition)) with update_graph.exclusive_lock(self.test_update_graph): self.assertEqual(rt.size, t.size) @@ -245,7 +245,7 @@ def test_rolling_ops_proxy(self): rt_proxy = pt_proxy.update_by(op, by="c") for ct, rct in zip(pt_proxy.target.constituent_tables, rt_proxy.target.constituent_tables): self.assertTrue(rct.is_refreshing is ct.is_refreshing) - self.assertEqual(len(rct.columns), 2 + len(ct.columns)) + self.assertEqual(len(rct.definition), 2 + len(ct.definition)) with update_graph.exclusive_lock(self.test_update_graph): self.assertEqual(ct.size, rct.size) @@ -260,7 +260,7 @@ def test_multiple_ops(self): for t in (self.static_table, self.ticking_table): rt = t.update_by(ops=multiple_ops, by="c") self.assertTrue(rt.is_refreshing is t.is_refreshing) - self.assertEqual(len(rt.columns), 10 + len(t.columns)) + self.assertEqual(len(rt.definition), 10 + len(t.definition)) with update_graph.exclusive_lock(self.test_update_graph): self.assertEqual(rt.size, t.size) diff --git a/py/server/tests/test_vectorization.py b/py/server/tests/test_vectorization.py index ebac32aff93..ab227d02cc5 100644 --- a/py/server/tests/test_vectorization.py +++ b/py/server/tests/test_vectorization.py @@ -234,7 +234,7 @@ def my_sum(*args): source = new_table([int_col(c, [0, 1, 2, 3, 4, 5, 6]) for c in cols]) result = source.update(f"X = my_sum({','.join(cols)})") - self.assertEqual(len(cols) + 1, len(result.columns)) + self.assertEqual(len(cols) + 1, len(result.definition)) self.assertEqual(_udf.vectorized_count, 0) def test_enclosed_by_parentheses(self): From e919d5223ff807d079a32ea1fbd16b7fc7e42200 Mon Sep 17 00:00:00 2001 From: Jianfeng Mao <4297243+jmao-denver@users.noreply.github.com> Date: Thu, 15 Aug 2024 17:07:31 -0600 Subject: [PATCH 27/43] feat: Support on-error callback for Py listeners (#5929) Fixes #5809 --------- Co-authored-by: Ryan Caudy --- Integrations/build.gradle | 1 + .../python/PythonMergedListenerAdapter.java | 39 ++- .../python/PythonReplayListenerAdapter.java | 58 +++- .../impl/InstrumentedTableListenerBase.java | 2 +- .../engine/table/impl/MergedListener.java | 12 + py/server/deephaven/table_listener.py | 111 +++++-- py/server/tests/test_table_listener.py | 282 ++++++++++++++++++ 7 files changed, 468 insertions(+), 37 deletions(-) diff --git a/Integrations/build.gradle b/Integrations/build.gradle index 99abec47535..ac16784d585 100644 --- a/Integrations/build.gradle +++ b/Integrations/build.gradle @@ -15,6 +15,7 @@ dependencies { implementation project(':plugin') implementation project(':Configuration') implementation project(':log-factory') + implementation libs.commons.lang3 testImplementation project(':engine-test-utils') testImplementation project(path: ':Base', 
configuration: 'tests') diff --git a/Integrations/src/main/java/io/deephaven/integrations/python/PythonMergedListenerAdapter.java b/Integrations/src/main/java/io/deephaven/integrations/python/PythonMergedListenerAdapter.java index 215aec0b81a..e498b60cf70 100644 --- a/Integrations/src/main/java/io/deephaven/integrations/python/PythonMergedListenerAdapter.java +++ b/Integrations/src/main/java/io/deephaven/integrations/python/PythonMergedListenerAdapter.java @@ -7,14 +7,18 @@ import io.deephaven.engine.rowset.RowSetFactory; import io.deephaven.engine.rowset.RowSetShiftData; import io.deephaven.engine.table.ModifiedColumnSet; +import io.deephaven.engine.table.TableListener; import io.deephaven.engine.table.TableUpdate; import io.deephaven.engine.table.impl.ListenerRecorder; import io.deephaven.engine.table.impl.MergedListener; import io.deephaven.engine.table.impl.TableUpdateImpl; import io.deephaven.engine.updategraph.NotificationQueue; import io.deephaven.engine.updategraph.UpdateGraph; +import io.deephaven.internal.log.LoggerFactory; +import io.deephaven.io.logger.Logger; import io.deephaven.util.SafeCloseable; import io.deephaven.util.annotations.ScriptApi; +import org.apache.commons.lang3.exception.ExceptionUtils; import org.jetbrains.annotations.NotNull; import org.jetbrains.annotations.Nullable; import org.jpy.PyObject; @@ -33,7 +37,10 @@ */ @ScriptApi public class PythonMergedListenerAdapter extends MergedListener { - private final PyObject pyCallable; + private static final Logger log = LoggerFactory.getLogger(PythonMergedListenerAdapter.class); + + private final PyObject pyListenerCallable; + private final PyObject pyOnFailureCallback; /** * Create a Python merged listener. @@ -42,23 +49,26 @@ public class PythonMergedListenerAdapter extends MergedListener { * @param dependencies The tables that must be satisfied before this listener is executed. * @param listenerDescription A description for the UpdatePerformanceTracker to append to its entry description, may * be null. - * @param pyObjectIn Python listener object. + * @param pyListener Python listener object. 
*/ private PythonMergedListenerAdapter( @NotNull ListenerRecorder[] recorders, @Nullable NotificationQueue.Dependency[] dependencies, @Nullable String listenerDescription, - @NotNull PyObject pyObjectIn) { + @NotNull PyObject pyListener, + @NotNull PyObject pyOnFailureCallback) { super(Arrays.asList(recorders), Arrays.asList(dependencies), listenerDescription, null); Arrays.stream(recorders).forEach(rec -> rec.setMergedListener(this)); - this.pyCallable = PythonUtils.pyMergeListenerFunc(pyObjectIn); + this.pyListenerCallable = PythonUtils.pyMergeListenerFunc(Objects.requireNonNull(pyListener)); + this.pyOnFailureCallback = Objects.requireNonNull(pyOnFailureCallback); } public static PythonMergedListenerAdapter create( @NotNull ListenerRecorder[] recorders, @Nullable NotificationQueue.Dependency[] dependencies, @Nullable String listenerDescription, - @NotNull PyObject pyObjectIn) { + @NotNull PyObject pyListener, + @NotNull PyObject pyOnFailureCallback) { if (recorders.length < 2) { throw new IllegalArgumentException("At least two listener recorders must be provided"); } @@ -71,7 +81,8 @@ public static PythonMergedListenerAdapter create( final UpdateGraph updateGraph = allItems[0].getUpdateGraph(allItems); try (final SafeCloseable ignored = ExecutionContext.getContext().withUpdateGraph(updateGraph).open()) { - return new PythonMergedListenerAdapter(recorders, dependencies, listenerDescription, pyObjectIn); + return new PythonMergedListenerAdapter(recorders, dependencies, listenerDescription, pyListener, + pyOnFailureCallback); } } @@ -91,6 +102,20 @@ public ArrayList currentRowsAsUpdates() { @Override protected void process() { - pyCallable.call("__call__"); + pyListenerCallable.call("__call__"); + } + + @Override + protected void propagateErrorDownstream(boolean fromProcess, @NotNull Throwable error, + TableListener.@Nullable Entry entry) { + if (!pyOnFailureCallback.isNone()) { + try { + pyOnFailureCallback.call("__call__", ExceptionUtils.getStackTrace(error)); + } catch (Exception e2) { + // If the Python onFailure callback fails, log the new exception + // and continue with the original exception. 
+ log.error().append("Python on_error callback failed: ").append(e2).endl(); + } + } } } diff --git a/Integrations/src/main/java/io/deephaven/integrations/python/PythonReplayListenerAdapter.java b/Integrations/src/main/java/io/deephaven/integrations/python/PythonReplayListenerAdapter.java index b6eae8bcda5..e713044047a 100644 --- a/Integrations/src/main/java/io/deephaven/integrations/python/PythonReplayListenerAdapter.java +++ b/Integrations/src/main/java/io/deephaven/integrations/python/PythonReplayListenerAdapter.java @@ -3,6 +3,7 @@ // package io.deephaven.integrations.python; +import org.apache.commons.lang3.exception.ExceptionUtils; import io.deephaven.engine.context.ExecutionContext; import io.deephaven.engine.table.Table; import io.deephaven.engine.table.TableUpdate; @@ -14,12 +15,16 @@ import io.deephaven.engine.rowset.RowSetShiftData; import io.deephaven.engine.updategraph.NotificationQueue; import io.deephaven.engine.updategraph.UpdateGraph; +import io.deephaven.internal.log.LoggerFactory; +import io.deephaven.io.logger.Logger; import io.deephaven.util.SafeCloseable; import io.deephaven.util.annotations.ScriptApi; +import org.jetbrains.annotations.NotNull; import org.jpy.PyObject; import javax.annotation.Nullable; import java.util.Arrays; +import java.util.Objects; /** @@ -33,7 +38,10 @@ public class PythonReplayListenerAdapter extends InstrumentedTableUpdateListenerAdapter implements TableSnapshotReplayer { private static final long serialVersionUID = -8882402061960621245L; - private final PyObject pyCallable; + private static final Logger log = LoggerFactory.getLogger(PythonReplayListenerAdapter.class); + + private final PyObject pyListenerCallable; + private final PyObject pyOnFailureCallback; private final NotificationQueue.Dependency[] dependencies; /** @@ -43,22 +51,34 @@ public class PythonReplayListenerAdapter extends InstrumentedTableUpdateListener * null. * @param source The source table to which this listener will subscribe. * @param retain Whether a hard reference to this listener should be maintained to prevent it from being collected. - * @param pyObjectIn Python listener object. + * @param pyListener Python listener object. * @param dependencies The tables that must be satisfied before this listener is executed. */ - public static PythonReplayListenerAdapter create(@Nullable String description, Table source, boolean retain, - PyObject pyObjectIn, NotificationQueue.Dependency... dependencies) { + public static PythonReplayListenerAdapter create( + @Nullable String description, + @NotNull Table source, + boolean retain, + @NotNull PyObject pyListener, + @NotNull PyObject pyOnFailureCallback, + @Nullable NotificationQueue.Dependency... dependencies) { final UpdateGraph updateGraph = source.getUpdateGraph(dependencies); try (final SafeCloseable ignored = ExecutionContext.getContext().withUpdateGraph(updateGraph).open()) { - return new PythonReplayListenerAdapter(description, source, retain, pyObjectIn, dependencies); + return new PythonReplayListenerAdapter(description, source, retain, pyListener, pyOnFailureCallback, + dependencies); } } - private PythonReplayListenerAdapter(@Nullable String description, Table source, boolean retain, PyObject pyObjectIn, - NotificationQueue.Dependency... dependencies) { + private PythonReplayListenerAdapter( + @Nullable String description, + @NotNull Table source, + boolean retain, + @NotNull PyObject pyListener, + @NotNull PyObject pyOnFailureCallback, + @Nullable NotificationQueue.Dependency... 
dependencies) { super(description, source, retain); this.dependencies = dependencies; - this.pyCallable = PythonUtils.pyListenerFunc(pyObjectIn); + this.pyListenerCallable = PythonUtils.pyListenerFunc(Objects.requireNonNull(pyListener)); + this.pyOnFailureCallback = Objects.requireNonNull(pyOnFailureCallback); } @Override @@ -69,13 +89,27 @@ public void replay() { final TableUpdate update = new TableUpdateImpl(source.getRowSet(), emptyRowSet, emptyRowSet, emptyShift, emptyColumnSet); final boolean isReplay = true; - pyCallable.call("__call__", update, isReplay); + pyListenerCallable.call("__call__", update, isReplay); } @Override public void onUpdate(final TableUpdate update) { final boolean isReplay = false; - pyCallable.call("__call__", update, isReplay); + pyListenerCallable.call("__call__", update, isReplay); + } + + @Override + public void onFailureInternal(Throwable originalException, Entry sourceEntry) { + if (!pyOnFailureCallback.isNone()) { + try { + pyOnFailureCallback.call("__call__", ExceptionUtils.getStackTrace(originalException)); + } catch (Throwable e) { + // If the Python onFailure callback fails, log the new exception + // and continue with the original exception. + log.error().append("Python on_error callback failed: ").append(e).endl(); + } + } + super.onFailureInternal(originalException, sourceEntry); } @Override @@ -83,4 +117,8 @@ public boolean canExecute(final long step) { return super.canExecute(step) && (dependencies.length == 0 || Arrays.stream(dependencies).allMatch(t -> t.satisfied(step))); } + + public boolean isFailed() { + return failed; + } } diff --git a/engine/table/src/main/java/io/deephaven/engine/table/impl/InstrumentedTableListenerBase.java b/engine/table/src/main/java/io/deephaven/engine/table/impl/InstrumentedTableListenerBase.java index acdf3605ac6..c14d5eaf20f 100644 --- a/engine/table/src/main/java/io/deephaven/engine/table/impl/InstrumentedTableListenerBase.java +++ b/engine/table/src/main/java/io/deephaven/engine/table/impl/InstrumentedTableListenerBase.java @@ -50,7 +50,7 @@ public abstract class InstrumentedTableListenerBase extends LivenessArtifact private final PerformanceEntry entry; private final boolean terminalListener; - private boolean failed = false; + protected boolean failed = false; private static volatile boolean verboseLogging = Configuration .getInstance() .getBooleanWithDefault("InstrumentedTableListenerBase.verboseLogging", false); diff --git a/engine/table/src/main/java/io/deephaven/engine/table/impl/MergedListener.java b/engine/table/src/main/java/io/deephaven/engine/table/impl/MergedListener.java index 3a5b39e898e..6b25ddfbce0 100644 --- a/engine/table/src/main/java/io/deephaven/engine/table/impl/MergedListener.java +++ b/engine/table/src/main/java/io/deephaven/engine/table/impl/MergedListener.java @@ -57,6 +57,9 @@ public abstract class MergedListener extends LivenessArtifact implements Notific protected final PerformanceEntry entry; private final String logPrefix; + private boolean failed; + + @SuppressWarnings("FieldMayBeFinal") private volatile long lastCompletedStep = NotificationStepReceiver.NULL_NOTIFICATION_STEP; private volatile long lastEnqueuedStep = NotificationStepReceiver.NULL_NOTIFICATION_STEP; @@ -96,6 +99,10 @@ protected Iterable getRecorders() { return recorders; } + public boolean isFailed() { + return failed; + } + public final void notifyOnUpstreamError( @NotNull final Throwable upstreamError, @Nullable final TableListener.Entry errorSourceEntry) { notifyInternal(upstreamError, errorSourceEntry); @@ 
-107,6 +114,10 @@ public void notifyChanges() { private void notifyInternal(@Nullable final Throwable upstreamError, @Nullable final TableListener.Entry errorSourceEntry) { + if (failed) { + return; + } + final long currentStep = getUpdateGraph().clock().currentStep(); synchronized (this) { @@ -150,6 +161,7 @@ protected void propagateError( final boolean uncaughtExceptionFromProcess, @NotNull final Throwable error, @Nullable final TableListener.Entry entry) { + failed = true; forceReferenceCountToZero(); propagateErrorDownstream(uncaughtExceptionFromProcess, error, entry); try { diff --git a/py/server/deephaven/table_listener.py b/py/server/deephaven/table_listener.py index b8d406673d7..1ad4f809eab 100644 --- a/py/server/deephaven/table_listener.py +++ b/py/server/deephaven/table_listener.py @@ -6,7 +6,7 @@ from abc import ABC, abstractmethod from functools import wraps from inspect import signature -from typing import Callable, Union, List, Generator, Dict, Literal, Sequence, Optional +from typing import Callable, Union, List, Generator, Dict, Sequence, Optional import jpy import numpy @@ -17,13 +17,13 @@ from deephaven.jcompat import to_sequence, j_list_to_list from deephaven.table import Table from deephaven._table_reader import _table_reader_all_dict, _table_reader_chunk_dict -from deephaven.update_graph import UpdateGraph _JPythonReplayListenerAdapter = jpy.get_type("io.deephaven.integrations.python.PythonReplayListenerAdapter") _JTableUpdate = jpy.get_type("io.deephaven.engine.table.TableUpdate") _JListenerRecorder = jpy.get_type("io.deephaven.engine.table.impl.ListenerRecorder") _JPythonMergedListenerAdapter = jpy.get_type("io.deephaven.integrations.python.PythonMergedListenerAdapter") + class TableUpdate(JObjectWrapper): """A TableUpdate object represents a table update event. It contains the added, removed, and modified rows in the table. """ @@ -188,13 +188,25 @@ def modified_columns(self) -> List[str]: class TableListener(ABC): - """An abstract table listener class that should be subclassed by any user table listener class.""" + """An abstract table listener class that should be subclassed by any user table listener class. It provides a + default implementation for the on_error method that simply prints out the error.""" @abstractmethod def on_update(self, update: TableUpdate, is_replay: bool) -> None: """The required method on a listener object that receives table updates.""" ... + def on_error(self, e: Exception) -> None: + """The callback method on a listener object that handles the received error. The default implementation simply prints the error. + + Args: + e (Exception): the exception that occurred during the listener's execution. + """ + print(f"An error occurred during listener execution: {self}, {e}") + + +def _default_on_error(e: Exception) -> None: + print(f"An error occurred during listener execution: {e}") def _listener_wrapper(table: Table): """A decorator to wrap a user listener function or on_update method to receive the numpy-converted Table updates. 
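(Illustration, not part of the patch: a minimal sketch of how the on_error hook added above can be wired up from user code, based only on the signatures and docstrings in this diff. The table and listener names are hypothetical.)

from deephaven import time_table
from deephaven.table_listener import listen, TableListener

t = time_table("PT1s").update("X = i")

# Option 1: a plain listener function plus a separate on_error callback passed to listen().
def my_listener(update, is_replay):
    print(update.added())

def my_on_error(e: Exception) -> None:
    # Invoked if my_listener raises; per the tests below, the listener is marked failed afterwards.
    print(f"listener failed: {e}")

handle = listen(t, my_listener, on_error=my_on_error)

# Option 2: a TableListener subclass overrides on_error instead of passing the argument.
class MyListener(TableListener):
    def on_update(self, update, is_replay):
        print(update.added())

    def on_error(self, e: Exception) -> None:
        print(f"listener failed: {e}")

handle2 = listen(t, MyListener())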
@@ -229,17 +241,25 @@ def _wrap_listener_obj(t: Table, listener: TableListener): return listener +def _error_callback_wrapper(callback: Callable[[Exception], None]): + @wraps(callback) + def wrapper(e): + callback(RuntimeError(e)) + + return wrapper + class TableListenerHandle(JObjectWrapper): """A handle to manage a table listener's lifecycle.""" j_object_type = _JPythonReplayListenerAdapter def __init__(self, t: Table, listener: Union[Callable[[TableUpdate, bool], None], TableListener], description: str = None, - dependencies: Union[Table, Sequence[Table]] = None): + dependencies: Union[Table, Sequence[Table]] = None, on_error: Callable[[Exception], None] = None): """Creates a new table listener handle with dependencies. Table change events are processed by 'listener', which can be either (1) a callable (e.g. function) or - (2) an instance of TableListener type which provides an "on_update" method. + (2) an instance of a TableListener subclass that must override the abstract "on_update" method, and optionally + override the default "on_error" method. The callable or the on_update method must have the following signatures. * (update: TableUpdate, is_replay: bool): support replaying the initial table snapshot and normal table updates @@ -265,6 +285,13 @@ def __init__(self, t: Table, listener: Union[Callable[[TableUpdate, bool], None] the listener is safe, it is not recommended because reading or operating on the result tables of those operations may not be safe. It is best to perform the operations on the dependent tables beforehand, and then add the result tables as dependencies to the listener so that they can be safely read in it. + on_error (Callable[[Exception], None]): a callback function to be invoked when an error occurs during the + listener's execution. It should only be set when the listener is a function, not when it is an instance + of TableListener. Defaults to None. When None, a default callback function will be provided that simply + prints out the received exception. If the callback function itself raises an exception, the new exception + will be logged in the Deephaven server log and will not be further processed by the server. 
+ + Raises: DHError @@ -277,14 +304,23 @@ def __init__(self, t: Table, listener: Union[Callable[[TableUpdate, bool], None] self.dependencies = to_sequence(dependencies) if isinstance(listener, TableListener): + if on_error: + raise DHError(message="Invalid on_error argument for listeners of TableListener type which already have an on_error method.") self.listener_wrapped = _wrap_listener_obj(t, listener) + on_error_callback = _error_callback_wrapper(listener.on_error) elif callable(listener): self.listener_wrapped = _wrap_listener_func(t, listener) + if on_error: + on_error_callback = _error_callback_wrapper(on_error) + else: + on_error_callback = _error_callback_wrapper(_default_on_error) else: raise DHError(message="listener is neither callable nor TableListener object") try: - self.listener_adapter = _JPythonReplayListenerAdapter.create(description, t.j_table, False, self.listener_wrapped, self.dependencies) + self.listener_adapter = _JPythonReplayListenerAdapter.create(description, t.j_table, False, + self.listener_wrapped, on_error_callback, + self.dependencies) except Exception as e: raise DHError(e, "failed to create a table listener.") from e self.started = False @@ -326,7 +362,7 @@ def stop(self) -> None: def listen(t: Table, listener: Union[Callable[[TableUpdate, bool], None], TableListener], description: str = None, do_replay: bool = False, - dependencies: Union[Table, Sequence[Table]] = None) -> TableListenerHandle: + dependencies: Union[Table, Sequence[Table]] = None, on_error: Callable[[Exception], None] = None) -> TableListenerHandle: """This is a convenience function that creates a TableListenerHandle object and immediately starts it to listen for table updates. @@ -352,6 +388,12 @@ def listen(t: Table, listener: Union[Callable[[TableUpdate, bool], None], TableL the listener is safe, it is not recommended because reading or operating on the result tables of those operations may not be safe. It is best to perform the operations on the dependent tables beforehand, and then add the result tables as dependencies to the listener so that they can be safely read in it. + on_error (Callable[[Exception], None]): a callback function to be invoked when an error occurs during the + listener's execution. It should only be set when the listener is a function, not when it is an instance + of TableListener. Defaults to None. When None, a default callback function will be provided that simply + prints out the received exception. If the callback function itself raises an exception, the new exception + will be logged in the Deephaven server log and will not be further processed by the server. + Returns: a TableListenerHandle @@ -359,8 +401,8 @@ def listen(t: Table, listener: Union[Callable[[TableUpdate, bool], None], TableL Raises: DHError """ - table_listener_handle = TableListenerHandle(t=t, dependencies=dependencies, listener=listener, - description=description) + table_listener_handle = TableListenerHandle(t=t, listener=listener, description=description, + dependencies=dependencies, on_error=on_error) table_listener_handle.start(do_replay=do_replay) return table_listener_handle @@ -394,7 +436,8 @@ def table_update(self) -> Optional[TableUpdate]: class MergedListener(ABC): - """An abstract multi-table listener class that should be subclassed by any user multi-table listener class.""" + """An abstract multi-table listener class that should be subclassed by any user multi-table listener class. 
It + provides a default implementation for the on_error method that simply prints out the error.""" @abstractmethod def on_update(self, updates: Dict[Table, TableUpdate], is_replay: bool) -> None: @@ -403,6 +446,14 @@ def on_update(self, updates: Dict[Table, TableUpdate], is_replay: bool) -> None: """ ... + def on_error(self, e: Exception) -> None: + """ The callback method on a listener object that handles the received error. The default implementation simply prints the error. + + Args: + e (Exception): the exception that occurred during the listener's execution. + """ + print(f"An error occurred during listener execution: {self}, {e}") + class MergedListenerHandle(JObjectWrapper): """A handle to manage a merged listener's lifecycle.""" @@ -413,12 +464,14 @@ def j_object(self) -> jpy.JType: return self.merged_listener_adapter def __init__(self, tables: Sequence[Table], listener: Union[Callable[[Dict[Table, TableUpdate], bool], None], MergedListener], - description: str = None, dependencies: Union[Table, Sequence[Table]] = None): + description: str = None, dependencies: Union[Table, Sequence[Table]] = None, on_error: Callable[[Exception], None] = None): """Creates a new MergedListenerHandle with the provided listener recorders and dependencies. Table change events are processed by 'listener', which can be either (1) a callable (e.g. function) or - (2) an instance of MergedListener type which provides an "on_update" method. + (2) an instance of a MergedListener subclass that must override the abstract "on_update" method, and optionally + override the default "on_error" method. + The callable or the on_update method must have the following signature. *(updates: Dict[Table, TableUpdate], is_replay: bool): support replaying the initial table snapshots and normal table updates The 'updates' parameter is a dictionary of Table to TableUpdate; @@ -444,6 +497,12 @@ def __init__(self, tables: Sequence[Table], listener: Union[Callable[[Dict[Table the listener is safe, it is not recommended because reading or operating on the result tables of those operations may not be safe. It is best to perform the operations on the dependent tables beforehand, and then add the result tables as dependencies to the listener so that they can be safely read in it. + on_error (Callable[[Exception], None]): a callback function to be invoked when an error occurs during the + listener's execution. It should only be set when the listener is a function, not when it is an instance + of MergedListener. Defaults to None. When None, a default callback function will be provided that simply + prints out the received exception. If the callback function itself raises an exception, the new exception + will be logged in the Deephaven server log and will not be further processed by the server. 
+ Raises: DHError @@ -457,20 +516,30 @@ def __init__(self, tables: Sequence[Table], listener: Union[Callable[[Dict[Table self.dependencies = dependencies if isinstance(listener, MergedListener): + if on_error: + raise DHError(message="Invalid on_error argument for listeners of MergedListener type which already have an on_error method.") self.listener = listener.on_update - else: + on_error_callback = _error_callback_wrapper(listener.on_error) + elif callable(listener): self.listener = listener + if on_error: + on_error_callback = _error_callback_wrapper(on_error) + else: + on_error_callback = _error_callback_wrapper(_default_on_error) + else: + raise DHError(message="listener is neither callable nor MergedListener object") + n_params = len(signature(self.listener).parameters) if n_params != 2: raise ValueError("merged listener function must have 2 parameters (updates, is_replay).") - try: self.merged_listener_adapter = _JPythonMergedListenerAdapter.create( to_sequence(self.listener_recorders), to_sequence(self.dependencies), description, - self) + self, + on_error_callback) self.started = False except Exception as e: raise DHError(e, "failed to create a merged listener adapter.") from e @@ -512,7 +581,6 @@ def start(self, do_replay: bool = False) -> None: self.started = True - def stop(self) -> None: """Stop the listener.""" if not self.started: @@ -527,8 +595,8 @@ def stop(self) -> None: def merged_listen(tables: Sequence[Table], listener: Union[Callable[[Dict[Table, TableUpdate]], None], MergedListener], - do_replay: bool = False, description: str = None, dependencies: Union[Table, Sequence[Table]] = None)\ - -> MergedListenerHandle: + do_replay: bool = False, description: str = None, dependencies: Union[Table, Sequence[Table]] = None, + on_error: Callable[[Exception], None] = None) -> MergedListenerHandle: """This is a convenience function that creates a MergedListenerHandle object and immediately starts it to listen for table updates. @@ -555,8 +623,13 @@ def merged_listen(tables: Sequence[Table], listener: Union[Callable[[Dict[Table, the listener is safe, it is not recommended because reading or operating on the result tables of those operations may not be safe. It is best to perform the operations on the dependent tables beforehand, and then add the result tables as dependencies to the listener so that they can be safely read in it. + on_error (Callable[[Exception], None]): a callback function to be invoked when an error occurs during the + listener's execution. It should only be set when the listener is a function, not when it is an instance + of MergedListener. Defaults to None. When None, a default callback function will be provided that simply + prints out the received exception. If the callback function itself raises an exception, the new exception + will be logged in the Deephaven server log and will not be further processed by the server. 
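(Illustration, not from the patch: a short sketch of merged_listen with the on_error parameter documented just above, assuming two ticking tables; names are hypothetical.)

from deephaven import time_table
from deephaven.table_listener import merged_listen

t1 = time_table("PT1s").update("X = i")
t2 = time_table("PT2s").update("Y = i")

def my_merged_listener(updates, is_replay):
    # updates maps each source Table to its TableUpdate for this cycle
    for table, update in updates.items():
        if update:
            print(update.added())

def my_on_error(e: Exception) -> None:
    print(f"merged listener failed: {e}")

mlh = merged_listen([t1, t2], my_merged_listener, on_error=my_on_error)
# ... later, when the listener is no longer needed:
mlh.stop()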
""" merged_listener_handle = MergedListenerHandle(tables=tables, listener=listener, - description=description, dependencies=dependencies) + description=description, dependencies=dependencies, on_error=on_error) merged_listener_handle.start(do_replay=do_replay) return merged_listener_handle diff --git a/py/server/tests/test_table_listener.py b/py/server/tests/test_table_listener.py index 48915a9c277..e0e6cadb64e 100644 --- a/py/server/tests/test_table_listener.py +++ b/py/server/tests/test_table_listener.py @@ -22,6 +22,7 @@ _JColumnVectors = jpy.get_type("io.deephaven.engine.table.vectors.ColumnVectors") + class TableUpdateRecorder: def __init__(self, table: Optional[Table] = None, chunk_size: int = None, cols: Union[str, List[str]] = None): self.table = table @@ -266,6 +267,8 @@ def on_update(self, update, is_replay): has_added=True, has_removed=True, has_modified=False) self.assertTrue(all([len(ja) > 0 for ja in j_arrays])) + dep_table = dep_table_2 = None + def test_listener_func_with_deps(self): cols = [ @@ -362,6 +365,8 @@ def on_update(self, updates: Dict[Table, TableUpdate], is_replay: bool) -> None: mlh.stop() self.assertGreaterEqual(len(tur.replays), 6) + t1 = t2 = t3 = None + def test_merged_listener_func(self): t1 = time_table("PT1s").update(["X=i % 11"]) t2 = time_table("PT2s").update(["Y=i % 8"]) @@ -392,6 +397,8 @@ def test_ml_func(updates: Dict[Table, TableUpdate], is_replay: bool) -> None: mlh.stop() self.assertGreaterEqual(len(tur.replays), 6) + t1 = t2 = t3 = None + def test_merged_listener_with_deps(self): t1 = time_table("PT1s").update(["X=i % 11"]) t2 = time_table("PT2s").update(["Y=i % 8"]) @@ -422,6 +429,8 @@ def on_update(self, updates: Dict[Table, TableUpdate], is_replay: bool) -> None: self.assertGreaterEqual(len(tur.replays), 6) self.assertTrue(len(j_arrays) > 0 and all([len(ja) > 0 for ja in j_arrays])) + t1 = t2 = t3 = None + def test_merged_listener_error(self): t1 = time_table("PT1s").update(["X=i % 11"]) @@ -437,6 +446,8 @@ def test_ml_func(updates: Dict[Table, TableUpdate]) -> None: mlh = merged_listen([t1, et], test_ml_func) self.assertIn("must be a refreshing table", str(cm.exception)) + t1 = et = None + def test_merged_listener_replay(self): t1 = time_table("PT1s").update(["X=i % 11"]) t2 = time_table("PT2s").update(["Y=i % 8"]) @@ -477,6 +488,277 @@ def test_ml_func(updates: Dict[Table, TableUpdate], is_replay: bool) -> None: self.assertGreaterEqual(len(tur.replays), 6) self.assertEqual(tur.replays.count(True), 2) + t1 = t2 = t3 = None + + def test_on_error_listener_func(self): + t = time_table("PT1S").update("X = i") + with self.subTest("Bad Listener Good Error Callback"): + def bad_listner_func(table_udpate, is_replay: bool) -> None: + raise ValueError("invalid value") + + def on_error(e: Exception) -> None: + nonlocal error_caught + error_caught = True + self.assertIn("invalid value", str(e)) + + error_caught = False + tlh = listen(t, bad_listner_func, on_error=on_error) + t.await_update() + self.assertTrue(error_caught) + self.assertTrue(tlh.j_object.isFailed()) + + with self.subTest("Good Listener Good Error Callback"): + def good_listner_func(table_udpate, is_replay: bool) -> None: + pass + + error_caught = False + tlh = listen(t, good_listner_func, on_error=on_error) + t.await_update() + self.assertFalse(error_caught) + self.assertFalse(tlh.j_object.isFailed()) + + with self.subTest("Bad Listener Bad Error Callback"): + error_caught: bool = False + + def bad_listner_func(table_udpate, is_replay: bool) -> None: + raise ValueError("invalid value") 
+ + def on_error(e: Exception) -> None: + nonlocal error_caught + error_caught = True + self.assertIn("invalid value", str(e)) + raise ValueError("reraise the exception") from e + + tlh = listen(t, bad_listner_func, on_error=on_error) + t.await_update() + self.assertTrue(error_caught) + self.assertTrue(tlh.j_object.isFailed()) + + t = None + + def test_on_error_listener_obj(self): + test_self = self + t = time_table("PT1S").update("X = i") + + with self.subTest("Bad Listener Good Error Callback"): + class BadListener(TableListener): + def on_update(self, update: TableUpdate, is_replay: bool) -> None: + raise ValueError("invalid value") + + def on_error(self, e: Exception) -> None: + nonlocal error_caught + error_caught = True + test_self.assertIn("invalid value", str(e)) + + error_caught = False + bad_listener_obj = BadListener() + tlh = listen(t, bad_listener_obj) + t.await_update() + self.assertTrue(error_caught) + self.assertTrue(tlh.j_object.isFailed()) + + with self.assertRaises(DHError): + def on_error(e: Exception) -> None: + ... + tlh = listen(t, bad_listener_obj, on_error=on_error) + + with self.subTest("Good Listener Good Error Callback"): + class GoodListener(TableListener): + def on_update(self, update: TableUpdate, is_replay: bool) -> None: + ... + + def on_error(self, e: Exception) -> None: + nonlocal error_caught + error_caught = True + test_self.assertIn("invalid value", str(e)) + + error_caught = False + good_listener_obj = GoodListener() + tlh = listen(t, good_listener_obj) + t.await_update() + self.assertFalse(error_caught) + self.assertFalse(tlh.j_object.isFailed()) + + with self.subTest("Bad Listener Bad Error Callback"): + class GoodListener(TableListener): + def on_update(self, update: TableUpdate, is_replay: bool) -> None: + raise ValueError("invalid value") + + def on_error(self, e: Exception) -> None: + nonlocal error_caught + error_caught = True + test_self.assertIn("invalid value", str(e)) + raise ValueError("reraise the exception") from e + + error_caught = False + + good_listener_obj = GoodListener() + tlh = listen(t, good_listener_obj) + t.await_update() + self.assertTrue(error_caught) + self.assertTrue(tlh.j_object.isFailed()) + + t = None + + def test_on_error_merged_listener_func(self): + t1 = time_table("PT1s").update(["X=i % 11"]) + t2 = time_table("PT2s").update(["Y=i % 8"]) + t3 = time_table("PT3s").update(["Z=i % 5"]) + + with self.subTest("Bad Listener Good Error Callback"): + def bad_listner_func(updates: Dict[Table, TableUpdate], is_replay: bool) -> None: + raise ValueError("invalid value") + + def on_error(e: Exception) -> None: + nonlocal error_caught + error_caught = True + self.assertIn("invalid value", str(e)) + + error_caught = False + mlh = merged_listen([t1, t2, t3], bad_listner_func, on_error=on_error) + t1.await_update() + self.assertTrue(error_caught) + self.assertTrue(mlh.j_object.isFailed()) + + with self.subTest("Good Listener Good Error Callback"): + def good_listner_func(updates: Dict[Table, TableUpdate], is_replay: bool) -> None: + pass + + error_caught = False + mlh = merged_listen([t1, t2, t3], good_listner_func, on_error=on_error) + t1.await_update() + self.assertFalse(error_caught) + self.assertFalse(mlh.j_object.isFailed()) + + with self.subTest("Bad Listener Bad Error Callback"): + def bad_listner_func(updates: Dict[Table, TableUpdate], is_replay: bool) -> None: + raise ValueError("invalid value") + + def bad_on_error(e: Exception) -> None: + nonlocal error_caught + error_caught = True + self.assertIn("invalid value", str(e)) 
+ raise ValueError("reraise the exception") from e + + error_caught = False + mlh = merged_listen([t1, t2, t3], bad_listner_func, on_error=bad_on_error) + t1.await_update() + self.assertTrue(error_caught) + self.assertTrue(mlh.j_object.isFailed()) + + t1 = t2 = t3 = None + + def test_on_error_merged_listener_obj(self): + test_self = self + t1 = time_table("PT1s").update(["X=i % 11"]) + t2 = time_table("PT2s").update(["Y=i % 8"]) + t3 = time_table("PT3s").update(["Z=i % 5"]) + + with self.subTest("Bad Listener Good Error Callback"): + class BadListener(MergedListener): + def on_update(self, updates: Dict[Table, TableUpdate], is_replay: bool) -> None: + raise ValueError("invalid value") + + def on_error(self, e: Exception) -> None: + nonlocal error_caught + error_caught = True + test_self.assertIn("invalid value", str(e)) + + error_caught = False + bad_listener_obj = BadListener() + mlh = merged_listen([t1, t2, t3], bad_listener_obj) + t1.await_update() + self.assertTrue(error_caught) + self.assertTrue(mlh.j_object.isFailed()) + + with self.assertRaises(DHError): + def on_error(e: Exception) -> None: + ... + tlh = merged_listen([t1, t2, t3], bad_listener_obj, on_error=on_error) + + + with self.subTest("Good Listener Good Error Callback"): + class GoodListener(MergedListener): + def on_update(self, updates: Dict[Table, TableUpdate], is_replay: bool) -> None: + ... + + def on_error(self, e: Exception) -> None: + nonlocal error_caught + error_caught = True + test_self.assertIn("invalid value", str(e)) + + error_caught = False + good_listener_obj = GoodListener() + mlh = merged_listen([t1, t2, t3], good_listener_obj) + t1.await_update() + self.assertFalse(error_caught) + self.assertFalse(mlh.j_object.isFailed()) + + with self.subTest("Bad Listener Bad Error Callback"): + class BadListener(MergedListener): + def on_update(self, updates: Dict[Table, TableUpdate], is_replay: bool) -> None: + raise ValueError("invalid value") + + def on_error(self, e: Exception) -> None: + nonlocal error_caught + error_caught = True + test_self.assertIn("invalid value", str(e)) + raise ValueError("reraise the exception") from e + + error_caught = False + bad_listener_obj = BadListener() + mlh = merged_listen([t1, t2, t3], bad_listener_obj) + t1.await_update() + self.assertTrue(error_caught) + self.assertTrue(mlh.j_object.isFailed()) + + t1 = t2 = t3 = None + + def test_default_on_error(self): + t = time_table("PT1S").update("X = i") + + def bad_listner_func(table_udpate, is_replay: bool) -> None: + raise ValueError("invalid value") + + error_caught = False + tlh = listen(t, bad_listner_func) + t.await_update() + # the default on_error only logs the error + self.assertFalse(error_caught) + self.assertTrue(tlh.j_object.isFailed()) + + class BadListener(TableListener): + def on_update(self, update, is_replay): + raise ValueError("invalid value") + + tlh = listen(t, BadListener()) + t.await_update() + # the default on_error only logs the error + self.assertFalse(error_caught) + self.assertTrue(tlh.j_object.isFailed()) + + t2 = time_table("PT1S").update("X = i") + def bad_listner_func(updates: Dict[Table, TableUpdate], is_replay: bool) -> None: + raise ValueError("invalid value") + + mlh = merged_listen([t, t2], bad_listner_func) + t.await_update() + # the default on_error only logs the error + self.assertFalse(error_caught) + self.assertTrue(mlh.j_object.isFailed()) + + class BadListener(MergedListener): + def on_update(self, updates: Dict[Table, TableUpdate], is_replay: bool) -> None: + raise ValueError("invalid 
value") + + mlh = merged_listen([t, t2], BadListener()) + t.await_update() + # the default on_error only logs the error + self.assertFalse(error_caught) + self.assertTrue(mlh.j_object.isFailed()) + + t = t2 = None + if __name__ == "__main__": unittest.main() From 663fae6b5ebdfed05e63d8f6b8bd78a3e6561b25 Mon Sep 17 00:00:00 2001 From: Nate Bauernfeind Date: Thu, 15 Aug 2024 17:21:48 -0600 Subject: [PATCH 28/43] perf: Remove Unnecessary Recursion from Select/Update (#5924) This reorganizes how select/update builds the resulting formula columns, column source maps, and work to be processed on the PUG. Instead of using recursion, it now builds things up in a dynamic-programming style without redoing work. As a result, select/update operations with thousands of columns now use significantly less memory and cpu to perform the same work. A simple 50k select-column update operation initializes in 2.5m (source starts empty and this is on an m2 mac) and has cycle times of ~650ms. --- .../java/io/deephaven/util/SimpleTypeMap.java | 43 +- .../impl/QueryCompilerRequestProcessor.java | 47 +- .../engine/table/impl/QueryTable.java | 88 +- .../table/impl/SelectOrUpdateListener.java | 19 +- .../table/impl/ShiftedColumnsFactory.java | 4 +- .../table/impl/lang/QueryLanguageParser.java | 36 +- .../impl/select/AbstractConditionFilter.java | 2 +- .../impl/select/AbstractFormulaColumn.java | 38 +- .../table/impl/select/DhFormulaColumn.java | 21 +- .../engine/table/impl/select/MatchFilter.java | 3 +- .../engine/table/impl/select/RangeFilter.java | 2 +- .../impl/select/analyzers/BaseLayer.java | 97 -- .../select/analyzers/ConstantColumnLayer.java | 63 +- .../select/analyzers/DependencyLayerBase.java | 74 +- .../select/analyzers/PreserveColumnLayer.java | 76 +- .../select/analyzers/RedirectionLayer.java | 91 +- .../analyzers/SelectAndViewAnalyzer.java | 1013 +++++++++++------ .../SelectAndViewAnalyzerWrapper.java | 128 --- .../select/analyzers/SelectColumnLayer.java | 250 ++-- .../analyzers/SelectOrViewColumnLayer.java | 17 +- .../select/analyzers/StaticFlattenLayer.java | 146 --- .../select/analyzers/ViewColumnLayer.java | 25 +- .../impl/select/codegen/FormulaAnalyzer.java | 64 +- .../impl/lang/TestQueryLanguageParser.java | 2 +- .../VarListChunkInputStreamGenerator.java | 12 +- .../barrage/chunk/VarListChunkReader.java | 2 + .../BoxedBooleanArrayExpansionKernel.java | 2 +- 27 files changed, 1146 insertions(+), 1219 deletions(-) delete mode 100644 engine/table/src/main/java/io/deephaven/engine/table/impl/select/analyzers/BaseLayer.java delete mode 100644 engine/table/src/main/java/io/deephaven/engine/table/impl/select/analyzers/SelectAndViewAnalyzerWrapper.java delete mode 100644 engine/table/src/main/java/io/deephaven/engine/table/impl/select/analyzers/StaticFlattenLayer.java diff --git a/Util/src/main/java/io/deephaven/util/SimpleTypeMap.java b/Util/src/main/java/io/deephaven/util/SimpleTypeMap.java index 7b15c86f8b5..c1ab8b67abb 100644 --- a/Util/src/main/java/io/deephaven/util/SimpleTypeMap.java +++ b/Util/src/main/java/io/deephaven/util/SimpleTypeMap.java @@ -7,17 +7,56 @@ public final class SimpleTypeMap { - public static SimpleTypeMap create(V forBoolean, V forChar, V forByte, V forShort, V forInt, V forLong, - V forFloat, V forDouble, V forObject) { + /** + * Create a mapping from type {@link Class classes} to a value. 
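+     * <p>
+     * A rough usage sketch, assuming values are later retrieved through this class's {@code get(Class)} lookup:
+     * <pre>{@code
+     * // One label per simple type; Boolean deliberately falls through to the Object mapping.
+     * SimpleTypeMap<String> labels = SimpleTypeMap.create(
+     *         "boolean", "char", "byte", "short", "int", "long", "float", "double", "object");
+     * labels.get(int.class);      // "int"
+     * labels.get(Integer.class);  // "int"
+     * labels.get(Boolean.class);  // "object", because Booleans are mapped with other Objects
+     * }</pre>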
+ * + * @param forBoolean The mapping for {@code boolean} types (note {@link Boolean} maps to {@code forObject}) + * @param forChar The mapping for {@code char} and {@link Character} types + * @param forByte The mapping for {@code byte} and {@link Byte} types + * @param forShort The mapping for {@code short} and {@link Short} types + * @param forInt The mapping for {@code int} and {@link Integer} types + * @param forLong The mapping for {@code long} and {@link Long} types + * @param forFloat The mapping for {@code float} and {@link Float} types + * @param forDouble The mapping for {@code double} and {@link Double} types + * @param forObject The mapping for all other types + * @return A SimpleTypeMap to the provided values + */ + public static SimpleTypeMap create( + V forBoolean, + V forChar, + V forByte, + V forShort, + V forInt, + V forLong, + V forFloat, + V forDouble, + V forObject) { final HashMap, V> map = new HashMap<>(); + map.put(boolean.class, forBoolean); + // Note: Booleans are treated as Objects, unlike other boxed primitives + map.put(char.class, forChar); + map.put(Character.class, forChar); + map.put(byte.class, forByte); + map.put(Byte.class, forByte); + map.put(short.class, forShort); + map.put(Short.class, forShort); + map.put(int.class, forInt); + map.put(Integer.class, forInt); + map.put(long.class, forLong); + map.put(Long.class, forLong); + map.put(float.class, forFloat); + map.put(Float.class, forFloat); + map.put(double.class, forDouble); + map.put(Double.class, forDouble); + return new SimpleTypeMap<>(map, forObject); } diff --git a/engine/table/src/main/java/io/deephaven/engine/table/impl/QueryCompilerRequestProcessor.java b/engine/table/src/main/java/io/deephaven/engine/table/impl/QueryCompilerRequestProcessor.java index ae321271be4..49e6e2e6b14 100644 --- a/engine/table/src/main/java/io/deephaven/engine/table/impl/QueryCompilerRequestProcessor.java +++ b/engine/table/src/main/java/io/deephaven/engine/table/impl/QueryCompilerRequestProcessor.java @@ -4,12 +4,11 @@ package io.deephaven.engine.table.impl; import io.deephaven.UncheckedDeephavenException; -import io.deephaven.api.util.NameValidator; import io.deephaven.engine.context.ExecutionContext; import io.deephaven.engine.context.QueryCompiler; import io.deephaven.engine.context.QueryCompilerRequest; -import io.deephaven.engine.context.QueryScope; import io.deephaven.engine.table.impl.perf.QueryPerformanceRecorder; +import io.deephaven.engine.table.impl.select.codegen.FormulaAnalyzer; import io.deephaven.util.MultiException; import io.deephaven.util.SafeCloseable; import io.deephaven.util.CompletionStageFuture; @@ -18,43 +17,43 @@ import org.jetbrains.annotations.VisibleForTesting; import java.util.ArrayList; -import java.util.Collections; import java.util.List; -import java.util.Map; import java.util.concurrent.ExecutionException; import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; -public interface QueryCompilerRequestProcessor { +public abstract class QueryCompilerRequestProcessor { /** * @return An immediate QueryCompilerRequestProcessor */ - static QueryCompilerRequestProcessor.ImmediateProcessor immediate() { + public static QueryCompilerRequestProcessor.ImmediateProcessor immediate() { return new ImmediateProcessor(); } /** * @return A batch QueryCompilerRequestProcessor */ - static QueryCompilerRequestProcessor.BatchProcessor batch() { + public static QueryCompilerRequestProcessor.BatchProcessor batch() { return new BatchProcessor(); } /** - * @return a CachingSupplier 
that supplies a snapshot of the current query scope variables + * @return a CachingSupplier that supplies a snapshot of current query scope variables and query library imports */ @VisibleForTesting - static CachingSupplier> newQueryScopeVariableSupplier() { - final QueryScope queryScope = ExecutionContext.getContext().getQueryScope(); - return new CachingSupplier<>(() -> Collections.unmodifiableMap( - queryScope.toMap((name, value) -> NameValidator.isValidQueryParameterName(name)))); + public static CachingSupplier newFormulaImportsSupplier() { + return new CachingSupplier<>(FormulaAnalyzer.Imports::new); } + private final CachingSupplier formulaImportsSupplier = newFormulaImportsSupplier(); + /** - * @return a lazily cached snapshot of the current query scope variables + * @return a lazily cached snapshot of current query scope variables and query library imports */ - Map getQueryScopeVariables(); + public final FormulaAnalyzer.Imports getFormulaImports() { + return formulaImportsSupplier.get(); + } /** * Submit a request for compilation. The QueryCompilerRequestProcessor is not required to immediately compile this @@ -62,24 +61,16 @@ static CachingSupplier> newQueryScopeVariableSupplier() { * * @param request the request to compile */ - CompletionStageFuture> submit(@NotNull QueryCompilerRequest request); + public abstract CompletionStageFuture> submit(@NotNull QueryCompilerRequest request); /** * A QueryCompilerRequestProcessor that immediately compiles requests. */ - class ImmediateProcessor implements QueryCompilerRequestProcessor { - - private final CachingSupplier> queryScopeVariableSupplier = newQueryScopeVariableSupplier(); - + public static class ImmediateProcessor extends QueryCompilerRequestProcessor { private ImmediateProcessor() { // force use of static factory method } - @Override - public Map getQueryScopeVariables() { - return queryScopeVariableSupplier.get(); - } - @Override public CompletionStageFuture> submit(@NotNull final QueryCompilerRequest request) { final String desc = "Compile: " + request.description(); @@ -108,20 +99,14 @@ public CompletionStageFuture> submit(@NotNull final QueryCompilerReques *
* <p>
* The compile method must be called to actually compile the requests. */ - class BatchProcessor implements QueryCompilerRequestProcessor { + public static class BatchProcessor extends QueryCompilerRequestProcessor { private final List requests = new ArrayList<>(); private final List>> resolvers = new ArrayList<>(); - private final CachingSupplier> queryScopeVariableSupplier = newQueryScopeVariableSupplier(); private BatchProcessor() { // force use of static factory method } - @Override - public Map getQueryScopeVariables() { - return queryScopeVariableSupplier.get(); - } - @Override public CompletionStageFuture> submit(@NotNull final QueryCompilerRequest request) { final CompletionStageFuture.Resolver> resolver = CompletionStageFuture.make(); diff --git a/engine/table/src/main/java/io/deephaven/engine/table/impl/QueryTable.java b/engine/table/src/main/java/io/deephaven/engine/table/impl/QueryTable.java index be12408a63b..1c227ea3ae7 100644 --- a/engine/table/src/main/java/io/deephaven/engine/table/impl/QueryTable.java +++ b/engine/table/src/main/java/io/deephaven/engine/table/impl/QueryTable.java @@ -43,7 +43,6 @@ import io.deephaven.engine.table.impl.remote.ConstructSnapshot; import io.deephaven.engine.table.impl.select.*; import io.deephaven.engine.table.impl.select.analyzers.SelectAndViewAnalyzer; -import io.deephaven.engine.table.impl.select.analyzers.SelectAndViewAnalyzerWrapper; import io.deephaven.engine.table.impl.snapshot.SnapshotIncrementalListener; import io.deephaven.engine.table.impl.snapshot.SnapshotInternalListener; import io.deephaven.engine.table.impl.snapshot.SnapshotUtils; @@ -1498,10 +1497,9 @@ public Table update(final Collection newColumns) { */ public SelectValidationResult validateSelect(final SelectColumn... selectColumns) { final SelectColumn[] clones = SelectColumn.copyFrom(selectColumns); - SelectAndViewAnalyzerWrapper analyzerWrapper = SelectAndViewAnalyzer.create( - this, SelectAndViewAnalyzer.Mode.SELECT_STATIC, columns, rowSet, getModifiedColumnSetForUpdates(), true, - false, clones); - return new SelectValidationResult(analyzerWrapper.getAnalyzer(), clones); + SelectAndViewAnalyzer.AnalyzerContext analyzerContext = SelectAndViewAnalyzer.createContext( + this, SelectAndViewAnalyzer.Mode.SELECT_STATIC, true, false, clones); + return new SelectValidationResult(analyzerContext.createAnalyzer(), clones); } private Table selectOrUpdate(Flavor flavor, final SelectColumn... selectColumns) { @@ -1526,18 +1524,16 @@ private Table selectOrUpdate(Flavor flavor, final SelectColumn... selectColumns) } } final boolean publishTheseSources = flavor == Flavor.Update; - final SelectAndViewAnalyzerWrapper analyzerWrapper = SelectAndViewAnalyzer.create( - this, mode, columns, rowSet, getModifiedColumnSetForUpdates(), publishTheseSources, true, - selectColumns); + final SelectAndViewAnalyzer.AnalyzerContext analyzerContext = SelectAndViewAnalyzer.createContext( + this, mode, publishTheseSources, true, selectColumns); - final SelectAndViewAnalyzer analyzer = analyzerWrapper.getAnalyzer(); - final SelectColumn[] processedColumns = analyzerWrapper.getProcessedColumns() + final SelectAndViewAnalyzer analyzer = analyzerContext.createAnalyzer(); + final SelectColumn[] processedColumns = analyzerContext.getProcessedColumns() .toArray(SelectColumn[]::new); // Init all the rows by cooking up a fake Update final TableUpdate fakeUpdate = new TableUpdateImpl( - analyzer.alreadyFlattenedSources() ? 
RowSetFactory.flat(rowSet.size()) : rowSet.copy(), - RowSetFactory.empty(), RowSetFactory.empty(), + rowSet.copy(), RowSetFactory.empty(), RowSetFactory.empty(), RowSetShiftData.EMPTY, ModifiedColumnSet.ALL); final CompletableFuture waitForResult = new CompletableFuture<>(); @@ -1558,8 +1554,10 @@ this, mode, columns, rowSet, getModifiedColumnSetForUpdates(), publishTheseSourc new SelectAndViewAnalyzer.UpdateHelper(emptyRowSet, fakeUpdate)) { try { - analyzer.applyUpdate(fakeUpdate, emptyRowSet, updateHelper, jobScheduler, - liveResultCapture, analyzer.futureCompletionHandler(waitForResult)); + analyzer.applyUpdate( + fakeUpdate, emptyRowSet, updateHelper, jobScheduler, liveResultCapture, + () -> waitForResult.complete(null), + waitForResult::completeExceptionally); } catch (Exception e) { waitForResult.completeExceptionally(e); } @@ -1580,14 +1578,15 @@ this, mode, columns, rowSet, getModifiedColumnSetForUpdates(), publishTheseSourc } } - final TrackingRowSet resultRowSet = - analyzer.flattenedResult() ? RowSetFactory.flat(rowSet.size()).toTracking() : rowSet; - resultTable = new QueryTable(resultRowSet, analyzerWrapper.getPublishedColumnResources()); + final TrackingRowSet resultRowSet = analyzer.flatResult() && !rowSet.isFlat() + ? RowSetFactory.flat(rowSet.size()).toTracking() + : rowSet; + resultTable = new QueryTable(resultRowSet, analyzerContext.getPublishedColumnSources()); if (liveResultCapture != null) { analyzer.startTrackingPrev(); - final Map effects = analyzerWrapper.calcEffects(); - final SelectOrUpdateListener soul = new SelectOrUpdateListener(updateDescription, this, - resultTable, effects, analyzer); + final Map effects = analyzerContext.calcEffects(); + final SelectOrUpdateListener soul = new SelectOrUpdateListener( + updateDescription, this, resultTable, effects, analyzer); liveResultCapture.transferTo(soul); addUpdateListener(soul); ConstituentDependency.install(resultTable, soul); @@ -1596,11 +1595,6 @@ this, mode, columns, rowSet, getModifiedColumnSetForUpdates(), publishTheseSourc resultTable.setFlat(); } propagateDataIndexes(processedColumns, resultTable); - for (final ColumnSource columnSource : analyzer.getNewColumnSources().values()) { - if (columnSource instanceof PossiblyImmutableColumnSource) { - ((PossiblyImmutableColumnSource) columnSource).setImmutable(); - } - } } } propagateFlatness(resultTable); @@ -1610,10 +1604,10 @@ this, mode, columns, rowSet, getModifiedColumnSetForUpdates(), publishTheseSourc } else { maybeCopyColumnDescriptions(resultTable); } - SelectAndViewAnalyzerWrapper.UpdateFlavor updateFlavor = flavor == Flavor.Update - ? SelectAndViewAnalyzerWrapper.UpdateFlavor.Update - : SelectAndViewAnalyzerWrapper.UpdateFlavor.Select; - return analyzerWrapper.applyShiftsAndRemainingColumns(this, resultTable, updateFlavor); + SelectAndViewAnalyzer.UpdateFlavor updateFlavor = flavor == Flavor.Update + ? 
SelectAndViewAnalyzer.UpdateFlavor.Update + : SelectAndViewAnalyzer.UpdateFlavor.Select; + return analyzerContext.applyShiftsAndRemainingColumns(this, resultTable, updateFlavor); })); } @@ -1761,15 +1755,16 @@ updateDescription, sizeForInstrumentation(), () -> { createSnapshotControlIfRefreshing(OperationSnapshotControl::new); initializeWithSnapshot(humanReadablePrefix, sc, (usePrev, beforeClockValue) -> { final boolean publishTheseSources = flavor == Flavor.UpdateView; - final SelectAndViewAnalyzerWrapper analyzerWrapper = SelectAndViewAnalyzer.create( - this, SelectAndViewAnalyzer.Mode.VIEW_EAGER, columns, rowSet, - getModifiedColumnSetForUpdates(), publishTheseSources, true, viewColumns); - final SelectColumn[] processedViewColumns = analyzerWrapper.getProcessedColumns() + final SelectAndViewAnalyzer.AnalyzerContext analyzerContext = + SelectAndViewAnalyzer.createContext( + this, SelectAndViewAnalyzer.Mode.VIEW_EAGER, + publishTheseSources, true, viewColumns); + final SelectColumn[] processedViewColumns = analyzerContext.getProcessedColumns() .toArray(SelectColumn[]::new); QueryTable queryTable = new QueryTable( - rowSet, analyzerWrapper.getPublishedColumnResources()); + rowSet, analyzerContext.getPublishedColumnSources()); if (sc != null) { - final Map effects = analyzerWrapper.calcEffects(); + final Map effects = analyzerContext.calcEffects(); final TableUpdateListener listener = new ViewOrUpdateViewListener(updateDescription, this, queryTable, effects); sc.setListenerAndResult(listener, queryTable); @@ -1786,11 +1781,11 @@ updateDescription, sizeForInstrumentation(), () -> { } else { maybeCopyColumnDescriptions(queryTable); } - final SelectAndViewAnalyzerWrapper.UpdateFlavor updateFlavor = + final SelectAndViewAnalyzer.UpdateFlavor updateFlavor = flavor == Flavor.UpdateView - ? SelectAndViewAnalyzerWrapper.UpdateFlavor.UpdateView - : SelectAndViewAnalyzerWrapper.UpdateFlavor.View; - queryTable = analyzerWrapper.applyShiftsAndRemainingColumns( + ? 
SelectAndViewAnalyzer.UpdateFlavor.UpdateView + : SelectAndViewAnalyzer.UpdateFlavor.View; + queryTable = analyzerContext.applyShiftsAndRemainingColumns( this, queryTable, updateFlavor); result.setValue(queryTable); @@ -1851,14 +1846,13 @@ public Table lazyUpdate(final Collection newColumns) { sizeForInstrumentation(), () -> { checkInitiateOperation(); - final SelectAndViewAnalyzerWrapper analyzerWrapper = SelectAndViewAnalyzer.create( - this, SelectAndViewAnalyzer.Mode.VIEW_LAZY, columns, rowSet, - getModifiedColumnSetForUpdates(), - true, true, selectColumns); - final SelectColumn[] processedColumns = analyzerWrapper.getProcessedColumns() + final SelectAndViewAnalyzer.AnalyzerContext analyzerContext = + SelectAndViewAnalyzer.createContext( + this, SelectAndViewAnalyzer.Mode.VIEW_LAZY, true, true, selectColumns); + final SelectColumn[] processedColumns = analyzerContext.getProcessedColumns() .toArray(SelectColumn[]::new); final QueryTable result = new QueryTable( - rowSet, analyzerWrapper.getPublishedColumnResources()); + rowSet, analyzerContext.getPublishedColumnSources()); if (isRefreshing()) { addUpdateListener(new ListenerImpl( "lazyUpdate(" + Arrays.deepToString(processedColumns) + ')', this, result)); @@ -1868,8 +1862,8 @@ public Table lazyUpdate(final Collection newColumns) { copySortableColumns(result, processedColumns); maybeCopyColumnDescriptions(result, processedColumns); - return analyzerWrapper.applyShiftsAndRemainingColumns( - this, result, SelectAndViewAnalyzerWrapper.UpdateFlavor.LazyUpdate); + return analyzerContext.applyShiftsAndRemainingColumns( + this, result, SelectAndViewAnalyzer.UpdateFlavor.LazyUpdate); }); } } diff --git a/engine/table/src/main/java/io/deephaven/engine/table/impl/SelectOrUpdateListener.java b/engine/table/src/main/java/io/deephaven/engine/table/impl/SelectOrUpdateListener.java index 285cdeabb94..d7236ce6010 100644 --- a/engine/table/src/main/java/io/deephaven/engine/table/impl/SelectOrUpdateListener.java +++ b/engine/table/src/main/java/io/deephaven/engine/table/impl/SelectOrUpdateListener.java @@ -15,8 +15,8 @@ import io.deephaven.engine.table.impl.util.JobScheduler; import io.deephaven.engine.table.impl.util.UpdateGraphJobScheduler; -import java.util.BitSet; import java.util.Map; +import java.util.concurrent.atomic.AtomicBoolean; /** * A Shift-Aware listener for Select or Update. 
It uses the SelectAndViewAnalyzer to calculate how columns affect other @@ -29,8 +29,6 @@ class SelectOrUpdateListener extends BaseTable.ListenerImpl { private final SelectAndViewAnalyzer analyzer; private volatile boolean updateInProgress = false; - private final BitSet completedColumns = new BitSet(); - private final BitSet allNewColumns = new BitSet(); private final boolean enableParallelUpdate; /** @@ -61,7 +59,6 @@ class SelectOrUpdateListener extends BaseTable.ListenerImpl { (QueryTable.ENABLE_PARALLEL_SELECT_AND_UPDATE && getUpdateGraph().parallelismFactor() > 1)) && analyzer.allowCrossColumnParallelization(); - analyzer.setAllNewColumns(allNewColumns); } @Override @@ -76,7 +73,6 @@ public void onUpdate(final TableUpdate upstream) { // - create parallel arrays of pre-shift-keys and post-shift-keys so we can move them in chunks updateInProgress = true; - completedColumns.clear(); final TableUpdate acquiredUpdate = upstream.acquire(); final WritableRowSet toClear = resultRowSet.copyPrev(); @@ -91,15 +87,16 @@ public void onUpdate(final TableUpdate upstream) { jobScheduler = new ImmediateJobScheduler(); } + // do not allow a double-notify + final AtomicBoolean hasNotified = new AtomicBoolean(); analyzer.applyUpdate(acquiredUpdate, toClear, updateHelper, jobScheduler, this, - new SelectAndViewAnalyzer.SelectLayerCompletionHandler(allNewColumns, completedColumns) { - @Override - public void onAllRequiredColumnsCompleted() { + () -> { + if (!hasNotified.getAndSet(true)) { completionRoutine(acquiredUpdate, jobScheduler, toClear, updateHelper); } - - @Override - protected void onError(Exception error) { + }, + error -> { + if (!hasNotified.getAndSet(true)) { handleException(error); } }); diff --git a/engine/table/src/main/java/io/deephaven/engine/table/impl/ShiftedColumnsFactory.java b/engine/table/src/main/java/io/deephaven/engine/table/impl/ShiftedColumnsFactory.java index 4ab1c70248c..c84deddd93e 100644 --- a/engine/table/src/main/java/io/deephaven/engine/table/impl/ShiftedColumnsFactory.java +++ b/engine/table/src/main/java/io/deephaven/engine/table/impl/ShiftedColumnsFactory.java @@ -117,7 +117,7 @@ import io.deephaven.engine.table.impl.select.FormulaColumn; import io.deephaven.engine.table.impl.select.WhereFilter; import io.deephaven.engine.table.impl.select.WhereFilterFactory; -import io.deephaven.engine.table.impl.select.analyzers.SelectAndViewAnalyzerWrapper; +import io.deephaven.engine.table.impl.select.analyzers.SelectAndViewAnalyzer; import io.deephaven.util.mutable.MutableInt; import org.jetbrains.annotations.NotNull; @@ -260,7 +260,7 @@ private static Pair getShiftedTableFilterPair( public static Table getShiftedColumnsTable( @NotNull final Table source, @NotNull FormulaColumn formulaColumn, - @NotNull SelectAndViewAnalyzerWrapper.UpdateFlavor updateFlavor) { + @NotNull SelectAndViewAnalyzer.UpdateFlavor updateFlavor) { String nuggetName = "getShiftedColumnsTable( " + formulaColumn + ", " + updateFlavor + ") "; return QueryPerformanceRecorder.withNugget(nuggetName, source.sizeForInstrumentation(), () -> { Table tableSoFar = source; diff --git a/engine/table/src/main/java/io/deephaven/engine/table/impl/lang/QueryLanguageParser.java b/engine/table/src/main/java/io/deephaven/engine/table/impl/lang/QueryLanguageParser.java index 5cd2418c83b..d457a4ab0b3 100644 --- a/engine/table/src/main/java/io/deephaven/engine/table/impl/lang/QueryLanguageParser.java +++ b/engine/table/src/main/java/io/deephaven/engine/table/impl/lang/QueryLanguageParser.java @@ -182,6 +182,8 @@ public final 
class QueryLanguageParser extends GenericVisitorAdapter, Q * Create a QueryLanguageParser and parse the given {@code expression}. After construction, the * {@link QueryLanguageParser.Result result} of parsing the {@code expression} is available with the * {@link #getResult()}} method. + *
+ * <p>
+ * Note that the provided Collections and Maps must not be mutated concurrently with or after construction. * * @param expression The query language expression to parse * @param packageImports Wildcard package imports @@ -190,9 +192,10 @@ public final class QueryLanguageParser extends GenericVisitorAdapter, Q * imported. * @param variables A map of the names of scope variables to their types * @param variableTypeArguments A map of the names of scope variables to their type arguments - * @param unboxArguments If true it will unbox the query scope arguments - * @param queryScopeVariables A mutable map of the names of query scope variables to their values + * @param queryScopeVariables A map of the names of query scope variables to their values * @param columnVariables A set of column variable names + * @param unboxArguments If true it will unbox the query scope arguments + * @param timeConversionResult The result of converting time literals in the expression * @throws QueryLanguageParseException If any exception or error is encountered */ public QueryLanguageParser( @@ -225,6 +228,8 @@ public QueryLanguageParser( * Create a QueryLanguageParser and parse the given {@code expression}. After construction, the * {@link QueryLanguageParser.Result result} of parsing the {@code expression} is available with the * {@link #getResult()}} method. + *
+ * <p>
+ * Note that the provided Collections and Maps must not be mutated concurrently with or after construction. * * @param expression The query language expression to parse * @param packageImports Wildcard package imports @@ -247,6 +252,28 @@ public QueryLanguageParser( variableTypeArguments, null, null, true, null); } + /** + * Create a QueryLanguageParser and parse the given {@code expression}. After construction, the + * {@link QueryLanguageParser.Result result} of parsing the {@code expression} is available with the + * {@link #getResult()}} method. + *
+ * <p>
+ * Note that the provided Collections and Maps must not be mutated concurrently with or after construction. + * + * @param expression The query language expression to parse + * @param packageImports Wildcard package imports + * @param classImports Individual class imports + * @param staticImports Wildcard static imports. All static variables and methods for the given classes are + * imported. + * @param variables A map of the names of scope variables to their types + * @param variableTypeArguments A map of the names of scope variables to their type arguments + * @param queryScopeVariables A map of the names of query scope variables to their values + * @param columnVariables A set of column variable names + * @param unboxArguments If true it will unbox the query scope arguments + * @param verifyIdempotence If true, the parser will verify that the result expression will not mutate when parsed + * @param pyCallableWrapperImplName The name of the PyCallableWrapper implementation to use + * @param timeConversionResult The result of converting time literals in the expression + * @throws QueryLanguageParseException If any exception or error is encountered + */ @VisibleForTesting QueryLanguageParser( String expression, @@ -264,9 +291,8 @@ public QueryLanguageParser( this.packageImports = packageImports == null ? Collections.emptySet() : Set.copyOf(packageImports); this.classImports = classImports == null ? Collections.emptySet() : Set.copyOf(classImports); this.staticImports = staticImports == null ? Collections.emptySet() : Set.copyOf(staticImports); - this.variables = variables == null ? Collections.emptyMap() : Map.copyOf(variables); - this.variableTypeArguments = - variableTypeArguments == null ? Collections.emptyMap() : Map.copyOf(variableTypeArguments); + this.variables = variables == null ? Collections.emptyMap() : variables; + this.variableTypeArguments = variableTypeArguments == null ? Collections.emptyMap() : variableTypeArguments; this.queryScopeVariables = queryScopeVariables == null ? new HashMap<>() : queryScopeVariables; this.columnVariables = columnVariables == null ? 
Collections.emptySet() : columnVariables; this.unboxArguments = unboxArguments; diff --git a/engine/table/src/main/java/io/deephaven/engine/table/impl/select/AbstractConditionFilter.java b/engine/table/src/main/java/io/deephaven/engine/table/impl/select/AbstractConditionFilter.java index e96bb529f1e..2007eee6526 100644 --- a/engine/table/src/main/java/io/deephaven/engine/table/impl/select/AbstractConditionFilter.java +++ b/engine/table/src/main/java/io/deephaven/engine/table/impl/select/AbstractConditionFilter.java @@ -89,7 +89,7 @@ public synchronized void init( try { final QueryLanguageParser.Result result = FormulaAnalyzer.parseFormula( formula, tableDefinition.getColumnNameMap(), outerToInnerNames, - compilationProcessor.getQueryScopeVariables(), unboxArguments); + compilationProcessor.getFormulaImports(), unboxArguments); formulaShiftColPair = result.getFormulaShiftColPair(); if (formulaShiftColPair != null) { diff --git a/engine/table/src/main/java/io/deephaven/engine/table/impl/select/AbstractFormulaColumn.java b/engine/table/src/main/java/io/deephaven/engine/table/impl/select/AbstractFormulaColumn.java index 06cbd79fd70..ace064b9353 100644 --- a/engine/table/src/main/java/io/deephaven/engine/table/impl/select/AbstractFormulaColumn.java +++ b/engine/table/src/main/java/io/deephaven/engine/table/impl/select/AbstractFormulaColumn.java @@ -51,7 +51,7 @@ public abstract class AbstractFormulaColumn implements FormulaColumn { private Formula formula; protected QueryScopeParam[] params; protected Map> columnSources; - protected Map> columnDefinitions; + protected Map> columnDefinitions; private TrackingRowSet rowSet; protected Class returnedType; public static final String COLUMN_SUFFIX = "_"; @@ -90,12 +90,28 @@ public List initInputs( @NotNull final Map> columnsOfInterest) { this.rowSet = rowSet; - this.columnSources = columnsOfInterest; - if (usedColumns != null) { - return usedColumns; + if (usedColumns == null) { + initDef(extractDefinitions(columnsOfInterest), QueryCompilerRequestProcessor.immediate()); } + this.columnSources = filterColumnSources(columnsOfInterest); - return initDef(extractDefinitions(columnsOfInterest), QueryCompilerRequestProcessor.immediate()); + return usedColumns; + } + + private Map> filterColumnSources( + final Map> columnsOfInterest) { + if (usedColumns.isEmpty() && usedColumnArrays.isEmpty()) { + return Map.of(); + } + + final HashMap> sources = new HashMap<>(); + for (String columnName : usedColumns) { + sources.put(columnName, columnsOfInterest.get(columnName)); + } + for (String columnName : usedColumnArrays) { + sources.put(columnName, columnsOfInterest.get(columnName)); + } + return sources; } @Override @@ -119,28 +135,32 @@ public void validateSafeForRefresh(BaseTable sourceTable) { } protected void applyUsedVariables( - @NotNull final Map> columnDefinitionMap, + @NotNull final Map> parentColumnDefinitions, @NotNull final Set variablesUsed, @NotNull final Map possibleParams) { // the column definition map passed in is being mutated by the caller, so we need to make a copy - columnDefinitions = Map.copyOf(columnDefinitionMap); + columnDefinitions = new HashMap<>(); final List> paramsList = new ArrayList<>(); usedColumns = new ArrayList<>(); usedColumnArrays = new ArrayList<>(); for (String variable : variablesUsed) { + ColumnDefinition columnDefinition = parentColumnDefinitions.get(variable); if (variable.equals("i")) { usesI = true; } else if (variable.equals("ii")) { usesII = true; } else if (variable.equals("k")) { usesK = true; - } else if 
(columnDefinitions.get(variable) != null) { + } else if (columnDefinition != null) { + columnDefinitions.put(variable, columnDefinition); usedColumns.add(variable); } else { String strippedColumnName = variable.substring(0, Math.max(0, variable.length() - COLUMN_SUFFIX.length())); - if (variable.endsWith(COLUMN_SUFFIX) && columnDefinitions.get(strippedColumnName) != null) { + columnDefinition = parentColumnDefinitions.get(strippedColumnName); + if (variable.endsWith(COLUMN_SUFFIX) && columnDefinition != null) { + columnDefinitions.put(strippedColumnName, columnDefinition); usedColumnArrays.add(strippedColumnName); } else if (possibleParams.containsKey(variable)) { paramsList.add(new QueryScopeParam<>(variable, possibleParams.get(variable))); diff --git a/engine/table/src/main/java/io/deephaven/engine/table/impl/select/DhFormulaColumn.java b/engine/table/src/main/java/io/deephaven/engine/table/impl/select/DhFormulaColumn.java index 56a8aecfddc..7ed70800dfa 100644 --- a/engine/table/src/main/java/io/deephaven/engine/table/impl/select/DhFormulaColumn.java +++ b/engine/table/src/main/java/io/deephaven/engine/table/impl/select/DhFormulaColumn.java @@ -34,8 +34,7 @@ import io.deephaven.io.logger.Logger; import io.deephaven.util.CompletionStageFuture; import io.deephaven.util.type.TypeUtils; -import io.deephaven.vector.ObjectVector; -import io.deephaven.vector.Vector; +import io.deephaven.vector.VectorFactory; import org.jetbrains.annotations.NotNull; import org.jpy.PyObject; @@ -161,21 +160,7 @@ private static Map> makeNameToTypeDict(final String[] names, } public static Class getVectorType(Class declaredType) { - if (!io.deephaven.util.type.TypeUtils.isConvertibleToPrimitive(declaredType) || declaredType == boolean.class - || declaredType == Boolean.class) { - return ObjectVector.class; - } else { - final String declaredTypeSimpleName = - io.deephaven.util.type.TypeUtils.getUnboxedType(declaredType).getSimpleName(); - try { - return Class.forName(Vector.class.getPackage().getName() + '.' 
- + Character.toUpperCase(declaredTypeSimpleName.charAt(0)) - + declaredTypeSimpleName.substring(1) - + "Vector"); - } catch (ClassNotFoundException e) { - throw new RuntimeException("Unexpected exception for type " + declaredType, e); - } - } + return VectorFactory.forElementType(declaredType).vectorType(); } @Override @@ -195,7 +180,7 @@ public List initDef( try { final QueryLanguageParser.Result result = FormulaAnalyzer.parseFormula( formulaString, columnDefinitionMap, Collections.emptyMap(), - compilationRequestProcessor.getQueryScopeVariables()); + compilationRequestProcessor.getFormulaImports()); analyzedFormula = FormulaAnalyzer.analyze(formulaString, columnDefinitionMap, result); hasConstantValue = result.isConstantValueExpression(); formulaShiftColPair = result.getFormulaShiftColPair(); diff --git a/engine/table/src/main/java/io/deephaven/engine/table/impl/select/MatchFilter.java b/engine/table/src/main/java/io/deephaven/engine/table/impl/select/MatchFilter.java index d4026f087aa..4a7921e39b1 100644 --- a/engine/table/src/main/java/io/deephaven/engine/table/impl/select/MatchFilter.java +++ b/engine/table/src/main/java/io/deephaven/engine/table/impl/select/MatchFilter.java @@ -223,7 +223,8 @@ public synchronized void init( return; } final List valueList = new ArrayList<>(); - final Map queryScopeVariables = compilationProcessor.getQueryScopeVariables(); + final Map queryScopeVariables = + compilationProcessor.getFormulaImports().getQueryScopeVariables(); final ColumnTypeConvertor convertor = ColumnTypeConvertorFactory.getConvertor(column.getDataType()); for (String strValue : strValues) { convertor.convertValue(column, tableDefinition, strValue, queryScopeVariables, valueList::add); diff --git a/engine/table/src/main/java/io/deephaven/engine/table/impl/select/RangeFilter.java b/engine/table/src/main/java/io/deephaven/engine/table/impl/select/RangeFilter.java index 65d65b75e03..5236862c6d5 100644 --- a/engine/table/src/main/java/io/deephaven/engine/table/impl/select/RangeFilter.java +++ b/engine/table/src/main/java/io/deephaven/engine/table/impl/select/RangeFilter.java @@ -183,7 +183,7 @@ public void init( try { boolean wasAnArrayType = convertor.convertValue( - def, tableDefinition, value, compilationProcessor.getQueryScopeVariables(), + def, tableDefinition, value, compilationProcessor.getFormulaImports().getQueryScopeVariables(), realValue::setValue); if (wasAnArrayType) { conversionError = diff --git a/engine/table/src/main/java/io/deephaven/engine/table/impl/select/analyzers/BaseLayer.java b/engine/table/src/main/java/io/deephaven/engine/table/impl/select/analyzers/BaseLayer.java deleted file mode 100644 index fb79fc32e01..00000000000 --- a/engine/table/src/main/java/io/deephaven/engine/table/impl/select/analyzers/BaseLayer.java +++ /dev/null @@ -1,97 +0,0 @@ -// -// Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending -// -package io.deephaven.engine.table.impl.select.analyzers; - -import io.deephaven.base.log.LogOutput; -import io.deephaven.engine.liveness.LivenessNode; -import io.deephaven.engine.table.TableUpdate; -import io.deephaven.engine.table.ModifiedColumnSet; -import io.deephaven.engine.table.ColumnSource; -import io.deephaven.engine.rowset.RowSet; -import io.deephaven.engine.table.impl.util.JobScheduler; -import org.jetbrains.annotations.Nullable; - -import java.util.*; - -public class BaseLayer extends SelectAndViewAnalyzer { - private final Map> sources; - private final boolean publishTheseSources; - - BaseLayer(Map> sources, boolean publishTheseSources) 
{ - super(BASE_LAYER_INDEX); - this.sources = sources; - this.publishTheseSources = publishTheseSources; - } - - @Override - int getLayerIndexFor(String column) { - if (sources.containsKey(column)) { - return BASE_LAYER_INDEX; - } - throw new IllegalArgumentException("Unknown column: " + column); - } - - @Override - void setBaseBits(BitSet bitset) { - bitset.set(BASE_LAYER_INDEX); - } - - @Override - public void setAllNewColumns(BitSet bitset) { - bitset.set(BASE_LAYER_INDEX); - } - - @Override - void populateModifiedColumnSetRecurse(ModifiedColumnSet mcsBuilder, Set remainingDepsToSatisfy) { - mcsBuilder.setAll(remainingDepsToSatisfy.toArray(String[]::new)); - } - - @Override - final Map> getColumnSourcesRecurse(GetMode mode) { - // We specifically return a LinkedHashMap so the columns get populated in order - final Map> result = new LinkedHashMap<>(); - if (mode == GetMode.All || (mode == GetMode.Published && publishTheseSources)) { - result.putAll(sources); - } - return result; - } - - @Override - public void applyUpdate(TableUpdate upstream, RowSet toClear, UpdateHelper helper, JobScheduler jobScheduler, - @Nullable LivenessNode liveResultOwner, SelectLayerCompletionHandler onCompletion) { - // nothing to do at the base layer - onCompletion.onLayerCompleted(BASE_LAYER_INDEX); - } - - @Override - final Map> calcDependsOnRecurse(boolean forcePublishAllSources) { - final Map> result = new HashMap<>(); - if (publishTheseSources || forcePublishAllSources) { - for (final String col : sources.keySet()) { - result.computeIfAbsent(col, dummy -> new HashSet<>()).add(col); - } - } - return result; - } - - @Override - public SelectAndViewAnalyzer getInner() { - return null; - } - - @Override - public void startTrackingPrev() { - // nothing to do - } - - @Override - public LogOutput append(LogOutput logOutput) { - return logOutput.append("{BaseLayer").append(", layerIndex=").append(getLayerIndex()).append("}"); - } - - @Override - public boolean allowCrossColumnParallelization() { - return true; - } -} diff --git a/engine/table/src/main/java/io/deephaven/engine/table/impl/select/analyzers/ConstantColumnLayer.java b/engine/table/src/main/java/io/deephaven/engine/table/impl/select/analyzers/ConstantColumnLayer.java index c5eff7f3132..4738d944825 100644 --- a/engine/table/src/main/java/io/deephaven/engine/table/impl/select/analyzers/ConstantColumnLayer.java +++ b/engine/table/src/main/java/io/deephaven/engine/table/impl/select/analyzers/ConstantColumnLayer.java @@ -5,41 +5,23 @@ import io.deephaven.base.log.LogOutput; import io.deephaven.chunk.attributes.Values; -import io.deephaven.engine.liveness.LivenessNode; import io.deephaven.engine.rowset.RowSequence; -import io.deephaven.engine.rowset.RowSet; import io.deephaven.engine.rowset.RowSetFactory; import io.deephaven.engine.table.ChunkSource; import io.deephaven.engine.table.ModifiedColumnSet; -import io.deephaven.engine.table.TableUpdate; import io.deephaven.engine.table.WritableColumnSource; import io.deephaven.engine.table.impl.select.SelectColumn; import io.deephaven.engine.table.impl.select.VectorChunkAdapter; -import io.deephaven.engine.table.impl.util.JobScheduler; -import org.jetbrains.annotations.Nullable; - -import java.util.Arrays; -import java.util.BitSet; public class ConstantColumnLayer extends SelectOrViewColumnLayer { - private final BitSet dependencyBitSet; - private final boolean flattenedResult; - private final boolean alreadyFlattenedSources; ConstantColumnLayer( - SelectAndViewAnalyzer inner, - String name, - SelectColumn sc, - 
WritableColumnSource ws, - String[] deps, - ModifiedColumnSet mcsBuilder, - boolean flattenedResult, - boolean alreadyFlattenedSources) { - super(inner, name, sc, ws, null, deps, mcsBuilder); - this.dependencyBitSet = new BitSet(); - this.flattenedResult = flattenedResult; - this.alreadyFlattenedSources = alreadyFlattenedSources; - Arrays.stream(deps).mapToInt(inner::getLayerIndexFor).forEach(dependencyBitSet::set); + final SelectAndViewAnalyzer.AnalyzerContext context, + final SelectColumn sc, + final WritableColumnSource ws, + final String[] deps, + final ModifiedColumnSet mcsBuilder) { + super(context, sc, ws, null, deps, mcsBuilder); initialize(ws); } @@ -60,38 +42,17 @@ private void initialize(final WritableColumnSource writableSource) { } @Override - public void applyUpdate(final TableUpdate upstream, final RowSet toClear, final UpdateHelper helper, - final JobScheduler jobScheduler, @Nullable final LivenessNode liveResultOwner, - final SelectLayerCompletionHandler onCompletion) { - // Nothing to do at this level, but need to recurse because my inner layers might need to be called (e.g. - // because they are SelectColumnLayers) - inner.applyUpdate(upstream, toClear, helper, jobScheduler, liveResultOwner, - new SelectLayerCompletionHandler(dependencyBitSet, onCompletion) { - @Override - public void onAllRequiredColumnsCompleted() { - // we don't need to do anything specific here; our result value is constant - onCompletion.onLayerCompleted(getLayerIndex()); - } - }); - } - - @Override - public LogOutput append(LogOutput logOutput) { - return logOutput.append("{ConstantColumnLayer: ").append(selectColumn.toString()).append("}"); + public boolean hasRefreshingLogic() { + return false; } @Override - public boolean flattenedResult() { - return flattenedResult; + boolean allowCrossColumnParallelization() { + return true; } @Override - public boolean alreadyFlattenedSources() { - return alreadyFlattenedSources; - } - - @Override - public boolean allowCrossColumnParallelization() { - return inner.allowCrossColumnParallelization(); + public LogOutput append(LogOutput logOutput) { + return logOutput.append("{ConstantColumnLayer: ").append(selectColumn.toString()).append("}"); } } diff --git a/engine/table/src/main/java/io/deephaven/engine/table/impl/select/analyzers/DependencyLayerBase.java b/engine/table/src/main/java/io/deephaven/engine/table/impl/select/analyzers/DependencyLayerBase.java index 67fa89b424a..7d00107d17e 100644 --- a/engine/table/src/main/java/io/deephaven/engine/table/impl/select/analyzers/DependencyLayerBase.java +++ b/engine/table/src/main/java/io/deephaven/engine/table/impl/select/analyzers/DependencyLayerBase.java @@ -3,7 +3,6 @@ // package io.deephaven.engine.table.impl.select.analyzers; -import io.deephaven.engine.table.ColumnDefinition; import io.deephaven.vector.Vector; import io.deephaven.engine.table.ModifiedColumnSet; import io.deephaven.engine.table.impl.select.SelectColumn; @@ -11,75 +10,38 @@ import java.util.*; -public abstract class DependencyLayerBase extends SelectAndViewAnalyzer { - final SelectAndViewAnalyzer inner; +public abstract class DependencyLayerBase extends SelectAndViewAnalyzer.Layer { final String name; final SelectColumn selectColumn; final boolean selectColumnHoldsVector; final ColumnSource columnSource; - // probably don't need this any more - private final String[] dependencies; final ModifiedColumnSet myModifiedColumnSet; - - DependencyLayerBase(SelectAndViewAnalyzer inner, String name, SelectColumn selectColumn, - ColumnSource 
columnSource, - String[] dependencies, ModifiedColumnSet mcsBuilder) { - super(inner.getLayerIndex() + 1); - this.inner = inner; - this.name = name; + final BitSet myLayerDependencySet; + + DependencyLayerBase( + final SelectAndViewAnalyzer.AnalyzerContext context, + final SelectColumn selectColumn, + final ColumnSource columnSource, + final String[] dependencies, + final ModifiedColumnSet mcsBuilder) { + super(context.getNextLayerIndex()); + this.name = selectColumn.getName(); this.selectColumn = selectColumn; selectColumnHoldsVector = Vector.class.isAssignableFrom(selectColumn.getReturnedType()); this.columnSource = columnSource; - this.dependencies = dependencies; - final Set remainingDepsToSatisfy = new HashSet<>(Arrays.asList(dependencies)); - inner.populateModifiedColumnSetRecurse(mcsBuilder, remainingDepsToSatisfy); + context.populateParentDependenciesMCS(mcsBuilder, dependencies); this.myModifiedColumnSet = mcsBuilder; + this.myLayerDependencySet = new BitSet(); + context.populateLayerDependencySet(myLayerDependencySet, dependencies); } @Override - void populateModifiedColumnSetRecurse(ModifiedColumnSet mcsBuilder, Set remainingDepsToSatisfy) { - // Later-defined columns override earlier-defined columns. So we satisfy column dependencies "on the way - // down" the recursion. - if (remainingDepsToSatisfy.remove(name)) { - // Caller had a dependency on us, so caller gets our dependencies - mcsBuilder.setAll(myModifiedColumnSet); - } - inner.populateModifiedColumnSetRecurse(mcsBuilder, remainingDepsToSatisfy); - } - - @Override - final Map> calcDependsOnRecurse(boolean forcePublishAllResources) { - final Map> result = inner.calcDependsOnRecurse(forcePublishAllResources); - final Set thisResult = new HashSet<>(); - for (final String dep : dependencies) { - final Set innerDependencies = result.get(dep); - if (innerDependencies == null) { - // There are no further expansions of 'dep', so add it as a dependency. - thisResult.add(dep); - } else { - // Instead of adding 'dep', add what 'dep' expands to. 
- thisResult.addAll(innerDependencies); - } - } - result.put(name, thisResult); - return result; - } - - @Override - public SelectAndViewAnalyzer getInner() { - return inner; - } - - @Override - int getLayerIndexFor(String column) { - if (name.equals(column)) { - return getLayerIndex(); - } - return inner.getLayerIndexFor(column); + Set getLayerColumnNames() { + return Set.of(name); } @Override - void setBaseBits(BitSet bitset) { - inner.setBaseBits(bitset); + public ModifiedColumnSet getModifiedColumnSet() { + return myModifiedColumnSet; } } diff --git a/engine/table/src/main/java/io/deephaven/engine/table/impl/select/analyzers/PreserveColumnLayer.java b/engine/table/src/main/java/io/deephaven/engine/table/impl/select/analyzers/PreserveColumnLayer.java index 8d687cdc8c8..9b0f4b690f7 100644 --- a/engine/table/src/main/java/io/deephaven/engine/table/impl/select/analyzers/PreserveColumnLayer.java +++ b/engine/table/src/main/java/io/deephaven/engine/table/impl/select/analyzers/PreserveColumnLayer.java @@ -4,69 +4,41 @@ package io.deephaven.engine.table.impl.select.analyzers; import io.deephaven.base.log.LogOutput; -import io.deephaven.engine.liveness.LivenessNode; -import io.deephaven.engine.table.TableUpdate; import io.deephaven.engine.table.ModifiedColumnSet; import io.deephaven.engine.table.impl.select.SelectColumn; import io.deephaven.engine.table.ColumnSource; -import io.deephaven.engine.rowset.RowSet; -import io.deephaven.engine.table.impl.util.JobScheduler; -import org.jetbrains.annotations.Nullable; -import java.util.Arrays; -import java.util.BitSet; import java.util.Map; /** * A layer that copies a column from our input to our output. - * + *
* <p>
* {@implNote This class is part of the Deephaven engine, and not intended for direct use.} */ final public class PreserveColumnLayer extends DependencyLayerBase { - private final BitSet dependencyBitSet; - PreserveColumnLayer(SelectAndViewAnalyzer inner, String name, SelectColumn sc, ColumnSource cs, String[] deps, - ModifiedColumnSet mcsBuilder) { - super(inner, name, sc, cs, deps, mcsBuilder); - this.dependencyBitSet = new BitSet(); - Arrays.stream(deps).mapToInt(inner::getLayerIndexFor).forEach(dependencyBitSet::set); + PreserveColumnLayer( + final SelectAndViewAnalyzer.AnalyzerContext context, + final SelectColumn sc, + final ColumnSource cs, + final String[] deps, + final ModifiedColumnSet mcsBuilder) { + super(context, sc, cs, deps, mcsBuilder); } @Override - public void applyUpdate(TableUpdate upstream, RowSet toClear, UpdateHelper helper, JobScheduler jobScheduler, - @Nullable LivenessNode liveResultOwner, SelectLayerCompletionHandler onCompletion) { - // Nothing to do at this level, but need to recurse because my inner layers might need to be called (e.g. - // because they are SelectColumnLayers) - inner.applyUpdate(upstream, toClear, helper, jobScheduler, liveResultOwner, - new SelectLayerCompletionHandler(dependencyBitSet, onCompletion) { - @Override - public void onAllRequiredColumnsCompleted() { - // we don't need to do anything specific here - onCompletion.onLayerCompleted(getLayerIndex()); - } - }); + public boolean hasRefreshingLogic() { + return false; } @Override - Map> getColumnSourcesRecurse(GetMode mode) { - // our column is not a new column, so we need to make sure that we do not double enable previous tracking - final Map> result = inner.getColumnSourcesRecurse(mode); - switch (mode) { - case New: - // we have no new sources - break; - case Published: - case All: - result.put(name, columnSource); - break; - } - return result; + void populateColumnSources(final Map> result) { + result.put(name, columnSource); } @Override - public void startTrackingPrev() { - // nothing to do, here but the inner needs to be called - inner.startTrackingPrev(); + boolean allowCrossColumnParallelization() { + return true; } @Override @@ -74,24 +46,4 @@ public LogOutput append(LogOutput logOutput) { return logOutput.append("{PreserveColumnLayer: ").append(name).append(", layerIndex=").append(getLayerIndex()) .append("}"); } - - @Override - public boolean flattenedResult() { - // preserve layer is only flattened if the inner is flattened - // the "flattenedResult" means that we are flattening the table as part of select. For a pre-existing column, we - // could not preserve a layer while flattening, but if we are preserving a newly generated column; it is valid - // for the result to have been flattened as part of select. 
- return inner.flattenedResult(); - } - - @Override - public boolean alreadyFlattenedSources() { - // a preserve layer is only already flattened if the inner is already flattened - return inner.alreadyFlattenedSources(); - } - - @Override - public boolean allowCrossColumnParallelization() { - return inner.allowCrossColumnParallelization(); - } } diff --git a/engine/table/src/main/java/io/deephaven/engine/table/impl/select/analyzers/RedirectionLayer.java b/engine/table/src/main/java/io/deephaven/engine/table/impl/select/analyzers/RedirectionLayer.java index 8a33acfa00d..46fe6ea4c31 100644 --- a/engine/table/src/main/java/io/deephaven/engine/table/impl/select/analyzers/RedirectionLayer.java +++ b/engine/table/src/main/java/io/deephaven/engine/table/impl/select/analyzers/RedirectionLayer.java @@ -4,82 +4,83 @@ package io.deephaven.engine.table.impl.select.analyzers; import io.deephaven.base.log.LogOutput; -import io.deephaven.base.verify.Assert; import io.deephaven.engine.liveness.LivenessNode; import io.deephaven.engine.rowset.*; import io.deephaven.engine.rowset.RowSetFactory; -import io.deephaven.engine.table.TableUpdate; -import io.deephaven.engine.table.ModifiedColumnSet; import io.deephaven.engine.table.ColumnSource; +import io.deephaven.engine.table.ModifiedColumnSet; +import io.deephaven.engine.table.TableUpdate; import io.deephaven.engine.table.impl.util.*; import io.deephaven.util.mutable.MutableLong; import org.apache.commons.lang3.mutable.MutableObject; import org.jetbrains.annotations.Nullable; import java.util.*; +import java.util.function.Consumer; /** * A layer that maintains the row redirection for future SelectColumnLayers. - * + *
* <p>
* {@implNote This class is part of the Deephaven engine, and not intended for direct use.} */ -public final class RedirectionLayer extends SelectAndViewAnalyzer { - private final SelectAndViewAnalyzer inner; +public final class RedirectionLayer extends SelectAndViewAnalyzer.Layer { private final TrackingRowSet resultRowSet; private final WritableRowRedirection rowRedirection; private final WritableRowSet freeValues = RowSetFactory.empty(); + private final BitSet layerDependencySet = new BitSet(); private long maxInnerIndex; - RedirectionLayer(SelectAndViewAnalyzer inner, TrackingRowSet resultRowSet, WritableRowRedirection rowRedirection) { - super(REDIRECTION_LAYER_INDEX); - Assert.eq(inner.getLayerIndex(), "inner.getLayerIndex()", BASE_LAYER_INDEX); - this.inner = inner; + RedirectionLayer( + final SelectAndViewAnalyzer.AnalyzerContext context, + final TrackingRowSet resultRowSet, + final WritableRowRedirection rowRedirection) { + super(context.getNextLayerIndex()); this.resultRowSet = resultRowSet; this.rowRedirection = rowRedirection; this.maxInnerIndex = -1; } @Override - int getLayerIndexFor(String column) { - // Result columns' applyUpdate depend on the result of the redirection. - Assert.eq(inner.getLayerIndexFor(column), "inner.getLayerIndexFor(column)", BASE_LAYER_INDEX); - return REDIRECTION_LAYER_INDEX; + Set getLayerColumnNames() { + return Set.of(); } @Override - void setBaseBits(BitSet bitset) { - inner.setBaseBits(bitset); - bitset.set(REDIRECTION_LAYER_INDEX); + void populateColumnSources(final Map> result) { + // we don't generate any column sources, so we don't need to do anything here } @Override - public void populateModifiedColumnSetRecurse(ModifiedColumnSet mcsBuilder, Set remainingDepsToSatisfy) { - inner.populateModifiedColumnSetRecurse(mcsBuilder, remainingDepsToSatisfy); + ModifiedColumnSet getModifiedColumnSet() { + return ModifiedColumnSet.EMPTY; } @Override - public Map> getColumnSourcesRecurse(GetMode mode) { - return inner.getColumnSourcesRecurse(mode); + BitSet getLayerDependencySet() { + return layerDependencySet; } @Override - public void applyUpdate(TableUpdate upstream, RowSet toClear, UpdateHelper helper, JobScheduler jobScheduler, - @Nullable LivenessNode liveResultOwner, SelectLayerCompletionHandler onCompletion) { - final BitSet baseLayerBitSet = new BitSet(); - inner.setBaseBits(baseLayerBitSet); - inner.applyUpdate(upstream, toClear, helper, jobScheduler, liveResultOwner, - new SelectLayerCompletionHandler(baseLayerBitSet, onCompletion) { - @Override - public void onAllRequiredColumnsCompleted() { - // we only have a base layer underneath us, so we do not care about the bitSet; it is always - // empty - doApplyUpdate(upstream, toClear, helper, onCompletion); - } - }); + boolean allowCrossColumnParallelization() { + return true; + } + + @Override + public Runnable createUpdateHandler( + final TableUpdate upstream, + final RowSet toClear, + final SelectAndViewAnalyzer.UpdateHelper helper, + final JobScheduler jobScheduler, + @Nullable final LivenessNode liveResultOwner, + final Runnable onSuccess, + final Consumer onError) { + // note that we process this layer directly because all subsequent layers depend on it + return () -> doApplyUpdate(upstream, onSuccess); } - private void doApplyUpdate(TableUpdate upstream, RowSet toClear, UpdateHelper helper, - SelectLayerCompletionHandler onCompletion) { + private void doApplyUpdate( + final TableUpdate upstream, + final Runnable onSuccess) { // we need to remove the removed values from our row redirection, 
and add them to our free RowSet; so that // updating tables will not consume more space over the course of a day for abandoned rows final RowSetBuilderRandom innerToFreeBuilder = RowSetFactory.builderRandom(); @@ -150,32 +151,16 @@ private void doApplyUpdate(TableUpdate upstream, RowSet toClear, UpdateHelper he freeValues.removeRange(0, lastAllocated.get()); } - onCompletion.onLayerCompleted(REDIRECTION_LAYER_INDEX); - } - - @Override - public Map> calcDependsOnRecurse(boolean forcePublishAllResources) { - return inner.calcDependsOnRecurse(forcePublishAllResources); - } - - @Override - public SelectAndViewAnalyzer getInner() { - return inner; + onSuccess.run(); } @Override public void startTrackingPrev() { rowRedirection.startTrackingPrevValues(); - inner.startTrackingPrev(); } @Override public LogOutput append(LogOutput logOutput) { return logOutput.append("{RedirectionLayer").append(", layerIndex=").append(getLayerIndex()).append("}"); } - - @Override - public boolean allowCrossColumnParallelization() { - return inner.allowCrossColumnParallelization(); - } } diff --git a/engine/table/src/main/java/io/deephaven/engine/table/impl/select/analyzers/SelectAndViewAnalyzer.java b/engine/table/src/main/java/io/deephaven/engine/table/impl/select/analyzers/SelectAndViewAnalyzer.java index 099d70d4eb6..6efaf2425b2 100644 --- a/engine/table/src/main/java/io/deephaven/engine/table/impl/select/analyzers/SelectAndViewAnalyzer.java +++ b/engine/table/src/main/java/io/deephaven/engine/table/impl/select/analyzers/SelectAndViewAnalyzer.java @@ -3,46 +3,60 @@ // package io.deephaven.engine.table.impl.select.analyzers; +import gnu.trove.map.TObjectIntMap; +import gnu.trove.map.hash.TObjectIntHashMap; import io.deephaven.base.Pair; +import io.deephaven.base.log.LogOutput; import io.deephaven.base.log.LogOutputAppendable; +import io.deephaven.base.verify.Assert; import io.deephaven.engine.liveness.LivenessNode; import io.deephaven.engine.rowset.RowSet; import io.deephaven.engine.rowset.RowSetFactory; +import io.deephaven.engine.rowset.RowSetShiftData; import io.deephaven.engine.rowset.TrackingRowSet; import io.deephaven.engine.table.*; import io.deephaven.engine.table.impl.MatchPair; import io.deephaven.engine.table.impl.QueryCompilerRequestProcessor; import io.deephaven.engine.table.impl.QueryTable; +import io.deephaven.engine.table.impl.ShiftedColumnsFactory; +import io.deephaven.engine.table.impl.TableUpdateImpl; import io.deephaven.engine.table.impl.select.FormulaColumn; import io.deephaven.engine.table.impl.select.SelectColumn; import io.deephaven.engine.table.impl.select.SourceColumn; import io.deephaven.engine.table.impl.select.SwitchColumn; import io.deephaven.engine.table.impl.sources.InMemoryColumnSource; +import io.deephaven.engine.table.impl.sources.PossiblyImmutableColumnSource; +import io.deephaven.engine.table.impl.sources.RedirectedColumnSource; import io.deephaven.engine.table.impl.sources.SingleValueColumnSource; import io.deephaven.engine.table.impl.sources.WritableRedirectedColumnSource; import io.deephaven.engine.table.impl.util.InverseWrappedRowSetRowRedirection; import io.deephaven.engine.table.impl.util.JobScheduler; import io.deephaven.engine.table.impl.util.RowRedirection; +import io.deephaven.engine.table.impl.util.WrappedRowSetRowRedirection; import io.deephaven.engine.table.impl.util.WritableRowRedirection; import io.deephaven.engine.updategraph.UpdateGraph; import io.deephaven.io.log.impl.LogOutputStringImpl; import io.deephaven.util.SafeCloseable; import 
io.deephaven.util.SafeCloseablePair; import io.deephaven.vector.Vector; +import org.jetbrains.annotations.NotNull; import org.jetbrains.annotations.Nullable; import java.util.*; -import java.util.concurrent.CompletableFuture; +import java.util.concurrent.locks.ReentrantLock; import java.util.function.Consumer; import java.util.stream.Stream; -public abstract class SelectAndViewAnalyzer implements LogOutputAppendable { +public class SelectAndViewAnalyzer implements LogOutputAppendable { private static final Consumer> NOOP = ignore -> { }; public enum Mode { VIEW_LAZY, VIEW_EAGER, SELECT_STATIC, SELECT_REFRESHING, SELECT_REDIRECTED_REFRESHING, SELECT_REDIRECTED_STATIC } + public enum UpdateFlavor { + Select, View, Update, UpdateView, LazyUpdate + } public static void initializeSelectColumns( final Map> parentColumnMap, @@ -65,26 +79,23 @@ public static void initializeSelectColumns( } } - public static SelectAndViewAnalyzerWrapper create( - QueryTable sourceTable, Mode mode, Map> columnSources, - TrackingRowSet rowSet, ModifiedColumnSet parentMcs, boolean publishTheseSources, boolean useShiftedColumns, - SelectColumn... selectColumns) { - return create(sourceTable, mode, columnSources, rowSet, parentMcs, publishTheseSources, useShiftedColumns, - true, selectColumns); - } - - public static SelectAndViewAnalyzerWrapper create( - final QueryTable sourceTable, + public static AnalyzerContext createContext( + final QueryTable parentTable, final Mode mode, - final Map> columnSources, - TrackingRowSet rowSet, - final ModifiedColumnSet parentMcs, - final boolean publishTheseSources, + final boolean publishParentSources, boolean useShiftedColumns, - final boolean allowInternalFlatten, final SelectColumn... selectColumns) { - final UpdateGraph updateGraph = sourceTable.getUpdateGraph(); - SelectAndViewAnalyzer analyzer = createBaseLayer(columnSources, publishTheseSources); + final UpdateGraph updateGraph = parentTable.getUpdateGraph(); + + final Map> columnSources = parentTable.getColumnSourceMap(); + final TrackingRowSet rowSet = parentTable.getRowSet(); + + final boolean parentIsFlat = parentTable.isFlat(); + final boolean flatResult = !parentIsFlat + && (columnSources.isEmpty() || !publishParentSources) + && mode == Mode.SELECT_STATIC; + final AnalyzerContext context = new AnalyzerContext(parentTable, publishParentSources, flatResult); + final Map> columnDefinitions = new LinkedHashMap<>(); final RowRedirection rowRedirection; if (mode == Mode.SELECT_REDIRECTED_STATIC) { @@ -92,19 +103,12 @@ public static SelectAndViewAnalyzerWrapper create( } else if (mode == Mode.SELECT_REDIRECTED_REFRESHING && rowSet.size() < Integer.MAX_VALUE) { final WritableRowRedirection writableRowRedirection = WritableRowRedirection.FACTORY.createRowRedirection(rowSet.intSize()); - analyzer = analyzer.createRedirectionLayer(rowSet, writableRowRedirection); + context.addLayer(new RedirectionLayer(context, rowSet, writableRowRedirection)); rowRedirection = writableRowRedirection; } else { rowRedirection = null; } - List processedCols = new LinkedList<>(); - List remainingCols = null; - FormulaColumn shiftColumn = null; - boolean shiftColumnHasPositiveOffset = false; - - final HashSet resultColumns = new HashSet<>(); - // First pass to initialize all columns and to compile formulas in one batch. 
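// Editor's note: an illustrative, self-contained sketch of the slot-recycling idea behind the
// RedirectionLayer wired in above. Removed outer row keys hand their dense inner slots back to a free pool
// so a long-lived refreshing table does not grow without bound. All names below (DenseRedirectionSketch,
// onRemoved, onAdded) are hypothetical and are not Deephaven API.
import java.util.ArrayDeque;
import java.util.HashMap;
import java.util.Map;

final class DenseRedirectionSketch {
    private final Map<Long, Integer> outerToInner = new HashMap<>(); // outer row key -> dense inner slot
    private final ArrayDeque<Integer> freeSlots = new ArrayDeque<>(); // slots released by removed rows
    private int maxInnerSlot = -1;

    /** A removed row returns its inner slot to the free pool. */
    void onRemoved(final long outerKey) {
        final Integer slot = outerToInner.remove(outerKey);
        if (slot != null) {
            freeSlots.push(slot);
        }
    }

    /** An added row reuses a free slot when one exists, otherwise extends the dense range. */
    int onAdded(final long outerKey) {
        final Integer reused = freeSlots.poll();
        final int slot = reused != null ? reused : ++maxInnerSlot;
        outerToInner.put(outerKey, slot);
        return slot;
    }
}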
final QueryCompilerRequestProcessor.BatchProcessor compilationProcessor = QueryCompilerRequestProcessor.batch(); for (Map.Entry> entry : columnSources.entrySet()) { @@ -114,9 +118,10 @@ public static SelectAndViewAnalyzerWrapper create( columnDefinitions.put(name, cd); } + final Set resultColumnNames = new HashSet<>(); for (final SelectColumn sc : selectColumns) { - if (remainingCols != null) { - remainingCols.add(sc); + if (context.remainingCols != null) { + context.remainingCols.add(sc); continue; } @@ -126,55 +131,50 @@ public static SelectAndViewAnalyzerWrapper create( columnDefinitions.put(sc.getName(), cd); if (useShiftedColumns && hasConstantArrayAccess(sc)) { - remainingCols = new LinkedList<>(); - shiftColumn = sc instanceof FormulaColumn + context.remainingCols = new LinkedList<>(); + context.shiftColumn = sc instanceof FormulaColumn ? (FormulaColumn) sc : (FormulaColumn) ((SwitchColumn) sc).getRealColumn(); - shiftColumnHasPositiveOffset = hasPositiveOffsetConstantArrayAccess(sc); + context.shiftColumnHasPositiveOffset = hasPositiveOffsetConstantArrayAccess(sc); continue; } - processedCols.add(sc); + // In our first pass, determine whether any columns will be preserved so that we don't prematurely flatten. + final SourceColumn realColumn = tryToGetSourceColumn(sc); + + if (realColumn != null && !resultColumnNames.contains(realColumn.getSourceName())) { + // if we are preserving a column, then we cannot change key space + context.flatResult &= !shouldPreserve(columnSources.get(realColumn.getSourceName())); + } + + // TODO (deephaven#5760): If layers may define more than one column, we'll need to add all of them here. + resultColumnNames.add(sc.getName()); + + context.processedCols.add(sc); } compilationProcessor.compile(); // Second pass builds the analyzer and destination columns - final TrackingRowSet originalRowSet = rowSet; - boolean flatResult = rowSet.isFlat(); - // if we preserve a column, we set this to false - boolean flattenedResult = !flatResult - && allowInternalFlatten - && (columnSources.isEmpty() || !publishTheseSources) - && mode == Mode.SELECT_STATIC; - int numberOfInternallyFlattenedColumns = 0; - final HashMap> resultAlias = new HashMap<>(); - for (final SelectColumn sc : processedCols) { - - sc.initInputs(rowSet, analyzer.getAllColumnSources()); + for (final SelectColumn sc : context.processedCols) { - // When flattening the result, intermediate columns generate results in position space. When we discover - // that a select column depends on an intermediate result, then we must flatten all parent columns so - // that all dependent columns are in the same result-key space. - if (!flatResult && flattenedResult && Stream.concat(sc.getColumns().stream(), sc.getColumnArrays().stream()) - .anyMatch(resultColumns::contains)) { - analyzer = analyzer.createStaticFlattenLayer(rowSet); - rowSet = RowSetFactory.flat(rowSet.size()).toTracking(); - flatResult = true; + // if this select column depends on result column then its updates must happen in result-key-space + // note: if flatResult is true then we are not preserving any parent columns + final boolean useResultKeySpace = context.flatResult + && Stream.concat(sc.getColumns().stream(), sc.getColumnArrays().stream()) + .anyMatch(columnName -> context.getLayerIndexFor(columnName) != Layer.PARENT_TABLE_INDEX); - // we must re-initialize the column inputs as they may have changed post-flatten - sc.initInputs(rowSet, analyzer.getAllColumnSources()); - } + sc.initInputs(rowSet, useResultKeySpace ? 
context.allSourcesInResultKeySpace : context.allSources); - resultColumns.add(sc.getName()); - // this shadows any known alias + // TODO (deephaven-core#5760): If layers may define more than one column, we'll need to fix resultAlias. + // new columns shadow known aliases resultAlias.remove(sc.getName()); final Stream allDependencies = Stream.concat(sc.getColumns().stream(), sc.getColumnArrays().stream()); final String[] distinctDeps = allDependencies.distinct().toArray(String[]::new); - final ModifiedColumnSet mcsBuilder = new ModifiedColumnSet(parentMcs); + final ModifiedColumnSet mcsBuilder = new ModifiedColumnSet(parentTable.getModifiedColumnSetForUpdates()); if (useShiftedColumns && hasConstantArrayAccess(sc)) { // we use the first shifted column to split between processed columns and remaining columns @@ -183,104 +183,78 @@ public static SelectAndViewAnalyzerWrapper create( // shifted columns appear to not be safe for refresh, so we do not validate them until they are rewritten // using the intermediary shifted column - if (sourceTable.isRefreshing()) { - sc.validateSafeForRefresh(sourceTable); + if (parentTable.isRefreshing()) { + sc.validateSafeForRefresh(parentTable); } if (hasConstantValue(sc)) { final WritableColumnSource constViewSource = SingleValueColumnSource.getSingleValueColumnSource(sc.getReturnedType()); - analyzer = analyzer.createLayerForConstantView( - sc.getName(), sc, constViewSource, distinctDeps, mcsBuilder, flattenedResult, - flatResult && flattenedResult); + context.addLayer(new ConstantColumnLayer(context, sc, constViewSource, distinctDeps, mcsBuilder)); continue; } - final SourceColumn realColumn; - if (sc instanceof SourceColumn) { - realColumn = (SourceColumn) sc; - } else if ((sc instanceof SwitchColumn) && ((SwitchColumn) sc).getRealColumn() instanceof SourceColumn) { - realColumn = (SourceColumn) ((SwitchColumn) sc).getRealColumn(); - } else { - realColumn = null; - } - - if (realColumn != null && shouldPreserve(sc)) { - boolean sourceIsNew = resultColumns.contains(realColumn.getSourceName()); - if (!sourceIsNew) { - if (numberOfInternallyFlattenedColumns > 0) { - // we must preserve this column, but have already created an analyzer for the internally - // flattened column, therefore must start over without permitting internal flattening - return create(sourceTable, mode, columnSources, originalRowSet, parentMcs, publishTheseSources, - useShiftedColumns, false, selectColumns); - } else { - // we can not flatten future columns because we are preserving a column that may not be flat - flattenedResult = false; - } - } - - analyzer = analyzer.createLayerForPreserve( - sc.getName(), sc, sc.getDataView(), distinctDeps, mcsBuilder); - - continue; - } - - // look for an existing alias that can be preserved instead + final SourceColumn realColumn = tryToGetSourceColumn(sc); if (realColumn != null) { + if (shouldPreserve(sc.getDataView())) { + context.addLayer(new PreserveColumnLayer(context, sc, sc.getDataView(), distinctDeps, mcsBuilder)); + continue; + } + // look for an existing alias that can be preserved instead final ColumnSource alias = resultAlias.get(realColumn.getSourceName()); if (alias != null) { - analyzer = analyzer.createLayerForPreserve(sc.getName(), sc, alias, distinctDeps, mcsBuilder); + context.addLayer(new PreserveColumnLayer(context, sc, alias, distinctDeps, mcsBuilder)); continue; } } - // if this is a source column, then results are eligible for aliasing + // if this is a SourceColumn, then results are eligible for aliasing final Consumer> 
maybeCreateAlias = realColumn == null ? NOOP : cs -> resultAlias.put(realColumn.getSourceName(), cs); final long targetDestinationCapacity = - rowSet.isEmpty() ? 0 : (flattenedResult ? rowSet.size() : rowSet.lastRowKey() + 1); + rowSet.isEmpty() ? 0 : (context.flatResult ? rowSet.size() : rowSet.lastRowKey() + 1); switch (mode) { case VIEW_LAZY: { final ColumnSource viewCs = sc.getLazyView(); maybeCreateAlias.accept(viewCs); - analyzer = analyzer.createLayerForView(sc.getName(), sc, viewCs, distinctDeps, mcsBuilder); + context.addLayer(new ViewColumnLayer(context, sc, viewCs, distinctDeps, mcsBuilder)); break; } case VIEW_EAGER: { final ColumnSource viewCs = sc.getDataView(); maybeCreateAlias.accept(viewCs); - analyzer = analyzer.createLayerForView(sc.getName(), sc, viewCs, distinctDeps, mcsBuilder); + context.addLayer(new ViewColumnLayer(context, sc, viewCs, distinctDeps, mcsBuilder)); break; } case SELECT_STATIC: { // We need to call newDestInstance because only newDestInstance has the knowledge to endow our // created array with the proper componentType (in the case of Vectors). - final WritableColumnSource scs = - flatResult || flattenedResult ? sc.newFlatDestInstance(targetDestinationCapacity) - : sc.newDestInstance(targetDestinationCapacity); + final WritableColumnSource scs = parentIsFlat || context.flatResult + ? sc.newFlatDestInstance(targetDestinationCapacity) + : sc.newDestInstance(targetDestinationCapacity); + maybeSetStaticColumnSourceImmutable(scs); maybeCreateAlias.accept(scs); - analyzer = analyzer.createLayerForSelect(updateGraph, rowSet, sc.getName(), sc, scs, null, - distinctDeps, mcsBuilder, false, flattenedResult, flatResult && flattenedResult); - if (flattenedResult) { - numberOfInternallyFlattenedColumns++; - } + context.addLayer(new SelectColumnLayer( + updateGraph, rowSet, context, sc, scs, null, distinctDeps, mcsBuilder, false, + useResultKeySpace)); break; } case SELECT_REDIRECTED_STATIC: { final WritableColumnSource underlyingSource = sc.newDestInstance(rowSet.size()); final WritableColumnSource scs = WritableRedirectedColumnSource.maybeRedirect( rowRedirection, underlyingSource, rowSet.size()); + maybeSetStaticColumnSourceImmutable(scs); maybeCreateAlias.accept(scs); - analyzer = analyzer.createLayerForSelect(updateGraph, rowSet, sc.getName(), sc, scs, - underlyingSource, distinctDeps, mcsBuilder, true, false, false); + context.addLayer(new SelectColumnLayer( + updateGraph, rowSet, context, sc, scs, underlyingSource, distinctDeps, mcsBuilder, true, + useResultKeySpace)); break; } case SELECT_REDIRECTED_REFRESHING: case SELECT_REFRESHING: { // We need to call newDestInstance because only newDestInstance has the knowledge to endow our // created array with the proper componentType (in the case of Vectors). 
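// Editor's note: a minimal sketch of the targetDestinationCapacity computation above, under the assumption
// (mine, not the patch's) that only the row count and the last row key matter. A flattened destination is
// addressed by position and needs size slots; a keyed destination must reserve lastRowKey + 1 slots even
// when the row set is sparse. The class name CapacitySketch is hypothetical.
final class CapacitySketch {
    static long targetCapacity(final boolean flatResult, final long size, final long lastRowKey) {
        return size == 0 ? 0 : (flatResult ? size : lastRowKey + 1);
    }

    public static void main(String[] args) {
        // a sparse row set {0, 10, 1_000_000}: 3 rows, last row key 1_000_000
        System.out.println(targetCapacity(true, 3, 1_000_000));  // 3
        System.out.println(targetCapacity(false, 3, 1_000_000)); // 1000001
    }
}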
- // TODO: use DeltaAwareColumnSource WritableColumnSource scs = sc.newDestInstance(targetDestinationCapacity); WritableColumnSource underlyingSource = null; if (rowRedirection != null) { @@ -289,8 +263,9 @@ public static SelectAndViewAnalyzerWrapper create( rowRedirection, underlyingSource, rowSet.intSize()); } maybeCreateAlias.accept(scs); - analyzer = analyzer.createLayerForSelect(updateGraph, rowSet, sc.getName(), sc, scs, - underlyingSource, distinctDeps, mcsBuilder, rowRedirection != null, false, false); + context.addLayer(new SelectColumnLayer( + updateGraph, rowSet, context, sc, scs, underlyingSource, distinctDeps, mcsBuilder, + rowRedirection != null, useResultKeySpace)); break; } default: @@ -298,8 +273,25 @@ public static SelectAndViewAnalyzerWrapper create( } } - return new SelectAndViewAnalyzerWrapper(analyzer, shiftColumn, shiftColumnHasPositiveOffset, remainingCols, - processedCols); + return context; + } + + private static void maybeSetStaticColumnSourceImmutable(final ColumnSource columnSource) { + if (columnSource instanceof PossiblyImmutableColumnSource) { + ((PossiblyImmutableColumnSource) columnSource).setImmutable(); + } + } + + private static @Nullable SourceColumn tryToGetSourceColumn(final SelectColumn sc) { + final SourceColumn realColumn; + if (sc instanceof SourceColumn) { + realColumn = (SourceColumn) sc; + } else if ((sc instanceof SwitchColumn) && ((SwitchColumn) sc).getRealColumn() instanceof SourceColumn) { + realColumn = (SourceColumn) ((SwitchColumn) sc).getRealColumn(); + } else { + realColumn = null; + } + return realColumn; } private static boolean hasConstantArrayAccess(final SelectColumn sc) { @@ -343,110 +335,439 @@ private static boolean hasConstantValue(final SelectColumn sc) { return false; } - private static boolean shouldPreserve(final SelectColumn sc) { - // we already know sc is a SourceColumn or switches to a SourceColumn - final ColumnSource sccs = sc.getDataView(); - return sccs instanceof InMemoryColumnSource && ((InMemoryColumnSource) sccs).isInMemory() - && !Vector.class.isAssignableFrom(sc.getReturnedType()); + private static boolean shouldPreserve(final ColumnSource columnSource) { + return columnSource instanceof InMemoryColumnSource && ((InMemoryColumnSource) columnSource).isInMemory() + && !Vector.class.isAssignableFrom(columnSource.getType()); } - static final int BASE_LAYER_INDEX = 0; - static final int REDIRECTION_LAYER_INDEX = 1; + /** The layers that make up this analyzer. */ + private final Layer[] layers; - /** - * The layerIndex is used to identify each layer uniquely within the bitsets for completion. - */ - private final int layerIndex; + /** Whether the result should be flat. */ + private final boolean flatResult; - public SelectAndViewAnalyzer(int layerIndex) { - this.layerIndex = layerIndex; - } + private final BitSet requiredLayers = new BitSet(); + private final BitSet remainingLayers = new BitSet(); - int getLayerIndex() { - return layerIndex; + private SelectAndViewAnalyzer( + final Layer[] layers, + final boolean flatResult) { + this.layers = layers; + this.flatResult = flatResult; + for (final Layer layer : layers) { + if (layer.hasRefreshingLogic()) { + requiredLayers.set(layer.getLayerIndex()); + } else { + this.layers[layer.getLayerIndex()] = null; + } + } } - /** - * Set the bits in bitset that represent the base layer and optional redirection layer. No other jobs can be - * executed until all of these bits are set. - * - * @param bitset the bitset to manipulate. 
- */ - abstract void setBaseBits(BitSet bitset); + public final static class AnalyzerContext { - /** - * Set the bits in bitset that represent all the new columns. This is used to identify when the select or update - * operation is complete. - * - * @param bitset the bitset to manipulate. - */ - public void setAllNewColumns(BitSet bitset) { - getInner().setAllNewColumns(bitset); - bitset.set(getLayerIndex()); - } + /** The analyzer that we are building. */ + private final List layers = new ArrayList<>(); + /** + * The sources that are available to the analyzer, including parent columns. Parent columns are in parent key + * space, others are in result key space. + */ + private final Map> allSources = new LinkedHashMap<>(); + /** The sources that are available to the analyzer, including parent columns, in result key space. */ + private final Map> allSourcesInResultKeySpace; + /** The sources that are published to the child table. */ + private final Map> publishedSources = new LinkedHashMap<>(); + /** A mapping from result column name to the layer index that created it. */ + private final TObjectIntMap columnToLayerIndex; + /** The select columns that have been processed so far. */ + private final List processedCols = new ArrayList<>(); + + /** A holder for the shift column, if any. */ + private FormulaColumn shiftColumn; + /** Whether the shift column has a positive offset. */ + private boolean shiftColumnHasPositiveOffset; + /** The columns that will need to be processed after the shift column. */ + private List remainingCols; + /** Whether the result should be flat. */ + private boolean flatResult; + /** The layer that will be used to process redirection, if we have one. */ + private int redirectionLayer = Layer.UNSET_INDEX; + + AnalyzerContext( + final QueryTable parentTable, + final boolean publishParentSources, + final boolean flatResult) { + final Map> parentSources = parentTable.getColumnSourceMap(); + columnToLayerIndex = new TObjectIntHashMap<>(parentSources.size(), 0.5f, Layer.UNSET_INDEX); + + this.flatResult = flatResult; + + allSources.putAll(parentSources); + for (final String columnName : allSources.keySet()) { + columnToLayerIndex.put(columnName, Layer.PARENT_TABLE_INDEX); + } - private static SelectAndViewAnalyzer createBaseLayer(Map> sources, - boolean publishTheseSources) { - return new BaseLayer(sources, publishTheseSources); - } + if (publishParentSources) { + publishedSources.putAll(parentSources); + } - private RedirectionLayer createRedirectionLayer(TrackingRowSet resultRowSet, - WritableRowRedirection rowRedirection) { - return new RedirectionLayer(this, resultRowSet, rowRedirection); - } + if (!flatResult) { + // result key space is the same as parent key space + allSourcesInResultKeySpace = allSources; + } else { + allSourcesInResultKeySpace = new HashMap<>(); - private StaticFlattenLayer createStaticFlattenLayer(TrackingRowSet parentRowSet) { - return new StaticFlattenLayer(this, parentRowSet); - } + final RowRedirection rowRedirection = new WrappedRowSetRowRedirection(parentTable.getRowSet()); + allSources.forEach((name, cs) -> allSourcesInResultKeySpace.put(name, + RedirectedColumnSource.maybeRedirect(rowRedirection, cs))); + } + } - private SelectAndViewAnalyzer createLayerForSelect( - UpdateGraph updateGraph, RowSet parentRowset, String name, SelectColumn sc, WritableColumnSource cs, - WritableColumnSource underlyingSource, String[] parentColumnDependencies, ModifiedColumnSet mcsBuilder, - boolean isRedirected, boolean flattenResult, boolean alreadyFlattened) 
{ - return new SelectColumnLayer(updateGraph, parentRowset, this, name, sc, cs, underlyingSource, - parentColumnDependencies, - mcsBuilder, isRedirected, flattenResult, alreadyFlattened); - } + /** + * Add a layer to the analyzer. + * + * @param layer the layer to add + */ + void addLayer(final Layer layer) { + if (layer instanceof RedirectionLayer) { + if (redirectionLayer != Layer.UNSET_INDEX) { + throw new IllegalStateException("Cannot have more than one redirection layer"); + } + redirectionLayer = layers.size(); + } - private SelectAndViewAnalyzer createLayerForConstantView(String name, SelectColumn sc, WritableColumnSource cs, - String[] parentColumnDependencies, ModifiedColumnSet mcsBuilder, boolean flattenResult, - boolean alreadyFlattened) { - return new ConstantColumnLayer(this, name, sc, cs, parentColumnDependencies, mcsBuilder, flattenResult, - alreadyFlattened); - } + layer.populateColumnSources(allSources); + if (flatResult) { + layer.populateColumnSources(allSourcesInResultKeySpace); + } + layer.populateColumnSources(publishedSources); - private SelectAndViewAnalyzer createLayerForView(String name, SelectColumn sc, ColumnSource cs, - String[] parentColumnDependencies, ModifiedColumnSet mcsBuilder) { - return new ViewColumnLayer(this, name, sc, cs, parentColumnDependencies, mcsBuilder); - } + layers.add(layer); - private SelectAndViewAnalyzer createLayerForPreserve(String name, SelectColumn sc, ColumnSource cs, - String[] parentColumnDependencies, ModifiedColumnSet mcsBuilder) { - return new PreserveColumnLayer(this, name, sc, cs, parentColumnDependencies, mcsBuilder); - } + for (final String columnName : layer.getLayerColumnNames()) { + columnToLayerIndex.put(columnName, layer.getLayerIndex()); + } + } - abstract void populateModifiedColumnSetRecurse(ModifiedColumnSet mcsBuilder, Set remainingDepsToSatisfy); + /** + * @return the next layerIndex to use + */ + int getNextLayerIndex() { + return layers.size(); + } - enum GetMode { - All, New, Published - } + /** + * Return the layerIndex for a given string column. + * + * @param column the name of the column + * + * @return the layerIndex + */ + int getLayerIndexFor(String column) { + final int layerIndex = columnToLayerIndex.get(column); + if (layerIndex == Layer.UNSET_INDEX) { + throw new IllegalStateException("Column " + column + " not found in any layer of the analyzer"); + } + return layerIndex; + } - public final Map> getAllColumnSources() { - return getColumnSourcesRecurse(GetMode.All); - } + /** + * Populate the ModifiedColumnSet with all indirect/direct dependencies on the parent table. + * + * @param mcsBuilder the result ModifiedColumnSet to populate + * @param dependencies the immediate dependencies + */ + void populateParentDependenciesMCS( + final ModifiedColumnSet mcsBuilder, + final String[] dependencies) { + for (final String dep : dependencies) { + final int layerIndex = getLayerIndexFor(dep); + if (layerIndex == Layer.PARENT_TABLE_INDEX) { + // this is a preserved parent column + mcsBuilder.setAll(dep); + } else { + mcsBuilder.setAll(layers.get(layerIndex).getModifiedColumnSet()); + } + } + } - public final Map> getNewColumnSources() { - return getColumnSourcesRecurse(GetMode.New); - } + /** + * Populate the layer dependency set with the layer indices that the dependencies are in. 
+ * + * @param layerDependencySet the result bitset to populate + * @param dependencies the dependencies + */ + void populateLayerDependencySet( + final BitSet layerDependencySet, + final String[] dependencies) { + for (final String dep : dependencies) { + final int layerIndex = getLayerIndexFor(dep); + if (layerIndex != Layer.PARENT_TABLE_INDEX) { + // note that implicitly preserved columns do not belong to a layer. + layerDependencySet.or(layers.get(layerIndex).getLayerDependencySet()); + } + } + } + + /** + * Set the redirection layer in the bitset if the analyzer has any redirection. + * + * @param layerDependencies the result bitset to populate + */ + void setRedirectionLayer(final BitSet layerDependencies) { + if (redirectionLayer != Layer.UNSET_INDEX) { + layerDependencies.set(redirectionLayer); + } + } + + /** + * @return the column sources that are published through the child table + */ + public Map> getPublishedColumnSources() { + // Note that if we have a shift column that we forcibly publish all columns. + return shiftColumn == null ? publishedSources : allSources; + } + + /** + * @return the final analyzer + */ + public SelectAndViewAnalyzer createAnalyzer() { + return new SelectAndViewAnalyzer(layers.toArray(Layer[]::new), flatResult); + } + + /** + * @return which select columns were included in the result (not including the shift, or post-shift, columns) + */ + public List getProcessedColumns() { + return processedCols; + } + + /** + * @return whether the result should be flat + */ + public boolean isFlatResult() { + return flatResult; + } + + /** + * Our job here is to calculate the effects: a map from incoming column to a list of columns that it effects. We + * do this in two stages. In the first stage we create a map from column to (set of dependent columns). In the + * second stage we reverse that map. + * + * @return the effects map + */ + public Map calcEffects() { + final Map> resultMap = getPublishedColumnSources(); + + // Create the mapping from result column to dependent source columns. + final Map dependsOn = new HashMap<>(); + for (final String columnName : resultMap.keySet()) { + final int layerIndex = getLayerIndexFor(columnName); + final String[] dependencies; + if (layerIndex == Layer.PARENT_TABLE_INDEX) { + dependencies = new String[] {columnName}; + } else { + dependencies = layers.get(layerIndex).getModifiedColumnSet().dirtyColumnNames(); + } + dependsOn.put(columnName, dependencies); + } + + // Now create the mapping from source column to result columns. + final Map> effects = new HashMap<>(); + for (Map.Entry entry : dependsOn.entrySet()) { + final String depender = entry.getKey(); + for (final String dependee : entry.getValue()) { + effects.computeIfAbsent(dependee, dummy -> new ArrayList<>()).add(depender); + } + } + + // Convert effects type into result type + final Map result = new HashMap<>(); + for (Map.Entry> entry : effects.entrySet()) { + final String[] value = entry.getValue().toArray(String[]::new); + result.put(entry.getKey(), value); + } + return result; + } + + /** + * Shift columns introduce intermediary table operations. This method applies remaining work to the result built + * so far. 
+ * + * @param parentTable the source table + * @param resultSoFar the intermediate result + * @param updateFlavor the update flavor + * @return the final result + */ + public QueryTable applyShiftsAndRemainingColumns( + @NotNull final QueryTable parentTable, + @NotNull QueryTable resultSoFar, + final UpdateFlavor updateFlavor) { + if (shiftColumn != null) { + resultSoFar = (QueryTable) ShiftedColumnsFactory.getShiftedColumnsTable( + resultSoFar, shiftColumn, updateFlavor); + } + + // shift columns may introduce modifies that are not present in the original table; set these before using + if (parentTable.isRefreshing()) { + if (shiftColumn == null && parentTable.isAddOnly()) { + resultSoFar.setAttribute(Table.ADD_ONLY_TABLE_ATTRIBUTE, true); + } + if ((shiftColumn == null || !shiftColumnHasPositiveOffset) && parentTable.isAppendOnly()) { + // note if the shift offset is non-positive, then this result is still append-only + resultSoFar.setAttribute(Table.APPEND_ONLY_TABLE_ATTRIBUTE, true); + } + if (parentTable.hasAttribute(Table.TEST_SOURCE_TABLE_ATTRIBUTE)) { + // be convenient for test authors by propagating the test source table attribute + resultSoFar.setAttribute(Table.TEST_SOURCE_TABLE_ATTRIBUTE, true); + } + if (parentTable.isBlink()) { + // blink tables, although possibly not useful, can have shift columns + resultSoFar.setAttribute(Table.BLINK_TABLE_ATTRIBUTE, true); + } + } - public final Map> getPublishedColumnSources() { - return getColumnSourcesRecurse(GetMode.Published); + boolean isMultiStateSelect = shiftColumn != null || remainingCols != null; + if (isMultiStateSelect && (updateFlavor == UpdateFlavor.Select || updateFlavor == UpdateFlavor.View)) { + List newResultColumns = new LinkedList<>(); + for (SelectColumn processed : processedCols) { + newResultColumns.add(new SourceColumn(processed.getName())); + } + if (shiftColumn != null) { + newResultColumns.add(new SourceColumn(shiftColumn.getName())); + } + if (remainingCols != null) { + newResultColumns.addAll(remainingCols); + } + + if (updateFlavor == UpdateFlavor.Select) { + resultSoFar = (QueryTable) resultSoFar.select(newResultColumns); + } else { + resultSoFar = (QueryTable) resultSoFar.view(newResultColumns); + } + } else if (remainingCols != null) { + switch (updateFlavor) { + case Update: { + resultSoFar = (QueryTable) resultSoFar.update(remainingCols); + break; + } + case UpdateView: { + resultSoFar = (QueryTable) resultSoFar.updateView(remainingCols); + break; + } + case LazyUpdate: { + resultSoFar = (QueryTable) resultSoFar.lazyUpdate(remainingCols); + break; + } + default: + throw new IllegalStateException("Unexpected update flavor: " + updateFlavor); + } + } + + return resultSoFar; + } } - abstract Map> getColumnSourcesRecurse(GetMode mode); + static abstract class Layer implements LogOutputAppendable { + private static final BitSet EMPTY_BITSET = new BitSet(); + + public static final int UNSET_INDEX = -1; + public static final int PARENT_TABLE_INDEX = -2; + + /** + * The layerIndex is used to identify each layer uniquely within the bitsets for completion. 
+ */ + private final int layerIndex; + + Layer(int layerIndex) { + this.layerIndex = layerIndex; + } + + /** + * @return which index in the layer stack this layer is + */ + int getLayerIndex() { + return layerIndex; + } + + /** + * @return whether this layer has refreshing logic and needs to be updated + */ + boolean hasRefreshingLogic() { + return true; + } + + /** + * @return the modified column set of the parent table that this layer indirectly depends on + */ + ModifiedColumnSet getModifiedColumnSet() { + return failNoRefreshingLogic(); + } + + /** + * @return the layer dependency set indicating which layers this layer depends on + */ + BitSet getLayerDependencySet() { + return EMPTY_BITSET; + } + + @Override + public String toString() { + return new LogOutputStringImpl().append(this).toString(); + } + + void startTrackingPrev() { + // default is that there is nothing to do + } + + /** + * @return the column names created by this layer + */ + abstract Set getLayerColumnNames(); + + /** + * Populate the column sources for this layer. + * + * @param result the map to populate + */ + abstract void populateColumnSources(Map> result); + + /** + * @return true if this layer allows parallelization across columns + */ + abstract boolean allowCrossColumnParallelization(); + + /** + * Apply this update to this Layer. + * + * @param upstream the upstream update + * @param toClear rows that used to exist and no longer exist + * @param helper convenience class that memoizes reusable calculations for this update + * @param jobScheduler scheduler for parallel sub-tasks + * @param liveResultOwner {@link LivenessNode node} to be used to manage/unmanage results that happen to be + * {@link io.deephaven.engine.liveness.LivenessReferent liveness referents} + * @param onSuccess called when the update completed successfully + * @param onError called when the update failed + */ + Runnable createUpdateHandler( + TableUpdate upstream, + RowSet toClear, + UpdateHelper helper, + JobScheduler jobScheduler, + @Nullable LivenessNode liveResultOwner, + Runnable onSuccess, + Consumer onError) { + return failNoRefreshingLogic(); + } + + private T failNoRefreshingLogic() { + throw new UnsupportedOperationException(String.format( + "%s does not have any refreshing logic", this.getClass().getSimpleName())); + } + } public static class UpdateHelper implements SafeCloseable { private RowSet existingRows; + private TableUpdate upstreamInResultSpace; private SafeCloseablePair shiftedWithModifies; private SafeCloseablePair shiftedWithoutModifies; @@ -458,6 +779,21 @@ public UpdateHelper(RowSet parentRowSet, TableUpdate upstream) { this.upstream = upstream; } + /** + * Flatten the upstream update from the parent key space to the destination key space. We are guaranteed to be + * in STATIC_SELECT mode. 
+ * + * @return the flattened update + */ + TableUpdate resultKeySpaceUpdate() { + if (upstreamInResultSpace == null) { + upstreamInResultSpace = new TableUpdateImpl( + RowSetFactory.flat(upstream.added().size()), RowSetFactory.empty(), RowSetFactory.empty(), + RowSetShiftData.EMPTY, ModifiedColumnSet.EMPTY); + } + return upstreamInResultSpace; + } + private RowSet getExisting() { if (existingRows == null) { existingRows = parentRowSet.minus(upstream.added()); @@ -507,6 +843,10 @@ public void close() { shiftedWithoutModifies.close(); shiftedWithoutModifies = null; } + if (upstreamInResultSpace != null) { + upstreamInResultSpace.release(); + upstreamInResultSpace = null; + } } } @@ -519,195 +859,174 @@ public void close() { * @param jobScheduler scheduler for parallel sub-tasks * @param liveResultOwner {@link LivenessNode node} to be used to manage/unmanage results that happen to be * {@link io.deephaven.engine.liveness.LivenessReferent liveness referents} - * @param onCompletion Called when an inner column is complete. The outer layer should pass the {@code onCompletion} - */ - public abstract void applyUpdate(TableUpdate upstream, RowSet toClear, UpdateHelper helper, - JobScheduler jobScheduler, @Nullable LivenessNode liveResultOwner, - SelectLayerCompletionHandler onCompletion); - - /** - * Our job here is to calculate the effects: a map from incoming column to a list of columns that it effects. We do - * this in two stages. In the first stage we create a map from column to (set of dependent columns). In the second - * stage we reverse that map. + * @param onSuccess called when the update completed successfully + * @param onError called when the update failed */ - public final Map calcEffects(boolean forcePublishAllResources) { - final Map> dependsOn = calcDependsOnRecurse(forcePublishAllResources); - - // Now create effects, which is the inverse of dependsOn: - // An entry W -> [X, Y, Z] in effects means that W affects X, Y, and Z - final Map> effects = new HashMap<>(); - for (Map.Entry> entry : dependsOn.entrySet()) { - final String depender = entry.getKey(); - for (final String dependee : entry.getValue()) { - effects.computeIfAbsent(dependee, dummy -> new ArrayList<>()).add(depender); + public void applyUpdate( + final TableUpdate upstream, + final RowSet toClear, + final UpdateHelper helper, + final JobScheduler jobScheduler, + @Nullable final LivenessNode liveResultOwner, + final Runnable onSuccess, + final Consumer onError) { + + Assert.assertion(remainingLayers.isEmpty(), "remainingLayers.isEmpty()"); + remainingLayers.or(requiredLayers); + + final Runnable[] runners = new Runnable[layers.length]; + final UpdateScheduler scheduler = new UpdateScheduler(runners, onSuccess, onError); + + for (int ii = 0; ii < layers.length; ++ii) { + final Layer layer = layers[ii]; + if (layer != null) { + // TODO (deephaven-core#4896): this error handling allows concurrent layers to fail without ensuring + // that other tasks are finished. 
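// Editor's note: a deliberately single-threaded sketch of the BitSet-driven ordering that applyUpdate and
// the UpdateScheduler below implement: a layer may run only once no layer it depends on is still
// incomplete. The names LayerSchedulingSketch, dependencies and remaining are hypothetical, and the real
// scheduler hands layer work to the JobScheduler rather than running it inline.
import java.util.BitSet;

final class LayerSchedulingSketch {
    public static void main(String[] args) {
        // three layers: 0 and 1 are independent, 2 depends on both of them
        final BitSet[] dependencies = {new BitSet(), new BitSet(), new BitSet()};
        dependencies[2].set(0);
        dependencies[2].set(1);

        final BitSet remaining = new BitSet();
        remaining.set(0, dependencies.length); // every layer still has work to do

        while (!remaining.isEmpty()) {
            for (int layer = remaining.nextSetBit(0); layer >= 0; layer = remaining.nextSetBit(layer + 1)) {
                if (!dependencies[layer].intersects(remaining)) {
                    // ready: none of this layer's dependencies remain incomplete
                    System.out.println("running layer " + layer);
                    remaining.clear(layer);
                }
            }
        }
    }
}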
+ runners[ii] = layer.createUpdateHandler( + upstream, toClear, helper, jobScheduler, liveResultOwner, + () -> scheduler.onLayerComplete(layer.getLayerIndex()), onError); } } - // Convert effects type into result type - final Map result = new HashMap<>(); - for (Map.Entry> entry : effects.entrySet()) { - final String[] value = entry.getValue().toArray(String[]::new); - result.put(entry.getKey(), value); - } - return result; + + scheduler.tryToKickOffWork(); } - abstract Map> calcDependsOnRecurse(boolean forcePublishAllResources); + private class UpdateScheduler { + private final ReentrantLock runLock = new ReentrantLock(); - public abstract SelectAndViewAnalyzer getInner(); + private final Runnable[] runners; + private final Runnable onSuccess; + private final Consumer onError; - public abstract void startTrackingPrev(); + private volatile boolean needsRun; + /** whether we have already invoked onSuccess */ + private boolean updateComplete; - /** - * Was the result internally flattened? Only the STATIC_SELECT case flattens the result. If the result preserves any - * columns, then flattening is not permitted. Because all the other layers cannot internally flatten, the default - * implementation returns false. - */ - public boolean flattenedResult() { - return false; - } + public UpdateScheduler( + final Runnable[] runners, + final Runnable onSuccess, + final Consumer onError) { + this.runners = runners; + this.onSuccess = onSuccess; + this.onError = onError; + } - /** - * Have the column sources already been flattened? Only the STATIC_SELECT case flattens the result. A static flatten - * layer is only added if SelectColumn depends on an intermediate result. - */ - public boolean alreadyFlattenedSources() { - return false; - } + public void onLayerComplete(final int layerIndex) { + synchronized (remainingLayers) { + remainingLayers.set(layerIndex, false); + } - /** - * Return the layerIndex for a given string column. - * - *
- * This is executed recursively, because later columns in a select statement hide earlier columns. - *
- * - * @param column the name of the column - * - * @return the layerIndex - */ - abstract int getLayerIndexFor(String column); + tryToKickOffWork(); + } - /** - * Can all of our columns permit parallel updates? - */ - abstract public boolean allowCrossColumnParallelization(); + private void tryToKickOffWork() { + needsRun = true; + while (true) { + if (runLock.isHeldByCurrentThread() || !runLock.tryLock()) { + // do not permit re-entry or waiting on another thread doing exactly this work + return; + } - /** - * A class that handles the completion of one select column. The handlers are chained together; all downstream - * dependencies may execute when a column completes. - */ - public static abstract class SelectLayerCompletionHandler { - /** - * Note that the completed columns are shared among the entire chain of completion handlers. - */ - private final BitSet completedColumns; - private final SelectLayerCompletionHandler nextHandler; - private final BitSet requiredColumns; - private volatile boolean fired = false; + try { + if (needsRun) { + needsRun = false; + doKickOffWork(); + } + } catch (final Exception exception) { + try { + onError.accept(exception); + } catch (final Exception ignored) { + } + } finally { + runLock.unlock(); + } - /** - * Create a new completion handler that calls nextHandler after its own processing. The completedColumns BitSet - * is shared among all handlers. - * - * @param requiredColumns the columns required for this layer - * @param nextHandler the next handler to call - */ - SelectLayerCompletionHandler(BitSet requiredColumns, SelectLayerCompletionHandler nextHandler) { - this.requiredColumns = requiredColumns; - this.completedColumns = nextHandler.completedColumns; - this.nextHandler = nextHandler; + if (!needsRun) { + return; + } + } } - /** - * Create the final completion handler, which has no next handler. - * - * @param requiredColumns the columns required for this handler to fire - * @param completedColumns the set of completed columns, shared with all the other handlers - */ - public SelectLayerCompletionHandler(BitSet requiredColumns, BitSet completedColumns) { - this.requiredColumns = requiredColumns; - this.completedColumns = completedColumns; - this.nextHandler = null; - } + private void doKickOffWork() { + if (updateComplete) { + // we may have already completed the update, but are checking again due to the potential of a race + return; + } - /** - * Called when a single column is completed. - *
- * If we are ready, then we call {@link #onAllRequiredColumnsCompleted()}. - *
- * We may not be ready, but other columns downstream of us may be ready, so they are also notified (the - * nextHandler). - * - * @param completedColumn the layerIndex of the completedColumn - */ - void onLayerCompleted(int completedColumn) { - if (!fired) { + int nextLayer = 0; + while (nextLayer >= 0) { + boolean complete; boolean readyToFire = false; - synchronized (completedColumns) { - if (!fired) { - completedColumns.set(completedColumn); - if (requiredColumns.get(completedColumn) || requiredColumns.isEmpty()) { - readyToFire = requiredColumns.stream().allMatch(completedColumns::get); - if (readyToFire) { - fired = true; - } + Runnable runner = null; + synchronized (remainingLayers) { + complete = remainingLayers.isEmpty(); + nextLayer = complete ? -1 : remainingLayers.nextSetBit(nextLayer); + + if (nextLayer != -1) { + if ((runner = runners[nextLayer]) != null) { + readyToFire = !layers[nextLayer].getLayerDependencySet().intersects(remainingLayers); + } + + if (readyToFire) { + runners[nextLayer] = null; + } else { + ++nextLayer; } } } + if (readyToFire) { - onAllRequiredColumnsCompleted(); + runner.run(); + } else if (complete) { + updateComplete = true; + onSuccess.run(); + return; } } - if (nextHandler != null) { - nextHandler.onLayerCompleted(completedColumn); - } } + } - protected void onError(Exception error) { - if (nextHandler != null) { - nextHandler.onError(error); + public void startTrackingPrev() { + for (final Layer layer : layers) { + if (layer != null) { + layer.startTrackingPrev(); } } - - /** - * Called when all required columns are completed. - */ - protected abstract void onAllRequiredColumnsCompleted(); } /** - * Create a completion handler that signals a future when the update is completed. - * - * @param waitForResult a void future indicating success or failure - * - * @return a completion handler that will signal the future + * Is the result of this select/view flat? */ - public SelectLayerCompletionHandler futureCompletionHandler(CompletableFuture waitForResult) { - final BitSet completedColumns = new BitSet(); - final BitSet requiredColumns = new BitSet(); - - setAllNewColumns(requiredColumns); + public boolean flatResult() { + return flatResult; + } - return new SelectLayerCompletionHandler(requiredColumns, completedColumns) { - boolean errorOccurred = false; + /** + * Can all of our columns permit parallel updates? 
+ */ + public boolean allowCrossColumnParallelization() { + return Arrays.stream(layers) + .filter(Objects::nonNull) + .allMatch(Layer::allowCrossColumnParallelization); + } - @Override - public void onAllRequiredColumnsCompleted() { - if (errorOccurred) { - return; - } - waitForResult.complete(null); + @Override + public LogOutput append(LogOutput logOutput) { + logOutput = logOutput.append("SelectAndViewAnalyzer{"); + boolean first = true; + for (final Layer layer : layers) { + if (layer == null) { + continue; } - - @Override - protected void onError(Exception error) { - if (errorOccurred) { - return; - } - errorOccurred = true; - waitForResult.completeExceptionally(error); + if (first) { + first = false; + } else { + logOutput = logOutput.append(", "); } - }; + logOutput = logOutput.append(layer); + + } + return logOutput.append("}"); } @Override diff --git a/engine/table/src/main/java/io/deephaven/engine/table/impl/select/analyzers/SelectAndViewAnalyzerWrapper.java b/engine/table/src/main/java/io/deephaven/engine/table/impl/select/analyzers/SelectAndViewAnalyzerWrapper.java deleted file mode 100644 index ec4fcf6f534..00000000000 --- a/engine/table/src/main/java/io/deephaven/engine/table/impl/select/analyzers/SelectAndViewAnalyzerWrapper.java +++ /dev/null @@ -1,128 +0,0 @@ -// -// Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending -// -package io.deephaven.engine.table.impl.select.analyzers; - -import io.deephaven.engine.table.ColumnSource; -import io.deephaven.engine.table.Table; -import io.deephaven.engine.table.impl.QueryTable; -import io.deephaven.engine.table.impl.ShiftedColumnsFactory; -import io.deephaven.engine.table.impl.select.FormulaColumn; -import io.deephaven.engine.table.impl.select.SelectColumn; -import io.deephaven.engine.table.impl.select.SourceColumn; -import org.jetbrains.annotations.NotNull; - -import java.util.LinkedList; -import java.util.List; -import java.util.Map; - -public class SelectAndViewAnalyzerWrapper { - public enum UpdateFlavor { - Select, View, Update, UpdateView, LazyUpdate - } - - private final SelectAndViewAnalyzer analyzer; - private final FormulaColumn shiftColumn; - private final boolean shiftColumnHasPositiveOffset; - private final List remainingCols; - private final List processedColumns; - - SelectAndViewAnalyzerWrapper( - SelectAndViewAnalyzer analyzer, - FormulaColumn shiftColumn, - boolean shiftColumnHasPositiveOffset, - List remainingCols, - List processedColumns) { - this.analyzer = analyzer; - this.shiftColumn = shiftColumn; - this.shiftColumnHasPositiveOffset = shiftColumnHasPositiveOffset; - this.remainingCols = remainingCols; - this.processedColumns = processedColumns; - } - - public final Map> getPublishedColumnResources() { - if (shiftColumn == null) { - return analyzer.getPublishedColumnSources(); - } else { - return analyzer.getAllColumnSources(); - } - } - - public final Map calcEffects() { - return analyzer.calcEffects(shiftColumn != null); - } - - public SelectAndViewAnalyzer getAnalyzer() { - return analyzer; - } - - public List getProcessedColumns() { - return processedColumns; - } - - public QueryTable applyShiftsAndRemainingColumns( - @NotNull QueryTable sourceTable, @NotNull QueryTable queryTable, UpdateFlavor updateFlavor) { - if (shiftColumn != null) { - queryTable = (QueryTable) ShiftedColumnsFactory.getShiftedColumnsTable( - queryTable, shiftColumn, updateFlavor); - } - - // shift columns may introduce modifies that are not present in the original table; set these before using - if 
(sourceTable.isRefreshing()) { - if (shiftColumn == null && sourceTable.isAddOnly()) { - queryTable.setAttribute(Table.ADD_ONLY_TABLE_ATTRIBUTE, true); - } - if ((shiftColumn == null || !shiftColumnHasPositiveOffset) && sourceTable.isAppendOnly()) { - // note if the shift offset is non-positive, then this result is still append-only - queryTable.setAttribute(Table.APPEND_ONLY_TABLE_ATTRIBUTE, true); - } - if (sourceTable.hasAttribute(Table.TEST_SOURCE_TABLE_ATTRIBUTE)) { - // be convenient for test authors by propagating the test source table attribute - queryTable.setAttribute(Table.TEST_SOURCE_TABLE_ATTRIBUTE, true); - } - if (sourceTable.isBlink()) { - // blink tables, although possibly not useful, can have shift columns - queryTable.setAttribute(Table.BLINK_TABLE_ATTRIBUTE, true); - } - } - - boolean isMultiStateSelect = shiftColumn != null || remainingCols != null; - if (isMultiStateSelect && (updateFlavor == UpdateFlavor.Select || updateFlavor == UpdateFlavor.View)) { - List newResultColumns = new LinkedList<>(); - for (SelectColumn processed : processedColumns) { - newResultColumns.add(new SourceColumn(processed.getName())); - } - if (shiftColumn != null) { - newResultColumns.add(new SourceColumn(shiftColumn.getName())); - } - if (remainingCols != null) { - newResultColumns.addAll(remainingCols); - } - - if (updateFlavor == UpdateFlavor.Select) { - queryTable = (QueryTable) queryTable.select(newResultColumns); - } else { - queryTable = (QueryTable) queryTable.view(newResultColumns); - } - } else if (remainingCols != null) { - switch (updateFlavor) { - case Update: { - queryTable = (QueryTable) queryTable.update(remainingCols); - break; - } - case UpdateView: { - queryTable = (QueryTable) queryTable.updateView(remainingCols); - break; - } - case LazyUpdate: { - queryTable = (QueryTable) queryTable.lazyUpdate(remainingCols); - break; - } - default: - throw new IllegalStateException("Unexpected update flavor: " + updateFlavor); - } - } - - return queryTable; - } -} diff --git a/engine/table/src/main/java/io/deephaven/engine/table/impl/select/analyzers/SelectColumnLayer.java b/engine/table/src/main/java/io/deephaven/engine/table/impl/select/analyzers/SelectColumnLayer.java index b7177e9fe39..431d00ee7e7 100644 --- a/engine/table/src/main/java/io/deephaven/engine/table/impl/select/analyzers/SelectColumnLayer.java +++ b/engine/table/src/main/java/io/deephaven/engine/table/impl/select/analyzers/SelectColumnLayer.java @@ -40,24 +40,22 @@ import static io.deephaven.chunk.util.pools.ChunkPoolConstants.LARGEST_POOLED_CHUNK_CAPACITY; final public class SelectColumnLayer extends SelectOrViewColumnLayer { - /** - * The same reference as super.columnSource, but as a WritableColumnSource and maybe reinterpreted - */ + /** The same reference as super.columnSource, but as a WritableColumnSource and maybe reinterpreted */ private final WritableColumnSource writableSource; - /** - * The execution context the select column layer was constructed in - */ + /** The execution context the select column layer was constructed in */ private final ExecutionContext executionContext; private final UpdateGraph updateGraph; - /** - * Our parent row set, used for ensuring capacity. 
- */ + /** Our parent row set, used for ensuring capacity */ private final RowSet parentRowSet; + /** Whether our result is redirected */ private final boolean isRedirected; + /** Whether our result is flattened */ private final boolean flattenedResult; - private final boolean alreadyFlattenedSources; + /** Whether our dependencies are in the result key space instead of parent key space */ + private final boolean sourcesAreInResultKeySpace; + /** Which layers we depend on */ private final BitSet dependencyBitSet; private final boolean canParallelizeThisColumn; private final boolean isSystemic; @@ -73,14 +71,22 @@ final public class SelectColumnLayer extends SelectOrViewColumnLayer { private ChunkSource.WithPrev chunkSource; SelectColumnLayer( - UpdateGraph updateGraph, RowSet parentRowSet, SelectAndViewAnalyzer inner, String name, SelectColumn sc, - WritableColumnSource ws, WritableColumnSource underlying, String[] deps, ModifiedColumnSet mcsBuilder, - boolean isRedirected, boolean flattenedResult, boolean alreadyFlattenedSources) { - super(inner, name, sc, ws, underlying, deps, mcsBuilder); + final UpdateGraph updateGraph, + final RowSet parentRowSet, + final SelectAndViewAnalyzer.AnalyzerContext context, + final SelectColumn sc, + final WritableColumnSource ws, + final WritableColumnSource underlying, + final String[] deps, + final ModifiedColumnSet mcsBuilder, + final boolean isRedirected, + final boolean sourcesAreInResultKeySpace) { + super(context, sc, ws, underlying, deps, mcsBuilder); this.updateGraph = updateGraph; this.parentRowSet = parentRowSet; this.writableSource = ReinterpretUtils.maybeConvertToWritablePrimitive(ws); this.isRedirected = isRedirected; + this.sourcesAreInResultKeySpace = sourcesAreInResultKeySpace; final ExecutionContext userSuppliedContext = ExecutionContext.getContextToRecord(); if (userSuppliedContext != null) { @@ -91,10 +97,16 @@ final public class SelectColumnLayer extends SelectOrViewColumnLayer { } dependencyBitSet = new BitSet(); - Arrays.stream(deps).mapToInt(inner::getLayerIndexFor).forEach(dependencyBitSet::set); + Arrays.stream(deps) + .mapToInt(context::getLayerIndexFor) + .filter(layerIndex -> layerIndex >= 0) + .forEach(dependencyBitSet::set); + if (isRedirected) { + // we cannot write to the redirected column until after the redirection has been updated + context.setRedirectionLayer(dependencyBitSet); + } - this.flattenedResult = flattenedResult; - this.alreadyFlattenedSources = alreadyFlattenedSources; + this.flattenedResult = context.isFlatResult(); // We can only parallelize this column if we are not redirected, our destination provides ensure previous, and // the select column is stateless @@ -136,9 +148,33 @@ private ChunkSource getChunkSource() { } @Override - public void applyUpdate(final TableUpdate upstream, final RowSet toClear, - final UpdateHelper helper, final JobScheduler jobScheduler, @Nullable final LivenessNode liveResultOwner, - final SelectLayerCompletionHandler onCompletion) { + public BitSet getLayerDependencySet() { + return dependencyBitSet; + } + + @Override + public Runnable createUpdateHandler( + final TableUpdate originalUpdate, + final RowSet toClear, + final SelectAndViewAnalyzer.UpdateHelper helper, + final JobScheduler jobScheduler, + @Nullable final LivenessNode liveResultOwner, + final Runnable onSuccess, + final Consumer onError) { + final TableUpdate upstream; + if (!sourcesAreInResultKeySpace) { + upstream = originalUpdate; + } else { + // This better be the static fake update. 
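// Editor's note: an illustrative sketch of the "static fake update" being verified below. When the layer's
// sources are in result key space, the parent's sparse added row keys are replaced by a dense range of
// positions 0..n-1, with no removes, modifies or shifts; only the row count carries over. The names in this
// sketch are hypothetical; in the patch the dense range comes from RowSetFactory.flat(upstream.added().size()).
final class ResultKeySpaceSketch {
    /** Map a batch of sparse parent row keys to the dense positions used by a flattened result. */
    static long[] toResultKeySpace(final long[] parentAddedKeys) {
        final long[] positions = new long[parentAddedKeys.length];
        for (int ii = 0; ii < positions.length; ++ii) {
            positions[ii] = ii;
        }
        return positions;
    }

    public static void main(String[] args) {
        // parent added rows {5, 17, 1_000} become result rows {0, 1, 2}
        System.out.println(java.util.Arrays.toString(toResultKeySpace(new long[] {5, 17, 1_000})));
    }
}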
+ Assert.eqTrue(originalUpdate.added().size() == parentRowSet.size(), + "originalUpdate.added().size() == parentRowSet.size()"); + Assert.eqTrue(originalUpdate.removed().isEmpty(), "originalUpdate.removed.isEmpty()"); + Assert.eqTrue(originalUpdate.modified().isEmpty(), "originalUpdate.modified.isEmpty()"); + Assert.eqTrue(originalUpdate.shifted().empty(), "originalUpdate.shifted.empty()"); + + upstream = helper.resultKeySpaceUpdate(); + } + if (upstream.removed().isNonempty()) { if (isRedirected) { clearObjectsAtThisLevel(upstream.removed()); @@ -148,80 +184,80 @@ public void applyUpdate(final TableUpdate upstream, final RowSet toClear, } } - // recurse so that dependent intermediate columns are already updated - inner.applyUpdate(upstream, toClear, helper, jobScheduler, liveResultOwner, - new SelectLayerCompletionHandler(dependencyBitSet, onCompletion) { - @Override - public void onAllRequiredColumnsCompleted() { - // We don't want to bother with threads if we are going to process a small update - final long totalSize = upstream.added().size() + upstream.modified().size(); - - // If we have shifts, that makes everything nasty; so we do not want to deal with it - final boolean hasShifts = upstream.shifted().nonempty(); - - final boolean serialTableOperationsSafe = updateGraph.serialTableOperationsSafe() - || updateGraph.sharedLock().isHeldByCurrentThread() - || updateGraph.exclusiveLock().isHeldByCurrentThread(); - - if (canParallelizeThisColumn && jobScheduler.threadCount() > 1 && !hasShifts && - ((resultTypeIsTableOrRowSet && totalSize > 0) - || totalSize >= QueryTable.MINIMUM_PARALLEL_SELECT_ROWS)) { - final long divisionSize = resultTypeIsTableOrRowSet ? 1 - : Math.max(QueryTable.MINIMUM_PARALLEL_SELECT_ROWS, - (totalSize + jobScheduler.threadCount() - 1) / jobScheduler.threadCount()); - final List updates = new ArrayList<>(); - // divide up the additions and modifications - try (final RowSequence.Iterator rsAddIt = upstream.added().getRowSequenceIterator(); - final RowSequence.Iterator rsModIt = upstream.modified().getRowSequenceIterator()) { - while (rsAddIt.hasMore() || rsModIt.hasMore()) { - final TableUpdateImpl update = new TableUpdateImpl(); - update.modifiedColumnSet = upstream.modifiedColumnSet(); - update.shifted = RowSetShiftData.EMPTY; - update.removed = RowSetFactory.empty(); - - if (rsAddIt.hasMore()) { - update.added = rsAddIt.getNextRowSequenceWithLength(divisionSize).asRowSet(); - } else { - update.added = RowSetFactory.empty(); - } - - if (update.added.size() < divisionSize && rsModIt.hasMore()) { - update.modified = rsModIt - .getNextRowSequenceWithLength(divisionSize - update.added().size()) - .asRowSet(); - } else { - update.modified = RowSetFactory.empty(); - } - - updates.add(update); - } - } - - if (updates.isEmpty()) { - throw new IllegalStateException(); - } + return () -> { + // We don't want to bother with threads if we are going to process a small update + final long totalSize = upstream.added().size() + upstream.modified().size(); + + // If we have shifts, that makes everything nasty; so we do not want to deal with it + final boolean hasShifts = upstream.shifted().nonempty(); + + final boolean serialTableOperationsSafe = updateGraph.serialTableOperationsSafe() + || updateGraph.sharedLock().isHeldByCurrentThread() + || updateGraph.exclusiveLock().isHeldByCurrentThread(); + + if (canParallelizeThisColumn && jobScheduler.threadCount() > 1 && !hasShifts && + ((resultTypeIsTableOrRowSet && totalSize > 0) + || totalSize >= 
QueryTable.MINIMUM_PARALLEL_SELECT_ROWS)) { + final long divisionSize = resultTypeIsTableOrRowSet ? 1 + : Math.max(QueryTable.MINIMUM_PARALLEL_SELECT_ROWS, + (totalSize + jobScheduler.threadCount() - 1) / jobScheduler.threadCount()); + final List updates = new ArrayList<>(); + // divide up the additions and modifications + try (final RowSequence.Iterator rsAddIt = upstream.added().getRowSequenceIterator(); + final RowSequence.Iterator rsModIt = upstream.modified().getRowSequenceIterator()) { + while (rsAddIt.hasMore() || rsModIt.hasMore()) { + final TableUpdateImpl update = new TableUpdateImpl(); + update.modifiedColumnSet = upstream.modifiedColumnSet(); + update.shifted = RowSetShiftData.EMPTY; + update.removed = RowSetFactory.empty(); + + if (rsAddIt.hasMore()) { + update.added = rsAddIt.getNextRowSequenceWithLength(divisionSize).asRowSet(); + } else { + update.added = RowSetFactory.empty(); + } - jobScheduler.submit( - executionContext, - () -> prepareParallelUpdate(jobScheduler, upstream, toClear, helper, - liveResultOwner, onCompletion, this::onError, updates, - serialTableOperationsSafe), - SelectColumnLayer.this, this::onError); + if (update.added.size() < divisionSize && rsModIt.hasMore()) { + update.modified = rsModIt + .getNextRowSequenceWithLength(divisionSize - update.added().size()) + .asRowSet(); } else { - jobScheduler.submit( - executionContext, - () -> doSerialApplyUpdate(upstream, toClear, helper, liveResultOwner, onCompletion, - serialTableOperationsSafe), - SelectColumnLayer.this, this::onError); + update.modified = RowSetFactory.empty(); } + + updates.add(update); } - }); + } + + if (updates.isEmpty()) { + throw new IllegalStateException(); + } + + jobScheduler.submit( + executionContext, + () -> prepareParallelUpdate(jobScheduler, upstream, toClear, helper, liveResultOwner, onSuccess, + onError, updates, serialTableOperationsSafe), + SelectColumnLayer.this, onError); + } else { + jobScheduler.submit( + executionContext, + () -> doSerialApplyUpdate(upstream, toClear, helper, liveResultOwner, onSuccess, + serialTableOperationsSafe), + SelectColumnLayer.this, onError); + } + }; } - private void prepareParallelUpdate(final JobScheduler jobScheduler, final TableUpdate upstream, - final RowSet toClear, final UpdateHelper helper, @Nullable final LivenessNode liveResultOwner, - final SelectLayerCompletionHandler onCompletion, final Consumer onError, - final List splitUpdates, final boolean serialTableOperationsSafe) { + private void prepareParallelUpdate( + final JobScheduler jobScheduler, + final TableUpdate upstream, + final RowSet toClear, + final SelectAndViewAnalyzer.UpdateHelper helper, + @Nullable final LivenessNode liveResultOwner, + final Runnable onSuccess, + final Consumer onError, + final List splitUpdates, + final boolean serialTableOperationsSafe) { // we have to do removal and previous initialization before we can do any of the actual filling in multiple // threads to avoid concurrency problems with our destination column sources doEnsureCapacity(); @@ -250,13 +286,17 @@ private void prepareParallelUpdate(final JobScheduler jobScheduler, final TableU if (!isRedirected) { clearObjectsAtThisLevel(toClear); } - onCompletion.onLayerCompleted(getLayerIndex()); + onSuccess.run(); }, onError); } - private void doSerialApplyUpdate(final TableUpdate upstream, final RowSet toClear, final UpdateHelper helper, - @Nullable final LivenessNode liveResultOwner, final SelectLayerCompletionHandler onCompletion, + private void doSerialApplyUpdate( + final TableUpdate upstream, + 
final RowSet toClear, + final SelectAndViewAnalyzer.UpdateHelper helper, + @Nullable final LivenessNode liveResultOwner, + final Runnable onSuccess, final boolean serialTableOperationsSafe) { doEnsureCapacity(); final boolean oldSafe = updateGraph.setSerialTableOperationsSafe(serialTableOperationsSafe); @@ -269,11 +309,14 @@ private void doSerialApplyUpdate(final TableUpdate upstream, final RowSet toClea if (!isRedirected) { clearObjectsAtThisLevel(toClear); } - onCompletion.onLayerCompleted(getLayerIndex()); + onSuccess.run(); } - private void doParallelApplyUpdate(final TableUpdate upstream, final UpdateHelper helper, - @Nullable final LivenessNode liveResultOwner, final boolean serialTableOperationsSafe, + private void doParallelApplyUpdate( + final TableUpdate upstream, + final SelectAndViewAnalyzer.UpdateHelper helper, + @Nullable final LivenessNode liveResultOwner, + final boolean serialTableOperationsSafe, final long startOffset) { final boolean oldSafe = updateGraph.setSerialTableOperationsSafe(serialTableOperationsSafe); try { @@ -285,8 +328,11 @@ private void doParallelApplyUpdate(final TableUpdate upstream, final UpdateHelpe upstream.release(); } - private Boolean doApplyUpdate(final TableUpdate upstream, final UpdateHelper helper, - @Nullable final LivenessNode liveResultOwner, final long startOffset) { + private Boolean doApplyUpdate( + final TableUpdate upstream, + final SelectAndViewAnalyzer.UpdateHelper helper, + @Nullable final LivenessNode liveResultOwner, + final long startOffset) { final int PAGE_SIZE = 4096; final LongToIntFunction contextSize = (long size) -> size > PAGE_SIZE ? PAGE_SIZE : (int) size; @@ -594,16 +640,6 @@ private void clearObjectsAtThisLevel(RowSet keys) { } } - @Override - public boolean flattenedResult() { - return flattenedResult; - } - - @Override - public boolean alreadyFlattenedSources() { - return alreadyFlattenedSources; - } - @Override public LogOutput append(LogOutput logOutput) { return logOutput.append("{SelectColumnLayer: ").append(selectColumn.toString()).append(", layerIndex=") @@ -612,6 +648,6 @@ public LogOutput append(LogOutput logOutput) { @Override public boolean allowCrossColumnParallelization() { - return selectColumn.isStateless() && inner.allowCrossColumnParallelization(); + return selectColumn.isStateless(); } } diff --git a/engine/table/src/main/java/io/deephaven/engine/table/impl/select/analyzers/SelectOrViewColumnLayer.java b/engine/table/src/main/java/io/deephaven/engine/table/impl/select/analyzers/SelectOrViewColumnLayer.java index 5fbef5b9d74..ebf2ac05ec2 100644 --- a/engine/table/src/main/java/io/deephaven/engine/table/impl/select/analyzers/SelectOrViewColumnLayer.java +++ b/engine/table/src/main/java/io/deephaven/engine/table/impl/select/analyzers/SelectOrViewColumnLayer.java @@ -12,18 +12,20 @@ public abstract class SelectOrViewColumnLayer extends DependencyLayerBase { private final ColumnSource optionalUnderlying; - SelectOrViewColumnLayer(SelectAndViewAnalyzer inner, String name, SelectColumn sc, - ColumnSource ws, ColumnSource optionalUnderlying, - String[] deps, ModifiedColumnSet mcsBuilder) { - super(inner, name, sc, ws, deps, mcsBuilder); + SelectOrViewColumnLayer( + final SelectAndViewAnalyzer.AnalyzerContext context, + final SelectColumn sc, + final ColumnSource ws, + final ColumnSource optionalUnderlying, + final String[] deps, + final ModifiedColumnSet mcsBuilder) { + super(context, sc, ws, deps, mcsBuilder); this.optionalUnderlying = optionalUnderlying; } @Override - final Map> 
getColumnSourcesRecurse(GetMode mode) { - final Map> result = inner.getColumnSourcesRecurse(mode); + void populateColumnSources(final Map> result) { result.put(name, columnSource); - return result; } @Override @@ -32,6 +34,5 @@ public void startTrackingPrev() { if (optionalUnderlying != null) { optionalUnderlying.startTrackingPrevValues(); } - inner.startTrackingPrev(); } } diff --git a/engine/table/src/main/java/io/deephaven/engine/table/impl/select/analyzers/StaticFlattenLayer.java b/engine/table/src/main/java/io/deephaven/engine/table/impl/select/analyzers/StaticFlattenLayer.java deleted file mode 100644 index 25827b2ca19..00000000000 --- a/engine/table/src/main/java/io/deephaven/engine/table/impl/select/analyzers/StaticFlattenLayer.java +++ /dev/null @@ -1,146 +0,0 @@ -// -// Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending -// -package io.deephaven.engine.table.impl.select.analyzers; - -import io.deephaven.base.log.LogOutput; -import io.deephaven.base.verify.Assert; -import io.deephaven.engine.liveness.LivenessNode; -import io.deephaven.engine.rowset.RowSet; -import io.deephaven.engine.rowset.RowSetFactory; -import io.deephaven.engine.rowset.RowSetShiftData; -import io.deephaven.engine.rowset.TrackingRowSet; -import io.deephaven.engine.table.ColumnDefinition; -import io.deephaven.engine.table.ColumnSource; -import io.deephaven.engine.table.ModifiedColumnSet; -import io.deephaven.engine.table.TableUpdate; -import io.deephaven.engine.table.impl.TableUpdateImpl; -import io.deephaven.engine.table.impl.sources.RedirectedColumnSource; -import io.deephaven.engine.table.impl.util.RowRedirection; -import io.deephaven.engine.table.impl.util.WrappedRowSetRowRedirection; -import io.deephaven.engine.table.impl.util.JobScheduler; -import org.jetbrains.annotations.Nullable; - -import java.util.BitSet; -import java.util.HashMap; -import java.util.HashSet; -import java.util.LinkedHashMap; -import java.util.Map; -import java.util.Set; - -final public class StaticFlattenLayer extends SelectAndViewAnalyzer { - private final SelectAndViewAnalyzer inner; - private final TrackingRowSet parentRowSet; - private final Map> overriddenColumns; - - StaticFlattenLayer(SelectAndViewAnalyzer inner, TrackingRowSet parentRowSet) { - super(inner.getLayerIndex() + 1); - this.inner = inner; - this.parentRowSet = parentRowSet; - final HashSet alreadyFlattenedColumns = new HashSet<>(); - inner.getNewColumnSources().forEach((name, cs) -> { - alreadyFlattenedColumns.add(name); - }); - - final RowRedirection rowRedirection = new WrappedRowSetRowRedirection(parentRowSet); - overriddenColumns = new HashMap<>(); - inner.getAllColumnSources().forEach((name, cs) -> { - if (alreadyFlattenedColumns.contains(name)) { - return; - } - - overriddenColumns.put(name, RedirectedColumnSource.maybeRedirect(rowRedirection, cs)); - }); - } - - @Override - void setBaseBits(BitSet bitset) { - inner.setBaseBits(bitset); - } - - @Override - void populateModifiedColumnSetRecurse(ModifiedColumnSet mcsBuilder, Set remainingDepsToSatisfy) { - inner.populateModifiedColumnSetRecurse(mcsBuilder, remainingDepsToSatisfy); - } - - @Override - Map> getColumnSourcesRecurse(GetMode mode) { - final Map> innerColumns = inner.getColumnSourcesRecurse(mode); - - if (overriddenColumns.keySet().stream().noneMatch(innerColumns::containsKey)) { - return innerColumns; - } - - final Map> columns = new LinkedHashMap<>(); - innerColumns.forEach((name, cs) -> columns.put(name, overriddenColumns.getOrDefault(name, cs))); - return columns; - } - - @Override - 
public void applyUpdate(TableUpdate upstream, RowSet toClear, UpdateHelper helper, JobScheduler jobScheduler, - @Nullable LivenessNode liveResultOwner, SelectLayerCompletionHandler onCompletion) { - // this must be the fake update used to initialize the result table - Assert.eqTrue(upstream.added().isFlat(), "upstream.added.isFlat()"); - Assert.eq(upstream.added().size(), "upstream.added.size()", parentRowSet.size(), "parentRowSet.size()"); - Assert.eqTrue(upstream.removed().isEmpty(), "upstream.removed.isEmpty()"); - Assert.eqTrue(upstream.modified().isEmpty(), "upstream.modified.isEmpty()"); - - final BitSet baseLayerBitSet = new BitSet(); - inner.setBaseBits(baseLayerBitSet); - final TableUpdate innerUpdate = new TableUpdateImpl( - parentRowSet.copy(), RowSetFactory.empty(), RowSetFactory.empty(), - RowSetShiftData.EMPTY, ModifiedColumnSet.EMPTY); - inner.applyUpdate(innerUpdate, toClear, helper, jobScheduler, liveResultOwner, - new SelectLayerCompletionHandler(baseLayerBitSet, onCompletion) { - @Override - public void onAllRequiredColumnsCompleted() { - onCompletion.onLayerCompleted(getLayerIndex()); - } - }); - } - - @Override - Map> calcDependsOnRecurse(boolean forcePublishAllResources) { - return inner.calcDependsOnRecurse(forcePublishAllResources); - } - - @Override - public SelectAndViewAnalyzer getInner() { - return inner; - } - - @Override - int getLayerIndexFor(String column) { - if (overriddenColumns.containsKey(column)) { - return getLayerIndex(); - } - return inner.getLayerIndexFor(column); - } - - @Override - public void startTrackingPrev() { - throw new UnsupportedOperationException("StaticFlattenLayer is used in only non-refreshing scenarios"); - } - - @Override - public LogOutput append(LogOutput logOutput) { - return logOutput.append("{StaticFlattenLayer").append(", layerIndex=").append(getLayerIndex()).append("}"); - } - - @Override - public boolean allowCrossColumnParallelization() { - return inner.allowCrossColumnParallelization(); - } - - @Override - public boolean flattenedResult() { - // this layer performs a flatten, so the result is flattened - return true; - } - - @Override - public boolean alreadyFlattenedSources() { - // this layer performs a flatten, so the sources are now flattened - return true; - } -} diff --git a/engine/table/src/main/java/io/deephaven/engine/table/impl/select/analyzers/ViewColumnLayer.java b/engine/table/src/main/java/io/deephaven/engine/table/impl/select/analyzers/ViewColumnLayer.java index 84bdda755a5..3019b5277b1 100644 --- a/engine/table/src/main/java/io/deephaven/engine/table/impl/select/analyzers/ViewColumnLayer.java +++ b/engine/table/src/main/java/io/deephaven/engine/table/impl/select/analyzers/ViewColumnLayer.java @@ -4,35 +4,30 @@ package io.deephaven.engine.table.impl.select.analyzers; import io.deephaven.base.log.LogOutput; -import io.deephaven.base.verify.Assert; import io.deephaven.configuration.Configuration; -import io.deephaven.engine.liveness.LivenessNode; import io.deephaven.engine.liveness.LivenessReferent; -import io.deephaven.engine.rowset.RowSet; import io.deephaven.engine.table.ColumnSource; import io.deephaven.engine.table.ModifiedColumnSet; -import io.deephaven.engine.table.TableUpdate; import io.deephaven.engine.table.impl.select.SelectColumn; -import io.deephaven.engine.table.impl.util.JobScheduler; import org.jetbrains.annotations.NotNull; -import org.jetbrains.annotations.Nullable; final public class ViewColumnLayer extends SelectOrViewColumnLayer { private static final boolean 
ALLOW_LIVENESS_REFERENT_RESULTS = Configuration.getInstance() .getBooleanForClassWithDefault(ViewColumnLayer.class, "allowLivenessReferentResults", false); - ViewColumnLayer(SelectAndViewAnalyzer inner, String name, SelectColumn sc, ColumnSource cs, String[] deps, - ModifiedColumnSet mcsBuilder) { - super(inner, name, sc, checkResultType(cs), null, deps, mcsBuilder); + ViewColumnLayer( + final SelectAndViewAnalyzer.AnalyzerContext context, + final SelectColumn sc, + final ColumnSource cs, + final String[] deps, + final ModifiedColumnSet mcsBuilder) { + super(context, sc, checkResultType(cs), null, deps, mcsBuilder); } @Override - public void applyUpdate(TableUpdate upstream, RowSet toClear, UpdateHelper helper, JobScheduler jobScheduler, - @Nullable LivenessNode liveResultOwner, SelectLayerCompletionHandler completionHandler) { - // To be parallel with SelectColumnLayer, we would recurse here, but since this is ViewColumnLayer - // (and all my inner layers are ViewColumnLayer), there's nothing to do. - Assert.eqNull(completionHandler, "completionHandler"); + public boolean hasRefreshingLogic() { + return false; } @Override @@ -47,7 +42,7 @@ public boolean allowCrossColumnParallelization() { return false; } - private static ColumnSource checkResultType(@NotNull final ColumnSource cs) { + private static ColumnSource checkResultType(@NotNull final ColumnSource cs) { final Class resultType = cs.getType(); if (!ALLOW_LIVENESS_REFERENT_RESULTS && LivenessReferent.class.isAssignableFrom(resultType)) { throw new UnsupportedOperationException(String.format( diff --git a/engine/table/src/main/java/io/deephaven/engine/table/impl/select/codegen/FormulaAnalyzer.java b/engine/table/src/main/java/io/deephaven/engine/table/impl/select/codegen/FormulaAnalyzer.java index fe162c1f305..0ef5b12f59e 100644 --- a/engine/table/src/main/java/io/deephaven/engine/table/impl/select/codegen/FormulaAnalyzer.java +++ b/engine/table/src/main/java/io/deephaven/engine/table/impl/select/codegen/FormulaAnalyzer.java @@ -3,6 +3,7 @@ // package io.deephaven.engine.table.impl.select.codegen; +import io.deephaven.api.util.NameValidator; import io.deephaven.engine.context.ExecutionContext; import io.deephaven.engine.context.QueryLibrary; import io.deephaven.engine.table.ColumnDefinition; @@ -12,8 +13,6 @@ import io.deephaven.vector.ObjectVector; import io.deephaven.engine.table.impl.select.DhFormulaColumn; import io.deephaven.engine.table.impl.select.formula.FormulaSourceDescriptor; -import io.deephaven.engine.table.WritableColumnSource; -import io.deephaven.engine.rowset.TrackingWritableRowSet; import io.deephaven.internal.log.LoggerFactory; import io.deephaven.io.logger.Logger; import org.jetbrains.annotations.NotNull; @@ -28,9 +27,45 @@ public class FormulaAnalyzer { private static final Logger log = LoggerFactory.getLogger(FormulaAnalyzer.class); + /** + * A container to hold a single copy of imports required to compile formulas for one operation. 
+ */ + public static final class Imports { + private final Map queryScopeVariables; + private final Collection packageImports; + private final Collection> classImports; + private final Collection> staticImports; + + public Imports() { + final ExecutionContext context = ExecutionContext.getContext(); + queryScopeVariables = Collections.unmodifiableMap( + context.getQueryScope().toMap((name, value) -> NameValidator.isValidQueryParameterName(name))); + final QueryLibrary queryLibrary = context.getQueryLibrary(); + packageImports = Set.copyOf(queryLibrary.getPackageImports()); + classImports = Set.copyOf(queryLibrary.getClassImports()); + staticImports = Set.copyOf(queryLibrary.getStaticImports()); + } + + public Map getQueryScopeVariables() { + return queryScopeVariables; + } + + public Collection getPackageImports() { + return packageImports; + } + + public Collection> getClassImports() { + return classImports; + } + + public Collection> getStaticImports() { + return staticImports; + } + } + public static Result analyze(final String rawFormulaString, final Map> columnDefinitionMap, - final QueryLanguageParser.Result queryLanguageResult) throws Exception { + final QueryLanguageParser.Result queryLanguageResult) { log.debug().append("Expression (after language conversion) : ") .append(queryLanguageResult.getConvertedExpression()) @@ -75,7 +110,7 @@ public static Result analyze(final String rawFormulaString, * @param formulaString The raw formula string * @param availableColumns The columns available for use in the formula * @param columnRenames Outer to inner column name mapping - * @param queryScopeVariables The query scope variables + * @param imports The query scope variables, package, class, and static imports * @return The parsed formula {@link QueryLanguageParser.Result result} * @throws Exception If the formula cannot be parsed */ @@ -83,8 +118,8 @@ public static QueryLanguageParser.Result parseFormula( @NotNull final String formulaString, @NotNull final Map> availableColumns, @NotNull final Map columnRenames, - @NotNull final Map queryScopeVariables) throws Exception { - return parseFormula(formulaString, availableColumns, columnRenames, queryScopeVariables, true); + @NotNull final Imports imports) throws Exception { + return parseFormula(formulaString, availableColumns, columnRenames, imports, true); } /** @@ -93,7 +128,7 @@ public static QueryLanguageParser.Result parseFormula( * @param formulaString The raw formula string * @param availableColumns The columns available for use in the formula * @param columnRenames Outer to inner column name mapping - * @param queryScopeVariables The query scope variables + * @param imports The query scope variables, package, class, and static imports * @param unboxArguments If true it will unbox the query scope arguments * @return The parsed formula {@link QueryLanguageParser.Result result} * @throws Exception If the formula cannot be parsed @@ -102,7 +137,7 @@ public static QueryLanguageParser.Result parseFormula( @NotNull final String formulaString, @NotNull final Map> availableColumns, @NotNull final Map columnRenames, - @NotNull final Map queryScopeVariables, + @NotNull final Imports imports, final boolean unboxArguments) throws Exception { final TimeLiteralReplacedExpression timeConversionResult = @@ -177,7 +212,7 @@ public static QueryLanguageParser.Result parseFormula( } // Parameters come last. 
- for (Map.Entry param : queryScopeVariables.entrySet()) { + for (Map.Entry param : imports.queryScopeVariables.entrySet()) { if (possibleVariables.containsKey(param.getKey())) { // Columns and column arrays take precedence over parameters. continue; @@ -200,13 +235,10 @@ public static QueryLanguageParser.Result parseFormula( possibleVariables.putAll(timeConversionResult.getNewVariables()); - final QueryLibrary queryLibrary = ExecutionContext.getContext().getQueryLibrary(); - final Set> classImports = new HashSet<>(queryLibrary.getClassImports()); - classImports.add(TrackingWritableRowSet.class); - classImports.add(WritableColumnSource.class); - return new QueryLanguageParser(timeConversionResult.getConvertedFormula(), queryLibrary.getPackageImports(), - classImports, queryLibrary.getStaticImports(), possibleVariables, possibleVariableParameterizedTypes, - queryScopeVariables, columnVariables, unboxArguments, timeConversionResult).getResult(); + return new QueryLanguageParser(timeConversionResult.getConvertedFormula(), imports.getPackageImports(), + imports.getClassImports(), imports.getStaticImports(), possibleVariables, + possibleVariableParameterizedTypes, imports.getQueryScopeVariables(), columnVariables, unboxArguments, + timeConversionResult).getResult(); } public static class Result { diff --git a/engine/table/src/test/java/io/deephaven/engine/table/impl/lang/TestQueryLanguageParser.java b/engine/table/src/test/java/io/deephaven/engine/table/impl/lang/TestQueryLanguageParser.java index 32523cbf766..4f1558f0c1d 100644 --- a/engine/table/src/test/java/io/deephaven/engine/table/impl/lang/TestQueryLanguageParser.java +++ b/engine/table/src/test/java/io/deephaven/engine/table/impl/lang/TestQueryLanguageParser.java @@ -3178,7 +3178,7 @@ private void check(String expression, String resultExpression, Class resultTy final Map possibleParams; final QueryScope queryScope = ExecutionContext.getContext().getQueryScope(); if (!(queryScope instanceof PoisonedQueryScope)) { - possibleParams = QueryCompilerRequestProcessor.newQueryScopeVariableSupplier().get(); + possibleParams = QueryCompilerRequestProcessor.newFormulaImportsSupplier().get().getQueryScopeVariables(); } else { possibleParams = null; } diff --git a/extensions/barrage/src/main/java/io/deephaven/extensions/barrage/chunk/VarListChunkInputStreamGenerator.java b/extensions/barrage/src/main/java/io/deephaven/extensions/barrage/chunk/VarListChunkInputStreamGenerator.java index 8142c8d7d22..d85c2d6c3d4 100644 --- a/extensions/barrage/src/main/java/io/deephaven/extensions/barrage/chunk/VarListChunkInputStreamGenerator.java +++ b/extensions/barrage/src/main/java/io/deephaven/extensions/barrage/chunk/VarListChunkInputStreamGenerator.java @@ -46,11 +46,17 @@ private synchronized void computePayload() { final Class myType = type.getComponentType(); final Class myComponentType = myType != null ? myType.getComponentType() : null; - ChunkType chunkType = ChunkType.fromElementType(myType); - if (chunkType == ChunkType.Boolean) { - // the internal payload is in bytes (to handle nulls), but the wire format is packed bits + + final ChunkType chunkType; + if (myType == boolean.class || myType == Boolean.class) { + // Note: Internally booleans are passed around as bytes, but the wire format is packed bits. 
chunkType = ChunkType.Byte; + } else if (myType != null && !myType.isPrimitive()) { + chunkType = ChunkType.Object; + } else { + chunkType = ChunkType.fromElementType(myType); } + final ArrayExpansionKernel kernel = ArrayExpansionKernel.makeExpansionKernel(chunkType, myType); offsets = WritableIntChunk.makeWritableChunk(chunk.size() + 1); diff --git a/extensions/barrage/src/main/java/io/deephaven/extensions/barrage/chunk/VarListChunkReader.java b/extensions/barrage/src/main/java/io/deephaven/extensions/barrage/chunk/VarListChunkReader.java index dddc6414b1e..4e5b8cb0bd7 100644 --- a/extensions/barrage/src/main/java/io/deephaven/extensions/barrage/chunk/VarListChunkReader.java +++ b/extensions/barrage/src/main/java/io/deephaven/extensions/barrage/chunk/VarListChunkReader.java @@ -36,6 +36,8 @@ public VarListChunkReader(final StreamReaderOptions options, final TypeInfo type if (componentType == boolean.class || componentType == Boolean.class) { // Note: Internally booleans are passed around as bytes, but the wire format is packed bits. chunkType = ChunkType.Byte; + } else if (componentType != null && !componentType.isPrimitive()) { + chunkType = ChunkType.Object; } else { chunkType = ChunkType.fromElementType(componentType); } diff --git a/extensions/barrage/src/main/java/io/deephaven/extensions/barrage/chunk/array/BoxedBooleanArrayExpansionKernel.java b/extensions/barrage/src/main/java/io/deephaven/extensions/barrage/chunk/array/BoxedBooleanArrayExpansionKernel.java index c6b901a7dd3..0a02ddb31f9 100644 --- a/extensions/barrage/src/main/java/io/deephaven/extensions/barrage/chunk/array/BoxedBooleanArrayExpansionKernel.java +++ b/extensions/barrage/src/main/java/io/deephaven/extensions/barrage/chunk/array/BoxedBooleanArrayExpansionKernel.java @@ -47,7 +47,7 @@ public WritableChunk expand(final ObjectChunk source continue; } for (int j = 0; j < row.length; ++j) { - final byte value = row[j] ? BooleanUtils.TRUE_BOOLEAN_AS_BYTE : BooleanUtils.FALSE_BOOLEAN_AS_BYTE; + final byte value = BooleanUtils.booleanAsByte(row[j]); result.set(lenWritten + j, value); } lenWritten += row.length; From 903abba187480217372bab136403a65f281486d5 Mon Sep 17 00:00:00 2001 From: Colin Alworth Date: Fri, 16 Aug 2024 11:34:18 -0500 Subject: [PATCH 29/43] fix: Go and JS clients should not require v1+v2 auth (#5946) The Go client required sending v2 auth, but receiving v1 responses - this change makes it consistently use v1 auth. The JS client incorrectly closed the stream only after it had received headers - now it can close right away, and just wait for headers to arrive normally. Prerequisite to #5922 --- go/pkg/client/tokens.go | 27 +++++++++++++------ .../web/client/api/WorkerConnection.java | 2 +- 2 files changed, 20 insertions(+), 9 deletions(-) diff --git a/go/pkg/client/tokens.go b/go/pkg/client/tokens.go index e00d481c10c..8594e06cbb0 100644 --- a/go/pkg/client/tokens.go +++ b/go/pkg/client/tokens.go @@ -7,7 +7,9 @@ import ( "fmt" "github.com/apache/arrow/go/v8/arrow/flight" configpb2 "github.com/deephaven/deephaven-core/go/internal/proto/config" + sessionpb2 "github.com/deephaven/deephaven-core/go/internal/proto/session" "google.golang.org/grpc/metadata" + "google.golang.org/protobuf/proto" "log" "strconv" "sync" @@ -36,8 +38,19 @@ func withAuthToken(ctx context.Context, token []byte) context.Context { } // requestToken requests a new token from flight. 
-func requestToken(handshakeClient flight.FlightService_HandshakeClient, handshakeReq *flight.HandshakeRequest) ([]byte, error) { - err := handshakeClient.Send(handshakeReq) +func requestToken(handshakeClient flight.FlightService_HandshakeClient, authType string, authToken []byte) ([]byte, error) { + + war := sessionpb2.WrappedAuthenticationRequest{ + Type: authType, + Payload: authToken, + } + payload, err := proto.Marshal(&war) + if err != nil { + return nil, err + } + handshakeReq := flight.HandshakeRequest{Payload: []byte(payload)} + + err = handshakeClient.Send(&handshakeReq) if err != nil { return nil, err @@ -122,15 +135,13 @@ func (tr *tokenManager) Close() error { // "user:password"; when auth_type is DefaultAuth, it will be ignored; when auth_type is a custom-built // authenticator, it must conform to the specific requirement of the authenticator. func newTokenManager(ctx context.Context, fs *flightStub, cfg configpb2.ConfigServiceClient, authType string, authToken string) (*tokenManager, error) { - authString := makeAuthString(authType, authToken) - - handshakeClient, err := fs.handshake(withAuth(ctx, authString)) + handshakeClient, err := fs.handshake(ctx) if err != nil { return nil, err } - tkn, err := requestToken(handshakeClient, &flight.HandshakeRequest{Payload: []byte(authString)}) + tkn, err := requestToken(handshakeClient, authType, []byte(authToken)) if err != nil { return nil, err @@ -174,10 +185,10 @@ func newTokenManager(ctx context.Context, fs *flightStub, cfg configpb2.ConfigSe var tkn []byte if err == nil { - tkn, err = requestToken(handshakeClient, &flight.HandshakeRequest{Payload: oldToken}) + tkn, err = requestToken(handshakeClient, "Bearer", oldToken) } else { log.Println("Old token has an error during token update. Attempting to acquire a fresh token. 
err=", err) - tkn, err = requestToken(handshakeClient, &flight.HandshakeRequest{Payload: []byte(authString)}) + tkn, err = requestToken(handshakeClient, authType, []byte(authToken)) } if err != nil { diff --git a/web/client-api/src/main/java/io/deephaven/web/client/api/WorkerConnection.java b/web/client-api/src/main/java/io/deephaven/web/client/api/WorkerConnection.java index 625b80443df..86549556f5a 100644 --- a/web/client-api/src/main/java/io/deephaven/web/client/api/WorkerConnection.java +++ b/web/client-api/src/main/java/io/deephaven/web/client/api/WorkerConnection.java @@ -496,7 +496,6 @@ private Promise authUpdate() { info.fireEvent(EVENT_REFRESH_TOKEN_UPDATED, init); } } - handshake.end(); }); handshake.onStatus(status -> { if (status.isOk()) { @@ -526,6 +525,7 @@ private Promise authUpdate() { }); handshake.send(new HandshakeRequest()); + handshake.end(); }); } From 2f6530bd161c5d0050a03a20d8a07ff071a849ff Mon Sep 17 00:00:00 2001 From: Jianfeng Mao <4297243+jmao-denver@users.noreply.github.com> Date: Fri, 16 Aug 2024 13:55:37 -0600 Subject: [PATCH 30/43] fix: post merge improvement on table listener code and docstring (#5951) Some defensive coding Code/docstring readability improvement --------- Co-authored-by: Chip Kent <5250374+chipkent@users.noreply.github.com> --- .../python/PythonMergedListenerAdapter.java | 2 ++ .../python/PythonReplayListenerAdapter.java | 3 ++ py/server/deephaven/table_listener.py | 30 +++++++++---------- 3 files changed, 19 insertions(+), 16 deletions(-) diff --git a/Integrations/src/main/java/io/deephaven/integrations/python/PythonMergedListenerAdapter.java b/Integrations/src/main/java/io/deephaven/integrations/python/PythonMergedListenerAdapter.java index e498b60cf70..0a43aae2ac7 100644 --- a/Integrations/src/main/java/io/deephaven/integrations/python/PythonMergedListenerAdapter.java +++ b/Integrations/src/main/java/io/deephaven/integrations/python/PythonMergedListenerAdapter.java @@ -116,6 +116,8 @@ protected void propagateErrorDownstream(boolean fromProcess, @NotNull Throwable // and continue with the original exception. log.error().append("Python on_error callback failed: ").append(e2).endl(); } + } else { + log.error().append("Python on_error callback is None: ").append(ExceptionUtils.getStackTrace(error)).endl(); } } } diff --git a/Integrations/src/main/java/io/deephaven/integrations/python/PythonReplayListenerAdapter.java b/Integrations/src/main/java/io/deephaven/integrations/python/PythonReplayListenerAdapter.java index e713044047a..8df5e544224 100644 --- a/Integrations/src/main/java/io/deephaven/integrations/python/PythonReplayListenerAdapter.java +++ b/Integrations/src/main/java/io/deephaven/integrations/python/PythonReplayListenerAdapter.java @@ -108,6 +108,9 @@ public void onFailureInternal(Throwable originalException, Entry sourceEntry) { // and continue with the original exception. 
log.error().append("Python on_error callback failed: ").append(e).endl(); } + } else { + log.error().append("Python on_error callback is None: ") + .append(ExceptionUtils.getStackTrace(originalException)).endl(); } super.onFailureInternal(originalException, sourceEntry); } diff --git a/py/server/deephaven/table_listener.py b/py/server/deephaven/table_listener.py index 1ad4f809eab..ecc6b0016f6 100644 --- a/py/server/deephaven/table_listener.py +++ b/py/server/deephaven/table_listener.py @@ -23,6 +23,7 @@ _JListenerRecorder = jpy.get_type("io.deephaven.engine.table.impl.ListenerRecorder") _JPythonMergedListenerAdapter = jpy.get_type("io.deephaven.integrations.python.PythonMergedListenerAdapter") +_DEFAULT_ON_ERROR_CALLBACK = lambda e : print(f"An error occurred during table update processing: {e}") class TableUpdate(JObjectWrapper): """A TableUpdate object represents a table update event. It contains the added, removed, and modified rows in the @@ -202,12 +203,9 @@ def on_error(self, e: Exception) -> None: Args: e (Exception): the exception that occurred during the listener's execution. """ - print(f"An error occurred during listener execution: {self}, {e}") + print(f"An error occurred during table update processing: {self}, {e}") -def _default_on_error(e: Exception) -> None: - print(f"An error occurred during listener execution: {e}") - def _listener_wrapper(table: Table): """A decorator to wrap a user listener function or on_update method to receive the numpy-converted Table updates. @@ -225,7 +223,6 @@ def wrapper(update, *args): return decorator - def _wrap_listener_func(t: Table, listener: Callable[[TableUpdate, bool], None]): n_params = len(signature(listener).parameters) if n_params != 2: @@ -248,6 +245,7 @@ def wrapper(e): return wrapper + class TableListenerHandle(JObjectWrapper): """A handle to manage a table listener's lifecycle.""" j_object_type = _JPythonReplayListenerAdapter @@ -287,12 +285,11 @@ def __init__(self, t: Table, listener: Union[Callable[[TableUpdate, bool], None] and then add the result tables as dependencies to the listener so that they can be safely read in it. on_error (Callable[[Exception], None]): a callback function to be invoked when an error occurs during the listener's execution. It should only be set when the listener is a function, not when it is an instance - of TableListener. Defaults to None. When None, a default callback function will be provided that simply + of TableListener. When the listener is a TableListener, TableListener.on_error will be used. + Defaults to None. When None, a default callback function will be provided that simply prints out the received exception. If the callback function itself raises an exception, the new exception will be logged in the Deephaven server log and will not be further processed by the server. - - Raises: DHError """ @@ -313,7 +310,7 @@ def __init__(self, t: Table, listener: Union[Callable[[TableUpdate, bool], None] if on_error: on_error_callback = _error_callback_wrapper(on_error) else: - on_error_callback = _error_callback_wrapper(_default_on_error) + on_error_callback = _error_callback_wrapper(_DEFAULT_ON_ERROR_CALLBACK) else: raise DHError(message="listener is neither callable nor TableListener object") @@ -390,11 +387,11 @@ def listen(t: Table, listener: Union[Callable[[TableUpdate, bool], None], TableL and then add the result tables as dependencies to the listener so that they can be safely read in it. 
on_error (Callable[[Exception], None]): a callback function to be invoked when an error occurs during the listener's execution. It should only be set when the listener is a function, not when it is an instance - of TableListener. Defaults to None. When None, a default callback function will be provided that simply + of TableListener. When the listener is a TableListener, TableListener.on_error will be used. + Defaults to None. When None, a default callback function will be provided that simply prints out the received exception. If the callback function itself raises an exception, the new exception will be logged in the Deephaven server log and will not be further processed by the server. - Returns: a TableListenerHandle @@ -452,7 +449,7 @@ def on_error(self, e: Exception) -> None: Args: e (Exception): the exception that occurred during the listener's execution. """ - print(f"An error occurred during listener execution: {self}, {e}") + print(f"An error occurred during table update processing: {self}, {e}") class MergedListenerHandle(JObjectWrapper): @@ -499,11 +496,11 @@ def __init__(self, tables: Sequence[Table], listener: Union[Callable[[Dict[Table and then add the result tables as dependencies to the listener so that they can be safely read in it. on_error (Callable[[Exception], None]): a callback function to be invoked when an error occurs during the listener's execution. It should only be set when the listener is a function, not when it is an instance - of MergedListener. Defaults to None. When None, a default callback function will be provided that simply + of MergedListener. When the listener is a MergedListener, MergedListener.on_error will be used. + Defaults to None. When None, a default callback function will be provided that simply prints out the received exception. If the callback function itself raises an exception, the new exception will be logged in the Deephaven server log and will not be further processed by the server. - Raises: DHError """ @@ -525,7 +522,7 @@ def __init__(self, tables: Sequence[Table], listener: Union[Callable[[Dict[Table if on_error: on_error_callback = _error_callback_wrapper(on_error) else: - on_error_callback = _error_callback_wrapper(_default_on_error) + on_error_callback = _error_callback_wrapper(_DEFAULT_ON_ERROR_CALLBACK) else: raise DHError(message="listener is neither callable nor MergedListener object") @@ -625,7 +622,8 @@ def merged_listen(tables: Sequence[Table], listener: Union[Callable[[Dict[Table, and then add the result tables as dependencies to the listener so that they can be safely read in it. on_error (Callable[[Exception], None]): a callback function to be invoked when an error occurs during the listener's execution. It should only be set when the listener is a function, not when it is an instance - of MergedListener. Defaults to None. When None, a default callback function will be provided that simply + of MergedListener. When the listener is a MergedListener, MergedListener.on_error will be used. + Defaults to None. When None, a default callback function will be provided that simply prints out the received exception. If the callback function itself raises an exception, the new exception will be logged in the Deephaven server log and will not be further processed by the server. 
""" From 59d591c1c980d500bb3ea6f66ee477caa6b23bf9 Mon Sep 17 00:00:00 2001 From: Stan Brubaker <120737309+stanbrub@users.noreply.github.com> Date: Fri, 16 Aug 2024 17:43:20 -0600 Subject: [PATCH 31/43] chore: Bump to 0.37.0-SNAPSHOT (#5952) --- R/rdeephaven/DESCRIPTION | 2 +- cpp-client/deephaven/CMakeLists.txt | 2 +- gradle.properties | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/R/rdeephaven/DESCRIPTION b/R/rdeephaven/DESCRIPTION index 40d96753586..99188c3bdaa 100644 --- a/R/rdeephaven/DESCRIPTION +++ b/R/rdeephaven/DESCRIPTION @@ -1,7 +1,7 @@ Package: rdeephaven Type: Package Title: R Client for Deephaven Core -Version: 0.36.0 +Version: 0.37.0 Date: 2023-05-12 Author: Deephaven Data Labs Maintainer: Alex Peters diff --git a/cpp-client/deephaven/CMakeLists.txt b/cpp-client/deephaven/CMakeLists.txt index 2c8be6d3a3d..32e7b5fabe5 100644 --- a/cpp-client/deephaven/CMakeLists.txt +++ b/cpp-client/deephaven/CMakeLists.txt @@ -8,7 +8,7 @@ endif() project(deephaven) -set(deephaven_VERSION 0.36.0) +set(deephaven_VERSION 0.37.0) set(CMAKE_CXX_STANDARD 17) # for CMAKE_INSTALL_{dir} diff --git a/gradle.properties b/gradle.properties index 3db9b09aa85..e03001083f8 100644 --- a/gradle.properties +++ b/gradle.properties @@ -9,7 +9,7 @@ # Re-builders who want to inherit the base version, but have their own qualifier can set -PdeephavenBaseQualifier="customQualifier": "X.Y.Z-customQualifier". # # Re-builders who want a fully custom version can set -PdeephavenBaseVersion="customVersion" -PdeephavenBaseQualifier="": "customVersion". -deephavenBaseVersion=0.36.0 +deephavenBaseVersion=0.37.0 deephavenBaseQualifier=SNAPSHOT #org.gradle.debug From b5cae98c2f11b032cdd1b9c248dc5b4a0f95314a Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 19 Aug 2024 07:20:37 -0700 Subject: [PATCH 32/43] chore(github-actions): bump cla-assistant/github-action from 2.4.0 to 2.5.1 (#5953) Bumps [cla-assistant/github-action](https://github.com/cla-assistant/github-action) from 2.4.0 to 2.5.1. - [Release notes](https://github.com/cla-assistant/github-action/releases) - [Commits](contributor-assistant/github-action@v2.4.0...v2.5.1) --- updated-dependencies: - dependency-name: cla-assistant/github-action dependency-type: direct:production update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] --- .github/workflows/cla.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/cla.yml b/.github/workflows/cla.yml index d337a5782cf..02acd5109e9 100644 --- a/.github/workflows/cla.yml +++ b/.github/workflows/cla.yml @@ -21,7 +21,7 @@ jobs: steps: - name: "CLA Assistant" if: (github.event.comment.body == 'recheck' || github.event.comment.body == 'I have read the CLA Document and I hereby sign the CLA') || github.event_name == 'pull_request_target' - uses: cla-assistant/github-action@v2.4.0 + uses: cla-assistant/github-action@v2.5.1 env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} PERSONAL_ACCESS_TOKEN : ${{ secrets.CLA_PERSONAL_ACCESS_TOKEN }} From 1af5b9238f0f2b745514c9835beff7d36c2bb162 Mon Sep 17 00:00:00 2001 From: robbcamera <84203088+robbcamera@users.noreply.github.com> Date: Wed, 21 Aug 2024 12:24:23 -0400 Subject: [PATCH 33/43] feat: Add static getter for TableDefinition in KafkaTools (#5956) --- .../java/io/deephaven/kafka/KafkaTools.java | 105 +++++++++++++----- 1 file changed, 77 insertions(+), 28 deletions(-) diff --git a/extensions/kafka/src/main/java/io/deephaven/kafka/KafkaTools.java b/extensions/kafka/src/main/java/io/deephaven/kafka/KafkaTools.java index e51f44251d6..34cee93c488 100644 --- a/extensions/kafka/src/main/java/io/deephaven/kafka/KafkaTools.java +++ b/extensions/kafka/src/main/java/io/deephaven/kafka/KafkaTools.java @@ -1218,33 +1218,30 @@ public Function visit(@NotNull final PerPar } /** - * Consume from Kafka to {@link StreamConsumer stream consumers} supplied by {@code streamConsumerRegistrar}. - * - * @param kafkaProperties Properties to configure this table and also to be passed to create the KafkaConsumer - * @param topic Kafka topic name - * @param partitionFilter A predicate returning true for the partitions to consume. The convenience constant - * {@code ALL_PARTITIONS} is defined to facilitate requesting all partitions. - * @param partitionToInitialOffset A function specifying the desired initial offset for each partition consumed - * @param keySpec Conversion specification for Kafka record keys - * @param valueSpec Conversion specification for Kafka record values - * @param streamConsumerRegistrarProvider A provider for a function to - * {@link StreamPublisher#register(StreamConsumer) register} {@link StreamConsumer} instances. The registered - * stream consumers must accept {@link ChunkType chunk types} that correspond to - * {@link StreamChunkUtils#chunkTypeForColumnIndex(TableDefinition, int)} for the supplied - * {@link TableDefinition}. See {@link StreamConsumerRegistrarProvider#single(SingleConsumerRegistrar) - * single} and {@link StreamConsumerRegistrarProvider#perPartition(PerPartitionConsumerRegistrar) - * per-partition}. - * @param consumerLoopCallback callback to inject logic into the ingester's consumer loop + * Basic holder structure used to pass multiple objects back to a calling method. 
*/ - public static void consume( + private static class ConsumeStruct { + final TableDefinition tableDefinition; + final KafkaStreamPublisher.Parameters publisherParameters; + final Deserializer keyDeser; + final Deserializer valueDeser; + + private ConsumeStruct( + @NotNull final TableDefinition tableDefinition, + @NotNull final KafkaStreamPublisher.Parameters publisherParameters, + @NotNull final Deserializer keyDeser, + @NotNull final Deserializer valueDeser) { + this.tableDefinition = tableDefinition; + this.publisherParameters = publisherParameters; + this.keyDeser = keyDeser; + this.valueDeser = valueDeser; + } + } + + private static ConsumeStruct getConsumeStruct( @NotNull final Properties kafkaProperties, - @NotNull final String topic, - @NotNull final IntPredicate partitionFilter, - @NotNull final InitialOffsetLookup partitionToInitialOffset, @NotNull final Consume.KeyOrValueSpec keySpec, - @NotNull final Consume.KeyOrValueSpec valueSpec, - @NotNull final StreamConsumerRegistrarProvider streamConsumerRegistrarProvider, - @Nullable final ConsumerLoopCallback consumerLoopCallback) { + @NotNull final Consume.KeyOrValueSpec valueSpec) { if (Consume.isIgnore(keySpec) && Consume.isIgnore(valueSpec)) { throw new IllegalArgumentException( "can't ignore both key and value: keySpec and valueSpec can't both be ignore specs"); @@ -1297,12 +1294,64 @@ public static void consume( .setValueToChunkObjectMapper(valueIngestData.toObjectChunkMapper); } - final KafkaStreamPublisher.Parameters publisherParameters = publisherParametersBuilder.build(); + return new ConsumeStruct(tableDefinition, publisherParametersBuilder.build(), keyDeser, valueDeser); + } + + /** + * Construct a {@link TableDefinition} based on the input Properties and {@link Consume.KeyOrValueSpec} parameters. + * Given the same input Properties and Consume.KeyOrValueSpec parameters, the returned TableDefinition is the same + * as the TableDefinition of the table produced by + * {@link #consumeToTable(Properties, String, IntPredicate, IntToLongFunction, Consume.KeyOrValueSpec, Consume.KeyOrValueSpec, TableType)} + * + * @param kafkaProperties Properties to configure this table + * @param keySpec Conversion specification for Kafka record keys + * @param valueSpec Conversion specification for Kafka record values + * @return A TableDefinition derived from the input Properties and KeyOrValueSpec instances + */ + @SuppressWarnings("unused") + public static TableDefinition getTableDefinition( + @NotNull final Properties kafkaProperties, + @NotNull final Consume.KeyOrValueSpec keySpec, + @NotNull final Consume.KeyOrValueSpec valueSpec) { + return getConsumeStruct(kafkaProperties, keySpec, valueSpec).tableDefinition; + } + + /** + * Consume from Kafka to {@link StreamConsumer stream consumers} supplied by {@code streamConsumerRegistrar}. + * + * @param kafkaProperties Properties to configure this table and also to be passed to create the KafkaConsumer + * @param topic Kafka topic name + * @param partitionFilter A predicate returning true for the partitions to consume. The convenience constant + * {@code ALL_PARTITIONS} is defined to facilitate requesting all partitions. 
+ * @param partitionToInitialOffset A function specifying the desired initial offset for each partition consumed + * @param keySpec Conversion specification for Kafka record keys + * @param valueSpec Conversion specification for Kafka record values + * @param streamConsumerRegistrarProvider A provider for a function to + * {@link StreamPublisher#register(StreamConsumer) register} {@link StreamConsumer} instances. The registered + * stream consumers must accept {@link ChunkType chunk types} that correspond to + * {@link StreamChunkUtils#chunkTypeForColumnIndex(TableDefinition, int)} for the supplied + * {@link TableDefinition}. See {@link StreamConsumerRegistrarProvider#single(SingleConsumerRegistrar) + * single} and {@link StreamConsumerRegistrarProvider#perPartition(PerPartitionConsumerRegistrar) + * per-partition}. + * @param consumerLoopCallback callback to inject logic into the ingester's consumer loop + */ + public static void consume( + @NotNull final Properties kafkaProperties, + @NotNull final String topic, + @NotNull final IntPredicate partitionFilter, + @NotNull final InitialOffsetLookup partitionToInitialOffset, + @NotNull final Consume.KeyOrValueSpec keySpec, + @NotNull final Consume.KeyOrValueSpec valueSpec, + @NotNull final StreamConsumerRegistrarProvider streamConsumerRegistrarProvider, + @Nullable final ConsumerLoopCallback consumerLoopCallback) { + final ConsumeStruct consumeStruct = getConsumeStruct(kafkaProperties, keySpec, valueSpec); + final MutableObject kafkaIngesterHolder = new MutableObject<>(); final Function kafkaRecordConsumerFactory = streamConsumerRegistrarProvider.walk( - new KafkaRecordConsumerFactoryCreator(publisherParameters, kafkaIngesterHolder::getValue)); + new KafkaRecordConsumerFactoryCreator(consumeStruct.publisherParameters, + kafkaIngesterHolder::getValue)); final KafkaIngester ingester = new KafkaIngester( log, @@ -1311,8 +1360,8 @@ public static void consume( partitionFilter, kafkaRecordConsumerFactory, partitionToInitialOffset, - keyDeser, - valueDeser, + consumeStruct.keyDeser, + consumeStruct.valueDeser, consumerLoopCallback); kafkaIngesterHolder.setValue(ingester); ingester.start(); From b04b54b9ee1eb8869a0ae057c50f288ecf439d32 Mon Sep 17 00:00:00 2001 From: Devin Smith Date: Wed, 21 Aug 2024 09:31:23 -0700 Subject: [PATCH 34/43] chore: bump to Gradle 8.9 (#5788) This should be a quality-of-life release, improving Gradle error messages and IDE integrations. 
https://docs.gradle.org/8.9/release-notes.html --- gradle/wrapper/gradle-wrapper.jar | Bin 43453 -> 43583 bytes gradle/wrapper/gradle-wrapper.properties | 4 ++-- gradlew | 5 ++++- gradlew.bat | 2 ++ 4 files changed, 8 insertions(+), 3 deletions(-) diff --git a/gradle/wrapper/gradle-wrapper.jar b/gradle/wrapper/gradle-wrapper.jar index e6441136f3d4ba8a0da8d277868979cfbc8ad796..a4b76b9530d66f5e68d973ea569d8e19de379189 100644 GIT binary patch delta 12612 zcmY+pRa6|n(lttO3GVLh?(Xh3xVuAe26uONcL=V5;I6?T_zdn2`Oi5I_gl9gx~lft zRjVKRp?B~8Wyrx5$mS3|py!Njy{0Wt4i%@s8v88pK z6fPNA45)|*9+*w5kcg$o)}2g}%JfXe6l9ig4T8ia3Hlw#3f^fAKW63%<~GZJd-0YA z9YjleCs~#Y?V+`#nr+49hhsr$K$k!lg}AZDw@>2j=f7t~5IW6#K|lAX7|^N}lJ)I!km`nrwx> z))1Es16__aXGVzQM0EC8xH+O!nqTFBg9Ci{NwRK*CP<6s`Gq(~#lqb(zOlh6ZDBK* zr$|NDj^s6VanrKa+QC;5>twePaexqRI%RO~OY075y?NN90I|f^(P# zF=b>fZ73b5JzD`#GC3lTQ_B3lMeBWgQUGYnFw*HQC}^z{$6G4j(n4y-pRxPT(d2Wgb%vCH(?+t&Pj z)QM`zc`U`+<~D+9E{4Uj2kc#*6eZMU$4Oj6QMfA^K!rbl`iBix=2sPrs7j@aqIrE zTaZJ2M09>rp$mgyUZ!r2$UK{+DGqgl`n;*qFF~M(r#eh`T{MO?2&j?xgr8FU$u3-` zhRDc_I23LL4)K&xg$^&l-W=!Jp-P(_Ie07q>Je;QLxi8LaEc%;WIacJD_T69egF?7 z;I_Sg_!+qrur8$Hq4grigaiVF>U7uWJ@Hkd&%kmFnQN-P^fq0gB1|uRt!U#X;DnlV zo?yHWTw7g5B;#xxY`adhi4yZn@f(7-Xa(J6S=#d@&rlFw!qfvholE>MEb|VWn^g}G zMSrK&zQ^vDId&ojL!{%{o7?s{7;{+u%L{|tar(gp?Uxq3p?xAysB>0E$eG#$tvkk9 z2Q2gEP17{U6@UD*v({5MP-CTZfvWMItVjb4c;i~WLq&{?Q1(koX&vt7+$z}10{^Id z{KDjGi0JpD7@;~odF__0m|p;5rIrHidOP9^mwKe#-&JX-X@acc)06G{LO1Wu)#gvZ za~y9(fhA%UwkDOVU1LBJ`0ROE z4&)dJKK%mG@+CIm?+wt9f~@xIMr8}UH*K1j| z0pppo{7gv3v{URwxVMeg>Ps!L5IKxm zjac2egjgb0vH5i75$s|sY_RYec#>faqJk|AGgV;v=^%BM(^p{p;(^SVt-88G9f!q; z>p}9E4^f0=01S2pQBE4}9YqE%TV)*hlU^8k9{&=K76+*Ax^r=AkBb%OCP^P2nm0Ri z;D-|Zk?gGeU<12ti2CnPVNA(Pb)02+r|&yTWW-OJO7 zNLb0pps6aN?A~NJp5kj{{IOlf!5KWMleV@-hYLift)D>-7K+tgs=7Ake}oBnIy-y1 z(Hn@Hjw=_(x>dO5ysQsrnE%A*bk0K<-j{1Yqz@#n#jOL^AzCr#wR|WYzqk6i7v)Lf zkXdKxzuu20aP{Tbg$(+9&oh7cd(Uoqqf<#ujb$q4sZ~gxFbQfS zS)kNklyL*{2AELgjZ(LBu*>S(oH5AaJ;YiB@;l@=O%F6B?oanzoYRM^fQ9-<~^=3$H0g^JPMLQo@SZ@QuNvy)tyJ)LSj`+()#fy?{aV4Yg^7dlQ7AQM^3GLCR2dAFR zJjtfKiVqF`l-H_fz0HD|9g>)pOxn}k!vdZ=DO!7Sikm{Z%P6BrRkBS6W?ZB5W&7rT z@uYpf@M@a!z7H&o@-yrcCL^Ff3e7p3T`R9p?@o-acXmbTSa0>ZANzCSgovsd%;i$| zVus`not!oL#(W`L-!9w0jdaECaG4hk{V7IOs676ZquZH~0TX5hDq|)x z6T497l|E?f4)LA>j=S8}b$0LS=I4h|hUFJYJODT8Li@#6kF$k0)@*l{RnM1HQ%?VT ze-Pqlc!~t(oumVC*?5fwR;P6u{tHaZ~*LlD;B)4f? z?lpWfa2P@)g57flVl83Ej%P`2)gGyaPjhvD(%i~{`2b>#3!+y&` z!2nuwHMFA-zUY}f1^0B8<`N)Gr=A4TS@b1qykmd0Pq{?r)+1^^+D(=xasb^Tf!oK9 zBLL+*p6M_#ufgLzgq1zcSwZsZnQWFLC3`Yxdg-2=*tT`J9nrfYt)RF)YryBf8_gW{ zvKbB+oZLehfT)S#<|y1)E0hW^?+AnqPXq9Hu;v3dsMGdr{SVyF63;K<8VcgI#~}1i zLYSBL0K;RTT(;>2x=*!1Di9w0mwr;`CN}kM65|Ay{~z}_^JKOsRaN<~#9O^iiW<5P zYN7r~HV!#Nz~IZU`P>1Xe%4f~K}KcF#X&5kO*G}-)74S*tQ8CietdPcA1Yl;S=Mr# z`#MYY!{s^uo=jn7;k6O%(}fN+*0cWMpt~#n9DR<3NyU?+3D^AgI}S)Cu-Tljg`VY} zX1=fq$?8$DtOeGxE6f8lbS_6Q3C4+LDTO$}_IpM$Xv<|QSC%+Oll^q$y`7o@jD{dp zNDl|&X)r7wETa-#h*d`KXntxI(Y{vLha{$0i7@G8xx^m=c<{lJ9?p-i!^W{%j7-oo z0W^SzZ^(Wkyz*We{lEn%Yhu-ycUOHtrRiVJL4~&S91*D0MrLu}Q>v-Mc?GcWfpyz% zX|UvcN@krFO#@v|CtYM}g|=L3%aMo$E5<@CM%c*;?u>LOTz00@+dt1{yg1y=$h+{|D17U}$*^fE^H&8b431EUE z<9tv0V_#%#&1N#j7AKCj!tTK@J%oFW*ESW<(#Gl#Xs%v<@AitI?s92nLzm<)w3Wkkom1f$gcdUi%g_*jofy&}N#luL<$GVIe{iQkQ)sIHVy zBgItnPBFamrv6Kb{eE($Q(f`ZPeW!Hm%Y@F*OF1sKB{Yy|C>WEv_mfvv-N-jh)B-5 z4a!1WcT@9a+hGaBrc~sz=>G?Q!*Zp^JFRUvBMyNR1;`)j$RhH$6gEyVKhd$&K-CFT zXaWC-Y=fyOnqT84iMn9o5oLEOI(_3fk!W^8-74|q1QhQ|CmT0i=b;6Z3u?E{p7V{? 
[GIT binary patch data omitted]

diff --git a/gradle/wrapper/gradle-wrapper.properties b/gradle/wrapper/gradle-wrapper.properties
index 515ab9d5f18..9036682bf0f 100644
--- a/gradle/wrapper/gradle-wrapper.properties
+++ b/gradle/wrapper/gradle-wrapper.properties
@@ -1,7 +1,7 @@
 distributionBase=GRADLE_USER_HOME
 distributionPath=wrapper/dists
-distributionSha256Sum=f8b4f4772d302c8ff580bc40d0f56e715de69b163546944f787c87abf209c961
-distributionUrl=https\://services.gradle.org/distributions/gradle-8.8-all.zip
+distributionSha256Sum=682b4df7fe5accdca84a4d1ef6a3a6ab096b3efd5edf7de2bd8c758d95a93703
+distributionUrl=https\://services.gradle.org/distributions/gradle-8.10-all.zip
 networkTimeout=10000
 validateDistributionUrl=true
 zipStoreBase=GRADLE_USER_HOME
diff --git a/gradlew b/gradlew
index b740cf13397..f5feea6d6b1 100755
--- a/gradlew
+++ b/gradlew
@@ -15,6 +15,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
+# SPDX-License-Identifier: Apache-2.0
+#
 
 ##############################################################################
 #
@@ -84,7 +86,8 @@ done
 # shellcheck disable=SC2034
 APP_BASE_NAME=${0##*/}
 # Discard cd standard output in case $CDPATH is set (https://github.com/gradle/gradle/issues/25036)
-APP_HOME=$( cd "${APP_HOME:-./}" > /dev/null && pwd -P ) || exit
+APP_HOME=$( cd -P "${APP_HOME:-./}" > /dev/null && printf '%s
+' "$PWD" ) || exit
 
 # Use the maximum available, or set MAX_FD != -1 to use that value.
MAX_FD=maximum diff --git a/gradlew.bat b/gradlew.bat index 7101f8e4676..9b42019c791 100644 --- a/gradlew.bat +++ b/gradlew.bat @@ -13,6 +13,8 @@ @rem See the License for the specific language governing permissions and @rem limitations under the License. @rem +@rem SPDX-License-Identifier: Apache-2.0 +@rem @if "%DEBUG%"=="" @echo off @rem ########################################################################## From 76f27bec85364d1b397ff7764d5ed7299ca465fb Mon Sep 17 00:00:00 2001 From: Nate Bauernfeind Date: Wed, 21 Aug 2024 14:30:18 -0600 Subject: [PATCH 35/43] fix: Parallel Where should not NPE on a Big TableUpdate With Both Adds and Mods (#5961) --- .../table/impl/AbstractFilterExecution.java | 4 +- .../impl/QueryTableWhereParallelTest.java | 48 +++++++++++++++++++ 2 files changed, 50 insertions(+), 2 deletions(-) diff --git a/engine/table/src/main/java/io/deephaven/engine/table/impl/AbstractFilterExecution.java b/engine/table/src/main/java/io/deephaven/engine/table/impl/AbstractFilterExecution.java index 3659e8bb530..6c388b3ee7d 100644 --- a/engine/table/src/main/java/io/deephaven/engine/table/impl/AbstractFilterExecution.java +++ b/engine/table/src/main/java/io/deephaven/engine/table/impl/AbstractFilterExecution.java @@ -190,12 +190,12 @@ private void doFilterParallel( // Clean up the row sets created by the filter. try (final RowSet ignored = adds; final RowSet ignored2 = mods) { - if (addedResult != null) { + if (addedResult != null && adds != null) { synchronized (addedResult) { addedResult.insert(adds); } } - if (modifiedResult != null) { + if (modifiedResult != null && mods != null) { synchronized (modifiedResult) { modifiedResult.insert(mods); } diff --git a/engine/table/src/test/java/io/deephaven/engine/table/impl/QueryTableWhereParallelTest.java b/engine/table/src/test/java/io/deephaven/engine/table/impl/QueryTableWhereParallelTest.java index 65b27a6b380..0659f11a139 100644 --- a/engine/table/src/test/java/io/deephaven/engine/table/impl/QueryTableWhereParallelTest.java +++ b/engine/table/src/test/java/io/deephaven/engine/table/impl/QueryTableWhereParallelTest.java @@ -5,12 +5,23 @@ import gnu.trove.list.TLongList; import gnu.trove.list.array.TLongArrayList; +import io.deephaven.engine.context.ExecutionContext; import io.deephaven.engine.rowset.RowSet; +import io.deephaven.engine.rowset.RowSetFactory; +import io.deephaven.engine.rowset.RowSetShiftData; import io.deephaven.engine.rowset.WritableRowSet; +import io.deephaven.engine.table.ModifiedColumnSet; import io.deephaven.engine.table.Table; import io.deephaven.engine.table.TableDefinition; +import io.deephaven.engine.table.TableUpdate; import io.deephaven.engine.table.impl.select.WhereFilter; import io.deephaven.engine.table.impl.select.WhereFilterImpl; +import io.deephaven.engine.table.impl.sources.RowKeyColumnSource; +import io.deephaven.engine.table.impl.sources.RowPositionColumnSource; +import io.deephaven.engine.testutil.ControlledUpdateGraph; +import io.deephaven.engine.testutil.EvalNugget; +import io.deephaven.engine.testutil.EvalNuggetInterface; +import io.deephaven.engine.testutil.TstUtils; import io.deephaven.engine.util.TableTools; import io.deephaven.test.types.OutOfBandTest; import org.jetbrains.annotations.NotNull; @@ -21,6 +32,7 @@ import java.util.Collections; import java.util.List; +import java.util.Map; import static org.junit.Assert.assertEquals; @@ -119,4 +131,40 @@ public WhereFilter copy() { QueryTable.PARALLEL_WHERE_ROWS_PER_SEGMENT = oldSize; } } + + @Test + public void 
testParallelExecutionViaTableUpdate() { + final ControlledUpdateGraph updateGraph = ExecutionContext.getContext().getUpdateGraph().cast(); + + final long oldSize = QueryTable.PARALLEL_WHERE_ROWS_PER_SEGMENT; + try { + QueryTable.PARALLEL_WHERE_ROWS_PER_SEGMENT = 1_000; + final QueryTable table = TstUtils.testRefreshingTable(RowSetFactory.flat(1500).toTracking()) + .withAdditionalColumns(Map.of("K", new RowKeyColumnSource())); + table.setRefreshing(true); + table.setAttribute(BaseTable.TEST_SOURCE_TABLE_ATTRIBUTE, true); + final Table source = table.updateView("J = ii % 2 == 0 ? K : 0"); + + final EvalNuggetInterface[] en = new EvalNuggetInterface[] { + EvalNugget.from(() -> source.where("K == J")), + }; + + updateGraph.runWithinUnitTestCycle(() -> { + final RowSet added = RowSetFactory.fromRange(1500, 2999); + final RowSet modified = RowSetFactory.fromRange(0, 1499); + table.getRowSet().writableCast().insert(added); + + final TableUpdate upstream = new TableUpdateImpl( + added, RowSetFactory.empty(), modified, RowSetShiftData.EMPTY, ModifiedColumnSet.ALL); + + table.notifyListeners(upstream); + }); + + // Ensure the table is as expected. + TstUtils.validate(en); + + } finally { + QueryTable.PARALLEL_WHERE_ROWS_PER_SEGMENT = oldSize; + } + } } From f61159ab35bc076d2cb908f406297533076529f3 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Wed, 21 Aug 2024 20:50:57 +0000 Subject: [PATCH 36/43] fix: Update web version 0.90.0 (#5963) Release notes https://github.com/deephaven/web-client-ui/releases/tag/v0.90.0 Co-authored-by: deephaven-internal <66694643+deephaven-internal@users.noreply.github.com> --- web/client-ui/Dockerfile | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/web/client-ui/Dockerfile b/web/client-ui/Dockerfile index 4ea68fceda6..a48144f7127 100644 --- a/web/client-ui/Dockerfile +++ b/web/client-ui/Dockerfile @@ -2,10 +2,10 @@ FROM deephaven/node:local-build WORKDIR /usr/src/app # Most of the time, these versions are the same, except in cases where a patch only affects one of the packages -ARG WEB_VERSION=0.89.0 -ARG GRID_VERSION=0.89.0 -ARG CHART_VERSION=0.89.0 -ARG WIDGET_VERSION=0.89.0 +ARG WEB_VERSION=0.90.0 +ARG GRID_VERSION=0.90.0 +ARG CHART_VERSION=0.90.0 +ARG WIDGET_VERSION=0.90.0 # Pull in the published code-studio package from npmjs and extract is RUN set -eux; \ From 7b13defc15212450a0ae05ec938b067989e95b46 Mon Sep 17 00:00:00 2001 From: Colin Alworth Date: Wed, 21 Aug 2024 20:36:15 -0500 Subject: [PATCH 37/43] fix: JS API should use GetConfigurationConstants for auth calls (#5959) In addition to more closely following what other Deephaven clients do, this prevents an issue where Open/Next pairs of gRPC calls (to emulate a bidirectional call) can race each other to redeem a one-time use auth token. 
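For illustration only (not part of this patch, and the class and method names below are hypothetical): the race described above comes down to a one-time-use token that only the first request can redeem, so when the "open" and "next" halves of an emulated bidirectional call both present it, whichever arrives second is rejected. A minimal JDK-only sketch:

```java
import java.util.concurrent.atomic.AtomicReference;

// Hypothetical server-side view of a one-time-use token: the first request to
// redeem it wins, and any other request presenting the same token is rejected.
final class OneTimeTokenRace {
    private final AtomicReference<String> outstanding = new AtomicReference<>("token-1");

    // Only the first caller presenting the current token succeeds.
    boolean redeem(String presented) {
        return outstanding.compareAndSet(presented, null);
    }

    public static void main(String[] args) {
        OneTimeTokenRace server = new OneTimeTokenRace();
        System.out.println("open: " + server.redeem("token-1")); // true
        System.out.println("next: " + server.redeem("token-1")); // false -> auth failure
    }
}
```

Using a single unary call with readable response headers avoids ever issuing two requests that carry the same token.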
Fixes #5955 --- .../deephaven/web/client/api/CoreClient.java | 13 -- .../web/client/api/WorkerConnection.java | 80 +++++----- .../stream/HandshakeStreamFactory.java | 150 ------------------ .../web/client/api/grpc/UnaryWithHeaders.java | 44 +++++ 4 files changed, 87 insertions(+), 200 deletions(-) delete mode 100644 web/client-api/src/main/java/io/deephaven/web/client/api/barrage/stream/HandshakeStreamFactory.java create mode 100644 web/client-api/src/main/java/io/deephaven/web/client/api/grpc/UnaryWithHeaders.java diff --git a/web/client-api/src/main/java/io/deephaven/web/client/api/CoreClient.java b/web/client-api/src/main/java/io/deephaven/web/client/api/CoreClient.java index 8687dedcd18..6c6c2d2d556 100644 --- a/web/client-api/src/main/java/io/deephaven/web/client/api/CoreClient.java +++ b/web/client-api/src/main/java/io/deephaven/web/client/api/CoreClient.java @@ -131,19 +131,6 @@ public Promise login(@TsTypeRef(LoginCredentials.class) JsPropertyMap login = loginPromise.asPromise(); - // fetch configs and check session timeout - login.then(ignore -> getServerConfigValues()).then(configs -> { - for (String[] config : configs) { - if (config[0].equals("http.session.durationMs")) { - workerConnection.setSessionTimeoutMs(Double.parseDouble(config[1])); - } - } - return null; - }).catch_(ignore -> { - // Ignore this failure and suppress browser logging, we have a safe fallback - return Promise.resolve((Object) null); - }); - if (alreadyRunning) { ideConnection.connection.get().forceReconnect(); } diff --git a/web/client-api/src/main/java/io/deephaven/web/client/api/WorkerConnection.java b/web/client-api/src/main/java/io/deephaven/web/client/api/WorkerConnection.java index 86549556f5a..ab88008a3de 100644 --- a/web/client-api/src/main/java/io/deephaven/web/client/api/WorkerConnection.java +++ b/web/client-api/src/main/java/io/deephaven/web/client/api/WorkerConnection.java @@ -23,13 +23,12 @@ import io.deephaven.javascript.proto.dhinternal.arrow.flight.flatbuf.schema_generated.org.apache.arrow.flatbuf.Schema; import io.deephaven.javascript.proto.dhinternal.arrow.flight.protocol.browserflight_pb_service.BrowserFlightServiceClient; import io.deephaven.javascript.proto.dhinternal.arrow.flight.protocol.flight_pb.FlightData; -import io.deephaven.javascript.proto.dhinternal.arrow.flight.protocol.flight_pb.HandshakeRequest; -import io.deephaven.javascript.proto.dhinternal.arrow.flight.protocol.flight_pb.HandshakeResponse; import io.deephaven.javascript.proto.dhinternal.arrow.flight.protocol.flight_pb_service.FlightServiceClient; import io.deephaven.javascript.proto.dhinternal.browserheaders.BrowserHeaders; import io.deephaven.javascript.proto.dhinternal.flatbuffers.Builder; import io.deephaven.javascript.proto.dhinternal.flatbuffers.Long; import io.deephaven.javascript.proto.dhinternal.grpcweb.grpc.Code; +import io.deephaven.javascript.proto.dhinternal.grpcweb.grpc.UnaryOutput; import io.deephaven.javascript.proto.dhinternal.io.deephaven.barrage.flatbuf.barrage_generated.io.deephaven.barrage.flatbuf.BarrageMessageType; import io.deephaven.javascript.proto.dhinternal.io.deephaven.barrage.flatbuf.barrage_generated.io.deephaven.barrage.flatbuf.BarrageMessageWrapper; import io.deephaven.javascript.proto.dhinternal.io.deephaven.barrage.flatbuf.barrage_generated.io.deephaven.barrage.flatbuf.BarrageSubscriptionOptions; @@ -40,6 +39,9 @@ import io.deephaven.javascript.proto.dhinternal.io.deephaven.proto.application_pb.FieldsChangeUpdate; import 
io.deephaven.javascript.proto.dhinternal.io.deephaven.proto.application_pb.ListFieldsRequest; import io.deephaven.javascript.proto.dhinternal.io.deephaven.proto.application_pb_service.ApplicationServiceClient; +import io.deephaven.javascript.proto.dhinternal.io.deephaven.proto.config_pb.ConfigurationConstantsRequest; +import io.deephaven.javascript.proto.dhinternal.io.deephaven.proto.config_pb.ConfigurationConstantsResponse; +import io.deephaven.javascript.proto.dhinternal.io.deephaven.proto.config_pb_service.ConfigService; import io.deephaven.javascript.proto.dhinternal.io.deephaven.proto.config_pb_service.ConfigServiceClient; import io.deephaven.javascript.proto.dhinternal.io.deephaven.proto.console_pb.LogSubscriptionData; import io.deephaven.javascript.proto.dhinternal.io.deephaven.proto.console_pb.LogSubscriptionRequest; @@ -72,13 +74,13 @@ import io.deephaven.web.client.api.barrage.def.ColumnDefinition; import io.deephaven.web.client.api.barrage.def.InitialTableDefinition; import io.deephaven.web.client.api.barrage.stream.BiDiStream; -import io.deephaven.web.client.api.barrage.stream.HandshakeStreamFactory; import io.deephaven.web.client.api.barrage.stream.ResponseStreamWrapper; import io.deephaven.web.client.api.batch.RequestBatcher; import io.deephaven.web.client.api.batch.TableConfig; import io.deephaven.web.client.api.console.JsVariableChanges; import io.deephaven.web.client.api.console.JsVariableDefinition; import io.deephaven.web.client.api.console.JsVariableType; +import io.deephaven.web.client.api.grpc.UnaryWithHeaders; import io.deephaven.web.client.api.i18n.JsTimeZone; import io.deephaven.web.client.api.impl.TicketAndPromise; import io.deephaven.web.client.api.lifecycle.HasLifecycle; @@ -479,54 +481,58 @@ private Promise authUpdate() { DomGlobal.clearTimeout(scheduledAuthUpdate); scheduledAuthUpdate = null; } - return new Promise<>((resolve, reject) -> { - // the streamfactory will automatically reference our existing metadata, but we can listen to update it - BiDiStream handshake = HandshakeStreamFactory.create(this); - handshake.onHeaders(headers -> { - // unchecked cast is required here due to "aliasing" in ts/webpack resulting in BrowserHeaders != - // Metadata - JsArray authorization = Js.uncheckedCast(headers).get(FLIGHT_AUTH_HEADER_NAME); - if (authorization.length > 0) { - JsArray existing = metadata().get(FLIGHT_AUTH_HEADER_NAME); - if (!existing.getAt(0).equals(authorization.getAt(0))) { - // use this new token - metadata().set(FLIGHT_AUTH_HEADER_NAME, authorization); - CustomEventInit init = CustomEventInit.create(); - init.setDetail(new JsRefreshToken(authorization.getAt(0), sessionTimeoutMs)); - info.fireEvent(EVENT_REFRESH_TOKEN_UPDATED, init); + return UnaryWithHeaders.call( + this, ConfigService.GetConfigurationConstants, new ConfigurationConstantsRequest()) + .then(result -> { + BrowserHeaders headers = result.getHeaders(); + // unchecked cast is required here due to "aliasing" in ts/webpack resulting in BrowserHeaders != + // Metadata + JsArray authorization = + Js.uncheckedCast(headers).get(FLIGHT_AUTH_HEADER_NAME); + if (authorization.length > 0) { + JsArray existing = metadata().get(FLIGHT_AUTH_HEADER_NAME); + if (!existing.getAt(0).equals(authorization.getAt(0))) { + // use this new token + metadata().set(FLIGHT_AUTH_HEADER_NAME, authorization); + CustomEventInit init = CustomEventInit.create(); + init.setDetail(new JsRefreshToken(authorization.getAt(0), sessionTimeoutMs)); + info.fireEvent(EVENT_REFRESH_TOKEN_UPDATED, init); + } } - } - }); - 
handshake.onStatus(status -> { - if (status.isOk()) { + + // Read the timeout from the server, we'll refresh at less than that + result.getMessage().getConfigValuesMap().forEach((item, key) -> { + if (key.equals("http.session.durationMs")) { + sessionTimeoutMs = Double.parseDouble(item.getStringValue()); + } + }); + // schedule an update based on our currently configured delay scheduledAuthUpdate = DomGlobal.setTimeout(ignore -> { authUpdate(); }, sessionTimeoutMs / 2); - resolve.onInvoke((Void) null); - } else { - if (status.getCode() == Code.Unauthenticated) { + return Promise.resolve((Void) null); + }).catch_(err -> { + UnaryOutput result = (UnaryOutput) err; + if (result.getStatus() == Code.Unauthenticated) { // explicitly clear out any metadata for authentication, and signal that auth failed metadata.delete(FLIGHT_AUTH_HEADER_NAME); // Fire an event for the UI to attempt to re-auth info.fireEvent(CoreClient.EVENT_RECONNECT_AUTH_FAILED); - return; + + // We return here rather than continue and call checkStatus() + return Promise.reject("Authentication failed, please reconnect"); } - // TODO deephaven-core#2564 fire an event for the UI to re-auth - checkStatus(status); - if (status.getDetails() == null || status.getDetails().isEmpty()) { - reject.onInvoke("Error occurred while authenticating, gRPC status " + status.getCode()); + checkStatus(ResponseStreamWrapper.Status.of(result.getStatus(), result.getMessage().toString(), + result.getTrailers())); + if (result.getMessage() == null || result.getMessage().toString().isEmpty()) { + return Promise.reject(result.getMessage()); } else { - reject.onInvoke(status.getDetails()); + return Promise.reject("Error occurred while authenticating, gRPC status " + result.getStatus()); } - } - }); - - handshake.send(new HandshakeRequest()); - handshake.end(); - }); + }); } private void subscribeToTerminationNotification() { diff --git a/web/client-api/src/main/java/io/deephaven/web/client/api/barrage/stream/HandshakeStreamFactory.java b/web/client-api/src/main/java/io/deephaven/web/client/api/barrage/stream/HandshakeStreamFactory.java deleted file mode 100644 index bae2d3c41fd..00000000000 --- a/web/client-api/src/main/java/io/deephaven/web/client/api/barrage/stream/HandshakeStreamFactory.java +++ /dev/null @@ -1,150 +0,0 @@ -// -// Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending -// -package io.deephaven.web.client.api.barrage.stream; - -import elemental2.core.Function; -import elemental2.core.JsArray; -import io.deephaven.javascript.proto.dhinternal.arrow.flight.protocol.browserflight_pb_service.BrowserFlightService; -import io.deephaven.javascript.proto.dhinternal.arrow.flight.protocol.browserflight_pb_service.ResponseStream; -import io.deephaven.javascript.proto.dhinternal.arrow.flight.protocol.flight_pb.HandshakeRequest; -import io.deephaven.javascript.proto.dhinternal.arrow.flight.protocol.flight_pb.HandshakeResponse; -import io.deephaven.javascript.proto.dhinternal.arrow.flight.protocol.flight_pb_service.BidirectionalStream; -import io.deephaven.javascript.proto.dhinternal.arrow.flight.protocol.flight_pb_service.FlightService; -import io.deephaven.javascript.proto.dhinternal.grpcweb.Grpc; -import io.deephaven.javascript.proto.dhinternal.grpcweb.client.ClientRpcOptions; -import io.deephaven.javascript.proto.dhinternal.grpcweb.grpc.Client; -import io.deephaven.javascript.proto.dhinternal.grpcweb.invoke.InvokeRpcOptions; -import io.deephaven.javascript.proto.dhinternal.grpcweb.invoke.Request; -import 
io.deephaven.web.client.api.WorkerConnection; -import jsinterop.base.Js; - -import java.util.HashMap; -import java.util.Map; - -/** - * Improbable-eng's grpc-web implementation doesn't pass headers to api callers, only trailers (which sometimes includes - * headers) are included when calls fail, but never when successful. The current Flight auth v2 setup requires reading - * headers from responses, but strictly speaking doesn't require reading them from all calls - we can make extra - * FlightService/Handshake calls as long as we can read the response headers with them. - *
- *
- * This class includes a custom implementation of the Handshake method that is able to notify callers about headers that - * are received. - */ -public class HandshakeStreamFactory { - - private static final String DATA_EVENT_LISTENER_NAME = "data"; - private static final String END_EVENT_LISTENER_NAME = "end"; - private static final String STATUS_EVENT_LISTENER_NAME = "status"; - private static final String HEADERS_EVENT_LISTENER_NAME = "headers"; - - public static BiDiStream create(WorkerConnection connection) { - return connection.streamFactory().create( - metadata -> { - Map> listeners = listenerMap(); - ClientRpcOptions options = ClientRpcOptions.create(); - options.setHost(connection.flightServiceClient().serviceHost); - options.setTransport(null);// ts doesn't expose these two, stick with defaults for now - options.setDebug(false); - Client client = Grpc.client(FlightService.Handshake, - (io.deephaven.javascript.proto.dhinternal.grpcweb.grpc.ClientRpcOptions) options); - client.onEnd((status, statusMessage, trailers) -> { - listeners.get(STATUS_EVENT_LISTENER_NAME).forEach((item, index) -> item.call(null, - ResponseStreamWrapper.Status.of(status, statusMessage, metadata))); - listeners.get(END_EVENT_LISTENER_NAME).forEach((item, index) -> item.call(null, - ResponseStreamWrapper.Status.of(status, statusMessage, metadata))); - listeners.clear(); - }); - client.onMessage(message -> { - listeners.get(DATA_EVENT_LISTENER_NAME).forEach((item, index) -> item.call(null, message)); - }); - client.onHeaders(headers -> { - listeners.get(HEADERS_EVENT_LISTENER_NAME) - .forEach((item, index) -> item.call(null, headers)); - }); - client.start(metadata); - - return new BidirectionalStream() { - @Override - public void cancel() { - listeners.clear(); - client.close(); - } - - @Override - public void end() { - client.finishSend(); - } - - @Override - public BidirectionalStream on(String type, - Function handler) { - listeners.get(type).push(handler); - return this; - } - - @Override - public BidirectionalStream write( - HandshakeRequest message) { - client.send(message); - return this; - } - }; - }, - (first, metadata) -> { - Map> listeners = listenerMap(); - io.deephaven.javascript.proto.dhinternal.grpcweb.grpc.InvokeRpcOptions props = - Js.cast(InvokeRpcOptions.create()); - props.setRequest(first); - props.setHost(connection.browserFlightServiceClient().serviceHost); - props.setMetadata(metadata); - props.setTransport(null);// ts doesnt expose these two, stick with defaults for now - props.setDebug(false); - props.setOnMessage(responseMessage -> { - listeners.get(DATA_EVENT_LISTENER_NAME) - .forEach((item, index) -> item.call(null, responseMessage)); - }); - props.setOnEnd((status, statusMessage, trailers) -> { - listeners.get(STATUS_EVENT_LISTENER_NAME).forEach( - (item, index) -> item.call(null, - ResponseStreamWrapper.Status.of(status, statusMessage, metadata))); - listeners.get(END_EVENT_LISTENER_NAME).forEach( - (item, index) -> item.call(null, - ResponseStreamWrapper.Status.of(status, statusMessage, metadata))); - listeners.clear(); - }); - props.setOnHeaders(headers -> { - listeners.get(HEADERS_EVENT_LISTENER_NAME) - .forEach((item, index) -> item.call(null, headers)); - }); - Request client = Grpc.invoke.onInvoke(BrowserFlightService.OpenHandshake, props); - - return new ResponseStream() { - @Override - public void cancel() { - listeners.clear(); - client.getClose().onInvoke(); - } - - @Override - public ResponseStream on(String type, Function handler) { - 
listeners.get(type).push(handler); - return this; - } - }; - }, - (next, headers, callback) -> connection.browserFlightServiceClient().nextHandshake(next, headers, - callback::apply), - new HandshakeRequest()); - } - - private static Map> listenerMap() { - Map> listeners = new HashMap<>(); - listeners.put(DATA_EVENT_LISTENER_NAME, new JsArray<>()); - listeners.put(END_EVENT_LISTENER_NAME, new JsArray<>()); - listeners.put(STATUS_EVENT_LISTENER_NAME, new JsArray<>()); - listeners.put(HEADERS_EVENT_LISTENER_NAME, new JsArray<>()); - return listeners; - } -} diff --git a/web/client-api/src/main/java/io/deephaven/web/client/api/grpc/UnaryWithHeaders.java b/web/client-api/src/main/java/io/deephaven/web/client/api/grpc/UnaryWithHeaders.java new file mode 100644 index 00000000000..2b82a40d2b8 --- /dev/null +++ b/web/client-api/src/main/java/io/deephaven/web/client/api/grpc/UnaryWithHeaders.java @@ -0,0 +1,44 @@ +// +// Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending +// +package io.deephaven.web.client.api.grpc; + +import elemental2.promise.IThenable; +import elemental2.promise.Promise; +import io.deephaven.javascript.proto.dhinternal.grpcweb.Grpc; +import io.deephaven.javascript.proto.dhinternal.grpcweb.grpc.Code; +import io.deephaven.javascript.proto.dhinternal.grpcweb.unary.UnaryOutput; +import io.deephaven.javascript.proto.dhinternal.grpcweb.unary.UnaryRpcOptions; +import io.deephaven.web.client.api.WorkerConnection; + +public class UnaryWithHeaders { + + /** + * Improbable-eng's grpc-web implementation doesn't pass headers to api callers - this changes the contract a bit so + * that we can get a typed UnaryOutput with the headers/trailers intact. + * + * @param connection provides access to the metadata and the server url + * @param methodDescriptor the service method to invoke + * @return a promise that will resolve to the response plus headers/trailers, or reject with the headers/trailers + * @param type of the message object + */ + public static Promise> call(WorkerConnection connection, Object methodDescriptor, + Req request) { + return new Promise<>((resolve, reject) -> { + UnaryRpcOptions props = UnaryRpcOptions.create(); + props.setHost(connection.configServiceClient().serviceHost); + props.setMetadata(connection.metadata()); + props.setTransport(null);// ts doesn't expose these two, stick with defaults for now + props.setDebug(false); + props.setOnEnd(response -> { + if (response.getStatus() != Code.OK) { + reject.onInvoke(response); + } else { + resolve.onInvoke((IThenable>) response); + } + }); + props.setRequest(request); + Grpc.unary.onInvoke(methodDescriptor, props); + }); + } +} From 652138274e298b07707350adf43e403e2bf469f8 Mon Sep 17 00:00:00 2001 From: Devin Smith Date: Thu, 22 Aug 2024 13:47:41 -0700 Subject: [PATCH 38/43] feat: Expose Python/Groovy version to clients (#5970) * Sets `groovy.version` configuration property, as sourced from class `groovy.lang.GroovyShell`. 
* Sets `python.version` configuration property, as sourced from python code `platform.python_version()` * Adds `groovy.version` and `python.version` to `client.configuration.list` in dh-defaults.prop (to be returned to client as part of `ConfigService.GetConfigurationConstants` gRPC) * Adds `io.deephaven.client.examples.PrintConfigurationConstants` to invoke and print out results of `ConfigService.GetConfigurationConstants` Fixes #5938 --- .../python/PythonDeephavenSession.java | 26 ++++++++++++++++- java-client/session-examples/build.gradle | 1 + .../examples/PrintConfigurationConstants.java | 28 +++++++++++++++++++ .../src/main/resources/dh-defaults.prop | 4 +-- 4 files changed, 56 insertions(+), 3 deletions(-) create mode 100644 java-client/session-examples/src/main/java/io/deephaven/client/examples/PrintConfigurationConstants.java diff --git a/Integrations/src/main/java/io/deephaven/integrations/python/PythonDeephavenSession.java b/Integrations/src/main/java/io/deephaven/integrations/python/PythonDeephavenSession.java index f14f3459504..f8b69e9d0ae 100644 --- a/Integrations/src/main/java/io/deephaven/integrations/python/PythonDeephavenSession.java +++ b/Integrations/src/main/java/io/deephaven/integrations/python/PythonDeephavenSession.java @@ -57,8 +57,25 @@ public class PythonDeephavenSession extends AbstractScriptSession scope; private final PythonScriptSessionModule module; @@ -67,6 +84,10 @@ public class PythonDeephavenSession extends AbstractScriptSession + * Sets the configuration property {@value PYTHON_VERSION_PROPERTY} to the value returned from the python code + * {@code platform.python_version()}. + * * @param updateGraph the default update graph to install for the repl * @param operationInitializer the default operation initializer to install for the repl * @param objectTypeLookup the object type lookup @@ -96,16 +117,19 @@ public PythonDeephavenSession( registerJavaExecutor(threadInitializationFactory); publishInitial(); + + final Configuration configuration = Configuration.getInstance(); /* * And now the user-defined initialization scripts, if any. 
*/ if (runInitScripts) { - String[] scripts = Configuration.getInstance().getProperty("PythonDeephavenSession.initScripts").split(","); + String[] scripts = configuration.getProperty("PythonDeephavenSession.initScripts").split(","); for (String script : scripts) { runScript(script); } } + setPythonVersion(configuration); } /** diff --git a/java-client/session-examples/build.gradle b/java-client/session-examples/build.gradle index db9d4d859bf..f6467f1bdaf 100644 --- a/java-client/session-examples/build.gradle +++ b/java-client/session-examples/build.gradle @@ -49,6 +49,7 @@ application.applicationDistribution.into('bin') { from(createApplication('message-stream-send-receive', 'io.deephaven.client.examples.MessageStreamSendReceive')) from(createApplication('filter-table', 'io.deephaven.client.examples.FilterTable')) from(createApplication('create-shared-id', 'io.deephaven.client.examples.CreateSharedId')) + from(createApplication('print-configuration-constants', 'io.deephaven.client.examples.PrintConfigurationConstants')) fileMode = 0755 } diff --git a/java-client/session-examples/src/main/java/io/deephaven/client/examples/PrintConfigurationConstants.java b/java-client/session-examples/src/main/java/io/deephaven/client/examples/PrintConfigurationConstants.java new file mode 100644 index 00000000000..8a2bf16ccc4 --- /dev/null +++ b/java-client/session-examples/src/main/java/io/deephaven/client/examples/PrintConfigurationConstants.java @@ -0,0 +1,28 @@ +// +// Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending +// +package io.deephaven.client.examples; + +import io.deephaven.client.impl.Session; +import io.deephaven.proto.backplane.grpc.ConfigValue; +import picocli.CommandLine; +import picocli.CommandLine.Command; + +import java.util.Map.Entry; + +@Command(name = "print-configuration-constants", mixinStandardHelpOptions = true, + description = "Print configuration constants", version = "0.1.0") +class PrintConfigurationConstants extends SingleSessionExampleBase { + + @Override + protected void execute(Session session) throws Exception { + for (Entry entry : session.getConfigurationConstants().get().entrySet()) { + System.out.println(entry.getKey() + "=" + entry.getValue().getStringValue()); + } + } + + public static void main(String[] args) { + int execute = new CommandLine(new PrintConfigurationConstants()).execute(args); + System.exit(execute); + } +} diff --git a/props/configs/src/main/resources/dh-defaults.prop b/props/configs/src/main/resources/dh-defaults.prop index 94e1a947bf3..877a9f8d056 100644 --- a/props/configs/src/main/resources/dh-defaults.prop +++ b/props/configs/src/main/resources/dh-defaults.prop @@ -62,9 +62,9 @@ web.webgl.editable=true authentication.client.configuration.list=AuthHandlers # List of configuration properties to provide to authenticated clients, so they can interact with the server. -client.configuration.list=java.version,deephaven.version,barrage.version,http.session.durationMs,file.separator,web.storage.layout.directory,web.storage.notebook.directory,web.webgl,web.webgl.editable +client.configuration.list=java.version,deephaven.version,barrage.version,groovy.version,python.version,http.session.durationMs,file.separator,web.storage.layout.directory,web.storage.notebook.directory,web.webgl,web.webgl.editable # Version list to add to the configuration property list. Each `=`-delimited pair denotes a short name for a versioned # jar, and a class that is found in that jar. 
Any such keys will be made available to the client.configuration.list # as .version. -client.version.list=deephaven=io.deephaven.engine.table.Table,barrage=io.deephaven.barrage.flatbuf.BarrageMessageWrapper +client.version.list=deephaven=io.deephaven.engine.table.Table,barrage=io.deephaven.barrage.flatbuf.BarrageMessageWrapper,groovy=groovy.lang.GroovyShell From 3aa117d506660873d9935c0220f7f55727017760 Mon Sep 17 00:00:00 2001 From: Corey Kosak Date: Thu, 22 Aug 2024 23:31:02 -0400 Subject: [PATCH 39/43] fix(cpp-client): Provide correct instructions for making a RelWithDebInfo build (#5975) Also for building with more cores --- cpp-client/README.md | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/cpp-client/README.md b/cpp-client/README.md index c815e042b46..56ff4769695 100644 --- a/cpp-client/README.md +++ b/cpp-client/README.md @@ -281,12 +281,14 @@ Notes 9. Now configure the build for Deephaven Core: ``` - cmake -B build -S . -DCMAKE_TOOLCHAIN_FILE=%VCPKG_ROOT%/scripts/buildsystems/vcpkg.cmake -DCMAKE_INSTALL_PREFIX=%DHINSTALL% -DX_VCPKG_APPLOCAL_DEPS_INSTALL=ON -DCMAKE_BUILD_TYPE=RelWithDebInfo + cmake -B build -S . -DCMAKE_TOOLCHAIN_FILE=%VCPKG_ROOT%/scripts/buildsystems/vcpkg.cmake -DCMAKE_INSTALL_PREFIX=%DHINSTALL% -DX_VCPKG_APPLOCAL_DEPS_INSTALL=ON ``` -10. Finally, build and install Deephaven Core: +10. Finally, build and install Deephaven Core. Note that the build type (RelWithDebInfo) is specified differently for the Windows build + than it is for the Ubuntu build. For Windows, we specify the configuration type directly in the build step using the --config flag. ``` - cmake --build build --target install + # Replace '16' by the number of CPU threads you want to use for building + cmake --build build --config RelWithDebInfo --target install -- /p:CL_MPCount=16 -m:1 ``` 11. Run the tests. From 8fb1bc776afd47ef4bc989e95f83fbfc604e8bc2 Mon Sep 17 00:00:00 2001 From: Mike Bender Date: Fri, 23 Aug 2024 09:42:03 -0400 Subject: [PATCH 40/43] chore: Add template for grouping BREAKING CHANGES to the top (#5964) - Also added a cog.toml file - Just run `cog changelog` to generate the full changelog - For between two tags/commits, add the range as an arg, e.g. 
`cog changelog v0.35.0..v0.36.0` - Generates a changelog file with the text from the Breaking Changes footers at the top --- cog.toml | 6 ++++++ deephaven-changelog | 51 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 57 insertions(+) create mode 100644 cog.toml create mode 100644 deephaven-changelog diff --git a/cog.toml b/cog.toml new file mode 100644 index 00000000000..bf4a185fe6c --- /dev/null +++ b/cog.toml @@ -0,0 +1,6 @@ +[changelog] +path = "CHANGELOG.md" +template = "deephaven-changelog" +remote = "github.com" +repository = "deephaven-core" +owner = "deephaven" \ No newline at end of file diff --git a/deephaven-changelog b/deephaven-changelog new file mode 100644 index 00000000000..2b1048e34d5 --- /dev/null +++ b/deephaven-changelog @@ -0,0 +1,51 @@ +{# Tera templates are used to generate the changelog content -#} +{# https://keats.github.io/tera/docs/ -#} +{# Based on Cocogittos remote template, but adds breaking changes: https://github.com/cocogitto/cocogitto/blob/main/src/conventional/changelog/template/remote -#} +{# First display all the breaking changes -#} +{% set breaking_commits = commits | filter(attribute="breaking_change", value=true) -%} +{% if breaking_commits | length > 0 -%} +#### ⚠ Breaking Changes + +{% for commit in breaking_commits -%} +{% set commit_link = repository_url ~ "/commit/" ~ commit.id -%} +{% set shorthand = commit.id | truncate(length=7, end="") -%} +{% for footer in commit.footer | filter(attribute="token", value="BREAKING CHANGE") -%} +- {{ footer.content }} - ([{{shorthand}}]({{ commit_link }})) +{% endfor -%} +{% endfor -%} +{% endif %} + +{# Now group the rest of the commits and display them -#} +{% for type, typed_commits in commits | sort(attribute="type")| group_by(attribute="type") -%} +#### {{ type | upper_first }} +{% for scope, scoped_commits in typed_commits | group_by(attribute="scope") -%} + +{% for commit in scoped_commits | sort(attribute="scope") -%} + {% if commit.author and repository_url -%} + {% set author = "@" ~ commit.author -%} + {% set author_link = platform ~ "/" ~ commit.author -%} + {% set author = "[" ~ author ~ "](" ~ author_link ~ ")" -%} + {% else -%} + {% set author = commit.signature -%} + {% endif -%} + {% set commit_link = repository_url ~ "/commit/" ~ commit.id -%} + {% set shorthand = commit.id | truncate(length=7, end="") -%} + - **({{ scope }})** {{ commit.summary }} - ([{{shorthand}}]({{ commit_link }})) - {{ author }} +{% endfor -%} + +{% endfor -%} + +{% for commit in typed_commits | unscoped -%} + {% if commit.author and repository_url -%} + {% set author = "@" ~ commit.author -%} + {% set author_link = platform ~ "/" ~ commit.author -%} + {% set author = "[" ~ author ~ "](" ~ author_link ~ ")" -%} + {% else -%} + {% set author = commit.signature -%} + {% endif -%} + {% set commit_link = repository_url ~ "/commit/" ~ commit.id -%} + {% set shorthand = commit.id | truncate(length=7, end="") -%} + - {{ commit.summary }} - ([{{shorthand}}]({{ commit_link }})) - {{ author }} +{% endfor -%} + +{% endfor -%} \ No newline at end of file From 4b91586a373f2088a5bca0a5b1f9adc37aa493ad Mon Sep 17 00:00:00 2001 From: Andrew <3199649+abaranec@users.noreply.github.com> Date: Fri, 23 Aug 2024 10:41:38 -0400 Subject: [PATCH 41/43] feat: Add fileIO() method (#5974) This fixes #5973 --- .../io/deephaven/iceberg/util/IcebergCatalogAdapter.java | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/extensions/iceberg/src/main/java/io/deephaven/iceberg/util/IcebergCatalogAdapter.java 
b/extensions/iceberg/src/main/java/io/deephaven/iceberg/util/IcebergCatalogAdapter.java index 486bcf18655..4955ca9223b 100644 --- a/extensions/iceberg/src/main/java/io/deephaven/iceberg/util/IcebergCatalogAdapter.java +++ b/extensions/iceberg/src/main/java/io/deephaven/iceberg/util/IcebergCatalogAdapter.java @@ -772,4 +772,12 @@ private Table readTableInternal( public Catalog catalog() { return catalog; } + + /** + * Returns the underlying Iceberg {@link FileIO} used by this adapter. + */ + @SuppressWarnings("unused") + public FileIO fileIO() { + return fileIO; + } } From a0dcbbf5edef4d97476ba81e55dc0d863b70f31f Mon Sep 17 00:00:00 2001 From: Devin Smith Date: Fri, 23 Aug 2024 07:52:41 -0700 Subject: [PATCH 42/43] chore: bump docker images (#5971) The environment variable for building py-client-ticking needed to be updated from `CFLAGS` TO `CPPFLAGS`; this change was necessary because the newer version of setuptools in the manylinux base image seems to no longer include `CFLAGS`. --- docker/registry/cpp-clients-multi-base/gradle.properties | 2 +- docker/registry/fedora/gradle.properties | 2 +- docker/registry/go/gradle.properties | 2 +- docker/registry/localstack/gradle.properties | 2 +- docker/registry/manylinux2014_x86_64/gradle.properties | 2 +- docker/registry/minio/gradle.properties | 2 +- docker/registry/protoc-base/gradle.properties | 2 +- docker/registry/python/gradle.properties | 2 +- docker/registry/server-base/gradle.properties | 2 +- docker/registry/slim-base/gradle.properties | 2 +- docker/server-jetty/src/main/server-jetty/requirements.txt | 2 +- docker/server/src/main/server-netty/requirements.txt | 2 +- py/client-ticking/README.md | 2 +- py/client-ticking/pyClientTickingWheel/entrypoint.sh | 2 +- 14 files changed, 14 insertions(+), 14 deletions(-) diff --git a/docker/registry/cpp-clients-multi-base/gradle.properties b/docker/registry/cpp-clients-multi-base/gradle.properties index cec91cf29c1..02d12f80abd 100644 --- a/docker/registry/cpp-clients-multi-base/gradle.properties +++ b/docker/registry/cpp-clients-multi-base/gradle.properties @@ -1,5 +1,5 @@ io.deephaven.project.ProjectType=DOCKER_REGISTRY deephaven.registry.imageName=ghcr.io/deephaven/cpp-clients-multi-base:latest # When the image version changes, remember to update cpp-client/build-dependencies.sh to match the image's content -deephaven.registry.imageId=ghcr.io/deephaven/cpp-clients-multi-base@sha256:5b39f286d94a335890314577faf9211bd1b72299efa328f917ddab92c99a437e +deephaven.registry.imageId=ghcr.io/deephaven/cpp-clients-multi-base@sha256:852925d8939b19344f15b9e752e9ed82724c7009f75df8a33912d85cb73e630e deephaven.registry.platform=linux/amd64 diff --git a/docker/registry/fedora/gradle.properties b/docker/registry/fedora/gradle.properties index 2068e113e91..c7ef2bb21b0 100644 --- a/docker/registry/fedora/gradle.properties +++ b/docker/registry/fedora/gradle.properties @@ -1,3 +1,3 @@ io.deephaven.project.ProjectType=DOCKER_REGISTRY deephaven.registry.imageName=fedora:39 -deephaven.registry.imageId=fedora@sha256:e8792bee618a8d1c2ca8fbcf641ceb828d1b6b69bfac1ff70792f8bd5ed10ddd +deephaven.registry.imageId=fedora@sha256:2922a1237abbb7f8517018e4f5d7a82a618f6ec09f386799e8595f9e1c39f021 diff --git a/docker/registry/go/gradle.properties b/docker/registry/go/gradle.properties index f5c58465c3f..1406a8fa3d1 100644 --- a/docker/registry/go/gradle.properties +++ b/docker/registry/go/gradle.properties @@ -1,3 +1,3 @@ io.deephaven.project.ProjectType=DOCKER_REGISTRY deephaven.registry.imageName=golang:1 
-deephaven.registry.imageId=golang@sha256:829eff99a4b2abffe68f6a3847337bf6455d69d17e49ec1a97dac78834754bd6 +deephaven.registry.imageId=golang@sha256:613a108a4a4b1dfb6923305db791a19d088f77632317cfc3446825c54fb862cd diff --git a/docker/registry/localstack/gradle.properties b/docker/registry/localstack/gradle.properties index 783af51a431..275f102fbd5 100644 --- a/docker/registry/localstack/gradle.properties +++ b/docker/registry/localstack/gradle.properties @@ -1,3 +1,3 @@ io.deephaven.project.ProjectType=DOCKER_REGISTRY deephaven.registry.imageName=localstack/localstack:3 -deephaven.registry.imageId=localstack/localstack@sha256:54fcf172f6ff70909e1e26652c3bb4587282890aff0d02c20aa7695469476ac0 +deephaven.registry.imageId=localstack/localstack@sha256:231148e6d60d040441ee0b418ab181eaedf30d18bca23ce5b44dfb863c40fb7c diff --git a/docker/registry/manylinux2014_x86_64/gradle.properties b/docker/registry/manylinux2014_x86_64/gradle.properties index ee6d3d3a0a7..b11f6c7070a 100644 --- a/docker/registry/manylinux2014_x86_64/gradle.properties +++ b/docker/registry/manylinux2014_x86_64/gradle.properties @@ -1,4 +1,4 @@ io.deephaven.project.ProjectType=DOCKER_REGISTRY deephaven.registry.imageName=quay.io/pypa/manylinux2014_x86_64:latest -deephaven.registry.imageId=quay.io/pypa/manylinux2014_x86_64@sha256:07a0f9d59d8d075bb4f84dffc5dd76d991b90d7e4f315d1ef9f48a4f04390b71 +deephaven.registry.imageId=quay.io/pypa/manylinux2014_x86_64@sha256:545ae43918a0b5a871a09ccb4421c68230a109a21d99cd6f4806b2e4c34543d2 deephaven.registry.platform=linux/amd64 diff --git a/docker/registry/minio/gradle.properties b/docker/registry/minio/gradle.properties index 6d8fedc8c19..1ab1b39c1d1 100644 --- a/docker/registry/minio/gradle.properties +++ b/docker/registry/minio/gradle.properties @@ -1,3 +1,3 @@ io.deephaven.project.ProjectType=DOCKER_REGISTRY deephaven.registry.imageName=minio/minio:latest -deephaven.registry.imageId=minio/minio@sha256:77ff9f7e12549d269990b167fa21da010fa8b0beb40d6064569b8887e37c456b +deephaven.registry.imageId=minio/minio@sha256:6f23072e3e222e64fe6f86b31a7f7aca971e5129e55cbccef649b109b8e651a1 diff --git a/docker/registry/protoc-base/gradle.properties b/docker/registry/protoc-base/gradle.properties index 14302326f6d..3888cf90fab 100644 --- a/docker/registry/protoc-base/gradle.properties +++ b/docker/registry/protoc-base/gradle.properties @@ -1,5 +1,5 @@ io.deephaven.project.ProjectType=DOCKER_REGISTRY deephaven.registry.imageName=ghcr.io/deephaven/protoc-base:latest -deephaven.registry.imageId=ghcr.io/deephaven/protoc-base@sha256:a7f819be945c058a96079960c94fea0ca04c6189aa92c0ed45fcf4131ecbbf23 +deephaven.registry.imageId=ghcr.io/deephaven/protoc-base@sha256:70f31806515b4d160ab6ddf9b7b6612441b5918b0f1c5586ee2cc9ad8a4c81da # TODO(deephaven-base-images#54): arm64 native image for cpp-client-base deephaven.registry.platform=linux/amd64 diff --git a/docker/registry/python/gradle.properties b/docker/registry/python/gradle.properties index 3b94db0e04e..0cec680f6ef 100644 --- a/docker/registry/python/gradle.properties +++ b/docker/registry/python/gradle.properties @@ -1,3 +1,3 @@ io.deephaven.project.ProjectType=DOCKER_REGISTRY deephaven.registry.imageName=python:3.10 -deephaven.registry.imageId=python@sha256:d47cb1b09be409a0cb5efeb01dd3ff5d44a5e28d371fa1a736c8928ab72fffd2 +deephaven.registry.imageId=python@sha256:09447073b9603858602b4a725ad92e4fd4c8f6979768cee295afae67fd997718 diff --git a/docker/registry/server-base/gradle.properties b/docker/registry/server-base/gradle.properties index 75412cde87f..a9314887d1b 
100644 --- a/docker/registry/server-base/gradle.properties +++ b/docker/registry/server-base/gradle.properties @@ -1,3 +1,3 @@ io.deephaven.project.ProjectType=DOCKER_REGISTRY deephaven.registry.imageName=ghcr.io/deephaven/server-base:edge -deephaven.registry.imageId=ghcr.io/deephaven/server-base@sha256:3304ec00e18bf86aee18b3491c7d92b326773deacfd98d1556facaddb1957a63 +deephaven.registry.imageId=ghcr.io/deephaven/server-base@sha256:66f8cecdac170dfb8bf284e41684480359a15443dc5a5b8a1efc95987f3ddcb5 diff --git a/docker/registry/slim-base/gradle.properties b/docker/registry/slim-base/gradle.properties index cc812a7b768..61745e6630c 100644 --- a/docker/registry/slim-base/gradle.properties +++ b/docker/registry/slim-base/gradle.properties @@ -1,3 +1,3 @@ io.deephaven.project.ProjectType=DOCKER_REGISTRY deephaven.registry.imageName=ghcr.io/deephaven/server-slim-base:edge -deephaven.registry.imageId=ghcr.io/deephaven/server-slim-base@sha256:ba5ce0113b246c50179e4355564f202840319f09a95cd921158c9042a528e03f +deephaven.registry.imageId=ghcr.io/deephaven/server-slim-base@sha256:0605678d4961227fa0a8e83c0c8b030f56e581c070504dcca49a44a9ab43d8f0 diff --git a/docker/server-jetty/src/main/server-jetty/requirements.txt b/docker/server-jetty/src/main/server-jetty/requirements.txt index a80149cbaaa..72c871eb253 100644 --- a/docker/server-jetty/src/main/server-jetty/requirements.txt +++ b/docker/server-jetty/src/main/server-jetty/requirements.txt @@ -2,7 +2,7 @@ adbc-driver-manager==1.1.0 adbc-driver-postgresql==1.1.0 connectorx==0.3.3; platform.machine == 'x86_64' deephaven-plugin==0.6.0 -importlib_resources==6.4.0 +importlib_resources==6.4.3 java-utilities==0.3.0 jedi==0.19.1 jpy==0.18.0 diff --git a/docker/server/src/main/server-netty/requirements.txt b/docker/server/src/main/server-netty/requirements.txt index a80149cbaaa..72c871eb253 100644 --- a/docker/server/src/main/server-netty/requirements.txt +++ b/docker/server/src/main/server-netty/requirements.txt @@ -2,7 +2,7 @@ adbc-driver-manager==1.1.0 adbc-driver-postgresql==1.1.0 connectorx==0.3.3; platform.machine == 'x86_64' deephaven-plugin==0.6.0 -importlib_resources==6.4.0 +importlib_resources==6.4.3 java-utilities==0.3.0 jedi==0.19.1 jpy==0.18.0 diff --git a/py/client-ticking/README.md b/py/client-ticking/README.md index 6a7ece5be63..2a01e869c7a 100644 --- a/py/client-ticking/README.md +++ b/py/client-ticking/README.md @@ -72,7 +72,7 @@ cd ${DHROOT}/py/client-ticking ```sh # Ensure the DHCPP environment variable is set per the instructions above rm -rf build dist # Ensure we clean the remnants of any pre-existing build. 
-DEEPHAVEN_VERSION=$(../../gradlew :printVersion -q) CFLAGS="-I${DHCPP}/include" LDFLAGS="-L${DHCPP}/lib" python3 setup.py build_ext -i +DEEPHAVEN_VERSION=$(../../gradlew :printVersion -q) CPPFLAGS="-I${DHCPP}/include" LDFLAGS="-L${DHCPP}/lib" python3 setup.py build_ext -i ``` #### Install pydeephaven-ticking diff --git a/py/client-ticking/pyClientTickingWheel/entrypoint.sh b/py/client-ticking/pyClientTickingWheel/entrypoint.sh index 413a6a011d7..af3d784a28a 100755 --- a/py/client-ticking/pyClientTickingWheel/entrypoint.sh +++ b/py/client-ticking/pyClientTickingWheel/entrypoint.sh @@ -13,7 +13,7 @@ rm -f ./*.cpp ./*.so PATH="/opt/python/${PYTHON_TAG}/bin:$PATH" MAKEFLAGS="-j${NCPUS}" \ - CFLAGS="-I${DHCPP}/include" \ + CPPFLAGS="-I${DHCPP}/include" \ LDFLAGS="-L${DHCPP}/lib" \ DEEPHAVEN_VERSION="${DEEPHAVEN_VERSION}" \ python setup.py build_ext -i From 187bbb62270ef2e598f3990bf6060d8d687bfd97 Mon Sep 17 00:00:00 2001 From: Devin Smith Date: Fri, 23 Aug 2024 08:18:31 -0700 Subject: [PATCH 43/43] fix: use correct syntax for code-block in python documentation (#5965) See the example usage illustrated here: https://www.sphinx-doc.org/en/master/usage/restructuredtext/directives.html#directive-code-block --- py/server/deephaven/json/__init__.py | 68 ++++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) diff --git a/py/server/deephaven/json/__init__.py b/py/server/deephaven/json/__init__.py index 77957733df8..3112b2d68cc 100644 --- a/py/server/deephaven/json/__init__.py +++ b/py/server/deephaven/json/__init__.py @@ -7,38 +7,45 @@ example, the JSON object .. code-block:: json + { "name": "Foo", "age": 42, "location": { "lat": 45.018269, "lon": -93.473892 } } can be modelled with the dictionary .. code-block:: python + { "name": str, "age": int, "location": { "lat": float, "lon": float } } Notice that this allows for the nested modelling of JSON values. Other common constructions involve the modelling of JSON arrays. For example, a variable-length JSON array where the elements are the same type .. code-block:: json + [42, 31, ..., 12345] can be modelled with a single-element list containing the element type .. code-block:: python + [ int ] If the JSON array is a fixed size and each elements' type is known, for example .. code-block:: json + ["Foo", 42, [45.018269, -93.473892]] can be modelled with a tuple containing each type .. code-block:: python + (str, int, (float, float)) Notice again that this allows for the nested modelling of JSON values. Of course, these constructions can be all be used together. For example, the JSON object .. code-block:: json + { "name": "Foo", "locations": [ @@ -51,6 +58,7 @@ can be modelled as .. code-block:: python + {"name": str, "locations": [(float, float)]} See the methods in this module more more details on modelling JSON values. @@ -200,7 +208,9 @@ def json_val(json_value_type: JsonValueType) -> JsonValue: class RepeatedFieldBehavior(Enum): """ The behavior to use when a repeated field is encountered in a JSON object. For example, + .. code-block:: json + { "foo": 42, "foo": 43 @@ -222,6 +232,7 @@ class ObjectField: simplify by just using the JsonValueType. For example, .. code-block:: python + { "name": ObjectField(str), "age": ObjectField(int), @@ -230,6 +241,7 @@ class ObjectField: could be simplified to .. code-block:: python + { "name": str, "age": int, @@ -294,22 +306,26 @@ def object_val( """Creates an object value. For example, the JSON object .. code-block:: json + { "name": "foo", "age": 42 } might be modelled as the object type .. 
+
        object_val({ "name": str, "age": int })
 
     In contexts where the user needs to create a JsonValueType and isn't changing any default values, the user can
     simplify by using a Dict[str, Union[JsonValueType, ObjectField]]. For example,
 
     .. code-block:: python
+
        some_method(object_val({ "name": str, "age": int }))
 
     could be simplified to
 
     .. code-block:: python
+
        some_method({ "name": str, "age": int })
 
     Args:
@@ -356,9 +372,11 @@ def typed_object_val(
     """Creates a type-discriminated object value. For example, the JSON objects
 
     .. code-block:: json
+
        { "type": "trade", "symbol": "FOO", "price": 70.03, "size": 42 }
 
     .. code-block:: json
+
        { "type": "quote", "symbol": "BAR", "bid": 10.01, "ask": 10.05 }
 
     might be modelled as a type-discriminated object with "type" as the type field, "symbol" as a shared field, with a
@@ -366,6 +384,7 @@
     field:
 
     .. code-block:: python
+
        typed_object_val(
            "type",
            {"symbol": str},
@@ -432,22 +451,26 @@ def array_val(
     """Creates a "typed array", where all elements of the array have the same element type. For example, the JSON array
 
     .. code-block:: json
+
        [1, 42, 43, 13]
 
     might be modelled as an array of ints
 
     .. code-block:: python
+
        array_val(int)
 
     In contexts where the user needs to create a JsonValueType and isn't changing any default values, the user can
     simplify by using a list with a single element type. For example,
 
     .. code-block:: python
+
        some_method(array_val(element))
 
     could be simplified to
 
     .. code-block:: python
+
        some_method([element])
 
     Args:
@@ -474,6 +497,7 @@ def object_entries_val(
     variable and all the values types are the same. For example, the JSON object
 
     .. code-block:: json
+
        {
            "foo": 1,
            "bar": 42,
@@ -485,6 +509,7 @@
     might be modelled as the object kv type
 
     .. code-block:: python
+
        object_entries_val(int)
 
     Args:
@@ -511,16 +536,19 @@ def tuple_val(
     """Creates a tuple value. For example, the JSON array
 
     .. code-block:: json
+
        ["foo", 42, 5.72]
 
     might be modelled as the tuple type
 
     .. code-block:: python
+
        tuple_val((str, int, float))
 
     To provide meaningful names, a dictionary can be used:
 
     .. code-block:: python
+
        tuple_val({"name": str, "age": int, "height": float})
 
     otherwise, default names based on the indexes of the values will be used.
@@ -529,11 +557,13 @@
     names, the user can simplify passing through a python tuple type. For example,
 
     .. code-block:: python
+
        some_method(tuple_val((tuple_type_1, tuple_type_2)))
 
     could be simplified to
 
     .. code-block:: python
+
        some_method((tuple_type_1, tuple_type_2))
 
     Args:
@@ -571,22 +601,26 @@ def bool_val(
     """Creates a bool value. For example, the JSON boolean
 
     .. code-block:: json
+
        True
 
     might be modelled as the bool type
 
     .. code-block:: python
+
        bool_val()
 
     In contexts where the user needs to create a JsonValueType and isn't changing any default values, the user can
     simplify by using the python built-in bool type. For example,
 
     .. code-block:: python
+
        some_method(bool_val())
 
     could be simplified to
 
     .. code-block:: python
+
        some_method(bool)
 
     Args:
@@ -623,11 +657,13 @@ def char_val(
     """Creates a char value. For example, the JSON string
 
     .. code-block:: json
+
        "F"
 
     might be modelled as the char type
 
     .. code-block:: python
+
        char_val()
 
     Args:
@@ -664,11 +700,13 @@ def byte_val(
     """Creates a byte (signed 8-bit) value. For example, the JSON integer
 
     .. code-block:: json
+
        42
 
     might be modelled as the byte type
 
     .. code-block:: python
+
        byte_val()
 
     Args:
@@ -709,11 +747,13 @@ def short_val(
     """Creates a short (signed 16-bit) value. For example, the JSON integer
 
     .. code-block:: json
+
        30000
 
     might be modelled as the short type
 
     .. code-block:: python
+
        short_val()
 
     Args:
@@ -754,11 +794,13 @@ def int_val(
     """Creates an int (signed 32-bit) value. For example, the JSON integer
 
     .. code-block:: json
+
        100000
 
     might be modelled as the int type
 
     .. code-block:: python
+
        int_val()
 
     Args:
@@ -799,22 +841,26 @@ def long_val(
     """Creates a long (signed 64-bit) value. For example, the JSON integer
 
     .. code-block:: json
+
        8000000000
 
     might be modelled as the long type
 
     .. code-block:: python
+
        long_val()
 
     In contexts where the user needs to create a JsonValueType and isn't changing any default values, the user can
     simplify by using the python built-in long type. For example,
 
     .. code-block:: python
+
        some_method(long_val())
 
     could be simplified to
 
     .. code-block:: python
+
        some_method(int)
 
     Args:
@@ -854,11 +900,13 @@ def float_val(
     """Creates a float (signed 32-bit) value. For example, the JSON decimal
 
     .. code-block:: json
+
        42.42
 
     might be modelled as the float type
 
     .. code-block:: python
+
        float_val()
 
     Args:
@@ -897,22 +945,26 @@ def double_val(
     """Creates a double (signed 64-bit) value. For example, the JSON decimal
 
     .. code-block:: json
+
        42.42424242
 
     might be modelled as the double type
 
     .. code-block:: python
+
        double_val()
 
     In contexts where the user needs to create a JsonValueType and isn't changing any default values, the user can
     simplify by using the python built-in float type. For example,
 
     .. code-block:: python
+
        some_method(double_val())
 
     could be simplified to
 
     .. code-block:: python
+
        some_method(float)
 
     Args:
@@ -953,22 +1005,26 @@ def string_val(
     """Creates a String value. For example, the JSON string
 
     .. code-block:: json
+
        "Hello, world!"
 
     might be modelled as the string type
 
     .. code-block:: python
+
        string_val()
 
     In contexts where the user needs to create a JsonValueType and isn't changing any default values, the user can
     simplify by using the python built-in str type. For example,
 
     .. code-block:: python
+
        some_method(string_val())
 
     could be simplified to
 
     .. code-block:: python
+
        some_method(str)
 
     Args:
@@ -1012,32 +1068,38 @@ def instant_val(
     """Creates an Instant value. For example, the JSON string
 
     .. code-block:: json
+
        "2009-02-13T23:31:30.123456789Z"
 
     might be modelled as the Instant type
 
     .. code-block:: python
+
        instant_val()
 
     In another example, the JSON decimal
 
     .. code-block:: json
+
        1234567890.123456789
 
     might be modelled as the Instant type
 
     .. code-block:: python
+
        instant_val(number_format="s", allow_decimal=True)
 
     In contexts where the user needs to create a JsonValueType and isn't changing any default values, the user can
     simplify by using the python datetime.datetime type. For example,
 
     .. code-block:: python
+
        some_method(instant_val())
 
     could be simplified to
 
     .. code-block:: python
+
        some_method(datetime.datetime)
 
     Args:
@@ -1106,11 +1168,13 @@ def big_integer_val(
     """Creates a BigInteger value. For example, the JSON integer
 
     .. code-block:: json
+
        123456789012345678901
 
     might be modelled as the BigInteger type
 
     .. code-block:: python
+
        big_integer_val()
 
     Args:
@@ -1150,11 +1214,13 @@ def big_decimal_val(
     """Creates a BigDecimal value. For example, the JSON decimal
 
     .. code-block:: json
+
        123456789012345678901.42
 
     might be modelled as the BigDecimal type
 
     .. code-block:: python
+
        big_decimal_val()
 
     Args:
@@ -1207,11 +1273,13 @@ def skip_val(
     This may be useful in combination with an object type where allow_unknown_fields=False. For example, the JSON object
 
     .. code-block:: json
+
        { "name": "foo", "age": 42 }
 
     might be modelled as the object type
 
     .. code-block:: python
+
        object_val({ "name": str, "age": skip_val() }, allow_unknown_fields=False)
 
     Args:
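
Taken together, the docstrings touched by this patch all describe one convention: plain Python types stand in for JSON value types, and the `*_val` helpers are only needed when overriding defaults. A minimal sketch of that convention, assuming a `deephaven.json` module that exports `object_val`, `array_val`, and `skip_val` as shown in the diff; the field names below are illustrative only, not taken from the patch:

```python
# Sketch only: assumes deephaven.json exposes object_val, array_val, and skip_val
# with the shapes shown in the patched docstrings. "name", "age", "locations",
# and "internal" are made-up field names used purely for illustration.
from deephaven.json import array_val, object_val, skip_val

# Model {"name": "Foo", "age": 42, "locations": [[45.0, -93.4], ...], "internal": ...}
spec = object_val(
    {
        "name": str,                              # JSON string modelled with the built-in str type
        "age": int,                               # JSON integer modelled with the built-in int type
        "locations": array_val((float, float)),   # variable-length array of fixed [lat, lon] pairs
        "internal": skip_val(),                   # parse and ignore this field
    },
    allow_unknown_fields=False,                   # reject fields not listed above
)

# Shorthand from the module docstring: a dict of Python types is itself a
# JsonValueType, and a single-element list models a typed array.
shorthand = {"name": str, "age": int, "locations": [(float, float)]}
```

Per the simplifications the docstrings call out, the `shorthand` form and the explicit `object_val(...)` form should be interchangeable wherever a JsonValueType is accepted; the explicit form is only needed to change defaults such as `allow_unknown_fields`.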