Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add the optional dependencies parameters in the transform/partitioned_transform methods #4891

Merged
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 24 additions & 6 deletions py/server/deephaven/table.py
Original file line number Diff line number Diff line change
Expand Up @@ -2663,13 +2663,18 @@ def constituent_tables(self) -> List[Table]:
"""Returns all the current constituent tables."""
return list(map(Table, self.j_partitioned_table.constituents()))

def transform(self, func: Callable[[Table], Table]) -> PartitionedTable:
def transform(self, func: Callable[[Table], Table],
dependencies: Optional[Sequence[Union[Table, PartitionedTable]]] = None) -> PartitionedTable:
"""Apply the provided function to all constituent Tables and produce a new PartitionedTable with the results
as its constituents, with the same data for all other columns in the underlying partitioned Table. Note that
if the Table underlying this PartitionedTable changes, a corresponding change will propagate to the result.

Args:
func (Callable[[Table], Table]): a function which takes a Table as input and returns a new Table
dependencies (Optional[Sequence[Union[Table, PartitionedTable]]]): additional dependencies that must be
satisfied before applying the provided transform function to added or modified constituents during
update processing, use this when the transform function uses additional Table or Partitioned Table
inputs besides the constituents of this PartitionedTable. Defaults to None.
jmao-denver marked this conversation as resolved.
Show resolved Hide resolved

Returns:
a PartitionedTable
Expand All @@ -2679,13 +2684,18 @@ def transform(self, func: Callable[[Table], Table]) -> PartitionedTable:
"""
try:
j_operator = j_unary_operator(func, dtypes.from_jtype(Table.j_object_type.jclass))
with auto_locking_ctx(self):
j_pt = self.j_partitioned_table.transform(j_operator)
dependencies = to_sequence(dependencies, wrapped=True)
j_dependencies = [d.j_table for d in dependencies if isinstance(d, Table)]
j_dependencies.extend([d.table.j_table for d in dependencies if isinstance(d, PartitionedTable)])
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There should be a size assert because this will happily run if non Table / PartitionedTable garbage is included.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It might be an overkill, I think it is safe to assume that users who need to use this parameter knows what they are doing.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Will Python's type system (hah) allow that?

with auto_locking_ctx(self, *dependencies):
j_pt = self.j_partitioned_table.transform(j_operator, j_dependencies)
return PartitionedTable(j_partitioned_table=j_pt)
except Exception as e:
raise DHError(e, "failed to transform the PartitionedTable.") from e

def partitioned_transform(self, other: PartitionedTable, func: Callable[[Table, Table], Table]) -> PartitionedTable:
def partitioned_transform(self, other: PartitionedTable, func: Callable[[Table, Table], Table],
dependencies: Optional[Sequence[Union[Table, PartitionedTable]]] = None) -> \
PartitionedTable:
"""Join the underlying partitioned Tables from this PartitionedTable and other on the key columns, then apply
the provided function to all pairs of constituent Tables with the same keys in order to produce a new
PartitionedTable with the results as its constituents, with the same data for all other columns in the
Expand All @@ -2698,6 +2708,10 @@ def partitioned_transform(self, other: PartitionedTable, func: Callable[[Table,
other (PartitionedTable): the other Partitioned table whose constituent tables will be passed in as the 2nd
argument to the provided function
func (Callable[[Table, Table], Table]): a function which takes two Tables as input and returns a new Table
dependencies (Optional[Sequence[Union[Table, PartitionedTable]]]): additional dependencies that must be
satisfied before applying the provided transform function to added, modified, or newly-matched
constituents during update processing, use this when the transform function uses additional Table or
Partitioned Table inputs besides the constituents of this PartitionedTable. Defaults to None.
jmao-denver marked this conversation as resolved.
Show resolved Hide resolved

Returns:
a PartitionedTable
Expand All @@ -2707,8 +2721,12 @@ def partitioned_transform(self, other: PartitionedTable, func: Callable[[Table,
"""
try:
j_operator = j_binary_operator(func, dtypes.from_jtype(Table.j_object_type.jclass))
with auto_locking_ctx(self, other):
j_pt = self.j_partitioned_table.partitionedTransform(other.j_partitioned_table, j_operator)
dependencies = to_sequence(dependencies, wrapped=True)
j_dependencies = [d.j_table for d in dependencies if isinstance(d, Table)]
j_dependencies.extend([d.table.j_table for d in dependencies if isinstance(d, PartitionedTable)])
jmao-denver marked this conversation as resolved.
Show resolved Hide resolved
with auto_locking_ctx(self, other, *dependencies):
j_pt = self.j_partitioned_table.partitionedTransform(other.j_partitioned_table, j_operator,
j_dependencies)
return PartitionedTable(j_partitioned_table=j_pt)
except Exception as e:
raise DHError(e, "failed to transform the PartitionedTable with another PartitionedTable.") from e
Expand Down
9 changes: 9 additions & 0 deletions py/server/tests/test_partitioned_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from deephaven.filters import Filter

from deephaven import read_csv, DHError, new_table, update_graph, time_table, empty_table
from deephaven.update_graph import shared_lock
from tests.testbase import BaseTestCase
from deephaven.execution_context import get_exec_ctx

Expand Down Expand Up @@ -128,6 +129,10 @@ def test_transform(self):
pt = self.partitioned_table.transform(Transformer)
self.assertIn("f", [col.name for col in pt.constituent_table_columns])

with shared_lock(self.test_table):
jmao-denver marked this conversation as resolved.
Show resolved Hide resolved
pt = self.partitioned_table.transform(Transformer, dependencies=[self.test_table])
self.assertIn("f", [col.name for col in pt.constituent_table_columns])

with self.assertRaises(DHError) as cm:
pt = self.partitioned_table.transform(lambda t, t1: t.join(t1))
self.assertRegex(str(cm.exception), r"missing .* argument")
Expand All @@ -141,6 +146,10 @@ def test_partitioned_transform(self):
pt = self.partitioned_table.partitioned_transform(other_pt, PartitionedTransformer())
self.assertIn("f", [col.name for col in pt.constituent_table_columns])

with shared_lock(other_pt):
pt = self.partitioned_table.partitioned_transform(other_pt, PartitionedTransformer(), dependencies=[other_pt])
self.assertIn("f", [col.name for col in pt.constituent_table_columns])

def test_partition_agg(self):
with update_graph.shared_lock(self.test_update_graph):
test_table = time_table("PT00:00:00.001").update(["X=i", "Y=i%13", "Z=X*Y"])
Expand Down
Loading