Skip to content

Commit

Permalink
[CALCITE-6595] Preserve collation on non-distinct aggCalls in Aggrega…
Browse files Browse the repository at this point in the history
…teExpandWithinDistinctRule
  • Loading branch information
tjbanghart committed Sep 25, 2024
1 parent cd6762e commit aeab8af
Show file tree
Hide file tree
Showing 5 changed files with 82 additions and 5 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -448,7 +448,7 @@ public AggregateCall rename(@Nullable String name) {
}
buf.append(")");
}
if (!collation.equals(RelCollations.EMPTY)) {
if (hasCollation()) {
buf.append(" WITHIN GROUP (");
buf.append(collation);
buf.append(")");
Expand All @@ -465,6 +465,11 @@ public boolean hasFilter() {
return filterArg >= 0;
}

/** Returns true if this AggregateCall has a non-empty collation. Returns false otherwise. */
public boolean hasCollation() {
return !collation.equals(RelCollations.EMPTY);
}

/** Withs {@link #filterArg}. */
public AggregateCall withFilter(int filterArg) {
return filterArg == this.filterArg ? this
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -348,10 +348,13 @@ int getCount(int filterArg) {
if (c.distinctKeys == null) {
RelBuilder.AggCall aggCall =
b.aggregateCall(c.getParserPosition(), c.getAggregation(), b.fields(c.getArgList()));
registrar.registerAgg(i,
c.hasFilter()
? aggCall.filter(b.field(c.filterArg))
: aggCall);
if (c.hasFilter()) {
aggCall = aggCall.filter(b.field(c.filterArg));
}
if (c.hasCollation()) {
aggCall = aggCall.sort(b.fields(c.getCollation()));
}
registrar.registerAgg(i, aggCall);
} else {
for (int inputIdx : c.getArgList()) {
registrar.register(inputIdx, c.filterArg);
Expand Down
18 changes: 18 additions & 0 deletions core/src/test/java/org/apache/calcite/test/RelOptRulesTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -2417,6 +2417,24 @@ private void checkSemiOrAntiJoinProjectTranspose(JoinRelType type) {
sql(sql).withProgram(program).check();
}

/** Test case for
* <a href="https://issues.apache.org/jira/browse/CALCITE-6595">[CALCITE-6595]
* Preserve collation for non-distinct aggregate calls with AggregateExpandWithinDistinctRule</a>.
*
* <p>Test that AggregateExpandWithinDistinctRule preserves collation on non-distinct aggregates
* with a WITHIN GROUP clause in a query that also includes a distinct aggregate. */
@Test void testWithinDistinctPreservesNonDistinctCollation() {
final String sql = "SELECT SUM(sal) WITHIN DISTINCT (job),\n"
+ "LISTAGG(ename, '; ') WITHIN GROUP (ORDER BY sal DESC)\n"
+ " FROM Emp\n"
+ "GROUP BY deptno";
HepProgram program = new HepProgramBuilder()
.addRuleInstance(CoreRules.AGGREGATE_REDUCE_FUNCTIONS)
.addRuleInstance(CoreRules.AGGREGATE_EXPAND_WITHIN_DISTINCT)
.build();
sql(sql).withProgram(program).check();
}

/** Tests {@link AggregateExpandWithinDistinctRule}. Includes multiple
* different filters for the aggregate calls, and all aggregate calls have the
* same distinct keys, so there is no need to filter based on
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17378,6 +17378,33 @@ LogicalProject(DEPTNO=[$0], EXPR$1=[CASE(=($2, 0), null:INTEGER, $1)], EXPR$2=[$
LogicalAggregate(group=[{0, 3}], groups=[[{0, 3}, {0}]], agg#0=[$SUM0($1) FILTER $2], agg#1=[COUNT() FILTER $2], agg#2=[MIN($1)], agg#3=[MAX($1)], agg#4=[GROUPING($0, $3)])
LogicalProject(DEPTNO=[$7], SAL=[$5], $f2=[>($5, 1000)], JOB=[$2])
LogicalTableScan(table=[[CATALOG, SALES, EMP]])
]]>
</Resource>
</TestCase>
<TestCase name="testWithinDistinctPreservesNonDistinctCollation">
<Resource name="sql">
<![CDATA[SELECT SUM(sal) WITHIN DISTINCT (job),
LISTAGG(ename, '; ') WITHIN GROUP (ORDER BY sal DESC)
FROM Emp
GROUP BY deptno]]>
</Resource>
<Resource name="planBefore">
<![CDATA[
LogicalProject(EXPR$0=[$1], EXPR$1=[$2])
LogicalAggregate(group=[{0}], EXPR$0=[SUM($1) WITHIN DISTINCT ($2)], EXPR$1=[LISTAGG($3, $4) WITHIN GROUP ([1 DESC])])
LogicalProject(DEPTNO=[$7], SAL=[$5], JOB=[$2], ENAME=[$1], $f4=['; '])
LogicalTableScan(table=[[CATALOG, SALES, EMP]])
]]>
</Resource>
<Resource name="planAfter">
<![CDATA[
LogicalProject(EXPR$0=[$1], EXPR$1=[$2])
LogicalProject(DEPTNO=[$0], $f1=[$1], $f2=[CAST($2):VARCHAR(20) NOT NULL])
LogicalAggregate(group=[{0}], agg#0=[$SUM0($1) FILTER $3], agg#1=[MIN($2) FILTER $4])
LogicalProject(DEPTNO=[$0], $f2=[$2], $f4=[$4], $f6=[AND(=($5, 0), $THROW_UNLESS(OR(<>($5, 0), =($2, $3)), 'more than one distinct value in agg UNIQUE_VALUE'))], $f7=[=($5, 1)])
LogicalAggregate(group=[{0, 2}], groups=[[{0, 2}, {0}]], agg#0=[MIN($1)], agg#1=[MAX($1)], agg#2=[LISTAGG($3, $4) WITHIN GROUP ([1 DESC])], agg#3=[GROUPING($0, $2)])
LogicalProject(DEPTNO=[$7], SAL=[$5], JOB=[$2], ENAME=[$1], $f4=['; '])
LogicalTableScan(table=[[CATALOG, SALES, EMP]])
]]>
</Resource>
</TestCase>
Expand Down
24 changes: 24 additions & 0 deletions core/src/test/resources/sql/within-distinct.iq
Original file line number Diff line number Diff line change
Expand Up @@ -939,4 +939,28 @@ FROM FriendStates;

!ok

#####################################################################
# Test case for [CALCITE-6595]: AggregateExpandWithinDistinctRule preserves collation on non-distinct aggregates with a
# WITHIN GROUP clause in a query that also includes a distinct aggregate.

WITH FriendStates
AS (SELECT * FROM (VALUES
('Alice', 789, 'UT'),
('Bob', 25, 'UT'),
('Carlos', 25, 'UT'),
('Dan', 678, 'UT'),
('Erin', 567, 'WY'),
('Forest', NULL, 'WY')) AS FriendStates (name, age, state))
SELECT SUM(age) WITHIN DISTINCT (name) as sum_age,
REPLACE(LISTAGG(name, ',') WITHIN GROUP (ORDER BY name DESC), ' ', '') as ordered_name
FROM FriendStates;
+---------+----------------------------------+
| SUM_AGE | ORDERED_NAME |
+---------+----------------------------------+
| 2084 | Forest,Erin,Dan,Carlos,Bob,Alice |
+---------+----------------------------------+
(1 row)

!ok

# End within-distinct.iq

0 comments on commit aeab8af

Please sign in to comment.